changeset 58:4053c3e0fa7f

implement group()
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Fri, 12 Jun 2015 19:02:00 +0900
parents 71b497d25273
children af189c727733
files c/regexParser/main.cc
diffstat 1 files changed, 40 insertions(+), 22 deletions(-) [+]
line wrap: on
line diff
--- a/c/regexParser/main.cc	Thu Jun 11 19:50:02 2015 +0900
+++ b/c/regexParser/main.cc	Fri Jun 12 19:02:00 2015 +0900
@@ -2,24 +2,29 @@
  * <literal> ::= [a-z][A-Z][0-9]
  * <charClass> ::= '['<literal>'-'<literal>']'
  * <string> ::= <literal><literal>*
- * <or> ::= '('<regex>'|'<regex>')'
+ * <group> ::= '('<regex>')'
+ * <or> ::= <regex>'|'<regex>
  * <*> ::= <regex>'*'
- * <regex> ::= <literal>|<conc>|<or>|<charClass>
+ * <regex> ::= <string>|<or>|<charClass>|<group>|<*>
  */
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
-char *ptr;
 typedef struct node {
+    struct node *self;
     char character;
     struct node *left;
     struct node *right;
 } Node, *NodePtr;
 
+char *ptr;
+NodePtr regexHeadNode;
+
 NodePtr charClass();
 NodePtr string();
+NodePtr group();
 NodePtr _or();
 NodePtr asterisk();
 NodePtr regex();
@@ -27,14 +32,17 @@
 
 NodePtr createNode(char character, NodePtr left, NodePtr right) {
     NodePtr n = (NodePtr)malloc(sizeof(Node));
+    n->self = n;
     n->character = character;
     n->left = left;
     n->right = right;
+
     return n;
 }
 
 // <charClass> ::= '['<literal>'-'<literal>']'
 NodePtr charClass() {
+    ptr++;
     NodePtr n = createNode(0,0,0);
     return n;
 }
@@ -50,25 +58,35 @@
 // <string> ::= <literal><literal>*
 NodePtr string() {
     char c = *ptr;
-    NodePtr n;
-
+    NodePtr n = NULL;
+    printf("%c\n",c);
     if (('a'<=c && c<='z')||('A'<=c && c<='Z')||('0'<=c && c<='9')) {
         n = createNode(0,literal(),string());
-        return n;
     } else {
         n = createNode(0,0,0);
     }
+    return n;
 }
 
-// <or> ::= '('<regex>'|'<regex>')'
+// <group> ::= '('<regex>')' | '('<regex>'|'<regex>')'
+NodePtr group() {
+    NodePtr n;
+    if (*ptr == ')') {
+        n = createNode(0,0,0);
+        ptr++;
+    } else {
+        ptr++;
+        n = regex();
+    }
+
+    return n;
+}
+
+
+// <or> ::= <regex>'|'<regex>
 NodePtr _or() {
-    regex();
-    while(*ptr++ == ')') {
-        if (*ptr == '|') {
-            ptr++;
-            regex();
-        }
-    }
+    NodePtr n = createNode('|',regexHeadNode,regex());
+    return n;
 }
 
 // <*> ::= <regex>'*'
@@ -76,22 +94,22 @@
 
 }
 
-// <regex> ::= <literal>|<string>|<or>|<charClass>
-// <literal> は <string> に内包されるから、<regex> ::= <string>|<or>|<charClass>が正しい??
+// <regex> ::= <string>|<or>|<charClass>|<group>|<*>
 NodePtr regex() {
 
     NodePtr n;
 
-    while (char c = *ptr) {
-        if (c == '(') {
-            ptr++;
+    while (*ptr != '\0') {
+        if ((*ptr == '(') || (*ptr == ')')) {
+            n = group();
+        } else if (*ptr == '[') {
+            n = charClass();
+        } else if (*ptr == '|'){
             n = _or();
-        } else if (c == '[') {
-            n = charClass();
         } else {
             n = string();
+            regexHeadNode = n;
         }
-        ptr++;
     }
 
     return n;