diff c/regexParser/regexParser.cc @ 134:dbafc753078e pairPro

fix concatenation & selection & grouping
author masa
date Fri, 04 Dec 2015 17:45:09 +0900
parents ccc673449351
children e1a262ec75f0
line wrap: on
line diff
--- a/c/regexParser/regexParser.cc	Thu Dec 03 20:47:11 2015 +0900
+++ b/c/regexParser/regexParser.cc	Fri Dec 04 17:45:09 2015 +0900
@@ -5,10 +5,7 @@
 #include "regexParser.h"
 #include "error.h"
 
-static NodePtr allocateNode();
-static NodePtr createNode(RegexInfoPtr,unsigned char,NodePtr,NodePtr);
 static NodePtr charClass(RegexInfoPtr);
-static NodePtr group(RegexInfoPtr);
 static void token(RegexInfoPtr);
 static NodePtr regexAtom(RegexInfoPtr);
 NodePtr regex(RegexInfoPtr);
@@ -37,24 +34,22 @@
     cc->cond->w = NEW(Word);
     cc->cond->w->word = ri->tokenValue;
     cc->cond->w->length = ri->ptr - ri->tokenValue;
+    token(ri);
 
     return cc;
 }
 
 static
-NodePtr createNode(RegexInfoPtr ri,unsigned char type, NodePtr left, NodePtr right) {
+NodePtr createNode(RegexInfoPtr ri,unsigned char type,CharClassPtr cc, NodePtr left, NodePtr right) {
     NodePtr n = allocateNode();
 
     n->tokenType = type;
+    n->cc = cc;
     n->left = left;
     n->right = right;
     n->nodeNumber = ri->nodeNumber;
     ri->nodeNumber++;
 
-    if (type == 'a') {
-        n->cc = createCharClassWord(ri);
-    }
-
     return n;
 }
 
@@ -62,11 +57,6 @@
 // <charClass> ::= '['<literal>'-'<literal>']'
 static
 NodePtr charClass(RegexInfoPtr ri) {
-    NodePtr n = allocateNode();
-
-    n->tokenType = 'c';
-    n->nodeNumber = ri->nodeNumber;
-    ri->nodeNumber++;
 
     CharClassPtr cc = NEW(CharClass);
     cc->type = 'r';
@@ -93,31 +83,19 @@
             // TODO literal support
 
     rangeList->end = ri->ptr + i - 1;
-
+    NodePtr n = createNode(ri,'c',cc,0,0);
+    token(ri);
     return n;
 }
 
 // <literal> ::= [a-z][A-Z][0-9]
 static
 NodePtr literal(RegexInfoPtr ri) {
-    NodePtr n = createNode(ri,'a',0,0);
+    CharClassPtr cc = createCharClassWord(ri);
+    NodePtr n = createNode(ri,'a',cc,0,0);
     return n;
 }
 
-// <group> ::= '('<regex>')'
-static
-NodePtr group(RegexInfoPtr ri) {
-    return regex(ri);
-}
-
-static
-void asterCheck(RegexInfoPtr ri) {
-    if (ri->ptr[0] == '*') {
-        ri->asterFlag = true;
-    }
-    return;
-}
-
 static
 void token(RegexInfoPtr ri) {
     while (ri->ptr[0] != '\0') {
@@ -130,7 +108,6 @@
             ri->ptr++;
             ri->tokenType = ')';
             ri->tokenValue = ri->ptr;
-            asterCheck(ri);
             return;
         } else if (ri->ptr[0] == '[') {
             ri->ptr++;
@@ -160,13 +137,14 @@
         } else {
             ri->tokenType = 'a';
             ri->tokenValue = ri->ptr;
-            while (isalnum(ri->ptr[0])) {
+            if (isalnum(ri->ptr[0])) {
                 ri->ptr++;
             }
-            asterCheck(ri);
             return;
         }
     }
+    ri->tokenType = 0;
+    ri->tokenValue = NULL;
     return;
 }
 
@@ -174,53 +152,44 @@
 static
 NodePtr regexAtom(RegexInfoPtr ri) {
 
-    token(ri);
     NodePtr n = NULL;
     if (ri->tokenType == 'c') n = charClass(ri);
-    if (ri->tokenType == 'a') n = literal(ri);
-    if (ri->tokenType == '(') n = group(ri);
+    else if (ri->tokenType == 'a') n = literal(ri);
+    else if (ri->tokenType == '(') {
+        n = regex(ri);
+        if (ri->tokenType != ')') {
+            // error
+        }
+        token(ri);
+    }
+    if (ri->tokenType == '*') {
+        n = createNode(ri,'*',0,n,0);
+        token(ri);
+    }
 
     return n;
 }
 
 // <regex> ::= <regexAtom> | <regexAtom>'*'<regex> | <regexAtom>'|'<regex> | <regexAtom><regexAtom>'*' | <regexAtom><regex>
 NodePtr regex(RegexInfoPtr ri) {
+    token(ri);
     NodePtr n = regexAtom(ri);
-    while (ri->ptr[0]) {
-        token(ri);
+    while (ri->tokenType) {
         if (ri->tokenType == '*') {
-            n = createNode(ri,'*',n,0);
-            ri->asterFlag = false;
+            n = createNode(ri,'*',0,n,0);
+            token(ri);
+            return n;
         } else if (ri->tokenType == '|') {
-            NodePtr n1 = regex(ri);
-            n = createNode(ri,'|',n,n1);
-        } else if (ri->tokenType == '(') {
-            ri->ptr--;
+            n = createNode(ri,'|',0,n,0);
             NodePtr n1 = regex(ri);
-            if (ri->asterFlag == true) {
-                n1 = createNode(ri,'*',n1,0);
-                ri->asterFlag = false;
-                ri->ptr++;
-            }
-            n = createNode(ri,'+',n,n1);
+            n->right = n1;
         } else if (ri->tokenType == ')') {
-            if (ri->orNum != 0 && ri->ptr[0] != ')') {
-                ri->ptr--;
-                ri->orNum--;
-            }
             return n;
         } else {
-            NodePtr n1 = NULL;
-            if (ri->asterFlag == true) {
-                ri->ptr = ri->tokenValue;
-                NodePtr n1 = regexAtom(ri);
-                n1 = createNode(ri,'*',n1,0);
-                ri->asterFlag = false;
-                ri->ptr++;
-            } else {
-                n1 = regex(ri);
-            }
-            n = createNode(ri,'+',n,n1);
+            n = createNode(ri,'+',0,n,0);
+            NodePtr n1 = regexAtom(ri);
+            n->right = n1;
         }
-    } return n;
+    }
+    return n;
 }