changeset 133:ccc673449351 pairPro

Look ahead '*'
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Thu, 03 Dec 2015 20:47:11 +0900
parents fb4c8adf3a80
children dbafc753078e
files c/regexParser/main.cc c/regexParser/regexParser.cc c/regexParser/regexParser.h
diffstat 3 files changed, 39 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/c/regexParser/main.cc	Wed Dec 02 23:30:48 2015 +0900
+++ b/c/regexParser/main.cc	Thu Dec 03 20:47:11 2015 +0900
@@ -12,7 +12,8 @@
 {
     RegexInfoPtr ri = (RegexInfoPtr)malloc(sizeof(RegexInfo));
     ri->nodeNumber = 1;
-    ri->orFlag = 0;
+    ri->orNum = 0;
+    ri->asterFlag = false;
 
     for (int i = 1; i < argc; i++) {
         if (strcmp(argv[i],"-regex") == 0) {
--- a/c/regexParser/regexParser.cc	Wed Dec 02 23:30:48 2015 +0900
+++ b/c/regexParser/regexParser.cc	Thu Dec 03 20:47:11 2015 +0900
@@ -111,6 +111,14 @@
 }
 
 static
+void asterCheck(RegexInfoPtr ri) { // look-ahead helper: records whether the next unread character is '*'
+    if (ri->ptr[0] == '*') { // ri->ptr is only inspected here; it is not advanced
+        ri->asterFlag = true; // the flag is only ever set in this function, never cleared; callers reset it
+    }
+    return;
+}
+
+static
 void token(RegexInfoPtr ri) {
     while (ri->ptr[0] != '\0') {
         if (ri->ptr[0] == '('){
@@ -122,6 +130,7 @@
             ri->ptr++;
             ri->tokenType = ')';
             ri->tokenValue = ri->ptr;
+            asterCheck(ri);
             return;
         } else if (ri->ptr[0] == '[') {
             ri->ptr++;
@@ -132,7 +141,7 @@
             ri->ptr++;
             ri->tokenType = '|';
             ri->tokenValue = NULL;
-            ri->orFlag++;
+            ri->orNum++;
             return;
         } else if (ri->ptr[0] == '*'){
             ri->ptr++;
@@ -154,9 +163,7 @@
             while (isalnum(ri->ptr[0])) {
                 ri->ptr++;
             }
-            if (ri->ptr[0] == '*') {
-                ri->astarFlag = true;
-            }
+            asterCheck(ri);
             return;
         }
     }
@@ -176,25 +183,43 @@
     return n;
 }
 
-// <regex> ::= <regexAtom> | <regexAtom>'*'<regex> | <regexAtom>'|'<regex> | <regexAtom><regex> |
+// <regex> ::= <regexAtom> | <regexAtom>'*'<regex> | <regexAtom>'|'<regex> | <regexAtom><regexAtom>'*' | <regexAtom><regex>
 NodePtr regex(RegexInfoPtr ri) {
     NodePtr n = regexAtom(ri);
     while (ri->ptr[0]) {
         token(ri);
         if (ri->tokenType == '*') {
             n = createNode(ri,'*',n,0);
+            ri->asterFlag = false;
         } else if (ri->tokenType == '|') {
             NodePtr n1 = regex(ri);
             n = createNode(ri,'|',n,n1);
+        } else if (ri->tokenType == '(') {
+            ri->ptr--;
+            NodePtr n1 = regex(ri);
+            if (ri->asterFlag == true) {
+                n1 = createNode(ri,'*',n1,0);
+                ri->asterFlag = false;
+                ri->ptr++;
+            }
+            n = createNode(ri,'+',n,n1);
         } else if (ri->tokenType == ')') {
-            if (ri->orFlag != 0) {
-                if (ri->ptr[0] != ')') ri->ptr--;
-                ri->orFlag--;
+            if (ri->orNum != 0 && ri->ptr[0] != ')') {
+                ri->ptr--;
+                ri->orNum--;
             }
             return n;
         } else {
-            // return NULL
-            NodePtr n1 = regex(ri);
+            NodePtr n1 = NULL;
+            if (ri->asterFlag == true) {
+                ri->ptr = ri->tokenValue;
+                NodePtr n1 = regexAtom(ri);
+                n1 = createNode(ri,'*',n1,0);
+                ri->asterFlag = false;
+                ri->ptr++;
+            } else {
+                n1 = regex(ri);
+            }
             n = createNode(ri,'+',n,n1);
         }
     } return n;
--- a/c/regexParser/regexParser.h	Wed Dec 02 23:30:48 2015 +0900
+++ b/c/regexParser/regexParser.h	Thu Dec 03 20:47:11 2015 +0900
@@ -40,6 +40,6 @@
     unsigned char tokenType;
     unsigned char *tokenValue;
     int nodeNumber;
-    int orFlag;
-    bool astarFlag;
+    int orNum;
+    bool asterFlag;
 } RegexInfo, *RegexInfoPtr;