changeset 147:84d32375383a pairPro

implement insertCharClass
author masa
date Tue, 15 Dec 2015 17:14:35 +0900
parents 1c74ac7d56ec
children d1ebba6e117a
files c/regexParser/main.cc c/regexParser/node.cc c/regexParser/regexParser.cc
diffstat 3 files changed, 64 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/c/regexParser/main.cc	Tue Dec 15 16:20:35 2015 +0900
+++ b/c/regexParser/main.cc	Tue Dec 15 17:14:35 2015 +0900
@@ -20,6 +20,5 @@
     printf("regex : %s\n",ri->ptr);
     NodePtr n = regex(ri);
     printTree(n);
-    TransitionGenerator tg = generateTransitionList(n);
     return 0;
 }
--- a/c/regexParser/node.cc	Tue Dec 15 16:20:35 2015 +0900
+++ b/c/regexParser/node.cc	Tue Dec 15 17:14:35 2015 +0900
@@ -3,6 +3,9 @@
 
 static void printCharacterClass(CharClassPtr cc, long nodeNumber,int d) {
     if (cc->type == 'r') {
+        if (cc->left) {
+            printCharacterClass(cc->left,nodeNumber,d+1);
+        }
         printf("%*c",d*4, ' ');
         for (RangeList range = cc->cond.range; range.begin != 0;) {
            printf("[%c-%c] ",(unsigned char)range.begin,(unsigned char)range.end);
@@ -13,14 +16,15 @@
            }
         }
         printf("(%lu)\n",nodeNumber);
+        if (cc->right) {
+            printCharacterClass(cc->right,nodeNumber,d+1);
+        }
     }
 }
 
 static void descendTree(NodePtr n, int d) {
     if (n->left != NULL) {
-        d++;
-        descendTree(n->left, d);
-        d--;
+        descendTree(n->left, d+1);
     }
     if (n->tokenType == 'a') {
         printf("%*c",d*4, ' ');
@@ -35,9 +39,7 @@
     }
 
     if (n->right != NULL) {
-        d++;
-        descendTree(n->right, d);
-        d--;
+        descendTree(n->right, d+1);
     }
 }
 
--- a/c/regexParser/regexParser.cc	Tue Dec 15 16:20:35 2015 +0900
+++ b/c/regexParser/regexParser.cc	Tue Dec 15 17:14:35 2015 +0900
@@ -39,6 +39,17 @@
     return n;
 }
 
+CharClassPtr createCharClassRange(unsigned long begin, unsigned long end, CharClassPtr left, CharClassPtr right) {
+    CharClassPtr cc = NEW(CharClass);  // new tree node holding the single range [begin,end]
+    cc->type = 'r';                    // 'r' is the type printCharacterClass checks before printing ranges
+    cc->cond.range.begin = begin;
+    cc->cond.range.end = end;
+    cc->cond.range.next = NULL;        // end the range list explicitly; NEW may not zero-fill the node
+    cc->left = left;                   // children are stored as given (either may be 0)
+    cc->right = right;
+    cc->nextState.bitContainer = 0;
+    return cc;
+}
 CharClassPtr createCharClassWord(RegexInfoPtr ri) {
     CharClassPtr cc = NEW(CharClass);
     cc->type = 'a';
@@ -48,9 +59,39 @@
     return cc;
 }
 
-CharClassPtr charClassMerge(CharClassPtr cc, unsigned char begin, unsigned char end, CharClassPtr next) {
-    CharClassPtr cc1 = NEW(CharClass);
-    return cc1;
+// Insert [begin,end] (begin <= end expected) into the range tree rooted at cc (must be non-NULL) and return the root.
+// A disjoint range is hung under a fresh copy of the cc node; an overlapping range widens cc in place.
+CharClassPtr insertCharClass(CharClassPtr cc, unsigned char begin, unsigned char end) {
+    if (end < cc->cond.range.begin) {
+        // Entirely below this node: recurse into (or create) the left child; the right side mirrors this.
+        CharClassPtr cc1 = createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->left,cc->right);
+        if (cc->left) {
+            cc1->left = insertCharClass(cc->left,begin,end);
+        } else {
+            cc1->left = createCharClassRange(begin,end,0,0);
+        }
+        return cc1;
+    } else if (end <= cc->cond.range.end) {
+        // Ends inside this node (end == range.begin included): only the lower bound can move.
+        if (begin < cc->cond.range.begin) {
+            cc->cond.range.begin = begin;
+        }
+    } else if (begin > cc->cond.range.end) {
+        CharClassPtr cc1 = createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->left,cc->right);
+        if (cc->right) {
+            cc1->right = insertCharClass(cc->right,begin,end);
+        } else {
+            cc1->right = createCharClassRange(begin,end,0,0);
+        }
+        return cc1;
+    } else {
+        // Starts inside or before this node and ends past it: raise the upper bound, lower the other if needed.
+        cc->cond.range.end = end;
+        if (begin < cc->cond.range.begin) {
+            cc->cond.range.begin = begin;
+        }
+    }
+    return cc;
 }
 
 // <charClass> ::= '['<literal>'-'<literal>']'
@@ -61,27 +102,33 @@
     cc->type = 'r';
     cc->nextState.bitContainer = 0;
     RangeListPtr rangeList = &cc->cond.range;
-    rangeList->begin = (unsigned long)*ri->ptr;
-    rangeList->end = (unsigned long)*ri->ptr;
+    rangeList->begin = *ri->ptr;
+    rangeList->end = *ri->ptr;
     rangeList->next = NULL;
 
     for (ri->ptr++; *ri->ptr && *ri->ptr != ']'; ri->ptr++) {
         if (*ri->ptr == '-') {
-            rangeList->end = (unsigned long)*(ri->ptr + 1);
+            rangeList->end = *(ri->ptr + 1);
             ri->ptr++;
             continue;
         }
         if (ri->ptr[0] == 0 || ri->ptr[0] == ']') break;
         if (ri->ptr[0] == rangeList->end + 1) {
-            rangeList->end = (unsigned long)*ri->ptr;
+            rangeList->end = *ri->ptr;
             continue;
         }
         rangeList->next = NEW(RangeList);
         rangeList = rangeList->next;
-        rangeList->begin = (unsigned long)*ri->ptr;
-        rangeList->end = (unsigned long)*ri->ptr;
+        rangeList->begin = *ri->ptr;
+        rangeList->end = *ri->ptr;
         rangeList->next = NULL;
     }
+
+    for (RangeListPtr r = &cc->cond.range; r; r = r->next) {
+        cc = insertCharClass(cc, r->begin, r->end);
+    }
+
+    n->cc = cc;
     // TODO literal support
     // merge rangeList here
     if (*ri->ptr) ri->ptr++;