changeset 155:6cd0141bed6c pairPro

impl charClassMerge
author masa
date Fri, 18 Dec 2015 15:40:55 +0900
parents 1fad21fd6028
children b5ecfc008bcf
files c/regexParser/regexParser.h c/regexParser/subsetConstraction.cc
diffstat 2 files changed, 119 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/c/regexParser/regexParser.h	Fri Dec 18 14:16:02 2015 +0900
+++ b/c/regexParser/regexParser.h	Fri Dec 18 15:40:55 2015 +0900
@@ -44,3 +44,5 @@
     unsigned char *tokenValue;
     int nodeNumber;
 } RegexInfo, *RegexInfoPtr;
+
+CharClassPtr createCharClassRange(unsigned long begin, unsigned long end, CharClassPtr left, CharClassPtr right);
--- a/c/regexParser/subsetConstraction.cc	Fri Dec 18 14:16:02 2015 +0900
+++ b/c/regexParser/subsetConstraction.cc	Fri Dec 18 15:40:55 2015 +0900
@@ -13,40 +13,132 @@
     // 必要ならばnextStateを重ねあわせる
     // 変更があった場合は新しくリストを作って返す
     if (end < cc->cond.range.begin ) { // 1
-        CharClassPtr cc1 = charClassMerge(cc,cc->cond.range.begin,cc->cond.range.end,nextState);
         if (cc->left) {
-            cc1->left = charClassMerge(cc->left,begin,end,nextState);
-            return cc1;
+            return createCharClassRange(cc->begin,cc->end,charClassMerge(cc->left,begin,end,nextState),cc->right);
         } else {
-            CharClassPtr cc2 = charClassMerge(cc,begin,end,nextState);
-            cc2->nextState = nextState;
-            cc1->left = cc2;
+            CharClassPtr cc1 =  createCharClassRange(begin,end,NULL,cc);
+            cc1->nextState = nextState;
             return cc1;
         }
     } else if (end == cc->cond.range.begin ) { // 2
-        cc->cond.range.begin = begin;
-        return cc;
-    } else if (end <= cc->cond.range.end) {  // 3,4,6,7,9,10
-        if (begin < cc->cond.range.begin) {  // 3,4
-            cc->cond.range.begin = begin;
-        }
-        return cc;
-    } else if (begin > cc->cond.range.end ) { // 13
-        if (cc->right) {
-            cc->right = charClassMerge(cc->right,begin,end,nextState);
+        CharClassPtr cc1;
+        if (cc->left) {
+            cc1 = charClassMerge(cc->left,begin,end-1,nextState);
         } else {
-            cc->right = charClassMerge(cc,begin,end,nextState);
+            cc1 = createCharClassRange(begin,end-1,NULL,NULL);
+            cc1->nextState = nextState;
+        }
+        if (cc->begin == cc->end) {
+            CharClassPtr cc2 = createCharClassRange(cc->begin,cc->end,cc1,cc->right);
+            cc2->nextState = cc->nextState | nextState;
+            return cc2;
+        }
+        CharClassPtr cc3;
+        createCharClassRange(cc->begin+1,cc->end,cc->left,cc->end);
+        cc3->nextState = cc->nextState;
+        CharClassPtr cc2 = createCharClassRange(cc->begin,cc->begin,cc1,cc3);
+        cc2->nextState = cc->nextState | nextState;
+        return cc2;
+    } else if (end < cc->cond.range.end) {
+        if (begin < cc->cond.range.begin) {  // 3
+            CharClassPtr cc1;
+            if (cc->left) {
+                cc1 = charClassMerge(cc->left,begin,cc->begin-1,nextState);
+            } else {
+                cc1 = createCharClassRange(begin,cc->begin-1,NULL,NULL);
+                cc1->nextState = nextState;
+            }
+            CharClassPtr cc3 = createCharClassRange(end+1,cc->end,cc->left,cc->right);
+            cc3->nextState = cc->nextState;
+            CharClassPtr cc2 = createCharClassRange(cc->begin,end,cc1,cc3);
+            cc2->nextState = cc->nextState | nextState;
+            return cc2;
         }
-        return cc;
+        if (begin == cc->cond.range.begin) {  // 6
+            CharClassPtr cc2 = createCharClassRange(end,cc->end,NULL,cc->right);
+            cc2->nextState = cc->nextState;
+            CharClassPtr cc1 = createCharClassRange(begin,end,cc->left,cc2);
+            cc1->nextState = cc->nextState | nextState;
+            return cc1;
+        }
+        // 9
+        CharClassPtr cc2 = createCharClassRange(cc->begin,begin-1,cc->left,NULL);
+        cc2->nextState = cc1->nextState;
+        CharClassPtr cc3 = createCharClassRange(end+1,cc->end,NULL,cc->right);
+        cc3->nextState = cc->nextState;
+        CharClassPtr cc1 = createCharClassRange(begin,end,cc2,cc3);
+        cc1->nextState = cc->nextState | nextState;
+        return cc1;
+    } else if (end == cc->cond.range.end) {
+        if (begin == cc->cond.range.begin) { // 7
+            CharClassPtr cc1 = createCharClassRange(begin,end,cc->left,cc->right);
+            cc1->nextState = cc->nextState | nextState;
+            return cc1;
+        } else if (begin < cc->cond.range.begin) { // 4
+            CharClassPtr cc1;
+            if (cc->left) {
+                cc1 = charClassMerge(cc->left,begin,end-1,nextState);
+            } else {
+                cc1 = createCharClassRange(begin,end-1,NULL,NULL);
+                cc1->nextState = nextState;
+            }
+            CharClassPtr cc3 = createCharClassRange(end+1,cc->end,cc1,cc->right);
+            cc3->nextState = cc->nextState | nextState;
+            return cc3;
+        }
+        // 10
+        CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,NULL,cc->right);
+        cc2->nextState = cc->nextState | nextState;
+        CharClassPtr cc1 = createCharClassRange(cc->cond.range.begin,begin-1,NULL,NULL);
+        cc1->nextState = cc->nextState;
+        return cc1;
+        
     }
-    if (cc->right) {
-        CharClassPtr right = cc->right;
-        begin = cc->cond.range.begin;
-        free(cc);
-        return charClassMerge(right,begin,end,nextState);
+    if (begin > cc->cond.range.end ) { // 13
+        if (cc->right) {
+            return createCharClassRange(cc->begin,cc->end,cc->left,charClassMerge(cc->right,begin,end,nextState));
+        } else {
+            CharClassPtr cc1 =  createCharClassRange(begin,end,cc,NULL);
+            cc1->nextState = nextState;
+            return cc1;
+        }
     }
-    if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { // 12
-        if (end > cc->cond.range.end) cc->cond.range.end = end; // 11,8
+    if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) {
+        if (end > cc->cond.range.end) {
+            if (begin == cc->cond.range.begin) {    // 8
+                CharClassPtr cc1;
+                if (cc->left) {
+                    cc1 = charClassMerge(cc->left,begin,end-1,nextState);
+                } else {
+                    cc1 = createCharClassRange(begin,end-1,NULL,NULL);
+                    cc1->nextState = nextState;
+                }
+                CharClassPtr cc3 = createCharClassRange(end+1,cc->end,cc1,cc->right);
+                cc3->nextState = cc->nextState | nextState;
+                return cc3;
+            }
+            // 11
+            cc->cond.range.end = end; // 11,8
+            return cc;
+        }
+        // 12
+        CharClassPtr cc1;
+        if (cc->right) {
+            cc1 = charClassMerge(cc->right,begin+1,end,nextState);
+        } else {
+            cc1 = createCharClassRange(begin+1,end,NULL,NULL);
+            cc1->nextState = nextState;
+        }
+        if (cc->begin == cc->end) {
+            CharClassPtr cc2 = createCharClassRange(cc->begin,cc->end,cc->left,cc1);
+            cc2->nextState = cc->nextState | nextState;
+            return cc2;
+        }
+        CharClassPtr cc3 = createCharClassRange(cc->begin,cc->end-1,NULL,cc->right);
+        cc3->nextState = cc->nextState;
+        CharClassPtr cc2 = createCharClassRange(cc->cond.range.end,cc->cond.range.end,cc1,cc3);
+        cc2->nextState = cc->nextState | nextState;
+        return cc2;
     } else if (begin < cc->cond.range.begin) { // 5
         cc->cond.range.begin = begin;
         cc->cond.range.end = end;