diff c/regexParser/regexParser.cc @ 142:de0f332d560c pairPro

insert charClassMerge function
author masa
date Fri, 11 Dec 2015 14:54:00 +0900
parents 15815fcb6c2f
children 32977f5a2ed0
line wrap: on
line diff
--- a/c/regexParser/regexParser.cc	Fri Dec 11 13:12:42 2015 +0900
+++ b/c/regexParser/regexParser.cc	Fri Dec 11 14:54:00 2015 +0900
@@ -30,10 +30,9 @@
 CharClassPtr createCharClassWord(RegexInfoPtr ri) {
     CharClassPtr cc = NEW(CharClass);
     cc->type = 'a';
-    cc->cond = NEW(Condition);
-    cc->cond->w = NEW(Word);
-    cc->cond->w->word = ri->tokenValue;
-    cc->cond->w->length = ri->ptr - ri->tokenValue;
+    cc->cond.w.word = ri->tokenValue;
+    cc->cond.w.length = ri->ptr - ri->tokenValue;
+    cc->nextState.bitContainer = 0;
     token(ri);
 
     return cc;
@@ -53,6 +52,28 @@
     return n;
 }
 
+CharClassPtr charClassMerge(CharClassPtr src, CharClassPtr add) {
+    // Split the regions where the two char classes (src and add) overlap.
+    // Merge (superimpose) nextState where necessary.
+    // If anything changed, build a new list and return it.
+    if (src->type == 'a') {
+        if (add->type == 'a') {
+            if (src->cond.w.word[0] > add->cond.w.word[0]) {
+                // add's first character is smaller, so build the cc in ascending order
+                CharClassPtr left = charClassMerge(add->left.src); // NOTE(review): one argument passed to this two-parameter function; `add->left, src` may be intended -- confirm
+                return createCharClassWord(add->cond->w.word, left, add->right); // NOTE(review): createCharClassWord is defined in this file taking a single RegexInfoPtr, and cond is accessed with '.' (not '->') elsewhere -- confirm
+            } else {
+                
+            }
+        } else if (add->type == 'c') {
+            // NOTE(review): checks type 'c', but the range char-class parser in this file assigns type 'r' -- confirm which tag is intended
+            if (src->cond.w.word[0] < add->cond.range.begin) { // NOTE(review): range.begin is assigned ri->ptr (a pointer) by the range parser; comparing it with a char looks unintended -- confirm
+
+            } else (src->cond->w.word[0] < add->end) { // NOTE(review): `if` is missing after `else`; `cond->w` and `add->end` differ from the `cond.w` / `cond.range.end` access used elsewhere; the blocks open at this point are never closed
+    } else if (src->type == 'c') {
+
+    }
+}
 
 // <charClass> ::= '['<literal>'-'<literal>']'
 static
@@ -60,14 +81,10 @@
     CharClassPtr cc = NEW(CharClass);
     NodePtr n = createNode(ri,'c',cc,0,0);
     cc->type = 'r';
-    cc->cond = NEW(Condition);
-    cc->cond->range = NEW(RangeList);
-    cc->cond->range->begin = ri->ptr;
-    cc->cond->range->end = ri->ptr;
-    cc->cond->range->next = NULL;
-
-
-    RangeListPtr rangeList = cc->cond->range;
+    cc->nextState.bitContainer = 0;
+    RangeListPtr rangeList = &cc->cond.range;
+    rangeList->begin = ri->ptr;
+    rangeList->end = ri->ptr;
 
     for (ri->ptr++; *ri->ptr && *ri->ptr != ']'; ri->ptr++) {
         if (*ri->ptr == '-') {
@@ -86,8 +103,8 @@
         rangeList->end = ri->ptr;
         rangeList->next = NULL;
     }
-            // TODO literal support
-
+    // TODO literal support
+    // merge rangeList here
     if (*ri->ptr) ri->ptr++;
     token(ri);
     return n;
@@ -114,11 +131,6 @@
             ri->tokenType = ')';
             ri->tokenValue = ri->ptr;
             return;
-        } else if (ri->ptr[0] == '[') {
-            ri->ptr++;
-            ri->tokenType = 'c';
-            ri->tokenValue = ri->ptr;
-            return;
         } else if (ri->ptr[0] == ']') {
             ri->ptr++;
             ri->tokenType = ']';
@@ -144,6 +156,11 @@
                 \\
                 \utf-8 etc...
             */
+        } else if (ri->ptr[0] == '[') {
+            ri->ptr++;
+            ri->tokenType = 'c';
+            ri->tokenValue = ri->ptr;
+            return;
         } else {
             ri->tokenType = 'a';
             ri->tokenValue = ri->ptr;