changeset 142:de0f332d560c pairPro

insert charClassMerge function
author masa
date Fri, 11 Dec 2015 14:54:00 +0900
parents 71f36a59cf6a
children 32977f5a2ed0
files c/regexParser/regexParser.cc c/regexParser/regexParser.h c/regexParser/subsetConstraction.cc
diffstat 3 files changed, 52 insertions(+), 32 deletions(-) [+]
line wrap: on
line diff
--- a/c/regexParser/regexParser.cc	Fri Dec 11 13:12:42 2015 +0900
+++ b/c/regexParser/regexParser.cc	Fri Dec 11 14:54:00 2015 +0900
@@ -30,10 +30,9 @@
 CharClassPtr createCharClassWord(RegexInfoPtr ri) {
     CharClassPtr cc = NEW(CharClass);
     cc->type = 'a';
-    cc->cond = NEW(Condition);
-    cc->cond->w = NEW(Word);
-    cc->cond->w->word = ri->tokenValue;
-    cc->cond->w->length = ri->ptr - ri->tokenValue;
+    cc->cond.w.word = ri->tokenValue;
+    cc->cond.w.length = ri->ptr - ri->tokenValue;
+    cc->nextState.bitContainer = 0;
     token(ri);
 
     return cc;
@@ -53,6 +52,28 @@
     return n;
 }
 
+CharClassPtr charClassMerge(CharClassPtr src, CharClassPtr add) {
+    // Split the regions where the char classes (cc) overlap.
+    // Overlay nextState where necessary.
+    // If anything changed, build a new list and return it. NOTE(review): work in progress - braces are unbalanced and only one path returns a value.
+    if (src->type == 'a') {
+        if (add->type == 'a') {
+            if (src->cond.w.word[0] > add->cond.w.word[0]) {
+                // add is the smaller one, so build the cc in ascending order
+                CharClassPtr left = charClassMerge(add->left.src); // NOTE(review): one argument given but two are declared; perhaps `add->left, src` was meant - confirm
+                return createCharClassWord(add->cond->w.word, left, add->right); // NOTE(review): createCharClassWord earlier in this diff takes a single RegexInfoPtr, and `cond` is accessed with `.` elsewhere in this function - confirm
+            } else {
+                // (branch not implemented yet)
+            }
+        } else if (add->type == 'c') {
+            // src is read as a word and add as a range here. NOTE(review): the char-class parser in this changeset sets cc->type = 'r', not 'c' - confirm which tag is intended
+            if (src->cond.w.word[0] < add->cond.range.begin) {
+
+            } else (src->cond->w.word[0] < add->end) { // NOTE(review): `else` carries a condition without `if`; CharClass as changed in this diff has no `end` member (range bounds are in cond.range) - confirm
+    } else if (src->type == 'c') {
+
+    }
+}
 
 // <charClass> ::= '['<literal>'-'<literal>']'
 static
@@ -60,14 +81,10 @@
     CharClassPtr cc = NEW(CharClass);
     NodePtr n = createNode(ri,'c',cc,0,0);
     cc->type = 'r';
-    cc->cond = NEW(Condition);
-    cc->cond->range = NEW(RangeList);
-    cc->cond->range->begin = ri->ptr;
-    cc->cond->range->end = ri->ptr;
-    cc->cond->range->next = NULL;
-
-
-    RangeListPtr rangeList = cc->cond->range;
+    cc->nextState.bitContainer = 0;
+    RangeListPtr rangeList = &cc->cond.range;
+    rangeList->begin = ri->ptr;
+    rangeList->end = ri->ptr;
 
     for (ri->ptr++; *ri->ptr && *ri->ptr != ']'; ri->ptr++) {
         if (*ri->ptr == '-') {
@@ -86,8 +103,8 @@
         rangeList->end = ri->ptr;
         rangeList->next = NULL;
     }
-            // TODO literal support
-
+    // TODO literal support
+    // merge rangeList here
     if (*ri->ptr) ri->ptr++;
     token(ri);
     return n;
@@ -114,11 +131,6 @@
             ri->tokenType = ')';
             ri->tokenValue = ri->ptr;
             return;
-        } else if (ri->ptr[0] == '[') {
-            ri->ptr++;
-            ri->tokenType = 'c';
-            ri->tokenValue = ri->ptr;
-            return;
         } else if (ri->ptr[0] == ']') {
             ri->ptr++;
             ri->tokenType = ']';
@@ -144,6 +156,11 @@
                 \\
                 \utf-8 etc...
             */
+        } else if (ri->ptr[0] == '[') {
+            ri->ptr++;
+            ri->tokenType = 'c';
+            ri->tokenValue = ri->ptr;
+            return;
         } else {
             ri->tokenType = 'a';
             ri->tokenValue = ri->ptr;
--- a/c/regexParser/regexParser.h	Fri Dec 11 13:12:42 2015 +0900
+++ b/c/regexParser/regexParser.h	Fri Dec 11 14:54:00 2015 +0900
@@ -8,23 +8,22 @@
 } Word, *WordPtr;
 
 typedef struct utf8Range {
-    unsigned char *begin;
-    unsigned char *end;
-    struct utf8Range *next;
+    unsigned long begin;
+    unsigned long end;
+    struct utf8Range *next; // only used in the parser.
 } RangeList , *RangeListPtr;
 
 typedef union condition {
-    RangeListPtr range;
-    WordPtr w;
+    RangeList range;
+    Word w;
 } Condition, *ConditionList;
 
 typedef struct charClass {
     unsigned char type;
-    ConditionList cond;
     struct charClass *left;
     struct charClass *right;
-    unsigned char begin;
-    unsigned char end;
+    Condition cond;
+    BitVector nextState;
 } CharClass, *CharClassPtr;
 
 typedef struct node {
--- a/c/regexParser/subsetConstraction.cc	Fri Dec 11 13:12:42 2015 +0900
+++ b/c/regexParser/subsetConstraction.cc	Fri Dec 11 14:54:00 2015 +0900
@@ -12,13 +12,17 @@
     }
 }
 
-TGValuePtr generateTransition(NodePtr n,TransitionGeneratorPtr tg) {
-    TGValuePtr tgv0 = NULL;
-    TGValuePtr tgv1 = NULL;
+TGValue generateTransition(NodePtr n,TransitionGenerator tg) {
 
     if (n->tokenType == '+') {
-        tgv0 = generateTransition(n->left,tg);
-        tgv1 = generateTransition(n->right,tg);
+        TGValue tgv = generateTransition(n->left,tg);
+        if (tgv.asterisk) {
+            TGValue tgv1 = generateTransition(n->right,tg);
+            tgv.state |= tgv1.state;
+            return tgv;
+        }
+        tgv.state = n->right->nodeNumber;
+        return tgv;
     } else if (n->tokenType == '|') {
         tgv0 = generateTransition(n->left,tg);
         tgv1 = generateTransition(n->right,tg);