diff regexParser/subsetConstruction.cc @ 248:2b1fbfb92d54

implement tSearch
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Fri, 22 Jan 2016 20:09:42 +0900
parents a3cddb32b87f
children e60dd2fa3409
line wrap: on
line diff
--- a/regexParser/subsetConstruction.cc	Fri Jan 22 18:37:04 2016 +0900
+++ b/regexParser/subsetConstruction.cc	Fri Jan 22 20:09:42 2016 +0900
@@ -164,6 +164,7 @@
 }
 
 void setState(CharClassPtr cc, BitVector bi) {
+    // if (word case) setNext(bitVector to the word list)
     cc->nextState = bi;
     if (cc->left) {
         setState(cc->left,bi);
@@ -198,19 +199,30 @@
     state に対応する NodePtr を
  */
 StatePtr createState(TGValue tgv,NodePtr n) {
-    StatePtr s = NEW(State);
-    s->stateNum = n->stateNum = tgv.tg->totalStateCount;
+    BitVector bi = createBitVector(tgv.tg->totalStateCount);
+    StatePtr s = createState(tgv.tg,bi);
+    n->stateNum = s->stateNum;
     s->next = tgv.tg->stateList;
     tgv.tg->stateList = s;
     s->node = n;
-    BitVector bi = createBitVector(n->stateNum);
     s->bitState = bi;
     s->accept = false;
-    tgv.tg->totalStateCount++;
     s->cc = n->cc;
     return s;
 }
 
+SCValue createState(TransitionGeneratorPtr tg,BitVector bi) {
+    StatePtr s = NEW(State);
+    s->stateNum = tg->totalStateCount++;
+    s->next = NULL;
+    tg->stateEnd->next = s;
+    tg->stateEnd = s;
+    s->bitState = bi;
+    s->cc = NULL;
+    s->node = NULL;
+    return scv;
+}
+
 /**
     pass 1 : 正規表現に必要な状態を探して、それぞれに番号を割り振る
         前が * でない + は新しく状態を作る
@@ -351,18 +363,6 @@
     return scv;
 }
 
-SCValue createState(SCValue scv,BitVector bi) {
-    StatePtr s = NEW(State);
-    s->stateNum = ++scv.tg->totalStateCount;
-    s->next = NULL;
-    scv.stateEnd->next = s;
-    scv.stateEnd = s;
-    s->bitState = bi;
-    s->cc = NULL;
-    s->node = NULL;
-    return scv;
-}
-
 /**
     現在のステートに含まれる組み合わせ状態をとってくる
     組み合わされた個々の charclass をmerge して新しい charclass をつくり、組み合わせ状態に登録する
@@ -371,32 +371,35 @@
     
     charClassMerge の段階で新しい状態をリストに登録したら、charclasswalk をする必要がなくなる
  */
-SCValue subsetConstruction(SCValue scv) {
-    for (;scv.stateTop;scv.stateTop = scv.stateTop->next) {
-        CharClassWalkerPtr cw = createCharClassWalker(scv.stateTop->cc);
+void determinize(StatePtr s, TransitionGeneratorPtr tg) {
+    BitVector bi = s->bitState;
+    for (;;) {
+        int bitPosition = searchBit(bi);
+        if (!bitPosition) break;
+        unsigned long baseNum = 1 << (bitPosition-1);
+        // printf("bit %lx pos %d baseNum %lx\n",bi.bitContainer,bitPosition,baseNum);
+        bi.bitContainer ^= baseNum; 
+        if (baseNum==2) continue;   // EOF case
+        StatePtr base = tg->stateArray[baseNum];
+        if (base == NULL) {
+            errorMassege("No base state",__LINE__,__FILE__); break;
+        }
+        CharClassPtr merge = mergeTransition(s,base->cc);
+        s->cc = merge;
+    }
+}
+
+void subsetConstruction(TransitionGeneratorPtr tg) {
+    for (;tg->stateTop;tg->stateTop = tg->stateTop->next) {
+        CharClassWalkerPtr cw = createCharClassWalker(state->cc);
         while (hasNext(cw)) {
             CharClassPtr cc = getNext(cw);
             BitVector bi = cc->nextState;
-            if (scv.tg->stateArray[bi.bitContainer]) continue;  // already done
-            scv = createState(scv,bi);
-            StatePtr s = scv.stateEnd;
-            scv.tg->stateArray[bi.bitContainer] = s;
-            for (;;) {
-                int bitPosition = searchBit(bi);
-                if (!bitPosition) break;
-                unsigned long baseNum = 1 << (bitPosition-1);
-                // printf("bit %lx pos %d baseNum %lx\n",bi.bitContainer,bitPosition,baseNum);
-                bi.bitContainer ^= baseNum; 
-                if (baseNum==2) continue;   // EOF case
-                StatePtr base = scv.tg->stateArray[baseNum];
-                if (base == NULL) {
-                    errorMassege("No base state",__LINE__,__FILE__); break;
-                }
-                CharClassPtr merge = mergeTransition(s,base->cc);
-                s->cc = merge;
-            }
+            if (tg->stateArray[bi.bitContainer]) continue;  // already done
+            StatePtr s = createState(tg,bi);
+            tg->stateArray[bi.bitContainer] = s;
+            determinize(s,tg);
         }
         free(cw);
     }
-    return scv;
 }