changeset 257:ebb429c2b6a7

fix allocate state in generateTransition
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Mon, 25 Jan 2016 19:20:32 +0900
parents 72f3673dd7a5
children 29e467a491ba
files regexParser/regexParser.h regexParser/subsetConstruction.cc regexParser/threadedSearch.cc
diffstat 3 files changed, 15 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/regexParser/regexParser.h	Mon Jan 25 18:28:57 2016 +0900
+++ b/regexParser/regexParser.h	Mon Jan 25 19:20:32 2016 +0900
@@ -61,6 +61,7 @@
 typedef struct tState {
     State *state;
     void (*stateSkip)(tsValue);
+    void (*stateMatch)(tsValue);
     int ccvSize;
     CCVPtr ccv;
 } TState, *TStatePtr;
--- a/regexParser/subsetConstruction.cc	Mon Jan 25 18:28:57 2016 +0900
+++ b/regexParser/subsetConstruction.cc	Mon Jan 25 19:20:32 2016 +0900
@@ -236,7 +236,6 @@
     if (n->tokenType == '+') {
         TGValue tgvLeft = tgv;
         tgvLeft.endState = n->right->state;
-        if (pass==2) n->right->state->accept = tgv.endState->accept;
         tgvLeft.asterisk = NULL;
         tgvLeft = generateTransition(n->left,tgvLeft,pass);
         TGValue tgvRight = tgv;
@@ -255,6 +254,7 @@
             tgvRight.tg->stateArray[tgvRight.startState->bitState.bitContainer] = tgvRight.startState ;
         }
         tgvRight = generateTransition(n->right,tgvRight,pass);
+        if (tgv.endState && tgvRight.asterisk) tgvRight.startState->accept = tgv.endState->accept;
         tgvLeft.asterisk = tgvRight.asterisk;
         return tgvLeft;
     } else if (n->tokenType == '|') {
@@ -278,7 +278,7 @@
             n->nextStateNum = nextState;
             n->nextState = tgv.endState;
             BitVector bi = createBitVector(nextState);
-            if (n->nextState->accept && n->nextState->node->tokenType == '*') bi = bitSet(bi,1);
+            if (n->nextState->accept) bi = bitSet(bi,1);
             setState(n->cc,bi);
             tgv1.startState->cc = mergeTransition(tgv1.startState,n->cc);
         }
--- a/regexParser/threadedSearch.cc	Mon Jan 25 18:28:57 2016 +0900
+++ b/regexParser/threadedSearch.cc	Mon Jan 25 19:20:32 2016 +0900
@@ -9,6 +9,11 @@
     tsv.current->stateSkip(tsv);
 }
 
+void stateMatch(TSValue tsv) {
+    tsv.buff.matchBegin = tsv.buff.buffptr;
+    tsv.current->stateSkip(tsv);
+}
+
 TStatePtr generateTState(StatePtr state) {
     TStatePtr tState = NEW(TState);
     int ccvSize = 0;
@@ -40,8 +45,10 @@
         unsigned char c = *tsv.buff.buffptr++;
         for (int i = 0; i < tsv.current->ccvSize; i++) {
             CCVPtr ccv = &tsv.current->ccv[i];
-            if (c<ccv->begin) tsv.current->stateSkip(tsv);
-            else if (c<=ccv->end) {
+            if (c<ccv->begin) {
+                tsv.stateSkip(tsv);
+                goto next;
+            } else if (c<=ccv->end) {
                 // range matched.
                 if (ccv->w.word) {
                     // match the word.
@@ -65,6 +72,9 @@
                     }
                 }
                 tsv.current = ccv->tState;
+                if (tsv.current->state->bitState.bitContainer & 2) {
+                    tsv.stateMatch();
+                }
                 goto next;
             }
         }