changeset 180:d97bcab546e8 pairPro

implement getNext
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Thu, 24 Dec 2015 17:56:28 +0900
parents 6cf8252f3912
children 3c4db09b8581
files regexParser/main.cc regexParser/regexParser.cc regexParser/regexParser.h regexParser/subsetConstraction.cc regexParser/subsetConstraction.h regexParser/transition.cc regexParser/transition.h
diffstat 7 files changed, 52 insertions(+), 107 deletions(-) [+]
line wrap: on
line diff
--- a/regexParser/main.cc	Wed Dec 23 19:44:48 2015 +0900
+++ b/regexParser/main.cc	Thu Dec 24 17:56:28 2015 +0900
@@ -10,14 +10,6 @@
 {
     RegexInfo ri;
     ri.stateNumber = 1;
-    ri.asterisk = false;
-    ri.current = NEW(State);
-    ri.current->bitState.bitContainer = 0
-    bitSet(ri.current->bitState,ri.stateNumber);
-    ri.current->next = NULL;
-    ri.current->cc = NULL;
-    ri.current->node = NULL;
-    ri.states = ri.current;
     for (int i = 1; i < argc; i++) {
         if (strcmp(argv[i],"-regex") == 0) {
             ri.ptr = (unsigned char*)argv[i+1]; i++;
--- a/regexParser/regexParser.cc	Wed Dec 23 19:44:48 2015 +0900
+++ b/regexParser/regexParser.cc	Thu Dec 24 17:56:28 2015 +0900
@@ -33,12 +33,8 @@
     n->cc = cc;
     n->left = left;
     n->right = right;
-    if (n->tokenType == 'a' || n->tokenType == 'c') {
-        n->nodeNumber = ri->nodeNumber;
-        ri->nodeNumber++;
-    } else {
-        n->nodeNumber = SYNTAX_NODENUMBER;
-    }
+    n->nodeNumber = ri->stateNumber;
+    ri->stateNumber++;
 
     return n;
 }
@@ -49,7 +45,6 @@
     cc->cond.w.word = ri->tokenValue;
     cc->cond.w.length = ri->ptr - ri->tokenValue;
     cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue;
-    cc->nextState = ri->current->bitSet;
     return cc;
 }
 
@@ -79,15 +74,15 @@
   13.                          b--e
 
  */
-CharClassPtr insertCharClass(CharClassPtr cc, unsigned long begin, unsigned long end,BitVector nextState) {
+CharClassPtr insertCharClass(CharClassPtr cc, unsigned long begin, unsigned long end) {
     if (cc == NULL) {
-        createCharClassRange(begin,end,nextState.bitContainer,0,0);
+        createCharClassRange(begin,end,0,0);
     }
     if (end < cc->cond.range.begin ) { // 1
         if (cc->left) {
-            cc->left = insertCharClass(cc->left,begin,end,nextState);
+            cc->left = insertCharClass(cc->left,begin,end);
         } else {
-            cc->left = createCharClassRange(begin,end,nextState.bitContainer,0,0);
+            cc->left = createCharClassRange(begin,end,0,0);
         }
         return cc;
     } else if (end == cc->cond.range.begin ) { // 2
@@ -100,9 +95,9 @@
         return cc;
     } else if (begin > cc->cond.range.end ) { // 13
         if (cc->right) {
-            cc->right = insertCharClass(cc->right,begin,end,nextState);
+            cc->right = insertCharClass(cc->right,begin,end);
         } else {
-            cc->right = createCharClassRange(begin,end,nextState.bitContainer,0,0);
+            cc->right = createCharClassRange(begin,end,0,0);
         }
         return cc;
     }
@@ -110,7 +105,7 @@
         CharClassPtr right = cc->right;
         begin = cc->cond.range.begin;
         free(cc);
-        return insertCharClass(right,begin,end,nextState);
+        return insertCharClass(right,begin,end);
     }
     if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { // 12
         if (end > cc->cond.range.end) cc->cond.range.end = end; // 11,8
@@ -154,10 +149,9 @@
     RangeListPtr r = cc->cond.range.next;
     cc->cond.range.next = 0;
     for (; r; r = r->next) {
-        cc = insertCharClass(cc, r->begin, r->end,ri->current->bitState);
+        cc = insertCharClass(cc, r->begin, r->end);
     }
 
-    n->cc = ri->current->cc;
     // TODO literal support
     // merge rangeList here
     if (*ri->ptr) ri->ptr++;
@@ -247,22 +241,12 @@
     if (ri->tokenType == '*') {
         n = createNode(ri,'*',0,n,0);
         token(ri);
-        ri->asterisk = true;
-        ri->node = n;
     }
     return n;
 }
 
 RegexInfoPtr createRegexInfo (RegexInfoPtr ri) {
     ri->stateNumber++;
-    ri->asterisk = false;
-    ri->current = NEW(State);
-    ri->current->bitState.bitContainer = 0
-    bitSet(ri.current->bitState,ri->stateNumber);
-    ri->current->next = ri->states;
-    ri->current->cc = NULL;
-    ri->current->node = NULL;
-    ri->states = ri->current;
     return ri;
 }
 
@@ -275,28 +259,17 @@
         if (ri->tokenType == '*') {
             n = createNode(ri,'*',0,n,0);
             token(ri);
-            ri->asterisk = true;
-            ri->node = n;
             return n;
         } else if (ri->tokenType == '|') {
             n = createNode(ri,'|',0,n,0);
             NodePtr n1 = regex(ri);
             n->right = n1;
-            n->node = n;
         } else if (ri->tokenType == ')') {
             return n;
         } else {
             n = createNode(ri,'+',0,n,0);
-            if (!ri->asterisk) {
-                StatePtr save = ri->current;
-                ri = createRegexInfo(ri);
-                NodePtr n1 = regexAtom(ri);
-                n->right = n1;
-                ri->current = save;
-            } else {
-                NodePtr n1 = regexAtom(ri);
-                n->right = n1;
-            }
+            NodePtr n1 = regexAtom(ri);
+            n->right = n1;
         }
     }
     return n;
--- a/regexParser/regexParser.h	Wed Dec 23 19:44:48 2015 +0900
+++ b/regexParser/regexParser.h	Thu Dec 24 17:56:28 2015 +0900
@@ -44,9 +44,6 @@
     unsigned char tokenType;
     unsigned char *tokenValue;
     int stateNumber;
-    bool asterisk;
-    StatePtr current;
-    StatePtr states;
 } RegexInfo, *RegexInfoPtr;
 
 CharClassPtr createCharClassRange(unsigned long begin, unsigned long end, CharClassPtr left, CharClassPtr right);
--- a/regexParser/subsetConstraction.cc	Wed Dec 23 19:44:48 2015 +0900
+++ b/regexParser/subsetConstraction.cc	Thu Dec 24 17:56:28 2015 +0900
@@ -115,10 +115,10 @@
 CharClassWalkerPtr findLeftMost(CharClassPtr next,CharClassWalkerPtr walk) {
     while (next->left) {
         CharClassStackPtr ccs = NEW(CharClassStack);
-        ccs->next = &walk->stack;
-        ccs->left = false;
+        ccs->next = walk->stack;
+        ccs->turn = LEFT;
         ccs->cc = next;
-        walk->stack = *ccs;
+        walk->stack = ccs;
         next = next->left;
     }
     walk->next = next;
@@ -128,6 +128,7 @@
 CharClassWalkerPtr createCharClassWalker (CharClassPtr next) {
     CharClassWalkerPtr walk = NEW(CharClassWalker);
     walk->next = NULL;
+    walk->stack = NULL;
     if (!next) return walk;
     if (!next->left) {
         walk->next = next;
@@ -141,20 +142,36 @@
     return walk->next != NULL;
 }
 
+CharClassStackPtr charClassStackPop(CharClassWalkerPtr walk) {
+    if (!walk->stack) {
+        return NULL;
+    }
+    CharClassStackPtr prev = walk->stack->next;
+    free(walk->stack);
+    walk->stack = prev;
+    return prev;
+}
+
 CharClassPtr getNext(CharClassWalkerPtr walk) {
     CharClassPtr current = walk->next;
-    if (walk->next->left && current->right) {
-        walk->stack.left = true;
-        CharClassPtr next = findLeftMost(current->right,walk)->next;
-        walk->next = next;
-    } else {
-        TransitionPtr tsOld = ts;
-        ts = ts->next;
-        free(tsOld);
-        CharClassPtr ret;
-        if (ts) ret = ts->cc;
-        else ret = NULL;
-        walk->next = ret;
+    walk->next = NULL;
+    while (walk->stack) {
+        while (walk->stack->turn == RIGHT) {
+            if (charClassStackPop(walk) == NULL) {
+                return current;
+            }
+        }
+        if (walk->stack->turn == LEFT) {
+            walk->next = walk->stack->cc;
+            walk->stack->tuen == SELF;
+            return current;
+        }
+        if (current->right) {
+            walk->stack->turn = RIGHT;
+            walk->next = findLeftMost(current->right,walk);
+            return current;
+        }
+        charClassStackPop(walk);
     }
     return current;
 }
--- a/regexParser/subsetConstraction.h	Wed Dec 23 19:44:48 2015 +0900
+++ b/regexParser/subsetConstraction.h	Thu Dec 24 17:56:28 2015 +0900
@@ -7,11 +7,9 @@
 } StateStack, *StateStackPtr;
 
 typedef struct transitionGenerator {
-    TransitionPtr ts;
     long stateMax;
     StateStack stack;
     StatePtr state;
-    TransitionPtr transitionList;
     StatePtr stateArray;
     StatePtr stateArrayLast;
     StatePtr currentState;
@@ -19,19 +17,24 @@
 } TransitionGenerator, *TransitionGeneratorPtr;
 
 typedef struct tgValue {
-    TransitionPtr ts;
     bool asterisk;
     TransitionGeneratorPtr tg;
 } TGValue, *TGValuePtr;
 
+enum charClassStackState {
+    LEFT,
+    SELF,
+    RIGHT
+}
+
 typedef struct charClassStack {
-    bool left;
+    charClassStackState turn;
     CharClassPtr cc;
     struct charClassStack *next;
 } CharClassStack, *CharClassStackPtr;
 
 typedef struct charClassWalker {
-    CharClassStack stack;
+    CharClassStackPtr stack;
     CharClassPtr next;
 } CharClassWalker, *CharClassWalkerPtr;
 
--- a/regexParser/transition.cc	Wed Dec 23 19:44:48 2015 +0900
+++ b/regexParser/transition.cc	Thu Dec 24 17:56:28 2015 +0900
@@ -38,36 +38,3 @@
     }
     return x0;
 }
-
-TransitionPtr createTransition(CharClassPtr cc, BitVectorPtr state) {
-    TransitionPtr transition = NEW(Transition);
-    transition->condition = cc;
-    transition->condition->nextState = *state;
-    return transition;
-}
-
-TransitionPtr appendTransition0(TransitionPtr x, TransitionPtr y) {
-    TransitionPtr x0 = x;
-    for(;;) {
-        if (x->next == NULL) {
-            x->next = y;
-            return x0;
-        }
-    }
-    return x;
-}
-
-TransitionPtr appendTransition(TransitionPtr x, TransitionPtr y) {
-    TransitionPtr x0 = createTransition(x->condition, &x->condition->nextState);
-    TransitionPtr x1 = x0;
-    for(;;) {
-        if (x->next == NULL) {
-            x1->next = y;
-            return x0;
-        }
-        x = x->next;
-        x1->next = createTransition(x->condition, &x->condition->nextState);
-        x1 = x1->next;
-    }
-    return x0;
-}
--- a/regexParser/transition.h	Wed Dec 23 19:44:48 2015 +0900
+++ b/regexParser/transition.h	Thu Dec 24 17:56:28 2015 +0900
@@ -9,10 +9,6 @@
 
 StatePtr createState(BitVector bi);
 StatePtr appendState(StatePtr x,StatePtr y);
-TransitionPtr createTransition(CharClassPtr cc, BitVectorPtr state);
-TransitionPtr appendTransition0(TransitionPtr curr,TransitionPtr next);
-TransitionPtr appendTransition(TransitionPtr curr,TransitionPtr next);
-
 /*
   正規表現木を辿って transition のList をつくる
   CharClass のかさなりを判定して重なりのない新しいCharClassをつくる