# HG changeset patch # User Masataka Kohagura # Date 1450947388 -32400 # Node ID d97bcab546e82b4abb36871ff4c088a7c94d7b74 # Parent 6cf8252f39121ccbaf740669dccda2d4c053559d implement getNext diff -r 6cf8252f3912 -r d97bcab546e8 regexParser/main.cc --- a/regexParser/main.cc Wed Dec 23 19:44:48 2015 +0900 +++ b/regexParser/main.cc Thu Dec 24 17:56:28 2015 +0900 @@ -10,14 +10,6 @@ { RegexInfo ri; ri.stateNumber = 1; - ri.asterisk = false; - ri.current = NEW(State); - ri.current->bitState.bitContainer = 0 - bitSet(ri.current->bitState,ri.stateNumber); - ri.current->next = NULL; - ri.current->cc = NULL; - ri.current->node = NULL; - ri.states = ri.current; for (int i = 1; i < argc; i++) { if (strcmp(argv[i],"-regex") == 0) { ri.ptr = (unsigned char*)argv[i+1]; i++; diff -r 6cf8252f3912 -r d97bcab546e8 regexParser/regexParser.cc --- a/regexParser/regexParser.cc Wed Dec 23 19:44:48 2015 +0900 +++ b/regexParser/regexParser.cc Thu Dec 24 17:56:28 2015 +0900 @@ -33,12 +33,8 @@ n->cc = cc; n->left = left; n->right = right; - if (n->tokenType == 'a' || n->tokenType == 'c') { - n->nodeNumber = ri->nodeNumber; - ri->nodeNumber++; - } else { - n->nodeNumber = SYNTAX_NODENUMBER; - } + n->nodeNumber = ri->stateNumber; + ri->stateNumber++; return n; } @@ -49,7 +45,6 @@ cc->cond.w.word = ri->tokenValue; cc->cond.w.length = ri->ptr - ri->tokenValue; cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue; - cc->nextState = ri->current->bitSet; return cc; } @@ -79,15 +74,15 @@ 13. b--e */ -CharClassPtr insertCharClass(CharClassPtr cc, unsigned long begin, unsigned long end,BitVector nextState) { +CharClassPtr insertCharClass(CharClassPtr cc, unsigned long begin, unsigned long end) { if (cc == NULL) { - createCharClassRange(begin,end,nextState.bitContainer,0,0); + createCharClassRange(begin,end,0,0); } if (end < cc->cond.range.begin ) { // 1 if (cc->left) { - cc->left = insertCharClass(cc->left,begin,end,nextState); + cc->left = insertCharClass(cc->left,begin,end); } else { - cc->left = createCharClassRange(begin,end,nextState.bitContainer,0,0); + cc->left = createCharClassRange(begin,end,0,0); } return cc; } else if (end == cc->cond.range.begin ) { // 2 @@ -100,9 +95,9 @@ return cc; } else if (begin > cc->cond.range.end ) { // 13 if (cc->right) { - cc->right = insertCharClass(cc->right,begin,end,nextState); + cc->right = insertCharClass(cc->right,begin,end); } else { - cc->right = createCharClassRange(begin,end,nextState.bitContainer,0,0); + cc->right = createCharClassRange(begin,end,0,0); } return cc; } @@ -110,7 +105,7 @@ CharClassPtr right = cc->right; begin = cc->cond.range.begin; free(cc); - return insertCharClass(right,begin,end,nextState); + return insertCharClass(right,begin,end); } if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { // 12 if (end > cc->cond.range.end) cc->cond.range.end = end; // 11,8 @@ -154,10 +149,9 @@ RangeListPtr r = cc->cond.range.next; cc->cond.range.next = 0; for (; r; r = r->next) { - cc = insertCharClass(cc, r->begin, r->end,ri->current->bitState); + cc = insertCharClass(cc, r->begin, r->end); } - n->cc = ri->current->cc; // TODO literal support // merge rangeList here if (*ri->ptr) ri->ptr++; @@ -247,22 +241,12 @@ if (ri->tokenType == '*') { n = createNode(ri,'*',0,n,0); token(ri); - ri->asterisk = true; - ri->node = n; } return n; } RegexInfoPtr createRegexInfo (RegexInfoPtr ri) { ri->stateNumber++; - ri->asterisk = false; - ri->current = NEW(State); - ri->current->bitState.bitContainer = 0 - bitSet(ri.current->bitState,ri->stateNumber); - ri->current->next = ri->states; - ri->current->cc = NULL; - ri->current->node = NULL; - ri->states = ri->current; return ri; } @@ -275,28 +259,17 @@ if (ri->tokenType == '*') { n = createNode(ri,'*',0,n,0); token(ri); - ri->asterisk = true; - ri->node = n; return n; } else if (ri->tokenType == '|') { n = createNode(ri,'|',0,n,0); NodePtr n1 = regex(ri); n->right = n1; - n->node = n; } else if (ri->tokenType == ')') { return n; } else { n = createNode(ri,'+',0,n,0); - if (!ri->asterisk) { - StatePtr save = ri->current; - ri = createRegexInfo(ri); - NodePtr n1 = regexAtom(ri); - n->right = n1; - ri->current = save; - } else { - NodePtr n1 = regexAtom(ri); - n->right = n1; - } + NodePtr n1 = regexAtom(ri); + n->right = n1; } } return n; diff -r 6cf8252f3912 -r d97bcab546e8 regexParser/regexParser.h --- a/regexParser/regexParser.h Wed Dec 23 19:44:48 2015 +0900 +++ b/regexParser/regexParser.h Thu Dec 24 17:56:28 2015 +0900 @@ -44,9 +44,6 @@ unsigned char tokenType; unsigned char *tokenValue; int stateNumber; - bool asterisk; - StatePtr current; - StatePtr states; } RegexInfo, *RegexInfoPtr; CharClassPtr createCharClassRange(unsigned long begin, unsigned long end, CharClassPtr left, CharClassPtr right); diff -r 6cf8252f3912 -r d97bcab546e8 regexParser/subsetConstraction.cc --- a/regexParser/subsetConstraction.cc Wed Dec 23 19:44:48 2015 +0900 +++ b/regexParser/subsetConstraction.cc Thu Dec 24 17:56:28 2015 +0900 @@ -115,10 +115,10 @@ CharClassWalkerPtr findLeftMost(CharClassPtr next,CharClassWalkerPtr walk) { while (next->left) { CharClassStackPtr ccs = NEW(CharClassStack); - ccs->next = &walk->stack; - ccs->left = false; + ccs->next = walk->stack; + ccs->turn = LEFT; ccs->cc = next; - walk->stack = *ccs; + walk->stack = ccs; next = next->left; } walk->next = next; @@ -128,6 +128,7 @@ CharClassWalkerPtr createCharClassWalker (CharClassPtr next) { CharClassWalkerPtr walk = NEW(CharClassWalker); walk->next = NULL; + walk->stack = NULL; if (!next) return walk; if (!next->left) { walk->next = next; @@ -141,20 +142,36 @@ return walk->next != NULL; } +CharClassStackPtr charClassStackPop(CharClassWalkerPtr walk) { + if (!walk->stack) { + return NULL; + } + CharClassStackPtr prev = walk->stack->next; + free(walk->stack); + walk->stack = prev; + return prev; +} + CharClassPtr getNext(CharClassWalkerPtr walk) { CharClassPtr current = walk->next; - if (walk->next->left && current->right) { - walk->stack.left = true; - CharClassPtr next = findLeftMost(current->right,walk)->next; - walk->next = next; - } else { - TransitionPtr tsOld = ts; - ts = ts->next; - free(tsOld); - CharClassPtr ret; - if (ts) ret = ts->cc; - else ret = NULL; - walk->next = ret; + walk->next = NULL; + while (walk->stack) { + while (walk->stack->turn == RIGHT) { + if (charClassStackPop(walk) == NULL) { + return current; + } + } + if (walk->stack->turn == LEFT) { + walk->next = walk->stack->cc; + walk->stack->tuen == SELF; + return current; + } + if (current->right) { + walk->stack->turn = RIGHT; + walk->next = findLeftMost(current->right,walk); + return current; + } + charClassStackPop(walk); } return current; } diff -r 6cf8252f3912 -r d97bcab546e8 regexParser/subsetConstraction.h --- a/regexParser/subsetConstraction.h Wed Dec 23 19:44:48 2015 +0900 +++ b/regexParser/subsetConstraction.h Thu Dec 24 17:56:28 2015 +0900 @@ -7,11 +7,9 @@ } StateStack, *StateStackPtr; typedef struct transitionGenerator { - TransitionPtr ts; long stateMax; StateStack stack; StatePtr state; - TransitionPtr transitionList; StatePtr stateArray; StatePtr stateArrayLast; StatePtr currentState; @@ -19,19 +17,24 @@ } TransitionGenerator, *TransitionGeneratorPtr; typedef struct tgValue { - TransitionPtr ts; bool asterisk; TransitionGeneratorPtr tg; } TGValue, *TGValuePtr; +enum charClassStackState { + LEFT, + SELF, + RIGHT +} + typedef struct charClassStack { - bool left; + charClassStackState turn; CharClassPtr cc; struct charClassStack *next; } CharClassStack, *CharClassStackPtr; typedef struct charClassWalker { - CharClassStack stack; + CharClassStackPtr stack; CharClassPtr next; } CharClassWalker, *CharClassWalkerPtr; diff -r 6cf8252f3912 -r d97bcab546e8 regexParser/transition.cc --- a/regexParser/transition.cc Wed Dec 23 19:44:48 2015 +0900 +++ b/regexParser/transition.cc Thu Dec 24 17:56:28 2015 +0900 @@ -38,36 +38,3 @@ } return x0; } - -TransitionPtr createTransition(CharClassPtr cc, BitVectorPtr state) { - TransitionPtr transition = NEW(Transition); - transition->condition = cc; - transition->condition->nextState = *state; - return transition; -} - -TransitionPtr appendTransition0(TransitionPtr x, TransitionPtr y) { - TransitionPtr x0 = x; - for(;;) { - if (x->next == NULL) { - x->next = y; - return x0; - } - } - return x; -} - -TransitionPtr appendTransition(TransitionPtr x, TransitionPtr y) { - TransitionPtr x0 = createTransition(x->condition, &x->condition->nextState); - TransitionPtr x1 = x0; - for(;;) { - if (x->next == NULL) { - x1->next = y; - return x0; - } - x = x->next; - x1->next = createTransition(x->condition, &x->condition->nextState); - x1 = x1->next; - } - return x0; -} diff -r 6cf8252f3912 -r d97bcab546e8 regexParser/transition.h --- a/regexParser/transition.h Wed Dec 23 19:44:48 2015 +0900 +++ b/regexParser/transition.h Thu Dec 24 17:56:28 2015 +0900 @@ -9,10 +9,6 @@ StatePtr createState(BitVector bi); StatePtr appendState(StatePtr x,StatePtr y); -TransitionPtr createTransition(CharClassPtr cc, BitVectorPtr state); -TransitionPtr appendTransition0(TransitionPtr curr,TransitionPtr next); -TransitionPtr appendTransition(TransitionPtr curr,TransitionPtr next); - /* 正規表現木を辿って transition のList をつくる CharClass のかさなりを判定して重なりのない新しいCharClassをつくる