Mercurial > hg > Applications > Grep
diff regexParser/subsetConstruction.cc @ 248:2b1fbfb92d54
implement tSearch
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 22 Jan 2016 20:09:42 +0900 |
parents | a3cddb32b87f |
children | e60dd2fa3409 |
line wrap: on
line diff
--- a/regexParser/subsetConstruction.cc Fri Jan 22 18:37:04 2016 +0900 +++ b/regexParser/subsetConstruction.cc Fri Jan 22 20:09:42 2016 +0900 @@ -164,6 +164,7 @@ } void setState(CharClassPtr cc, BitVector bi) { + // if (word case) setNext(bitVector to the word list) cc->nextState = bi; if (cc->left) { setState(cc->left,bi); @@ -198,19 +199,30 @@ state に対応する NodePtr を */ StatePtr createState(TGValue tgv,NodePtr n) { - StatePtr s = NEW(State); - s->stateNum = n->stateNum = tgv.tg->totalStateCount; + BitVector bi = createBitVector(tgv.tg->totalStateCount); + StatePtr s = createState(tgv.tg,bi); + n->stateNum = s->stateNum; s->next = tgv.tg->stateList; tgv.tg->stateList = s; s->node = n; - BitVector bi = createBitVector(n->stateNum); s->bitState = bi; s->accept = false; - tgv.tg->totalStateCount++; s->cc = n->cc; return s; } +SCValue createState(TransitionGeneratorPtr tg,BitVector bi) { + StatePtr s = NEW(State); + s->stateNum = tg->totalStateCount++; + s->next = NULL; + tg->stateEnd->next = s; + tg->stateEnd = s; + s->bitState = bi; + s->cc = NULL; + s->node = NULL; + return scv; +} + /** pass 1 : 正規表現に必要な状態を探して、それぞれに番号を割り振る 前が * でない + は新しく状態を作る @@ -351,18 +363,6 @@ return scv; } -SCValue createState(SCValue scv,BitVector bi) { - StatePtr s = NEW(State); - s->stateNum = ++scv.tg->totalStateCount; - s->next = NULL; - scv.stateEnd->next = s; - scv.stateEnd = s; - s->bitState = bi; - s->cc = NULL; - s->node = NULL; - return scv; -} - /** 現在のステートに含まれる組み合わせ状態をとってくる 組み合わされた個々の charclass をmerge して新しい charclass をつくり、組み合わせ状態に登録する @@ -371,32 +371,35 @@ charClassMerge の段階で新しい状態をリストに登録したら、charclasswalk をする必要がなくなる */ -SCValue subsetConstruction(SCValue scv) { - for (;scv.stateTop;scv.stateTop = scv.stateTop->next) { - CharClassWalkerPtr cw = createCharClassWalker(scv.stateTop->cc); +void determinize(StatePtr s, TransitionGeneratorPtr tg) { + BitVector bi = s->bitState; + for (;;) { + int bitPosition = searchBit(bi); + if (!bitPosition) break; + unsigned long baseNum = 1 << (bitPosition-1); + // printf("bit %lx pos %d baseNum %lx\n",bi.bitContainer,bitPosition,baseNum); + bi.bitContainer ^= baseNum; + if (baseNum==2) continue; // EOF case + StatePtr base = tg->stateArray[baseNum]; + if (base == NULL) { + errorMassege("No base state",__LINE__,__FILE__); break; + } + CharClassPtr merge = mergeTransition(s,base->cc); + s->cc = merge; + } +} + +void subsetConstruction(TransitionGeneratorPtr tg) { + for (;tg->stateTop;tg->stateTop = tg->stateTop->next) { + CharClassWalkerPtr cw = createCharClassWalker(state->cc); while (hasNext(cw)) { CharClassPtr cc = getNext(cw); BitVector bi = cc->nextState; - if (scv.tg->stateArray[bi.bitContainer]) continue; // already done - scv = createState(scv,bi); - StatePtr s = scv.stateEnd; - scv.tg->stateArray[bi.bitContainer] = s; - for (;;) { - int bitPosition = searchBit(bi); - if (!bitPosition) break; - unsigned long baseNum = 1 << (bitPosition-1); - // printf("bit %lx pos %d baseNum %lx\n",bi.bitContainer,bitPosition,baseNum); - bi.bitContainer ^= baseNum; - if (baseNum==2) continue; // EOF case - StatePtr base = scv.tg->stateArray[baseNum]; - if (base == NULL) { - errorMassege("No base state",__LINE__,__FILE__); break; - } - CharClassPtr merge = mergeTransition(s,base->cc); - s->cc = merge; - } + if (tg->stateArray[bi.bitContainer]) continue; // already done + StatePtr s = createState(tg,bi); + tg->stateArray[bi.bitContainer] = s; + determinize(s,tg); } free(cw); } - return scv; }