# HG changeset patch # User Shinji KONO # Date 1454472670 -32400 # Node ID bdfe0a32c48f870c210714c5fa5a44525f4ad64b # Parent 63213964502a96d4c95777a02e353d74f48bf15b grepWalk diff -r 63213964502a -r bdfe0a32c48f regexParser/CeriumGrep.cc --- a/regexParser/CeriumGrep.cc Wed Feb 03 12:24:34 2016 +0900 +++ b/regexParser/CeriumGrep.cc Wed Feb 03 13:11:10 2016 +0900 @@ -65,7 +65,7 @@ st_mmap_t st_mmap = createSt_mmap(filename,fd); Buffer buff = createBuffer(st_mmap); if (ts) threadedSearch(tgv.tg,buff); - else grepWalk(tgv.tg,buff.buffptr,buff); + else grepWalk(tgv.tg,buff); close(fd); } diff -r 63213964502a -r bdfe0a32c48f regexParser/Makefile --- a/regexParser/Makefile Wed Feb 03 12:24:34 2016 +0900 +++ b/regexParser/Makefile Wed Feb 03 13:11:10 2016 +0900 @@ -71,7 +71,7 @@ ./sequentialSearch -file $(TESTFILE) $(WC) grepWalk: regexParser $(TESTFILE) - ./regexParser -$(SUBSET) -regex $(REGEX)-file $(TESTFILE) $(WC) + ./regexParser $(SUBSET) -regex $(REGEX) -file $(TESTFILE) $(WC) sequentialSearch: sequentialSearch.cc regexParser fileread.o ./regexParser -seq -subset -regex $(REGEX) diff -r 63213964502a -r bdfe0a32c48f regexParser/grepWalk.cc --- a/regexParser/grepWalk.cc Wed Feb 03 12:24:34 2016 +0900 +++ b/regexParser/grepWalk.cc Wed Feb 03 13:11:10 2016 +0900 @@ -1,54 +1,65 @@ #include +#include #include "grepWalk.h" #include "subsetConstruction.h" - -void grep(TransitionGeneratorPtr tg,unsigned char *matchBegin,Buffer buff,unsigned long d) ; - -void grepSkip(TransitionGeneratorPtr tg,unsigned char *matchBegin, Buffer buff) { - matchBegin = buff.buffptr; - grep(tg,matchBegin,buff,1); // 1 is initState -} +#include "threadedSearch.h" -void grepWalk(TransitionGeneratorPtr tg, unsigned char *matchBegin, Buffer buff) { - grepSkip(tg,matchBegin,buff); -} - -void grepMatch(TransitionGeneratorPtr tg,unsigned char *matchBegin, Buffer buff) { - fwrite(matchBegin,buff.buffptr-matchBegin,1,stdout); - puts("\n"); - grepSkip(tg,matchBegin,buff); +StatePtr nextState(BitVector 
bi,TransitionGeneratorPtr tg) { + // create tSearch in next state. + StatePtr state = tg->stateArray[bi.bitContainer]; + if (state == NULL) { + // on the fly subset construction. + state = createState(tg,bi); + determinize(state,tg); + tg->stateArray[bi.bitContainer] = state; + } + return state; } -void grep(TransitionGeneratorPtr tg,unsigned char *matchBegin,Buffer buff,unsigned long d) { - unsigned char c = *buff.buffptr++; - if (c=='\0') return; - StatePtr state = tg->stateList; +void grepWalk(TransitionGeneratorPtr tg,Buffer buff) { + TSValue tsv = createTSValue(tg,buff); + tg->stateStart = NEW(State); + *tg->stateStart = *tg->stateList; + tg->stateStart->accept = false; // Start state never accept + StatePtr state = tg->stateStart; - while (state->bitState.bitContainer != d) state = state->next; // 配列へのアクセスへ変更 - CharClassWalkerPtr ccw = createCharClassWalker(state->cc); - CharClassPtr cc = NULL; - bool found = false; - while (hasNext(ccw)) { - cc = getNext(ccw); - unsigned long begin = cc->cond.range.begin; - unsigned long end = cc->cond.range.end; - if (begin == end) { - if (c == begin) found = true; - else found = false; - } else { - if (c < begin) found = false; - else if (c < end) found = true; +#if DEBUG + TSValuePtr tsvp = &tsv; // make tsv visible in lldb +#endif + next: while (tsv.buff.buffptr < tsv.buff.buffend) { + if (state->accept) { + tsv = tg->stateMatch(tsv); + } + CharClassWalkerPtr ccw = createCharClassWalker(state->cc); + if (!hasNext(ccw)) { + // matched start again + state = tg->stateStart; + ccw = createCharClassWalker(state->cc); } - if (found == true) break; + unsigned char c = *tsv.buff.buffptr++; +// printState(tsv.current->state); + while (hasNext(ccw)) { + CharClassPtr cc = getNext(ccw); + if (c<cc->cond.range.begin) { + state = tg->stateStart; + tsv = tg->stateSkip(tsv); + goto next; + } else if (c<=cc->cond.range.end) { + // range matched. + if (cc->cond.w.word) { + // match the word. 
+ // if (not match) continue; + } + state = nextState(cc->nextState,tg); + goto next; + } + } + state = tg->stateStart; + tsv = tg->stateSkip(tsv); } - - if (found == false) { - grepSkip(tg,matchBegin,buff); - } else if (found == true && (cc->nextState.bitContainer | 2)) { // Accept - grepMatch(tg,matchBegin,buff); - } else { - grep(tg,matchBegin,buff,cc->nextState.bitContainer); - } +#if DEBUG + *tsvp = tsv; +#endif } diff -r 63213964502a -r bdfe0a32c48f regexParser/grepWalk.h --- a/regexParser/grepWalk.h Wed Feb 03 12:24:34 2016 +0900 +++ b/regexParser/grepWalk.h Wed Feb 03 13:11:10 2016 +0900 @@ -1,3 +1,3 @@ #include "regexParser.h" -extern void grepWalk(TransitionGeneratorPtr tg, unsigned char *matchBegin, Buffer buff); +extern void grepWalk(TransitionGeneratorPtr tg, Buffer buff); diff -r 63213964502a -r bdfe0a32c48f regexParser/threadedSearch.cc --- a/regexParser/threadedSearch.cc Wed Feb 03 12:24:34 2016 +0900 +++ b/regexParser/threadedSearch.cc Wed Feb 03 13:11:10 2016 +0900 @@ -12,7 +12,6 @@ static TSValue stateSkip(TSValue tsv) { - tsv.current = tsv.tg->stateStart->tState; if (tsv.matchEnd) { fwrite(tsv.matchBegin,tsv.matchEnd-tsv.matchBegin,1,stdout); puts(""); @@ -100,6 +99,7 @@ for (int i = 0; i < tsv.current->ccvSize; i++) { CCVPtr ccv = &tsv.current->ccv[i]; if (c<ccv->begin) { + tsv.current = tsv.tg->stateStart->tState; tsv = tsv.current->stateSkip(tsv); goto next; } else if (c<=ccv->end) { @@ -117,6 +117,7 @@ goto next; } } + tsv.current = tsv.tg->stateStart->tState; tsv = tsv.current->stateSkip(tsv); } #if DEBUG