# HG changeset patch # User Shinji KONO # Date 1454469874 -32400 # Node ID 63213964502a96d4c95777a02e353d74f48bf15b # Parent c5a7caa37f61b57b9b43b3142839aea0f419b81e refactoring .... diff -r c5a7caa37f61 -r 63213964502a regexParser/Makefile --- a/regexParser/Makefile Tue Feb 02 11:58:59 2016 +0900 +++ b/regexParser/Makefile Wed Feb 03 12:24:34 2016 +0900 @@ -1,5 +1,6 @@ TARGET= regexParser test/ccMerge CFLAGS= -Wall -O0 -g -I$(CERIUM)/include/TaskManager -I. +SEQCFLAGS= CFLAGS= -Wall -O -g -I$(CERIUM)/include/TaskManager -I. CC= clang++ CERIUM= ../../Cerium AR= libCeriumGrep.a @@ -11,7 +12,7 @@ # REGEX = '[A-Z][A-Za-z0-9_]*' REGEX = '(a|b)*a(a|b)(a|b)' -# REGEX = '(mil|have)' +# REGEX = '(mil|hav)[A-Za-z]' SUBST = -subset @@ -75,8 +76,8 @@ sequentialSearch: sequentialSearch.cc regexParser fileread.o ./regexParser -seq -subset -regex $(REGEX) $(CC) $(CFLAGS) -c sequentialSearch.cc - $(CC) -O sequentialSearch.o fileread.o -o $@ - ./$@ -file sequentialSearch.cc + $(CC) $(SEQDFLAGS) sequentialSearch.o $(OBJS) -o $@ + - ./$@ -file $(TESTFILE) test1: regexParser diff -r c5a7caa37f61 -r 63213964502a regexParser/TODO --- a/regexParser/TODO Tue Feb 02 11:58:59 2016 +0900 +++ b/regexParser/TODO Wed Feb 03 12:24:34 2016 +0900 @@ -1,4 +1,17 @@ -Tue Feb 2 11:21:14 JST 2016 +Wed Feb 3 08:20:06 JST 2016 + + state : 1 [w-w] (4) + state : 4 [o-o] (8) + state : 8 [r-r] (10) + node : a 10 -> 2 [d-d] (2) + + w | o r d + 4 8 10 2 + + x | w o r d + 1 4 8 10 2 + +Tue Feb 2 11:21:14 JST 2016 kono あとは word の処理だけだ charClassMergeをなおさないといけない @@ -8,7 +21,7 @@ Cerirum 側で、最初のmatchが表示されてない -Tue Feb 2 09:55:40 JST 2016 +Tue Feb 2 09:55:40 JST 2016 kono % ./regexParser -subst -regex '(a|b)*a(a|b)(a|b)' ---Print Node---- diff -r c5a7caa37f61 -r 63213964502a regexParser/cerium/CeriumMain.cc --- a/regexParser/cerium/CeriumMain.cc Tue Feb 02 11:58:59 2016 +0900 +++ b/regexParser/cerium/CeriumMain.cc Wed Feb 03 12:24:34 2016 +0900 @@ -40,6 +40,7 @@ tsv.blk->resultEnd = &r->next; tsv.matchEnd=NULL; } + tsv.matchBegin = tsv.buff.buffptr; // next char may be matchBegin return tsv; } diff -r c5a7caa37f61 -r 63213964502a regexParser/cerium/ppe/Exec.cc --- a/regexParser/cerium/ppe/Exec.cc Tue Feb 02 11:58:59 2016 +0900 +++ b/regexParser/cerium/ppe/Exec.cc Wed Feb 03 12:24:34 2016 +0900 @@ -11,10 +11,8 @@ SchedDefineTask1(Exec,blockedGrep); TSValue blockSearch(TransitionGeneratorPtr tg,Buffer buff,int task_spawned) { - TSValue tsv; + TSValue tsv = createTSValue(tg,buff); BlockOutput blk; - tsv.buff = buff; - tsv.tg = tg; tsv.blk = &blk; if (task_spawned == 0) { tsv.current = tg->stateStart->tState; @@ -26,8 +24,6 @@ tsv.blk->resultEnd = &result; unsigned char *end = tsv.buff.buffend; tsv.buff.buffend = tsv.buff.buff+1; - tsv.matchBegin = tsv.buff.buffptr; - tsv.matchEnd = NULL; tsv = tSearch(tsv); tsv.blk->blockBegin = tsv.current; tsv.buff.buffend = end; @@ -79,3 +75,5 @@ o_data[2] = (unsigned long)tsv.blk->blockEnd->state; return 0; } + +/* end */ diff -r c5a7caa37f61 -r 63213964502a regexParser/generateSequentialSearch.cc --- a/regexParser/generateSequentialSearch.cc Tue Feb 02 11:58:59 2016 +0900 +++ b/regexParser/generateSequentialSearch.cc Wed Feb 03 12:24:34 2016 +0900 @@ -4,7 +4,35 @@ #include "generateSequentialSearch.h" #include "subsetConstruction.h" -void exportState(TransitionGeneratorPtr tg) { +void +generateState1(StatePtr state,long stateNum, bool accept, FILE *fp, TransitionGeneratorPtr tg) { + fprintf(fp,"TSValue state%lx(TSValue tsv) {\n",stateNum); + if (accept && state->accept) { + fputs(" tsv=tsv.tg->stateMatch(tsv);\n",fp); + } + fputs(" if (tsv.buff.buffptr >= tsv.buff.buffend) return tsv;\n",fp); + CharClassWalkerPtr ccw = createCharClassWalker(state->cc); + if (hasNext(ccw)) fputs(" unsigned char c = *tsv.buff.buffptr++;\n",fp); + fputs(" if (0) ;\n",fp); + while (hasNext(ccw)) { + CharClassPtr cc = getNext(ccw); + unsigned long begin = cc->cond.range.begin; + unsigned long end = cc->cond.range.end; + BitVector bi = cc->nextState; + if (begin == end) { + fprintf(fp," else if (c=='%c') { return state%lx(tsv);}\n",(unsigned char)begin, bi.bitContainer); + } else { + fprintf(fp," else if (c<'%c') { tsv=tsv.tg->stateSkip(tsv);return state0(tsv);}\n",(unsigned char)begin); + fprintf(fp," else if (c<='%c') { return state%lx(tsv);} \n",(unsigned char)end, bi.bitContainer); + } + } + free(ccw); + fprintf(fp," else { tsv=tsv.tg->stateSkip(tsv); return state0(tsv);}\n"); + fputs("}\n\n",fp); +} + +void +exportState(TransitionGeneratorPtr tg) { StatePtr state = tg->stateList; FILE *fp = fopen("state.cc","w"); if (fp==NULL) { @@ -13,48 +41,13 @@ exit(1); } for (;state;state = state->next) { - fprintf(fp,"void state%lx(Buffer buff);\n",state->bitState.bitContainer); + fprintf(fp,"TSValue state%lx(TSValue tsv);\n",state->bitState.bitContainer); } fputs("\n",fp); + // initial state must not accept empty string + generateState1(tg->stateList,0L,false,fp,tg); for (state = tg->stateList;state;state = state->next) { - fprintf(fp,"void state%lx(Buffer buff) {\n",state->bitState.bitContainer); - if (state->bitState.bitContainer == 2) { // Accept - fputs(" stateMatch(buff);\n",fp); - } else { // not Accept - fputs(" if (buff.buffptr >= buff.buffend) return;\n",fp); - fputs(" unsigned char c = *buff.buffptr++;\n",fp); - CharClassWalkerPtr ccw = createCharClassWalker(state->cc); - bool flag = true; - while (hasNext(ccw)) { - CharClassPtr cc = getNext(ccw); - unsigned long begin = cc->cond.range.begin; - unsigned long end = cc->cond.range.end; - BitVector bi = cc->nextState; - if (flag) { - flag = false; - fputs(" ",fp); - } else { - fputs(" else ",fp); - } - if (begin == end) { - fprintf(fp,"if (c=='%c') state%lx(buff);\n",(unsigned char)begin, bi.bitContainer); - } else { - if (state->accept) { - fprintf(fp,"if (c<'%c') stateMatch(buff);\n",(unsigned char)begin); - } else { - fprintf(fp,"if (c<'%c') stateSkip(buff);\n",(unsigned char)begin); - } - fprintf(fp," else if (c<='%c') state%lx(buff);\n",(unsigned char)end, bi.bitContainer); - } - } - free(ccw); - if (state->bitState.bitContainer & 2) { - fprintf(fp," else stateMatch(buff);\n"); - } else { - fprintf(fp," else stateSkip(buff);\n"); - } - } - fputs("}\n\n",fp); + generateState1(state,state->bitState.bitContainer,true,fp,tg); } fclose(fp); } diff -r c5a7caa37f61 -r 63213964502a regexParser/sequentialSearch.cc --- a/regexParser/sequentialSearch.cc Tue Feb 02 11:58:59 2016 +0900 +++ b/regexParser/sequentialSearch.cc Wed Feb 03 12:24:34 2016 +0900 @@ -6,20 +6,9 @@ #include #include "fileread.h" - -void state1(Buffer buff); +#include "threadedSearch.h" -void stateSkip(Buffer buff) { - buff.matchBegin = buff.buffptr; - state1(buff); -} - -void stateMatch(Buffer buff) { - fwrite(buff.matchBegin,buff.buffptr-buff.matchBegin-1,1,stdout); - puts(""); - buff.buffptr--; - stateSkip(buff); -} +TSValue state0(TSValue ts); #include "state.cc" int main(int argc, char **argv) { @@ -33,7 +22,10 @@ int fd = 0; st_mmap_t st_mmap = createSt_mmap(filename,fd); Buffer buff = createBuffer(st_mmap); - stateSkip(buff); + TSValue tsv = createTSValue(NULL,buff); + tsv = state0(tsv); close(fd); return 0; } + +/* end */ diff -r c5a7caa37f61 -r 63213964502a regexParser/subsetConstruction.h --- a/regexParser/subsetConstruction.h Tue Feb 02 11:58:59 2016 +0900 +++ b/regexParser/subsetConstruction.h Wed Feb 03 12:24:34 2016 +0900 @@ -14,3 +14,4 @@ extern void determinize(StatePtr s, TransitionGeneratorPtr tg); extern void subsetConstruction(TransitionGeneratorPtr tg); extern void createAnyState(TransitionGeneratorPtr tg); +extern TransitionGeneratorPtr createTransitionGenerator(); diff -r c5a7caa37f61 -r 63213964502a regexParser/threadedSearch.cc --- a/regexParser/threadedSearch.cc Tue Feb 02 11:58:59 2016 +0900 +++ b/regexParser/threadedSearch.cc Wed Feb 03 12:24:34 2016 +0900 @@ -18,12 +18,13 @@ puts(""); tsv.matchEnd = NULL; } + tsv.matchBegin = tsv.buff.buffptr; // next char may be matchBegin return tsv; } static TSValue stateMatch(TSValue tsv) { - tsv.matchEnd = tsv.buff.buffptr; // next char of the match + tsv.matchEnd = tsv.buff.buffptr; // next char of the match return tsv; } @@ -38,11 +39,11 @@ } tState->ccvSize = ccvSize; if (state->accept) { - tState->stateSkip = tg->stateSkip; - tState->stateMatch = tg->stateMatch; + tState->stateMatch = tg->stateMatch; + tState->stateSkip = tg->stateSkip; } else { - tState->stateSkip = tg->stateSkip; - tState->stateMatch = tg->stateNothing; + tState->stateMatch = tg->stateNothing; + tState->stateSkip = tg->stateSkip; } if (ccvSize == 0) { tState->ccv = NULL; @@ -89,13 +90,17 @@ TSValuePtr tsvp = &tsv; // make tsv visible in lldb #endif next: while (tsv.buff.buffptr < tsv.buff.buffend) { + tsv = tsv.current->stateMatch(tsv); + if (tsv.current->ccvSize==0) { + // matched start again + tsv.current = tsv.tg->stateStart->tState; + } unsigned char c = *tsv.buff.buffptr++; // printState(tsv.current->state); for (int i = 0; i < tsv.current->ccvSize; i++) { CCVPtr ccv = &tsv.current->ccv[i]; if (cbegin) { tsv = tsv.current->stateSkip(tsv); - tsv.matchBegin = tsv.buff.buffptr; goto next; } else if (c<=ccv->end) { // range matched. @@ -103,7 +108,6 @@ // match the word. // if (not match) continue; } - tsv = tsv.current->stateMatch(tsv); if (ccv->tState) { tsv.current = ccv->tState; } else { @@ -114,7 +118,6 @@ } } tsv = tsv.current->stateSkip(tsv); - tsv.matchBegin = tsv.buff.buffptr; } #if DEBUG *tsvp = tsv; @@ -124,21 +127,32 @@ #endif } -void threadedSearch(TransitionGeneratorPtr tg, Buffer buff) { +TSValue +createTSValue(TransitionGeneratorPtr tg, Buffer buff) { TSValue tsv; + if (!tg) { + tg = createTransitionGenerator(); + } tsv.buff = buff; tsv.tg = tg; tsv.blk = NULL; + tsv.matchBegin = buff.buffptr; + tsv.matchEnd = NULL; tsv.tg->stateSkip = stateSkip; tsv.tg->stateMatch = stateMatch; tsv.tg->stateNothing = stateNothing; - tsv.matchBegin = buff.buffptr; - tsv.matchEnd = NULL; + return tsv; +} + + +void threadedSearch(TransitionGeneratorPtr tg, Buffer buff) { + TSValue tsv = createTSValue(tg,buff); tsv.current = generateTState(tg->stateList,tg); tg->stateStart = NEW(State); *tg->stateStart = *tg->stateList; tg->stateStart->accept = false; // Start state never accept generateTState(tg->stateStart,tg); - tSearch(tsv); } + +/* end */ diff -r c5a7caa37f61 -r 63213964502a regexParser/threadedSearch.h --- a/regexParser/threadedSearch.h Tue Feb 02 11:58:59 2016 +0900 +++ b/regexParser/threadedSearch.h Wed Feb 03 12:24:34 2016 +0900 @@ -1,3 +1,4 @@ extern void threadedSearch(TransitionGeneratorPtr tg, Buffer buff); extern TStatePtr generateTState(StatePtr s, TransitionGeneratorPtr tg); -TSValue tSearch(TSValue tsv); +extern TSValue createTSValue(TransitionGeneratorPtr tg, Buffer buff) ; +extern TSValue tSearch(TSValue tsv);