# HG changeset patch # User Shinji KONO # Date 1454889578 -32400 # Node ID 27414e6fb33c50244f0b261a4fe86544d2893014 # Parent 4cb6eea3ab120414acfa02abf78eafdf2fa04b5c retrying blocked search fix for CbC support diff -r 4cb6eea3ab12 -r 27414e6fb33c regexParser/Makefile --- a/regexParser/Makefile Thu Feb 04 03:26:18 2016 +0900 +++ b/regexParser/Makefile Mon Feb 08 08:59:38 2016 +0900 @@ -1,12 +1,13 @@ -TARGET= regexParser test/ccMerge regexParserCbC +TARGET= regexParser test/ccMerge CFLAGS= -Wall -O0 -g -I$(CERIUM)/include/TaskManager -I. SEQCFLAGS= CFLAGS= -Wall -O -g -I$(CERIUM)/include/TaskManager -I. -CC= /Users/e105711/prog/seminar/CbC/llvmInst/bin/clang++ +CC= clang++ +CbC= clang++ CERIUM= ../../Cerium AR= libCeriumGrep.a SRCS_TMP = $(wildcard *.cc) -SRCS_EXCLUDE = sequentialSearch.cc state.cc parallelSearch.cc taskInit.cc searchTask.cc main.cc stateCbC.cc sequentialSearchCbC.cc generateSequentialSearchCbC.cc sequentialSearch.cc generateSequentialSearch.cc +SRCS_EXCLUDE = sequentialSearch.cc state.cc parallelSearch.cc taskInit.cc searchTask.cc main.cc stateCbC.cc sequentialSearchCbC.cc generateSequentialSearchCbC.cc sequentialSearch.cc generateSequentialSearch.cc sequentialSearchCbC.cc SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP)) OBJS = $(SRCS:.cc=.o) @@ -29,7 +30,7 @@ $(CC) $(CFLAGS) -o $@ $(OBJS) main.o generateSequentialSearch.o regexParserCbC: $(OBJS) main.o sequentialSearchCbC.o generateSequentialSearchCbC.o - $(CC) $(CFLAGS) -o $@ $(OBJS) main.o generateSequentialSearchCbC.o + $(CbC) $(CFLAGS) -o $@ $(OBJS) main.o generateSequentialSearchCbC.o test/searchBit: test/searchBit.cc $(CC) $(CFLAGS) $< bitVector.cc -o $@ @@ -84,8 +85,8 @@ sequentialSearchCbC: sequentialSearchCbC.cc regexParserCbC fileread.o ./regexParserCbC -seq -subset -regex $(REGEX) - $(CC) $(CFLAGS) -c sequentialSearchCbC.cc - $(CC) $(SEQDFLAGS) sequentialSearchCbC.o generateSequentialSearchCbC.o $(OBJS) -o $@ + $(CbC) $(CFLAGS) -c sequentialSearchCbC.cc + $(CbC) $(SEQDFLAGS) 
sequentialSearchCbC.o generateSequentialSearchCbC.o $(OBJS) -o $@ - ./$@ -file $(TESTFILE) test1: regexParser diff -r 4cb6eea3ab12 -r 27414e6fb33c regexParser/TODO --- a/regexParser/TODO Thu Feb 04 03:26:18 2016 +0900 +++ b/regexParser/TODO Mon Feb 08 08:59:38 2016 +0900 @@ -1,3 +1,43 @@ +Sat Feb 6 19:50:04 JST 2016 + + ちょっとあれだけど、 + + 各blockはstate 1から始める + 終わりの状態が1でなかったら、そこだけやりなおす + + ってのが簡単。最悪、全部やり直す可能性があるが... + +Wed Feb 3 21:15:49 JST 2016 + + blockedSearch だと一つはoverlapさせる必要がある。 + + (aaa|aaabb) + state : 1 [a-a] (14) + state : 2* + state : 4 [a-a] (8) + state : 8 [a-a] (2) + state : 10 [a-a] (20) + state : 20 [a-a] (40) + state : 40 [b-b] (80) + state : 80 [b-b] (2) + state : 14 [a-a] (28) + state : 28 [a-a] (42) + state : 42* [b-b] (80) + + a | a | a bbb + prev 14 28 + current 7F ... .. + + a a | a | a bbb + prev 14 28 + current 7F ... .. + + false positive がある → 再判定 + maximum match による見落としがある (元々そういうものはあるのだが...) + なくそうと思うと、ちょっと大変(可能な resultを全部推移させる必要がある) + 内部の非決定性がなければ、こういう問題は出ない + + Wed Feb 3 08:20:06 JST 2016 state : 1 [w-w] (4) diff -r 4cb6eea3ab12 -r 27414e6fb33c regexParser/cerium/CeriumMain.cc --- a/regexParser/cerium/CeriumMain.cc Thu Feb 04 03:26:18 2016 +0900 +++ b/regexParser/cerium/CeriumMain.cc Mon Feb 08 08:59:38 2016 +0900 @@ -15,6 +15,9 @@ static double ed_time; const char* usr_help_str = ""; +void exportState(TransitionGeneratorPtr tg) { +} + static double getTime() { struct timeval tv; @@ -22,6 +25,17 @@ return tv.tv_sec + (double)tv.tv_usec*1e-6; } +void addResult(TSValue tsv,bool cont, unsigned char *begin, unsigned char *end) { + ResultPtr r = NEW(Result); + r->continued = cont; + r->begin = begin; + r->end = end; + *tsv.blk->resultEnd = r; + r->next = NULL; + tsv.blk->resultEnd = &r->next; +} + + static TSValue stateNothing(TSValue tsv) { return tsv; @@ -31,14 +45,7 @@ TSValue stateSkip(TSValue tsv) { tsv.current = tsv.tg->stateStart->tState; if (tsv.matchEnd) { - ResultPtr r = NEW(Result); - r->continued = false; - r->begin = tsv.matchBegin; 
- r->end = tsv.matchEnd; - *tsv.blk->resultEnd = r; - r->next = NULL; - tsv.blk->resultEnd = &r->next; - tsv.matchEnd=NULL; + addResult(tsv,false,tsv.matchBegin,tsv.matchEnd); } tsv.matchBegin = tsv.buff.buffptr; // next char may be matchBegin return tsv; diff -r 4cb6eea3ab12 -r 27414e6fb33c regexParser/cerium/CeriumMain.h --- a/regexParser/cerium/CeriumMain.h Thu Feb 04 03:26:18 2016 +0900 +++ b/regexParser/cerium/CeriumMain.h Mon Feb 08 08:59:38 2016 +0900 @@ -1,1 +1,3 @@ extern ResultPtr resultPrint(ResultPtr r,const char* comment); +extern void addResult(TSValue tsv,bool cont, unsigned char *begin, unsigned char *end) ; + diff -r 4cb6eea3ab12 -r 27414e6fb33c regexParser/cerium/ppe/Exec.cc --- a/regexParser/cerium/ppe/Exec.cc Thu Feb 04 03:26:18 2016 +0900 +++ b/regexParser/cerium/ppe/Exec.cc Mon Feb 08 08:59:38 2016 +0900 @@ -14,31 +14,19 @@ TSValue tsv = createTSValue(tg,buff); BlockOutput blk; tsv.blk = &blk; - if (task_spawned == 0) { - tsv.current = tg->stateStart->tState; - } else { - tsv.current = tg->anyState->tState; - } + tsv.current = tg->stateStart->tState; tsv.blk->result = NULL; ResultPtr result = NULL; tsv.blk->resultEnd = &result; unsigned char *end = tsv.buff.buffend; - tsv.buff.buffend = tsv.buff.buff+1; - tsv = tSearch(tsv); - tsv.blk->blockBegin = tsv.current; tsv.buff.buffend = end; tsv = tSearch(tsv); tsv.blk->blockEnd = tsv.current; + addResult(tsv,true,buff.buff,buff.buffend); // entire buffer if (tsv.blk->blockEnd->state->bitState.bitContainer != 1) { if (tsv.matchBegin != tsv.buff.buffptr) { // partial match case at block end. 
- ResultPtr r = NEW(Result); - r->continued = true; - r->begin = tsv.matchBegin; - r->end = tsv.matchEnd; - *tsv.blk->resultEnd = r; - r->next = NULL; - tsv.blk->resultEnd = &r->next; + addResult(tsv,true,tsv.matchBegin,tsv.matchEnd); // printf("Exec %lx r->begin : %p r->end : %p\n",tsv.blockEnd->state->bitState.bitContainer, r->begin,r->end); } } diff -r 4cb6eea3ab12 -r 27414e6fb33c regexParser/cerium/ppe/Print.cc --- a/regexParser/cerium/ppe/Print.cc Thu Feb 04 03:26:18 2016 +0900 +++ b/regexParser/cerium/ppe/Print.cc Mon Feb 08 08:59:38 2016 +0900 @@ -5,11 +5,22 @@ #include "FileMapReduce.h" #include "regexParser.h" #include "CeriumMain.h" +#include "threadedSearch.h" #define STATUS_NUM 2 /* これは必須 */ SchedDefineTask1(Print,run_print); +static +TSValue stateSkipOnce(TSValue tsv) { + if (tsv.matchEnd) { + addResult(tsv,false,tsv.matchBegin,tsv.matchEnd); + } + tsv.buff.buffend = tsv.buff.buffptr; // end search + return tsv; +} + + static int run_print(SchedTask *s, void *rbuf, void *wbuf) { @@ -20,6 +31,10 @@ ResultPtr prev = NULL; for (int i = 0; i < out_task_num ; i++) { ResultPtr r = (ResultPtr)w->o_data[i*out_size+0]; + // first reply contains block begin and block end + unsigned char *begin = r->begin; + unsigned char *end = r->end; + r = r->next; if (r == NULL) { prev = NULL; continue; @@ -35,13 +50,27 @@ fwrite(r->begin,r->end - r->begin-1,1,stdout); printf(" match %d\n", ((prevBlockEnd->bitState.bitContainer & ~blockBegin->bitState.bitContainer)==0)? 
1 : 0 ); #endif - if ((prevBlockEnd->bitState.bitContainer & ~blockBegin->bitState.bitContainer)==0) { - // 前のブロックの matchBegin から最初 result の end までがマッチ - fwrite(prev->begin,r->end - prev->begin,1,stdout); -// printf("####"); - if (!r->continued) puts(""); + if (prevBlockEnd->bitState.bitContainer !=1) { + // そこから最初の stateSkip までやり直し。マッチしたら表示。 + TransitionGeneratorPtr tg = (TransitionGeneratorPtr)w->global; + tg->stateSkip = stateSkipOnce; + Buffer buff; + buff.buff = buff.buffptr = begin; + buff.buffend = end; + TSValue tsv = createTSValue(tg,buff); + BlockOutput blk; + tsv.blk = &blk; + tsv.current = prevBlockEnd->tState; + tsv.blk->result = NULL; + ResultPtr result = NULL; + tsv.blk->resultEnd = &result; + tsv.matchBegin = prev->begin; + tsv.matchEnd = prev->end; + tsv = tSearch(tsv); + if (result) { + resultPrint(prev->next,"Print"); + } } - r = r->next; // printf("%p\n",r); } else { if ((blockBegin->bitState.bitContainer & 1)!=1) r = r->next;