changeset 302:27414e6fb33c

retrying blocked search fix for CbC support
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 08 Feb 2016 08:59:38 +0900
parents 4cb6eea3ab12
children cc770cd8d000
files regexParser/Makefile regexParser/TODO regexParser/cerium/CeriumMain.cc regexParser/cerium/CeriumMain.h regexParser/cerium/ppe/Exec.cc regexParser/cerium/ppe/Print.cc
diffstat 6 files changed, 102 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/regexParser/Makefile	Thu Feb 04 03:26:18 2016 +0900
+++ b/regexParser/Makefile	Mon Feb 08 08:59:38 2016 +0900
@@ -1,12 +1,13 @@
-TARGET= regexParser test/ccMerge regexParserCbC
+TARGET= regexParser test/ccMerge 
 CFLAGS= -Wall -O0 -g -I$(CERIUM)/include/TaskManager -I.
 SEQCFLAGS= CFLAGS= -Wall -O -g -I$(CERIUM)/include/TaskManager -I.
-CC= /Users/e105711/prog/seminar/CbC/llvmInst/bin/clang++
+CC= clang++
+CbC= clang++
 CERIUM= ../../Cerium
 AR= libCeriumGrep.a
 
 SRCS_TMP = $(wildcard *.cc)
-SRCS_EXCLUDE = sequentialSearch.cc state.cc parallelSearch.cc taskInit.cc searchTask.cc main.cc stateCbC.cc sequentialSearchCbC.cc generateSequentialSearchCbC.cc sequentialSearch.cc generateSequentialSearch.cc
+SRCS_EXCLUDE = sequentialSearch.cc state.cc parallelSearch.cc taskInit.cc searchTask.cc main.cc stateCbC.cc sequentialSearchCbC.cc generateSequentialSearchCbC.cc sequentialSearch.cc generateSequentialSearch.cc sequentialSearchCbC.cc
 SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
 OBJS = $(SRCS:.cc=.o)
 
@@ -29,7 +30,7 @@
 	$(CC) $(CFLAGS) -o $@ $(OBJS) main.o generateSequentialSearch.o
 
 regexParserCbC: $(OBJS) main.o sequentialSearchCbC.o generateSequentialSearchCbC.o
-	$(CC) $(CFLAGS) -o $@ $(OBJS) main.o generateSequentialSearchCbC.o
+	$(CbC) $(CFLAGS) -o $@ $(OBJS) main.o generateSequentialSearchCbC.o
 
 test/searchBit: test/searchBit.cc
 	$(CC) $(CFLAGS)  $< bitVector.cc -o $@
@@ -84,8 +85,8 @@
 
 sequentialSearchCbC: sequentialSearchCbC.cc regexParserCbC fileread.o
 	./regexParserCbC -seq -subset -regex $(REGEX)
-	$(CC) $(CFLAGS)  -c sequentialSearchCbC.cc 
-	$(CC) $(SEQDFLAGS)  sequentialSearchCbC.o generateSequentialSearchCbC.o $(OBJS) -o $@
+	$(CbC) $(CFLAGS)  -c sequentialSearchCbC.cc 
+	$(CbC) $(SEQDFLAGS)  sequentialSearchCbC.o generateSequentialSearchCbC.o $(OBJS) -o $@
 	- ./$@ -file $(TESTFILE)
 
 test1: regexParser
--- a/regexParser/TODO	Thu Feb 04 03:26:18 2016 +0900
+++ b/regexParser/TODO	Mon Feb 08 08:59:38 2016 +0900
@@ -1,3 +1,43 @@
+Sat Feb  6 19:50:04 JST 2016
+
+    ちょっとあれだけど、
+
+    各blockはstate 1から始める
+    終わりの状態が1でなかったら、そこだけやりなおす
+
+    ってのが簡単。最悪、全部やり直す可能性があるが...
+
+Wed Feb  3 21:15:49 JST 2016
+
+    blockedSearch だと一つはoverlapさせる必要がある。
+
+    (aaa|aaabb)
+        state : 1 [a-a] (14)
+        state : 2*
+        state : 4 [a-a] (8)
+        state : 8 [a-a] (2)
+        state : 10 [a-a] (20)
+        state : 20 [a-a] (40)
+        state : 40 [b-b] (80)
+        state : 80 [b-b] (2)
+        state : 14 [a-a] (28)
+        state : 28 [a-a] (42)
+        state : 42* [b-b] (80)
+
+              a | a | a    bbb 
+ prev        14  28   
+ current         7F   ... ..
+
+            a   a   | a | a    bbb 
+ prev       14  28   
+ current              7F   ... ..
+
+    false positive がある  → 再判定
+    maximum match による見落としがある (元々そういうものはあるのだが...)
+    なくそうと思うと、ちょっと大変(可能な resultを全部推移させる必要がある)
+    内部の非決定性がなければ、こういう問題は出ない
+
+
 Wed Feb  3 08:20:06 JST 2016
 
     state : 1 [w-w] (4)
--- a/regexParser/cerium/CeriumMain.cc	Thu Feb 04 03:26:18 2016 +0900
+++ b/regexParser/cerium/CeriumMain.cc	Mon Feb 08 08:59:38 2016 +0900
@@ -15,6 +15,9 @@
 static double ed_time;
 const char* usr_help_str = "";
 
+void exportState(TransitionGeneratorPtr tg) {
+}
+
 static double
 getTime() {
     struct timeval tv;
@@ -22,6 +25,17 @@
     return tv.tv_sec + (double)tv.tv_usec*1e-6;
 }
 
+void addResult(TSValue tsv,bool cont, unsigned char *begin, unsigned char *end) {
+        ResultPtr r = NEW(Result);
+        r->continued = cont;
+        r->begin = begin;
+        r->end = end;
+        *tsv.blk->resultEnd = r;
+        r->next = NULL;
+        tsv.blk->resultEnd = &r->next;
+}
+
+
 static
 TSValue stateNothing(TSValue tsv) {
     return tsv;
@@ -31,14 +45,7 @@
 TSValue stateSkip(TSValue tsv) {
     tsv.current = tsv.tg->stateStart->tState;
     if (tsv.matchEnd) {
-        ResultPtr r = NEW(Result);
-        r->continued = false;
-        r->begin = tsv.matchBegin;
-        r->end = tsv.matchEnd;
-        *tsv.blk->resultEnd = r;
-        r->next = NULL;
-        tsv.blk->resultEnd = &r->next;
-        tsv.matchEnd=NULL;
+        addResult(tsv,false,tsv.matchBegin,tsv.matchEnd);
     }
     tsv.matchBegin = tsv.buff.buffptr;  // next char may be matchBegin
     return tsv;
--- a/regexParser/cerium/CeriumMain.h	Thu Feb 04 03:26:18 2016 +0900
+++ b/regexParser/cerium/CeriumMain.h	Mon Feb 08 08:59:38 2016 +0900
@@ -1,1 +1,3 @@
 extern ResultPtr resultPrint(ResultPtr r,const char* comment);
+extern void addResult(TSValue tsv,bool cont, unsigned char *begin, unsigned char *end) ;
+
--- a/regexParser/cerium/ppe/Exec.cc	Thu Feb 04 03:26:18 2016 +0900
+++ b/regexParser/cerium/ppe/Exec.cc	Mon Feb 08 08:59:38 2016 +0900
@@ -14,31 +14,19 @@
     TSValue tsv = createTSValue(tg,buff);
     BlockOutput blk;
     tsv.blk = &blk;
-    if (task_spawned == 0) {
-        tsv.current = tg->stateStart->tState;
-    } else {
-        tsv.current = tg->anyState->tState;
-    }
+    tsv.current = tg->stateStart->tState;
     tsv.blk->result = NULL;
     ResultPtr result = NULL;
     tsv.blk->resultEnd = &result;
     unsigned char *end = tsv.buff.buffend;
-    tsv.buff.buffend = tsv.buff.buff+1;
-    tsv = tSearch(tsv);
-    tsv.blk->blockBegin = tsv.current;
     tsv.buff.buffend = end;
     tsv = tSearch(tsv);
     tsv.blk->blockEnd = tsv.current;
+    addResult(tsv,true,buff.buff,buff.buffend);  // entire buffer
     if (tsv.blk->blockEnd->state->bitState.bitContainer != 1) {
         if (tsv.matchBegin != tsv.buff.buffptr) {
             // partial match case at block end.
-            ResultPtr r = NEW(Result);
-            r->continued = true;
-            r->begin = tsv.matchBegin;
-            r->end = tsv.matchEnd;
-            *tsv.blk->resultEnd = r;
-            r->next = NULL;
-            tsv.blk->resultEnd = &r->next;
+            addResult(tsv,true,tsv.matchBegin,tsv.matchEnd);
 // printf("Exec %lx r->begin : %p r->end : %p\n",tsv.blockEnd->state->bitState.bitContainer, r->begin,r->end);
         }
     }
--- a/regexParser/cerium/ppe/Print.cc	Thu Feb 04 03:26:18 2016 +0900
+++ b/regexParser/cerium/ppe/Print.cc	Mon Feb 08 08:59:38 2016 +0900
@@ -5,11 +5,22 @@
 #include "FileMapReduce.h"
 #include "regexParser.h"
 #include "CeriumMain.h"
+#include "threadedSearch.h"
 
 #define STATUS_NUM 2
 /* これは必須 */
 SchedDefineTask1(Print,run_print);
 
+static
+TSValue stateSkipOnce(TSValue tsv) {
+    if (tsv.matchEnd) {
+        addResult(tsv,false,tsv.matchBegin,tsv.matchEnd);
+    }
+    tsv.buff.buffend = tsv.buff.buffptr;    // end search
+    return tsv;
+}
+
+
 static int
 run_print(SchedTask *s, void *rbuf, void *wbuf)
 {
@@ -20,6 +31,10 @@
     ResultPtr prev = NULL;
     for (int i = 0; i < out_task_num ; i++) {
         ResultPtr r = (ResultPtr)w->o_data[i*out_size+0];
+        //  first reply contains block begin and block end
+        unsigned char *begin = r->begin;
+        unsigned char *end = r->end;
+        r = r->next;
         if (r == NULL) {
             prev = NULL;
             continue;
@@ -35,13 +50,27 @@
 fwrite(r->begin,r->end - r->begin-1,1,stdout);
 printf(" match %d\n", ((prevBlockEnd->bitState.bitContainer & ~blockBegin->bitState.bitContainer)==0)? 1 : 0  );
 #endif
-            if ((prevBlockEnd->bitState.bitContainer & ~blockBegin->bitState.bitContainer)==0) {
-                // 前のブロックの matchBegin から最初 result の end までがマッチ
-                fwrite(prev->begin,r->end - prev->begin,1,stdout);
-// printf("####");
-                if (!r->continued) puts("");
+            if (prevBlockEnd->bitState.bitContainer !=1) {
+                // そこから最初の stateSkip までやり直し。マッチしたら表示。
+                TransitionGeneratorPtr tg = (TransitionGeneratorPtr)w->global;
+                tg->stateSkip = stateSkipOnce;
+                Buffer buff;
+                buff.buff = buff.buffptr  = begin;
+                buff.buffend = end;
+                TSValue tsv = createTSValue(tg,buff);
+                BlockOutput blk;
+                tsv.blk = &blk;
+                tsv.current = prevBlockEnd->tState;
+                tsv.blk->result = NULL;
+                ResultPtr result = NULL;
+                tsv.blk->resultEnd = &result;
+                tsv.matchBegin = prev->begin;
+                tsv.matchEnd = prev->end;
+                tsv = tSearch(tsv);
+                if (result) {
+                    resultPrint(prev->next,"Print");
+                }
             }
-            r = r->next;
 // printf("%p\n",r);
         } else {
             if ((blockBegin->bitState.bitContainer & 1)!=1) r = r->next;