changeset 298:63213964502a

refactoring ....
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Wed, 03 Feb 2016 12:24:34 +0900
parents c5a7caa37f61
children bdfe0a32c48f
files regexParser/Makefile regexParser/TODO regexParser/cerium/CeriumMain.cc regexParser/cerium/ppe/Exec.cc regexParser/generateSequentialSearch.cc regexParser/sequentialSearch.cc regexParser/subsetConstruction.h regexParser/threadedSearch.cc regexParser/threadedSearch.h
diffstat 9 files changed, 91 insertions(+), 77 deletions(-) [+]
line wrap: on
line diff
--- a/regexParser/Makefile	Tue Feb 02 11:58:59 2016 +0900
+++ b/regexParser/Makefile	Wed Feb 03 12:24:34 2016 +0900
@@ -1,5 +1,6 @@
 TARGET= regexParser test/ccMerge
 CFLAGS= -Wall -O0 -g -I$(CERIUM)/include/TaskManager -I.
+# flags for linking the generated sequentialSearch: optimized (-O), unlike CFLAGS (-O0)
+SEQCFLAGS= -Wall -O -g -I$(CERIUM)/include/TaskManager -I.
 CC= clang++
 CERIUM= ../../Cerium
 AR= libCeriumGrep.a
@@ -11,7 +12,7 @@
 
 # REGEX = '[A-Z][A-Za-z0-9_]*'
 REGEX = '(a|b)*a(a|b)(a|b)'
-# REGEX = '(mil|have)'
+# REGEX = '(mil|hav)[A-Za-z]'
 
 SUBST = -subset
 
@@ -75,8 +76,8 @@
 sequentialSearch: sequentialSearch.cc regexParser fileread.o
 	./regexParser -seq -subset -regex $(REGEX)
 	$(CC) $(CFLAGS)  -c sequentialSearch.cc 
-	$(CC) -O  sequentialSearch.o fileread.o -o $@
-	./$@ -file sequentialSearch.cc
+	$(CC) $(SEQCFLAGS)  sequentialSearch.o $(OBJS) -o $@
+	- ./$@ -file $(TESTFILE)
 
 
 test1: regexParser
--- a/regexParser/TODO	Tue Feb 02 11:58:59 2016 +0900
+++ b/regexParser/TODO	Wed Feb 03 12:24:34 2016 +0900
@@ -1,4 +1,17 @@
-Tue Feb  2 11:21:14 JST 2016
+Wed Feb  3 08:20:06 JST 2016
+
+    state : 1 [w-w] (4)
+    state : 4 [o-o] (8)
+    state : 8 [r-r] (10)
+    node : a 10 -> 2 [d-d] (2)
+
+    w  |  o r  d
+    4     8 10 2
+
+    x  |  w  o  r  d
+    1     4  8 10  2
+
+Tue Feb  2 11:21:14 JST 2016 kono
 
     あとは word の処理だけだ
     charClassMergeをなおさないといけない
@@ -8,7 +21,7 @@
 
     Cerium 側で、最初のmatchが表示されてない
 
-Tue Feb  2 09:55:40 JST 2016
+Tue Feb  2 09:55:40 JST 2016 kono
 
     % ./regexParser -subst -regex '(a|b)*a(a|b)(a|b)'
     ---Print Node----
--- a/regexParser/cerium/CeriumMain.cc	Tue Feb 02 11:58:59 2016 +0900
+++ b/regexParser/cerium/CeriumMain.cc	Wed Feb 03 12:24:34 2016 +0900
@@ -40,6 +40,7 @@
         tsv.blk->resultEnd = &r->next;
         tsv.matchEnd=NULL;
     }
+    tsv.matchBegin = tsv.buff.buffptr;  // next char may be matchBegin
     return tsv;
 }
 
--- a/regexParser/cerium/ppe/Exec.cc	Tue Feb 02 11:58:59 2016 +0900
+++ b/regexParser/cerium/ppe/Exec.cc	Wed Feb 03 12:24:34 2016 +0900
@@ -11,10 +11,8 @@
 SchedDefineTask1(Exec,blockedGrep);
 
 TSValue blockSearch(TransitionGeneratorPtr tg,Buffer buff,int task_spawned) {
-    TSValue tsv;
+    TSValue tsv = createTSValue(tg,buff);
     BlockOutput blk;
-    tsv.buff = buff;
-    tsv.tg = tg;
     tsv.blk = &blk;
     if (task_spawned == 0) {
         tsv.current = tg->stateStart->tState;
@@ -26,8 +24,6 @@
     tsv.blk->resultEnd = &result;
     unsigned char *end = tsv.buff.buffend;
     tsv.buff.buffend = tsv.buff.buff+1;
-    tsv.matchBegin = tsv.buff.buffptr;
-    tsv.matchEnd = NULL;
     tsv = tSearch(tsv);
     tsv.blk->blockBegin = tsv.current;
     tsv.buff.buffend = end;
@@ -79,3 +75,5 @@
     o_data[2] = (unsigned long)tsv.blk->blockEnd->state;
     return 0;
 }
+
+/* end */
--- a/regexParser/generateSequentialSearch.cc	Tue Feb 02 11:58:59 2016 +0900
+++ b/regexParser/generateSequentialSearch.cc	Wed Feb 03 12:24:34 2016 +0900
@@ -4,7 +4,35 @@
 #include "generateSequentialSearch.h"
 #include "subsetConstruction.h"
 
-void exportState(TransitionGeneratorPtr tg) {
+// Format the low byte of `code` as a C expression for the generated
+// comparisons.  Plain printable ASCII is written as a character literal
+// (same text as before); quote, backslash, control and non-ASCII bytes are
+// written as a hex constant, because a raw '%c' would yield an invalid
+// literal or one whose value may differ from the unsigned char it is
+// compared against.
+static const char *
+charConstant(unsigned long code, char *buf, size_t size) {
+    unsigned char c = (unsigned char)code;
+    if (c < 0x20 || c > 0x7e || c == '\'' || c == '\\') {
+        snprintf(buf,size,"0x%02x",(unsigned)c);
+    } else {
+        snprintf(buf,size,"'%c'",c);
+    }
+    return buf;
+}
+
+// Write the C source of one state function, "TSValue state<stateNum>(TSValue tsv)",
+// to fp.
+//   state    : supplies the accept flag and the character-class transitions
+//   stateNum : number used (in hex) in the generated function name
+//   accept   : when false, the stateMatch call is suppressed even if
+//              state->accept is set (exportState uses this for state0)
+//   fp       : output stream for the generated source
+//   tg       : not used by this function
+// The generated function returns at end of buffer, otherwise reads one byte
+// and either tail-calls the next state's function or calls stateSkip and
+// restarts at state0.
+void
+generateState1(StatePtr state,long stateNum, bool accept, FILE *fp, TransitionGeneratorPtr tg) {
+    fprintf(fp,"TSValue state%lx(TSValue tsv) {\n",stateNum);
+    if (accept && state->accept) {
+        fputs("    tsv=tsv.tg->stateMatch(tsv);\n",fp);
+    }
+    fputs("    if (tsv.buff.buffptr >= tsv.buff.buffend) return tsv;\n",fp);
+    CharClassWalkerPtr ccw = createCharClassWalker(state->cc);
+    // a state without transitions consumes no input; it falls straight to the final else
+    if (hasNext(ccw)) fputs("    unsigned char c = *tsv.buff.buffptr++;\n",fp);
+    // dummy head so that every generated branch can start with "else"
+    fputs("    if (0) ;\n",fp);
+    while (hasNext(ccw)) {
+        CharClassPtr cc = getNext(ccw);
+        unsigned long begin = cc->cond.range.begin;
+        unsigned long end = cc->cond.range.end;
+        BitVector bi = cc->nextState;
+        char lo[8], hi[8];      // large enough for "'x'" or "0xff"
+        charConstant(begin,lo,sizeof lo);
+        charConstant(end,hi,sizeof hi);
+        if (begin == end) {
+            fprintf(fp,"    else if (c==%s) { return state%lx(tsv);}\n",lo, bi.bitContainer);
+        } else {
+            fprintf(fp,"    else if (c<%s)  { tsv=tsv.tg->stateSkip(tsv);return state0(tsv);}\n",lo);
+            fprintf(fp,"    else if (c<=%s) { return state%lx(tsv);} \n",hi,  bi.bitContainer);
+        }
+    }
+    free(ccw);
+    fprintf(fp,"    else { tsv=tsv.tg->stateSkip(tsv); return state0(tsv);}\n");
+    fputs("}\n\n",fp);
+}
+
+void
+exportState(TransitionGeneratorPtr tg) {
     StatePtr state = tg->stateList;
     FILE *fp = fopen("state.cc","w");
     if (fp==NULL) {
@@ -13,48 +41,13 @@
         exit(1);
     }
     for (;state;state = state->next) {
-        fprintf(fp,"void state%lx(Buffer buff);\n",state->bitState.bitContainer);
+        fprintf(fp,"TSValue state%lx(TSValue tsv);\n",state->bitState.bitContainer);
     }
     fputs("\n",fp);
+    // initial state must not accept empty string
+    generateState1(tg->stateList,0L,false,fp,tg);
     for (state = tg->stateList;state;state = state->next) {
-        fprintf(fp,"void state%lx(Buffer buff) {\n",state->bitState.bitContainer);
-        if (state->bitState.bitContainer == 2) { // Accept
-            fputs("    stateMatch(buff);\n",fp);
-        } else {  // not Accept
-            fputs("    if (buff.buffptr >= buff.buffend) return;\n",fp);
-            fputs("    unsigned char c = *buff.buffptr++;\n",fp);
-            CharClassWalkerPtr ccw = createCharClassWalker(state->cc);
-            bool flag = true;
-            while (hasNext(ccw)) {
-                CharClassPtr cc = getNext(ccw);
-                unsigned long begin = cc->cond.range.begin;
-                unsigned long end = cc->cond.range.end;
-                BitVector bi = cc->nextState;
-                if (flag) {
-                    flag = false;
-                    fputs("    ",fp);
-                } else {
-                    fputs("    else ",fp);
-                }
-                if (begin == end) {
-                    fprintf(fp,"if (c=='%c') state%lx(buff);\n",(unsigned char)begin, bi.bitContainer);
-                } else {
-                    if (state->accept) {
-                        fprintf(fp,"if (c<'%c') stateMatch(buff);\n",(unsigned char)begin);
-                    } else {
-                        fprintf(fp,"if (c<'%c') stateSkip(buff);\n",(unsigned char)begin);
-                    }
-                    fprintf(fp,"    else if (c<='%c') state%lx(buff);\n",(unsigned char)end, bi.bitContainer);
-                }
-            }
-            free(ccw);
-            if (state->bitState.bitContainer & 2) {
-                fprintf(fp,"    else stateMatch(buff);\n");
-            } else {
-                fprintf(fp,"    else stateSkip(buff);\n");
-            }
-        }
-        fputs("}\n\n",fp);
+        generateState1(state,state->bitState.bitContainer,true,fp,tg);
     }
     fclose(fp);
 }
--- a/regexParser/sequentialSearch.cc	Tue Feb 02 11:58:59 2016 +0900
+++ b/regexParser/sequentialSearch.cc	Wed Feb 03 12:24:34 2016 +0900
@@ -6,20 +6,9 @@
 #include <sys/mman.h>
 
 #include "fileread.h"
-
-void state1(Buffer buff);
+#include "threadedSearch.h"
 
-void stateSkip(Buffer buff) {
-    buff.matchBegin = buff.buffptr;
-    state1(buff);
-}
-
-void stateMatch(Buffer buff) {
-    fwrite(buff.matchBegin,buff.buffptr-buff.matchBegin-1,1,stdout);
-    puts("");
-    buff.buffptr--;
-    stateSkip(buff);
-}
+TSValue state0(TSValue ts);
 
 #include "state.cc"
 int main(int argc, char **argv) {
@@ -33,7 +22,10 @@
     int fd = 0;
     st_mmap_t st_mmap = createSt_mmap(filename,fd);
     Buffer buff = createBuffer(st_mmap);
-    stateSkip(buff);
+    TSValue tsv = createTSValue(NULL,buff);
+    tsv = state0(tsv);
     close(fd);
     return 0;
 }
+
+/* end */
--- a/regexParser/subsetConstruction.h	Tue Feb 02 11:58:59 2016 +0900
+++ b/regexParser/subsetConstruction.h	Wed Feb 03 12:24:34 2016 +0900
@@ -14,3 +14,4 @@
 extern void determinize(StatePtr s, TransitionGeneratorPtr tg);
 extern void subsetConstruction(TransitionGeneratorPtr tg);
 extern void createAnyState(TransitionGeneratorPtr tg);
+extern TransitionGeneratorPtr createTransitionGenerator();
--- a/regexParser/threadedSearch.cc	Tue Feb 02 11:58:59 2016 +0900
+++ b/regexParser/threadedSearch.cc	Wed Feb 03 12:24:34 2016 +0900
@@ -18,12 +18,13 @@
         puts("");
         tsv.matchEnd = NULL;
     }
+    tsv.matchBegin = tsv.buff.buffptr;  // next char may be matchBegin
     return tsv;
 }
 
 static
 TSValue stateMatch(TSValue tsv) {
-    tsv.matchEnd = tsv.buff.buffptr;  // next char of the match
+    tsv.matchEnd = tsv.buff.buffptr;    // end of the match so far: the current buffer position (next char of the match)
     return tsv;
 }
 
@@ -38,11 +39,11 @@
     }
     tState->ccvSize = ccvSize;
     if (state->accept) {
-        tState->stateSkip = tg->stateSkip;
-        tState->stateMatch = tg->stateMatch;
+        tState->stateMatch  = tg->stateMatch;
+        tState->stateSkip  = tg->stateSkip;
     } else {
-        tState->stateSkip = tg->stateSkip;
-        tState->stateMatch = tg->stateNothing;
+        tState->stateMatch  = tg->stateNothing;
+        tState->stateSkip  = tg->stateSkip;
     }
     if (ccvSize == 0) {
         tState->ccv = NULL;
@@ -89,13 +90,17 @@
     TSValuePtr tsvp = &tsv;   // make tsv visible in lldb
 #endif
     next: while (tsv.buff.buffptr < tsv.buff.buffend) {
+        tsv = tsv.current->stateMatch(tsv);
+        if (tsv.current->ccvSize==0) {
+            // matched start again
+            tsv.current = tsv.tg->stateStart->tState;
+        }
         unsigned char c = *tsv.buff.buffptr++;
 //        printState(tsv.current->state);
         for (int i = 0; i < tsv.current->ccvSize; i++) {
             CCVPtr ccv = &tsv.current->ccv[i];
             if (c<ccv->begin) {
                 tsv = tsv.current->stateSkip(tsv);
-                tsv.matchBegin = tsv.buff.buffptr;
                 goto next;
             } else if (c<=ccv->end) {
                 // range matched.
@@ -103,7 +108,6 @@
                     // match the word.
                     // if (not match) continue;
                 }
-                tsv = tsv.current->stateMatch(tsv);
                 if (ccv->tState) {
                     tsv.current = ccv->tState;
                 } else {
@@ -114,7 +118,6 @@
             }
         }
         tsv = tsv.current->stateSkip(tsv);
-        tsv.matchBegin = tsv.buff.buffptr;
     }
 #if DEBUG
     *tsvp = tsv;
@@ -124,21 +127,32 @@
 #endif
 }
 
-void threadedSearch(TransitionGeneratorPtr tg, Buffer buff) {
+// Build the initial search value for scanning buff with the automaton tg.
+// A NULL tg is replaced by the result of createTransitionGenerator().
+// Side effect: stores the stateSkip/stateMatch/stateNothing functions into
+// the generator.  In the returned value blk, matchEnd and current are NULL
+// and matchBegin is buff.buffptr; callers that walk TStates assign current
+// themselves before searching.
+TSValue
+createTSValue(TransitionGeneratorPtr tg, Buffer buff) {
     TSValue tsv;
+    if (!tg) {
+        tg = createTransitionGenerator();
+    }
     tsv.buff = buff;
     tsv.tg = tg;
     tsv.blk = NULL;
+    tsv.current = NULL;     // do not hand an indeterminate pointer back to callers
+    tsv.matchBegin = buff.buffptr;
+    tsv.matchEnd = NULL;
     tsv.tg->stateSkip = stateSkip;
     tsv.tg->stateMatch = stateMatch;
     tsv.tg->stateNothing = stateNothing;
-    tsv.matchBegin = buff.buffptr;
-    tsv.matchEnd = NULL;
+    return tsv;
+}
+
+
+// Run the threaded matcher over buff using the automaton in tg.
+// tg is dereferenced directly below, so here (unlike in createTSValue) it
+// must not be NULL.
+void threadedSearch(TransitionGeneratorPtr tg, Buffer buff) {
+    TSValue tsv = createTSValue(tg,buff);
     tsv.current = generateTState(tg->stateList,tg);
+    // stateStart is a copy of the first state with accept cleared; tSearch
+    // switches to its tState when the current state has no transitions
     tg->stateStart = NEW(State);
     *tg->stateStart = *tg->stateList;
     tg->stateStart->accept = false; // Start state never accept
     generateTState(tg->stateStart,tg);
-
+    // the TSValue returned by tSearch is not used
     tSearch(tsv);
 }
+
+/* end */
--- a/regexParser/threadedSearch.h	Tue Feb 02 11:58:59 2016 +0900
+++ b/regexParser/threadedSearch.h	Wed Feb 03 12:24:34 2016 +0900
@@ -1,3 +1,4 @@
 extern void threadedSearch(TransitionGeneratorPtr tg, Buffer buff);
 extern TStatePtr generateTState(StatePtr s, TransitionGeneratorPtr tg);
-TSValue tSearch(TSValue tsv);
+extern TSValue createTSValue(TransitionGeneratorPtr tg, Buffer buff) ;
+extern TSValue tSearch(TSValue tsv);