changeset 321:a1b65d39b947

bmSearch fix
author mir3636
date Mon, 16 May 2016 17:03:17 +0900
parents da02a7258d54
children 62f4628d2c0d
files regexParser/CeriumGrep.cc regexParser/regexParser.cc regexParser/threadedSearch.cc
diffstat 3 files changed, 18 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/regexParser/CeriumGrep.cc	Sun May 08 23:31:14 2016 +0900
+++ b/regexParser/CeriumGrep.cc	Mon May 16 17:03:17 2016 +0900
@@ -24,6 +24,7 @@
     RegexInfo ri;
     ri.stateNumber = 1;
     ri.wordMode = true;
+    ri.maxWordLen = 0;
     for (int i = 1; i < argc; i++) {
         if (strcmp(argv[i],"-regex") == 0) {
             ri.ptr = (unsigned char*)argv[i+1]; i++;
@@ -68,6 +69,8 @@
         }
     }
 
+    tgv.tg->maxWordLen = ri.maxWordLen;
+
     if (filename != NULL && !parallel) {
         int fd = 0;
         st_mmap_t st_mmap = createSt_mmap(filename,fd);
--- a/regexParser/regexParser.cc	Sun May 08 23:31:14 2016 +0900
+++ b/regexParser/regexParser.cc	Mon May 16 17:03:17 2016 +0900
@@ -77,8 +77,12 @@
             token(ri);
         }
         cc->cond.w.length = ri->ptr - word - 1;
+        if (ri->tokenType == '\0')
+            cc->cond.w.length++;
         if (cc->cond.w.length > 1)
             cc->cond.w.word = word;
+        if (ri->maxWordLen < cc->cond.w.length)
+            ri->maxWordLen = cc->cond.w.length;
     } else 
         token(ri);
     NodePtr n = createNode(ri,'a',cc,0,0);
--- a/regexParser/threadedSearch.cc	Sun May 08 23:31:14 2016 +0900
+++ b/regexParser/threadedSearch.cc	Mon May 16 17:03:17 2016 +0900
@@ -6,6 +6,7 @@
 #include "CharClass.h"
 #include "threadedSearch.h"
 #include "subsetConstruction.h"
+#include "bmSearch.h"
 
 #define max(a,b)((a)>(b)?a:b) 
 #define min(a,b)((a)<(b)?a:b)
@@ -26,11 +27,12 @@
     }
     tsv.matchBegin = tsv.buff.buffptr;  // next char may be matchBegin
     // if possible use bmsearch
+    if (!tsv.current || !tsv.current->ccv[0].w.bm ) return tsv;
     while (tsv.buff.buffptr < tsv.buff.buffend) {
         long skip = tsv.tg->maxWordLen;
         for (int k = 0; k < tsv.current->ccvSize; k++) {
             CCVPtr ccv = &tsv.current->ccv[k];
-            if (ccv->w.word) {
+            if (ccv->w.bm) {
                 int i = ccv->w.length - 1;
                 while (tsv.buff.buffptr[i] == ccv->w.word[i]) {
                     if (i == 0) {
@@ -40,7 +42,8 @@
                             tsv.current = nextTState(ccv->state,tsv.tg);
                             ccv->tState = tsv.current;
                         }
-                        tsv.buff.buffptr += ccv->w.length - 1;
+                        tsv.matchBegin = tsv.buff.buffptr;
+                        tsv.buff.buffptr += ccv->w.length;
                         return tsv; 
                     }
                     --i;
@@ -50,6 +53,7 @@
         }
         tsv.buff.buffptr += skip;
     }
+    tsv.matchBegin = tsv.buff.buffptr;
     return tsv;
 }
 
@@ -179,17 +183,20 @@
     tsv.tg->stateSkip = stateSkip;
     tsv.tg->stateMatch = stateMatch;
     tsv.tg->stateNothing = stateNothing;
+    tsv.current = NULL;
     return tsv;
 }
 
 
 void threadedSearch(TransitionGeneratorPtr tg, Buffer buff) {
     TSValue tsv = createTSValue(tg,buff);
-    tsv.current = generateTState(tg->stateList,tg);
+    generateTState(tg->stateList,tg);
     tg->stateStart = NEW(State);
     *tg->stateStart = *tg->stateList;
     tg->stateStart->accept = false; // Start state never accept
-    generateTState(tg->stateStart,tg);
+    StatePtr state = tg->stateStart;
+    checkBMSearch(state->cc);
+    tsv.current = generateTState(tg->stateStart,tg);
     tSearch(tsv);
 }