changeset 292:868f01f1ba8e

maximum match
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 01 Feb 2016 21:52:57 +0900
parents 1b75546ff65f
children 948428caf616
files regexParser/Makefile regexParser/cerium/CeriumMain.cc regexParser/cerium/ppe/Exec.cc regexParser/regexParser.h regexParser/threadedSearch.cc
diffstat 5 files changed, 59 insertions(+), 45 deletions(-) [+]
line wrap: on
line diff
--- a/regexParser/Makefile	Mon Feb 01 12:20:16 2016 +0900
+++ b/regexParser/Makefile	Mon Feb 01 21:52:57 2016 +0900
@@ -9,8 +9,8 @@
 SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
 OBJS = $(SRCS:.cc=.o)
 
-REGEX = '[A-Z][A-Za-z0-9_]*'
-# REGEX = '(a|b)*a(a|b)(a|b)'
+# REGEX = '[A-Z][A-Za-z0-9_]*'
+REGEX = '(a|b)*a(a|b)(a|b)'
 # REGEX = '(mil|have)'
 
 SUBST = -subset
--- a/regexParser/cerium/CeriumMain.cc	Mon Feb 01 12:20:16 2016 +0900
+++ b/regexParser/cerium/CeriumMain.cc	Mon Feb 01 21:52:57 2016 +0900
@@ -30,21 +30,22 @@
 static
 TSValue stateSkip(TSValue tsv) {
     tsv.current = tsv.tg->stateStart->tState;
-    tsv.buff.matchBegin = tsv.buff.buffptr;
+    if (tsv.matchEnd) {
+        ResultPtr r = NEW(Result);
+        r->continued = false;
+        r->begin = tsv.matchBegin;
+        r->end = tsv.buff.buffptr;
+        *tsv.blk->resultEnd = r;
+        r->next = NULL;
+        tsv.blk->resultEnd = &r->next;
+        tsv.matchEnd=NULL;
+    }
     return tsv;
 }
 
 static
 TSValue stateMatch(TSValue tsv) {
-    ResultPtr r = NEW(Result);
-    r->continued = false;
-    r->begin = tsv.buff.matchBegin;
-    r->end = tsv.buff.buffptr;
-    *tsv.resultEnd = r;
-    r->next = NULL;
-    tsv.resultEnd = &r->next;
-    tsv.buff.buffptr--;
-    tsv = stateSkip(tsv);
+    tsv.matchEnd = tsv.buff.buffptr;  // one past the last matched char (exclusive end of the match)
     return tsv;
 }
 
@@ -56,7 +57,7 @@
             prev = r;
             break;
         }
-        fwrite(r->begin,r->end - r->begin-1,1,stdout);
+        fwrite(r->begin,r->end - r->begin,1,stdout);
         puts("");
     }
     return prev;
--- a/regexParser/cerium/ppe/Exec.cc	Mon Feb 01 12:20:16 2016 +0900
+++ b/regexParser/cerium/ppe/Exec.cc	Mon Feb 01 21:52:57 2016 +0900
@@ -12,37 +12,41 @@
 
 TSValue blockSearch(TransitionGeneratorPtr tg,Buffer buff,int task_spawned) {
     TSValue tsv;
+    BlockOutput blk;
     tsv.buff = buff;
     tsv.tg = tg;
+    tsv.blk = &blk;
     if (task_spawned == 0) {
         tsv.current = tg->stateStart->tState;
     } else {
         tsv.current = tg->anyState->tState;
     }
-    tsv.result = NULL;
+    tsv.blk->result = NULL;
     ResultPtr result = NULL;
-    tsv.resultEnd = &result;
+    tsv.blk->resultEnd = &result;
     unsigned char *end = tsv.buff.buffend;
     tsv.buff.buffend = tsv.buff.buff+1;
+    tsv.matchBegin = NULL;
+    tsv.matchEnd = NULL;
     tsv = tSearch(tsv);
-    tsv.blockBegin = tsv.current;
+    tsv.blk->blockBegin = tsv.current;
     tsv.buff.buffend = end;
     tsv = tSearch(tsv);
-    tsv.blockEnd = tsv.current;
-    if (tsv.blockEnd->state->bitState.bitContainer != 1) {
-        if (tsv.buff.matchBegin != tsv.buff.buffptr) {
+    tsv.blk->blockEnd = tsv.current;
+    if (tsv.blk->blockEnd->state->bitState.bitContainer != 1) {
+        if (tsv.matchBegin != tsv.buff.buffptr) {
             // partial match case at block end.
             ResultPtr r = NEW(Result);
             r->continued = true;
-            r->begin = tsv.buff.matchBegin;
+            r->begin = tsv.matchBegin;
             r->end = tsv.buff.buffptr;
-            *tsv.resultEnd = r;
+            *tsv.blk->resultEnd = r;
             r->next = NULL;
-            tsv.resultEnd = &r->next;
+            tsv.blk->resultEnd = &r->next;
 // printf("Exec %lx r->begin : %p r->end : %p\n",tsv.blockEnd->state->bitState.bitContainer, r->begin,r->end);
         }
     }
-    tsv.result = result;
+    tsv.blk->result = result;
     return tsv;
 }
 
@@ -66,12 +70,12 @@
     }
     TransitionGeneratorPtr tg = (TransitionGeneratorPtr)w->global;
     Buffer buff;
-    buff.buff = buff.buffptr = buff.matchBegin = i_data;
+    buff.buff = buff.buffptr = i_data;
     buff.buffend = buff.buff + length;
     TSValue tsv = blockSearch(tg,buff,task_spawned);
 //    resultPrint(tsv.result,"Exec");
-    o_data[0] = (unsigned long)tsv.result;
-    o_data[1] = (unsigned long)tsv.blockBegin->state;
-    o_data[2] = (unsigned long)tsv.blockEnd->state;
+    o_data[0] = (unsigned long)tsv.blk->result;
+    o_data[1] = (unsigned long)tsv.blk->blockBegin->state;
+    o_data[2] = (unsigned long)tsv.blk->blockEnd->state;
     return 0;
 }
--- a/regexParser/regexParser.h	Mon Feb 01 12:20:16 2016 +0900
+++ b/regexParser/regexParser.h	Mon Feb 01 21:52:57 2016 +0900
@@ -61,7 +61,7 @@
 typedef struct tState {
     State *state;
     tsValue (*stateSkip)(tsValue);
-    tsValue (*stateContinue)(tsValue);
+    tsValue (*stateMatch)(tsValue);
     int ccvSize;
     CCVPtr ccv;
 } TState, *TStatePtr;
@@ -107,17 +107,22 @@
     unsigned char *buff;
     unsigned char *buffptr;
     unsigned char *buffend;
-    unsigned char *matchBegin;
 } Buffer, *BufferPtr;
 
+typedef struct blockOutput {
+    ResultPtr result;
+    ResultPtr *resultEnd;
+    TState *blockBegin;
+    TState *blockEnd;
+} BlockOutput, *BlockOutputPtr;
+
 typedef struct tsValue {
+    TState *current;
     Buffer buff;
-    ResultPtr result;
-    ResultPtr *resultEnd;
+    unsigned char *matchBegin;
+    unsigned char *matchEnd;
+    BlockOutputPtr blk;
     TransitionGeneratorPtr tg;
-    TState *current;
-    TState *blockBegin;
-    TState *blockEnd;
 } TSValue, *TSValuePtr;
 
 typedef struct tgValue {
--- a/regexParser/threadedSearch.cc	Mon Feb 01 12:20:16 2016 +0900
+++ b/regexParser/threadedSearch.cc	Mon Feb 01 21:52:57 2016 +0900
@@ -13,16 +13,17 @@
 static
 TSValue stateSkip(TSValue tsv) {
     tsv.current = tsv.tg->stateStart->tState;
-    tsv.buff.matchBegin = tsv.buff.buffptr;
+    if (tsv.matchEnd) {
+        fwrite(tsv.matchBegin,tsv.matchEnd-tsv.matchBegin,1,stdout);
+        puts("");
+        tsv.matchEnd = NULL;
+    }
     return tsv;
 }
 
 static
 TSValue stateMatch(TSValue tsv) {
-    fwrite(tsv.buff.matchBegin,tsv.buff.buffptr-tsv.buff.matchBegin-1,1,stdout);
-    puts("");
-    tsv.buff.buffptr--;
-    tsv = stateSkip(tsv);
+    tsv.matchEnd = tsv.buff.buffptr;  // one past the last matched char (exclusive end of the match)
     return tsv;
 }
 
@@ -36,12 +37,12 @@
         ccvSize++;
     }
     tState->ccvSize = ccvSize;
-    if (state->accept && state != tg->stateList ) {
-        tState->stateSkip = tg->stateMatch;
-        tState->stateContinue = tg->stateNothing;
+    if (state->accept) {
+        tState->stateSkip = tg->stateSkip;
+        tState->stateMatch = tg->stateMatch;
     } else {
         tState->stateSkip = tg->stateSkip;
-        tState->stateContinue = tg->stateNothing;
+        tState->stateMatch = tg->stateNothing;
     }
     if (ccvSize == 0) {
         tState->ccv = NULL;
@@ -90,6 +91,7 @@
             CCVPtr ccv = &tsv.current->ccv[i];
             if (c<ccv->begin) {
                 tsv = tsv.current->stateSkip(tsv);
+                tsv.matchBegin = tsv.buff.buffptr;
                 goto next;
             } else if (c<=ccv->end) {
                 // range matched.
@@ -103,11 +105,12 @@
                     tsv.current = nextTState(ccv->state,tsv.tg);
                     ccv->tState = tsv.current;
                 }
-                // tsv = tsv.current->stateContinue(tsv);
+                tsv = tsv.current->stateMatch(tsv);
                 goto next;
             }
         }
         tsv = tsv.current->stateSkip(tsv);
+        tsv.matchBegin = tsv.buff.buffptr;
     }
     *tsvp = tsv;
     return *tsvp;
@@ -117,10 +120,11 @@
     TSValue tsv;
     tsv.buff = buff;
     tsv.tg = tg;
-    tsv.result = NULL;
+    tsv.blk = NULL;
     tsv.tg->stateSkip = stateSkip;
     tsv.tg->stateMatch = stateMatch;
     tsv.tg->stateNothing = stateNothing;
+    tsv.matchBegin = tsv.matchEnd = NULL;
     tsv.current = generateTState(tg->stateList,tg);
     tg->stateStart = NEW(State);
     *tg->stateStart = *tg->stateList;