annotate regexParser/grepWalk.cc @ 319:7b8234c090f7

bmSearch
author mir3636
date Sun, 08 May 2016 22:53:20 +0900
parents fa590a7272ae
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
231
d67649929e96 add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 #include <stdio.h>
299
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
2 #include <stdlib.h>
316
c4d33b7c3ccd no compile error
mir3636
parents: 315
diff changeset
3 #include <string.h>
231
d67649929e96 add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
4
d67649929e96 add grepWalk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
5 #include "grepWalk.h"
234
df4d04b3c34a implement grepWalk (not confirm correct working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 232
diff changeset
6 #include "subsetConstruction.h"
308
1188debbef10 separate CharClass
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
7 #include "CharClass.h"
299
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
8 #include "threadedSearch.h"
319
7b8234c090f7 bmSearch
mir3636
parents: 317
diff changeset
9 #include "bmSearch.h"
234
df4d04b3c34a implement grepWalk (not confirm correct working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 232
diff changeset
10
299
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
11 StatePtr nextState(BitVector bi,TransitionGeneratorPtr tg) {
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
12 // create tSearch in next state.
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
13 StatePtr state = tg->stateArray[bi.bitContainer];
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
14 if (state == NULL) {
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
15 // on the fly subset construction.
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
16 state = createState(tg,bi);
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
17 determinize(state,tg);
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
18 tg->stateArray[bi.bitContainer] = state;
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
19 }
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
20 return state;
293
948428caf616 NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 266
diff changeset
21 }
948428caf616 NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 266
diff changeset
22
299
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
23 void grepWalk(TransitionGeneratorPtr tg,Buffer buff) {
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
24 TSValue tsv = createTSValue(tg,buff);
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
25 tg->stateStart = NEW(State);
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
26 *tg->stateStart = *tg->stateList;
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
27 tg->stateStart->accept = false; // Start state never accept
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
28 StatePtr state = tg->stateStart;
319
7b8234c090f7 bmSearch
mir3636
parents: 317
diff changeset
29 checkBMSearch(state->cc);
234
df4d04b3c34a implement grepWalk (not confirm correct working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 232
diff changeset
30
299
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
31 #if DEBUG
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
32 TSValuePtr tsvp = &tsv; // make tsv visible in lldb
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
33 #endif
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
34 next: while (tsv.buff.buffptr < tsv.buff.buffend) {
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
35 if (state->accept) {
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
36 tsv = tg->stateMatch(tsv);
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
37 }
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
38 CharClassWalkerPtr ccw = createCharClassWalker(state->cc);
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
39 if (!hasNext(ccw)) {
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
40 // matched start again
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
41 state = tg->stateStart;
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
42 ccw = createCharClassWalker(state->cc);
235
4aab1e93a971 fix condition grepWalk.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 234
diff changeset
43 }
299
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
44 unsigned char c = *tsv.buff.buffptr++;
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
45 // printState(tsv.current->state);
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
46 while (hasNext(ccw)) {
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
47 CharClassPtr cc = getNext(ccw);
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
48 if (c<cc->cond.range.begin) {
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
49 state = tg->stateStart;
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
50 tsv = tg->stateSkip(tsv);
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
51 goto next;
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
52 } else if (c<=cc->cond.range.end) {
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
53 // range matched.
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
54 if (cc->cond.w.word) {
315
66012db6a717 add wordMode in grepWalk
mir3636
parents: 308
diff changeset
55 WordPtr w;
316
c4d33b7c3ccd no compile error
mir3636
parents: 315
diff changeset
56 for (w = &cc->cond.w;w;w = w->next) {
315
66012db6a717 add wordMode in grepWalk
mir3636
parents: 308
diff changeset
57 // match the word.
317
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 316
diff changeset
58 if (strncmp((const char *)w->word,(const char *)tsv.buff.buffptr-1,w->length)==0) break;
315
66012db6a717 add wordMode in grepWalk
mir3636
parents: 308
diff changeset
59 }
317
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 316
diff changeset
60 if (!w) continue; // if (not match) continue;
316
c4d33b7c3ccd no compile error
mir3636
parents: 315
diff changeset
61 tsv.buff.buffptr += w->length - 1;
299
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
62 }
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
63 state = nextState(cc->nextState,tg);
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
64 goto next;
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
65 }
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
66 }
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
67 state = tg->stateStart;
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
68 tsv = tg->stateSkip(tsv);
234
df4d04b3c34a implement grepWalk (not confirm correct working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 232
diff changeset
69 }
299
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
70 #if DEBUG
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
71 *tsvp = tsv;
bdfe0a32c48f grepWalk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 293
diff changeset
72 #endif
234
df4d04b3c34a implement grepWalk (not confirm correct working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 232
diff changeset
73 }
df4d04b3c34a implement grepWalk (not confirm correct working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 232
diff changeset
74