Mercurial > hg > Applications > Grep
comparison regexParser/grepWalk.cc @ 299:bdfe0a32c48f
grepWalk
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 03 Feb 2016 13:11:10 +0900 |
parents | 948428caf616 |
children | 1188debbef10 |
Comparison legend: equal, deleted, inserted, replaced
298:63213964502a | 299:bdfe0a32c48f |
---|---|
1 #include <stdio.h> | 1 #include <stdio.h> |
2 #include <stdlib.h> | |
2 | 3 |
3 #include "grepWalk.h" | 4 #include "grepWalk.h" |
4 #include "subsetConstruction.h" | 5 #include "subsetConstruction.h" |
6 #include "threadedSearch.h" | |
5 | 7 |
6 void grep(TransitionGeneratorPtr tg,unsigned char *matchBegin,Buffer buff,unsigned long d) ; | 8 StatePtr nextState(BitVector bi,TransitionGeneratorPtr tg) { |
7 | 9 // create tSearch in next state. |
8 void grepSkip(TransitionGeneratorPtr tg,unsigned char *matchBegin, Buffer buff) { | 10 StatePtr state = tg->stateArray[bi.bitContainer]; |
9 matchBegin = buff.buffptr; | 11 if (state == NULL) { |
10 grep(tg,matchBegin,buff,1); // 1 is initState | 12 // on the fly subset construction. |
13 state = createState(tg,bi); | |
14 determinize(state,tg); | |
15 tg->stateArray[bi.bitContainer] = state; | |
16 } | |
17 return state; | |
11 } | 18 } |
12 | 19 |
13 void grepWalk(TransitionGeneratorPtr tg, unsigned char *matchBegin, Buffer buff) { | 20 void grepWalk(TransitionGeneratorPtr tg,Buffer buff) { |
14 grepSkip(tg,matchBegin,buff); | 21 TSValue tsv = createTSValue(tg,buff); |
22 tg->stateStart = NEW(State); | |
23 *tg->stateStart = *tg->stateList; | |
24 tg->stateStart->accept = false; // Start state never accept | |
25 StatePtr state = tg->stateStart; | |
26 | |
27 #if DEBUG | |
28 TSValuePtr tsvp = &tsv; // make tsv visible in lldb | |
29 #endif | |
30 next: while (tsv.buff.buffptr < tsv.buff.buffend) { | |
31 if (state->accept) { | |
32 tsv = tg->stateMatch(tsv); | |
33 } | |
34 CharClassWalkerPtr ccw = createCharClassWalker(state->cc); | |
35 if (!hasNext(ccw)) { | |
36 // matched start again | |
37 state = tg->stateStart; | |
38 ccw = createCharClassWalker(state->cc); | |
39 } | |
40 unsigned char c = *tsv.buff.buffptr++; | |
41 // printState(tsv.current->state); | |
42 while (hasNext(ccw)) { | |
43 CharClassPtr cc = getNext(ccw); | |
44 if (c<cc->cond.range.begin) { | |
45 state = tg->stateStart; | |
46 tsv = tg->stateSkip(tsv); | |
47 goto next; | |
48 } else if (c<=cc->cond.range.end) { | |
49 // range matched. | |
50 if (cc->cond.w.word) { | |
51 // match the word. | |
52 // if (not match) continue; | |
53 } | |
54 state = nextState(cc->nextState,tg); | |
55 goto next; | |
56 } | |
57 } | |
58 state = tg->stateStart; | |
59 tsv = tg->stateSkip(tsv); | |
60 } | |
61 #if DEBUG | |
62 *tsvp = tsv; | |
63 #endif | |
15 } | 64 } |
16 | 65 |
17 void grepMatch(TransitionGeneratorPtr tg,unsigned char *matchBegin, Buffer buff) { | |
18 fwrite(matchBegin,buff.buffptr-matchBegin,1,stdout); | |
19 puts("\n"); | |
20 grepSkip(tg,matchBegin,buff); | |
21 } | |
22 | |
23 void grep(TransitionGeneratorPtr tg,unsigned char *matchBegin,Buffer buff,unsigned long d) { | |
24 unsigned char c = *buff.buffptr++; | |
25 if (c=='\0') return; | |
26 StatePtr state = tg->stateList; | |
27 | |
28 while (state->bitState.bitContainer != d) state = state->next; // 配列へのアクセスへ変更 | |
29 CharClassWalkerPtr ccw = createCharClassWalker(state->cc); | |
30 CharClassPtr cc = NULL; | |
31 bool found = false; | |
32 while (hasNext(ccw)) { | |
33 cc = getNext(ccw); | |
34 unsigned long begin = cc->cond.range.begin; | |
35 unsigned long end = cc->cond.range.end; | |
36 if (begin == end) { | |
37 if (c == begin) found = true; | |
38 else found = false; | |
39 } else { | |
40 if (c < begin) found = false; | |
41 else if (c < end) found = true; | |
42 } | |
43 if (found == true) break; | |
44 } | |
45 | |
46 if (found == false) { | |
47 grepSkip(tg,matchBegin,buff); | |
48 } else if (found == true && (cc->nextState.bitContainer | 2)) { // Accept | |
49 grepMatch(tg,matchBegin,buff); | |
50 } else { | |
51 grep(tg,matchBegin,buff,cc->nextState.bitContainer); | |
52 } | |
53 } | |
54 |