Mercurial > hg > Applications > Grep
annotate regexParser/threadedSearch.cc @ 299:bdfe0a32c48f
grepWalk
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 03 Feb 2016 13:11:10 +0900 |
parents | 63213964502a |
children | 1188debbef10 |
rev | line source |
---|---|
247 | 1 #include <stdio.h> |
2 #include <stdlib.h> | |
3 | |
246 | 4 #include "regexParser.h" |
258
29e467a491ba
remove error and add threadedSearch.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
257
diff
changeset
|
5 #include "threadedSearch.h" |
246 | 6 #include "subsetConstruction.h" |
7 | |
272 | 8 static |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
9 TSValue stateNothing(TSValue tsv) { |
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
10 return tsv; |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
11 } |
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
12 |
272 | 13 static |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
14 TSValue stateSkip(TSValue tsv) { |
292 | 15 if (tsv.matchEnd) { |
16 fwrite(tsv.matchBegin,tsv.matchEnd-tsv.matchBegin,1,stdout); | |
17 puts(""); | |
18 tsv.matchEnd = NULL; | |
19 } | |
298 | 20 tsv.matchBegin = tsv.buff.buffptr; // next char may be matchBegin |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
21 return tsv; |
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
22 } |
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
23 |
272 | 24 static |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
25 TSValue stateMatch(TSValue tsv) { |
298 | 26 tsv.matchEnd = tsv.buff.buffptr; // next char of the match |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
27 return tsv; |
257
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
28 } |
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
29 |
266 | 30 TStatePtr generateTState(StatePtr state, TransitionGeneratorPtr tg) { |
247 | 31 TStatePtr tState = NEW(TState); |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
32 tState->state = state; |
247 | 33 int ccvSize = 0; |
251 | 34 CharClassWalkerPtr ccw = createCharClassWalker(state->cc); |
247 | 35 while (hasNext(ccw)) { |
263
292753bb31e4
fix Makefile
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
262
diff
changeset
|
36 getNext(ccw); |
247 | 37 ccvSize++; |
38 } | |
263
292753bb31e4
fix Makefile
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
262
diff
changeset
|
39 tState->ccvSize = ccvSize; |
292 | 40 if (state->accept) { |
298 | 41 tState->stateMatch = tg->stateMatch; |
42 tState->stateSkip = tg->stateSkip; | |
275
8879eb8c64a8
remove segmentation fault
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
272
diff
changeset
|
43 } else { |
298 | 44 tState->stateMatch = tg->stateNothing; |
45 tState->stateSkip = tg->stateSkip; | |
275
8879eb8c64a8
remove segmentation fault
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
272
diff
changeset
|
46 } |
277
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
47 if (ccvSize == 0) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
48 tState->ccv = NULL; |
285
3ea12df96bcf
add *tsvp
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
283
diff
changeset
|
49 state->tState = tState; |
277
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
50 return tState; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
51 } else tState->ccv = (ccv*)malloc(sizeof(ccv)*ccvSize); |
251 | 52 ccw = createCharClassWalker(state->cc); |
247 | 53 int i = 0; |
54 while (hasNext(ccw)) { | |
55 CharClassPtr cc = getNext(ccw); | |
56 unsigned long begin = cc->cond.range.begin; | |
57 unsigned long end = cc->cond.range.end; | |
58 struct ccv *ccv = &tState->ccv[i++]; | |
59 ccv->begin = begin; | |
60 ccv->end = end; | |
61 ccv->tState = NULL; | |
62 ccv->state = cc->nextState; | |
248
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
63 ccv->w = cc->cond.w; |
247 | 64 } |
65 free(ccw); | |
283
fbdb94df9eac
TState atomic update
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
282
diff
changeset
|
66 state->tState = tState; |
247 | 67 return tState; |
68 } | |
69 | |
277
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
70 TStatePtr nextTState(BitVector bi,TransitionGeneratorPtr tg) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
71 // create tSearch in next state. |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
72 StatePtr state = tg->stateArray[bi.bitContainer]; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
73 if (state == NULL) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
74 // on the fly subset construction. |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
75 state = createState(tg,bi); |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
76 determinize(state,tg); |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
77 tg->stateArray[bi.bitContainer] = state; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
78 } |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
79 if (state->tState == NULL) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
80 generateTState(state,tg); |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
81 } |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
82 return state->tState; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
83 } |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
84 |
293
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
85 #define DEBUG 0 |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
86 |
281
b74e3b4b11d7
parallel search done
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
278
diff
changeset
|
87 TSValue tSearch(TSValue tsv) { |
293
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
88 #if DEBUG |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
89 TSValuePtr tsvp = &tsv; // make tsv visible in lldb |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
90 #endif |
247 | 91 next: while (tsv.buff.buffptr < tsv.buff.buffend) { |
298 | 92 tsv = tsv.current->stateMatch(tsv); |
93 if (tsv.current->ccvSize==0) { | |
94 // matched start again | |
95 tsv.current = tsv.tg->stateStart->tState; | |
96 } | |
247 | 97 unsigned char c = *tsv.buff.buffptr++; |
282
87a801c14117
fix match condition (parallel search doesn't work)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
281
diff
changeset
|
98 // printState(tsv.current->state); |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
99 for (int i = 0; i < tsv.current->ccvSize; i++) { |
248
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
100 CCVPtr ccv = &tsv.current->ccv[i]; |
257
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
101 if (c<ccv->begin) { |
299 | 102 tsv.current = tsv.tg->stateStart->tState; |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
103 tsv = tsv.current->stateSkip(tsv); |
257
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
104 goto next; |
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
105 } else if (c<=ccv->end) { |
248
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
106 // range matched. |
251 | 107 if (ccv->w.word) { |
248
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
108 // match the word. |
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
109 // if (not match) continue; |
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
110 } |
277
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
111 if (ccv->tState) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
112 tsv.current = ccv->tState; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
113 } else { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
114 tsv.current = nextTState(ccv->state,tsv.tg); |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
115 ccv->tState = tsv.current; |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
116 } |
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
117 goto next; |
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
118 } |
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
119 } |
299 | 120 tsv.current = tsv.tg->stateStart->tState; |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
121 tsv = tsv.current->stateSkip(tsv); |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
122 } |
293
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
123 #if DEBUG |
285
3ea12df96bcf
add *tsvp
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
283
diff
changeset
|
124 *tsvp = tsv; |
3ea12df96bcf
add *tsvp
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
283
diff
changeset
|
125 return *tsvp; |
293
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
126 #else |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
127 return tsv; |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
128 #endif |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
129 } |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
130 |
298 | 131 TSValue |
132 createTSValue(TransitionGeneratorPtr tg, Buffer buff) { | |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
133 TSValue tsv; |
298 | 134 if (!tg) { |
135 tg = createTransitionGenerator(); | |
136 } | |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
137 tsv.buff = buff; |
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
138 tsv.tg = tg; |
292 | 139 tsv.blk = NULL; |
298 | 140 tsv.matchBegin = buff.buffptr; |
141 tsv.matchEnd = NULL; | |
266 | 142 tsv.tg->stateSkip = stateSkip; |
143 tsv.tg->stateMatch = stateMatch; | |
144 tsv.tg->stateNothing = stateNothing; | |
298 | 145 return tsv; |
146 } | |
147 | |
148 | |
149 void threadedSearch(TransitionGeneratorPtr tg, Buffer buff) { | |
150 TSValue tsv = createTSValue(tg,buff); | |
270
c82f7e7f66f7
running ts
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
268
diff
changeset
|
151 tsv.current = generateTState(tg->stateList,tg); |
288
f2491681914e
special state for start search
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
286
diff
changeset
|
152 tg->stateStart = NEW(State); |
f2491681914e
special state for start search
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
286
diff
changeset
|
153 *tg->stateStart = *tg->stateList; |
f2491681914e
special state for start search
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
286
diff
changeset
|
154 tg->stateStart->accept = false; // Start state never accept |
f2491681914e
special state for start search
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
286
diff
changeset
|
155 generateTState(tg->stateStart,tg); |
263
292753bb31e4
fix Makefile
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
262
diff
changeset
|
156 tSearch(tsv); |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
157 } |
298 | 158 |
159 /* end */ |