Mercurial > hg > Applications > Grep
annotate regexParser/threadedSearch.cc @ 320:da02a7258d54
fix
author | mir3636 |
---|---|
date | Sun, 08 May 2016 23:31:14 +0900 |
parents | 7b8234c090f7 |
children | a1b65d39b947 |
rev | line source |
---|---|
247 | 1 #include <stdio.h> |
2 #include <stdlib.h> | |
318 | 3 #include <string.h> |
247 | 4 |
246 | 5 #include "regexParser.h" |
308 | 6 #include "CharClass.h" |
258
29e467a491ba
remove error and add threadedSearch.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
257
diff
changeset
|
7 #include "threadedSearch.h" |
246 | 8 #include "subsetConstruction.h" |
9 | |
320 | 10 #define max(a,b)((a)>(b)?a:b) |
11 #define min(a,b)((a)<(b)?a:b) | |
12 | |
13 TStatePtr nextTState(BitVector bi,TransitionGeneratorPtr tg); | |
14 | |
272 | 15 static |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
16 TSValue stateNothing(TSValue tsv) { |
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
17 return tsv; |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
18 } |
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
19 |
272 | 20 static |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
21 TSValue stateSkip(TSValue tsv) { |
292 | 22 if (tsv.matchEnd) { |
23 fwrite(tsv.matchBegin,tsv.matchEnd-tsv.matchBegin,1,stdout); | |
24 puts(""); | |
25 tsv.matchEnd = NULL; | |
26 } | |
298 | 27 tsv.matchBegin = tsv.buff.buffptr; // next char may be matchBegin |
319 | 28 // if possible use bmsearch |
29 while (tsv.buff.buffptr < tsv.buff.buffend) { | |
30 long skip = tsv.tg->maxWordLen; | |
31 for (int k = 0; k < tsv.current->ccvSize; k++) { | |
32 CCVPtr ccv = &tsv.current->ccv[k]; | |
320 | 33 if (ccv->w.word) { |
34 int i = ccv->w.length - 1; | |
35 while (tsv.buff.buffptr[i] == ccv->w.word[i]) { | |
319 | 36 if (i == 0) { |
37 if (ccv->tState) { | |
38 tsv.current = ccv->tState; | |
39 } else { | |
40 tsv.current = nextTState(ccv->state,tsv.tg); | |
41 ccv->tState = tsv.current; | |
42 } | |
320 | 43 tsv.buff.buffptr += ccv->w.length - 1; |
319 | 44 return tsv; |
45 } | |
46 --i; | |
47 } | |
320 | 48 skip = min(skip,max(ccv->w.bm->skip_table[tsv.buff.buffptr[i]],ccv->w.length - i)); |
319 | 49 } |
50 } | |
51 tsv.buff.buffptr += skip; | |
52 } | |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
53 return tsv; |
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
54 } |
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
55 |
272 | 56 static |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
57 TSValue stateMatch(TSValue tsv) { |
298 | 58 tsv.matchEnd = tsv.buff.buffptr; // next char of the match |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
59 return tsv; |
257
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
60 } |
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
61 |
266 | 62 TStatePtr generateTState(StatePtr state, TransitionGeneratorPtr tg) { |
247 | 63 TStatePtr tState = NEW(TState); |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
64 tState->state = state; |
247 | 65 int ccvSize = 0; |
251 | 66 CharClassWalkerPtr ccw = createCharClassWalker(state->cc); |
247 | 67 while (hasNext(ccw)) { |
263
292753bb31e4
fix Makefile
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
262
diff
changeset
|
68 getNext(ccw); |
247 | 69 ccvSize++; |
70 } | |
263
292753bb31e4
fix Makefile
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
262
diff
changeset
|
71 tState->ccvSize = ccvSize; |
292 | 72 if (state->accept) { |
298 | 73 tState->stateMatch = tg->stateMatch; |
74 tState->stateSkip = tg->stateSkip; | |
275
8879eb8c64a8
remove segmentation fault
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
272
diff
changeset
|
75 } else { |
298 | 76 tState->stateMatch = tg->stateNothing; |
77 tState->stateSkip = tg->stateSkip; | |
275
8879eb8c64a8
remove segmentation fault
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
272
diff
changeset
|
78 } |
277
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
79 if (ccvSize == 0) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
80 tState->ccv = NULL; |
285
3ea12df96bcf
add *tsvp
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
283
diff
changeset
|
81 state->tState = tState; |
277
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
82 return tState; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
83 } else tState->ccv = (ccv*)malloc(sizeof(ccv)*ccvSize); |
251 | 84 ccw = createCharClassWalker(state->cc); |
247 | 85 int i = 0; |
86 while (hasNext(ccw)) { | |
87 CharClassPtr cc = getNext(ccw); | |
88 unsigned long begin = cc->cond.range.begin; | |
89 unsigned long end = cc->cond.range.end; | |
90 struct ccv *ccv = &tState->ccv[i++]; | |
91 ccv->begin = begin; | |
92 ccv->end = end; | |
93 ccv->tState = NULL; | |
94 ccv->state = cc->nextState; | |
248
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
95 ccv->w = cc->cond.w; |
247 | 96 } |
97 free(ccw); | |
283
fbdb94df9eac
TState atomic update
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
282
diff
changeset
|
98 state->tState = tState; |
247 | 99 return tState; |
100 } | |
101 | |
277
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
102 TStatePtr nextTState(BitVector bi,TransitionGeneratorPtr tg) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
103 // create tSearch in next state. |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
104 StatePtr state = tg->stateArray[bi.bitContainer]; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
105 if (state == NULL) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
106 // on the fly subset construction. |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
107 state = createState(tg,bi); |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
108 determinize(state,tg); |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
109 tg->stateArray[bi.bitContainer] = state; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
110 } |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
111 if (state->tState == NULL) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
112 generateTState(state,tg); |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
113 } |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
114 return state->tState; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
115 } |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
116 |
293
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
117 #define DEBUG 0 |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
118 |
281
b74e3b4b11d7
parallel search done
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
278
diff
changeset
|
119 TSValue tSearch(TSValue tsv) { |
293
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
120 #if DEBUG |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
121 TSValuePtr tsvp = &tsv; // make tsv visible in lldb |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
122 #endif |
247 | 123 next: while (tsv.buff.buffptr < tsv.buff.buffend) { |
298 | 124 tsv = tsv.current->stateMatch(tsv); |
125 if (tsv.current->ccvSize==0) { | |
126 // matched start again | |
127 tsv.current = tsv.tg->stateStart->tState; | |
128 } | |
247 | 129 unsigned char c = *tsv.buff.buffptr++; |
282
87a801c14117
fix match condition (parallel search doesn't work)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
281
diff
changeset
|
130 // printState(tsv.current->state); |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
131 for (int i = 0; i < tsv.current->ccvSize; i++) { |
248
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
132 CCVPtr ccv = &tsv.current->ccv[i]; |
257
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
133 if (c<ccv->begin) { |
299 | 134 tsv.current = tsv.tg->stateStart->tState; |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
135 tsv = tsv.current->stateSkip(tsv); |
257
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
136 goto next; |
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
137 } else if (c<=ccv->end) { |
248
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
138 // range matched. |
251 | 139 if (ccv->w.word) { |
318 | 140 WordPtr w; |
141 for (w = &ccv->w;w;w = w->next) { | |
142 // match the word. | |
143 if (strncmp((const char *)w->word,(const char *)tsv.buff.buffptr-1,w->length)==0) break; | |
144 } | |
145 if (!w) continue; // if (not match) continue; | |
146 tsv.buff.buffptr += w->length - 1; | |
248
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
147 } |
277
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
148 if (ccv->tState) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
149 tsv.current = ccv->tState; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
150 } else { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
151 tsv.current = nextTState(ccv->state,tsv.tg); |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
152 ccv->tState = tsv.current; |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
153 } |
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
154 goto next; |
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
155 } |
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
156 } |
299 | 157 tsv.current = tsv.tg->stateStart->tState; |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
158 tsv = tsv.current->stateSkip(tsv); |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
159 } |
293
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
160 #if DEBUG |
285
3ea12df96bcf
add *tsvp
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
283
diff
changeset
|
161 *tsvp = tsv; |
3ea12df96bcf
add *tsvp
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
283
diff
changeset
|
162 return *tsvp; |
293
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
163 #else |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
164 return tsv; |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
165 #endif |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
166 } |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
167 |
298 | 168 TSValue |
169 createTSValue(TransitionGeneratorPtr tg, Buffer buff) { | |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
170 TSValue tsv; |
298 | 171 if (!tg) { |
172 tg = createTransitionGenerator(); | |
173 } | |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
174 tsv.buff = buff; |
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
175 tsv.tg = tg; |
292 | 176 tsv.blk = NULL; |
298 | 177 tsv.matchBegin = buff.buffptr; |
178 tsv.matchEnd = NULL; | |
266 | 179 tsv.tg->stateSkip = stateSkip; |
180 tsv.tg->stateMatch = stateMatch; | |
181 tsv.tg->stateNothing = stateNothing; | |
298 | 182 return tsv; |
183 } | |
184 | |
185 | |
186 void threadedSearch(TransitionGeneratorPtr tg, Buffer buff) { | |
187 TSValue tsv = createTSValue(tg,buff); | |
270
c82f7e7f66f7
running ts
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
268
diff
changeset
|
188 tsv.current = generateTState(tg->stateList,tg); |
288
f2491681914e
special state for start search
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
286
diff
changeset
|
189 tg->stateStart = NEW(State); |
f2491681914e
special state for start search
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
286
diff
changeset
|
190 *tg->stateStart = *tg->stateList; |
f2491681914e
special state for start search
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
286
diff
changeset
|
191 tg->stateStart->accept = false; // Start state never accept |
f2491681914e
special state for start search
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
286
diff
changeset
|
192 generateTState(tg->stateStart,tg); |
263
292753bb31e4
fix Makefile
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
262
diff
changeset
|
193 tSearch(tsv); |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
194 } |
298 | 195 |
196 /* end */ |