Mercurial > hg > Applications > Grep
annotate regexParser/threadedSearch.cc @ 324:879dc5d1cb6a default tip
fix
author | mir3636 |
---|---|
date | Fri, 27 May 2016 21:21:09 +0900 |
parents | 62f4628d2c0d |
children |
rev | line source |
---|---|
247 | 1 #include <stdio.h> |
2 #include <stdlib.h> | |
318 | 3 #include <string.h> |
247 | 4 |
246 | 5 #include "regexParser.h" |
308 | 6 #include "CharClass.h" |
258
29e467a491ba
remove error and add threadedSearch.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
257
diff
changeset
|
7 #include "threadedSearch.h" |
246 | 8 #include "subsetConstruction.h" |
321 | 9 #include "bmSearch.h" |
246 | 10 |
320 | 11 #define max(a,b)((a)>(b)?a:b) |
12 #define min(a,b)((a)<(b)?a:b) | |
13 | |
14 TStatePtr nextTState(BitVector bi,TransitionGeneratorPtr tg); | |
15 | |
272 | 16 static |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
17 TSValue stateNothing(TSValue tsv) { |
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
18 return tsv; |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
19 } |
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
20 |
272 | 21 static |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
22 TSValue stateSkip(TSValue tsv) { |
292 | 23 if (tsv.matchEnd) { |
24 fwrite(tsv.matchBegin,tsv.matchEnd-tsv.matchBegin,1,stdout); | |
25 puts(""); | |
26 tsv.matchEnd = NULL; | |
27 } | |
298 | 28 tsv.matchBegin = tsv.buff.buffptr; // next char may be matchBegin |
319 | 29 // if possible use bmsearch |
321 | 30 if (!tsv.current || !tsv.current->ccv[0].w.bm ) return tsv; |
319 | 31 while (tsv.buff.buffptr < tsv.buff.buffend) { |
32 long skip = tsv.tg->maxWordLen; | |
33 for (int k = 0; k < tsv.current->ccvSize; k++) { | |
34 CCVPtr ccv = &tsv.current->ccv[k]; | |
321 | 35 if (ccv->w.bm) { |
320 | 36 int i = ccv->w.length - 1; |
37 while (tsv.buff.buffptr[i] == ccv->w.word[i]) { | |
319 | 38 if (i == 0) { |
39 if (ccv->tState) { | |
40 tsv.current = ccv->tState; | |
41 } else { | |
42 tsv.current = nextTState(ccv->state,tsv.tg); | |
43 ccv->tState = tsv.current; | |
44 } | |
321 | 45 tsv.matchBegin = tsv.buff.buffptr; |
46 tsv.buff.buffptr += ccv->w.length; | |
319 | 47 return tsv; |
48 } | |
49 --i; | |
50 } | |
320 | 51 skip = min(skip,max(ccv->w.bm->skip_table[tsv.buff.buffptr[i]],ccv->w.length - i)); |
319 | 52 } |
53 } | |
54 tsv.buff.buffptr += skip; | |
55 } | |
321 | 56 tsv.matchBegin = tsv.buff.buffptr; |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
57 return tsv; |
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
58 } |
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
59 |
272 | 60 static |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
61 TSValue stateMatch(TSValue tsv) { |
298 | 62 tsv.matchEnd = tsv.buff.buffptr; // next char of the match |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
63 return tsv; |
257
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
64 } |
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
65 |
266 | 66 TStatePtr generateTState(StatePtr state, TransitionGeneratorPtr tg) { |
247 | 67 TStatePtr tState = NEW(TState); |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
68 tState->state = state; |
247 | 69 int ccvSize = 0; |
251 | 70 CharClassWalkerPtr ccw = createCharClassWalker(state->cc); |
247 | 71 while (hasNext(ccw)) { |
263
292753bb31e4
fix Makefile
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
262
diff
changeset
|
72 getNext(ccw); |
247 | 73 ccvSize++; |
74 } | |
263
292753bb31e4
fix Makefile
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
262
diff
changeset
|
75 tState->ccvSize = ccvSize; |
292 | 76 if (state->accept) { |
298 | 77 tState->stateMatch = tg->stateMatch; |
78 tState->stateSkip = tg->stateSkip; | |
275
8879eb8c64a8
remove segmentation fault
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
272
diff
changeset
|
79 } else { |
298 | 80 tState->stateMatch = tg->stateNothing; |
81 tState->stateSkip = tg->stateSkip; | |
275
8879eb8c64a8
remove segmentation fault
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
272
diff
changeset
|
82 } |
277
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
83 if (ccvSize == 0) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
84 tState->ccv = NULL; |
285
3ea12df96bcf
add *tsvp
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
283
diff
changeset
|
85 state->tState = tState; |
277
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
86 return tState; |
322 | 87 } else tState->ccv = (ccv*)malloc(sizeof(ccv)*(ccvSize+1)); |
251 | 88 ccw = createCharClassWalker(state->cc); |
247 | 89 int i = 0; |
90 while (hasNext(ccw)) { | |
91 CharClassPtr cc = getNext(ccw); | |
92 unsigned long begin = cc->cond.range.begin; | |
93 unsigned long end = cc->cond.range.end; | |
94 struct ccv *ccv = &tState->ccv[i++]; | |
95 ccv->begin = begin; | |
96 ccv->end = end; | |
97 ccv->tState = NULL; | |
98 ccv->state = cc->nextState; | |
248
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
99 ccv->w = cc->cond.w; |
247 | 100 } |
322 | 101 struct ccv *ccv = &tState->ccv[i]; |
102 ccv->begin = MAXCHAR+1; | |
103 ccv->end = ccv->begin; | |
104 ccv->tState = NULL; | |
247 | 105 free(ccw); |
283
fbdb94df9eac
TState atomic update
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
282
diff
changeset
|
106 state->tState = tState; |
247 | 107 return tState; |
108 } | |
109 | |
277
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
110 TStatePtr nextTState(BitVector bi,TransitionGeneratorPtr tg) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
111 // create tSearch in next state. |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
112 StatePtr state = tg->stateArray[bi.bitContainer]; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
113 if (state == NULL) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
114 // on the fly subset construction. |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
115 state = createState(tg,bi); |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
116 determinize(state,tg); |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
117 tg->stateArray[bi.bitContainer] = state; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
118 } |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
119 if (state->tState == NULL) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
120 generateTState(state,tg); |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
121 } |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
122 return state->tState; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
123 } |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
124 |
293
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
125 #define DEBUG 0 |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
126 |
281
b74e3b4b11d7
parallel search done
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
278
diff
changeset
|
127 TSValue tSearch(TSValue tsv) { |
247 | 128 next: while (tsv.buff.buffptr < tsv.buff.buffend) { |
298 | 129 tsv = tsv.current->stateMatch(tsv); |
130 if (tsv.current->ccvSize==0) { | |
131 // matched start again | |
132 tsv.current = tsv.tg->stateStart->tState; | |
133 } | |
247 | 134 unsigned char c = *tsv.buff.buffptr++; |
282
87a801c14117
fix match condition (parallel search doesn't work)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
281
diff
changeset
|
135 // printState(tsv.current->state); |
322 | 136 CCVPtr ccv = &tsv.current->ccv[0]; |
137 for (;;) { | |
138 if (c<ccv[0].begin) goto noMatch; | |
139 else if (c<=ccv[0].end) goto match; | |
140 if (c<ccv[1].begin) goto noMatch; | |
141 else if (c<=ccv[1].end) goto match; | |
142 if (c<ccv[2].begin) goto noMatch; | |
143 else if (c<=ccv[2].end) goto match; | |
144 if (c<ccv[3].begin) goto noMatch; | |
145 else if (c<=ccv[3].end) goto match; | |
146 if (c<ccv[4].begin) goto noMatch; | |
147 else if (c<=ccv[4].end) goto match; | |
148 ccv += 5; | |
149 } | |
150 match: | |
151 // range matched. | |
152 if (ccv->w.word) { | |
153 WordPtr w; | |
154 for (w = &ccv->w;w;w = w->next) { | |
155 // match the word. | |
156 if (strncmp((const char *)w->word,(const char *)tsv.buff.buffptr-1,w->length)==0) break; | |
248
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
157 } |
322 | 158 if (!w) continue; // if (not match) continue; |
159 tsv.buff.buffptr += w->length - 1; | |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
160 } |
322 | 161 if (ccv->tState) { |
162 tsv.current = ccv->tState; | |
163 } else { | |
164 tsv.current = nextTState(ccv->state,tsv.tg); | |
165 ccv->tState = tsv.current; | |
166 } | |
167 goto next; | |
168 noMatch: | |
169 tsv.current = tsv.tg->stateStart->tState; | |
170 tsv = tsv.current->stateSkip(tsv); | |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
171 } |
293
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
172 return tsv; |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
173 } |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
174 |
298 | 175 TSValue |
176 createTSValue(TransitionGeneratorPtr tg, Buffer buff) { | |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
177 TSValue tsv; |
298 | 178 if (!tg) { |
179 tg = createTransitionGenerator(); | |
180 } | |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
181 tsv.buff = buff; |
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
182 tsv.tg = tg; |
292 | 183 tsv.blk = NULL; |
298 | 184 tsv.matchBegin = buff.buffptr; |
185 tsv.matchEnd = NULL; | |
266 | 186 tsv.tg->stateSkip = stateSkip; |
187 tsv.tg->stateMatch = stateMatch; | |
188 tsv.tg->stateNothing = stateNothing; | |
321 | 189 tsv.current = NULL; |
298 | 190 return tsv; |
191 } | |
192 | |
193 | |
194 void threadedSearch(TransitionGeneratorPtr tg, Buffer buff) { | |
195 TSValue tsv = createTSValue(tg,buff); | |
321 | 196 generateTState(tg->stateList,tg); |
288
f2491681914e
special state for start search
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
286
diff
changeset
|
197 tg->stateStart = NEW(State); |
f2491681914e
special state for start search
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
286
diff
changeset
|
198 *tg->stateStart = *tg->stateList; |
f2491681914e
special state for start search
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
286
diff
changeset
|
199 tg->stateStart->accept = false; // Start state never accept |
321 | 200 StatePtr state = tg->stateStart; |
201 checkBMSearch(state->cc); | |
202 tsv.current = generateTState(tg->stateStart,tg); | |
263
292753bb31e4
fix Makefile
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
262
diff
changeset
|
203 tSearch(tsv); |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
204 } |
298 | 205 |
206 /* end */ |