Mercurial > hg > Applications > Grep
annotate regexParser/threadedSearch.cc @ 318:c9458ffecb87
word mode in tSearch
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 08 May 2016 11:56:49 +0900 |
parents | 1188debbef10 |
children | 7b8234c090f7 |
rev | line source |
---|---|
247 | 1 #include <stdio.h> |
2 #include <stdlib.h> | |
318 | 3 #include <string.h> |
247 | 4 |
246 | 5 #include "regexParser.h" |
308 | 6 #include "CharClass.h" |
258
29e467a491ba
remove error and add threadedSearch.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
257
diff
changeset
|
7 #include "threadedSearch.h" |
246 | 8 #include "subsetConstruction.h" |
9 | |
272 | 10 static |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
11 TSValue stateNothing(TSValue tsv) { |
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
12 return tsv; |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
13 } |
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
14 |
272 | 15 static |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
16 TSValue stateSkip(TSValue tsv) { |
292 | 17 if (tsv.matchEnd) { |
18 fwrite(tsv.matchBegin,tsv.matchEnd-tsv.matchBegin,1,stdout); | |
19 puts(""); | |
20 tsv.matchEnd = NULL; | |
21 } | |
298 | 22 tsv.matchBegin = tsv.buff.buffptr; // next char may be matchBegin |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
23 return tsv; |
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
24 } |
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
25 |
272 | 26 static |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
27 TSValue stateMatch(TSValue tsv) { |
298 | 28 tsv.matchEnd = tsv.buff.buffptr; // next char of the match |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
29 return tsv; |
257
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
30 } |
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
31 |
266 | 32 TStatePtr generateTState(StatePtr state, TransitionGeneratorPtr tg) { |
247 | 33 TStatePtr tState = NEW(TState); |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
34 tState->state = state; |
247 | 35 int ccvSize = 0; |
251 | 36 CharClassWalkerPtr ccw = createCharClassWalker(state->cc); |
247 | 37 while (hasNext(ccw)) { |
263
292753bb31e4
fix Makefile
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
262
diff
changeset
|
38 getNext(ccw); |
247 | 39 ccvSize++; |
40 } | |
263
292753bb31e4
fix Makefile
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
262
diff
changeset
|
41 tState->ccvSize = ccvSize; |
292 | 42 if (state->accept) { |
298 | 43 tState->stateMatch = tg->stateMatch; |
44 tState->stateSkip = tg->stateSkip; | |
275
8879eb8c64a8
remove segmentation fault
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
272
diff
changeset
|
45 } else { |
298 | 46 tState->stateMatch = tg->stateNothing; |
47 tState->stateSkip = tg->stateSkip; | |
275
8879eb8c64a8
remove segmentation fault
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
272
diff
changeset
|
48 } |
277
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
49 if (ccvSize == 0) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
50 tState->ccv = NULL; |
285
3ea12df96bcf
add *tsvp
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
283
diff
changeset
|
51 state->tState = tState; |
277
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
52 return tState; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
53 } else tState->ccv = (ccv*)malloc(sizeof(ccv)*ccvSize); |
251 | 54 ccw = createCharClassWalker(state->cc); |
247 | 55 int i = 0; |
56 while (hasNext(ccw)) { | |
57 CharClassPtr cc = getNext(ccw); | |
58 unsigned long begin = cc->cond.range.begin; | |
59 unsigned long end = cc->cond.range.end; | |
60 struct ccv *ccv = &tState->ccv[i++]; | |
61 ccv->begin = begin; | |
62 ccv->end = end; | |
63 ccv->tState = NULL; | |
64 ccv->state = cc->nextState; | |
248
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
65 ccv->w = cc->cond.w; |
247 | 66 } |
67 free(ccw); | |
283
fbdb94df9eac
TState atomic update
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
282
diff
changeset
|
68 state->tState = tState; |
247 | 69 return tState; |
70 } | |
71 | |
277
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
72 TStatePtr nextTState(BitVector bi,TransitionGeneratorPtr tg) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
73 // create tSearch in next state. |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
74 StatePtr state = tg->stateArray[bi.bitContainer]; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
75 if (state == NULL) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
76 // on the fly subset construction. |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
77 state = createState(tg,bi); |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
78 determinize(state,tg); |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
79 tg->stateArray[bi.bitContainer] = state; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
80 } |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
81 if (state->tState == NULL) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
82 generateTState(state,tg); |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
83 } |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
84 return state->tState; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
85 } |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
86 |
293
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
87 #define DEBUG 0 |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
88 |
281
b74e3b4b11d7
parallel search done
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
278
diff
changeset
|
89 TSValue tSearch(TSValue tsv) { |
293
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
90 #if DEBUG |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
91 TSValuePtr tsvp = &tsv; // make tsv visible in lldb |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
92 #endif |
247 | 93 next: while (tsv.buff.buffptr < tsv.buff.buffend) { |
298 | 94 tsv = tsv.current->stateMatch(tsv); |
95 if (tsv.current->ccvSize==0) { | |
96 // matched start again | |
97 tsv.current = tsv.tg->stateStart->tState; | |
98 } | |
247 | 99 unsigned char c = *tsv.buff.buffptr++; |
282
87a801c14117
fix match condition (parallel search doesn't work)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
281
diff
changeset
|
100 // printState(tsv.current->state); |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
101 for (int i = 0; i < tsv.current->ccvSize; i++) { |
248
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
102 CCVPtr ccv = &tsv.current->ccv[i]; |
257
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
103 if (c<ccv->begin) { |
299 | 104 tsv.current = tsv.tg->stateStart->tState; |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
105 tsv = tsv.current->stateSkip(tsv); |
257
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
106 goto next; |
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
107 } else if (c<=ccv->end) { |
248
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
108 // range matched. |
251 | 109 if (ccv->w.word) { |
318 | 110 WordPtr w; |
111 for (w = &ccv->w;w;w = w->next) { | |
112 // match the word. | |
113 if (strncmp((const char *)w->word,(const char *)tsv.buff.buffptr-1,w->length)==0) break; | |
114 } | |
115 if (!w) continue; // if (not match) continue; | |
116 tsv.buff.buffptr += w->length - 1; | |
248
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
117 } |
277
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
118 if (ccv->tState) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
119 tsv.current = ccv->tState; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
120 } else { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
121 tsv.current = nextTState(ccv->state,tsv.tg); |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
122 ccv->tState = tsv.current; |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
123 } |
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
124 goto next; |
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
125 } |
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
126 } |
299 | 127 tsv.current = tsv.tg->stateStart->tState; |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
128 tsv = tsv.current->stateSkip(tsv); |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
129 } |
293
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
130 #if DEBUG |
285
3ea12df96bcf
add *tsvp
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
283
diff
changeset
|
131 *tsvp = tsv; |
3ea12df96bcf
add *tsvp
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
283
diff
changeset
|
132 return *tsvp; |
293
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
133 #else |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
134 return tsv; |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
135 #endif |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
136 } |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
137 |
298 | 138 TSValue |
139 createTSValue(TransitionGeneratorPtr tg, Buffer buff) { | |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
140 TSValue tsv; |
298 | 141 if (!tg) { |
142 tg = createTransitionGenerator(); | |
143 } | |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
144 tsv.buff = buff; |
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
145 tsv.tg = tg; |
292 | 146 tsv.blk = NULL; |
298 | 147 tsv.matchBegin = buff.buffptr; |
148 tsv.matchEnd = NULL; | |
266 | 149 tsv.tg->stateSkip = stateSkip; |
150 tsv.tg->stateMatch = stateMatch; | |
151 tsv.tg->stateNothing = stateNothing; | |
298 | 152 return tsv; |
153 } | |
154 | |
155 | |
156 void threadedSearch(TransitionGeneratorPtr tg, Buffer buff) { | |
157 TSValue tsv = createTSValue(tg,buff); | |
270
c82f7e7f66f7
running ts
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
268
diff
changeset
|
158 tsv.current = generateTState(tg->stateList,tg); |
288
f2491681914e
special state for start search
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
286
diff
changeset
|
159 tg->stateStart = NEW(State); |
f2491681914e
special state for start search
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
286
diff
changeset
|
160 *tg->stateStart = *tg->stateList; |
f2491681914e
special state for start search
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
286
diff
changeset
|
161 tg->stateStart->accept = false; // Start state never accept |
f2491681914e
special state for start search
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
286
diff
changeset
|
162 generateTState(tg->stateStart,tg); |
263
292753bb31e4
fix Makefile
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
262
diff
changeset
|
163 tSearch(tsv); |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
164 } |
298 | 165 |
166 /* end */ |