Mercurial > hg > Applications > Grep
annotate regexParser/threadedSearch.cc @ 308:1188debbef10
separate CharClass
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 08 Feb 2016 12:45:45 +0900 |
parents | bdfe0a32c48f |
children | c9458ffecb87 |
rev | line source |
---|---|
247 | 1 #include <stdio.h> |
2 #include <stdlib.h> | |
3 | |
246 | 4 #include "regexParser.h" |
308 | 5 #include "CharClass.h" |
258
29e467a491ba
remove error and add threadedSearch.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
257
diff
changeset
|
6 #include "threadedSearch.h" |
246 | 7 #include "subsetConstruction.h" |
8 | |
272 | 9 static |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
10 TSValue stateNothing(TSValue tsv) { |
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
11 return tsv; |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
12 } |
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
13 |
272 | 14 static |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
15 TSValue stateSkip(TSValue tsv) { |
292 | 16 if (tsv.matchEnd) { |
17 fwrite(tsv.matchBegin,tsv.matchEnd-tsv.matchBegin,1,stdout); | |
18 puts(""); | |
19 tsv.matchEnd = NULL; | |
20 } | |
298 | 21 tsv.matchBegin = tsv.buff.buffptr; // next char may be matchBegin |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
22 return tsv; |
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
23 } |
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
24 |
272 | 25 static |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
26 TSValue stateMatch(TSValue tsv) { |
298 | 27 tsv.matchEnd = tsv.buff.buffptr; // next char of the match |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
28 return tsv; |
257
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
29 } |
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
30 |
266 | 31 TStatePtr generateTState(StatePtr state, TransitionGeneratorPtr tg) { |
247 | 32 TStatePtr tState = NEW(TState); |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
33 tState->state = state; |
247 | 34 int ccvSize = 0; |
251 | 35 CharClassWalkerPtr ccw = createCharClassWalker(state->cc); |
247 | 36 while (hasNext(ccw)) { |
263
292753bb31e4
fix Makefile
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
262
diff
changeset
|
37 getNext(ccw); |
247 | 38 ccvSize++; |
39 } | |
263
292753bb31e4
fix Makefile
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
262
diff
changeset
|
40 tState->ccvSize = ccvSize; |
292 | 41 if (state->accept) { |
298 | 42 tState->stateMatch = tg->stateMatch; |
43 tState->stateSkip = tg->stateSkip; | |
275
8879eb8c64a8
remove segmentation fault
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
272
diff
changeset
|
44 } else { |
298 | 45 tState->stateMatch = tg->stateNothing; |
46 tState->stateSkip = tg->stateSkip; | |
275
8879eb8c64a8
remove segmentation fault
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
272
diff
changeset
|
47 } |
277
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
48 if (ccvSize == 0) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
49 tState->ccv = NULL; |
285
3ea12df96bcf
add *tsvp
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
283
diff
changeset
|
50 state->tState = tState; |
277
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
51 return tState; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
52 } else tState->ccv = (ccv*)malloc(sizeof(ccv)*ccvSize); |
251 | 53 ccw = createCharClassWalker(state->cc); |
247 | 54 int i = 0; |
55 while (hasNext(ccw)) { | |
56 CharClassPtr cc = getNext(ccw); | |
57 unsigned long begin = cc->cond.range.begin; | |
58 unsigned long end = cc->cond.range.end; | |
59 struct ccv *ccv = &tState->ccv[i++]; | |
60 ccv->begin = begin; | |
61 ccv->end = end; | |
62 ccv->tState = NULL; | |
63 ccv->state = cc->nextState; | |
248
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
64 ccv->w = cc->cond.w; |
247 | 65 } |
66 free(ccw); | |
283
fbdb94df9eac
TState atomic update
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
282
diff
changeset
|
67 state->tState = tState; |
247 | 68 return tState; |
69 } | |
70 | |
277
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
71 TStatePtr nextTState(BitVector bi,TransitionGeneratorPtr tg) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
72 // create tSearch in next state. |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
73 StatePtr state = tg->stateArray[bi.bitContainer]; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
74 if (state == NULL) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
75 // on the fly subset construction. |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
76 state = createState(tg,bi); |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
77 determinize(state,tg); |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
78 tg->stateArray[bi.bitContainer] = state; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
79 } |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
80 if (state->tState == NULL) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
81 generateTState(state,tg); |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
82 } |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
83 return state->tState; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
84 } |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
85 |
293
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
86 #define DEBUG 0 |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
87 |
281
b74e3b4b11d7
parallel search done
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
278
diff
changeset
|
88 TSValue tSearch(TSValue tsv) { |
293
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
89 #if DEBUG |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
90 TSValuePtr tsvp = &tsv; // make tsv visible in lldb |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
91 #endif |
247 | 92 next: while (tsv.buff.buffptr < tsv.buff.buffend) { |
298 | 93 tsv = tsv.current->stateMatch(tsv); |
94 if (tsv.current->ccvSize==0) { | |
95 // matched start again | |
96 tsv.current = tsv.tg->stateStart->tState; | |
97 } | |
247 | 98 unsigned char c = *tsv.buff.buffptr++; |
282
87a801c14117
fix match condition (parallel search doesn't work)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
281
diff
changeset
|
99 // printState(tsv.current->state); |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
100 for (int i = 0; i < tsv.current->ccvSize; i++) { |
248
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
101 CCVPtr ccv = &tsv.current->ccv[i]; |
257
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
102 if (c<ccv->begin) { |
299 | 103 tsv.current = tsv.tg->stateStart->tState; |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
104 tsv = tsv.current->stateSkip(tsv); |
257
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
105 goto next; |
ebb429c2b6a7
fix allocate state in generateTransition
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
254
diff
changeset
|
106 } else if (c<=ccv->end) { |
248
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
107 // range matched. |
251 | 108 if (ccv->w.word) { |
248
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
109 // match the word. |
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
110 // if (not match) continue; |
2b1fbfb92d54
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
111 } |
277
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
112 if (ccv->tState) { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
113 tsv.current = ccv->tState; |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
114 } else { |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
115 tsv.current = nextTState(ccv->state,tsv.tg); |
7b4bcc7b5ae6
nextTState implemented
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
275
diff
changeset
|
116 ccv->tState = tsv.current; |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
117 } |
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
118 goto next; |
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
119 } |
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
120 } |
299 | 121 tsv.current = tsv.tg->stateStart->tState; |
264
ef95a7f1bc03
implement tSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
263
diff
changeset
|
122 tsv = tsv.current->stateSkip(tsv); |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
123 } |
293
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
124 #if DEBUG |
285
3ea12df96bcf
add *tsvp
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
283
diff
changeset
|
125 *tsvp = tsv; |
3ea12df96bcf
add *tsvp
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
283
diff
changeset
|
126 return *tsvp; |
293
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
127 #else |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
128 return tsv; |
948428caf616
NFA maximum match worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
292
diff
changeset
|
129 #endif |
245
d34de5edaa96
add threadedSearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
130 } |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
131 |
298 | 132 TSValue |
133 createTSValue(TransitionGeneratorPtr tg, Buffer buff) { | |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
134 TSValue tsv; |
298 | 135 if (!tg) { |
136 tg = createTransitionGenerator(); | |
137 } | |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
138 tsv.buff = buff; |
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
139 tsv.tg = tg; |
292 | 140 tsv.blk = NULL; |
298 | 141 tsv.matchBegin = buff.buffptr; |
142 tsv.matchEnd = NULL; | |
266 | 143 tsv.tg->stateSkip = stateSkip; |
144 tsv.tg->stateMatch = stateMatch; | |
145 tsv.tg->stateNothing = stateNothing; | |
298 | 146 return tsv; |
147 } | |
148 | |
149 | |
150 void threadedSearch(TransitionGeneratorPtr tg, Buffer buff) { | |
151 TSValue tsv = createTSValue(tg,buff); | |
270
c82f7e7f66f7
running ts
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
268
diff
changeset
|
152 tsv.current = generateTState(tg->stateList,tg); |
288
f2491681914e
special state for start search
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
286
diff
changeset
|
153 tg->stateStart = NEW(State); |
f2491681914e
special state for start search
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
286
diff
changeset
|
154 *tg->stateStart = *tg->stateList; |
f2491681914e
special state for start search
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
286
diff
changeset
|
155 tg->stateStart->accept = false; // Start state never accept |
f2491681914e
special state for start search
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
286
diff
changeset
|
156 generateTState(tg->stateStart,tg); |
263
292753bb31e4
fix Makefile
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
262
diff
changeset
|
157 tSearch(tsv); |
262
157f6886ba55
write driver of threadedSearch
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
260
diff
changeset
|
158 } |
298 | 159 |
160 /* end */ |