Mercurial > hg > Applications > Grep
comparison regexParser/subsetConstraction.cc @ 215:63e9224c7b2b
try to fix asterisk
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 29 Dec 2015 19:01:23 +0900 |
parents | a94f57af1600 |
children |
comparison
equal
deleted
inserted
replaced
214:a94f57af1600 | 215:63e9224c7b2b |
---|---|
207 s->cc = n->cc; | 207 s->cc = n->cc; |
208 return s; | 208 return s; |
209 } | 209 } |
210 | 210 |
211 /** | 211 /** |
212 正規表現に必要な状態を探して、それぞれに番号を割り振る | 212 pass 1 : 正規表現に必要な状態を探して、それぞれに番号を割り振る |
213 前が * でない + は新しく状態を作る | 213 前が * でない + は新しく状態を作る |
214 * があったら、次の状態はその時の先頭の状態になる | 214 * があったら、次の状態はその時の先頭の状態になる |
215 常に先頭の状態を返す | |
216 最後が*ならば、それを持ち歩く | |
217 pass 2: | |
218 割り当てられた状態に沿って charclass の行き先を書き換える | |
219 書き換えた charclass を merge する | |
220 前の部分に * がない + は新しい状態をつくるので、state を切り替えて生成する | |
215 */ | 221 */ |
216 TGValue stateAllocate(NodePtr n,TGValue tgv) { | 222 TGValue generateTransition(NodePtr n,TGValue tgv, int pass) { |
217 if (n->tokenType == '+') { | 223 if (n->tokenType == '+') { |
218 TGValue tgvLeft = stateAllocate(n->left,tgv); | 224 TGValue tgvLeft = tgv; |
225 tgvLeft.endState = n->right->state; | |
226 tgvLeft.asterisk = NULL; | |
227 tgvLeft = generateTransition(n->left,tgv,pass); | |
228 TGValue tgvRight = tgvLeft; | |
219 if (tgvLeft.asterisk) { | 229 if (tgvLeft.asterisk) { |
220 TGValue tgvRight = tgvLeft; | 230 n->right->state = tgv.endState; |
221 tgvRight.asterisk = false; | 231 tgvRight.startState = tgvRight.asterisk; |
222 tgvRight = stateAllocate(n->right,tgvRight); | 232 tgvRight = generateTransition(n->right,tgvRight,pass); |
223 tgvRight.asterisk = true; | 233 tgvLeft.asterisk = tgvRight.asterisk; |
224 return tgvRight; | 234 return tgvLeft; |
225 } | 235 } |
226 TGValue tgvRight = tgvLeft; | 236 tgvRight.asterisk = NULL; |
227 n->right->state = createState(tgvRight,n->right); | 237 if (pass==1) { |
228 tgvRight.startState = n->right->state; | 238 n->right->state = tgvRight.startState = createState(tgvRight,n->right); |
229 tgvRight.asterisk = false; | 239 } else { |
230 tgvRight = stateAllocate(n->right,tgvRight); | 240 tgvLeft.startState = n->right->state; |
231 return tgvRight; | 241 tgvLeft.tg->stateArray[tgvLeft.startState->bitState.bitContainer] = tgvLeft.startState ; |
242 } | |
243 tgvRight = generateTransition(n->right,tgvRight,pass); | |
244 tgvLeft.asterisk = tgvRight.asterisk; | |
245 return tgvLeft; | |
232 } else if (n->tokenType == '|') { | 246 } else if (n->tokenType == '|') { |
233 TGValue tgv1 = stateAllocate(n->left,tgv); | 247 TGValue tgv1 = generateTransition(n->left,tgv,pass); |
234 TGValue tgv2 = stateAllocate(n->right,tgv1); | 248 TGValue tgv2 = generateTransition(n->right,tgv1,pass); |
235 return tgv2; | 249 return tgv2; |
236 } else if (n->tokenType == '*') { | 250 } else if (n->tokenType == '*') { |
237 TGValue tgvAstah = tgv; | 251 TGValue tgvAstah = tgv; |
238 tgvAstah.endState = tgvAstah.startState; | 252 tgvAstah.endState = tgvAstah.startState; |
239 tgvAstah = stateAllocate(n->left,tgvAstah); | 253 tgvAstah = generateTransition(n->left,tgvAstah,pass); |
240 tgvAstah.asterisk = true; | 254 tgvAstah.asterisk = tgvAstah.startState; |
241 return tgvAstah; | 255 return tgvAstah; |
242 } else if (n->tokenType == 'c' || n->tokenType == 'a'){ | 256 } else if (n->tokenType == 'c' || n->tokenType == 'a'){ |
243 TGValue tgv1 = tgv; | 257 TGValue tgv1 = tgv; |
244 tgv1.asterisk = false; | 258 if (pass==1) { |
245 n->stateNum = tgv.startState->stateNum; | 259 n->stateNum = tgv.startState->stateNum; |
246 n->nextStateNum = tgv.endState->stateNum; | 260 n->nextStateNum = tgv.endState->stateNum; |
247 n->state = tgv.startState;; | 261 n->state = tgv.startState;; |
248 n->nextState = tgv.endState; | 262 n->nextState = tgv.endState; |
249 return tgv1; | 263 } else { |
250 } else { | 264 BitVector bi = createBitVector(n->nextStateNum); |
251 return tgv; | 265 setState(n->cc,bi); |
252 } | 266 tgv1.startState->cc = mergeTransition(tgv1.startState,n->cc); |
253 } | 267 } |
254 | |
255 /** | |
256 割り当てられた状態に沿って charclass の行き先を書き換える | |
257 書き換えた charclass を merge する | |
258 前の部分に * がない + は新しい状態をつくるので、state を切り替えて生成する | |
259 */ | |
260 TGValue generateTransition(NodePtr n,TGValue tgv) { | |
261 if (n->tokenType == '+') { | |
262 TGValue tgvLeft = generateTransition(n->left,tgv); | |
263 if (tgvLeft.asterisk) { | |
264 TGValue tgvRight = tgvLeft; | |
265 tgvRight.asterisk = false; | |
266 tgvRight = generateTransition(n->right,tgvRight); | |
267 tgvRight.asterisk = true; | |
268 return tgvRight; | |
269 } | |
270 TGValue tgvRight = tgvLeft; | |
271 StatePtr left = tgvLeft.startState; | |
272 tgvLeft.startState = n->right->state; | |
273 tgvLeft.tg->stateArray[tgvLeft.startState->bitState.bitContainer] = left; | |
274 tgvRight.asterisk = false; | |
275 tgvRight = generateTransition(n->right,tgvRight); | |
276 return tgvRight; | |
277 } else if (n->tokenType == '|') { | |
278 TGValue tgv1 = generateTransition(n->left,tgv); | |
279 TGValue tgv2 = generateTransition(n->right,tgv1); | |
280 return tgv2; | |
281 } else if (n->tokenType == '*') { | |
282 TGValue tgvAstah = generateTransition(n->left,tgv); | |
283 tgvAstah.asterisk = true; | |
284 return tgvAstah; | |
285 } else if (n->tokenType == 'c' || n->tokenType == 'a'){ | |
286 TGValue tgv1 = tgv; | |
287 tgv1.asterisk = false; | |
288 BitVector bi = createBitVector(n->nextStateNum); | |
289 setState(n->cc,bi); | |
290 tgv1.startState->cc = mergeTransition(tgv1.startState,n->cc); | |
291 return tgv1; | 268 return tgv1; |
292 } else { | 269 } else { |
293 return tgv; | 270 return tgv; |
294 } | 271 } |
295 } | 272 } |
305 | 282 |
306 TGValue createTGValue() { | 283 TGValue createTGValue() { |
307 TGValue tgv; | 284 TGValue tgv; |
308 tgv.startState = NULL; | 285 tgv.startState = NULL; |
309 tgv.endState = NULL; | 286 tgv.endState = NULL; |
310 tgv.asterisk = false; | 287 tgv.asterisk = NULL; |
311 tgv.tg = createTransitionGenerator(); | 288 tgv.tg = createTransitionGenerator(); |
312 return tgv; | 289 return tgv; |
313 } | 290 } |
314 | 291 |
315 TransitionGeneratorPtr generateTransitionList(NodePtr n) { | 292 TransitionGeneratorPtr generateTransitionList(NodePtr n) { |
316 TGValue tgv = createTGValue(); | 293 TGValue tgv = createTGValue(); |
317 StatePtr startState = tgv.startState = createState(tgv,n); | 294 StatePtr startState = tgv.startState = createState(tgv,n); |
318 NodePtr eof = createNode(NULL,'e',NULL,NULL,NULL); | 295 NodePtr eof = createNode(NULL,'e',NULL,NULL,NULL); |
319 StatePtr endState = tgv.endState = createState(tgv,eof); | 296 StatePtr endState = tgv.endState = createState(tgv,eof); |
320 tgv = stateAllocate(n,tgv); | 297 tgv = generateTransition(n,tgv,1); |
321 if (tgv.tg->totalStateCount > BITBLOCK) { | 298 if (tgv.tg->totalStateCount > BITBLOCK) { |
322 errorMassege("StateMax > BITBLOCK",__LINE__,__FILE__); | 299 errorMassege("StateMax > BITBLOCK",__LINE__,__FILE__); |
323 } | 300 } |
324 BitVector bi = createBitVector(tgv.tg->totalStateCount); | 301 BitVector bi = createBitVector(tgv.tg->totalStateCount); |
325 tgv.tg->stateArray = (StatePtr*)calloc(bi.bitContainer*2,sizeof(StatePtr*)); | 302 tgv.tg->stateArray = (StatePtr*)calloc(bi.bitContainer*2,sizeof(StatePtr*)); |
326 tgv.tg->stateArray[startState->bitState.bitContainer] = startState; | 303 tgv.tg->stateArray[startState->bitState.bitContainer] = startState; |
327 tgv.tg->stateArray[endState->bitState.bitContainer] = endState; | 304 tgv.tg->stateArray[endState->bitState.bitContainer] = endState; |
328 generateTransition(n,tgv); | 305 tgv = generateTransition(n,tgv,2); |
329 return tgv.tg; | 306 return tgv.tg; |
330 } | 307 } |
331 | 308 |
332 void printState(StatePtr state) { | 309 void printState(StatePtr state) { |
333 printf("state : %lx\n",state->bitState.bitContainer); | 310 printf("state : %lx\n",state->bitState.bitContainer); |