comparison regexParser/subsetConstraction.cc @ 215:63e9224c7b2b

try to fix asterisk
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Tue, 29 Dec 2015 19:01:23 +0900
parents a94f57af1600
children
comparison
equal deleted inserted replaced
214:a94f57af1600 215:63e9224c7b2b
207 s->cc = n->cc; 207 s->cc = n->cc;
208 return s; 208 return s;
209 } 209 }
210 210
211 /** 211 /**
212 正規表現に必要な状態を探して、それぞれに番号を割り振る 212 pass 1 : 正規表現に必要な状態を探して、それぞれに番号を割り振る
213 前が * でない + は新しく状態を作る 213 前が * でない + は新しく状態を作る
214 * があったら、次の状態はその時の先頭の状態になる 214 * があったら、次の状態はその時の先頭の状態になる
215 常に先頭の状態を返す
216 最後が*ならば、それを持ち歩く
217 pass 2:
218 割り当てられた状態に沿って charclass の行き先を書き換える
219 書き換えた charclass を merge する
220 前の部分に * がない + は新しい状態をつくるので、state を切り替えて生成する
215 */ 221 */
216 TGValue stateAllocate(NodePtr n,TGValue tgv) { 222 TGValue generateTransition(NodePtr n,TGValue tgv, int pass) {
217 if (n->tokenType == '+') { 223 if (n->tokenType == '+') {
218 TGValue tgvLeft = stateAllocate(n->left,tgv); 224 TGValue tgvLeft = tgv;
225 tgvLeft.endState = n->right->state;
226 tgvLeft.asterisk = NULL;
227 tgvLeft = generateTransition(n->left,tgv,pass);
228 TGValue tgvRight = tgvLeft;
219 if (tgvLeft.asterisk) { 229 if (tgvLeft.asterisk) {
220 TGValue tgvRight = tgvLeft; 230 n->right->state = tgv.endState;
221 tgvRight.asterisk = false; 231 tgvRight.startState = tgvRight.asterisk;
222 tgvRight = stateAllocate(n->right,tgvRight); 232 tgvRight = generateTransition(n->right,tgvRight,pass);
223 tgvRight.asterisk = true; 233 tgvLeft.asterisk = tgvRight.asterisk;
224 return tgvRight; 234 return tgvLeft;
225 } 235 }
226 TGValue tgvRight = tgvLeft; 236 tgvRight.asterisk = NULL;
227 n->right->state = createState(tgvRight,n->right); 237 if (pass==1) {
228 tgvRight.startState = n->right->state; 238 n->right->state = tgvRight.startState = createState(tgvRight,n->right);
229 tgvRight.asterisk = false; 239 } else {
230 tgvRight = stateAllocate(n->right,tgvRight); 240 tgvLeft.startState = n->right->state;
231 return tgvRight; 241 tgvLeft.tg->stateArray[tgvLeft.startState->bitState.bitContainer] = tgvLeft.startState ;
242 }
243 tgvRight = generateTransition(n->right,tgvRight,pass);
244 tgvLeft.asterisk = tgvRight.asterisk;
245 return tgvLeft;
232 } else if (n->tokenType == '|') { 246 } else if (n->tokenType == '|') {
233 TGValue tgv1 = stateAllocate(n->left,tgv); 247 TGValue tgv1 = generateTransition(n->left,tgv,pass);
234 TGValue tgv2 = stateAllocate(n->right,tgv1); 248 TGValue tgv2 = generateTransition(n->right,tgv1,pass);
235 return tgv2; 249 return tgv2;
236 } else if (n->tokenType == '*') { 250 } else if (n->tokenType == '*') {
237 TGValue tgvAstah = tgv; 251 TGValue tgvAstah = tgv;
238 tgvAstah.endState = tgvAstah.startState; 252 tgvAstah.endState = tgvAstah.startState;
239 tgvAstah = stateAllocate(n->left,tgvAstah); 253 tgvAstah = generateTransition(n->left,tgvAstah,pass);
240 tgvAstah.asterisk = true; 254 tgvAstah.asterisk = tgvAstah.startState;
241 return tgvAstah; 255 return tgvAstah;
242 } else if (n->tokenType == 'c' || n->tokenType == 'a'){ 256 } else if (n->tokenType == 'c' || n->tokenType == 'a'){
243 TGValue tgv1 = tgv; 257 TGValue tgv1 = tgv;
244 tgv1.asterisk = false; 258 if (pass==1) {
245 n->stateNum = tgv.startState->stateNum; 259 n->stateNum = tgv.startState->stateNum;
246 n->nextStateNum = tgv.endState->stateNum; 260 n->nextStateNum = tgv.endState->stateNum;
247 n->state = tgv.startState;; 261 n->state = tgv.startState;;
248 n->nextState = tgv.endState; 262 n->nextState = tgv.endState;
249 return tgv1; 263 } else {
250 } else { 264 BitVector bi = createBitVector(n->nextStateNum);
251 return tgv; 265 setState(n->cc,bi);
252 } 266 tgv1.startState->cc = mergeTransition(tgv1.startState,n->cc);
253 } 267 }
254
255 /**
256 割り当てられた状態に沿って charclass の行き先を書き換える
257 書き換えた charclass を merge する
258 前の部分に * がない + は新しい状態をつくるので、state を切り替えて生成する
259 */
260 TGValue generateTransition(NodePtr n,TGValue tgv) {
261 if (n->tokenType == '+') {
262 TGValue tgvLeft = generateTransition(n->left,tgv);
263 if (tgvLeft.asterisk) {
264 TGValue tgvRight = tgvLeft;
265 tgvRight.asterisk = false;
266 tgvRight = generateTransition(n->right,tgvRight);
267 tgvRight.asterisk = true;
268 return tgvRight;
269 }
270 TGValue tgvRight = tgvLeft;
271 StatePtr left = tgvLeft.startState;
272 tgvLeft.startState = n->right->state;
273 tgvLeft.tg->stateArray[tgvLeft.startState->bitState.bitContainer] = left;
274 tgvRight.asterisk = false;
275 tgvRight = generateTransition(n->right,tgvRight);
276 return tgvRight;
277 } else if (n->tokenType == '|') {
278 TGValue tgv1 = generateTransition(n->left,tgv);
279 TGValue tgv2 = generateTransition(n->right,tgv1);
280 return tgv2;
281 } else if (n->tokenType == '*') {
282 TGValue tgvAstah = generateTransition(n->left,tgv);
283 tgvAstah.asterisk = true;
284 return tgvAstah;
285 } else if (n->tokenType == 'c' || n->tokenType == 'a'){
286 TGValue tgv1 = tgv;
287 tgv1.asterisk = false;
288 BitVector bi = createBitVector(n->nextStateNum);
289 setState(n->cc,bi);
290 tgv1.startState->cc = mergeTransition(tgv1.startState,n->cc);
291 return tgv1; 268 return tgv1;
292 } else { 269 } else {
293 return tgv; 270 return tgv;
294 } 271 }
295 } 272 }
305 282
306 TGValue createTGValue() { 283 TGValue createTGValue() {
307 TGValue tgv; 284 TGValue tgv;
308 tgv.startState = NULL; 285 tgv.startState = NULL;
309 tgv.endState = NULL; 286 tgv.endState = NULL;
310 tgv.asterisk = false; 287 tgv.asterisk = NULL;
311 tgv.tg = createTransitionGenerator(); 288 tgv.tg = createTransitionGenerator();
312 return tgv; 289 return tgv;
313 } 290 }
314 291
315 TransitionGeneratorPtr generateTransitionList(NodePtr n) { 292 TransitionGeneratorPtr generateTransitionList(NodePtr n) {
316 TGValue tgv = createTGValue(); 293 TGValue tgv = createTGValue();
317 StatePtr startState = tgv.startState = createState(tgv,n); 294 StatePtr startState = tgv.startState = createState(tgv,n);
318 NodePtr eof = createNode(NULL,'e',NULL,NULL,NULL); 295 NodePtr eof = createNode(NULL,'e',NULL,NULL,NULL);
319 StatePtr endState = tgv.endState = createState(tgv,eof); 296 StatePtr endState = tgv.endState = createState(tgv,eof);
320 tgv = stateAllocate(n,tgv); 297 tgv = generateTransition(n,tgv,1);
321 if (tgv.tg->totalStateCount > BITBLOCK) { 298 if (tgv.tg->totalStateCount > BITBLOCK) {
322 errorMassege("StateMax > BITBLOCK",__LINE__,__FILE__); 299 errorMassege("StateMax > BITBLOCK",__LINE__,__FILE__);
323 } 300 }
324 BitVector bi = createBitVector(tgv.tg->totalStateCount); 301 BitVector bi = createBitVector(tgv.tg->totalStateCount);
325 tgv.tg->stateArray = (StatePtr*)calloc(bi.bitContainer*2,sizeof(StatePtr*)); 302 tgv.tg->stateArray = (StatePtr*)calloc(bi.bitContainer*2,sizeof(StatePtr*));
326 tgv.tg->stateArray[startState->bitState.bitContainer] = startState; 303 tgv.tg->stateArray[startState->bitState.bitContainer] = startState;
327 tgv.tg->stateArray[endState->bitState.bitContainer] = endState; 304 tgv.tg->stateArray[endState->bitState.bitContainer] = endState;
328 generateTransition(n,tgv); 305 tgv = generateTransition(n,tgv,2);
329 return tgv.tg; 306 return tgv.tg;
330 } 307 }
331 308
332 void printState(StatePtr state) { 309 void printState(StatePtr state) {
333 printf("state : %lx\n",state->bitState.bitContainer); 310 printf("state : %lx\n",state->bitState.bitContainer);