Mercurial > hg > Applications > Grep
diff regexParser/subsetConstraction.cc @ 215:63e9224c7b2b
try to fix asterisk
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 29 Dec 2015 19:01:23 +0900 |
parents | a94f57af1600 |
children |
line wrap: on
line diff
--- a/regexParser/subsetConstraction.cc Mon Dec 28 20:32:36 2015 +0900 +++ b/regexParser/subsetConstraction.cc Tue Dec 29 19:01:23 2015 +0900 @@ -209,85 +209,62 @@ } /** - 正規表現に必要な状態を探して、それぞれに番号を割り振る - 前が * でない + は新しく状態を作る - * があったら、次の状態はその時の先頭の状態になる + pass 1 : 正規表現に必要な状態を探して、それぞれに番号を割り振る + 前が * でない + は新しく状態を作る + * があったら、次の状態はその時の先頭の状態になる + 常に先頭の状態を返す + 最後が*ならば、それを持ち歩く + pass 2: + 割り当てられた状態に沿って charclass の行き先を書き換える + 書き換えた charclass を merge する + 前の部分に * がない + は新しい状態をつくるので、state を切り替えて生成する */ -TGValue stateAllocate(NodePtr n,TGValue tgv) { +TGValue generateTransition(NodePtr n,TGValue tgv, int pass) { if (n->tokenType == '+') { - TGValue tgvLeft = stateAllocate(n->left,tgv); + TGValue tgvLeft = tgv; + tgvLeft.endState = n->right->state; + tgvLeft.asterisk = NULL; + tgvLeft = generateTransition(n->left,tgv,pass); + TGValue tgvRight = tgvLeft; if (tgvLeft.asterisk) { - TGValue tgvRight = tgvLeft; - tgvRight.asterisk = false; - tgvRight = stateAllocate(n->right,tgvRight); - tgvRight.asterisk = true; - return tgvRight; + n->right->state = tgv.endState; + tgvRight.startState = tgvRight.asterisk; + tgvRight = generateTransition(n->right,tgvRight,pass); + tgvLeft.asterisk = tgvRight.asterisk; + return tgvLeft; } - TGValue tgvRight = tgvLeft; - n->right->state = createState(tgvRight,n->right); - tgvRight.startState = n->right->state; - tgvRight.asterisk = false; - tgvRight = stateAllocate(n->right,tgvRight); - return tgvRight; + tgvRight.asterisk = NULL; + if (pass==1) { + n->right->state = tgvRight.startState = createState(tgvRight,n->right); + } else { + tgvLeft.startState = n->right->state; + tgvLeft.tg->stateArray[tgvLeft.startState->bitState.bitContainer] = tgvLeft.startState ; + } + tgvRight = generateTransition(n->right,tgvRight,pass); + tgvLeft.asterisk = tgvRight.asterisk; + return tgvLeft; } else if (n->tokenType == '|') { - TGValue tgv1 = stateAllocate(n->left,tgv); - TGValue tgv2 = stateAllocate(n->right,tgv1); + TGValue tgv1 = generateTransition(n->left,tgv,pass); + TGValue tgv2 = generateTransition(n->right,tgv1,pass); return tgv2; } else if (n->tokenType == '*') { TGValue tgvAstah = tgv; tgvAstah.endState = tgvAstah.startState; - tgvAstah = stateAllocate(n->left,tgvAstah); - tgvAstah.asterisk = true; + tgvAstah = generateTransition(n->left,tgvAstah,pass); + tgvAstah.asterisk = tgvAstah.startState; return tgvAstah; } else if (n->tokenType == 'c' || n->tokenType == 'a'){ TGValue tgv1 = tgv; - tgv1.asterisk = false; - n->stateNum = tgv.startState->stateNum; - n->nextStateNum = tgv.endState->stateNum; - n->state = tgv.startState;; - n->nextState = tgv.endState; - return tgv1; - } else { - return tgv; - } -} - -/** - 割り当てられた状態に沿って charclass の行き先を書き換える - 書き換えた charclass を merge する - 前の部分に * がない + は新しい状態をつくるので、state を切り替えて生成する - */ -TGValue generateTransition(NodePtr n,TGValue tgv) { - if (n->tokenType == '+') { - TGValue tgvLeft = generateTransition(n->left,tgv); - if (tgvLeft.asterisk) { - TGValue tgvRight = tgvLeft; - tgvRight.asterisk = false; - tgvRight = generateTransition(n->right,tgvRight); - tgvRight.asterisk = true; - return tgvRight; + if (pass==1) { + n->stateNum = tgv.startState->stateNum; + n->nextStateNum = tgv.endState->stateNum; + n->state = tgv.startState;; + n->nextState = tgv.endState; + } else { + BitVector bi = createBitVector(n->nextStateNum); + setState(n->cc,bi); + tgv1.startState->cc = mergeTransition(tgv1.startState,n->cc); } - TGValue tgvRight = tgvLeft; - StatePtr left = tgvLeft.startState; - tgvLeft.startState = n->right->state; - tgvLeft.tg->stateArray[tgvLeft.startState->bitState.bitContainer] = left; - tgvRight.asterisk = false; - tgvRight = generateTransition(n->right,tgvRight); - return tgvRight; - } else if (n->tokenType == '|') { - TGValue tgv1 = generateTransition(n->left,tgv); - TGValue tgv2 = generateTransition(n->right,tgv1); - return tgv2; - } else if (n->tokenType == '*') { - TGValue tgvAstah = generateTransition(n->left,tgv); - tgvAstah.asterisk = true; - return tgvAstah; - } else if (n->tokenType == 'c' || n->tokenType == 'a'){ - TGValue tgv1 = tgv; - tgv1.asterisk = false; - BitVector bi = createBitVector(n->nextStateNum); - setState(n->cc,bi); - tgv1.startState->cc = mergeTransition(tgv1.startState,n->cc); return tgv1; } else { return tgv; @@ -307,7 +284,7 @@ TGValue tgv; tgv.startState = NULL; tgv.endState = NULL; - tgv.asterisk = false; + tgv.asterisk = NULL; tgv.tg = createTransitionGenerator(); return tgv; } @@ -317,7 +294,7 @@ StatePtr startState = tgv.startState = createState(tgv,n); NodePtr eof = createNode(NULL,'e',NULL,NULL,NULL); StatePtr endState = tgv.endState = createState(tgv,eof); - tgv = stateAllocate(n,tgv); + tgv = generateTransition(n,tgv,1); if (tgv.tg->totalStateCount > BITBLOCK) { errorMassege("StateMax > BITBLOCK",__LINE__,__FILE__); } @@ -325,7 +302,7 @@ tgv.tg->stateArray = (StatePtr*)calloc(bi.bitContainer*2,sizeof(StatePtr*)); tgv.tg->stateArray[startState->bitState.bitContainer] = startState; tgv.tg->stateArray[endState->bitState.bitContainer] = endState; - generateTransition(n,tgv); + tgv = generateTransition(n,tgv,2); return tgv.tg; }