Mercurial > hg > Applications > Grep
diff regexParser/subsetConstruction.cc @ 308:1188debbef10
separate CharClass
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 08 Feb 2016 12:45:45 +0900 |
parents | 3e78631a6222 |
children |
line wrap: on
line diff
--- a/regexParser/subsetConstruction.cc Mon Feb 08 12:26:53 2016 +0900 +++ b/regexParser/subsetConstruction.cc Mon Feb 08 12:45:45 2016 +0900 @@ -5,221 +5,10 @@ #include "regexParser.h" #include "subsetConstruction.h" #include "node.h" -#include "BitVector.h" +#include "bitVector.h" +#include "CharClass.h" #include "error.h" -#define SIZE 256 - -CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right) { - CharClassPtr cc = NEW(CharClass); - cc->type = 'r'; - cc->cond.range.begin = begin; - cc->cond.range.end = end; - cc->cond.w.word = NULL; - cc->cond.w.length = 0; - cc->left = left; - cc->right = right; - cc->nextState.bitContainer = state; - return cc; -} - -CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) { - CharClassPtr cc1; - if (cc) { - cc1 = charClassMerge(cc,mBegin,mEnd,nextState); - } else { - cc1 = createCharClassRange(mBegin,mEnd,nextState.bitContainer,NULL,NULL); - } - return cc1; -} - -/* - cond.range.begin cond.range.end - |----------------| - 1.b---e - 2.b------e - 3.b------------e - 4.b-----------------------e - 5.b----------------------------e - - |----------------| - 6. b---------e - 7. b----------------e - 8. b---------------------e - - |----------------| - 9. b-----e - 10. b--------e - 11. b-------------e - - |----------------| - 12. b-----e - - |----------------| - 13. b--e - - */ - -CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) { - // 重なっているccの領域を分割する - // 必要ならばnextStateを重ねあわせる - // 変更があった場合は新しくリストを作って返す - if (end < cc->cond.range.begin ) { // 1 - if (cc->left) { - return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,nextState.bitContainer,charClassMerge(cc->left,begin,end,nextState),cc->right); - } else { - return createCharClassRange(begin,end,nextState.bitContainer,NULL,cc); - } - } else if (end == cc->cond.range.begin && begin != end ) { // 2 - CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,nextState); - if (cc->cond.range.begin == cc->cond.range.end) { - return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, - cc->nextState.bitContainer | nextState.bitContainer, cc1,cc->right); - } - CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); - return createCharClassRange(cc->cond.range.begin,cc->cond.range.begin, - cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); - } else if (end < cc->cond.range.end) { // range.begin < end - if (begin < cc->cond.range.begin) { // 3 - CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); - CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); - return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); - } - if (begin == cc->cond.range.begin) { // 6 - CharClassPtr cc2 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); - return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc2); - } - // 9 - CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); - CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); - return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc2,cc3); - } else if (end == cc->cond.range.end) { - if (begin == cc->cond.range.begin) { // 7 - return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc->right); - } else if (begin < cc->cond.range.begin) { // 4 - CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); - return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc->right); - } - // 10 cond.range.begin < begin - CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,NULL,cc->right); - return createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,cc2); - } - if (begin > cc->cond.range.end ) { // 13 - if (cc->right) { - return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer,cc->left,charClassMerge(cc->right,begin,end,nextState)); - } else { - return createCharClassRange(begin,end,nextState.bitContainer,cc,NULL); - } - } - if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { - if (end > cc->cond.range.end) { // cond.range.end < end - if (begin == cc->cond.range.begin) { // 8 - CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); - return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, - cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc1); - } - if (begin > cc->cond.range.begin) { // 11,12 - CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); - CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); - return createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc3,cc1); - } - } - } else if (begin < cc->cond.range.begin) { // 5 - CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); - CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); - return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); - } else { - printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); - } - return cc; -} - -void findLeftMost(CharClassPtr next,CharClassWalkerPtr walk) { - while (next->left) { - CharClassStackPtr ccs = NEW(CharClassStack); - ccs->next = walk->stack; - ccs->turn = walk->turn; - walk->turn = LEFT; - ccs->cc = next; - walk->stack = ccs; - next = next->left; - } - walk->turn = SELF; - walk->next = next; -} - -CharClassWalkerPtr createCharClassWalker (CharClassPtr next) { - CharClassWalkerPtr walk = NEW(CharClassWalker); - walk->next = NULL; - walk->stack = NULL; - walk->turn = LEFT; - if (!next) return walk; - findLeftMost(next,walk); - return walk; -} - -bool hasNext(CharClassWalkerPtr walk) { - return walk->next != NULL; -} - -CharClassStackPtr charClassStackPop(CharClassWalkerPtr walk) { - CharClassStackPtr prev = walk->stack->next; - walk->turn = walk->stack->turn; - free(walk->stack); - walk->stack = prev; - return prev; -} - -CharClassPtr getNext(CharClassWalkerPtr walk) { - CharClassPtr current = walk->next; - walk->next = NULL; - if (walk->turn == SELF) { - if (current->right) { - walk->turn = RIGHT; - findLeftMost(current->right,walk); - return current; - } - } - while (walk->stack) { - walk->next = walk->stack->cc; - charClassStackPop(walk); - if (walk->turn == LEFT) { - walk->turn = SELF; - return current; - } - } - return current; -} - -void setState(CharClassPtr cc, BitVector bi) { - // if (word case) setNext(bitVector to the word list) - cc->nextState = bi; - if (cc->left) { - setState(cc->left,bi); - } - if (cc->right) { - setState(cc->right,bi); - } -} - -CharClassPtr mergeTransition(StatePtr x,CharClassPtr y) { - if (x->cc == NULL) { - return y; - } - CharClassWalkerPtr walk = createCharClassWalker(x->cc); - CharClassPtr ccy = y; - BitVector bi; - while (hasNext(walk)) { - CharClassPtr cc = getNext(walk); - unsigned long begin = cc->cond.range.begin; - unsigned long end = cc->cond.range.end; - bi = cc->nextState; - ccy = charClassMerge(ccy,begin,end,bi); - } - free(walk); - return ccy; -} - /** 作成する state を linked list bitvector を index とした配列に BitVectorPtr を格納