Mercurial > hg > Applications > Grep
diff regexParser/subsetConstraction.cc @ 167:3bf2c6d6d53e pairPro
move regexparser dir
author | masa |
---|---|
date | Sat, 19 Dec 2015 15:38:45 +0900 |
parents | c/regexParser/subsetConstraction.cc@96854eba17e5 |
children | 6b31d6ef9ba4 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/regexParser/subsetConstraction.cc Sat Dec 19 15:38:45 2015 +0900 @@ -0,0 +1,189 @@ +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> +#include "subsetConstraction.h" + +CharClassPtr createCharClassWord(unsigned char *w, CharClassPtr cc1, CharClassPtr cc2) { + CharClassPtr cc = NEW(CharClass); + return cc; +} + +CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) { + CharClassPtr cc1; + if (cc) { + cc1 = charClassMerge(cc,mBegin,mEnd,nextState); + } else { + cc1 = createCharClassRange(mBegin,mEnd,NULL,NULL); + cc1->nextState = nextState; + } + return cc1; +} + +CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) { + // 重なっているccの領域を分割する + // 必要ならばnextStateを重ねあわせる + // 変更があった場合は新しくリストを作って返す + if (end < cc->cond.range.begin ) { // 1 + if (cc->left) { + return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,charClassMerge(cc->left,begin,end,nextState),cc->right); + } else { + CharClassPtr cc1 = createCharClassRange(begin,end,NULL,cc); + cc1->nextState = nextState; + return cc1; + } + } else if (end == cc->cond.range.begin && begin != end ) { // 2 + CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,nextState); + if (cc->cond.range.begin == cc->cond.range.end) { + CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc1,cc->right); + cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; + return cc2; + } + CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin+1,cc->cond.range.end,cc->left,cc->right); + cc3->nextState = cc->nextState; + CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,cc->cond.range.begin,cc1,cc3); + cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; + return cc2; + } else if (end < cc->cond.range.end) { // range.begin < end + if (begin < cc->cond.range.begin) { // 3 + CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); + CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->left,cc->right); + cc3->nextState = cc->nextState; + CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,end,cc1,cc3); + cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; + return cc2; + } + if (begin == cc->cond.range.begin) { // 6 + CharClassPtr cc2 = createCharClassRange(end+1,cc->cond.range.end,NULL,cc->right); + cc2->nextState = cc->nextState; + CharClassPtr cc1 = createCharClassRange(begin,end,cc->left,cc2); + cc1->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; + return cc1; + } + // 9 + CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,begin-1,cc->left,NULL); + cc2->nextState = cc->nextState; + CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,NULL,cc->right); + cc3->nextState = cc->nextState; + CharClassPtr cc1 = createCharClassRange(begin,end,cc2,cc3); + cc1->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; + return cc1; + } else if (end == cc->cond.range.end) { + if (begin == cc->cond.range.begin) { // 7 + CharClassPtr cc1 = createCharClassRange(begin,end,cc->left,cc->right); + cc1->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; + return cc1; + } else if (begin < cc->cond.range.begin) { // 4 + CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); + CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,end,cc1,cc->right); + cc3->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; + return cc3; + } + // 10 cond.range.begin < begin + CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,NULL,cc->right); + cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; + CharClassPtr cc1 = createCharClassRange(cc->cond.range.begin,begin-1,cc->left,cc2); + cc1->nextState = cc->nextState; + return cc1; + } + if (begin > cc->cond.range.end ) { // 13 + if (cc->right) { + return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->left,charClassMerge(cc->right,begin,end,nextState)); + } else { + CharClassPtr cc1 = createCharClassRange(begin,end,cc,NULL); + cc1->nextState = nextState; + return cc1; + } + } + if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { + if (end > cc->cond.range.end) { // cond.range.end < end + if (begin == cc->cond.range.begin) { // 8 + CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); + CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->left,cc1); + cc3->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; + return cc3; + } + if (begin > cc->cond.range.begin) { // 11 + CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); + CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,begin-1,cc->left,NULL); + cc3->nextState = cc->nextState; + CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,cc3,cc1); + cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; + return cc2; + } + } + // begin != end && end != cc->cond.range.end + if (begin == cc->cond.range.end) { // 12 + CharClassPtr cc1 = mergeCCTree(cc->right,begin+1,end,nextState); + if (cc->cond.range.begin == cc->cond.range.end) { + CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc1,cc->right); + cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; + return cc2; + } + CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,cc->cond.range.end-1,cc->left,NULL); + cc3->nextState = cc->nextState; + CharClassPtr cc2 = createCharClassRange(cc->cond.range.end,cc->cond.range.end,cc1,cc3); + cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; + return cc2; + } + } else if (begin < cc->cond.range.begin) { // 5 + CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); + CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); + CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc1,cc3); + cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; + return cc2; + } else { + printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); + } + return cc; +} + +TGValue generateTransition(NodePtr n,TransitionGenerator tg) { + TGValue tgv2; + if (n->tokenType == '+') { + TGValue tgv = generateTransition(n->left,tg); + if (tgv.asterisk) { + TGValue tgv1 = generateTransition(n->right,tg); + tgv.ts->nextState->bitContainer |= tgv1.ts->nextState->bitContainer; + return tgv; + } + TGValue tgv1 = generateTransition(n->right,tg); + tgv.ts->nextState = tgv1.ts->nextState; + return tgv; + } else if (n->tokenType == '|') { + TGValue tgv = generateTransition(n->left,tg); + TGValue tgv1 = generateTransition(n->right,tg); + tgv.ts = appendTransition(tgv.ts,tgv1.ts); + return tgv; + } else if (n->tokenType == '*') { + TGValue tgv = generateTransition(n->left,tg); + tgv.asterisk = true; + return tgv; + } else if (n->tokenType == 'c'){ + return tgv2; + } else if (n->tokenType == 'a'){ + TGValue tgv; + tgv.ts = (TransitionPtr)malloc(sizeof(Transition)); + tgv.ts->condition = n->cc; + tgv.ts->nextState = (BitVectorPtr)malloc(sizeof(BitVector)); + bitSet(tgv.ts->nextState,n->nodeNumber); + tg.ts = appendTransition(tg.ts,tgv.ts); + return tgv; + } else { + // error + } + return tgv2; +} + +void printTransitionList(TransitionPtr ts) { + for (;ts;ts = ts->next) { + printf("\n"); + } +} + +TransitionGenerator generateTransitionList(NodePtr n) { + TransitionGenerator tg; + tg.ts = (TransitionPtr)malloc(sizeof(Transition)); + generateTransition(n,tg); + printTransitionList(tg.ts); + return tg; +}