# HG changeset patch # User Masataka Kohagura # Date 1451209568 -32400 # Node ID 2ec95755238ed1e16da309e433ccc3cb072551eb # Parent 58d3c01bf421f42e0e055ecffbd8b2a5b70f5f78 fix mergetest diff -r 58d3c01bf421 -r 2ec95755238e regexParser/Makefile --- a/regexParser/Makefile Sun Dec 27 16:51:14 2015 +0900 +++ b/regexParser/Makefile Sun Dec 27 18:46:08 2015 +0900 @@ -1,5 +1,4 @@ -TARGET= regexParser -MERGE = ccMerge +TARGET= regexParser test/ccMerge CFLAGS= -Wall -O0 -g CC= clang++ @@ -13,7 +12,7 @@ all: $(TARGET) -$(TARGET):$(OBJS) +regexParser: $(OBJS) $(CC) $(CFLAGS) -o $@ $(OBJS) test/searchBit: test/searchBit.cc @@ -35,68 +34,78 @@ .SUFFIXES: .cc .o test1: - ./$(TARGET) -regex 'a' - ./$(TARGET) -regex 'ab' - ./$(TARGET) -regex 'a*b' - ./$(TARGET) -regex 'ab*' - ./$(TARGET) -regex '(ab)*' - ./$(TARGET) -regex '(ab)*c' - ./$(TARGET) -regex '(ab)c' - ./$(TARGET) -regex '(a|b)c' - ./$(TARGET) -regex 'a(bc)*d' - ./$(TARGET) -regex 'abc*d' - ./$(TARGET) -regex '(ab)c*d' - ./$(TARGET) -regex 'a(b)c' - ./$(TARGET) -regex '(a|b|c)d' - ./$(TARGET) -regex '(a|b|c)*d' - ./$(TARGET) -regex '[a]' - ./$(TARGET) -regex '[ab]' - ./$(TARGET) -regex '[a-z]' - ./$(TARGET) -regex '[a-zA-Z]' - ./$(TARGET) -regex '[a-zA-Z]*' - ./$(TARGET) -regex 'a[a-zA-Z]*' - ./$(TARGET) -regex 'a([a-z])' - ./$(TARGET) -regex 'a|[a-z]' - ./$(TARGET) -regex 'a|[a-z]*' - ./$(TARGET) -regex 'a*|[a-z]' - ./$(TARGET) -regex '[d-ga-c]' - ./$(TARGET) -regex '[d-ga-d]' - ./$(TARGET) -regex '[d-ga-e]' - ./$(TARGET) -regex '[d-ga-f]' - ./$(TARGET) -regex '[d-ga-h]' - ./$(TARGET) -regex '[d-gd-e]' - ./$(TARGET) -regex '[d-gd-f]' - ./$(TARGET) -regex '[d-gd-h]' - ./$(TARGET) -regex '[d-ge-f]' - ./$(TARGET) -regex '[d-ge-g]' - ./$(TARGET) -regex '[d-ge-i]' - ./$(TARGET) -regex '[d-gg-i]' - ./$(TARGET) -regex '[d-gh-i]' - ./$(TARGET) -regex '[d-ga-cx-z]' - ./$(TARGET) -regex '[d-ga-de-d]' - ./$(TARGET) -regex '[d-ga-db-e]' - ./$(TARGET) -regex '[d-gh-ja-e]' + ./regexParser -regex 'a' + ./regexParser -regex 'ab' + ./regexParser -regex 'a*b' + ./regexParser -regex 'ab*' + ./regexParser -regex '(ab)*' + ./regexParser -regex '(ab)*c' + ./regexParser -regex '(ab)c' + ./regexParser -regex '(a|b)c' + ./regexParser -regex 'a(bc)*d' + ./regexParser -regex 'abc*d' + ./regexParser -regex '(ab)c*d' + ./regexParser -regex 'a(b)c' + ./regexParser -regex '(a|b|c)d' + ./regexParser -regex '(a|b|c)*d' + ./regexParser -regex '[a]' + ./regexParser -regex '[ab]' + ./regexParser -regex '[a-z]' + ./regexParser -regex '[a-zA-Z]' + ./regexParser -regex '[a-zA-Z]*' + ./regexParser -regex 'a[a-zA-Z]*' + ./regexParser -regex 'a([a-z])' + ./regexParser -regex 'a|[a-z]' + ./regexParser -regex 'a|[a-z]*' + ./regexParser -regex 'a*|[a-z]' + ./regexParser -regex '[d-ga-c]' + ./regexParser -regex '[d-ga-d]' + ./regexParser -regex '[d-ga-e]' + ./regexParser -regex '[d-ga-f]' + ./regexParser -regex '[d-ga-h]' + ./regexParser -regex '[d-gd-e]' + ./regexParser -regex '[d-gd-f]' + ./regexParser -regex '[d-gd-h]' + ./regexParser -regex '[d-ge-f]' + ./regexParser -regex '[d-ge-g]' + ./regexParser -regex '[d-ge-i]' + ./regexParser -regex '[d-gg-i]' + ./regexParser -regex '[d-gh-i]' + ./regexParser -regex '[d-ga-cx-z]' + ./regexParser -regex '[d-ga-de-d]' + ./regexParser -regex '[d-ga-db-e]' + ./regexParser -regex '[d-gh-ja-e]' merge_test: - ./test/$(MERGE) -regex '[f-i]' -merge '[d-e]' - ./test/$(MERGE) -regex '[f-i]' -merge '[d-f]' - ./test/$(MERGE) -regex '[f-i]' -merge '[d-g]' - ./test/$(MERGE) -regex '[f-i]' -merge '[d-i]' - ./test/$(MERGE) -regex '[f-i]' -merge '[d-k]' - ./test/$(MERGE) -regex '[f-i]' -merge '[f-g]' - ./test/$(MERGE) -regex '[f-i]' -merge '[f-i]' - ./test/$(MERGE) -regex '[f-i]' -merge '[f-k]' - ./test/$(MERGE) -regex '[f-i]' -merge '[g-h]' - ./test/$(MERGE) -regex '[f-i]' -merge '[g-i]' - ./test/$(MERGE) -regex '[f-i]' -merge '[g-k]' - ./test/$(MERGE) -regex '[f-i]' -merge '[i-k]' - ./test/$(MERGE) -regex '[f-i]' -merge '[j-k]' - ./test/$(MERGE) -regex '[c-ef-i]' -merge '[d-e]' - ./test/$(MERGE) -regex '[c-ef-i]' -merge '[d-f]' - ./test/$(MERGE) -regex '[c-ef-i]' -merge '[d-g]' - ./test/$(MERGE) -regex '[c-ef-i]' -merge '[d-i]' - ./test/$(MERGE) -regex '[c-ef-ij-m]' -merge '[d-k]' - ./test/$(MERGE) -regex '[f-ij-m]' -merge '[f-k]' - ./test/$(MERGE) -regex '[f-ij-m]' -merge '[g-k]' - ./test/$(MERGE) -regex '[f-ij-m]' -merge '[i-k]' - ./test/$(MERGE) -regex '[f-ij-m]' -merge '[j-k]' + ./test/ccMerge -regex '[f-i]' -regex 'e' + ./test/ccMerge -regex '[f-i]' -regex 'f' + ./test/ccMerge -regex '[f-i]' -regex 'g' + ./test/ccMerge -regex '[f-i]' -regex 'i' + ./test/ccMerge -regex '[f-i]' -regex 'j' + ./test/ccMerge -regex 'e' -regex '[f-i]' + ./test/ccMerge -regex 'f' -regex '[f-i]' + ./test/ccMerge -regex 'g' -regex '[f-i]' + ./test/ccMerge -regex 'i' -regex '[f-i]' + ./test/ccMerge -regex 'j' -regex '[f-i]' + ./test/ccMerge -regex '[f-i]' -regex '[d-e]' + ./test/ccMerge -regex '[f-i]' -regex '[d-f]' + ./test/ccMerge -regex '[f-i]' -regex '[d-g]' + ./test/ccMerge -regex '[f-i]' -regex '[d-i]' + ./test/ccMerge -regex '[f-i]' -regex '[d-k]' + ./test/ccMerge -regex '[f-i]' -regex '[f-g]' + ./test/ccMerge -regex '[f-i]' -regex '[f-i]' + ./test/ccMerge -regex '[f-i]' -regex '[f-k]' + ./test/ccMerge -regex '[f-i]' -regex '[g-h]' + ./test/ccMerge -regex '[f-i]' -regex '[g-i]' + ./test/ccMerge -regex '[f-i]' -regex '[g-k]' + ./test/ccMerge -regex '[f-i]' -regex '[i-k]' + ./test/ccMerge -regex '[f-i]' -regex '[j-k]' + ./test/ccMerge -regex '[c-ef-i]' -regex '[d-e]' + ./test/ccMerge -regex '[c-ef-i]' -regex '[d-f]' + ./test/ccMerge -regex '[c-ef-i]' -regex '[d-g]' + ./test/ccMerge -regex '[c-ef-i]' -regex '[d-i]' + ./test/ccMerge -regex '[c-ef-ij-m]' -regex '[d-k]' + ./test/ccMerge -regex '[f-ij-m]' -regex '[f-k]' + ./test/ccMerge -regex '[f-ij-m]' -regex '[g-k]' + ./test/ccMerge -regex '[f-ij-m]' -regex '[i-k]' + ./test/ccMerge -regex '[f-ij-m]' -regex '[j-k]' diff -r 58d3c01bf421 -r 2ec95755238e regexParser/subsetConstraction.cc --- a/regexParser/subsetConstraction.cc Sun Dec 27 16:51:14 2015 +0900 +++ b/regexParser/subsetConstraction.cc Sun Dec 27 18:46:08 2015 +0900 @@ -188,7 +188,8 @@ CharClassWalkerPtr walk = createCharClassWalker(x->cc); CharClassPtr ccy = y; BitVector bi; - for (CharClassPtr cc = getNext(walk); hasNext(walk); cc=getNext(walk)) { + while (hasNext(walk)) { + CharClassPtr cc = getNext(walk); unsigned long begin = cc->cond.range.begin; unsigned long end = cc->cond.range.end; bi = cc->nextState; @@ -211,7 +212,7 @@ s->node = n; BitVector bi = createBitVector(n->stateNum); s->bitState = bi; - s->cc = NULL; + s->cc = n->cc; return s; } @@ -308,25 +309,29 @@ return tg; } -TransitionGeneratorPtr generateTransitionList(NodePtr n) { +TGValue createTGValue() { TransitionGeneratorPtr tg = createTransitionGenerator(); TGValue tgv; - // initiarize tgv tgv.asterisk = false; tgv.tg = tg; + return tgv; +} + +TransitionGeneratorPtr generateTransitionList(NodePtr n) { + TGValue tgv = createTGValue(); StatePtr startState = tgv.startState = createState(tgv,n); NodePtr eof = createNode(NULL,'e',NULL,NULL,NULL); StatePtr endState = tgv.endState = createState(tgv,eof); tgv = stateAllocate(n,tgv); - if (tg->totalStateCount > BITBLOCK) { + if (tgv.tg->totalStateCount > BITBLOCK) { errorMassege("StateMax > BITBLOCK",__LINE__,__FILE__); } - BitVector bi = createBitVector(tg->totalStateCount); + BitVector bi = createBitVector(tgv.tg->totalStateCount); tgv.tg->stateArray = (StatePtr*)calloc(bi.bitContainer*2,sizeof(StatePtr*)); tgv.tg->stateArray[startState->bitState.bitContainer] = startState; tgv.tg->stateArray[endState->bitState.bitContainer] = endState; generateTransition(n,tgv); - return tg; + return tgv.tg; } void printState(StatePtr state) { diff -r 58d3c01bf421 -r 2ec95755238e regexParser/subsetConstraction.h --- a/regexParser/subsetConstraction.h Sun Dec 27 16:51:14 2015 +0900 +++ b/regexParser/subsetConstraction.h Sun Dec 27 18:46:08 2015 +0900 @@ -1,3 +1,7 @@ extern CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState); +extern TGValue createTGValue(); +extern CharClassPtr mergeTransition(StatePtr x,CharClassPtr y); +extern void setState(CharClassPtr cc, BitVector bi); +extern StatePtr createState(TGValue tgv,NodePtr n); extern TransitionGeneratorPtr generateTransitionList(NodePtr n); extern void printState(TransitionGeneratorPtr tg); diff -r 58d3c01bf421 -r 2ec95755238e regexParser/test/ccMerge.cc --- a/regexParser/test/ccMerge.cc Sun Dec 27 16:51:14 2015 +0900 +++ b/regexParser/test/ccMerge.cc Sun Dec 27 18:46:08 2015 +0900 @@ -19,36 +19,27 @@ int main(int argc, char **argv) { - RegexInfo ri; - unsigned char* merge = NULL; + RegexInfo riRegex; + NodePtr n = NULL; + StatePtr s = NULL; + TGValue tgv = createTGValue(); for (int i = 1; i < argc; i++) { if (strcmp(argv[i],"-regex") == 0) { - ri.ptr = (unsigned char*)argv[i+1]; i++; - } else if (strcmp(argv[i],"-merge") == 0) { - merge = (unsigned char*)argv[i+1]; i++; + riRegex.ptr = (unsigned char*)argv[i+1]; i++; + printf("regex : %s\n",riRegex.ptr); + NodePtr nMerge = regex(&riRegex); + StatePtr sMerge = createState(tgv,nMerge); + setState(sMerge->cc,sMerge->bitState); + if (s == NULL) { + s = sMerge; + n = nMerge; + printCCTree(s->cc); + continue; + } + CharClassPtr cc = mergeTransition(s,sMerge->cc); + s->cc = cc; + printCCTree(s->cc); } } - printf("regex : %s\n",ri.ptr); - printf("merge : %s\n",merge); - unsigned char begin, end; - if (merge[0] == '[') merge++; - begin = *merge; - end = *merge; - for (; *merge && *merge != ']'; merge++) { - if (*merge == '-') { - end = *(merge + 1); - merge++; - continue; - } - if (merge[0] == 0 || merge[0] == ']') break; - begin = *merge; - end = *merge; - } - NodePtr n = regex(&ri); - TransitionGeneratorPtr tg = generateTransitionList(n); - BitVector nextState; - nextState.bitContainer = 8; - CharClassPtr cc = charClassMerge(n->cc,begin,end,nextState); - printCCTree(cc); return 0; }