diff regexParser/CharClass.cc @ 308:1188debbef10

separate CharClass
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 08 Feb 2016 12:45:45 +0900
parents regexParser/subsetConstruction.cc@3e78631a6222
children 058c87665213
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/regexParser/CharClass.cc	Mon Feb 08 12:45:45 2016 +0900
@@ -0,0 +1,311 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "regexParser.h"
+#include "subsetConstruction.h"
+#include "node.h"
+#include "BitVector.h"
+#include "error.h"
+
+#include "CharClass.h"
+
+
+CharClassPtr createCharClassWord(RegexInfoPtr ri) {
+    CharClassPtr cc = NEW(CharClass);
+    cc->type = 'a';
+    cc->left = NULL;
+    cc->right = NULL;
+    cc->cond.w.word = ri->tokenValue;
+    cc->cond.w.length = ri->ptr - ri->tokenValue;
+    cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue;
+    return cc;
+}
+
+/*
+    cond.range.begin  cond.range.end
+           |----------------|
+  1.b---e
+  2.b------e
+  3.b------------e
+  4.b-----------------------e
+  5.b----------------------------e
+
+           |----------------|
+  6.       b---------e
+  7.       b----------------e
+  8.       b---------------------e
+
+           |----------------|
+  9.               b-----e
+  10.              b--------e
+  11.              b-------------e
+
+           |----------------|
+  12.                       b-----e
+
+           |----------------|
+  13.                          b--e
+
+ */
+CharClassPtr insertCharClass(CharClassPtr cc, unsigned long begin, unsigned long end) {
+    if (begin>end) {
+        unsigned long tmp = begin; begin = end; end = tmp;
+    }
+    if (cc == NULL) {
+        return createCharClassRange(begin,end,0,0,0);
+    }
+    if (end < cc->cond.range.begin ) { // 1
+        if (cc->left) {
+            cc->left = insertCharClass(cc->left,begin,end);
+        } else {
+            cc->left = createCharClassRange(begin,end,0,0,0);
+        }
+        return cc;
+    } else if (end == cc->cond.range.begin ) { // 2
+        cc->cond.range.begin = begin;
+        return cc;
+    } else if (end <= cc->cond.range.end) {  // 3,4,6,7,9,10
+        if (begin < cc->cond.range.begin) {  // 3,4
+            cc->cond.range.begin = begin;
+        }
+        return cc;
+    } else if (begin > cc->cond.range.end ) { // 13
+        if (cc->right) {
+            cc->right = insertCharClass(cc->right,begin,end);
+        } else {
+            cc->right = createCharClassRange(begin,end,0,0,0);
+        }
+        return cc;
+    }
+    if (cc->right) {
+        CharClassPtr right = cc->right;
+        begin = cc->cond.range.begin;
+        free(cc);
+        return insertCharClass(right,begin,end);
+    }
+    if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { // 12
+        if (end > cc->cond.range.end) cc->cond.range.end = end; // 11,8
+    } else if (begin < cc->cond.range.begin) { // 5
+        cc->cond.range.begin = begin;
+        cc->cond.range.end = end; 
+    } else {
+        printf("insertCharClass Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end);
+    }
+    return cc;
+}
+
+
+CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right) {
+    CharClassPtr cc = NEW(CharClass);
+    cc->type = 'r';
+    cc->cond.range.begin = begin;
+    cc->cond.range.end = end;
+    cc->cond.w.word = NULL;
+    cc->cond.w.length = 0;
+    cc->left = left;
+    cc->right = right;
+    cc->nextState.bitContainer = state;
+    return cc;
+}
+
+CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) ;
+
+CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) {
+    CharClassPtr cc1;
+    if (cc) {
+        cc1 = charClassMerge(cc,mBegin,mEnd,nextState);
+    } else {
+        cc1 = createCharClassRange(mBegin,mEnd,nextState.bitContainer,NULL,NULL);
+    }
+    return cc1;
+}
+
+/*
+    cond.range.begin  cond.range.end
+           |----------------|
+  1.b---e
+  2.b------e
+  3.b------------e
+  4.b-----------------------e
+  5.b----------------------------e
+
+           |----------------|
+  6.       b---------e
+  7.       b----------------e
+  8.       b---------------------e
+
+           |----------------|
+  9.               b-----e
+  10.              b--------e
+  11.              b-------------e
+
+           |----------------|
+  12.                       b-----e
+
+           |----------------|
+  13.                          b--e
+
+ */
+
+CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) {
+    // 重なっているccの領域を分割する
+    // 必要ならばnextStateを重ねあわせる
+    // 変更があった場合は新しくリストを作って返す
+    if (end < cc->cond.range.begin ) { // 1
+        if (cc->left) {
+            return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,nextState.bitContainer,charClassMerge(cc->left,begin,end,nextState),cc->right);
+        } else {
+            return createCharClassRange(begin,end,nextState.bitContainer,NULL,cc);
+        }
+    } else if (end == cc->cond.range.begin && begin != end ) { // 2
+        CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,nextState);
+        if (cc->cond.range.begin == cc->cond.range.end) {
+            return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,
+                cc->nextState.bitContainer | nextState.bitContainer, cc1,cc->right);
+        }
+        CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right);
+        return createCharClassRange(cc->cond.range.begin,cc->cond.range.begin,
+            cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3);
+    } else if (end < cc->cond.range.end) { // range.begin < end
+        if (begin < cc->cond.range.begin) {  // 3
+            CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState);
+            CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right);
+            return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3);
+        }
+        if (begin == cc->cond.range.begin) {  // 6
+            CharClassPtr cc2 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right);
+            return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc2);
+        }
+        // 9
+        CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL);
+        CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right);
+        return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc2,cc3);
+    } else if (end == cc->cond.range.end) {
+        if (begin == cc->cond.range.begin) { // 7
+            return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc->right);
+        } else if (begin < cc->cond.range.begin) { // 4
+            CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState);
+            return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc->right);
+        }
+        // 10 cond.range.begin < begin
+        CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,NULL,cc->right);
+        return createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,cc2);
+    }
+    if (begin > cc->cond.range.end ) { // 13
+        if (cc->right) {
+            return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer,cc->left,charClassMerge(cc->right,begin,end,nextState));
+        } else {
+            return createCharClassRange(begin,end,nextState.bitContainer,cc,NULL);
+        }
+    }
+    if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) {
+        if (end > cc->cond.range.end) { // cond.range.end < end
+            if (begin == cc->cond.range.begin) {    // 8
+                CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState);
+                return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,
+                    cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc1);
+            }
+            if (begin > cc->cond.range.begin) {  // 11,12
+                CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState);
+                CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL);
+                return createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc3,cc1);
+            }
+        }
+    } else if (begin < cc->cond.range.begin) { // 5
+        CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState);
+        CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState);
+        return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3);
+    } else {
+        printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end);
+    }
+    return cc;
+}
+
+void findLeftMost(CharClassPtr next,CharClassWalkerPtr walk) {
+    while (next->left) {
+        CharClassStackPtr ccs = NEW(CharClassStack);
+        ccs->next = walk->stack;
+        ccs->turn = walk->turn;
+        walk->turn = LEFT;
+        ccs->cc = next;
+        walk->stack = ccs;
+        next = next->left;
+    }
+    walk->turn = SELF;
+    walk->next = next;
+}
+
+CharClassWalkerPtr createCharClassWalker (CharClassPtr next) {
+    CharClassWalkerPtr walk = NEW(CharClassWalker);
+    walk->next = NULL;
+    walk->stack = NULL;
+    walk->turn = LEFT;
+    if (!next) return walk;
+    findLeftMost(next,walk);
+    return walk;
+}
+
+bool hasNext(CharClassWalkerPtr walk) {
+    return walk->next != NULL;
+}
+
+CharClassStackPtr charClassStackPop(CharClassWalkerPtr walk) {
+    CharClassStackPtr prev = walk->stack->next;
+    walk->turn = walk->stack->turn;
+    free(walk->stack);
+    walk->stack = prev;
+    return prev;
+}
+
+CharClassPtr getNext(CharClassWalkerPtr walk) {
+    CharClassPtr current = walk->next;
+    walk->next = NULL;
+    if (walk->turn == SELF) {
+        if (current->right) {
+            walk->turn = RIGHT;
+            findLeftMost(current->right,walk);
+            return current;
+        }
+    }
+    while (walk->stack) {
+        walk->next = walk->stack->cc;
+        charClassStackPop(walk);
+        if (walk->turn == LEFT) {
+            walk->turn = SELF;
+            return current;
+        }
+    }
+    return current;
+}
+
+void setState(CharClassPtr cc, BitVector bi) {
+    // if (word case) setNext(bitVector to the word list)
+    cc->nextState = bi;
+    if (cc->left) {
+        setState(cc->left,bi);
+    }
+    if (cc->right) {
+        setState(cc->right,bi);
+    }
+}
+
+CharClassPtr mergeTransition(StatePtr x,CharClassPtr y) {
+    if (x->cc == NULL) {
+        return y;
+    }
+    CharClassWalkerPtr walk = createCharClassWalker(x->cc);
+    CharClassPtr ccy = y;
+    BitVector bi;
+    while (hasNext(walk)) {
+        CharClassPtr cc = getNext(walk);
+        unsigned long begin = cc->cond.range.begin;
+        unsigned long end = cc->cond.range.end;
+        bi = cc->nextState;
+        ccy = charClassMerge(ccy,begin,end,bi);
+    }
+    free(walk);
+    return ccy;
+}
+
+/* end */