comparison c/regexParser/subsetConstraction.cc @ 166:96854eba17e5 pairPro

implement mergeCCTree()
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Sat, 19 Dec 2015 15:02:02 +0900
parents f0a347cd9c6a
children
comparison
equal deleted inserted replaced
165:42f4ee38196e 166:96854eba17e5
4 #include "subsetConstraction.h" 4 #include "subsetConstraction.h"
5 5
6 CharClassPtr createCharClassWord(unsigned char *w, CharClassPtr cc1, CharClassPtr cc2) { 6 CharClassPtr createCharClassWord(unsigned char *w, CharClassPtr cc1, CharClassPtr cc2) {
7 CharClassPtr cc = NEW(CharClass); 7 CharClassPtr cc = NEW(CharClass);
8 return cc; 8 return cc;
9 }
10
11 CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) {
12 CharClassPtr cc1;
13 if (cc) {
14 cc1 = charClassMerge(cc,mBegin,mEnd,nextState);
15 } else {
16 cc1 = createCharClassRange(mBegin,mEnd,NULL,NULL);
17 cc1->nextState = nextState;
18 }
19 return cc1;
9 } 20 }
10 21
11 CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) { 22 CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) {
12 // 重なっているccの領域を分割する 23 // 重なっているccの領域を分割する
13 // 必要ならばnextStateを重ねあわせる 24 // 必要ならばnextStateを重ねあわせる
19 CharClassPtr cc1 = createCharClassRange(begin,end,NULL,cc); 30 CharClassPtr cc1 = createCharClassRange(begin,end,NULL,cc);
20 cc1->nextState = nextState; 31 cc1->nextState = nextState;
21 return cc1; 32 return cc1;
22 } 33 }
23 } else if (end == cc->cond.range.begin && begin != end ) { // 2 34 } else if (end == cc->cond.range.begin && begin != end ) { // 2
24 CharClassPtr cc1; 35 CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,nextState);
25 if (cc->left) {
26 cc1 = charClassMerge(cc->left,begin,end-1,nextState);
27 } else {
28 cc1 = createCharClassRange(begin,end-1,NULL,NULL);
29 cc1->nextState = nextState;
30 }
31 if (cc->cond.range.begin == cc->cond.range.end) { 36 if (cc->cond.range.begin == cc->cond.range.end) {
32 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc1,cc->right); 37 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc1,cc->right);
33 cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; 38 cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer;
34 return cc2; 39 return cc2;
35 } 40 }
38 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,cc->cond.range.begin,cc1,cc3); 43 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,cc->cond.range.begin,cc1,cc3);
39 cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; 44 cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer;
40 return cc2; 45 return cc2;
41 } else if (end < cc->cond.range.end) { // range.begin < end 46 } else if (end < cc->cond.range.end) { // range.begin < end
42 if (begin < cc->cond.range.begin) { // 3 47 if (begin < cc->cond.range.begin) { // 3
43 CharClassPtr cc1; 48 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState);
44 if (cc->left) {
45 cc1 = charClassMerge(cc->left,begin,cc->cond.range.begin-1,nextState);
46 } else {
47 cc1 = createCharClassRange(begin,cc->cond.range.begin-1,NULL,NULL);
48 cc1->nextState = nextState;
49 }
50 CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->left,cc->right); 49 CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->left,cc->right);
51 cc3->nextState = cc->nextState; 50 cc3->nextState = cc->nextState;
52 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,end,cc1,cc3); 51 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,end,cc1,cc3);
53 cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; 52 cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer;
54 return cc2; 53 return cc2;
72 if (begin == cc->cond.range.begin) { // 7 71 if (begin == cc->cond.range.begin) { // 7
73 CharClassPtr cc1 = createCharClassRange(begin,end,cc->left,cc->right); 72 CharClassPtr cc1 = createCharClassRange(begin,end,cc->left,cc->right);
74 cc1->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; 73 cc1->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer;
75 return cc1; 74 return cc1;
76 } else if (begin < cc->cond.range.begin) { // 4 75 } else if (begin < cc->cond.range.begin) { // 4
77 CharClassPtr cc1; 76 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState);
78 if (cc->left) {
79 cc1 = charClassMerge(cc->left,begin,cc->cond.range.begin-1,nextState);
80 } else {
81 cc1 = createCharClassRange(begin,cc->cond.range.begin-1,NULL,NULL);
82 cc1->nextState = nextState;
83 }
84 CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,end,cc1,cc->right); 77 CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,end,cc1,cc->right);
85 cc3->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; 78 cc3->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer;
86 return cc3; 79 return cc3;
87 } 80 }
88 // 10 cond.range.begin < begin 81 // 10 cond.range.begin < begin
102 } 95 }
103 } 96 }
104 if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { 97 if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) {
105 if (end > cc->cond.range.end) { // cond.range.end < end 98 if (end > cc->cond.range.end) { // cond.range.end < end
106 if (begin == cc->cond.range.begin) { // 8 99 if (begin == cc->cond.range.begin) { // 8
107 CharClassPtr cc1; 100 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState);
108 if (cc->right) {
109 cc1 = charClassMerge(cc->right,cc->cond.range.end+1,end,nextState);
110 } else {
111 cc1 = createCharClassRange(cc->cond.range.end+1,end,NULL,NULL);
112 cc1->nextState = nextState;
113 }
114 CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->left,cc1); 101 CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->left,cc1);
115 cc3->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; 102 cc3->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer;
116 return cc3; 103 return cc3;
117 } 104 }
118 if (begin > cc->cond.range.begin) { // 11 105 if (begin > cc->cond.range.begin) { // 11
119 CharClassPtr cc1; 106 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState);
120 if (cc->right) {
121 cc1 = charClassMerge(cc->right,cc->cond.range.end+1,end,nextState);
122 } else {
123 cc1 = createCharClassRange(cc->cond.range.end+1,end,NULL,NULL);
124 cc1->nextState = nextState;
125 }
126 CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,begin-1,cc->left,NULL); 107 CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,begin-1,cc->left,NULL);
127 cc3->nextState = cc->nextState; 108 cc3->nextState = cc->nextState;
128 CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,cc3,cc1); 109 CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,cc3,cc1);
129 cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; 110 cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer;
130 return cc2; 111 return cc2;
131 } 112 }
132 } 113 }
133 // begin != end && end != cc->cond.range.end 114 // begin != end && end != cc->cond.range.end
134 if (begin == cc->cond.range.end) { // 12 115 if (begin == cc->cond.range.end) { // 12
135 CharClassPtr cc1; 116 CharClassPtr cc1 = mergeCCTree(cc->right,begin+1,end,nextState);
136 if (cc->right) {
137 cc1 = charClassMerge(cc->right,begin+1,end,nextState);
138 } else {
139 cc1 = createCharClassRange(begin+1,end,NULL,NULL);
140 cc1->nextState = nextState;
141 }
142 if (cc->cond.range.begin == cc->cond.range.end) { 117 if (cc->cond.range.begin == cc->cond.range.end) {
143 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc1,cc->right); 118 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc1,cc->right);
144 cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; 119 cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer;
145 return cc2; 120 return cc2;
146 } 121 }
149 CharClassPtr cc2 = createCharClassRange(cc->cond.range.end,cc->cond.range.end,cc1,cc3); 124 CharClassPtr cc2 = createCharClassRange(cc->cond.range.end,cc->cond.range.end,cc1,cc3);
150 cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; 125 cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer;
151 return cc2; 126 return cc2;
152 } 127 }
153 } else if (begin < cc->cond.range.begin) { // 5 128 } else if (begin < cc->cond.range.begin) { // 5
154 CharClassPtr cc1; 129 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState);
155 if (cc->left) { 130 CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState);
156 cc1 = charClassMerge(cc->left,begin,cc->cond.range.begin-1,nextState);
157 } else {
158 cc1 = createCharClassRange(begin,cc->cond.range.begin-1,NULL,NULL);
159 cc1->nextState = nextState;
160 }
161 CharClassPtr cc3;
162 if (cc->right) {
163 cc3 = charClassMerge(cc->right,cc->cond.range.end+1,end,nextState);
164 } else {
165 cc3 = createCharClassRange(cc->cond.range.end+1,end,NULL,NULL);
166 cc3->nextState = nextState;
167 }
168 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc1,cc3); 131 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc1,cc3);
169 cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer; 132 cc2->nextState.bitContainer = cc->nextState.bitContainer | nextState.bitContainer;
170 return cc2; 133 return cc2;
171 } else { 134 } else {
172 printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); 135 printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end);