comparison regexParser/CharClass.cc @ 308:1188debbef10

separate CharClass
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 08 Feb 2016 12:45:45 +0900
parents regexParser/subsetConstruction.cc@3e78631a6222
children 058c87665213
comparison
equal deleted inserted replaced
307:9f0df6ce89a2 308:1188debbef10
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <ctype.h>
4
5 #include "regexParser.h"
6 #include "subsetConstruction.h"
7 #include "node.h"
8 #include "BitVector.h"
9 #include "error.h"
10
11 #include "CharClass.h"
12
13
14 CharClassPtr createCharClassWord(RegexInfoPtr ri) {
15 CharClassPtr cc = NEW(CharClass);
16 cc->type = 'a';
17 cc->left = NULL;
18 cc->right = NULL;
19 cc->cond.w.word = ri->tokenValue;
20 cc->cond.w.length = ri->ptr - ri->tokenValue;
21 cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue;
22 return cc;
23 }
24
25 /*
26 cond.range.begin cond.range.end
27 |----------------|
28 1.b---e
29 2.b------e
30 3.b------------e
31 4.b-----------------------e
32 5.b----------------------------e
33
34 |----------------|
35 6. b---------e
36 7. b----------------e
37 8. b---------------------e
38
39 |----------------|
40 9. b-----e
41 10. b--------e
42 11. b-------------e
43
44 |----------------|
45 12. b-----e
46
47 |----------------|
48 13. b--e
49
50 */
51 CharClassPtr insertCharClass(CharClassPtr cc, unsigned long begin, unsigned long end) {
52 if (begin>end) {
53 unsigned long tmp = begin; begin = end; end = tmp;
54 }
55 if (cc == NULL) {
56 return createCharClassRange(begin,end,0,0,0);
57 }
58 if (end < cc->cond.range.begin ) { // 1
59 if (cc->left) {
60 cc->left = insertCharClass(cc->left,begin,end);
61 } else {
62 cc->left = createCharClassRange(begin,end,0,0,0);
63 }
64 return cc;
65 } else if (end == cc->cond.range.begin ) { // 2
66 cc->cond.range.begin = begin;
67 return cc;
68 } else if (end <= cc->cond.range.end) { // 3,4,6,7,9,10
69 if (begin < cc->cond.range.begin) { // 3,4
70 cc->cond.range.begin = begin;
71 }
72 return cc;
73 } else if (begin > cc->cond.range.end ) { // 13
74 if (cc->right) {
75 cc->right = insertCharClass(cc->right,begin,end);
76 } else {
77 cc->right = createCharClassRange(begin,end,0,0,0);
78 }
79 return cc;
80 }
81 if (cc->right) {
82 CharClassPtr right = cc->right;
83 begin = cc->cond.range.begin;
84 free(cc);
85 return insertCharClass(right,begin,end);
86 }
87 if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { // 12
88 if (end > cc->cond.range.end) cc->cond.range.end = end; // 11,8
89 } else if (begin < cc->cond.range.begin) { // 5
90 cc->cond.range.begin = begin;
91 cc->cond.range.end = end;
92 } else {
93 printf("insertCharClass Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end);
94 }
95 return cc;
96 }
97
98
99 CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right) {
100 CharClassPtr cc = NEW(CharClass);
101 cc->type = 'r';
102 cc->cond.range.begin = begin;
103 cc->cond.range.end = end;
104 cc->cond.w.word = NULL;
105 cc->cond.w.length = 0;
106 cc->left = left;
107 cc->right = right;
108 cc->nextState.bitContainer = state;
109 return cc;
110 }
111
112 CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) ;
113
114 CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) {
115 CharClassPtr cc1;
116 if (cc) {
117 cc1 = charClassMerge(cc,mBegin,mEnd,nextState);
118 } else {
119 cc1 = createCharClassRange(mBegin,mEnd,nextState.bitContainer,NULL,NULL);
120 }
121 return cc1;
122 }
123
124 /*
125 cond.range.begin cond.range.end
126 |----------------|
127 1.b---e
128 2.b------e
129 3.b------------e
130 4.b-----------------------e
131 5.b----------------------------e
132
133 |----------------|
134 6. b---------e
135 7. b----------------e
136 8. b---------------------e
137
138 |----------------|
139 9. b-----e
140 10. b--------e
141 11. b-------------e
142
143 |----------------|
144 12. b-----e
145
146 |----------------|
147 13. b--e
148
149 */
150
151 CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) {
152 // 重なっているccの領域を分割する
153 // 必要ならばnextStateを重ねあわせる
154 // 変更があった場合は新しくリストを作って返す
155 if (end < cc->cond.range.begin ) { // 1
156 if (cc->left) {
157 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,nextState.bitContainer,charClassMerge(cc->left,begin,end,nextState),cc->right);
158 } else {
159 return createCharClassRange(begin,end,nextState.bitContainer,NULL,cc);
160 }
161 } else if (end == cc->cond.range.begin && begin != end ) { // 2
162 CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,nextState);
163 if (cc->cond.range.begin == cc->cond.range.end) {
164 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,
165 cc->nextState.bitContainer | nextState.bitContainer, cc1,cc->right);
166 }
167 CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right);
168 return createCharClassRange(cc->cond.range.begin,cc->cond.range.begin,
169 cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3);
170 } else if (end < cc->cond.range.end) { // range.begin < end
171 if (begin < cc->cond.range.begin) { // 3
172 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState);
173 CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right);
174 return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3);
175 }
176 if (begin == cc->cond.range.begin) { // 6
177 CharClassPtr cc2 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right);
178 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc2);
179 }
180 // 9
181 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL);
182 CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right);
183 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc2,cc3);
184 } else if (end == cc->cond.range.end) {
185 if (begin == cc->cond.range.begin) { // 7
186 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc->right);
187 } else if (begin < cc->cond.range.begin) { // 4
188 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState);
189 return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc->right);
190 }
191 // 10 cond.range.begin < begin
192 CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,NULL,cc->right);
193 return createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,cc2);
194 }
195 if (begin > cc->cond.range.end ) { // 13
196 if (cc->right) {
197 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer,cc->left,charClassMerge(cc->right,begin,end,nextState));
198 } else {
199 return createCharClassRange(begin,end,nextState.bitContainer,cc,NULL);
200 }
201 }
202 if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) {
203 if (end > cc->cond.range.end) { // cond.range.end < end
204 if (begin == cc->cond.range.begin) { // 8
205 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState);
206 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,
207 cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc1);
208 }
209 if (begin > cc->cond.range.begin) { // 11,12
210 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState);
211 CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL);
212 return createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc3,cc1);
213 }
214 }
215 } else if (begin < cc->cond.range.begin) { // 5
216 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState);
217 CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState);
218 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3);
219 } else {
220 printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end);
221 }
222 return cc;
223 }
224
225 void findLeftMost(CharClassPtr next,CharClassWalkerPtr walk) {
226 while (next->left) {
227 CharClassStackPtr ccs = NEW(CharClassStack);
228 ccs->next = walk->stack;
229 ccs->turn = walk->turn;
230 walk->turn = LEFT;
231 ccs->cc = next;
232 walk->stack = ccs;
233 next = next->left;
234 }
235 walk->turn = SELF;
236 walk->next = next;
237 }
238
239 CharClassWalkerPtr createCharClassWalker (CharClassPtr next) {
240 CharClassWalkerPtr walk = NEW(CharClassWalker);
241 walk->next = NULL;
242 walk->stack = NULL;
243 walk->turn = LEFT;
244 if (!next) return walk;
245 findLeftMost(next,walk);
246 return walk;
247 }
248
249 bool hasNext(CharClassWalkerPtr walk) {
250 return walk->next != NULL;
251 }
252
253 CharClassStackPtr charClassStackPop(CharClassWalkerPtr walk) {
254 CharClassStackPtr prev = walk->stack->next;
255 walk->turn = walk->stack->turn;
256 free(walk->stack);
257 walk->stack = prev;
258 return prev;
259 }
260
261 CharClassPtr getNext(CharClassWalkerPtr walk) {
262 CharClassPtr current = walk->next;
263 walk->next = NULL;
264 if (walk->turn == SELF) {
265 if (current->right) {
266 walk->turn = RIGHT;
267 findLeftMost(current->right,walk);
268 return current;
269 }
270 }
271 while (walk->stack) {
272 walk->next = walk->stack->cc;
273 charClassStackPop(walk);
274 if (walk->turn == LEFT) {
275 walk->turn = SELF;
276 return current;
277 }
278 }
279 return current;
280 }
281
282 void setState(CharClassPtr cc, BitVector bi) {
283 // if (word case) setNext(bitVector to the word list)
284 cc->nextState = bi;
285 if (cc->left) {
286 setState(cc->left,bi);
287 }
288 if (cc->right) {
289 setState(cc->right,bi);
290 }
291 }
292
293 CharClassPtr mergeTransition(StatePtr x,CharClassPtr y) {
294 if (x->cc == NULL) {
295 return y;
296 }
297 CharClassWalkerPtr walk = createCharClassWalker(x->cc);
298 CharClassPtr ccy = y;
299 BitVector bi;
300 while (hasNext(walk)) {
301 CharClassPtr cc = getNext(walk);
302 unsigned long begin = cc->cond.range.begin;
303 unsigned long end = cc->cond.range.end;
304 bi = cc->nextState;
305 ccy = charClassMerge(ccy,begin,end,bi);
306 }
307 free(walk);
308 return ccy;
309 }
310
311 /* end */