Mercurial > hg > Applications > Grep
comparison regexParser/CharClass.cc @ 308:1188debbef10
separate CharClass
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 08 Feb 2016 12:45:45 +0900 |
parents | regexParser/subsetConstruction.cc@3e78631a6222 |
children | 058c87665213 |
comparison
equal
deleted
inserted
replaced
307:9f0df6ce89a2 | 308:1188debbef10 |
---|---|
1 #include <stdio.h> | |
2 #include <stdlib.h> | |
3 #include <ctype.h> | |
4 | |
5 #include "regexParser.h" | |
6 #include "subsetConstruction.h" | |
7 #include "node.h" | |
8 #include "BitVector.h" | |
9 #include "error.h" | |
10 | |
11 #include "CharClass.h" | |
12 | |
13 | |
14 CharClassPtr createCharClassWord(RegexInfoPtr ri) { | |
15 CharClassPtr cc = NEW(CharClass); | |
16 cc->type = 'a'; | |
17 cc->left = NULL; | |
18 cc->right = NULL; | |
19 cc->cond.w.word = ri->tokenValue; | |
20 cc->cond.w.length = ri->ptr - ri->tokenValue; | |
21 cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue; | |
22 return cc; | |
23 } | |
24 | |
25 /* | |
26 cond.range.begin cond.range.end | |
27 |----------------| | |
28 1.b---e | |
29 2.b------e | |
30 3.b------------e | |
31 4.b-----------------------e | |
32 5.b----------------------------e | |
33 | |
34 |----------------| | |
35 6. b---------e | |
36 7. b----------------e | |
37 8. b---------------------e | |
38 | |
39 |----------------| | |
40 9. b-----e | |
41 10. b--------e | |
42 11. b-------------e | |
43 | |
44 |----------------| | |
45 12. b-----e | |
46 | |
47 |----------------| | |
48 13. b--e | |
49 | |
50 */ | |
51 CharClassPtr insertCharClass(CharClassPtr cc, unsigned long begin, unsigned long end) { | |
52 if (begin>end) { | |
53 unsigned long tmp = begin; begin = end; end = tmp; | |
54 } | |
55 if (cc == NULL) { | |
56 return createCharClassRange(begin,end,0,0,0); | |
57 } | |
58 if (end < cc->cond.range.begin ) { // 1 | |
59 if (cc->left) { | |
60 cc->left = insertCharClass(cc->left,begin,end); | |
61 } else { | |
62 cc->left = createCharClassRange(begin,end,0,0,0); | |
63 } | |
64 return cc; | |
65 } else if (end == cc->cond.range.begin ) { // 2 | |
66 cc->cond.range.begin = begin; | |
67 return cc; | |
68 } else if (end <= cc->cond.range.end) { // 3,4,6,7,9,10 | |
69 if (begin < cc->cond.range.begin) { // 3,4 | |
70 cc->cond.range.begin = begin; | |
71 } | |
72 return cc; | |
73 } else if (begin > cc->cond.range.end ) { // 13 | |
74 if (cc->right) { | |
75 cc->right = insertCharClass(cc->right,begin,end); | |
76 } else { | |
77 cc->right = createCharClassRange(begin,end,0,0,0); | |
78 } | |
79 return cc; | |
80 } | |
81 if (cc->right) { | |
82 CharClassPtr right = cc->right; | |
83 begin = cc->cond.range.begin; | |
84 free(cc); | |
85 return insertCharClass(right,begin,end); | |
86 } | |
87 if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { // 12 | |
88 if (end > cc->cond.range.end) cc->cond.range.end = end; // 11,8 | |
89 } else if (begin < cc->cond.range.begin) { // 5 | |
90 cc->cond.range.begin = begin; | |
91 cc->cond.range.end = end; | |
92 } else { | |
93 printf("insertCharClass Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); | |
94 } | |
95 return cc; | |
96 } | |
97 | |
98 | |
99 CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right) { | |
100 CharClassPtr cc = NEW(CharClass); | |
101 cc->type = 'r'; | |
102 cc->cond.range.begin = begin; | |
103 cc->cond.range.end = end; | |
104 cc->cond.w.word = NULL; | |
105 cc->cond.w.length = 0; | |
106 cc->left = left; | |
107 cc->right = right; | |
108 cc->nextState.bitContainer = state; | |
109 return cc; | |
110 } | |
111 | |
112 CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) ; | |
113 | |
114 CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) { | |
115 CharClassPtr cc1; | |
116 if (cc) { | |
117 cc1 = charClassMerge(cc,mBegin,mEnd,nextState); | |
118 } else { | |
119 cc1 = createCharClassRange(mBegin,mEnd,nextState.bitContainer,NULL,NULL); | |
120 } | |
121 return cc1; | |
122 } | |
123 | |
124 /* | |
125 cond.range.begin cond.range.end | |
126 |----------------| | |
127 1.b---e | |
128 2.b------e | |
129 3.b------------e | |
130 4.b-----------------------e | |
131 5.b----------------------------e | |
132 | |
133 |----------------| | |
134 6. b---------e | |
135 7. b----------------e | |
136 8. b---------------------e | |
137 | |
138 |----------------| | |
139 9. b-----e | |
140 10. b--------e | |
141 11. b-------------e | |
142 | |
143 |----------------| | |
144 12. b-----e | |
145 | |
146 |----------------| | |
147 13. b--e | |
148 | |
149 */ | |
150 | |
151 CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) { | |
152 // 重なっているccの領域を分割する | |
153 // 必要ならばnextStateを重ねあわせる | |
154 // 変更があった場合は新しくリストを作って返す | |
155 if (end < cc->cond.range.begin ) { // 1 | |
156 if (cc->left) { | |
157 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,nextState.bitContainer,charClassMerge(cc->left,begin,end,nextState),cc->right); | |
158 } else { | |
159 return createCharClassRange(begin,end,nextState.bitContainer,NULL,cc); | |
160 } | |
161 } else if (end == cc->cond.range.begin && begin != end ) { // 2 | |
162 CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,nextState); | |
163 if (cc->cond.range.begin == cc->cond.range.end) { | |
164 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, | |
165 cc->nextState.bitContainer | nextState.bitContainer, cc1,cc->right); | |
166 } | |
167 CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); | |
168 return createCharClassRange(cc->cond.range.begin,cc->cond.range.begin, | |
169 cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); | |
170 } else if (end < cc->cond.range.end) { // range.begin < end | |
171 if (begin < cc->cond.range.begin) { // 3 | |
172 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); | |
173 CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); | |
174 return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); | |
175 } | |
176 if (begin == cc->cond.range.begin) { // 6 | |
177 CharClassPtr cc2 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); | |
178 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc2); | |
179 } | |
180 // 9 | |
181 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); | |
182 CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); | |
183 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc2,cc3); | |
184 } else if (end == cc->cond.range.end) { | |
185 if (begin == cc->cond.range.begin) { // 7 | |
186 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc->right); | |
187 } else if (begin < cc->cond.range.begin) { // 4 | |
188 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); | |
189 return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc->right); | |
190 } | |
191 // 10 cond.range.begin < begin | |
192 CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,NULL,cc->right); | |
193 return createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,cc2); | |
194 } | |
195 if (begin > cc->cond.range.end ) { // 13 | |
196 if (cc->right) { | |
197 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer,cc->left,charClassMerge(cc->right,begin,end,nextState)); | |
198 } else { | |
199 return createCharClassRange(begin,end,nextState.bitContainer,cc,NULL); | |
200 } | |
201 } | |
202 if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { | |
203 if (end > cc->cond.range.end) { // cond.range.end < end | |
204 if (begin == cc->cond.range.begin) { // 8 | |
205 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); | |
206 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, | |
207 cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc1); | |
208 } | |
209 if (begin > cc->cond.range.begin) { // 11,12 | |
210 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); | |
211 CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); | |
212 return createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc3,cc1); | |
213 } | |
214 } | |
215 } else if (begin < cc->cond.range.begin) { // 5 | |
216 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); | |
217 CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); | |
218 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); | |
219 } else { | |
220 printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); | |
221 } | |
222 return cc; | |
223 } | |
224 | |
225 void findLeftMost(CharClassPtr next,CharClassWalkerPtr walk) { | |
226 while (next->left) { | |
227 CharClassStackPtr ccs = NEW(CharClassStack); | |
228 ccs->next = walk->stack; | |
229 ccs->turn = walk->turn; | |
230 walk->turn = LEFT; | |
231 ccs->cc = next; | |
232 walk->stack = ccs; | |
233 next = next->left; | |
234 } | |
235 walk->turn = SELF; | |
236 walk->next = next; | |
237 } | |
238 | |
239 CharClassWalkerPtr createCharClassWalker (CharClassPtr next) { | |
240 CharClassWalkerPtr walk = NEW(CharClassWalker); | |
241 walk->next = NULL; | |
242 walk->stack = NULL; | |
243 walk->turn = LEFT; | |
244 if (!next) return walk; | |
245 findLeftMost(next,walk); | |
246 return walk; | |
247 } | |
248 | |
249 bool hasNext(CharClassWalkerPtr walk) { | |
250 return walk->next != NULL; | |
251 } | |
252 | |
253 CharClassStackPtr charClassStackPop(CharClassWalkerPtr walk) { | |
254 CharClassStackPtr prev = walk->stack->next; | |
255 walk->turn = walk->stack->turn; | |
256 free(walk->stack); | |
257 walk->stack = prev; | |
258 return prev; | |
259 } | |
260 | |
261 CharClassPtr getNext(CharClassWalkerPtr walk) { | |
262 CharClassPtr current = walk->next; | |
263 walk->next = NULL; | |
264 if (walk->turn == SELF) { | |
265 if (current->right) { | |
266 walk->turn = RIGHT; | |
267 findLeftMost(current->right,walk); | |
268 return current; | |
269 } | |
270 } | |
271 while (walk->stack) { | |
272 walk->next = walk->stack->cc; | |
273 charClassStackPop(walk); | |
274 if (walk->turn == LEFT) { | |
275 walk->turn = SELF; | |
276 return current; | |
277 } | |
278 } | |
279 return current; | |
280 } | |
281 | |
282 void setState(CharClassPtr cc, BitVector bi) { | |
283 // if (word case) setNext(bitVector to the word list) | |
284 cc->nextState = bi; | |
285 if (cc->left) { | |
286 setState(cc->left,bi); | |
287 } | |
288 if (cc->right) { | |
289 setState(cc->right,bi); | |
290 } | |
291 } | |
292 | |
293 CharClassPtr mergeTransition(StatePtr x,CharClassPtr y) { | |
294 if (x->cc == NULL) { | |
295 return y; | |
296 } | |
297 CharClassWalkerPtr walk = createCharClassWalker(x->cc); | |
298 CharClassPtr ccy = y; | |
299 BitVector bi; | |
300 while (hasNext(walk)) { | |
301 CharClassPtr cc = getNext(walk); | |
302 unsigned long begin = cc->cond.range.begin; | |
303 unsigned long end = cc->cond.range.end; | |
304 bi = cc->nextState; | |
305 ccy = charClassMerge(ccy,begin,end,bi); | |
306 } | |
307 free(walk); | |
308 return ccy; | |
309 } | |
310 | |
311 /* end */ |