Mercurial > hg > Applications > Grep
comparison regexParser/CharClass.cc @ 310:df27e6cab846
CharClassMerge with Word ( no match implementation )
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 08 Feb 2016 19:23:37 +0900 |
parents | 058c87665213 |
children | 66012db6a717 |
comparison
equal
deleted
inserted
replaced
309:058c87665213 | 310:df27e6cab846 |
---|---|
14 CharClassPtr cc = NEW(CharClass); | 14 CharClassPtr cc = NEW(CharClass); |
15 cc->cond.range.begin = begin; | 15 cc->cond.range.begin = begin; |
16 cc->cond.range.end = end; | 16 cc->cond.range.end = end; |
17 cc->cond.w.word = NULL; | 17 cc->cond.w.word = NULL; |
18 cc->cond.w.length = 0; | 18 cc->cond.w.length = 0; |
19 cc->cond.w.next = NULL; | |
19 cc->left = left; | 20 cc->left = left; |
20 cc->right = right; | 21 cc->right = right; |
21 cc->nextState.bitContainer = state; | 22 cc->nextState.bitContainer = state; |
22 return cc; | 23 return cc; |
23 } | 24 } |
26 CharClassPtr cc = NEW(CharClass); | 27 CharClassPtr cc = NEW(CharClass); |
27 cc->left = NULL; | 28 cc->left = NULL; |
28 cc->right = NULL; | 29 cc->right = NULL; |
29 cc->cond.w.word = ri->tokenValue; | 30 cc->cond.w.word = ri->tokenValue; |
30 cc->cond.w.length = ri->ptr - ri->tokenValue; | 31 cc->cond.w.length = ri->ptr - ri->tokenValue; |
32 cc->cond.w.next = NULL; | |
31 cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue; | 33 cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue; |
32 return cc; | 34 return cc; |
33 } | 35 } |
36 | |
37 CharClassPtr createCharClassRangeWord(unsigned long begin, unsigned long end,CharClassPtr cc,CharClassPtr m, CharClassPtr left, CharClassPtr right) { | |
38 // same range and seme nextState? | |
39 CharClassPtr ncc = NEW(CharClass); | |
40 ncc->cond.range.begin = begin; | |
41 ncc->cond.range.end = end; | |
42 ncc->cond.w = cc->cond.w; | |
43 ncc->left = left; | |
44 ncc->right = right; | |
45 if (!m) { | |
46 ncc->nextState.bitContainer = cc->nextState.bitContainer; | |
47 return ncc; | |
48 } | |
49 if (m->cond.w.word) { | |
50 WordPtr w = &m->cond.w; | |
51 WordPtr *next = &ncc->cond.w.next; | |
52 while(w->next) { // insert sort? | |
53 WordPtr n = NEW(Word); | |
54 n->word = w->word; | |
55 n->length = w->length; | |
56 *next = n; | |
57 next = &n->next; | |
58 w = w->next; | |
59 } | |
60 *next = &m->cond.w; | |
61 } | |
62 ncc->nextState.bitContainer = m->nextState.bitContainer | cc->nextState.bitContainer; | |
63 return ncc; | |
64 } | |
65 | |
34 | 66 |
35 /* | 67 /* |
36 cond.range.begin cond.range.end | 68 cond.range.begin cond.range.end |
37 |----------------| | 69 |----------------| |
38 1.b---e | 70 1.b---e |
105 return cc; | 137 return cc; |
106 } | 138 } |
107 | 139 |
108 | 140 |
109 | 141 |
110 CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) ; | 142 CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, CharClassPtr m) ; |
111 | 143 |
112 CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) { | 144 CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,CharClassPtr m) { |
113 CharClassPtr cc1; | 145 CharClassPtr cc1; |
114 if (cc) { | 146 if (cc) { |
115 cc1 = charClassMerge(cc,mBegin,mEnd,nextState); | 147 cc1 = charClassMerge(cc,mBegin,mEnd,m); |
116 } else { | 148 } else { |
117 cc1 = createCharClassRange(mBegin,mEnd,nextState.bitContainer,NULL,NULL); | 149 cc1 = createCharClassRangeWord(mBegin,mEnd,m,NULL,NULL,NULL); |
118 } | 150 } |
119 return cc1; | 151 return cc1; |
120 } | 152 } |
121 | 153 |
122 /* | 154 /* |
144 |----------------| | 176 |----------------| |
145 13. b--e | 177 13. b--e |
146 | 178 |
147 */ | 179 */ |
148 | 180 |
149 CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) { | 181 CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, CharClassPtr m) { |
150 // 重なっているccの領域を分割する | 182 // 重なっているccの領域を分割する |
151 // 必要ならばnextStateを重ねあわせる | 183 // 必要ならばnextStateを重ねあわせる |
152 // 変更があった場合は新しくリストを作って返す | 184 // 変更があった場合は新しくリストを作って返す |
153 if (end < cc->cond.range.begin ) { // 1 | 185 if (end < cc->cond.range.begin ) { // 1 |
154 if (cc->left) { | 186 if (cc->left) { |
155 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,nextState.bitContainer,charClassMerge(cc->left,begin,end,nextState),cc->right); | 187 return createCharClassRangeWord(cc->cond.range.begin,cc->cond.range.end,m,NULL,charClassMerge(cc->left,begin,end,m),cc->right); |
156 } else { | 188 } else { |
157 return createCharClassRange(begin,end,nextState.bitContainer,NULL,cc); | 189 return createCharClassRangeWord(begin,end,m,NULL,NULL,cc); |
158 } | 190 } |
159 } else if (end == cc->cond.range.begin && begin != end ) { // 2 | 191 } else if (end == cc->cond.range.begin && begin != end ) { // 2 |
160 CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,nextState); | 192 CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,m); |
161 if (cc->cond.range.begin == cc->cond.range.end) { | 193 if (cc->cond.range.begin == cc->cond.range.end) { |
162 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, | 194 return createCharClassRangeWord(cc->cond.range.begin,cc->cond.range.end, |
163 cc->nextState.bitContainer | nextState.bitContainer, cc1,cc->right); | 195 cc,m, cc1,cc->right); |
164 } | 196 } |
165 CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); | 197 CharClassPtr cc3 = createCharClassRangeWord(cc->cond.range.begin+1,cc->cond.range.end,cc,NULL,cc->left,cc->right); |
166 return createCharClassRange(cc->cond.range.begin,cc->cond.range.begin, | 198 return createCharClassRangeWord(cc->cond.range.begin,cc->cond.range.begin, |
167 cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); | 199 cc,m,cc1,cc3); |
168 } else if (end < cc->cond.range.end) { // range.begin < end | 200 } else if (end < cc->cond.range.end) { // range.begin < end |
169 if (begin < cc->cond.range.begin) { // 3 | 201 if (begin < cc->cond.range.begin) { // 3 |
170 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); | 202 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,m); |
171 CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); | 203 CharClassPtr cc3 = createCharClassRangeWord(end+1,cc->cond.range.end,cc,NULL,cc->left,cc->right); |
172 return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); | 204 return createCharClassRangeWord(cc->cond.range.begin,end,m,cc,cc1,cc3); |
173 } | 205 } |
174 if (begin == cc->cond.range.begin) { // 6 | 206 if (begin == cc->cond.range.begin) { // 6 |
175 CharClassPtr cc2 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); | 207 CharClassPtr cc2 = createCharClassRangeWord(end+1,cc->cond.range.end,cc,NULL,NULL,cc->right); |
176 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc2); | 208 return createCharClassRangeWord(begin,end,cc,m,cc->left,cc2); |
177 } | 209 } |
178 // 9 | 210 // 9 |
179 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); | 211 CharClassPtr cc2 = createCharClassRangeWord(cc->cond.range.begin,begin-1,cc,NULL,cc->left,NULL); |
180 CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); | 212 CharClassPtr cc3 = createCharClassRangeWord(end+1,cc->cond.range.end,cc,NULL,NULL,cc->right); |
181 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc2,cc3); | 213 return createCharClassRangeWord(begin,end,cc,m,cc2,cc3); |
182 } else if (end == cc->cond.range.end) { | 214 } else if (end == cc->cond.range.end) { |
183 if (begin == cc->cond.range.begin) { // 7 | 215 if (begin == cc->cond.range.begin) { // 7 |
184 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc->right); | 216 return createCharClassRangeWord(begin,end,cc,m,cc->left,cc->right); |
185 } else if (begin < cc->cond.range.begin) { // 4 | 217 } else if (begin < cc->cond.range.begin) { // 4 |
186 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); | 218 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,m); |
187 return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc->right); | 219 return createCharClassRangeWord(cc->cond.range.begin,end,cc,m,cc1,cc->right); |
188 } | 220 } |
189 // 10 cond.range.begin < begin | 221 // 10 cond.range.begin < begin |
190 CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,NULL,cc->right); | 222 CharClassPtr cc2 = createCharClassRangeWord(begin,cc->cond.range.end,cc,m,NULL,cc->right); |
191 return createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,cc2); | 223 return createCharClassRangeWord(cc->cond.range.begin,begin-1,cc,NULL,cc->left,cc2); |
192 } | 224 } |
193 if (begin > cc->cond.range.end ) { // 13 | 225 if (begin > cc->cond.range.end ) { // 13 |
194 if (cc->right) { | 226 if (cc->right) { |
195 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer,cc->left,charClassMerge(cc->right,begin,end,nextState)); | 227 return createCharClassRangeWord(cc->cond.range.begin,cc->cond.range.end,cc,NULL,cc->left,charClassMerge(cc->right,begin,end,m)); |
196 } else { | 228 } else { |
197 return createCharClassRange(begin,end,nextState.bitContainer,cc,NULL); | 229 return createCharClassRangeWord(begin,end,m,NULL,cc,NULL); |
198 } | 230 } |
199 } | 231 } |
200 if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { | 232 if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { |
201 if (end > cc->cond.range.end) { // cond.range.end < end | 233 if (end > cc->cond.range.end) { // cond.range.end < end |
202 if (begin == cc->cond.range.begin) { // 8 | 234 if (begin == cc->cond.range.begin) { // 8 |
203 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); | 235 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,m); |
204 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, | 236 return createCharClassRangeWord(cc->cond.range.begin,cc->cond.range.end, |
205 cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc1); | 237 cc,m,cc->left,cc1); |
206 } | 238 } |
207 if (begin > cc->cond.range.begin) { // 11,12 | 239 if (begin > cc->cond.range.begin) { // 11,12 |
208 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); | 240 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,m); |
209 CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); | 241 CharClassPtr cc3 = createCharClassRangeWord(cc->cond.range.begin,begin-1,cc,m,cc->left,NULL); |
210 return createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc3,cc1); | 242 return createCharClassRangeWord(begin,cc->cond.range.end,cc,m,cc3,cc1); |
211 } | 243 } |
212 } | 244 } |
213 } else if (begin < cc->cond.range.begin) { // 5 | 245 } else if (begin < cc->cond.range.begin) { // 5 |
214 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); | 246 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,m); |
215 CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); | 247 CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,m); |
216 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); | 248 return createCharClassRangeWord(cc->cond.range.begin,cc->cond.range.end,cc,m,cc1,cc3); |
217 } else { | 249 } else { |
218 printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); | 250 printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); |
219 } | 251 } |
220 return cc; | 252 return cc; |
221 } | 253 } |
292 if (x->cc == NULL) { | 324 if (x->cc == NULL) { |
293 return y; | 325 return y; |
294 } | 326 } |
295 CharClassWalkerPtr walk = createCharClassWalker(x->cc); | 327 CharClassWalkerPtr walk = createCharClassWalker(x->cc); |
296 CharClassPtr ccy = y; | 328 CharClassPtr ccy = y; |
297 BitVector bi; | |
298 while (hasNext(walk)) { | 329 while (hasNext(walk)) { |
299 CharClassPtr cc = getNext(walk); | 330 CharClassPtr cc = getNext(walk); |
300 unsigned long begin = cc->cond.range.begin; | 331 unsigned long begin = cc->cond.range.begin; |
301 unsigned long end = cc->cond.range.end; | 332 unsigned long end = cc->cond.range.end; |
302 bi = cc->nextState; | 333 ccy = charClassMerge(ccy,begin,end,cc); |
303 ccy = charClassMerge(ccy,begin,end,bi); | |
304 } | 334 } |
305 free(walk); | 335 free(walk); |
306 return ccy; | 336 return ccy; |
307 } | 337 } |
308 | 338 |