Mercurial > hg > Applications > Grep
comparison regexParser/regexParser.cc @ 180:d97bcab546e8 pairPro
implement getNext
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 24 Dec 2015 17:56:28 +0900 |
parents | 6cf8252f3912 |
children | 7ae0a3070647 |
comparison
equal
deleted
inserted
replaced
179:6cf8252f3912 | 180:d97bcab546e8 |
---|---|
31 | 31 |
32 n->tokenType = type; | 32 n->tokenType = type; |
33 n->cc = cc; | 33 n->cc = cc; |
34 n->left = left; | 34 n->left = left; |
35 n->right = right; | 35 n->right = right; |
36 if (n->tokenType == 'a' || n->tokenType == 'c') { | 36 n->nodeNumber = ri->stateNumber; |
37 n->nodeNumber = ri->nodeNumber; | 37 ri->stateNumber++; |
38 ri->nodeNumber++; | |
39 } else { | |
40 n->nodeNumber = SYNTAX_NODENUMBER; | |
41 } | |
42 | 38 |
43 return n; | 39 return n; |
44 } | 40 } |
45 | 41 |
46 CharClassPtr createCharClassWord(RegexInfoPtr ri) { | 42 CharClassPtr createCharClassWord(RegexInfoPtr ri) { |
47 CharClassPtr cc = NEW(CharClass); | 43 CharClassPtr cc = NEW(CharClass); |
48 cc->type = 'a'; | 44 cc->type = 'a'; |
49 cc->cond.w.word = ri->tokenValue; | 45 cc->cond.w.word = ri->tokenValue; |
50 cc->cond.w.length = ri->ptr - ri->tokenValue; | 46 cc->cond.w.length = ri->ptr - ri->tokenValue; |
51 cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue; | 47 cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue; |
52 cc->nextState = ri->current->bitSet; | |
53 return cc; | 48 return cc; |
54 } | 49 } |
55 | 50 |
56 /* | 51 /* |
57 cond.range.begin cond.range.end | 52 cond.range.begin cond.range.end |
77 | 72 |
78 |----------------| | 73 |----------------| |
79 13. b--e | 74 13. b--e |
80 | 75 |
81 */ | 76 */ |
82 CharClassPtr insertCharClass(CharClassPtr cc, unsigned long begin, unsigned long end,BitVector nextState) { | 77 CharClassPtr insertCharClass(CharClassPtr cc, unsigned long begin, unsigned long end) { |
83 if (cc == NULL) { | 78 if (cc == NULL) { |
84 createCharClassRange(begin,end,nextState.bitContainer,0,0); | 79 createCharClassRange(begin,end,0,0); |
85 } | 80 } |
86 if (end < cc->cond.range.begin ) { // 1 | 81 if (end < cc->cond.range.begin ) { // 1 |
87 if (cc->left) { | 82 if (cc->left) { |
88 cc->left = insertCharClass(cc->left,begin,end,nextState); | 83 cc->left = insertCharClass(cc->left,begin,end); |
89 } else { | 84 } else { |
90 cc->left = createCharClassRange(begin,end,nextState.bitContainer,0,0); | 85 cc->left = createCharClassRange(begin,end,0,0); |
91 } | 86 } |
92 return cc; | 87 return cc; |
93 } else if (end == cc->cond.range.begin ) { // 2 | 88 } else if (end == cc->cond.range.begin ) { // 2 |
94 cc->cond.range.begin = begin; | 89 cc->cond.range.begin = begin; |
95 return cc; | 90 return cc; |
98 cc->cond.range.begin = begin; | 93 cc->cond.range.begin = begin; |
99 } | 94 } |
100 return cc; | 95 return cc; |
101 } else if (begin > cc->cond.range.end ) { // 13 | 96 } else if (begin > cc->cond.range.end ) { // 13 |
102 if (cc->right) { | 97 if (cc->right) { |
103 cc->right = insertCharClass(cc->right,begin,end,nextState); | 98 cc->right = insertCharClass(cc->right,begin,end); |
104 } else { | 99 } else { |
105 cc->right = createCharClassRange(begin,end,nextState.bitContainer,0,0); | 100 cc->right = createCharClassRange(begin,end,0,0); |
106 } | 101 } |
107 return cc; | 102 return cc; |
108 } | 103 } |
109 if (cc->right) { | 104 if (cc->right) { |
110 CharClassPtr right = cc->right; | 105 CharClassPtr right = cc->right; |
111 begin = cc->cond.range.begin; | 106 begin = cc->cond.range.begin; |
112 free(cc); | 107 free(cc); |
113 return insertCharClass(right,begin,end,nextState); | 108 return insertCharClass(right,begin,end); |
114 } | 109 } |
115 if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { // 12 | 110 if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { // 12 |
116 if (end > cc->cond.range.end) cc->cond.range.end = end; // 11,8 | 111 if (end > cc->cond.range.end) cc->cond.range.end = end; // 11,8 |
117 } else if (begin < cc->cond.range.begin) { // 5 | 112 } else if (begin < cc->cond.range.begin) { // 5 |
118 cc->cond.range.begin = begin; | 113 cc->cond.range.begin = begin; |
152 } | 147 } |
153 | 148 |
154 RangeListPtr r = cc->cond.range.next; | 149 RangeListPtr r = cc->cond.range.next; |
155 cc->cond.range.next = 0; | 150 cc->cond.range.next = 0; |
156 for (; r; r = r->next) { | 151 for (; r; r = r->next) { |
157 cc = insertCharClass(cc, r->begin, r->end,ri->current->bitState); | 152 cc = insertCharClass(cc, r->begin, r->end); |
158 } | 153 } |
159 | 154 |
160 n->cc = ri->current->cc; | |
161 // TODO literal support | 155 // TODO literal support |
162 // merge rangeList here | 156 // merge rangeList here |
163 if (*ri->ptr) ri->ptr++; | 157 if (*ri->ptr) ri->ptr++; |
164 token(ri); | 158 token(ri); |
165 return n; | 159 return n; |
245 token(ri); | 239 token(ri); |
246 } | 240 } |
247 if (ri->tokenType == '*') { | 241 if (ri->tokenType == '*') { |
248 n = createNode(ri,'*',0,n,0); | 242 n = createNode(ri,'*',0,n,0); |
249 token(ri); | 243 token(ri); |
250 ri->asterisk = true; | |
251 ri->node = n; | |
252 } | 244 } |
253 return n; | 245 return n; |
254 } | 246 } |
255 | 247 |
256 RegexInfoPtr createRegexInfo (RegexInfoPtr ri) { | 248 RegexInfoPtr createRegexInfo (RegexInfoPtr ri) { |
257 ri->stateNumber++; | 249 ri->stateNumber++; |
258 ri->asterisk = false; | |
259 ri->current = NEW(State); | |
260 ri->current->bitState.bitContainer = 0 | |
261 bitSet(ri.current->bitState,ri->stateNumber); | |
262 ri->current->next = ri->states; | |
263 ri->current->cc = NULL; | |
264 ri->current->node = NULL; | |
265 ri->states = ri->current; | |
266 return ri; | 250 return ri; |
267 } | 251 } |
268 | 252 |
269 | 253 |
270 // <regex> ::= <regexAtom> | <regexAtom>'*'<regex> | <regexAtom>'|'<regex> | <regexAtom><regexAtom>'*' | <regexAtom><regex> | 254 // <regex> ::= <regexAtom> | <regexAtom>'*'<regex> | <regexAtom>'|'<regex> | <regexAtom><regexAtom>'*' | <regexAtom><regex> |
273 NodePtr n = regexAtom(ri); | 257 NodePtr n = regexAtom(ri); |
274 while (ri->tokenType) { | 258 while (ri->tokenType) { |
275 if (ri->tokenType == '*') { | 259 if (ri->tokenType == '*') { |
276 n = createNode(ri,'*',0,n,0); | 260 n = createNode(ri,'*',0,n,0); |
277 token(ri); | 261 token(ri); |
278 ri->asterisk = true; | |
279 ri->node = n; | |
280 return n; | 262 return n; |
281 } else if (ri->tokenType == '|') { | 263 } else if (ri->tokenType == '|') { |
282 n = createNode(ri,'|',0,n,0); | 264 n = createNode(ri,'|',0,n,0); |
283 NodePtr n1 = regex(ri); | 265 NodePtr n1 = regex(ri); |
284 n->right = n1; | 266 n->right = n1; |
285 n->node = n; | |
286 } else if (ri->tokenType == ')') { | 267 } else if (ri->tokenType == ')') { |
287 return n; | 268 return n; |
288 } else { | 269 } else { |
289 n = createNode(ri,'+',0,n,0); | 270 n = createNode(ri,'+',0,n,0); |
290 if (!ri->asterisk) { | 271 NodePtr n1 = regexAtom(ri); |
291 StatePtr save = ri->current; | 272 n->right = n1; |
292 ri = createRegexInfo(ri); | 273 } |
293 NodePtr n1 = regexAtom(ri); | 274 } |
294 n->right = n1; | 275 return n; |
295 ri->current = save; | 276 } |
296 } else { | |
297 NodePtr n1 = regexAtom(ri); | |
298 n->right = n1; | |
299 } | |
300 } | |
301 } | |
302 return n; | |
303 } |