comparison regexParser/regexParser.cc @ 180:d97bcab546e8 pairPro

implement getNext
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Thu, 24 Dec 2015 17:56:28 +0900
parents 6cf8252f3912
children 7ae0a3070647
comparison
equal deleted inserted replaced
179:6cf8252f3912 180:d97bcab546e8
31 31
32 n->tokenType = type; 32 n->tokenType = type;
33 n->cc = cc; 33 n->cc = cc;
34 n->left = left; 34 n->left = left;
35 n->right = right; 35 n->right = right;
36 if (n->tokenType == 'a' || n->tokenType == 'c') { 36 n->nodeNumber = ri->stateNumber;
37 n->nodeNumber = ri->nodeNumber; 37 ri->stateNumber++;
38 ri->nodeNumber++;
39 } else {
40 n->nodeNumber = SYNTAX_NODENUMBER;
41 }
42 38
43 return n; 39 return n;
44 } 40 }
45 41
46 CharClassPtr createCharClassWord(RegexInfoPtr ri) { 42 CharClassPtr createCharClassWord(RegexInfoPtr ri) {
47 CharClassPtr cc = NEW(CharClass); 43 CharClassPtr cc = NEW(CharClass);
48 cc->type = 'a'; 44 cc->type = 'a';
49 cc->cond.w.word = ri->tokenValue; 45 cc->cond.w.word = ri->tokenValue;
50 cc->cond.w.length = ri->ptr - ri->tokenValue; 46 cc->cond.w.length = ri->ptr - ri->tokenValue;
51 cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue; 47 cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue;
52 cc->nextState = ri->current->bitSet;
53 return cc; 48 return cc;
54 } 49 }
55 50
56 /* 51 /*
57 cond.range.begin cond.range.end 52 cond.range.begin cond.range.end
77 72
78 |----------------| 73 |----------------|
79 13. b--e 74 13. b--e
80 75
81 */ 76 */
82 CharClassPtr insertCharClass(CharClassPtr cc, unsigned long begin, unsigned long end,BitVector nextState) { 77 CharClassPtr insertCharClass(CharClassPtr cc, unsigned long begin, unsigned long end) {
83 if (cc == NULL) { 78 if (cc == NULL) {
84 createCharClassRange(begin,end,nextState.bitContainer,0,0); 79 createCharClassRange(begin,end,0,0);
85 } 80 }
86 if (end < cc->cond.range.begin ) { // 1 81 if (end < cc->cond.range.begin ) { // 1
87 if (cc->left) { 82 if (cc->left) {
88 cc->left = insertCharClass(cc->left,begin,end,nextState); 83 cc->left = insertCharClass(cc->left,begin,end);
89 } else { 84 } else {
90 cc->left = createCharClassRange(begin,end,nextState.bitContainer,0,0); 85 cc->left = createCharClassRange(begin,end,0,0);
91 } 86 }
92 return cc; 87 return cc;
93 } else if (end == cc->cond.range.begin ) { // 2 88 } else if (end == cc->cond.range.begin ) { // 2
94 cc->cond.range.begin = begin; 89 cc->cond.range.begin = begin;
95 return cc; 90 return cc;
98 cc->cond.range.begin = begin; 93 cc->cond.range.begin = begin;
99 } 94 }
100 return cc; 95 return cc;
101 } else if (begin > cc->cond.range.end ) { // 13 96 } else if (begin > cc->cond.range.end ) { // 13
102 if (cc->right) { 97 if (cc->right) {
103 cc->right = insertCharClass(cc->right,begin,end,nextState); 98 cc->right = insertCharClass(cc->right,begin,end);
104 } else { 99 } else {
105 cc->right = createCharClassRange(begin,end,nextState.bitContainer,0,0); 100 cc->right = createCharClassRange(begin,end,0,0);
106 } 101 }
107 return cc; 102 return cc;
108 } 103 }
109 if (cc->right) { 104 if (cc->right) {
110 CharClassPtr right = cc->right; 105 CharClassPtr right = cc->right;
111 begin = cc->cond.range.begin; 106 begin = cc->cond.range.begin;
112 free(cc); 107 free(cc);
113 return insertCharClass(right,begin,end,nextState); 108 return insertCharClass(right,begin,end);
114 } 109 }
115 if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { // 12 110 if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { // 12
116 if (end > cc->cond.range.end) cc->cond.range.end = end; // 11,8 111 if (end > cc->cond.range.end) cc->cond.range.end = end; // 11,8
117 } else if (begin < cc->cond.range.begin) { // 5 112 } else if (begin < cc->cond.range.begin) { // 5
118 cc->cond.range.begin = begin; 113 cc->cond.range.begin = begin;
152 } 147 }
153 148
154 RangeListPtr r = cc->cond.range.next; 149 RangeListPtr r = cc->cond.range.next;
155 cc->cond.range.next = 0; 150 cc->cond.range.next = 0;
156 for (; r; r = r->next) { 151 for (; r; r = r->next) {
157 cc = insertCharClass(cc, r->begin, r->end,ri->current->bitState); 152 cc = insertCharClass(cc, r->begin, r->end);
158 } 153 }
159 154
160 n->cc = ri->current->cc;
161 // TODO literal support 155 // TODO literal support
162 // merge rangeList here 156 // merge rangeList here
163 if (*ri->ptr) ri->ptr++; 157 if (*ri->ptr) ri->ptr++;
164 token(ri); 158 token(ri);
165 return n; 159 return n;
245 token(ri); 239 token(ri);
246 } 240 }
247 if (ri->tokenType == '*') { 241 if (ri->tokenType == '*') {
248 n = createNode(ri,'*',0,n,0); 242 n = createNode(ri,'*',0,n,0);
249 token(ri); 243 token(ri);
250 ri->asterisk = true;
251 ri->node = n;
252 } 244 }
253 return n; 245 return n;
254 } 246 }
255 247
256 RegexInfoPtr createRegexInfo (RegexInfoPtr ri) { 248 RegexInfoPtr createRegexInfo (RegexInfoPtr ri) {
257 ri->stateNumber++; 249 ri->stateNumber++;
258 ri->asterisk = false;
259 ri->current = NEW(State);
260 ri->current->bitState.bitContainer = 0
261 bitSet(ri.current->bitState,ri->stateNumber);
262 ri->current->next = ri->states;
263 ri->current->cc = NULL;
264 ri->current->node = NULL;
265 ri->states = ri->current;
266 return ri; 250 return ri;
267 } 251 }
268 252
269 253
270 // <regex> ::= <regexAtom> | <regexAtom>'*'<regex> | <regexAtom>'|'<regex> | <regexAtom><regexAtom>'*' | <regexAtom><regex> 254 // <regex> ::= <regexAtom> | <regexAtom>'*'<regex> | <regexAtom>'|'<regex> | <regexAtom><regexAtom>'*' | <regexAtom><regex>
273 NodePtr n = regexAtom(ri); 257 NodePtr n = regexAtom(ri);
274 while (ri->tokenType) { 258 while (ri->tokenType) {
275 if (ri->tokenType == '*') { 259 if (ri->tokenType == '*') {
276 n = createNode(ri,'*',0,n,0); 260 n = createNode(ri,'*',0,n,0);
277 token(ri); 261 token(ri);
278 ri->asterisk = true;
279 ri->node = n;
280 return n; 262 return n;
281 } else if (ri->tokenType == '|') { 263 } else if (ri->tokenType == '|') {
282 n = createNode(ri,'|',0,n,0); 264 n = createNode(ri,'|',0,n,0);
283 NodePtr n1 = regex(ri); 265 NodePtr n1 = regex(ri);
284 n->right = n1; 266 n->right = n1;
285 n->node = n;
286 } else if (ri->tokenType == ')') { 267 } else if (ri->tokenType == ')') {
287 return n; 268 return n;
288 } else { 269 } else {
289 n = createNode(ri,'+',0,n,0); 270 n = createNode(ri,'+',0,n,0);
290 if (!ri->asterisk) { 271 NodePtr n1 = regexAtom(ri);
291 StatePtr save = ri->current; 272 n->right = n1;
292 ri = createRegexInfo(ri); 273 }
293 NodePtr n1 = regexAtom(ri); 274 }
294 n->right = n1; 275 return n;
295 ri->current = save; 276 }
296 } else {
297 NodePtr n1 = regexAtom(ri);
298 n->right = n1;
299 }
300 }
301 }
302 return n;
303 }