Mercurial > hg > Applications > Grep
comparison c/regexParser/regexParser.cc @ 147:84d32375383a pairPro
implement insertCharClass
author | masa |
---|---|
date | Tue, 15 Dec 2015 17:14:35 +0900 |
parents | 50217a0545e8 |
children | d1ebba6e117a |
comparison legend:
equal
deleted
inserted
replaced
146:1c74ac7d56ec | 147:84d32375383a |
---|---|
35 n->right = right; | 35 n->right = right; |
36 n->nodeNumber = ri->nodeNumber; | 36 n->nodeNumber = ri->nodeNumber; |
37 ri->nodeNumber++; | 37 ri->nodeNumber++; |
38 | 38 |
39 return n; | 39 return n; |
40 } | |
41 | |
42 CharClassPtr createCharClassRange(unsigned long begin, unsigned long end, CharClassPtr left, CharClassPtr right) { | |
43 CharClassPtr cc = NEW(CharClass); | |
44 cc->type = 'r'; | |
45 cc->cond.range.begin = begin; | |
46 cc->cond.range.end = end; | |
47 cc->left = left; | |
48 cc->right = right; | |
49 cc->nextState.bitContainer = 0; | |
50 return cc; | |
40 } | 51 } |
41 | 52 |
42 CharClassPtr createCharClassWord(RegexInfoPtr ri) { | 53 CharClassPtr createCharClassWord(RegexInfoPtr ri) { |
43 CharClassPtr cc = NEW(CharClass); | 54 CharClassPtr cc = NEW(CharClass); |
44 cc->type = 'a'; | 55 cc->type = 'a'; |
46 cc->cond.w.length = ri->ptr - ri->tokenValue; | 57 cc->cond.w.length = ri->ptr - ri->tokenValue; |
47 cc->nextState.bitContainer = 0; | 58 cc->nextState.bitContainer = 0; |
48 return cc; | 59 return cc; |
49 } | 60 } |
50 | 61 |
51 CharClassPtr charClassMerge(CharClassPtr cc, unsigned char begin, unsigned char end, CharClassPtr next) { | 62 CharClassPtr insertCharClass(CharClassPtr cc, unsigned char begin, unsigned char end) { |
52 CharClassPtr cc1 = NEW(CharClass); | 63 if (end < cc->cond.range.begin ) { |
53 return cc1; | 64 CharClassPtr cc1 = createCharClassRange(cc->begin,cc->end,cc->left,cc->right); |
65 if (cc->left) { | |
66 cc1->left = insertCharClass(cc->left,begin,end); | |
67 return cc1; | |
68 } else { | |
69 CharClassPtr cc2 = createCharClassRange(begin,end,0,0); | |
70 cc1->left = cc2; | |
71 return cc1; | |
72 } | |
73 } else if (end == cc->cond.range.begin ) { | |
74 cc->cond.range.begin = begin; | |
75 } else if (end <= cc->cond.range.end) { | |
76 if (begin < cc->cond.range.begin) { | |
77 cc->cond.range.begin = begin; | |
78 } | |
79 } else if (begin > cc->cond.range.end ) { | |
80 CharClassPtr cc1 = createCharClassRange(cc->begin,cc->end,cc->left,cc->right); | |
81 if (cc->right) { | |
82 cc1->rigt = insertCharClass(cc->right,begin,end); | |
83 return cc1; | |
84 } else { | |
85 CharClassPtr cc2 = createCharClassRange(begin,end,0,0); | |
86 cc1->right = cc2; | |
87 return cc1; | |
88 } | |
89 } else if (begin == cc->cond.range.end ) { | |
90 cc->cond.range.end = end; | |
91 } else if (begin < cc->cond.range.begin) { | |
92 cc->cond.range.begin = begin; | |
93 } | |
94 return cc; | |
54 } | 95 } |
55 | 96 |
56 // <charClass> ::= '['<literal>'-'<literal>']' | 97 // <charClass> ::= '['<literal>'-'<literal>']' |
57 static | 98 static |
58 NodePtr charClass(RegexInfoPtr ri) { | 99 NodePtr charClass(RegexInfoPtr ri) { |
59 CharClassPtr cc = NEW(CharClass); | 100 CharClassPtr cc = NEW(CharClass); |
60 NodePtr n = createNode(ri,'c',cc,0,0); | 101 NodePtr n = createNode(ri,'c',cc,0,0); |
61 cc->type = 'r'; | 102 cc->type = 'r'; |
62 cc->nextState.bitContainer = 0; | 103 cc->nextState.bitContainer = 0; |
63 RangeListPtr rangeList = &cc->cond.range; | 104 RangeListPtr rangeList = &cc->cond.range; |
64 rangeList->begin = (unsigned long)*ri->ptr; | 105 rangeList->begin = *ri->ptr; |
65 rangeList->end = (unsigned long)*ri->ptr; | 106 rangeList->end = *ri->ptr; |
66 rangeList->next = NULL; | 107 rangeList->next = NULL; |
67 | 108 |
68 for (ri->ptr++; *ri->ptr && *ri->ptr != ']'; ri->ptr++) { | 109 for (ri->ptr++; *ri->ptr && *ri->ptr != ']'; ri->ptr++) { |
69 if (*ri->ptr == '-') { | 110 if (*ri->ptr == '-') { |
70 rangeList->end = (unsigned long)*(ri->ptr + 1); | 111 rangeList->end = *(ri->ptr + 1); |
71 ri->ptr++; | 112 ri->ptr++; |
72 continue; | 113 continue; |
73 } | 114 } |
74 if (ri->ptr[0] == 0 || ri->ptr[0] == ']') break; | 115 if (ri->ptr[0] == 0 || ri->ptr[0] == ']') break; |
75 if (ri->ptr[0] == rangeList->end + 1) { | 116 if (ri->ptr[0] == rangeList->end + 1) { |
76 rangeList->end = (unsigned long)*ri->ptr; | 117 rangeList->end = *ri->ptr; |
77 continue; | 118 continue; |
78 } | 119 } |
79 rangeList->next = NEW(RangeList); | 120 rangeList->next = NEW(RangeList); |
80 rangeList = rangeList->next; | 121 rangeList = rangeList->next; |
81 rangeList->begin = (unsigned long)*ri->ptr; | 122 rangeList->begin = *ri->ptr; |
82 rangeList->end = (unsigned long)*ri->ptr; | 123 rangeList->end = *ri->ptr; |
83 rangeList->next = NULL; | 124 rangeList->next = NULL; |
84 } | 125 } |
126 | |
127 for (RangeListPtr r = &cc->cond.range; r; r = r->next) { | |
128 cc = insertCharClass(cc, r->begin, r->end); | |
129 } | |
130 | |
131 n->cc = cc; | |
85 // TODO literal support | 132 // TODO literal support |
86 // merge rangeList here | 133 // merge rangeList here |
87 if (*ri->ptr) ri->ptr++; | 134 if (*ri->ptr) ri->ptr++; |
88 token(ri); | 135 token(ri); |
89 return n; | 136 return n; |