Mercurial > hg > Applications > Grep
comparison c/regexParser/regexParser.cc @ 142:de0f332d560c pairPro
insert charClassMerge function
author | masa |
---|---|
date | Fri, 11 Dec 2015 14:54:00 +0900 |
parents | 15815fcb6c2f |
children | 32977f5a2ed0 |
comparison
equal
deleted
inserted
replaced
141:71f36a59cf6a | 142:de0f332d560c |
---|---|
28 | 28 |
29 static | 29 static |
30 CharClassPtr createCharClassWord(RegexInfoPtr ri) { | 30 CharClassPtr createCharClassWord(RegexInfoPtr ri) { |
31 CharClassPtr cc = NEW(CharClass); | 31 CharClassPtr cc = NEW(CharClass); |
32 cc->type = 'a'; | 32 cc->type = 'a'; |
33 cc->cond = NEW(Condition); | 33 cc->cond.w.word = ri->tokenValue; |
34 cc->cond->w = NEW(Word); | 34 cc->cond.w.length = ri->ptr - ri->tokenValue; |
35 cc->cond->w->word = ri->tokenValue; | 35 cc->nextState.bitContainer = 0; |
36 cc->cond->w->length = ri->ptr - ri->tokenValue; | |
37 token(ri); | 36 token(ri); |
38 | 37 |
39 return cc; | 38 return cc; |
40 } | 39 } |
41 | 40 |
51 ri->nodeNumber++; | 50 ri->nodeNumber++; |
52 | 51 |
53 return n; | 52 return n; |
54 } | 53 } |
55 | 54 |
| | 55 CharClassPtr charClassMerge(CharClassPtr src, CharClassPtr add) { |
| | 56 // 重なっているccの領域を分割する |
| | 57 // 必要ならばnextStateを重ねあわせる |
| | 58 // 変更があった場合は新しくリストを作って返す |
| | 59 if (src->type == 'a') { |
| | 60 if (add->type == 'a') { |
| | 61 if (src->cond.w.word[0] > add->cond.w.word[0]) { |
| | 62 // add のほうが小さいので小さい順のccをつくる |
| | 63 CharClassPtr left = charClassMerge(add->left.src); |
| | 64 return createCharClassWord(add->cond->w.word, left, add->right); |
| | 65 } else { |
| | 66 |
| | 67 } |
| | 68 } else if (add->type == 'c') { |
| | 69 // |
| | 70 if (src->cond.w.word[0] < add->cond.range.begin) { |
| | 71 |
| | 72 } else (src->cond->w.word[0] < add->end) { |
| | 73 } else if (src->type == 'c') { |
| | 74 |
| | 75 } |
| | 76 } |
56 | 77 |
57 // <charClass> ::= '['<literal>'-'<literal>']' | 78 // <charClass> ::= '['<literal>'-'<literal>']' |
58 static | 79 static |
59 NodePtr charClass(RegexInfoPtr ri) { | 80 NodePtr charClass(RegexInfoPtr ri) { |
60 CharClassPtr cc = NEW(CharClass); | 81 CharClassPtr cc = NEW(CharClass); |
61 NodePtr n = createNode(ri,'c',cc,0,0); | 82 NodePtr n = createNode(ri,'c',cc,0,0); |
62 cc->type = 'r'; | 83 cc->type = 'r'; |
63 cc->cond = NEW(Condition); | 84 cc->nextState.bitContainer = 0; |
64 cc->cond->range = NEW(RangeList); | 85 RangeListPtr rangeList = &cc->cond.range; |
65 cc->cond->range->begin = ri->ptr; | 86 rangeList->begin = ri->ptr; |
66 cc->cond->range->end = ri->ptr; | 87 rangeList->end = ri->ptr; |
67 cc->cond->range->next = NULL; | |
68 | |
69 | |
70 RangeListPtr rangeList = cc->cond->range; | |
71 | 88 |
72 for (ri->ptr++; *ri->ptr && *ri->ptr != ']'; ri->ptr++) { | 89 for (ri->ptr++; *ri->ptr && *ri->ptr != ']'; ri->ptr++) { |
73 if (*ri->ptr == '-') { | 90 if (*ri->ptr == '-') { |
74 rangeList->end = ri->ptr + 1; | 91 rangeList->end = ri->ptr + 1; |
75 ri->ptr++; | 92 ri->ptr++; |
84 rangeList = rangeList->next; | 101 rangeList = rangeList->next; |
85 rangeList->begin = ri->ptr; | 102 rangeList->begin = ri->ptr; |
86 rangeList->end = ri->ptr; | 103 rangeList->end = ri->ptr; |
87 rangeList->next = NULL; | 104 rangeList->next = NULL; |
88 } | 105 } |
89 // TODO literal support | 106 // TODO literal support |
90 | 107 // merge rangeList here |
91 if (*ri->ptr) ri->ptr++; | 108 if (*ri->ptr) ri->ptr++; |
92 token(ri); | 109 token(ri); |
93 return n; | 110 return n; |
94 } | 111 } |
95 | 112 |
110 ri->tokenValue = NULL; | 127 ri->tokenValue = NULL; |
111 return; | 128 return; |
112 } else if (ri->ptr[0] == ')') { | 129 } else if (ri->ptr[0] == ')') { |
113 ri->ptr++; | 130 ri->ptr++; |
114 ri->tokenType = ')'; | 131 ri->tokenType = ')'; |
115 ri->tokenValue = ri->ptr; | |
116 return; | |
117 } else if (ri->ptr[0] == '[') { | |
118 ri->ptr++; | |
119 ri->tokenType = 'c'; | |
120 ri->tokenValue = ri->ptr; | 132 ri->tokenValue = ri->ptr; |
121 return; | 133 return; |
122 } else if (ri->ptr[0] == ']') { | 134 } else if (ri->ptr[0] == ']') { |
123 ri->ptr++; | 135 ri->ptr++; |
124 ri->tokenType = ']'; | 136 ri->tokenType = ']'; |
142 \0xa5 | 154 \0xa5 |
143 \[ | 155 \[ |
144 \\ | 156 \\ |
145 \utf-8 etc... | 157 \utf-8 etc... |
146 */ | 158 */ |
| | 159 } else if (ri->ptr[0] == '[') { |
| | 160 ri->ptr++; |
| | 161 ri->tokenType = 'c'; |
| | 162 ri->tokenValue = ri->ptr; |
| | 163 return; |
147 } else { | 164 } else { |
148 ri->tokenType = 'a'; | 165 ri->tokenType = 'a'; |
149 ri->tokenValue = ri->ptr; | 166 ri->tokenValue = ri->ptr; |
150 if (isalnum(ri->ptr[0])) { | 167 if (isalnum(ri->ptr[0])) { |
151 ri->ptr++; | 168 ri->ptr++; |