comparison c/regexParser/regexParser.cc @ 133:ccc673449351 pairPro

Look ahead '*'
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Thu, 03 Dec 2015 20:47:11 +0900
parents fb4c8adf3a80
children dbafc753078e
comparison
equal deleted inserted replaced
132:fb4c8adf3a80 133:ccc673449351
109 NodePtr group(RegexInfoPtr ri) { 109 NodePtr group(RegexInfoPtr ri) {
110 return regex(ri); 110 return regex(ri);
111 } 111 }
112 112
113 static 113 static
114 void asterCheck(RegexInfoPtr ri) {
115 if (ri->ptr[0] == '*') {
116 ri->asterFlag = true;
117 }
118 return;
119 }
120
121 static
114 void token(RegexInfoPtr ri) { 122 void token(RegexInfoPtr ri) {
115 while (ri->ptr[0] != '\0') { 123 while (ri->ptr[0] != '\0') {
116 if (ri->ptr[0] == '('){ 124 if (ri->ptr[0] == '('){
117 ri->ptr++; 125 ri->ptr++;
118 ri->tokenType = '('; 126 ri->tokenType = '(';
120 return; 128 return;
121 } else if (ri->ptr[0] == ')') { 129 } else if (ri->ptr[0] == ')') {
122 ri->ptr++; 130 ri->ptr++;
123 ri->tokenType = ')'; 131 ri->tokenType = ')';
124 ri->tokenValue = ri->ptr; 132 ri->tokenValue = ri->ptr;
133 asterCheck(ri);
125 return; 134 return;
126 } else if (ri->ptr[0] == '[') { 135 } else if (ri->ptr[0] == '[') {
127 ri->ptr++; 136 ri->ptr++;
128 ri->tokenType = 'c'; 137 ri->tokenType = 'c';
129 ri->tokenValue = ri->ptr; 138 ri->tokenValue = ri->ptr;
130 return; 139 return;
131 } else if (ri->ptr[0] == '|'){ 140 } else if (ri->ptr[0] == '|'){
132 ri->ptr++; 141 ri->ptr++;
133 ri->tokenType = '|'; 142 ri->tokenType = '|';
134 ri->tokenValue = NULL; 143 ri->tokenValue = NULL;
135 ri->orFlag++; 144 ri->orNum++;
136 return; 145 return;
137 } else if (ri->ptr[0] == '*'){ 146 } else if (ri->ptr[0] == '*'){
138 ri->ptr++; 147 ri->ptr++;
139 ri->tokenType = '*'; 148 ri->tokenType = '*';
140 ri->tokenValue = NULL; 149 ri->tokenValue = NULL;
152 ri->tokenType = 'a'; 161 ri->tokenType = 'a';
153 ri->tokenValue = ri->ptr; 162 ri->tokenValue = ri->ptr;
154 while (isalnum(ri->ptr[0])) { 163 while (isalnum(ri->ptr[0])) {
155 ri->ptr++; 164 ri->ptr++;
156 } 165 }
157 if (ri->ptr[0] == '*') { 166 asterCheck(ri);
158 ri->astarFlag = true;
159 }
160 return; 167 return;
161 } 168 }
162 } 169 }
163 return; 170 return;
164 } 171 }
174 if (ri->tokenType == '(') n = group(ri); 181 if (ri->tokenType == '(') n = group(ri);
175 182
176 return n; 183 return n;
177 } 184 }
178 185
179 // <regex> ::= <regexAtom> | <regexAtom>'*'<regex> | <regexAtom>'|'<regex> | <regexAtom><regex> | 186 // <regex> ::= <regexAtom> | <regexAtom>'*'<regex> | <regexAtom>'|'<regex> | <regexAtom><regexAtom>'*' | <regexAtom><regex>
180 NodePtr regex(RegexInfoPtr ri) { 187 NodePtr regex(RegexInfoPtr ri) {
181 NodePtr n = regexAtom(ri); 188 NodePtr n = regexAtom(ri);
182 while (ri->ptr[0]) { 189 while (ri->ptr[0]) {
183 token(ri); 190 token(ri);
184 if (ri->tokenType == '*') { 191 if (ri->tokenType == '*') {
185 n = createNode(ri,'*',n,0); 192 n = createNode(ri,'*',n,0);
193 ri->asterFlag = false;
186 } else if (ri->tokenType == '|') { 194 } else if (ri->tokenType == '|') {
187 NodePtr n1 = regex(ri); 195 NodePtr n1 = regex(ri);
188 n = createNode(ri,'|',n,n1); 196 n = createNode(ri,'|',n,n1);
197 } else if (ri->tokenType == '(') {
198 ri->ptr--;
199 NodePtr n1 = regex(ri);
200 if (ri->asterFlag == true) {
201 n1 = createNode(ri,'*',n1,0);
202 ri->asterFlag = false;
203 ri->ptr++;
204 }
205 n = createNode(ri,'+',n,n1);
189 } else if (ri->tokenType == ')') { 206 } else if (ri->tokenType == ')') {
190 if (ri->orFlag != 0) { 207 if (ri->orNum != 0 && ri->ptr[0] != ')') {
191 if (ri->ptr[0] != ')') ri->ptr--; 208 ri->ptr--;
192 ri->orFlag--; 209 ri->orNum--;
193 } 210 }
194 return n; 211 return n;
195 } else { 212 } else {
196 // return NULL 213 NodePtr n1 = NULL;
197 NodePtr n1 = regex(ri); 214 if (ri->asterFlag == true) {
215 ri->ptr = ri->tokenValue;
216 NodePtr n1 = regexAtom(ri);
217 n1 = createNode(ri,'*',n1,0);
218 ri->asterFlag = false;
219 ri->ptr++;
220 } else {
221 n1 = regex(ri);
222 }
198 n = createNode(ri,'+',n,n1); 223 n = createNode(ri,'+',n,n1);
199 } 224 }
200 } return n; 225 } return n;
201 } 226 }