comparison c/regexParser/regexParser.cc @ 134:dbafc753078e pairPro

fix concatenation & selection & grouping
author masa
date Fri, 04 Dec 2015 17:45:09 +0900
parents ccc673449351
children e1a262ec75f0
comparison
equal deleted inserted replaced
133:ccc673449351 134:dbafc753078e
3 #include <string.h> 3 #include <string.h>
4 #include <ctype.h> 4 #include <ctype.h>
5 #include "regexParser.h" 5 #include "regexParser.h"
6 #include "error.h" 6 #include "error.h"
7 7
8 static NodePtr allocateNode();
9 static NodePtr createNode(RegexInfoPtr,unsigned char,NodePtr,NodePtr);
10 static NodePtr charClass(RegexInfoPtr); 8 static NodePtr charClass(RegexInfoPtr);
11 static NodePtr group(RegexInfoPtr);
12 static void token(RegexInfoPtr); 9 static void token(RegexInfoPtr);
13 static NodePtr regexAtom(RegexInfoPtr); 10 static NodePtr regexAtom(RegexInfoPtr);
14 NodePtr regex(RegexInfoPtr); 11 NodePtr regex(RegexInfoPtr);
15 12
16 /** 13 /**
35 cc->type = 'a'; 32 cc->type = 'a';
36 cc->cond = NEW(Condition); 33 cc->cond = NEW(Condition);
37 cc->cond->w = NEW(Word); 34 cc->cond->w = NEW(Word);
38 cc->cond->w->word = ri->tokenValue; 35 cc->cond->w->word = ri->tokenValue;
39 cc->cond->w->length = ri->ptr - ri->tokenValue; 36 cc->cond->w->length = ri->ptr - ri->tokenValue;
37 token(ri);
40 38
41 return cc; 39 return cc;
42 } 40 }
43 41
44 static 42 static
45 NodePtr createNode(RegexInfoPtr ri,unsigned char type, NodePtr left, NodePtr right) { 43 NodePtr createNode(RegexInfoPtr ri,unsigned char type,CharClassPtr cc, NodePtr left, NodePtr right) {
46 NodePtr n = allocateNode(); 44 NodePtr n = allocateNode();
47 45
48 n->tokenType = type; 46 n->tokenType = type;
47 n->cc = cc;
49 n->left = left; 48 n->left = left;
50 n->right = right; 49 n->right = right;
51 n->nodeNumber = ri->nodeNumber; 50 n->nodeNumber = ri->nodeNumber;
52 ri->nodeNumber++; 51 ri->nodeNumber++;
53
54 if (type == 'a') {
55 n->cc = createCharClassWord(ri);
56 }
57 52
58 return n; 53 return n;
59 } 54 }
60 55
61 56
62 // <charClass> ::= '['<literal>'-'<literal>']' 57 // <charClass> ::= '['<literal>'-'<literal>']'
63 static 58 static
64 NodePtr charClass(RegexInfoPtr ri) { 59 NodePtr charClass(RegexInfoPtr ri) {
65 NodePtr n = allocateNode();
66
67 n->tokenType = 'c';
68 n->nodeNumber = ri->nodeNumber;
69 ri->nodeNumber++;
70 60
71 CharClassPtr cc = NEW(CharClass); 61 CharClassPtr cc = NEW(CharClass);
72 cc->type = 'r'; 62 cc->type = 'r';
73 cc->cond = NEW(Condition); 63 cc->cond = NEW(Condition);
74 cc->cond->range = NEW(RangeList); 64 cc->cond->range = NEW(RangeList);
91 i++; 81 i++;
92 } 82 }
93 // TODO literal support 83 // TODO literal support
94 84
95 rangeList->end = ri->ptr + i - 1; 85 rangeList->end = ri->ptr + i - 1;
96 86 NodePtr n = createNode(ri,'c',cc,0,0);
87 token(ri);
97 return n; 88 return n;
98 } 89 }
99 90
100 // <literal> ::= [a-z][A-Z][0-9] 91 // <literal> ::= [a-z][A-Z][0-9]
101 static 92 static
102 NodePtr literal(RegexInfoPtr ri) { 93 NodePtr literal(RegexInfoPtr ri) {
103 NodePtr n = createNode(ri,'a',0,0); 94 CharClassPtr cc = createCharClassWord(ri);
95 NodePtr n = createNode(ri,'a',cc,0,0);
104 return n; 96 return n;
105 }
106
107 // <group> ::= '('<regex>')'
108 static
109 NodePtr group(RegexInfoPtr ri) {
110 return regex(ri);
111 }
112
113 static
114 void asterCheck(RegexInfoPtr ri) {
115 if (ri->ptr[0] == '*') {
116 ri->asterFlag = true;
117 }
118 return;
119 } 97 }
120 98
121 static 99 static
122 void token(RegexInfoPtr ri) { 100 void token(RegexInfoPtr ri) {
123 while (ri->ptr[0] != '\0') { 101 while (ri->ptr[0] != '\0') {
128 return; 106 return;
129 } else if (ri->ptr[0] == ')') { 107 } else if (ri->ptr[0] == ')') {
130 ri->ptr++; 108 ri->ptr++;
131 ri->tokenType = ')'; 109 ri->tokenType = ')';
132 ri->tokenValue = ri->ptr; 110 ri->tokenValue = ri->ptr;
133 asterCheck(ri);
134 return; 111 return;
135 } else if (ri->ptr[0] == '[') { 112 } else if (ri->ptr[0] == '[') {
136 ri->ptr++; 113 ri->ptr++;
137 ri->tokenType = 'c'; 114 ri->tokenType = 'c';
138 ri->tokenValue = ri->ptr; 115 ri->tokenValue = ri->ptr;
158 \utf-8 etc... 135 \utf-8 etc...
159 */ 136 */
160 } else { 137 } else {
161 ri->tokenType = 'a'; 138 ri->tokenType = 'a';
162 ri->tokenValue = ri->ptr; 139 ri->tokenValue = ri->ptr;
163 while (isalnum(ri->ptr[0])) { 140 if (isalnum(ri->ptr[0])) {
164 ri->ptr++; 141 ri->ptr++;
165 } 142 }
166 asterCheck(ri);
167 return; 143 return;
168 } 144 }
169 } 145 }
146 ri->tokenType = 0;
147 ri->tokenValue = NULL;
170 return; 148 return;
171 } 149 }
172 150
173 // <regexAtom> ::= <literal>|<charClass>|<group> 151 // <regexAtom> ::= <literal>|<charClass>|<group>
174 static 152 static
175 NodePtr regexAtom(RegexInfoPtr ri) { 153 NodePtr regexAtom(RegexInfoPtr ri) {
176 154
177 token(ri);
178 NodePtr n = NULL; 155 NodePtr n = NULL;
179 if (ri->tokenType == 'c') n = charClass(ri); 156 if (ri->tokenType == 'c') n = charClass(ri);
180 if (ri->tokenType == 'a') n = literal(ri); 157 else if (ri->tokenType == 'a') n = literal(ri);
181 if (ri->tokenType == '(') n = group(ri); 158 else if (ri->tokenType == '(') {
159 n = regex(ri);
160 if (ri->tokenType != ')') {
161 // error
162 }
163 token(ri);
164 }
165 if (ri->tokenType == '*') {
166 n = createNode(ri,'*',0,n,0);
167 token(ri);
168 }
182 169
183 return n; 170 return n;
184 } 171 }
185 172
186 // <regex> ::= <regexAtom> | <regexAtom>'*'<regex> | <regexAtom>'|'<regex> | <regexAtom><regexAtom>'*' | <regexAtom><regex> 173 // <regex> ::= <regexAtom> | <regexAtom>'*'<regex> | <regexAtom>'|'<regex> | <regexAtom><regexAtom>'*' | <regexAtom><regex>
187 NodePtr regex(RegexInfoPtr ri) { 174 NodePtr regex(RegexInfoPtr ri) {
175 token(ri);
188 NodePtr n = regexAtom(ri); 176 NodePtr n = regexAtom(ri);
189 while (ri->ptr[0]) { 177 while (ri->tokenType) {
190 token(ri);
191 if (ri->tokenType == '*') { 178 if (ri->tokenType == '*') {
192 n = createNode(ri,'*',n,0); 179 n = createNode(ri,'*',0,n,0);
193 ri->asterFlag = false; 180 token(ri);
181 return n;
194 } else if (ri->tokenType == '|') { 182 } else if (ri->tokenType == '|') {
183 n = createNode(ri,'|',0,n,0);
195 NodePtr n1 = regex(ri); 184 NodePtr n1 = regex(ri);
196 n = createNode(ri,'|',n,n1); 185 n->right = n1;
197 } else if (ri->tokenType == '(') {
198 ri->ptr--;
199 NodePtr n1 = regex(ri);
200 if (ri->asterFlag == true) {
201 n1 = createNode(ri,'*',n1,0);
202 ri->asterFlag = false;
203 ri->ptr++;
204 }
205 n = createNode(ri,'+',n,n1);
206 } else if (ri->tokenType == ')') { 186 } else if (ri->tokenType == ')') {
207 if (ri->orNum != 0 && ri->ptr[0] != ')') {
208 ri->ptr--;
209 ri->orNum--;
210 }
211 return n; 187 return n;
212 } else { 188 } else {
213 NodePtr n1 = NULL; 189 n = createNode(ri,'+',0,n,0);
214 if (ri->asterFlag == true) { 190 NodePtr n1 = regexAtom(ri);
215 ri->ptr = ri->tokenValue; 191 n->right = n1;
216 NodePtr n1 = regexAtom(ri);
217 n1 = createNode(ri,'*',n1,0);
218 ri->asterFlag = false;
219 ri->ptr++;
220 } else {
221 n1 = regex(ri);
222 }
223 n = createNode(ri,'+',n,n1);
224 } 192 }
225 } return n; 193 }
194 return n;
226 } 195 }