comparison c/regexParser/regexParser.cc @ 124:c363a66dc1a7 pairPro

fix
author masa
date Tue, 01 Dec 2015 17:06:26 +0900
parents 188d866227a4
children b061cd8205cc
comparison
equal deleted inserted replaced
123:8ce93ffaf1ad 124:c363a66dc1a7
73 n->cc->begin = ri->ptr[i-1]; 73 n->cc->begin = ri->ptr[i-1];
74 n->cc->end = ri->ptr[i+1]; 74 n->cc->end = ri->ptr[i+1];
75 } 75 }
76 i++; 76 i++;
77 } 77 }
78 // TODO literal support
78 79
79 n->cc->cond->w->word = (unsigned char*)malloc(sizeof(unsigned char)*(i+1)); 80 n->cc->cond->w->word = (unsigned char*)malloc(sizeof(unsigned char)*(i+1));
80 strncpy((char*)n->cc->cond->w->word, (char*)ri->ptr,i+1); 81 strncpy((char*)n->cc->cond->w->word, (char*)ri->ptr,i+1);
81 n->cc->cond->w->word[i] = '\0'; 82 n->cc->cond->w->word[i] = '\0';
82 ri->ptr += i+1; 83 ri->ptr += i+1;
102 while (ri->ptr[0] != '\0') { 103 while (ri->ptr[0] != '\0') {
103 if (ri->ptr[0] == '('){ 104 if (ri->ptr[0] == '('){
104 ri->ptr++; 105 ri->ptr++;
105 ri->tokenType = '('; 106 ri->tokenType = '(';
106 ri->tokenValue = NULL; 107 ri->tokenValue = NULL;
107 if (ri->ptr[1] == ')') {
108 ri->ptr++;
109 }
110 return; 108 return;
111 } else if (ri->ptr[0] == ')') { 109 } else if (ri->ptr[0] == ')') {
112 ri->ptr++; 110 ri->ptr++;
113 ri->tokenType = ')'; 111 ri->tokenType = ')';
114 ri->tokenValue = ri->ptr; 112 ri->tokenValue = ri->ptr;
115 return; 113 return;
116 } else if (ri->ptr[0] == '[') { 114 } else if (ri->ptr[0] == '[') {
117 ri->ptr++; 115 ri->ptr++;
118 ri->tokenType = 'c'; 116 ri->tokenType = 'c';
119 ri->tokenValue = ri->ptr; 117 ri->tokenValue = ri->ptr;
120 if (ri->ptr[1] == ']') {
121 ri->ptr++;
122 }
123 return; 118 return;
124 } else if (ri->ptr[0] == '|'){ 119 } else if (ri->ptr[0] == '|'){
125 ri->ptr++; 120 ri->ptr++;
126 ri->tokenType = '|'; 121 ri->tokenType = '|';
127 ri->tokenValue = NULL; 122 ri->tokenValue = NULL;
150 } 145 }
151 } 146 }
152 return; 147 return;
153 } 148 }
154 149
155 // <regexAtom> ::= <literal>|<charClass>|<group> 150 // <regexAtom> ::= <literal>|<charClass>
156 static 151 static
157 NodePtr regexAtom(RegexInfoPtr ri) { 152 NodePtr regexAtom(RegexInfoPtr ri) {
158 153
159 token(ri); 154 token(ri);
160 NodePtr n = NULL; 155 NodePtr n = NULL;
161 if (ri->tokenType == 'a') n = literal(ri); 156 if (ri->tokenType == 'c') n = charClass(ri);
162 else if (ri->tokenType == 'c') n = charClass(ri);
163 else if (ri->tokenType == '(') n = group(ri);
164 157
165 return n; 158 return n;
166 } 159 }
167 160
168 // <regex> ::= <regexAtom> | <regexAtom><regex>'*' | <regexAtom>'*' | <regexAtom>'|'<regex> | <regexAtom><regex> | '(' regex ')' 161 // <regex> ::= <regexAtom> | <regexAtom><regex>'*' | <regexAtom>'*' | <regexAtom>'|'<regex> | <regexAtom><regex> | '(' regex ')'
169 NodePtr regex(RegexInfoPtr ri) { 162 NodePtr regex(RegexInfoPtr ri) {
170 NodePtr n = regexAtom(ri); 163 NodePtr n = NULL;
171 while (ri->ptr[0]) { 164 while (ri->ptr[0]) {
172 token(ri); 165 token(ri);
173 if (ri->tokenType == '*') { 166 if (ri->tokenType == '*') {
167 // TODO literal support
174 unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); 168 unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
175 syntax[0] = '*'; 169 syntax[0] = '*';
176 n = createNode(ri,syntax,n,0); 170 NodePtr n1 = createNode(ri,syntax,n->right,0);
171
172 unsigned char *syntax1 = (unsigned char*)malloc(sizeof(unsigned char));
173 syntax1[0] = '+';
174
175 n = createNode(ri,syntax1,n->left,n1);
177 } else if (ri->tokenType == '|') { 176 } else if (ri->tokenType == '|') {
178 NodePtr n1 = regex(ri); 177 NodePtr n1 = regex(ri);
179 unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); 178 unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
180 syntax[0] = '|'; 179 syntax[0] = '|';
181 n = createNode(ri,syntax,n,n1); 180 n = createNode(ri,syntax,n,n1);
181 } else if (ri->tokenType == '(') {
182 NodePtr n1 = regex(ri);
183 unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
184 syntax[0] = '+';
185 n = createNode(ri,syntax,n,n1);
182 } else if (ri->tokenType == ')') { 186 } else if (ri->tokenType == ')') {
183 return n; 187 return n;
188 } else if (ri->tokenType == 'a') {
189 NodePtr n1 = literal(ri);
190 unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
191 syntax[0] = '+';
192 n = createNode(ri,syntax,n,n1);
184 } else { 193 } else {
194 // return NULL
185 NodePtr n1 = regex(ri); 195 NodePtr n1 = regex(ri);
186 unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); 196 unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
187 syntax[0] = '+'; 197 syntax[0] = '+';
188 n = createNode(ri,syntax,n,n1); 198 n = createNode(ri,syntax,n,n1);
189 } 199 }