Mercurial > hg > Applications > Grep
comparison c/regexParser/regexParser.cc @ 125:4d6ac69801ad pairPro
fix createNode
author | Nozomi |
---|---|
date | Tue, 01 Dec 2015 21:11:27 +0900 |
parents | 188d866227a4 |
children | 639b0b437ebf |
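comparison
Legend: each row below is one of equal, deleted, inserted, or replaced (left column is revision 123:8ce93ffaf1ad, right column is revision 125:4d6ac69801ad).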
123:8ce93ffaf1ad | 125:4d6ac69801ad |
---|---|
21 */ | 21 */ |
22 | 22 |
23 static | 23 static |
24 NodePtr allocateNode() { | 24 NodePtr allocateNode() { |
25 NodePtr n = (NodePtr)malloc(sizeof(node)); | 25 NodePtr n = (NodePtr)malloc(sizeof(node)); |
26 n->cc = (CharClassPtr)malloc(sizeof(CharClass)); | 26 n->cc = NULL; |
27 n->cc->cond = (ConditionList)malloc(sizeof(Condition)); | 27 n->left = NULL; |
28 n->right = NULL; | |
28 return n; | 29 return n; |
29 } | 30 } |
30 | 31 |
31 static | 32 static |
32 NodePtr createNode(RegexInfoPtr ri,unsigned char *character, NodePtr left, NodePtr right) { | 33 CharClassPtr createChaClassWord(RegexInfoPtr ri) { |
34 CharClassPtr cc = NEW(CharClass); | |
35 cc->type = 'a'; | |
36 cc->cond = NEW(Condition); | |
37 cc->cond->w = NEW(Word); | |
38 cc->cond->w->word = ri->tokenValue; | |
39 cc->cond->w->length = ri->ptr - ri->tokenValue; | |
40 } | |
41 | |
42 static | |
43 NodePtr createNode(RegexInfoPtr ri,unsigned char type, NodePtr left, NodePtr right) { | |
33 NodePtr n = allocateNode(); | 44 NodePtr n = allocateNode(); |
34 if (n == NULL) { | |
35 mallocFailedMessage(); | |
36 } | |
37 | 45 |
38 n->tokenType = ri->tokenType; | 46 n->tokenType = type; |
39 n->left = left; | 47 n->left = left; |
40 n->right = right; | 48 n->right = right; |
41 n->nodeNumber = ri->nodeNumber; | 49 n->nodeNumber = ri->nodeNumber; |
42 ri->nodeNumber++; | 50 ri->nodeNumber++; |
43 | 51 |
44 if (ri->tokenType == 'a') { | 52 if (type == 'a') { |
45 ri->tokenType = 0; | 53 n->cc = createCharClassWord(ri); |
46 n->cc->cond->w = getWord(ri->tokenValue); | |
47 } else { | |
48 WordPtr w = (WordPtr)malloc(sizeof(Word)); | |
49 w->word = character; | |
50 w->length = 1; | |
51 n->cc->cond->w = w; | |
52 } | 54 } |
55 | |
53 return n; | 56 return n; |
54 } | 57 } |
58 | |
55 | 59 |
56 // <charClass> ::= '['<literal>'-'<literal>']' | 60 // <charClass> ::= '['<literal>'-'<literal>']' |
57 static | 61 static |
58 NodePtr charClass(RegexInfoPtr ri) { | 62 NodePtr charClass(RegexInfoPtr ri) { |
59 NodePtr n = allocateNode(); | 63 NodePtr n = allocateNode(); |
60 if (n == NULL) { | |
61 mallocFailedMessage(); | |
62 } | |
63 | 64 |
64 n->tokenType = ri->tokenType; | 65 n->tokenType = 'c'; |
65 n->nodeNumber = ri->nodeNumber; | 66 n->nodeNumber = ri->nodeNumber; |
66 ri->nodeNumber++; | 67 ri->nodeNumber++; |
67 n->cc->cond->w = (WordPtr)malloc(sizeof(Word)); | 68 |
69 CharClassPtr cc = NEW(CharClass); | |
70 cc->cond = NEW(Condition); | |
71 cc->cond->type = 'r'; | |
72 cc->cond->rangeList = NEW(struct utf8Range); | |
73 cc->cond->rangeList->begin = ri->ptr; | |
74 cc->cond->rangeList->end = ri->ptr + 1; | |
75 cc->cond->rangeList->next = NULL; | |
68 | 76 |
69 int i = 0; | 77 int i = 0; |
70 | 78 |
79 struct utf8Range *rangeList= cc->cond->rangeList; | |
80 | |
71 while (ri->ptr[i] != ']') { | 81 while (ri->ptr[i] != ']') { |
72 if (ri->ptr[i] == '-') { | 82 if (ri->ptr[i] == '-') i++; |
73 n->cc->begin = ri->ptr[i-1]; | 83 |
74 n->cc->end = ri->ptr[i+1]; | 84 rangeList->end = ri->ptr + i; |
75 } | 85 rangeList->next = NEW(struct utf8Range); |
86 rangeList = rangeList->next; | |
87 rangeList->begin = ri->ptr+i+1; | |
88 rangeList->next = NULL; | |
76 i++; | 89 i++; |
77 } | 90 } |
78 | 91 |
79 n->cc->cond->w->word = (unsigned char*)malloc(sizeof(unsigned char)*(i+1)); | 92 rangeList->end = ri->ptr[i-1]; |
80 strncpy((char*)n->cc->cond->w->word, (char*)ri->ptr,i+1); | |
81 n->cc->cond->w->word[i] = '\0'; | |
82 ri->ptr += i+1; | |
83 | 93 |
84 return n; | 94 return n; |
85 } | 95 } |
86 | 96 |
87 // <literal> ::= [a-z][A-Z][0-9] | 97 // <literal> ::= [a-z][A-Z][0-9] |
163 else if (ri->tokenType == '(') n = group(ri); | 173 else if (ri->tokenType == '(') n = group(ri); |
164 | 174 |
165 return n; | 175 return n; |
166 } | 176 } |
167 | 177 |
168 // <regex> ::= <regexAtom> | <regexAtom><regex>'*' | <regexAtom>'*' | <regexAtom>'|'<regex> | <regexAtom><regex> | '(' regex ')' | 178 // <regex> ::= <regexAtom> | <regexAtom>'*' | <regexAtom>'|'<regex> | <regexAtom><regex> | '(' regex ')' |
169 NodePtr regex(RegexInfoPtr ri) { | 179 NodePtr regex(RegexInfoPtr ri) { |
170 NodePtr n = regexAtom(ri); | 180 NodePtr n = regexAtom(ri); |
171 while (ri->ptr[0]) { | 181 while (ri->ptr[0]) { |
172 token(ri); | 182 token(ri); |
173 if (ri->tokenType == '*') { | 183 if (ri->tokenType == '*') { |
174 unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); | 184 n = createNode(ri,'*',n,0); |
175 syntax[0] = '*'; | |
176 n = createNode(ri,syntax,n,0); | |
177 } else if (ri->tokenType == '|') { | 185 } else if (ri->tokenType == '|') { |
178 NodePtr n1 = regex(ri); | 186 NodePtr n1 = regex(ri); |
179 unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); | 187 n = createNode(ri,'|',n,n1); |
180 syntax[0] = '|'; | |
181 n = createNode(ri,syntax,n,n1); | |
182 } else if (ri->tokenType == ')') { | 188 } else if (ri->tokenType == ')') { |
183 return n; | 189 return n; |
184 } else { | 190 } else { |
185 NodePtr n1 = regex(ri); | 191 NodePtr n1 = regex(ri); |
186 unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); | 192 n = createNode(ri,'a',n,n1); |
187 syntax[0] = '+'; | |
188 n = createNode(ri,syntax,n,n1); | |
189 } | 193 } |
190 } return n; | 194 } return n; |
191 } | 195 } |