Mercurial > hg > Applications > Grep
comparison c/regexParser/regexParser.cc @ 122:188d866227a4 pairPro
fix
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 30 Nov 2015 23:43:53 +0900 |
parents | aa266a4db47c |
children | c363a66dc1a7 4d6ac69801ad |
comparison
equal
deleted
inserted
replaced
121:aa266a4db47c | 122:188d866227a4 |
---|---|
1 #include <stdlib.h> | 1 #include <stdlib.h> |
2 #include <stdio.h> | 2 #include <stdio.h> |
3 #include <string.h> | |
4 #include <ctype.h> | |
3 #include "regexParser.h" | 5 #include "regexParser.h" |
4 #include "error.h" | 6 #include "error.h" |
5 | 7 |
6 static NodePtr allocateNode(); | 8 static NodePtr allocateNode(); |
7 static NodePtr createNode(RegexInfoPtr,unsigned char*,NodePtr,NodePtr); | 9 static NodePtr createNode(RegexInfoPtr,unsigned char*,NodePtr,NodePtr); |
40 ri->nodeNumber++; | 42 ri->nodeNumber++; |
41 | 43 |
42 if (ri->tokenType == 'a') { | 44 if (ri->tokenType == 'a') { |
43 ri->tokenType = 0; | 45 ri->tokenType = 0; |
44 n->cc->cond->w = getWord(ri->tokenValue); | 46 n->cc->cond->w = getWord(ri->tokenValue); |
45 ri->ptr += n->cc->cond->w->length-1; | |
46 } else { | 47 } else { |
47 WordPtr w = (WordPtr)malloc(sizeof(Word)); | 48 WordPtr w = (WordPtr)malloc(sizeof(Word)); |
48 w->word = character; | 49 w->word = character; |
49 w->length = 1; | 50 w->length = 1; |
50 n->cc->cond->w = w; | 51 n->cc->cond->w = w; |
53 } | 54 } |
54 | 55 |
55 // <charClass> ::= '['<literal>'-'<literal>']' | 56 // <charClass> ::= '['<literal>'-'<literal>']' |
56 static | 57 static |
57 NodePtr charClass(RegexInfoPtr ri) { | 58 NodePtr charClass(RegexInfoPtr ri) { |
58 NodePtr n = (NodePtr)malloc(sizeof(Node)); | 59 NodePtr n = allocateNode(); |
59 if (n == NULL) { | 60 if (n == NULL) { |
60 mallocFailedMessage(); | 61 mallocFailedMessage(); |
61 } | 62 } |
62 while (ri->ptr[0] == '-') { | 63 |
63 ri->ptr++; | 64 n->tokenType = ri->tokenType; |
65 n->nodeNumber = ri->nodeNumber; | |
66 ri->nodeNumber++; | |
67 n->cc->cond->w = (WordPtr)malloc(sizeof(Word)); | |
68 | |
69 int i = 0; | |
70 | |
71 while (ri->ptr[i] != ']') { | |
72 if (ri->ptr[i] == '-') { | |
73 n->cc->begin = ri->ptr[i-1]; | |
74 n->cc->end = ri->ptr[i+1]; | |
75 } | |
76 i++; | |
64 } | 77 } |
78 | |
79 n->cc->cond->w->word = (unsigned char*)malloc(sizeof(unsigned char)*(i+1)); | |
80 strncpy((char*)n->cc->cond->w->word, (char*)ri->ptr,i+1); | |
81 n->cc->cond->w->word[i] = '\0'; | |
82 ri->ptr += i+1; | |
83 | |
65 return n; | 84 return n; |
66 } | 85 } |
67 | 86 |
68 // <literal> ::= [a-z][A-Z][0-9] | 87 // <literal> ::= [a-z][A-Z][0-9] |
69 static | 88 static |
94 ri->tokenType = ')'; | 113 ri->tokenType = ')'; |
95 ri->tokenValue = ri->ptr; | 114 ri->tokenValue = ri->ptr; |
96 return; | 115 return; |
97 } else if (ri->ptr[0] == '[') { | 116 } else if (ri->ptr[0] == '[') { |
98 ri->ptr++; | 117 ri->ptr++; |
99 ri->tokenType = '['; | 118 ri->tokenType = 'c'; |
100 ri->tokenValue = ri->ptr; | 119 ri->tokenValue = ri->ptr; |
101 if (ri->ptr[1] == ']') { | 120 if (ri->ptr[1] == ']') { |
102 ri->ptr++; | 121 ri->ptr++; |
103 } | 122 } |
104 return; | 123 return; |
122 \utf-8 etc... | 141 \utf-8 etc... |
123 */ | 142 */ |
124 } else { | 143 } else { |
125 ri->tokenType = 'a'; | 144 ri->tokenType = 'a'; |
126 ri->tokenValue = ri->ptr; | 145 ri->tokenValue = ri->ptr; |
127 ri->ptr++; | 146 while (isalnum(ri->ptr[0])) { |
147 ri->ptr++; | |
148 } | |
128 return; | 149 return; |
129 } | 150 } |
130 } | 151 } |
131 return; | 152 return; |
132 } | 153 } |
136 NodePtr regexAtom(RegexInfoPtr ri) { | 157 NodePtr regexAtom(RegexInfoPtr ri) { |
137 | 158 |
138 token(ri); | 159 token(ri); |
139 NodePtr n = NULL; | 160 NodePtr n = NULL; |
140 if (ri->tokenType == 'a') n = literal(ri); | 161 if (ri->tokenType == 'a') n = literal(ri); |
141 else if (ri->tokenType == '[') n = charClass(ri); | 162 else if (ri->tokenType == 'c') n = charClass(ri); |
142 else if (ri->tokenType == '(') n = group(ri); | 163 else if (ri->tokenType == '(') n = group(ri); |
143 | 164 |
144 return n; | 165 return n; |
145 } | 166 } |
146 | 167 |
147 // <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex> | 168 // <regex> ::= <regexAtom> | <regexAtom><regex>'*' | <regexAtom>'*' | <regexAtom>'|'<regex> | <regexAtom><regex> | '(' regex ')' |
148 NodePtr regex(RegexInfoPtr ri) { | 169 NodePtr regex(RegexInfoPtr ri) { |
149 NodePtr n = regexAtom(ri); | 170 NodePtr n = regexAtom(ri); |
150 while (ri->ptr[0]) { | 171 while (ri->ptr[0]) { |
151 token(ri); | 172 token(ri); |
152 if (ri->tokenType == '*') { | 173 if (ri->tokenType == '*') { |