Mercurial > hg > Applications > Grep
annotate c/regexParser/regexParser.cc @ 147:84d32375383a pairPro
implement insertCharClass
author | masa |
---|---|
date | Tue, 15 Dec 2015 17:14:35 +0900 |
parents | 50217a0545e8 |
children | d1ebba6e117a |
rev | line source |
---|---|
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
1 #include <stdlib.h> |
89
50a146c05192
add NodeNumber in Regex Parser tree
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
87
diff
changeset
|
2 #include <stdio.h> |
122 | 3 #include <string.h> |
4 #include <ctype.h> | |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
5 #include "regexParser.h" |
115
ca30f8334741
rename createRegexParser.cc to regexParser.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
112
diff
changeset
|
6 |
112
ec485345daf9
some function use static
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
108
diff
changeset
|
7 static NodePtr charClass(RegexInfoPtr); |
ec485345daf9
some function use static
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
108
diff
changeset
|
8 static void token(RegexInfoPtr); |
ec485345daf9
some function use static
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
108
diff
changeset
|
9 static NodePtr regexAtom(RegexInfoPtr); |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
10 NodePtr regex(RegexInfoPtr); |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
11 |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
12 /** |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
13 * Create a node of regex parse tree. |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
14 * tokenType |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
15 * regexPosition(state) |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
16 * stateTransitionTable |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
17 */ |
112
ec485345daf9
some function use static
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
108
diff
changeset
|
18 |
118 | 19 static |
116
66c633575b53
remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
115
diff
changeset
|
20 NodePtr allocateNode() { |
129
b930be74a16e
remove word.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
128
diff
changeset
|
21 NodePtr n = NEW(Node); |
125 | 22 n->cc = NULL; |
23 n->left = NULL; | |
24 n->right = NULL; | |
116
66c633575b53
remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
115
diff
changeset
|
25 return n; |
66c633575b53
remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
115
diff
changeset
|
26 } |
66c633575b53
remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
115
diff
changeset
|
27 |
112
ec485345daf9
some function use static
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
108
diff
changeset
|
28 static |
134 | 29 NodePtr createNode(RegexInfoPtr ri,unsigned char type,CharClassPtr cc, NodePtr left, NodePtr right) { |
116
66c633575b53
remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
115
diff
changeset
|
30 NodePtr n = allocateNode(); |
108
70069d4647a0
implement malloc error checking
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
104
diff
changeset
|
31 |
125 | 32 n->tokenType = type; |
134 | 33 n->cc = cc; |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
34 n->left = left; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
35 n->right = right; |
120
5d29b6a1b50f
include Word in Node
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
118
diff
changeset
|
36 n->nodeNumber = ri->nodeNumber; |
5d29b6a1b50f
include Word in Node
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
118
diff
changeset
|
37 ri->nodeNumber++; |
89
50a146c05192
add NodeNumber in Regex Parser tree
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
87
diff
changeset
|
38 |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
39 return n; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
40 } |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
41 |
147 | 42 CharClassPtr createCharClassRange(unsigned long begin, unsigned long end, CharClassPtr left, CharClassPtr right) { |
43 CharClassPtr cc = NEW(CharClass); | |
44 cc->type = 'r'; | |
45 cc->cond.range.begin = begin; | |
46 cc->cond.range.end = end; | |
47 cc->left = left; | |
48 cc->right = right; | |
49 cc->nextState.bitContainer = 0; | |
50 return cc; | |
51 } | |
52 | |
144
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
53 CharClassPtr createCharClassWord(RegexInfoPtr ri) { |
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
54 CharClassPtr cc = NEW(CharClass); |
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
55 cc->type = 'a'; |
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
56 cc->cond.w.word = ri->tokenValue; |
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
57 cc->cond.w.length = ri->ptr - ri->tokenValue; |
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
58 cc->nextState.bitContainer = 0; |
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
59 return cc; |
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
60 } |
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
61 |
147 | 62 CharClassPtr insertCharClass(CharClassPtr cc, unsigned char begin, unsigned char end) { |
63 if (end < cc->cond.range.begin ) { | |
64 CharClassPtr cc1 = createCharClassRange(cc->begin,cc->end,cc->left,cc->right); | |
65 if (cc->left) { | |
66 cc1->left = insertCharClass(cc->left,begin,end); | |
67 return cc1; | |
68 } else { | |
69 CharClassPtr cc2 = createCharClassRange(begin,end,0,0); | |
70 cc1->left = cc2; | |
71 return cc1; | |
72 } | |
73 } else if (end == cc->cond.range.begin ) { | |
74 cc->cond.range.begin = begin; | |
75 } else if (end <= cc->cond.range.end) { | |
76 if (begin < cc->cond.range.begin) { | |
77 cc->cond.range.begin = begin; | |
78 } | |
79 } else if (begin > cc->cond.range.end ) { | |
80 CharClassPtr cc1 = createCharClassRange(cc->begin,cc->end,cc->left,cc->right); | |
81 if (cc->right) { | |
82 cc1->rigt = insertCharClass(cc->right,begin,end); | |
83 return cc1; | |
84 } else { | |
85 CharClassPtr cc2 = createCharClassRange(begin,end,0,0); | |
86 cc1->right = cc2; | |
87 return cc1; | |
88 } | |
89 } else if (begin == cc->cond.range.end ) { | |
90 cc->cond.range.end = end; | |
91 } else if (begin < cc->cond.range.begin) { | |
92 cc->cond.range.begin = begin; | |
93 } | |
94 return cc; | |
142 | 95 } |
125 | 96 |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
97 // <charClass> ::= '['<literal>'-'<literal>']' |
112
ec485345daf9
some function use static
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
108
diff
changeset
|
98 static |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
99 NodePtr charClass(RegexInfoPtr ri) { |
125 | 100 CharClassPtr cc = NEW(CharClass); |
135 | 101 NodePtr n = createNode(ri,'c',cc,0,0); |
126 | 102 cc->type = 'r'; |
142 | 103 cc->nextState.bitContainer = 0; |
104 RangeListPtr rangeList = &cc->cond.range; | |
147 | 105 rangeList->begin = *ri->ptr; |
106 rangeList->end = *ri->ptr; | |
145
50217a0545e8
fix charClass()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
144
diff
changeset
|
107 rangeList->next = NULL; |
130
7925e9abb078
add or flag
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
129
diff
changeset
|
108 |
135 | 109 for (ri->ptr++; *ri->ptr && *ri->ptr != ']'; ri->ptr++) { |
110 if (*ri->ptr == '-') { | |
147 | 111 rangeList->end = *(ri->ptr + 1); |
135 | 112 ri->ptr++; |
113 continue; | |
114 } | |
115 if (ri->ptr[0] == 0 || ri->ptr[0] == ']') break; | |
144
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
116 if (ri->ptr[0] == rangeList->end + 1) { |
147 | 117 rangeList->end = *ri->ptr; |
135 | 118 continue; |
119 } | |
126 | 120 rangeList->next = NEW(RangeList); |
125 | 121 rangeList = rangeList->next; |
147 | 122 rangeList->begin = *ri->ptr; |
123 rangeList->end = *ri->ptr; | |
125 | 124 rangeList->next = NULL; |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
125 } |
147 | 126 |
127 for (RangeListPtr r = &cc->cond.range; r; r = r->next) { | |
128 cc = insertCharClass(cc, r->begin, r->end); | |
129 } | |
130 | |
131 n->cc = cc; | |
142 | 132 // TODO literal support |
133 // merge rangeList here | |
135 | 134 if (*ri->ptr) ri->ptr++; |
135 token(ri); | |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
136 return n; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
137 } |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
138 |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
139 // <literal> ::= [a-z][A-Z][0-9] |
112
ec485345daf9
some function use static
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
108
diff
changeset
|
140 static |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
141 NodePtr literal(RegexInfoPtr ri) { |
134 | 142 CharClassPtr cc = createCharClassWord(ri); |
144
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
143 token(ri); |
134 | 144 NodePtr n = createNode(ri,'a',cc,0,0); |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
145 return n; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
146 } |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
147 |
133
ccc673449351
Look ahead '*'
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
132
diff
changeset
|
148 static |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
149 void token(RegexInfoPtr ri) { |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
150 while (ri->ptr[0] != '\0') { |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
151 if (ri->ptr[0] == '('){ |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
152 ri->ptr++; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
153 ri->tokenType = '('; |
118 | 154 ri->tokenValue = NULL; |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
155 return; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
156 } else if (ri->ptr[0] == ')') { |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
157 ri->ptr++; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
158 ri->tokenType = ')'; |
118 | 159 ri->tokenValue = ri->ptr; |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
160 return; |
135 | 161 } else if (ri->ptr[0] == ']') { |
162 ri->ptr++; | |
163 ri->tokenType = ']'; | |
164 ri->tokenValue = ri->ptr; | |
165 return; | |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
166 } else if (ri->ptr[0] == '|'){ |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
167 ri->ptr++; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
168 ri->tokenType = '|'; |
118 | 169 ri->tokenValue = NULL; |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
170 return; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
171 } else if (ri->ptr[0] == '*'){ |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
172 ri->ptr++; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
173 ri->tokenType = '*'; |
118 | 174 ri->tokenValue = NULL; |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
175 return; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
176 } else if (ri->ptr[0] == '\\'){ |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
177 // need more proccesing |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
178 /* |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
179 \277 |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
180 \0xa5 |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
181 \[ |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
182 \\ |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
183 \utf-8 etc... |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
184 */ |
142 | 185 } else if (ri->ptr[0] == '[') { |
186 ri->ptr++; | |
187 ri->tokenType = 'c'; | |
188 ri->tokenValue = ri->ptr; | |
189 return; | |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
190 } else { |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
191 ri->tokenType = 'a'; |
118 | 192 ri->tokenValue = ri->ptr; |
134 | 193 if (isalnum(ri->ptr[0])) { |
122 | 194 ri->ptr++; |
195 } | |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
196 return; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
197 } |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
198 } |
134 | 199 ri->tokenType = 0; |
200 ri->tokenValue = NULL; | |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
201 return; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
202 } |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
203 |
130
7925e9abb078
add or flag
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
129
diff
changeset
|
204 // <regexAtom> ::= <literal>|<charClass>|<group> |
112
ec485345daf9
some function use static
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
108
diff
changeset
|
205 static |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
206 NodePtr regexAtom(RegexInfoPtr ri) { |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
207 |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
208 NodePtr n = NULL; |
124 | 209 if (ri->tokenType == 'c') n = charClass(ri); |
134 | 210 else if (ri->tokenType == 'a') n = literal(ri); |
211 else if (ri->tokenType == '(') { | |
212 n = regex(ri); | |
213 if (ri->tokenType != ')') { | |
214 // error | |
215 } | |
216 token(ri); | |
217 } | |
218 if (ri->tokenType == '*') { | |
219 n = createNode(ri,'*',0,n,0); | |
220 token(ri); | |
221 } | |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
222 |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
223 return n; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
224 } |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
225 |
133
ccc673449351
Look ahead '*'
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
132
diff
changeset
|
226 // <regex> ::= <regexAtom> | <regexAtom>'*'<regex> | <regexAtom>'|'<regex> | <regexAtom><regexAtom>'*' | <regexAtom><regex> |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
227 NodePtr regex(RegexInfoPtr ri) { |
134 | 228 token(ri); |
128 | 229 NodePtr n = regexAtom(ri); |
134 | 230 while (ri->tokenType) { |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
231 if (ri->tokenType == '*') { |
134 | 232 n = createNode(ri,'*',0,n,0); |
233 token(ri); | |
234 return n; | |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
235 } else if (ri->tokenType == '|') { |
134 | 236 n = createNode(ri,'|',0,n,0); |
133
ccc673449351
Look ahead '*'
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
132
diff
changeset
|
237 NodePtr n1 = regex(ri); |
134 | 238 n->right = n1; |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
239 } else if (ri->tokenType == ')') { |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
240 return n; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
241 } else { |
134 | 242 n = createNode(ri,'+',0,n,0); |
243 NodePtr n1 = regexAtom(ri); | |
244 n->right = n1; | |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
245 } |
134 | 246 } |
247 return n; | |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
248 } |