Mercurial > hg > Applications > Grep
comparison c/regexParser/main.cc @ 82:1d9bbf922bb6
add createRegexTree.cc
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 07 Oct 2015 18:13:27 +0900 |
parents | 27883946b2dc |
children | 912d7bd51f38 |
comparison
equal
deleted
inserted
replaced
81:27883946b2dc | 82:1d9bbf922bb6 |
---|---|
9 #include <stdio.h> | 9 #include <stdio.h> |
10 #include <stdlib.h> | 10 #include <stdlib.h> |
11 #include <string.h> | 11 #include <string.h> |
12 #include "regexParser.h" | 12 #include "regexParser.h" |
13 | 13 |
14 NodePtr createNode(unsigned char,NodePtr,NodePtr); | 14 extern NodePtr regex(RegexInfoPtr); |
15 NodePtr charClass(RegexInfoPtr); | |
16 NodePtr group(RegexInfoPtr); | |
17 void token(RegexInfoPtr); | |
18 NodePtr regexAtom(RegexInfoPtr); | |
19 NodePtr regex(RegexInfoPtr); | |
20 extern void printTree(NodePtr); | 15 extern void printTree(NodePtr); |
21 | |
22 /** | |
23 * Create a node of regex parse tree. | |
24 * tokenType | |
25 * regexPosition(state) | |
26 * stateTransitionTable | |
27 */ | |
28 NodePtr createNode(unsigned char character, NodePtr left, NodePtr right) { | |
29 NodePtr n = (NodePtr)malloc(sizeof(Node)); | |
30 n->self = n; | |
31 n->Value.character = character; | |
32 n->left = left; | |
33 n->right = right; | |
34 return n; | |
35 } | |
36 | |
37 // <charClass> ::= '['<literal>'-'<literal>']' | |
38 NodePtr charClass(RegexInfoPtr ri) { | |
39 NodePtr n = (NodePtr)malloc(sizeof(Node)); | |
40 unsigned char startChar = ri->ptr[0]; | |
41 while (ri->ptr[0] == '-') { | |
42 ri->ptr++; | |
43 } | |
44 unsigned char endChar = ri->ptr[0]; | |
45 unsigned char *charTable = (unsigned char*)malloc(sizeof(char)*256); | |
46 | |
47 return n; | |
48 } | |
49 | |
50 // <literal> ::= [a-z][A-Z][0-9] | |
51 NodePtr literal(RegexInfoPtr ri) { | |
52 NodePtr n = createNode(ri->ptr[0],0,0); | |
53 ri->ptr++; | |
54 return n; | |
55 } | |
56 | |
57 // <group> ::= '('<regex>')' | |
58 NodePtr group(RegexInfoPtr ri) { | |
59 return regex(ri); | |
60 } | |
61 | |
62 | |
63 | |
64 void token(RegexInfoPtr ri) { | |
65 while (ri->ptr[0] != '\0') { | |
66 if (ri->ptr[0] == '('){ | |
67 ri->ptr++; | |
68 ri->tokenType = '('; | |
69 ri->tokenValue = 0; | |
70 if (ri->ptr[1] == ')') { | |
71 ri->ptr++; | |
72 } | |
73 return; | |
74 } else if (ri->ptr[0] == ')') { | |
75 ri->ptr++; | |
76 ri->tokenType = ')'; | |
77 ri->tokenValue = ri->ptr[0]; | |
78 return; | |
79 } else if (ri->ptr[0] == '[') { | |
80 ri->ptr++; | |
81 ri->tokenType = '['; | |
82 ri->tokenValue = ri->ptr[0]; | |
83 if (ri->ptr[1] == ']') { | |
84 ri->ptr++; | |
85 } | |
86 return; | |
87 } else if (ri->ptr[0] == '|'){ | |
88 ri->ptr++; | |
89 ri->tokenType = '|'; | |
90 ri->tokenValue = 0; | |
91 return; | |
92 } else if (ri->ptr[0] == '*'){ | |
93 ri->ptr++; | |
94 ri->tokenType = '*'; | |
95 ri->tokenValue = 0; | |
96 return; | |
97 } else if (ri->ptr[0] == '\\'){ | |
98 // need more proccesing | |
99 /* | |
100 \277 | |
101 \0xa5 | |
102 \[ | |
103 \\ | |
104 \utf-8 etc... | |
105 */ | |
106 } else { | |
107 ri->tokenType = 'a'; | |
108 ri->tokenValue = ri->ptr[0]; | |
109 return; | |
110 } | |
111 } | |
112 | |
113 ri->tokenType = 0; | |
114 ri->tokenValue = 0; | |
115 return; | |
116 } | |
117 | |
118 // <regexAtom> ::= <literal>|<charClass>|<group> | |
119 NodePtr regexAtom(RegexInfoPtr ri) { | |
120 | |
121 token(ri); | |
122 NodePtr n = NULL; | |
123 if (ri->tokenType == 'a') n = literal(ri); | |
124 else if (ri->tokenType == '[') n = charClass(ri); | |
125 else if (ri->tokenType == '(') n = group(ri); | |
126 | |
127 return n; | |
128 } | |
129 | |
130 // <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex> | |
131 NodePtr regex(RegexInfoPtr ri) { | |
132 NodePtr n = regexAtom(ri); | |
133 while (ri->ptr[0]) { | |
134 token(ri); | |
135 if (ri->tokenType == '*') { | |
136 n = createNode('*',n,0); | |
137 } else if (ri->tokenType == '|') { | |
138 NodePtr n1 = regex(ri); | |
139 n = createNode('|',n,n1); | |
140 } else if (ri->tokenType == ')') { | |
141 return n; | |
142 } else { | |
143 NodePtr n1 = regex(ri); | |
144 n = createNode('+',n,n1); | |
145 } | |
146 } return n; | |
147 } | |
148 | 16 |
149 | 17 |
150 int main(int argc, char **argv) | 18 int main(int argc, char **argv) |
151 { | 19 { |
152 RegexInfoPtr ri = (RegexInfoPtr)malloc(sizeof(RegexInfo)); | 20 RegexInfoPtr ri = (RegexInfoPtr)malloc(sizeof(RegexInfo)); |