Mercurial > hg > Applications > Grep
comparison c/regexParser/createRegexParser.cc @ 95:1cdad0468484 impl-bitvector
rename createRegexTree to createRegexParser
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 10 Nov 2015 15:35:49 +0900 |
parents | c/regexParser/createRegexTree.cc@912d7bd51f38 |
children | 0b6940588e88 |
comparison
equal
deleted
inserted
replaced
94:43b807f88961 | 95:1cdad0468484 |
---|---|
1 #include <stdlib.h> | |
2 #include <stdio.h> | |
3 #include "regexParser.h" | |
4 | |
5 NodePtr createNode(RegexInfoPtr,unsigned char,NodePtr,NodePtr); | |
6 NodePtr charClass(RegexInfoPtr); | |
7 NodePtr group(RegexInfoPtr); | |
8 void token(RegexInfoPtr); | |
9 NodePtr regexAtom(RegexInfoPtr); | |
10 NodePtr regex(RegexInfoPtr); | |
11 | |
12 /** | |
13 * Create a node of regex parse tree. | |
14 * tokenType | |
15 * regexPosition(state) | |
16 * stateTransitionTable | |
17 */ | |
18 NodePtr createNode(RegexInfoPtr ri,unsigned char character, NodePtr left, NodePtr right) { | |
19 NodePtr n = (NodePtr)malloc(sizeof(Node)); | |
20 n->tokenType = ri->tokenType; | |
21 n->self = n; | |
22 n->Value.character = character; | |
23 n->left = left; | |
24 n->right = right; | |
25 | |
26 if (ri->tokenType == 'a') { | |
27 n->nodeNumber = ri->nodeNumber; | |
28 ri->nodeNumber++; | |
29 ri->tokenType = 0; | |
30 } | |
31 return n; | |
32 } | |
33 | |
34 // <charClass> ::= '['<literal>'-'<literal>']' | |
35 NodePtr charClass(RegexInfoPtr ri) { | |
36 NodePtr n = (NodePtr)malloc(sizeof(Node)); | |
37 unsigned char startChar = ri->ptr[0]; | |
38 while (ri->ptr[0] == '-') { | |
39 ri->ptr++; | |
40 } | |
41 unsigned char endChar = ri->ptr[0]; | |
42 unsigned char *charTable = (unsigned char*)malloc(sizeof(char)*256); | |
43 | |
44 return n; | |
45 } | |
46 | |
47 // <literal> ::= [a-z][A-Z][0-9] | |
48 NodePtr literal(RegexInfoPtr ri) { | |
49 unsigned char *top = ri->ptr; | |
50 NodePtr n = createNode(ri,ri->ptr[0],0,0); | |
51 ri->ptr++; | |
52 return n; | |
53 } | |
54 | |
55 // <group> ::= '('<regex>')' | |
56 NodePtr group(RegexInfoPtr ri) { | |
57 return regex(ri); | |
58 } | |
59 | |
60 | |
61 | |
62 void token(RegexInfoPtr ri) { | |
63 while (ri->ptr[0] != '\0') { | |
64 if (ri->ptr[0] == '('){ | |
65 ri->ptr++; | |
66 ri->tokenType = '('; | |
67 ri->tokenValue = 0; | |
68 if (ri->ptr[1] == ')') { | |
69 ri->ptr++; | |
70 } | |
71 return; | |
72 } else if (ri->ptr[0] == ')') { | |
73 ri->ptr++; | |
74 ri->tokenType = ')'; | |
75 ri->tokenValue = ri->ptr[0]; | |
76 return; | |
77 } else if (ri->ptr[0] == '[') { | |
78 ri->ptr++; | |
79 ri->tokenType = '['; | |
80 ri->tokenValue = ri->ptr[0]; | |
81 if (ri->ptr[1] == ']') { | |
82 ri->ptr++; | |
83 } | |
84 return; | |
85 } else if (ri->ptr[0] == '|'){ | |
86 ri->ptr++; | |
87 ri->tokenType = '|'; | |
88 ri->tokenValue = 0; | |
89 return; | |
90 } else if (ri->ptr[0] == '*'){ | |
91 ri->ptr++; | |
92 ri->tokenType = '*'; | |
93 ri->tokenValue = 0; | |
94 return; | |
95 } else if (ri->ptr[0] == '\\'){ | |
96 // need more proccesing | |
97 /* | |
98 \277 | |
99 \0xa5 | |
100 \[ | |
101 \\ | |
102 \utf-8 etc... | |
103 */ | |
104 } else { | |
105 ri->tokenType = 'a'; | |
106 ri->tokenValue = ri->ptr[0]; | |
107 return; | |
108 } | |
109 } | |
110 ri->tokenType = 0; | |
111 ri->tokenValue = 0; | |
112 return; | |
113 } | |
114 | |
115 // <regexAtom> ::= <literal>|<charClass>|<group> | |
116 NodePtr regexAtom(RegexInfoPtr ri) { | |
117 | |
118 token(ri); | |
119 NodePtr n = NULL; | |
120 if (ri->tokenType == 'a') n = literal(ri); | |
121 else if (ri->tokenType == '[') n = charClass(ri); | |
122 else if (ri->tokenType == '(') n = group(ri); | |
123 | |
124 return n; | |
125 } | |
126 | |
127 // <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex> | |
128 NodePtr regex(RegexInfoPtr ri) { | |
129 NodePtr n = regexAtom(ri); | |
130 while (ri->ptr[0]) { | |
131 token(ri); | |
132 if (ri->tokenType == '*') { | |
133 n = createNode(ri,'*',n,0); | |
134 } else if (ri->tokenType == '|') { | |
135 NodePtr n1 = regex(ri); | |
136 n = createNode(ri,'|',n,n1); | |
137 } else if (ri->tokenType == ')') { | |
138 return n; | |
139 } else { | |
140 NodePtr n1 = regex(ri); | |
141 n = createNode(ri,'+',n,n1); | |
142 } | |
143 } return n; | |
144 } |