Mercurial > hg > Applications > Grep
comparison c/regexParser/regexParser.cc @ 115:ca30f8334741 pairPro
rename createRegexParser.cc to regexParser.cc
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 24 Nov 2015 14:38:26 +0900 |
parents | c/regexParser/createRegexParser.cc@ec485345daf9 |
children | 66c633575b53 |
comparison
equal
deleted
inserted
replaced
114:c82e7a7ef8d9 | 115:ca30f8334741 |
---|---|
1 #include <stdlib.h> | |
2 #include <stdio.h> | |
3 #include "regexParser.h" | |
4 #include "error.h" | |
5 | |
// Parser state shared by the routines in this file.
struct regexInfo {
    unsigned char *ptr;       // current read position in the pattern; advanced as input is consumed
    unsigned char tokenType;  // kind of the latest token: '(' ')' '[' '|' '*', 'a' for a literal, 0 for none
    int tokenValue;           // value recorded by token() together with tokenType
    int nodeNumber;           // number createNode() gives to the next literal node
};
typedef struct regexInfo RegexInfo;
typedef struct regexInfo *RegexInfoPtr;
12 | |
13 static NodePtr createNode(RegexInfoPtr,unsigned char,NodePtr,NodePtr); | |
14 static NodePtr charClass(RegexInfoPtr); | |
15 static NodePtr group(RegexInfoPtr); | |
16 static void token(RegexInfoPtr); | |
17 static NodePtr regexAtom(RegexInfoPtr); | |
18 NodePtr regex(RegexInfoPtr); | |
19 | |
20 /** | |
21 * Create a node of regex parse tree. | |
22 * tokenType | |
23 * regexPosition(state) | |
24 * stateTransitionTable | |
25 */ | |
26 | |
27 static | |
28 NodePtr createNode(RegexInfoPtr ri,unsigned char character, NodePtr left, NodePtr right) { | |
29 NodePtr n = (NodePtr)malloc(sizeof(Node)); | |
30 if (n == NULL) { | |
31 mallocFailedMessage(); | |
32 } | |
33 | |
34 n->tokenType = ri->tokenType; | |
35 n->cc->conditionList->character = character; | |
36 n->left = left; | |
37 n->right = right; | |
38 | |
39 if (ri->tokenType == 'a') { | |
40 n->nodeNumber = ri->nodeNumber; | |
41 ri->nodeNumber++; | |
42 ri->tokenType = 0; | |
43 } | |
44 return n; | |
45 } | |
46 | |
47 // <charClass> ::= '['<literal>'-'<literal>']' | |
48 static | |
49 NodePtr charClass(RegexInfoPtr ri) { | |
50 NodePtr n = (NodePtr)malloc(sizeof(Node)); | |
51 if (n == NULL) { | |
52 mallocFailedMessage(); | |
53 } | |
54 while (ri->ptr[0] == '-') { | |
55 ri->ptr++; | |
56 } | |
57 return n; | |
58 } | |
59 | |
60 // <literal> ::= [a-z][A-Z][0-9] | |
61 static | |
62 NodePtr literal(RegexInfoPtr ri) { | |
63 NodePtr n = createNode(ri,ri->ptr[0],0,0); | |
64 ri->ptr++; | |
65 return n; | |
66 } | |
67 | |
68 // <group> ::= '('<regex>')' | |
69 static | |
70 NodePtr group(RegexInfoPtr ri) { | |
71 return regex(ri); | |
72 } | |
73 | |
74 static | |
75 void token(RegexInfoPtr ri) { | |
76 while (ri->ptr[0] != '\0') { | |
77 if (ri->ptr[0] == '('){ | |
78 ri->ptr++; | |
79 ri->tokenType = '('; | |
80 ri->tokenValue = 0; | |
81 if (ri->ptr[1] == ')') { | |
82 ri->ptr++; | |
83 } | |
84 return; | |
85 } else if (ri->ptr[0] == ')') { | |
86 ri->ptr++; | |
87 ri->tokenType = ')'; | |
88 ri->tokenValue = ri->ptr[0]; | |
89 return; | |
90 } else if (ri->ptr[0] == '[') { | |
91 ri->ptr++; | |
92 ri->tokenType = '['; | |
93 ri->tokenValue = ri->ptr[0]; | |
94 if (ri->ptr[1] == ']') { | |
95 ri->ptr++; | |
96 } | |
97 return; | |
98 } else if (ri->ptr[0] == '|'){ | |
99 ri->ptr++; | |
100 ri->tokenType = '|'; | |
101 ri->tokenValue = 0; | |
102 return; | |
103 } else if (ri->ptr[0] == '*'){ | |
104 ri->ptr++; | |
105 ri->tokenType = '*'; | |
106 ri->tokenValue = 0; | |
107 return; | |
108 } else if (ri->ptr[0] == '\\'){ | |
109 // need more proccesing | |
110 /* | |
111 \277 | |
112 \0xa5 | |
113 \[ | |
114 \\ | |
115 \utf-8 etc... | |
116 */ | |
117 } else { | |
118 ri->tokenType = 'a'; | |
119 ri->tokenValue = ri->ptr[0]; | |
120 return; | |
121 } | |
122 } | |
123 ri->tokenType = 0; | |
124 ri->tokenValue = 0; | |
125 return; | |
126 } | |
127 | |
128 // <regexAtom> ::= <literal>|<charClass>|<group> | |
129 static | |
130 NodePtr regexAtom(RegexInfoPtr ri) { | |
131 | |
132 token(ri); | |
133 NodePtr n = NULL; | |
134 if (ri->tokenType == 'a') n = literal(ri); | |
135 else if (ri->tokenType == '[') n = charClass(ri); | |
136 else if (ri->tokenType == '(') n = group(ri); | |
137 | |
138 return n; | |
139 } | |
140 | |
141 // <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex> | |
142 NodePtr regex(RegexInfoPtr ri) { | |
143 NodePtr n = regexAtom(ri); | |
144 while (ri->ptr[0]) { | |
145 token(ri); | |
146 if (ri->tokenType == '*') { | |
147 n = createNode(ri,'*',n,0); | |
148 } else if (ri->tokenType == '|') { | |
149 NodePtr n1 = regex(ri); | |
150 n = createNode(ri,'|',n,n1); | |
151 } else if (ri->tokenType == ')') { | |
152 return n; | |
153 } else { | |
154 NodePtr n1 = regex(ri); | |
155 n = createNode(ri,'+',n,n1); | |
156 } | |
157 } return n; | |
158 } |