comparison c/regexParser/main.cc @ 82:1d9bbf922bb6

add createRegexTree.cc
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Wed, 07 Oct 2015 18:13:27 +0900
parents 27883946b2dc
children 912d7bd51f38
comparison
equal deleted inserted replaced
81:27883946b2dc 82:1d9bbf922bb6
9 #include <stdio.h> 9 #include <stdio.h>
10 #include <stdlib.h> 10 #include <stdlib.h>
11 #include <string.h> 11 #include <string.h>
12 #include "regexParser.h" 12 #include "regexParser.h"
13 13
14 NodePtr createNode(unsigned char,NodePtr,NodePtr); 14 extern NodePtr regex(RegexInfoPtr);
15 NodePtr charClass(RegexInfoPtr);
16 NodePtr group(RegexInfoPtr);
17 void token(RegexInfoPtr);
18 NodePtr regexAtom(RegexInfoPtr);
19 NodePtr regex(RegexInfoPtr);
20 extern void printTree(NodePtr); 15 extern void printTree(NodePtr);
21
22 /**
23 * Create a node of regex parse tree.
24 * tokenType
25 * regexPosition(state)
26 * stateTransitionTable
27 */
28 NodePtr createNode(unsigned char character, NodePtr left, NodePtr right) {
29 NodePtr n = (NodePtr)malloc(sizeof(Node));
30 n->self = n;
31 n->Value.character = character;
32 n->left = left;
33 n->right = right;
34 return n;
35 }
36
37 // <charClass> ::= '['<literal>'-'<literal>']'
38 NodePtr charClass(RegexInfoPtr ri) {
39 NodePtr n = (NodePtr)malloc(sizeof(Node));
40 unsigned char startChar = ri->ptr[0];
41 while (ri->ptr[0] == '-') {
42 ri->ptr++;
43 }
44 unsigned char endChar = ri->ptr[0];
45 unsigned char *charTable = (unsigned char*)malloc(sizeof(char)*256);
46
47 return n;
48 }
49
50 // <literal> ::= [a-z][A-Z][0-9]
51 NodePtr literal(RegexInfoPtr ri) {
52 NodePtr n = createNode(ri->ptr[0],0,0);
53 ri->ptr++;
54 return n;
55 }
56
57 // <group> ::= '('<regex>')'
58 NodePtr group(RegexInfoPtr ri) {
59 return regex(ri);
60 }
61
62
63
64 void token(RegexInfoPtr ri) {
65 while (ri->ptr[0] != '\0') {
66 if (ri->ptr[0] == '('){
67 ri->ptr++;
68 ri->tokenType = '(';
69 ri->tokenValue = 0;
70 if (ri->ptr[1] == ')') {
71 ri->ptr++;
72 }
73 return;
74 } else if (ri->ptr[0] == ')') {
75 ri->ptr++;
76 ri->tokenType = ')';
77 ri->tokenValue = ri->ptr[0];
78 return;
79 } else if (ri->ptr[0] == '[') {
80 ri->ptr++;
81 ri->tokenType = '[';
82 ri->tokenValue = ri->ptr[0];
83 if (ri->ptr[1] == ']') {
84 ri->ptr++;
85 }
86 return;
87 } else if (ri->ptr[0] == '|'){
88 ri->ptr++;
89 ri->tokenType = '|';
90 ri->tokenValue = 0;
91 return;
92 } else if (ri->ptr[0] == '*'){
93 ri->ptr++;
94 ri->tokenType = '*';
95 ri->tokenValue = 0;
96 return;
97 } else if (ri->ptr[0] == '\\'){
98 // need more proccesing
99 /*
100 \277
101 \0xa5
102 \[
103 \\
104 \utf-8 etc...
105 */
106 } else {
107 ri->tokenType = 'a';
108 ri->tokenValue = ri->ptr[0];
109 return;
110 }
111 }
112
113 ri->tokenType = 0;
114 ri->tokenValue = 0;
115 return;
116 }
117
118 // <regexAtom> ::= <literal>|<charClass>|<group>
119 NodePtr regexAtom(RegexInfoPtr ri) {
120
121 token(ri);
122 NodePtr n = NULL;
123 if (ri->tokenType == 'a') n = literal(ri);
124 else if (ri->tokenType == '[') n = charClass(ri);
125 else if (ri->tokenType == '(') n = group(ri);
126
127 return n;
128 }
129
130 // <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex>
131 NodePtr regex(RegexInfoPtr ri) {
132 NodePtr n = regexAtom(ri);
133 while (ri->ptr[0]) {
134 token(ri);
135 if (ri->tokenType == '*') {
136 n = createNode('*',n,0);
137 } else if (ri->tokenType == '|') {
138 NodePtr n1 = regex(ri);
139 n = createNode('|',n,n1);
140 } else if (ri->tokenType == ')') {
141 return n;
142 } else {
143 NodePtr n1 = regex(ri);
144 n = createNode('+',n,n1);
145 }
146 } return n;
147 }
148 16
149 17
150 int main(int argc, char **argv) 18 int main(int argc, char **argv)
151 { 19 {
152 RegexInfoPtr ri = (RegexInfoPtr)malloc(sizeof(RegexInfo)); 20 RegexInfoPtr ri = (RegexInfoPtr)malloc(sizeof(RegexInfo));