comparison c/regexParser/createRegexTree.cc @ 82:1d9bbf922bb6

add createRegexTree.cc
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Wed, 07 Oct 2015 18:13:27 +0900
parents
children 5072a44ed842
comparison
equal deleted inserted replaced
81:27883946b2dc 82:1d9bbf922bb6
1 #include <stdlib.h>
2 #include "regexParser.h"
3
4 NodePtr createNode(unsigned char,NodePtr,NodePtr);
5 NodePtr charClass(RegexInfoPtr);
6 NodePtr group(RegexInfoPtr);
7 void token(RegexInfoPtr);
8 NodePtr regexAtom(RegexInfoPtr);
9 NodePtr regex(RegexInfoPtr);
10
11 /**
12 * Create a node of regex parse tree.
13 * tokenType
14 * regexPosition(state)
15 * stateTransitionTable
16 */
17 NodePtr createNode(unsigned char character, NodePtr left, NodePtr right) {
18 NodePtr n = (NodePtr)malloc(sizeof(Node));
19 n->self = n;
20 n->Value.character = character;
21 n->left = left;
22 n->right = right;
23 return n;
24 }
25
26 // <charClass> ::= '['<literal>'-'<literal>']'
27 NodePtr charClass(RegexInfoPtr ri) {
28 NodePtr n = (NodePtr)malloc(sizeof(Node));
29 unsigned char startChar = ri->ptr[0];
30 while (ri->ptr[0] == '-') {
31 ri->ptr++;
32 }
33 unsigned char endChar = ri->ptr[0];
34 unsigned char *charTable = (unsigned char*)malloc(sizeof(char)*256);
35
36 return n;
37 }
38
39 // <literal> ::= [a-z][A-Z][0-9]
40 NodePtr literal(RegexInfoPtr ri) {
41 NodePtr n = createNode(ri->ptr[0],0,0);
42 ri->ptr++;
43 return n;
44 }
45
46 // <group> ::= '('<regex>')'
47 NodePtr group(RegexInfoPtr ri) {
48 return regex(ri);
49 }
50
51
52
53 void token(RegexInfoPtr ri) {
54 while (ri->ptr[0] != '\0') {
55 if (ri->ptr[0] == '('){
56 ri->ptr++;
57 ri->tokenType = '(';
58 ri->tokenValue = 0;
59 if (ri->ptr[1] == ')') {
60 ri->ptr++;
61 }
62 return;
63 } else if (ri->ptr[0] == ')') {
64 ri->ptr++;
65 ri->tokenType = ')';
66 ri->tokenValue = ri->ptr[0];
67 return;
68 } else if (ri->ptr[0] == '[') {
69 ri->ptr++;
70 ri->tokenType = '[';
71 ri->tokenValue = ri->ptr[0];
72 if (ri->ptr[1] == ']') {
73 ri->ptr++;
74 }
75 return;
76 } else if (ri->ptr[0] == '|'){
77 ri->ptr++;
78 ri->tokenType = '|';
79 ri->tokenValue = 0;
80 return;
81 } else if (ri->ptr[0] == '*'){
82 ri->ptr++;
83 ri->tokenType = '*';
84 ri->tokenValue = 0;
85 return;
86 } else if (ri->ptr[0] == '\\'){
87 // need more proccesing
88 /*
89 \277
90 \0xa5
91 \[
92 \\
93 \utf-8 etc...
94 */
95 } else {
96 ri->tokenType = 'a';
97 ri->tokenValue = ri->ptr[0];
98 return;
99 }
100 }
101
102 ri->tokenType = 0;
103 ri->tokenValue = 0;
104 return;
105 }
106
107 // <regexAtom> ::= <literal>|<charClass>|<group>
108 NodePtr regexAtom(RegexInfoPtr ri) {
109
110 token(ri);
111 NodePtr n = NULL;
112 if (ri->tokenType == 'a') n = literal(ri);
113 else if (ri->tokenType == '[') n = charClass(ri);
114 else if (ri->tokenType == '(') n = group(ri);
115
116 return n;
117 }
118
119 // <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex>
120 NodePtr regex(RegexInfoPtr ri) {
121 NodePtr n = regexAtom(ri);
122 while (ri->ptr[0]) {
123 token(ri);
124 if (ri->tokenType == '*') {
125 n = createNode('*',n,0);
126 } else if (ri->tokenType == '|') {
127 NodePtr n1 = regex(ri);
128 n = createNode('|',n,n1);
129 } else if (ri->tokenType == ')') {
130 return n;
131 } else {
132 NodePtr n1 = regex(ri);
133 n = createNode('+',n,n1);
134 }
135 } return n;
136 }
137