comparison c/regexParser/createRegexParser.cc @ 95:1cdad0468484 impl-bitvector

rename createRegexTree to createRegexParser
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Tue, 10 Nov 2015 15:35:49 +0900
parents c/regexParser/createRegexTree.cc@912d7bd51f38
children 0b6940588e88
comparison
equal deleted inserted replaced
94:43b807f88961 95:1cdad0468484
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include "regexParser.h"
4
5 NodePtr createNode(RegexInfoPtr,unsigned char,NodePtr,NodePtr);
6 NodePtr charClass(RegexInfoPtr);
7 NodePtr group(RegexInfoPtr);
8 void token(RegexInfoPtr);
9 NodePtr regexAtom(RegexInfoPtr);
10 NodePtr regex(RegexInfoPtr);
11
12 /**
13 * Create a node of regex parse tree.
14 * tokenType
15 * regexPosition(state)
16 * stateTransitionTable
17 */
18 NodePtr createNode(RegexInfoPtr ri,unsigned char character, NodePtr left, NodePtr right) {
19 NodePtr n = (NodePtr)malloc(sizeof(Node));
20 n->tokenType = ri->tokenType;
21 n->self = n;
22 n->Value.character = character;
23 n->left = left;
24 n->right = right;
25
26 if (ri->tokenType == 'a') {
27 n->nodeNumber = ri->nodeNumber;
28 ri->nodeNumber++;
29 ri->tokenType = 0;
30 }
31 return n;
32 }
33
34 // <charClass> ::= '['<literal>'-'<literal>']'
35 NodePtr charClass(RegexInfoPtr ri) {
36 NodePtr n = (NodePtr)malloc(sizeof(Node));
37 unsigned char startChar = ri->ptr[0];
38 while (ri->ptr[0] == '-') {
39 ri->ptr++;
40 }
41 unsigned char endChar = ri->ptr[0];
42 unsigned char *charTable = (unsigned char*)malloc(sizeof(char)*256);
43
44 return n;
45 }
46
47 // <literal> ::= [a-z][A-Z][0-9]
48 NodePtr literal(RegexInfoPtr ri) {
49 unsigned char *top = ri->ptr;
50 NodePtr n = createNode(ri,ri->ptr[0],0,0);
51 ri->ptr++;
52 return n;
53 }
54
55 // <group> ::= '('<regex>')'
56 NodePtr group(RegexInfoPtr ri) {
57 return regex(ri);
58 }
59
60
61
62 void token(RegexInfoPtr ri) {
63 while (ri->ptr[0] != '\0') {
64 if (ri->ptr[0] == '('){
65 ri->ptr++;
66 ri->tokenType = '(';
67 ri->tokenValue = 0;
68 if (ri->ptr[1] == ')') {
69 ri->ptr++;
70 }
71 return;
72 } else if (ri->ptr[0] == ')') {
73 ri->ptr++;
74 ri->tokenType = ')';
75 ri->tokenValue = ri->ptr[0];
76 return;
77 } else if (ri->ptr[0] == '[') {
78 ri->ptr++;
79 ri->tokenType = '[';
80 ri->tokenValue = ri->ptr[0];
81 if (ri->ptr[1] == ']') {
82 ri->ptr++;
83 }
84 return;
85 } else if (ri->ptr[0] == '|'){
86 ri->ptr++;
87 ri->tokenType = '|';
88 ri->tokenValue = 0;
89 return;
90 } else if (ri->ptr[0] == '*'){
91 ri->ptr++;
92 ri->tokenType = '*';
93 ri->tokenValue = 0;
94 return;
95 } else if (ri->ptr[0] == '\\'){
96 // need more proccesing
97 /*
98 \277
99 \0xa5
100 \[
101 \\
102 \utf-8 etc...
103 */
104 } else {
105 ri->tokenType = 'a';
106 ri->tokenValue = ri->ptr[0];
107 return;
108 }
109 }
110 ri->tokenType = 0;
111 ri->tokenValue = 0;
112 return;
113 }
114
115 // <regexAtom> ::= <literal>|<charClass>|<group>
116 NodePtr regexAtom(RegexInfoPtr ri) {
117
118 token(ri);
119 NodePtr n = NULL;
120 if (ri->tokenType == 'a') n = literal(ri);
121 else if (ri->tokenType == '[') n = charClass(ri);
122 else if (ri->tokenType == '(') n = group(ri);
123
124 return n;
125 }
126
127 // <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex>
128 NodePtr regex(RegexInfoPtr ri) {
129 NodePtr n = regexAtom(ri);
130 while (ri->ptr[0]) {
131 token(ri);
132 if (ri->tokenType == '*') {
133 n = createNode(ri,'*',n,0);
134 } else if (ri->tokenType == '|') {
135 NodePtr n1 = regex(ri);
136 n = createNode(ri,'|',n,n1);
137 } else if (ri->tokenType == ')') {
138 return n;
139 } else {
140 NodePtr n1 = regex(ri);
141 n = createNode(ri,'+',n,n1);
142 }
143 } return n;
144 }