comparison c/regexParser/main.cc @ 80:0a452d69f0e2

Remove global variables in main.cc (parser state moved into the RegexInfo struct)
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Wed, 07 Oct 2015 16:08:34 +0900
parents 52da06c3f050
children 27883946b2dc
comparison
equal deleted inserted replaced
79:52da06c3f050 80:0a452d69f0e2
9 #include <stdio.h> 9 #include <stdio.h>
10 #include <stdlib.h> 10 #include <stdlib.h>
11 #include <string.h> 11 #include <string.h>
12 #include "regexParser.h" 12 #include "regexParser.h"
13 13
14 unsigned char *ptr; 14 typedef struct regexInfo {
15 unsigned char tokenType; 15 unsigned char *ptr;
16 int tokenValue; 16 unsigned char tokenType;
17 int tokenValue;
18 } RegexInfo, *RegexInfoPtr;
17 19
18 NodePtr charClass(); 20 NodePtr charClass();
19 NodePtr group(); 21 NodePtr group();
20 NodePtr regex(); 22 NodePtr regex(RegexInfoPtr);
21 NodePtr createNode(unsigned char,NodePtr,NodePtr); 23 NodePtr createNode(unsigned char,NodePtr,NodePtr);
22 void token(); 24 void token();
23 NodePtr regexAtom(); 25 NodePtr regexAtom();
24 extern void printTree(NodePtr); 26 extern void printTree(NodePtr);
25
26
27 bool isLiteral(char c) {
28 if (*ptr > 0x7f) return true;
29 else if (*ptr == '(') return false;
30 else if (*ptr == '[') return false;
31 else if (*ptr == '|') return false;
32 else if (*ptr == '*') return false;
33 return true;
34 }
35 27
36 /** 28 /**
37 * Create a node of regex parse tree. 29 * Create a node of regex parse tree.
38 * tokenType 30 * tokenType
39 * regexPosition(state) 31 * regexPosition(state)
47 n->right = right; 39 n->right = right;
48 return n; 40 return n;
49 } 41 }
50 42
51 // <charClass> ::= '['<literal>'-'<literal>']' 43 // <charClass> ::= '['<literal>'-'<literal>']'
52 NodePtr charClass() { 44 NodePtr charClass(RegexInfoPtr ri) {
53 NodePtr n = (NodePtr)malloc(sizeof(Node)); 45 NodePtr n = (NodePtr)malloc(sizeof(Node));
54 unsigned char startChar = *ptr; 46 unsigned char startChar = ri->ptr[0];
55 while (*ptr == '-') { 47 while (ri->ptr[0] == '-') {
56 ptr++; 48 ri->ptr++;
57 } 49 }
58 unsigned char endChar = *ptr; 50 unsigned char endChar = ri->ptr[0];
59 unsigned char *charTable = (unsigned char*)malloc(sizeof(char)*256); 51 unsigned char *charTable = (unsigned char*)malloc(sizeof(char)*256);
60 52
61 return n; 53 return n;
62 } 54 }
63 55
64 // <literal> ::= [a-z][A-Z][0-9] 56 // <literal> ::= [a-z][A-Z][0-9]
65 NodePtr literal() { 57 NodePtr literal(RegexInfoPtr ri) {
66 NodePtr n = createNode(*ptr,0,0); 58 NodePtr n = createNode(ri->ptr[0],0,0);
67 ptr++; 59 ri->ptr++;
68 return n; 60 return n;
69 } 61 }
70 62
71 // <group> ::= '('<regex>')' 63 // <group> ::= '('<regex>')'
72 NodePtr group() { 64 NodePtr group(RegexInfoPtr ri) {
73 return regex(); 65 return regex(ri);
74 } 66 }
75 67
76 68
77 69
78 void token() { 70 void token(RegexInfoPtr ri) {
79 while (*ptr != '\0') { 71 while (ri->ptr[0] != '\0') {
80 if (*ptr == '('){ 72 if (ri->ptr[0] == '('){
81 ptr++; 73 ri->ptr++;
82 tokenType = '('; 74 ri->tokenType = '(';
83 tokenValue = 0; 75 ri->tokenValue = 0;
84 if (ptr[1] == ')') { 76 if (ri->ptr[1] == ')') {
85 ptr++; 77 ri->ptr++;
86 } 78 }
87 return; 79 return;
88 } else if (*ptr == ')') { 80 } else if (ri->ptr[0] == ')') {
89 ptr++; 81 ri->ptr++;
90 tokenType = ')'; 82 ri->tokenType = ')';
91 tokenValue = *ptr; 83 ri->tokenValue = ri->ptr[0];
92 return; 84 return;
93 } else if (*ptr == '[') { 85 } else if (ri->ptr[0] == '[') {
94 ptr++; 86 ri->ptr++;
95 tokenType = '['; 87 ri->tokenType = '[';
96 tokenValue = *ptr; 88 ri->tokenValue = ri->ptr[0];
97 if (ptr[1] == ']') { 89 if (ri->ptr[1] == ']') {
98 ptr++; 90 ri->ptr++;
99 } 91 }
100 return; 92 return;
101 } else if (*ptr == '|'){ 93 } else if (ri->ptr[0] == '|'){
102 ptr++; 94 ri->ptr++;
103 tokenType = '|'; 95 ri->tokenType = '|';
104 tokenValue = 0; 96 ri->tokenValue = 0;
105 return; 97 return;
106 } else if (*ptr == '*'){ 98 } else if (ri->ptr[0] == '*'){
107 ptr++; 99 ri->ptr++;
108 tokenType = '*'; 100 ri->tokenType = '*';
109 tokenValue = 0; 101 ri->tokenValue = 0;
110 return; 102 return;
111 } else if (*ptr == '\\'){ 103 } else if (ri->ptr[0] == '\\'){
112 // need more processing 104 // need more processing
113 /* 105 /*
114 \277 106 \277
115 \0xa5 107 \0xa5
116 \[ 108 \[
117 \\ 109 \\
118 \utf-8 etc... 110 \utf-8 etc...
119 */ 111 */
120 } else { 112 } else {
121 tokenType = 'a'; 113 ri->tokenType = 'a';
122 tokenValue = *ptr; 114 ri->tokenValue = ri->ptr[0];
123 return; 115 return;
124 } 116 }
125 } 117 }
126 118
127 tokenType = 0; 119 ri->tokenType = 0;
128 tokenValue = 0; 120 ri->tokenValue = 0;
129 return; 121 return;
130 } 122 }
131 123
132 // <regexAtom> ::= <literal>|<charClass>|<group> 124 // <regexAtom> ::= <literal>|<charClass>|<group>
133 NodePtr regexAtom() { 125 NodePtr regexAtom(RegexInfoPtr ri) {
134 126
135 token(); 127 token(ri);
136 NodePtr n = NULL; 128 NodePtr n = NULL;
137 if (tokenType == 'a') n = literal(); 129 if (ri->tokenType == 'a') n = literal(ri);
138 else if (tokenType == '[') n = charClass(); 130 else if (ri->tokenType == '[') n = charClass(ri);
139 else if (tokenType == '(') n = group(); 131 else if (ri->tokenType == '(') n = group(ri);
140 132
141 return n; 133 return n;
142 } 134 }
143 135
144 // <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex> 136 // <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex>
145 NodePtr regex() { 137 NodePtr regex(RegexInfoPtr ri) {
146 NodePtr n = regexAtom(); 138 NodePtr n = regexAtom(ri);
147 while (*ptr) { 139 while (ri->ptr[0]) {
148 token(); 140 token(ri);
149 if (tokenType == '*') { 141 if (ri->tokenType == '*') {
150 n = createNode('*',n,0); 142 n = createNode('*',n,0);
151 } else if (tokenType == '|') { 143 } else if (ri->tokenType == '|') {
152 NodePtr n1 = regex(); 144 NodePtr n1 = regex(ri);
153 n = createNode('|',n,n1); 145 n = createNode('|',n,n1);
154 } else if (tokenType == ')') { 146 } else if (ri->tokenType == ')') {
155 return n; 147 return n;
156 } else { 148 } else {
157 NodePtr n1 = regex(); 149 NodePtr n1 = regex(ri);
158 n = createNode('+',n,n1); 150 n = createNode('+',n,n1);
159 } 151 }
160 } return n; 152 } return n;
161 } 153 }
162 154
163 155
164 int main(int argc, char **argv) 156 int main(int argc, char **argv)
165 { 157 {
158 RegexInfoPtr ri = (RegexInfoPtr)malloc(sizeof(RegexInfo));
159
166 for (int i = 1; i < argc; i++) { 160 for (int i = 1; i < argc; i++) {
167 if (strcmp(argv[i],"-regex") == 0) { 161 if (strcmp(argv[i],"-regex") == 0) {
168 ptr = (unsigned char*)argv[i+1]; i++; 162 ri->ptr = (unsigned char*)argv[i+1]; i++;
169 } 163 }
170 } 164 }
171 165
172 printf("regex : %s\n",ptr); 166 printf("regex : %s\n",ri->ptr);
173 NodePtr n = regex(); 167 NodePtr n = regex(ri);
174 printTree(n); 168 printTree(n);
175 return 0; 169 return 0;
176 } 170 }