annotate c/regexParser/main.cc @ 71:58d2b10988c9

move ptr++ in token()
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Tue, 25 Aug 2015 20:56:26 +0900
parents 87dff3a124ab
children c4b934048e2a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
55
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 54
diff changeset
1 /*
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
2 * <literal> ::= [a-z][A-Z][0-9]
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
3 * <charClass> ::= '['<literal>'-'<literal>']'
58
4053c3e0fa7f implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 57
diff changeset
4 * <group> ::= '('<regex>')'
66
f8fb3b463f70 fix when '|' come procces
Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp>
parents: 65
diff changeset
5 * <regexAtom> ::= <literal>|<charClass>|<group>
f8fb3b463f70 fix when '|' come procces
Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp>
parents: 65
diff changeset
6 * <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex>
55
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 54
diff changeset
7 */
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 54
diff changeset
8
45
89a198fa6b23 add dfrTobin
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 #include <stdio.h>
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
10 #include <stdlib.h>
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
11 #include <string.h>
57
71b497d25273 fix literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 56
diff changeset
12
63
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
13 typedef struct charClass {
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
14 unsigned char table[256];
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
15 struct utf8Range {
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
16 unsigned char *begin;
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
17 unsigned char *end;
64
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
18 struct utf8Range *next;
63
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
19 } *rangeList;
64
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
20 } CharClass, *CharClassPtr;
63
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
21
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
22 typedef struct node {
63
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
23 unsigned char type;
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
24 union value {
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
25 charClass *cc;
67
4842ca2cf8ee print character
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 66
diff changeset
26 unsigned char character;
4842ca2cf8ee print character
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 66
diff changeset
27 } Value, *ValuePtr;
58
4053c3e0fa7f implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 57
diff changeset
28 struct node *self;
68
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
29 struct node *parent;
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
30 struct node *left;
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
31 struct node *right;
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
32 } Node, *NodePtr;
52
a2826bf4e80a remove magic number
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 51
diff changeset
33
64
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
34 unsigned char *ptr;
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
35 unsigned char tokenType;
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
36 int tokenValue;
58
4053c3e0fa7f implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 57
diff changeset
37 NodePtr regexHeadNode;
4053c3e0fa7f implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 57
diff changeset
38
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
39 NodePtr charClass();
58
4053c3e0fa7f implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 57
diff changeset
40 NodePtr group();
63
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
41 NodePtr orexp();
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
42 NodePtr asterisk();
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
43 NodePtr regex();
67
4842ca2cf8ee print character
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 66
diff changeset
44 NodePtr createNode(unsigned char,NodePtr,NodePtr);
64
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
45 extern void token();
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
46 extern NodePtr regexAtom();
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
47
55
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 54
diff changeset
48
62
a49b4a8b8c14 implement isLiteral
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 61
diff changeset
49 bool isLiteral(char c) {
63
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
50 if (*ptr > 0x7f) return true;
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
51 else if (*ptr == '(') return false;
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
52 else if (*ptr == '[') return false;
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
53 else if (*ptr == '|') return false;
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
54 else if (*ptr == '*') return false;
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
55 return true;
62
a49b4a8b8c14 implement isLiteral
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 61
diff changeset
56 }
a49b4a8b8c14 implement isLiteral
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 61
diff changeset
57
60
8616a045a7f4 impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 59
diff changeset
58 void printNodeDate(NodePtr n) {
8616a045a7f4 impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 59
diff changeset
59 puts("---------------------");
67
4842ca2cf8ee print character
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 66
diff changeset
60 printf("Self Node char : %c\n", n->Value.character);
60
8616a045a7f4 impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 59
diff changeset
61 printf("Self Node addr : %p\n", n->self);
8616a045a7f4 impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 59
diff changeset
62 printf("left Node addr : %p\n", n->left);
8616a045a7f4 impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 59
diff changeset
63 printf("right Node addr : %p\n", n->right);
8616a045a7f4 impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 59
diff changeset
64 puts("---------------------");
8616a045a7f4 impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 59
diff changeset
65 puts("");
8616a045a7f4 impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 59
diff changeset
66 }
8616a045a7f4 impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 59
diff changeset
67
67
4842ca2cf8ee print character
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 66
diff changeset
68 NodePtr createNode(unsigned char character, NodePtr left, NodePtr right) {
57
71b497d25273 fix literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 56
diff changeset
69 NodePtr n = (NodePtr)malloc(sizeof(Node));
58
4053c3e0fa7f implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 57
diff changeset
70 n->self = n;
67
4842ca2cf8ee print character
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 66
diff changeset
71 n->Value.character = character;
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
72 n->left = left;
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
73 n->right = right;
58
4053c3e0fa7f implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 57
diff changeset
74
60
8616a045a7f4 impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 59
diff changeset
75 printNodeDate(n);
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
76 return n;
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
77 }
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
78
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
79 // <charClass> ::= '['<literal>'-'<literal>']'
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
80 NodePtr charClass() {
62
a49b4a8b8c14 implement isLiteral
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 61
diff changeset
81 NodePtr n = (NodePtr)malloc(sizeof(Node));
67
4842ca2cf8ee print character
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 66
diff changeset
82 unsigned char startChar = *ptr;
4842ca2cf8ee print character
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 66
diff changeset
83 while (*ptr == '-') {
4842ca2cf8ee print character
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 66
diff changeset
84 ptr++;
4842ca2cf8ee print character
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 66
diff changeset
85 }
4842ca2cf8ee print character
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 66
diff changeset
86 unsigned char endChar = *ptr;
4842ca2cf8ee print character
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 66
diff changeset
87 unsigned char *charTable = (unsigned char*)malloc(sizeof(char)*256);
4842ca2cf8ee print character
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 66
diff changeset
88
62
a49b4a8b8c14 implement isLiteral
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 61
diff changeset
89 return n;
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
90 }
55
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 54
diff changeset
91
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
92 // <literal> ::= [a-z][A-Z][0-9]
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
93 NodePtr literal() {
65
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 64
diff changeset
94 NodePtr n = createNode(*ptr,0,0);
57
71b497d25273 fix literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 56
diff changeset
95 ptr++;
71b497d25273 fix literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 56
diff changeset
96 return n;
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
97 }
55
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 54
diff changeset
98
63
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
99 // <group> ::= '('<regex>')'
58
4053c3e0fa7f implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 57
diff changeset
100 NodePtr group() {
63
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
101 NodePtr n = regex();
58
4053c3e0fa7f implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 57
diff changeset
102 if (*ptr == ')') {
63
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
103 n = createNode('(',n,0);
58
4053c3e0fa7f implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 57
diff changeset
104 } else {
63
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
105 // ) required
58
4053c3e0fa7f implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 57
diff changeset
106 }
4053c3e0fa7f implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 57
diff changeset
107 return n;
4053c3e0fa7f implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 57
diff changeset
108 }
4053c3e0fa7f implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 57
diff changeset
109
55
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 54
diff changeset
110
71
58d2b10988c9 move ptr++ in token()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 70
diff changeset
111
63
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
112 void token() {
58
4053c3e0fa7f implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 57
diff changeset
113 while (*ptr != '\0') {
4053c3e0fa7f implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 57
diff changeset
114 if ((*ptr == '(') || (*ptr == ')')) {
71
58d2b10988c9 move ptr++ in token()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 70
diff changeset
115 ptr++;
70
87dff3a124ab resolve segmentation fault(But not correct performance)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 69
diff changeset
116 tokenType = '(';
63
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
117 tokenValue = 0;
67
4842ca2cf8ee print character
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 66
diff changeset
118 if (ptr[1] == ')') {
4842ca2cf8ee print character
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 66
diff changeset
119 ptr++;
4842ca2cf8ee print character
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 66
diff changeset
120 }
4842ca2cf8ee print character
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 66
diff changeset
121 return;
58
4053c3e0fa7f implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 57
diff changeset
122 } else if (*ptr == '[') {
71
58d2b10988c9 move ptr++ in token()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 70
diff changeset
123 ptr++;
63
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
124 tokenType = '[';
64
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
125 tokenValue = *ptr;
63
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
126 if (ptr[1] == ']') {
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
127 ptr++;
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
128 }
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
129 return;
58
4053c3e0fa7f implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 57
diff changeset
130 } else if (*ptr == '|'){
71
58d2b10988c9 move ptr++ in token()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 70
diff changeset
131 ptr++;
63
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
132 tokenType = '|';
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
133 tokenValue = 0;
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
134 return;
60
8616a045a7f4 impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 59
diff changeset
135 } else if (*ptr == '*'){
71
58d2b10988c9 move ptr++ in token()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 70
diff changeset
136 ptr++;
63
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
137 tokenType = '*';
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
138 tokenValue = 0;
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
139 return;
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
140 }
63
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
141
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
142 tokenType = 'a';
64
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
143 tokenValue = *ptr;
65
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 64
diff changeset
144 return;
63
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
145
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
146 if (*ptr == '\\') ptr++; // need more processing
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
147 /*
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
148 \277
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
149 \0xa5
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
150 \[
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
151 \\
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
152 \utf-8 etc...
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
153 */
8fd3d35e9861 add token function
masa
parents: 62
diff changeset
154
55
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 54
diff changeset
155 }
71
58d2b10988c9 move ptr++ in token()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 70
diff changeset
156
58d2b10988c9 move ptr++ in token()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 70
diff changeset
157 tokenType = 0;
58d2b10988c9 move ptr++ in token()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 70
diff changeset
158 tokenValue = 0;
58d2b10988c9 move ptr++ in token()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 70
diff changeset
159 return;
55
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 54
diff changeset
160 }
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 54
diff changeset
161
64
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
162 // <regexAtom> ::= <literal>|<charClass>|<group>
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
163 NodePtr regexAtom() {
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
164
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
165 token();
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
166 NodePtr n = NULL;
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
167 if (tokenType == 'a') n = literal();
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
168 else if (tokenType == '[') n = charClass();
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
169 else if (tokenType == '(') n = group();
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
170
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
171 return n;
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
172 }
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
173
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
174 // <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex>
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
175 NodePtr regex() {
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
176 NodePtr n = regexAtom();
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
177 while (*ptr) {
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
178 token();
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
179 if (tokenType == '*') {
71
58d2b10988c9 move ptr++ in token()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 70
diff changeset
180 n = createNode('*',n,0);
64
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
181 } else if (tokenType == '|') {
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
182 NodePtr n1 = regex();
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
183 n = createNode('|',n,n1);
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
184 } else {
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
185 NodePtr n1 = regex();
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
186 n = createNode('+',n,n1);
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
187 }
67
4842ca2cf8ee print character
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 66
diff changeset
188 } return n;
68
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
189 }
66
f8fb3b463f70 fix when '|' come procces
Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp>
parents: 65
diff changeset
190 /*
f8fb3b463f70 fix when '|' come procces
Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp>
parents: 65
diff changeset
191 * e.g.
f8fb3b463f70 fix when '|' come procces
Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp>
parents: 65
diff changeset
192 *
f8fb3b463f70 fix when '|' come procces
Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp>
parents: 65
diff changeset
193 * % ./regexParser -regex abc
68
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
194 * c
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
195 * +
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
196 * b
66
f8fb3b463f70 fix when '|' come procces
Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp>
parents: 65
diff changeset
197 * +
68
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
198 * a
66
f8fb3b463f70 fix when '|' come procces
Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp>
parents: 65
diff changeset
199 *
f8fb3b463f70 fix when '|' come procces
Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp>
parents: 65
diff changeset
200 * % ./regexParser -regex (a*|bc)d
f8fb3b463f70 fix when '|' come procces
Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp>
parents: 65
diff changeset
201 *
68
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
202 * d
66
f8fb3b463f70 fix when '|' come procces
Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp>
parents: 65
diff changeset
203 * +
68
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
204 * c
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
205 * +
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
206 * b
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
207 * |
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
208 * *
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
209 * a
66
f8fb3b463f70 fix when '|' come procces
Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp>
parents: 65
diff changeset
210 */
68
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
211
69
eecddded9b91 implement printTree
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 68
diff changeset
212 void descendTree(NodePtr n) {
eecddded9b91 implement printTree
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 68
diff changeset
213 static int d = 0;
68
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
214 if (n->right != NULL) {
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
215 d++;
69
eecddded9b91 implement printTree
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 68
diff changeset
216 descendTree(n->right);
eecddded9b91 implement printTree
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 68
diff changeset
217 d--;
eecddded9b91 implement printTree
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 68
diff changeset
218 }
eecddded9b91 implement printTree
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 68
diff changeset
219 printf("%*c%c\n",d*4, ' ',n->Value.character);
eecddded9b91 implement printTree
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 68
diff changeset
220 if (n->left != NULL) {
68
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
221 d++;
69
eecddded9b91 implement printTree
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 68
diff changeset
222 descendTree(n->left);
eecddded9b91 implement printTree
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 68
diff changeset
223 d--;
68
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
224 }
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
225 }
d27b3af1fe75 remove string()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 67
diff changeset
226
66
f8fb3b463f70 fix when '|' come procces
Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp>
parents: 65
diff changeset
227 void printTree(NodePtr n) {
69
eecddded9b91 implement printTree
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 68
diff changeset
228 descendTree(n);
66
f8fb3b463f70 fix when '|' come procces
Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp>
parents: 65
diff changeset
229 }
f8fb3b463f70 fix when '|' come procces
Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp>
parents: 65
diff changeset
230
f8fb3b463f70 fix when '|' come procces
Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp>
parents: 65
diff changeset
231
62
a49b4a8b8c14 implement isLiteral
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 61
diff changeset
232 int main(int argc, char **argv)
55
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 54
diff changeset
233 {
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
234 for (int i = 1; i < argc; i++) {
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
235 if (strcmp(argv[i],"-regex") == 0) {
64
e0ad6c145f89 remove some errors
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
236 ptr = (unsigned char*)argv[i+1]; i++;
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
237 }
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
238 }
55
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 54
diff changeset
239
56
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
240 printf("regex : %s\n",ptr);
8901bc071d33 implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
241 NodePtr n = regex();
66
f8fb3b463f70 fix when '|' come procces
Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp>
parents: 65
diff changeset
242 printTree(n);
45
89a198fa6b23 add dfrTobin
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
243 return 0;
89a198fa6b23 add dfrTobin
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
244 }