comparison c/regexParser/regexParser.cc @ 121:aa266a4db47c pairPro

merge
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Thu, 26 Nov 2015 21:17:26 +0900
parents 2f0653f8eabb 5d29b6a1b50f
children 188d866227a4
comparison
equal deleted inserted replaced
119:2f0653f8eabb 121:aa266a4db47c
1 #include <stdlib.h> 1 #include <stdlib.h>
2 #include <stdio.h> 2 #include <stdio.h>
3 #include "regexParser.h" 3 #include "regexParser.h"
4 #include "error.h" 4 #include "error.h"
5 5
6 static NodePtr createNode(RegexInfoPtr,NodePtr,NodePtr); 6 static NodePtr allocateNode();
7 static NodePtr createNode(RegexInfoPtr,unsigned char*,NodePtr,NodePtr);
7 static NodePtr charClass(RegexInfoPtr); 8 static NodePtr charClass(RegexInfoPtr);
8 static NodePtr group(RegexInfoPtr); 9 static NodePtr group(RegexInfoPtr);
9 static void token(RegexInfoPtr); 10 static void token(RegexInfoPtr);
10 static NodePtr regexAtom(RegexInfoPtr); 11 static NodePtr regexAtom(RegexInfoPtr);
11 NodePtr regex(RegexInfoPtr); 12 NodePtr regex(RegexInfoPtr);
15 * tokenType 16 * tokenType
16 * regexPosition(state) 17 * regexPosition(state)
17 * stateTransitionTable 18 * stateTransitionTable
18 */ 19 */
19 20
21 static
20 NodePtr allocateNode() { 22 NodePtr allocateNode() {
21 NodePtr n = (NodePtr)malloc(sizeof(node)); 23 NodePtr n = (NodePtr)malloc(sizeof(node));
22 n->cc = (CharClassPtr)malloc(sizeof(CharClass)); 24 n->cc = (CharClassPtr)malloc(sizeof(CharClass));
23 n->cc->cond = (ConditionList)malloc(sizeof(Condition)); 25 n->cc->cond = (ConditionList)malloc(sizeof(Condition));
24 return n; 26 return n;
25 } 27 }
26 28
27 static 29 static
28 NodePtr createNode(RegexInfoPtr ri, NodePtr left, NodePtr right) { 30 NodePtr createNode(RegexInfoPtr ri,unsigned char *character, NodePtr left, NodePtr right) {
29 NodePtr n = allocateNode(); 31 NodePtr n = allocateNode();
30 if (n == NULL) { 32 if (n == NULL) {
31 mallocFailedMessage(); 33 mallocFailedMessage();
32 } 34 }
33 35
34 n->tokenType = ri->tokenType; 36 n->tokenType = ri->tokenType;
35 n->cc->cond->character = ri->tokenValue;
36 n->left = left; 37 n->left = left;
37 n->right = right; 38 n->right = right;
39 n->nodeNumber = ri->nodeNumber;
40 ri->nodeNumber++;
38 41
39 if (ri->tokenType == 'a') { 42 if (ri->tokenType == 'a') {
40 n->nodeNumber = ri->nodeNumber;
41 ri->nodeNumber++;
42 ri->tokenType = 0; 43 ri->tokenType = 0;
44 n->cc->cond->w = getWord(ri->tokenValue);
45 ri->ptr += n->cc->cond->w->length-1;
46 } else {
47 WordPtr w = (WordPtr)malloc(sizeof(Word));
48 w->word = character;
49 w->length = 1;
50 n->cc->cond->w = w;
43 } 51 }
44 return n; 52 return n;
45 } 53 }
46 54
47 // <charClass> ::= '['<literal>'-'<literal>']' 55 // <charClass> ::= '['<literal>'-'<literal>']'
58 } 66 }
59 67
60 // <literal> ::= [a-z][A-Z][0-9] 68 // <literal> ::= [a-z][A-Z][0-9]
61 static 69 static
62 NodePtr literal(RegexInfoPtr ri) { 70 NodePtr literal(RegexInfoPtr ri) {
63 NodePtr n = createNode(ri,0,0); 71 NodePtr n = createNode(ri,ri->ptr,0,0);
64 ri->ptr++;
65 return n; 72 return n;
66 } 73 }
67 74
68 // <group> ::= '('<regex>')' 75 // <group> ::= '('<regex>')'
69 static 76 static
75 void token(RegexInfoPtr ri) { 82 void token(RegexInfoPtr ri) {
76 while (ri->ptr[0] != '\0') { 83 while (ri->ptr[0] != '\0') {
77 if (ri->ptr[0] == '('){ 84 if (ri->ptr[0] == '('){
78 ri->ptr++; 85 ri->ptr++;
79 ri->tokenType = '('; 86 ri->tokenType = '(';
80 ri->tokenValue = 0; 87 ri->tokenValue = NULL;
81 if (ri->ptr[1] == ')') { 88 if (ri->ptr[1] == ')') {
82 ri->ptr++; 89 ri->ptr++;
83 } 90 }
84 return; 91 return;
85 } else if (ri->ptr[0] == ')') { 92 } else if (ri->ptr[0] == ')') {
86 ri->ptr++; 93 ri->ptr++;
87 ri->tokenType = ')'; 94 ri->tokenType = ')';
88 ri->tokenValue = ri->ptr[0]; 95 ri->tokenValue = ri->ptr;
89 return; 96 return;
90 } else if (ri->ptr[0] == '[') { 97 } else if (ri->ptr[0] == '[') {
91 ri->ptr++; 98 ri->ptr++;
92 ri->tokenType = '['; 99 ri->tokenType = '[';
93 ri->tokenValue = ri->ptr[0]; 100 ri->tokenValue = ri->ptr;
94 if (ri->ptr[1] == ']') { 101 if (ri->ptr[1] == ']') {
95 ri->ptr++; 102 ri->ptr++;
96 } 103 }
97 return; 104 return;
98 } else if (ri->ptr[0] == '|'){ 105 } else if (ri->ptr[0] == '|'){
99 ri->ptr++; 106 ri->ptr++;
100 ri->tokenType = '|'; 107 ri->tokenType = '|';
101 ri->tokenValue = '|'; 108 ri->tokenValue = NULL;
102 return; 109 return;
103 } else if (ri->ptr[0] == '*'){ 110 } else if (ri->ptr[0] == '*'){
104 ri->ptr++; 111 ri->ptr++;
105 ri->tokenType = '*'; 112 ri->tokenType = '*';
106 ri->tokenValue = '*'; 113 ri->tokenValue = NULL;
107 return; 114 return;
108 } else if (ri->ptr[0] == '\\'){ 115 } else if (ri->ptr[0] == '\\'){
109 // need more proccesing 116 // need more proccesing
110 /* 117 /*
111 \277 118 \277
114 \\ 121 \\
115 \utf-8 etc... 122 \utf-8 etc...
116 */ 123 */
117 } else { 124 } else {
118 ri->tokenType = 'a'; 125 ri->tokenType = 'a';
119 ri->tokenValue = ri->ptr[0]; 126 ri->tokenValue = ri->ptr;
127 ri->ptr++;
120 return; 128 return;
121 } 129 }
122 } 130 }
123 ri->tokenType = 0;
124 ri->tokenValue = 0;
125 return; 131 return;
126 } 132 }
127 133
128 // <regexAtom> ::= <literal>|<charClass>|<group> 134 // <regexAtom> ::= <literal>|<charClass>|<group>
129 static 135 static
142 NodePtr regex(RegexInfoPtr ri) { 148 NodePtr regex(RegexInfoPtr ri) {
143 NodePtr n = regexAtom(ri); 149 NodePtr n = regexAtom(ri);
144 while (ri->ptr[0]) { 150 while (ri->ptr[0]) {
145 token(ri); 151 token(ri);
146 if (ri->tokenType == '*') { 152 if (ri->tokenType == '*') {
147 n = createNode(ri,n,0); 153 unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
154 syntax[0] = '*';
155 n = createNode(ri,syntax,n,0);
148 } else if (ri->tokenType == '|') { 156 } else if (ri->tokenType == '|') {
149 NodePtr n1 = regex(ri); 157 NodePtr n1 = regex(ri);
150 ri->tokenValue = '|'; 158 unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
151 n = createNode(ri,n,n1); 159 syntax[0] = '|';
160 n = createNode(ri,syntax,n,n1);
152 } else if (ri->tokenType == ')') { 161 } else if (ri->tokenType == ')') {
153 return n; 162 return n;
154 } else { 163 } else {
155 NodePtr n1 = regex(ri); 164 NodePtr n1 = regex(ri);
156 ri->tokenValue = '+'; 165 unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
157 n = createNode(ri,n,n1); 166 syntax[0] = '+';
167 n = createNode(ri,syntax,n,n1);
158 } 168 }
159 } return n; 169 } return n;
160 } 170 }