comparison c/regexParser/regexParser.cc @ 122:188d866227a4 pairPro

fix
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Mon, 30 Nov 2015 23:43:53 +0900
parents aa266a4db47c
children c363a66dc1a7 4d6ac69801ad
comparison
equal deleted inserted replaced
121:aa266a4db47c 122:188d866227a4
1 #include <stdlib.h> 1 #include <stdlib.h>
2 #include <stdio.h> 2 #include <stdio.h>
3 #include <string.h>
4 #include <ctype.h>
3 #include "regexParser.h" 5 #include "regexParser.h"
4 #include "error.h" 6 #include "error.h"
5 7
6 static NodePtr allocateNode(); 8 static NodePtr allocateNode();
7 static NodePtr createNode(RegexInfoPtr,unsigned char*,NodePtr,NodePtr); 9 static NodePtr createNode(RegexInfoPtr,unsigned char*,NodePtr,NodePtr);
40 ri->nodeNumber++; 42 ri->nodeNumber++;
41 43
42 if (ri->tokenType == 'a') { 44 if (ri->tokenType == 'a') {
43 ri->tokenType = 0; 45 ri->tokenType = 0;
44 n->cc->cond->w = getWord(ri->tokenValue); 46 n->cc->cond->w = getWord(ri->tokenValue);
45 ri->ptr += n->cc->cond->w->length-1;
46 } else { 47 } else {
47 WordPtr w = (WordPtr)malloc(sizeof(Word)); 48 WordPtr w = (WordPtr)malloc(sizeof(Word));
48 w->word = character; 49 w->word = character;
49 w->length = 1; 50 w->length = 1;
50 n->cc->cond->w = w; 51 n->cc->cond->w = w;
53 } 54 }
54 55
55 // <charClass> ::= '['<literal>'-'<literal>']' 56 // <charClass> ::= '['<literal>'-'<literal>']'
56 static 57 static
57 NodePtr charClass(RegexInfoPtr ri) { 58 NodePtr charClass(RegexInfoPtr ri) {
58 NodePtr n = (NodePtr)malloc(sizeof(Node)); 59 NodePtr n = allocateNode();
59 if (n == NULL) { 60 if (n == NULL) {
60 mallocFailedMessage(); 61 mallocFailedMessage();
61 } 62 }
62 while (ri->ptr[0] == '-') { 63
63 ri->ptr++; 64 n->tokenType = ri->tokenType;
65 n->nodeNumber = ri->nodeNumber;
66 ri->nodeNumber++;
67 n->cc->cond->w = (WordPtr)malloc(sizeof(Word));
68
69 int i = 0;
70
71 while (ri->ptr[i] != ']') {
72 if (ri->ptr[i] == '-') {
73 n->cc->begin = ri->ptr[i-1];
74 n->cc->end = ri->ptr[i+1];
75 }
76 i++;
64 } 77 }
78
79 n->cc->cond->w->word = (unsigned char*)malloc(sizeof(unsigned char)*(i+1));
80 strncpy((char*)n->cc->cond->w->word, (char*)ri->ptr,i+1);
81 n->cc->cond->w->word[i] = '\0';
82 ri->ptr += i+1;
83
65 return n; 84 return n;
66 } 85 }
67 86
68 // <literal> ::= [a-z][A-Z][0-9] 87 // <literal> ::= [a-z][A-Z][0-9]
69 static 88 static
94 ri->tokenType = ')'; 113 ri->tokenType = ')';
95 ri->tokenValue = ri->ptr; 114 ri->tokenValue = ri->ptr;
96 return; 115 return;
97 } else if (ri->ptr[0] == '[') { 116 } else if (ri->ptr[0] == '[') {
98 ri->ptr++; 117 ri->ptr++;
99 ri->tokenType = '['; 118 ri->tokenType = 'c';
100 ri->tokenValue = ri->ptr; 119 ri->tokenValue = ri->ptr;
101 if (ri->ptr[1] == ']') { 120 if (ri->ptr[1] == ']') {
102 ri->ptr++; 121 ri->ptr++;
103 } 122 }
104 return; 123 return;
122 \utf-8 etc... 141 \utf-8 etc...
123 */ 142 */
124 } else { 143 } else {
125 ri->tokenType = 'a'; 144 ri->tokenType = 'a';
126 ri->tokenValue = ri->ptr; 145 ri->tokenValue = ri->ptr;
127 ri->ptr++; 146 while (isalnum(ri->ptr[0])) {
147 ri->ptr++;
148 }
128 return; 149 return;
129 } 150 }
130 } 151 }
131 return; 152 return;
132 } 153 }
136 NodePtr regexAtom(RegexInfoPtr ri) { 157 NodePtr regexAtom(RegexInfoPtr ri) {
137 158
138 token(ri); 159 token(ri);
139 NodePtr n = NULL; 160 NodePtr n = NULL;
140 if (ri->tokenType == 'a') n = literal(ri); 161 if (ri->tokenType == 'a') n = literal(ri);
141 else if (ri->tokenType == '[') n = charClass(ri); 162 else if (ri->tokenType == 'c') n = charClass(ri);
142 else if (ri->tokenType == '(') n = group(ri); 163 else if (ri->tokenType == '(') n = group(ri);
143 164
144 return n; 165 return n;
145 } 166 }
146 167
147 // <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex> 168 // <regex> ::= <regexAtom> | <regexAtom><regex>'*' | <regexAtom>'*' | <regexAtom>'|'<regex> | <regexAtom><regex> | '(' regex ')'
148 NodePtr regex(RegexInfoPtr ri) { 169 NodePtr regex(RegexInfoPtr ri) {
149 NodePtr n = regexAtom(ri); 170 NodePtr n = regexAtom(ri);
150 while (ri->ptr[0]) { 171 while (ri->ptr[0]) {
151 token(ri); 172 token(ri);
152 if (ri->tokenType == '*') { 173 if (ri->tokenType == '*') {