comparison regexParser/subsetConstruction.cc @ 216:4852bfa85db4

spell fix
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Wed, 30 Dec 2015 15:05:06 +0900
parents regexParser/subsetConstraction.cc@63e9224c7b2b
children a9e3512120e2
comparison
equal deleted inserted replaced
215:63e9224c7b2b 216:4852bfa85db4
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <ctype.h>
4
5 #include "regexParser.h"
6 #include "subsetConstruction.h"
7 #include "node.h"
8 #include "BitVector.h"
9 #include "error.h"
10
11 CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right) {
12 CharClassPtr cc = NEW(CharClass);
13 cc->type = 'r';
14 cc->cond.range.begin = begin;
15 cc->cond.range.end = end;
16 cc->cond.range.next = NULL;
17 cc->left = left;
18 cc->right = right;
19 cc->nextState.bitContainer = state;
20 return cc;
21 }
22
23 CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) {
24 CharClassPtr cc1;
25 if (cc) {
26 cc1 = charClassMerge(cc,mBegin,mEnd,nextState);
27 } else {
28 cc1 = createCharClassRange(mBegin,mEnd,nextState.bitContainer,NULL,NULL);
29 }
30 return cc1;
31 }
32
33 CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) {
34 // 重なっているccの領域を分割する
35 // 必要ならばnextStateを重ねあわせる
36 // 変更があった場合は新しくリストを作って返す
37 if (end < cc->cond.range.begin ) { // 1
38 if (cc->left) {
39 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,nextState.bitContainer,charClassMerge(cc->left,begin,end,nextState),cc->right);
40 } else {
41 return createCharClassRange(begin,end,nextState.bitContainer,NULL,cc);
42 }
43 } else if (end == cc->cond.range.begin && begin != end ) { // 2
44 CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,nextState);
45 if (cc->cond.range.begin == cc->cond.range.end) {
46 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,
47 cc->nextState.bitContainer | nextState.bitContainer, cc1,cc->right);
48 }
49 CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right);
50 return createCharClassRange(cc->cond.range.begin,cc->cond.range.begin,
51 cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3);
52 } else if (end < cc->cond.range.end) { // range.begin < end
53 if (begin < cc->cond.range.begin) { // 3
54 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState);
55 CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right);
56 return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3);
57 }
58 if (begin == cc->cond.range.begin) { // 6
59 CharClassPtr cc2 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right);
60 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc2);
61 }
62 // 9
63 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL);
64 CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right);
65 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc2,cc3);
66 } else if (end == cc->cond.range.end) {
67 if (begin == cc->cond.range.begin) { // 7
68 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc->right);
69 } else if (begin < cc->cond.range.begin) { // 4
70 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState);
71 return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc->right);
72 }
73 // 10 cond.range.begin < begin
74 CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,NULL,cc->right);
75 return createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,cc2);
76 }
77 if (begin > cc->cond.range.end ) { // 13
78 if (cc->right) {
79 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,nextState.bitContainer,cc->left,charClassMerge(cc->right,begin,end,nextState));
80 } else {
81 return createCharClassRange(begin,end,nextState.bitContainer,cc,NULL);
82 }
83 }
84 if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) {
85 if (end > cc->cond.range.end) { // cond.range.end < end
86 if (begin == cc->cond.range.begin) { // 8
87 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState);
88 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,
89 cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc1);
90 }
91 if (begin > cc->cond.range.begin) { // 11,12
92 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState);
93 CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL);
94 return createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc3,cc1);
95 }
96 }
97 } else if (begin < cc->cond.range.begin) { // 5
98 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState);
99 CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState);
100 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3);
101 } else {
102 printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end);
103 }
104 return cc;
105 }
106
107 void findLeftMost(CharClassPtr next,CharClassWalkerPtr walk) {
108 while (next->left) {
109 CharClassStackPtr ccs = NEW(CharClassStack);
110 ccs->next = walk->stack;
111 ccs->turn = walk->turn;
112 walk->turn = LEFT;
113 ccs->cc = next;
114 walk->stack = ccs;
115 next = next->left;
116 }
117 walk->turn = SELF;
118 walk->next = next;
119 }
120
121 CharClassWalkerPtr createCharClassWalker (CharClassPtr next) {
122 CharClassWalkerPtr walk = NEW(CharClassWalker);
123 walk->next = NULL;
124 walk->stack = NULL;
125 if (!next) return walk;
126 findLeftMost(next,walk);
127 return walk;
128 }
129
130 bool hasNext(CharClassWalkerPtr walk) {
131 return walk->next != NULL;
132 }
133
134 CharClassStackPtr charClassStackPop(CharClassWalkerPtr walk) {
135 CharClassStackPtr prev = walk->stack->next;
136 walk->turn = walk->stack->turn;
137 free(walk->stack);
138 walk->stack = prev;
139 return prev;
140 }
141
142 CharClassPtr getNext(CharClassWalkerPtr walk) {
143 CharClassPtr current = walk->next;
144 walk->next = NULL;
145 if (walk->turn == SELF) {
146 if (current->right) {
147 walk->turn = RIGHT;
148 findLeftMost(current->right,walk);
149 return current;
150 }
151 }
152 while (walk->stack) {
153 walk->next = walk->stack->cc;
154 charClassStackPop(walk);
155 if (walk->turn == LEFT) {
156 walk->turn = SELF;
157 return current;
158 }
159 }
160 walk->next = NULL;
161 return current;
162 }
163
164 void setState(CharClassPtr cc, BitVector bi) {
165 cc->nextState = bi;
166 if (cc->left) {
167 setState(cc->left,bi);
168 }
169 cc->nextState = bi;
170 if (cc->right) {
171 setState(cc->right,bi);
172 }
173 }
174
175 CharClassPtr mergeTransition(StatePtr x,CharClassPtr y) {
176 if (x->cc == NULL) {
177 return y;
178 }
179 CharClassWalkerPtr walk = createCharClassWalker(x->cc);
180 CharClassPtr ccy = y;
181 BitVector bi;
182 while (hasNext(walk)) {
183 CharClassPtr cc = getNext(walk);
184 unsigned long begin = cc->cond.range.begin;
185 unsigned long end = cc->cond.range.end;
186 bi = cc->nextState;
187 ccy = charClassMerge(ccy,begin,end,bi);
188 }
189 free(walk);
190 return ccy;
191 }
192
193 /**
194 作成する state を linked list
195 bitvector を index とした配列に BitVectorPtr を格納
196 state に対応する NodePtr を
197 */
198 StatePtr createState(TGValue tgv,NodePtr n) {
199 StatePtr s = NEW(State);
200 s->stateNum = n->stateNum = tgv.tg->totalStateCount;
201 s->next = tgv.tg->stateList;
202 tgv.tg->stateList = s;
203 s->node = n;
204 BitVector bi = createBitVector(n->stateNum);
205 s->bitState = bi;
206 tgv.tg->totalStateCount++;
207 s->cc = n->cc;
208 return s;
209 }
210
211 /**
212 pass 1 : 正規表現に必要な状態を探して、それぞれに番号を割り振る
213 前が * でない + は新しく状態を作る
214 * があったら、次の状態はその時の先頭の状態になる
215 常に先頭の状態を返す
216 最後が*ならば、それを持ち歩く
217 pass 2:
218 割り当てられた状態に沿って charclass の行き先を書き換える
219 書き換えた charclass を merge する
220 前の部分に * がない + は新しい状態をつくるので、state を切り替えて生成する
221 */
222 TGValue generateTransition(NodePtr n,TGValue tgv, int pass) {
223 if (n->tokenType == '+') {
224 TGValue tgvLeft = tgv;
225 tgvLeft.endState = n->right->state;
226 tgvLeft.asterisk = NULL;
227 tgvLeft = generateTransition(n->left,tgv,pass);
228 TGValue tgvRight = tgvLeft;
229 if (tgvLeft.asterisk) {
230 n->right->state = tgv.endState;
231 tgvRight.startState = tgvRight.asterisk;
232 tgvRight = generateTransition(n->right,tgvRight,pass);
233 tgvLeft.asterisk = tgvRight.asterisk;
234 return tgvLeft;
235 }
236 tgvRight.asterisk = NULL;
237 if (pass==1) {
238 n->right->state = tgvRight.startState = createState(tgvRight,n->right);
239 } else {
240 tgvLeft.startState = n->right->state;
241 tgvLeft.tg->stateArray[tgvLeft.startState->bitState.bitContainer] = tgvLeft.startState ;
242 }
243 tgvRight = generateTransition(n->right,tgvRight,pass);
244 tgvLeft.asterisk = tgvRight.asterisk;
245 return tgvLeft;
246 } else if (n->tokenType == '|') {
247 TGValue tgv1 = generateTransition(n->left,tgv,pass);
248 TGValue tgv2 = generateTransition(n->right,tgv1,pass);
249 return tgv2;
250 } else if (n->tokenType == '*') {
251 TGValue tgvAstah = tgv;
252 tgvAstah.endState = tgvAstah.startState;
253 tgvAstah = generateTransition(n->left,tgvAstah,pass);
254 tgvAstah.asterisk = tgvAstah.startState;
255 return tgvAstah;
256 } else if (n->tokenType == 'c' || n->tokenType == 'a'){
257 TGValue tgv1 = tgv;
258 if (pass==1) {
259 n->stateNum = tgv.startState->stateNum;
260 n->nextStateNum = tgv.endState->stateNum;
261 n->state = tgv.startState;;
262 n->nextState = tgv.endState;
263 } else {
264 BitVector bi = createBitVector(n->nextStateNum);
265 setState(n->cc,bi);
266 tgv1.startState->cc = mergeTransition(tgv1.startState,n->cc);
267 }
268 return tgv1;
269 } else {
270 return tgv;
271 }
272 }
273
274 TransitionGeneratorPtr createTransitionGenerator() {
275 TransitionGeneratorPtr tg = NEW(TransitionGenerator);
276 tg->totalStateCount = 0;
277 tg->stack = NULL;
278 tg->stateArray = NULL;
279 tg->stateList = NULL;
280 return tg;
281 }
282
283 TGValue createTGValue() {
284 TGValue tgv;
285 tgv.startState = NULL;
286 tgv.endState = NULL;
287 tgv.asterisk = NULL;
288 tgv.tg = createTransitionGenerator();
289 return tgv;
290 }
291
292 TGValue generateTransitionList(NodePtr n) {
293 TGValue tgv = createTGValue();
294 StatePtr startState = tgv.startState = createState(tgv,n);
295 NodePtr eof = createNode(NULL,'e',NULL,NULL,NULL);
296 StatePtr endState = tgv.endState = createState(tgv,eof);
297 tgv = generateTransition(n,tgv,1);
298 printTree(n);
299 if (tgv.tg->totalStateCount > BITBLOCK) {
300 errorMassege("StateMax > BITBLOCK",__LINE__,__FILE__);
301 }
302 BitVector bi = createBitVector(tgv.tg->totalStateCount);
303 tgv.tg->stateArray = (StatePtr*)calloc(bi.bitContainer*2,sizeof(StatePtr*));
304 tgv.tg->stateArray[startState->bitState.bitContainer] = startState;
305 tgv.tg->stateArray[endState->bitState.bitContainer] = endState;
306 tgv = generateTransition(n,tgv,2);
307 return tgv;
308 }
309
310 void printState(StatePtr state) {
311 printf("state : %lx\n",state->bitState.bitContainer);
312 long nodeNumber = 0;
313 if (state->node) {
314 printf("node : %c %lx -> %d\n",state->node->tokenType,state->bitState.bitContainer,state->node->nextStateNum);
315 if (state->node->state)
316 nodeNumber = state->node->state->bitState.bitContainer;
317 }
318 if (state->cc) {
319 printCharacterClass(state->cc,nodeNumber,4);
320 }
321 }
322
323 void printState(TransitionGeneratorPtr tg) {
324 StatePtr state = tg->stateList;
325 for (;state;state = state->next) {
326 printState(state);
327 putchar('\n');
328 }
329 }
330
331 SCValue createSCValue(TGValue tgv) {
332 SCValue scv;
333 scv.stateTop = tgv.tg->stateList;
334 scv.stateEnd = scv.stateTop;
335 while (scv.stateEnd->next) {
336 scv.stateEnd = scv.stateEnd->next;
337 }
338 return scv;
339 }
340
341 SCValue createState(SCValue scv,BitVector bi) {
342 StatePtr s = NEW(State);
343 s->stateNum = ++scv.tg->totalStateCount;
344 s->next = NULL;
345 scv.stateEnd->next = s;
346 scv.stateEnd = s;
347 s->bitState = bi;
348 s->cc = NULL;
349 return scv;
350 }
351
352 /**
353 現在のステートに含まれる組み合わせ状態をとってくる
354 組み合わされた個々の charclass をmerge して新しい charclass をつくり、組み合わせ状態に登録する
355 生成した状態は stateArray に格納するA
356 新しい状態ができなくなったら終了
357
358 charClassMerge の段階で新しい状態をリストに登録したら、charclasswalk をする必要がなくなる
359 */
360 SCValue subsetConstruction(SCValue scv) {
361 for (;scv.stateTop;scv.stateTop = scv.stateTop->next) {
362 CharClassWalkerPtr cw = createCharClassWalker(scv.stateTop->cc);
363 while (hasNext(cw)) {
364 CharClassPtr cc = getNext(cw);
365 BitVector bi = cc->nextState;
366 if (scv.stateArray[bi.bitContainer]) continue;
367 scv = createState(scv,bi);
368 StatePtr s = scv.stateEnd;
369 for (;bi.bitContainer;) {
370 int bitPosition = searchBit(bi);
371 unsigned long baseNum = 1 << bitPosition;
372 bi.bitContainer ^= baseNum;
373 StatePtr base = scv.stateArray[baseNum];
374 if (base == NULL) {// error
375 continue;
376 }
377 CharClassPtr merge = mergeTransition(s,base->cc);
378 s->cc = merge;
379 }
380 scv.stateArray[bi.bitContainer] = s;
381 }
382 }
383 return scv;
384 }