comparison regexParser/subsetConstruction.cc @ 308:1188debbef10

separate CharClass
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 08 Feb 2016 12:45:45 +0900
parents 3e78631a6222
children
comparison
equal deleted inserted replaced
307:9f0df6ce89a2 308:1188debbef10
3 #include <ctype.h> 3 #include <ctype.h>
4 4
5 #include "regexParser.h" 5 #include "regexParser.h"
6 #include "subsetConstruction.h" 6 #include "subsetConstruction.h"
7 #include "node.h" 7 #include "node.h"
8 #include "BitVector.h" 8 #include "bitVector.h"
9 #include "CharClass.h"
9 #include "error.h" 10 #include "error.h"
10
11 #define SIZE 256
12
13 CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right) {
14 CharClassPtr cc = NEW(CharClass);
15 cc->type = 'r';
16 cc->cond.range.begin = begin;
17 cc->cond.range.end = end;
18 cc->cond.w.word = NULL;
19 cc->cond.w.length = 0;
20 cc->left = left;
21 cc->right = right;
22 cc->nextState.bitContainer = state;
23 return cc;
24 }
25
26 CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) {
27 CharClassPtr cc1;
28 if (cc) {
29 cc1 = charClassMerge(cc,mBegin,mEnd,nextState);
30 } else {
31 cc1 = createCharClassRange(mBegin,mEnd,nextState.bitContainer,NULL,NULL);
32 }
33 return cc1;
34 }
35
36 /*
37 cond.range.begin cond.range.end
38 |----------------|
39 1.b---e
40 2.b------e
41 3.b------------e
42 4.b-----------------------e
43 5.b----------------------------e
44
45 |----------------|
46 6. b---------e
47 7. b----------------e
48 8. b---------------------e
49
50 |----------------|
51 9. b-----e
52 10. b--------e
53 11. b-------------e
54
55 |----------------|
56 12. b-----e
57
58 |----------------|
59 13. b--e
60
61 */
62
63 CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) {
64 // 重なっているccの領域を分割する
65 // 必要ならばnextStateを重ねあわせる
66 // 変更があった場合は新しくリストを作って返す
67 if (end < cc->cond.range.begin ) { // 1
68 if (cc->left) {
69 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,nextState.bitContainer,charClassMerge(cc->left,begin,end,nextState),cc->right);
70 } else {
71 return createCharClassRange(begin,end,nextState.bitContainer,NULL,cc);
72 }
73 } else if (end == cc->cond.range.begin && begin != end ) { // 2
74 CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,nextState);
75 if (cc->cond.range.begin == cc->cond.range.end) {
76 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,
77 cc->nextState.bitContainer | nextState.bitContainer, cc1,cc->right);
78 }
79 CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right);
80 return createCharClassRange(cc->cond.range.begin,cc->cond.range.begin,
81 cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3);
82 } else if (end < cc->cond.range.end) { // range.begin < end
83 if (begin < cc->cond.range.begin) { // 3
84 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState);
85 CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right);
86 return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3);
87 }
88 if (begin == cc->cond.range.begin) { // 6
89 CharClassPtr cc2 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right);
90 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc2);
91 }
92 // 9
93 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL);
94 CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right);
95 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc2,cc3);
96 } else if (end == cc->cond.range.end) {
97 if (begin == cc->cond.range.begin) { // 7
98 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc->right);
99 } else if (begin < cc->cond.range.begin) { // 4
100 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState);
101 return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc->right);
102 }
103 // 10 cond.range.begin < begin
104 CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,NULL,cc->right);
105 return createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,cc2);
106 }
107 if (begin > cc->cond.range.end ) { // 13
108 if (cc->right) {
109 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer,cc->left,charClassMerge(cc->right,begin,end,nextState));
110 } else {
111 return createCharClassRange(begin,end,nextState.bitContainer,cc,NULL);
112 }
113 }
114 if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) {
115 if (end > cc->cond.range.end) { // cond.range.end < end
116 if (begin == cc->cond.range.begin) { // 8
117 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState);
118 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,
119 cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc1);
120 }
121 if (begin > cc->cond.range.begin) { // 11,12
122 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState);
123 CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL);
124 return createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc3,cc1);
125 }
126 }
127 } else if (begin < cc->cond.range.begin) { // 5
128 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState);
129 CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState);
130 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3);
131 } else {
132 printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end);
133 }
134 return cc;
135 }
136
137 void findLeftMost(CharClassPtr next,CharClassWalkerPtr walk) {
138 while (next->left) {
139 CharClassStackPtr ccs = NEW(CharClassStack);
140 ccs->next = walk->stack;
141 ccs->turn = walk->turn;
142 walk->turn = LEFT;
143 ccs->cc = next;
144 walk->stack = ccs;
145 next = next->left;
146 }
147 walk->turn = SELF;
148 walk->next = next;
149 }
150
151 CharClassWalkerPtr createCharClassWalker (CharClassPtr next) {
152 CharClassWalkerPtr walk = NEW(CharClassWalker);
153 walk->next = NULL;
154 walk->stack = NULL;
155 walk->turn = LEFT;
156 if (!next) return walk;
157 findLeftMost(next,walk);
158 return walk;
159 }
160
161 bool hasNext(CharClassWalkerPtr walk) {
162 return walk->next != NULL;
163 }
164
165 CharClassStackPtr charClassStackPop(CharClassWalkerPtr walk) {
166 CharClassStackPtr prev = walk->stack->next;
167 walk->turn = walk->stack->turn;
168 free(walk->stack);
169 walk->stack = prev;
170 return prev;
171 }
172
173 CharClassPtr getNext(CharClassWalkerPtr walk) {
174 CharClassPtr current = walk->next;
175 walk->next = NULL;
176 if (walk->turn == SELF) {
177 if (current->right) {
178 walk->turn = RIGHT;
179 findLeftMost(current->right,walk);
180 return current;
181 }
182 }
183 while (walk->stack) {
184 walk->next = walk->stack->cc;
185 charClassStackPop(walk);
186 if (walk->turn == LEFT) {
187 walk->turn = SELF;
188 return current;
189 }
190 }
191 return current;
192 }
193
194 void setState(CharClassPtr cc, BitVector bi) {
195 // if (word case) setNext(bitVector to the word list)
196 cc->nextState = bi;
197 if (cc->left) {
198 setState(cc->left,bi);
199 }
200 if (cc->right) {
201 setState(cc->right,bi);
202 }
203 }
204
205 CharClassPtr mergeTransition(StatePtr x,CharClassPtr y) {
206 if (x->cc == NULL) {
207 return y;
208 }
209 CharClassWalkerPtr walk = createCharClassWalker(x->cc);
210 CharClassPtr ccy = y;
211 BitVector bi;
212 while (hasNext(walk)) {
213 CharClassPtr cc = getNext(walk);
214 unsigned long begin = cc->cond.range.begin;
215 unsigned long end = cc->cond.range.end;
216 bi = cc->nextState;
217 ccy = charClassMerge(ccy,begin,end,bi);
218 }
219 free(walk);
220 return ccy;
221 }
222 11
223 /** 12 /**
224 作成する state を linked list 13 作成する state を linked list
225 bitvector を index とした配列に BitVectorPtr を格納 14 bitvector を index とした配列に BitVectorPtr を格納
226 state に対応する NodePtr を 15 state に対応する NodePtr を