Mercurial > hg > Applications > Grep
comparison regexParser/subsetConstruction.cc @ 308:1188debbef10
separate CharClass
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 08 Feb 2016 12:45:45 +0900 |
parents | 3e78631a6222 |
children |
comparison
equal
deleted
inserted
replaced
307:9f0df6ce89a2 | 308:1188debbef10 |
---|---|
3 #include <ctype.h> | 3 #include <ctype.h> |
4 | 4 |
5 #include "regexParser.h" | 5 #include "regexParser.h" |
6 #include "subsetConstruction.h" | 6 #include "subsetConstruction.h" |
7 #include "node.h" | 7 #include "node.h" |
8 #include "BitVector.h" | 8 #include "bitVector.h" |
9 #include "CharClass.h" | |
9 #include "error.h" | 10 #include "error.h" |
10 | |
11 #define SIZE 256 | |
12 | |
13 CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right) { | |
14 CharClassPtr cc = NEW(CharClass); | |
15 cc->type = 'r'; | |
16 cc->cond.range.begin = begin; | |
17 cc->cond.range.end = end; | |
18 cc->cond.w.word = NULL; | |
19 cc->cond.w.length = 0; | |
20 cc->left = left; | |
21 cc->right = right; | |
22 cc->nextState.bitContainer = state; | |
23 return cc; | |
24 } | |
25 | |
26 CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) { | |
27 CharClassPtr cc1; | |
28 if (cc) { | |
29 cc1 = charClassMerge(cc,mBegin,mEnd,nextState); | |
30 } else { | |
31 cc1 = createCharClassRange(mBegin,mEnd,nextState.bitContainer,NULL,NULL); | |
32 } | |
33 return cc1; | |
34 } | |
35 | |
36 /* | |
37 cond.range.begin cond.range.end | |
38 |----------------| | |
39 1.b---e | |
40 2.b------e | |
41 3.b------------e | |
42 4.b-----------------------e | |
43 5.b----------------------------e | |
44 | |
45 |----------------| | |
46 6. b---------e | |
47 7. b----------------e | |
48 8. b---------------------e | |
49 | |
50 |----------------| | |
51 9. b-----e | |
52 10. b--------e | |
53 11. b-------------e | |
54 | |
55 |----------------| | |
56 12. b-----e | |
57 | |
58 |----------------| | |
59 13. b--e | |
60 | |
61 */ | |
62 | |
63 CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) { | |
64 // 重なっているccの領域を分割する | |
65 // 必要ならばnextStateを重ねあわせる | |
66 // 変更があった場合は新しくリストを作って返す | |
67 if (end < cc->cond.range.begin ) { // 1 | |
68 if (cc->left) { | |
69 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,nextState.bitContainer,charClassMerge(cc->left,begin,end,nextState),cc->right); | |
70 } else { | |
71 return createCharClassRange(begin,end,nextState.bitContainer,NULL,cc); | |
72 } | |
73 } else if (end == cc->cond.range.begin && begin != end ) { // 2 | |
74 CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,nextState); | |
75 if (cc->cond.range.begin == cc->cond.range.end) { | |
76 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, | |
77 cc->nextState.bitContainer | nextState.bitContainer, cc1,cc->right); | |
78 } | |
79 CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); | |
80 return createCharClassRange(cc->cond.range.begin,cc->cond.range.begin, | |
81 cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); | |
82 } else if (end < cc->cond.range.end) { // range.begin < end | |
83 if (begin < cc->cond.range.begin) { // 3 | |
84 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); | |
85 CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); | |
86 return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); | |
87 } | |
88 if (begin == cc->cond.range.begin) { // 6 | |
89 CharClassPtr cc2 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); | |
90 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc2); | |
91 } | |
92 // 9 | |
93 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); | |
94 CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); | |
95 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc2,cc3); | |
96 } else if (end == cc->cond.range.end) { | |
97 if (begin == cc->cond.range.begin) { // 7 | |
98 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc->right); | |
99 } else if (begin < cc->cond.range.begin) { // 4 | |
100 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); | |
101 return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc->right); | |
102 } | |
103 // 10 cond.range.begin < begin | |
104 CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,NULL,cc->right); | |
105 return createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,cc2); | |
106 } | |
107 if (begin > cc->cond.range.end ) { // 13 | |
108 if (cc->right) { | |
109 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer,cc->left,charClassMerge(cc->right,begin,end,nextState)); | |
110 } else { | |
111 return createCharClassRange(begin,end,nextState.bitContainer,cc,NULL); | |
112 } | |
113 } | |
114 if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { | |
115 if (end > cc->cond.range.end) { // cond.range.end < end | |
116 if (begin == cc->cond.range.begin) { // 8 | |
117 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); | |
118 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, | |
119 cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc1); | |
120 } | |
121 if (begin > cc->cond.range.begin) { // 11,12 | |
122 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); | |
123 CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); | |
124 return createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc3,cc1); | |
125 } | |
126 } | |
127 } else if (begin < cc->cond.range.begin) { // 5 | |
128 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); | |
129 CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); | |
130 return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); | |
131 } else { | |
132 printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); | |
133 } | |
134 return cc; | |
135 } | |
136 | |
137 void findLeftMost(CharClassPtr next,CharClassWalkerPtr walk) { | |
138 while (next->left) { | |
139 CharClassStackPtr ccs = NEW(CharClassStack); | |
140 ccs->next = walk->stack; | |
141 ccs->turn = walk->turn; | |
142 walk->turn = LEFT; | |
143 ccs->cc = next; | |
144 walk->stack = ccs; | |
145 next = next->left; | |
146 } | |
147 walk->turn = SELF; | |
148 walk->next = next; | |
149 } | |
150 | |
151 CharClassWalkerPtr createCharClassWalker (CharClassPtr next) { | |
152 CharClassWalkerPtr walk = NEW(CharClassWalker); | |
153 walk->next = NULL; | |
154 walk->stack = NULL; | |
155 walk->turn = LEFT; | |
156 if (!next) return walk; | |
157 findLeftMost(next,walk); | |
158 return walk; | |
159 } | |
160 | |
161 bool hasNext(CharClassWalkerPtr walk) { | |
162 return walk->next != NULL; | |
163 } | |
164 | |
165 CharClassStackPtr charClassStackPop(CharClassWalkerPtr walk) { | |
166 CharClassStackPtr prev = walk->stack->next; | |
167 walk->turn = walk->stack->turn; | |
168 free(walk->stack); | |
169 walk->stack = prev; | |
170 return prev; | |
171 } | |
172 | |
173 CharClassPtr getNext(CharClassWalkerPtr walk) { | |
174 CharClassPtr current = walk->next; | |
175 walk->next = NULL; | |
176 if (walk->turn == SELF) { | |
177 if (current->right) { | |
178 walk->turn = RIGHT; | |
179 findLeftMost(current->right,walk); | |
180 return current; | |
181 } | |
182 } | |
183 while (walk->stack) { | |
184 walk->next = walk->stack->cc; | |
185 charClassStackPop(walk); | |
186 if (walk->turn == LEFT) { | |
187 walk->turn = SELF; | |
188 return current; | |
189 } | |
190 } | |
191 return current; | |
192 } | |
193 | |
194 void setState(CharClassPtr cc, BitVector bi) { | |
195 // if (word case) setNext(bitVector to the word list) | |
196 cc->nextState = bi; | |
197 if (cc->left) { | |
198 setState(cc->left,bi); | |
199 } | |
200 if (cc->right) { | |
201 setState(cc->right,bi); | |
202 } | |
203 } | |
204 | |
205 CharClassPtr mergeTransition(StatePtr x,CharClassPtr y) { | |
206 if (x->cc == NULL) { | |
207 return y; | |
208 } | |
209 CharClassWalkerPtr walk = createCharClassWalker(x->cc); | |
210 CharClassPtr ccy = y; | |
211 BitVector bi; | |
212 while (hasNext(walk)) { | |
213 CharClassPtr cc = getNext(walk); | |
214 unsigned long begin = cc->cond.range.begin; | |
215 unsigned long end = cc->cond.range.end; | |
216 bi = cc->nextState; | |
217 ccy = charClassMerge(ccy,begin,end,bi); | |
218 } | |
219 free(walk); | |
220 return ccy; | |
221 } | |
222 | 11 |
223 /** | 12 /** |
224 作成する state を linked list | 13 作成する state を linked list |
225 bitvector を index とした配列に BitVectorPtr を格納 | 14 bitvector を index とした配列に BitVectorPtr を格納 |
226 state に対応する NodePtr を | 15 state に対応する NodePtr を |