comparison c/regexParser/regexParser.cc @ 142:de0f332d560c pairPro

insert charClassMerge function
author masa
date Fri, 11 Dec 2015 14:54:00 +0900
parents 15815fcb6c2f
children 32977f5a2ed0
comparison
equal deleted inserted replaced
141:71f36a59cf6a 142:de0f332d560c
28 28
29 static 29 static
30 CharClassPtr createCharClassWord(RegexInfoPtr ri) { 30 CharClassPtr createCharClassWord(RegexInfoPtr ri) {
31 CharClassPtr cc = NEW(CharClass); 31 CharClassPtr cc = NEW(CharClass);
32 cc->type = 'a'; 32 cc->type = 'a';
33 cc->cond = NEW(Condition); 33 cc->cond.w.word = ri->tokenValue;
34 cc->cond->w = NEW(Word); 34 cc->cond.w.length = ri->ptr - ri->tokenValue;
35 cc->cond->w->word = ri->tokenValue; 35 cc->nextState.bitContainer = 0;
36 cc->cond->w->length = ri->ptr - ri->tokenValue;
37 token(ri); 36 token(ri);
38 37
39 return cc; 38 return cc;
40 } 39 }
41 40
51 ri->nodeNumber++; 50 ri->nodeNumber++;
52 51
53 return n; 52 return n;
54 } 53 }
55 54
55 CharClassPtr charClassMerge(CharClassPtr src, CharClassPtr add) {
56 // Split the regions where the two char classes (cc) overlap.
57 // Overlay (merge) nextState where necessary.
58 // If anything changed, build a new list and return it.
59 if (src->type == 'a') {
60 if (add->type == 'a') {
61 if (src->cond.w.word[0] > add->cond.w.word[0]) {
62 // add is the smaller one, so build the cc in ascending order
63 CharClassPtr left = charClassMerge(add->left.src); // NOTE(review): one argument is passed but this function declares two (src, add); presumably (add->left, src) was meant -- confirm
64 return createCharClassWord(add->cond->w.word, left, add->right); // NOTE(review): the createCharClassWord visible in this file takes a single RegexInfoPtr; this line also writes cond->w where line 61 writes cond.w -- confirm
65 } else {
66
67 }
68 } else if (add->type == 'c') {
69 // NOTE(review): the CharClass types assigned in the visible parts of this file are 'a' and 'r'; confirm that 'c' is the intended tag here
70 if (src->cond.w.word[0] < add->cond.range.begin) {
71
72 } else (src->cond->w.word[0] < add->end) { // NOTE(review): `else` is followed by a condition without `if`; add->end differs from the add->cond.range.begin form on line 70 -- confirm
73 } else if (src->type == 'c') { // NOTE(review): by brace position this attaches inside the add->type == 'c' branch; presumably meant to pair with `if (src->type == 'a')` on line 59 -- confirm
74
75 }
76 } // NOTE(review): not every brace opened above is closed by this point, and only line 64 returns a value -- this function looks like unfinished work in progress
56 77
57 // <charClass> ::= '['<literal>'-'<literal>']' 78 // <charClass> ::= '['<literal>'-'<literal>']'
58 static 79 static
59 NodePtr charClass(RegexInfoPtr ri) { 80 NodePtr charClass(RegexInfoPtr ri) {
60 CharClassPtr cc = NEW(CharClass); 81 CharClassPtr cc = NEW(CharClass);
61 NodePtr n = createNode(ri,'c',cc,0,0); 82 NodePtr n = createNode(ri,'c',cc,0,0);
62 cc->type = 'r'; 83 cc->type = 'r';
63 cc->cond = NEW(Condition); 84 cc->nextState.bitContainer = 0;
64 cc->cond->range = NEW(RangeList); 85 RangeListPtr rangeList = &cc->cond.range;
65 cc->cond->range->begin = ri->ptr; 86 rangeList->begin = ri->ptr;
66 cc->cond->range->end = ri->ptr; 87 rangeList->end = ri->ptr;
67 cc->cond->range->next = NULL;
68
69
70 RangeListPtr rangeList = cc->cond->range;
71 88
72 for (ri->ptr++; *ri->ptr && *ri->ptr != ']'; ri->ptr++) { 89 for (ri->ptr++; *ri->ptr && *ri->ptr != ']'; ri->ptr++) {
73 if (*ri->ptr == '-') { 90 if (*ri->ptr == '-') {
74 rangeList->end = ri->ptr + 1; 91 rangeList->end = ri->ptr + 1;
75 ri->ptr++; 92 ri->ptr++;
84 rangeList = rangeList->next; 101 rangeList = rangeList->next;
85 rangeList->begin = ri->ptr; 102 rangeList->begin = ri->ptr;
86 rangeList->end = ri->ptr; 103 rangeList->end = ri->ptr;
87 rangeList->next = NULL; 104 rangeList->next = NULL;
88 } 105 }
89 // TODO literal support 106 // TODO literal support
90 107 // merge rangeList here
91 if (*ri->ptr) ri->ptr++; 108 if (*ri->ptr) ri->ptr++;
92 token(ri); 109 token(ri);
93 return n; 110 return n;
94 } 111 }
95 112
110 ri->tokenValue = NULL; 127 ri->tokenValue = NULL;
111 return; 128 return;
112 } else if (ri->ptr[0] == ')') { 129 } else if (ri->ptr[0] == ')') {
113 ri->ptr++; 130 ri->ptr++;
114 ri->tokenType = ')'; 131 ri->tokenType = ')';
115 ri->tokenValue = ri->ptr;
116 return;
117 } else if (ri->ptr[0] == '[') {
118 ri->ptr++;
119 ri->tokenType = 'c';
120 ri->tokenValue = ri->ptr; 132 ri->tokenValue = ri->ptr;
121 return; 133 return;
122 } else if (ri->ptr[0] == ']') { 134 } else if (ri->ptr[0] == ']') {
123 ri->ptr++; 135 ri->ptr++;
124 ri->tokenType = ']'; 136 ri->tokenType = ']';
142 \0xa5 154 \0xa5
143 \[ 155 \[
144 \\ 156 \\
145 \utf-8 etc... 157 \utf-8 etc...
146 */ 158 */
159 } else if (ri->ptr[0] == '[') {
160 ri->ptr++;
161 ri->tokenType = 'c';
162 ri->tokenValue = ri->ptr;
163 return;
147 } else { 164 } else {
148 ri->tokenType = 'a'; 165 ri->tokenType = 'a';
149 ri->tokenValue = ri->ptr; 166 ri->tokenValue = ri->ptr;
150 if (isalnum(ri->ptr[0])) { 167 if (isalnum(ri->ptr[0])) {
151 ri->ptr++; 168 ri->ptr++;