comparison c/regexParser/regexParser.cc @ 147:84d32375383a pairPro

implement insertCharClass
author masa
date Tue, 15 Dec 2015 17:14:35 +0900
parents 50217a0545e8
children d1ebba6e117a
comparison
equal deleted inserted replaced
146:1c74ac7d56ec 147:84d32375383a
35 n->right = right; 35 n->right = right;
36 n->nodeNumber = ri->nodeNumber; 36 n->nodeNumber = ri->nodeNumber;
37 ri->nodeNumber++; 37 ri->nodeNumber++;
38 38
39 return n; 39 return n;
40 }
41
42 CharClassPtr createCharClassRange(unsigned long begin, unsigned long end, CharClassPtr left, CharClassPtr right) {
43 CharClassPtr cc = NEW(CharClass);
44 cc->type = 'r';
45 cc->cond.range.begin = begin;
46 cc->cond.range.end = end;
47 cc->left = left;
48 cc->right = right;
49 cc->nextState.bitContainer = 0;
50 return cc;
40 } 51 }
41 52
42 CharClassPtr createCharClassWord(RegexInfoPtr ri) { 53 CharClassPtr createCharClassWord(RegexInfoPtr ri) {
43 CharClassPtr cc = NEW(CharClass); 54 CharClassPtr cc = NEW(CharClass);
44 cc->type = 'a'; 55 cc->type = 'a';
46 cc->cond.w.length = ri->ptr - ri->tokenValue; 57 cc->cond.w.length = ri->ptr - ri->tokenValue;
47 cc->nextState.bitContainer = 0; 58 cc->nextState.bitContainer = 0;
48 return cc; 59 return cc;
49 } 60 }
50 61
51 CharClassPtr charClassMerge(CharClassPtr cc, unsigned char begin, unsigned char end, CharClassPtr next) { 62 CharClassPtr insertCharClass(CharClassPtr cc, unsigned char begin, unsigned char end) {
52 CharClassPtr cc1 = NEW(CharClass); 63 if (end < cc->cond.range.begin ) {
53 return cc1; 64 CharClassPtr cc1 = createCharClassRange(cc->begin,cc->end,cc->left,cc->right);
65 if (cc->left) {
66 cc1->left = insertCharClass(cc->left,begin,end);
67 return cc1;
68 } else {
69 CharClassPtr cc2 = createCharClassRange(begin,end,0,0);
70 cc1->left = cc2;
71 return cc1;
72 }
73 } else if (end == cc->cond.range.begin ) {
74 cc->cond.range.begin = begin;
75 } else if (end <= cc->cond.range.end) {
76 if (begin < cc->cond.range.begin) {
77 cc->cond.range.begin = begin;
78 }
79 } else if (begin > cc->cond.range.end ) {
80 CharClassPtr cc1 = createCharClassRange(cc->begin,cc->end,cc->left,cc->right);
81 if (cc->right) {
82 cc1->rigt = insertCharClass(cc->right,begin,end);
83 return cc1;
84 } else {
85 CharClassPtr cc2 = createCharClassRange(begin,end,0,0);
86 cc1->right = cc2;
87 return cc1;
88 }
89 } else if (begin == cc->cond.range.end ) {
90 cc->cond.range.end = end;
91 } else if (begin < cc->cond.range.begin) {
92 cc->cond.range.begin = begin;
93 }
94 return cc;
54 } 95 }
55 96
56 // <charClass> ::= '['<literal>'-'<literal>']' 97 // <charClass> ::= '['<literal>'-'<literal>']'
57 static 98 static
58 NodePtr charClass(RegexInfoPtr ri) { 99 NodePtr charClass(RegexInfoPtr ri) {
59 CharClassPtr cc = NEW(CharClass); 100 CharClassPtr cc = NEW(CharClass);
60 NodePtr n = createNode(ri,'c',cc,0,0); 101 NodePtr n = createNode(ri,'c',cc,0,0);
61 cc->type = 'r'; 102 cc->type = 'r';
62 cc->nextState.bitContainer = 0; 103 cc->nextState.bitContainer = 0;
63 RangeListPtr rangeList = &cc->cond.range; 104 RangeListPtr rangeList = &cc->cond.range;
64 rangeList->begin = (unsigned long)*ri->ptr; 105 rangeList->begin = *ri->ptr;
65 rangeList->end = (unsigned long)*ri->ptr; 106 rangeList->end = *ri->ptr;
66 rangeList->next = NULL; 107 rangeList->next = NULL;
67 108
68 for (ri->ptr++; *ri->ptr && *ri->ptr != ']'; ri->ptr++) { 109 for (ri->ptr++; *ri->ptr && *ri->ptr != ']'; ri->ptr++) {
69 if (*ri->ptr == '-') { 110 if (*ri->ptr == '-') {
70 rangeList->end = (unsigned long)*(ri->ptr + 1); 111 rangeList->end = *(ri->ptr + 1);
71 ri->ptr++; 112 ri->ptr++;
72 continue; 113 continue;
73 } 114 }
74 if (ri->ptr[0] == 0 || ri->ptr[0] == ']') break; 115 if (ri->ptr[0] == 0 || ri->ptr[0] == ']') break;
75 if (ri->ptr[0] == rangeList->end + 1) { 116 if (ri->ptr[0] == rangeList->end + 1) {
76 rangeList->end = (unsigned long)*ri->ptr; 117 rangeList->end = *ri->ptr;
77 continue; 118 continue;
78 } 119 }
79 rangeList->next = NEW(RangeList); 120 rangeList->next = NEW(RangeList);
80 rangeList = rangeList->next; 121 rangeList = rangeList->next;
81 rangeList->begin = (unsigned long)*ri->ptr; 122 rangeList->begin = *ri->ptr;
82 rangeList->end = (unsigned long)*ri->ptr; 123 rangeList->end = *ri->ptr;
83 rangeList->next = NULL; 124 rangeList->next = NULL;
84 } 125 }
126
127 for (RangeListPtr r = &cc->cond.range; r; r = r->next) {
128 cc = insertCharClass(cc, r->begin, r->end);
129 }
130
131 n->cc = cc;
85 // TODO literal support 132 // TODO literal support
86 // merge rangeList here 133 // merge rangeList here
87 if (*ri->ptr) ri->ptr++; 134 if (*ri->ptr) ri->ptr++;
88 token(ri); 135 token(ri);
89 return n; 136 return n;