comparison regexParser/regexParser.cc @ 308:1188debbef10

separate CharClass
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 08 Feb 2016 12:45:45 +0900
parents 3e78631a6222
children a4484c02cba5
comparison
equal deleted inserted replaced
307:9f0df6ce89a2 308:1188debbef10
1 #include <stdlib.h> 1 #include <stdlib.h>
2 #include <stdio.h> 2 #include <stdio.h>
3 #include <string.h> 3 #include <string.h>
4 #include <ctype.h> 4 #include <ctype.h>
5 #include "regexParser.h" 5 #include "regexParser.h"
6 #include "CharClass.h"
6 7
7 static NodePtr charClass(RegexInfoPtr); 8 static NodePtr charClass(RegexInfoPtr);
8 static void token(RegexInfoPtr); 9 static void token(RegexInfoPtr);
9 static NodePtr regexAtom(RegexInfoPtr); 10 static NodePtr regexAtom(RegexInfoPtr);
10 11
33 n->cc = cc; 34 n->cc = cc;
34 n->state = NULL; 35 n->state = NULL;
35 n->left = left; 36 n->left = left;
36 n->right = right; 37 n->right = right;
37 return n; 38 return n;
38 }
39
40 CharClassPtr createCharClassWord(RegexInfoPtr ri) { // Build a childless CharClass node describing the token currently held in ri.
41 CharClassPtr cc = NEW(CharClass); // NEW is defined outside this view; presumably allocates one CharClass -- confirm
42 cc->type = 'a'; // type tag 'a'; the meaning of the tag is defined elsewhere
43 cc->left = NULL; // a fresh node has no children
44 cc->right = NULL;
45 cc->cond.w.word = ri->tokenValue; // stores the pointer as-is; the token text is not copied here
46 cc->cond.w.length = ri->ptr - ri->tokenValue; // length = distance from the token start to ri->ptr
47 cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue; // one-character range set to the token's first char; NOTE(review): if cond is a union this overwrites cond.w -- check the CharClass layout
48 return cc; // caller receives the newly created node
49 }
50
51 /*
52 cond.range.begin   cond.range.end
53          |----------------|
54 1.b---e
55 2.b------e
56 3.b------------e
57 4.b-----------------------e
58 5.b----------------------------e
59
60          |----------------|
61 6.       b---------e
62 7.       b----------------e
63 8.       b---------------------e
64
65          |----------------|
66 9.               b-----e
67 10.              b--------e
68 11.              b-------------e
69
70          |----------------|
71 12.                       b-----e
72
73          |----------------|
74 13.                          b--e
75
76 */
77 CharClassPtr insertCharClass(CharClassPtr cc, unsigned long begin, unsigned long end) { // Insert the range [begin,end] into the tree rooted at cc and return the resulting root.
78 if (begin>end) { // normalise so that begin <= end
79 unsigned long tmp = begin; begin = end; end = tmp;
80 }
81 if (cc == NULL) { // empty tree: the new range becomes the root
82 return createCharClassRange(begin,end,0,0,0); // createCharClassRange is defined outside this view; meaning of the three 0 arguments not visible here
83 }
84 if (end < cc->cond.range.begin ) { // case 1: new range lies entirely below this node
85 if (cc->left) { // descend into the left subtree when there is one
86 cc->left = insertCharClass(cc->left,begin,end);
87 } else { // otherwise hang a new node on the left
88 cc->left = createCharClassRange(begin,end,0,0,0);
89 }
90 return cc;
91 } else if (end == cc->cond.range.begin ) { // case 2: new range ends exactly at this node's begin
92 cc->cond.range.begin = begin; // extend this node downward; NOTE(review): cc->left is not checked for overlap with the widened range -- confirm
93 return cc;
94 } else if (end <= cc->cond.range.end) { // cases 3,4,6,7,9,10: new range ends inside this node
95 if (begin < cc->cond.range.begin) { // cases 3,4: it also starts before this node, so widen downward
96 cc->cond.range.begin = begin; // NOTE(review): as in case 2, the left subtree is not reconciled with the new begin
97 }
98 return cc; // cases 6,7,9,10 are already covered by this node: nothing to change
99 } else if (begin > cc->cond.range.end ) { // case 13: new range lies entirely above this node
100 if (cc->right) { // descend into the right subtree when there is one
101 cc->right = insertCharClass(cc->right,begin,end);
102 } else { // otherwise hang a new node on the right
103 cc->right = createCharClassRange(begin,end,0,0,0);
104 }
105 return cc;
106 }
107 if (cc->right) { // remaining cases 5,8,11,12: begin <= cc end < end, i.e. the new range overlaps this node and runs past its end
108 CharClassPtr right = cc->right; // keep the right subtree before this node is released
109 begin = cc->cond.range.begin; // NOTE(review): replaces begin even when the incoming begin is smaller (case 5), which would lose the lower part of the range -- confirm
110 free(cc); // NOTE(review): cc->left is neither freed nor re-linked here, so a left subtree appears to be dropped -- confirm
111 return insertCharClass(right,begin,end); // the merged range is re-inserted into the former right subtree, whose result becomes the root returned to the caller
112 }
113 if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { // cases 8,11,12: no right child and begin falls within this node
114 if (end > cc->cond.range.end) cc->cond.range.end = end; // widen upward; end > cc end already holds on this path
115 } else if (begin < cc->cond.range.begin) { // case 5: new range covers this node on both sides
116 cc->cond.range.begin = begin;
117 cc->cond.range.end = end;
118 } else { // NOTE(review): begin <= cc end is guaranteed by the case-13 test above, so this branch looks unreachable
119 printf("insertCharClass Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end);
120 }
121 return cc;
122 } 39 }
123 40
124 // <charClass> ::= '['<literal>'-'<literal>']' 41 // <charClass> ::= '['<literal>'-'<literal>']'
125 static 42 static
126 NodePtr charClass(RegexInfoPtr ri) { 43 NodePtr charClass(RegexInfoPtr ri) {