annotate regexParser/regexParser.h @ 220:4dc8d327cc7d

subset construction worked how to handle EOF?
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Sat, 02 Jan 2016 00:01:22 +0900
parents 63e9224c7b2b
children 1a34e702776a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
126
639b0b437ebf remove error (do not work)
masa
parents: 125
diff changeset
1 #define NEW(type) (type*)malloc(sizeof(type))
639b0b437ebf remove error (do not work)
masa
parents: 125
diff changeset
2
188
109d22faf7b5 remove errors and warnings
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 187
diff changeset
3 #ifndef INCLUDED_STRUCT
109d22faf7b5 remove errors and warnings
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 187
diff changeset
4 #define INCLUDED_STRUCT
191
02031fb73af8 remove somefiles and fix header files
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 190
diff changeset
5
02031fb73af8 remove somefiles and fix header files
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 190
diff changeset
6 #define BITBLOCK 64
144
d8a4922eceae remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 142
diff changeset
7 typedef struct bitVector {
d8a4922eceae remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 142
diff changeset
8 unsigned long bitContainer;
191
02031fb73af8 remove somefiles and fix header files
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 190
diff changeset
9 } BitVector,*BitVectorPtr;
144
d8a4922eceae remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 142
diff changeset
10
129
b930be74a16e remove word.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 126
diff changeset
11 typedef struct word {
b930be74a16e remove word.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 126
diff changeset
12 unsigned char *word;
b930be74a16e remove word.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 126
diff changeset
13 int length;
b930be74a16e remove word.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 126
diff changeset
14 } Word, *WordPtr;
b930be74a16e remove word.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 126
diff changeset
15
126
639b0b437ebf remove error (do not work)
masa
parents: 125
diff changeset
16 typedef struct utf8Range {
142
de0f332d560c insert charClassMerge function
masa
parents: 133
diff changeset
17 unsigned long begin;
de0f332d560c insert charClassMerge function
masa
parents: 133
diff changeset
18 unsigned long end;
de0f332d560c insert charClassMerge function
masa
parents: 133
diff changeset
19 struct utf8Range *next; // only used in the parser.
126
639b0b437ebf remove error (do not work)
masa
parents: 125
diff changeset
20 } RangeList , *RangeListPtr;
639b0b437ebf remove error (do not work)
masa
parents: 125
diff changeset
21
178
5e8c6857934c implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 175
diff changeset
22 typedef struct condition {
142
de0f332d560c insert charClassMerge function
masa
parents: 133
diff changeset
23 RangeList range;
de0f332d560c insert charClassMerge function
masa
parents: 133
diff changeset
24 Word w;
116
66c633575b53 remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 115
diff changeset
25 } Condition, *ConditionList;
66c633575b53 remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 115
diff changeset
26
77
7f53a587bf97 add regexParser.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
27 typedef struct charClass {
111
1d30f70702df add determinize.cc and transition.cc
masa
parents: 110
diff changeset
28 unsigned char type;
115
ca30f8334741 rename createRegexParser.cc to regexParser.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 111
diff changeset
29 struct charClass *left;
ca30f8334741 rename createRegexParser.cc to regexParser.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 111
diff changeset
30 struct charClass *right;
142
de0f332d560c insert charClassMerge function
masa
parents: 133
diff changeset
31 Condition cond;
212
b0ae5273925c implement allocateCCState()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 203
diff changeset
32 int stateNum;
142
de0f332d560c insert charClassMerge function
masa
parents: 133
diff changeset
33 BitVector nextState;
77
7f53a587bf97 add regexParser.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
34 } CharClass, *CharClassPtr;
7f53a587bf97 add regexParser.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
35
190
3e8e5780ad4a change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 188
diff changeset
36 struct node;
3e8e5780ad4a change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 188
diff changeset
37
3e8e5780ad4a change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 188
diff changeset
38 typedef struct state {
192
ecf70fb215a5 print charclass
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 191
diff changeset
39 int stateNum;
190
3e8e5780ad4a change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 188
diff changeset
40 BitVector bitState;
3e8e5780ad4a change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 188
diff changeset
41 CharClassPtr cc;
3e8e5780ad4a change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 188
diff changeset
42 struct node *node;
3e8e5780ad4a change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 188
diff changeset
43 struct state *next;
3e8e5780ad4a change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 188
diff changeset
44 } State, *StatePtr;
3e8e5780ad4a change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 188
diff changeset
45
77
7f53a587bf97 add regexParser.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
46 typedef struct node {
89
50a146c05192 add NodeNumber in Regex Parser tree
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 85
diff changeset
47 unsigned char tokenType;
111
1d30f70702df add determinize.cc and transition.cc
masa
parents: 110
diff changeset
48 CharClassPtr cc;
183
7ae0a3070647 implement generateTransitionList
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 180
diff changeset
49 int stateNum;
7ae0a3070647 implement generateTransitionList
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 180
diff changeset
50 int nextStateNum;
7ae0a3070647 implement generateTransitionList
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 180
diff changeset
51 StatePtr state;
195
4fefd80c05f2 change variable name (TGValue tg -> TGValue tgv)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 192
diff changeset
52 StatePtr nextState;
77
7f53a587bf97 add regexParser.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
53 struct node *left;
7f53a587bf97 add regexParser.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
54 struct node *right;
7f53a587bf97 add regexParser.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
55 } Node, *NodePtr;
116
66c633575b53 remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 115
diff changeset
56
187
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
57 typedef struct stateStack {
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
58 BitVector state;
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
59 struct stateStack *next;
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
60 } StateStack, *StateStackPtr;
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
61
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
62 typedef struct transitionGenerator {
203
e809a2dd0731 add scValue
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 201
diff changeset
63 long totalStateCount;
187
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
64 StateStackPtr stack;
192
ecf70fb215a5 print charclass
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 191
diff changeset
65 StatePtr *stateArray;
ecf70fb215a5 print charclass
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 191
diff changeset
66 StatePtr stateList;
187
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
67 } TransitionGenerator, *TransitionGeneratorPtr;
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
68
220
4dc8d327cc7d subset construction worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 215
diff changeset
69 // Value Container is passed directly in functions
4dc8d327cc7d subset construction worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 215
diff changeset
70 // Don't use it's pointer
203
e809a2dd0731 add scValue
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 201
diff changeset
71 typedef struct scValue {
e809a2dd0731 add scValue
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 201
diff changeset
72 StatePtr stateTop;
e809a2dd0731 add scValue
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 201
diff changeset
73 StatePtr stateEnd;
e809a2dd0731 add scValue
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 201
diff changeset
74 TransitionGeneratorPtr tg;
e809a2dd0731 add scValue
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 201
diff changeset
75 } SCValue, *SCValuePtr;
e809a2dd0731 add scValue
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 201
diff changeset
76
187
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
77 typedef struct tgValue {
215
63e9224c7b2b try to fix asterisk
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 214
diff changeset
78 StatePtr asterisk; // last * state of the expression
192
ecf70fb215a5 print charclass
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 191
diff changeset
79 StatePtr startState;
ecf70fb215a5 print charclass
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 191
diff changeset
80 StatePtr endState;
187
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
81 TransitionGeneratorPtr tg;
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
82 } TGValue, *TGValuePtr;
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
83
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
84 enum charClassStackState {
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
85 LEFT,
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
86 SELF,
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
87 RIGHT
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
88 };
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
89
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
90 typedef struct charClassStack {
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
91 charClassStackState turn;
184
1da1b2eacb84 gather struct
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 183
diff changeset
92 CharClassPtr cc;
187
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
93 struct charClassStack *next;
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
94 } CharClassStack, *CharClassStackPtr;
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
95
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
96 typedef struct charClassWalker {
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
97 CharClassStackPtr stack;
201
b8bc24abaf8a add TODO and fix CharClassWalker
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 195
diff changeset
98 charClassStackState turn;
187
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
99 CharClassPtr next;
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
100 } CharClassWalker, *CharClassWalkerPtr;
ef798db705e9 remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 186
diff changeset
101
184
1da1b2eacb84 gather struct
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 183
diff changeset
102
116
66c633575b53 remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 115
diff changeset
103 typedef struct regexInfo {
66c633575b53 remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 115
diff changeset
104 unsigned char *ptr;
66c633575b53 remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 115
diff changeset
105 unsigned char tokenType;
118
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 116
diff changeset
106 unsigned char *tokenValue;
178
5e8c6857934c implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 175
diff changeset
107 int stateNumber;
116
66c633575b53 remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 115
diff changeset
108 } RegexInfo, *RegexInfoPtr;
188
109d22faf7b5 remove errors and warnings
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 187
diff changeset
109 #endif
190
3e8e5780ad4a change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 188
diff changeset
110
3e8e5780ad4a change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 188
diff changeset
111 extern NodePtr createNode(RegexInfoPtr ri,unsigned char type,CharClassPtr cc, NodePtr left, NodePtr right);
188
109d22faf7b5 remove errors and warnings
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 187
diff changeset
112 extern CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right);
190
3e8e5780ad4a change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents: 188
diff changeset
113 extern NodePtr regex(RegexInfoPtr);