Mercurial > hg > Members > nobuyasu > test
comparison pyrect/pyrect/regexp/ast.py @ 9:493c96d030c0
add pyrect
author | nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 14 Jun 2011 17:24:03 +0900 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
8:515c780e1c13 | 9:493c96d030c0 |
---|---|
1 #!/usr/bin/env python | |
2 #-*- encoding: utf-8 -*- | |
3 | |
4 """ | |
5 General Node set. The parser creates an AST (composed of Nodes) from a regexp. | |
6 Nodes are Printable, and Keywords are Countable (kwset_node). | |
7 """ | |
8 | |
class ASTWalker(object):
    """Base visitor over the regexp AST.

    Every ``visit_<NodeName>`` hook descends into the operand(s) of the given
    node by calling their ``accept`` method with this walker.  The binary
    hooks pass the two sub-results to :meth:`concat` / :meth:`union`; the
    versions defined here return ``None``, so a subclass overrides them to
    build an actual result.
    """

    def visit(self, ast):
        """Generic hook; does nothing and returns ``None``."""
        return None

    def visit_Star(self, star):
        """Walk ``star.op`` and return whatever its ``accept`` returns."""
        operand = star.op
        return operand.accept(self)

    def visit_Plus(self, plus):
        """Walk ``plus.op`` and return whatever its ``accept`` returns."""
        operand = plus.op
        return operand.accept(self)

    def visit_Qmark(self, qmark):
        """Walk ``qmark.op`` and return whatever its ``accept`` returns."""
        operand = qmark.op
        return operand.accept(self)

    def visit_Concat(self, concat):
        """Walk ``op1`` then ``op2`` and combine the results with ``concat``."""
        left = concat.op1.accept(self)
        right = concat.op2.accept(self)
        return self.concat(left, right)

    def visit_Union(self, union):
        """Walk ``op1`` then ``op2`` and combine the results with ``union``."""
        left = union.op1.accept(self)
        right = union.op2.accept(self)
        return self.union(left, right)

    def union(self, r1, r2):
        """Combine the results of a union's operands; returns ``None`` here."""
        return None

    def concat(self, r1, r2):
        """Combine the results of a concat's operands; returns ``None`` here."""
        return None
37 | |
38 # AST-Nodes | |
class Node(object):
    """Common base class of the AST nodes.

    Provides default string forms and the ``accept`` entry point used for
    visitor dispatch.
    """

    def __init__(self):
        pass

    def __str__(self):
        """Return the string form of this node's class object."""
        cls = self.__class__
        return str(cls)

    def __repr__(self):
        """Return ``(<ClassName>:<str(self)>)``."""
        pieces = ["(", self.__class__.__name__, ":", str(self), ")"]
        return "".join(pieces)

    def accept(self, visitor):
        """Dispatch to ``visitor.visit_<ClassName>`` and return its result.

        When the visitor has no such method, ``visitor.visit`` is called
        instead.  ``visitor.visit`` is looked up in either case, so the
        visitor must provide it.
        """
        handler_name = "visit_%s" % self.__class__.__name__
        fallback = visitor.visit
        handler = getattr(visitor, handler_name, fallback)
        return handler(self)
52 | |
53 """ | |
54 NFA basic elements. | |
55 Concat, Union, Star, Qmark, Plus | |
56 """ | |
57 | |
class Concat(Node):
    """Binary node holding two operands, printed as ``(op1.op2)``."""

    def __init__(self, op1, op2):
        self.op1, self.op2 = op1, op2

    def __repr__(self):
        """Return ``<ClassName>(<repr op1>.<repr op2>)``."""
        operands = (self.op1.__repr__(), self.op2.__repr__())
        return self.__class__.__name__ + "(%s.%s)" % operands

    def __str__(self):
        """Return ``(<op1>.<op2>)`` using the operands' string forms."""
        left, right = self.op1, self.op2
        return "(%s.%s)" % (left, right)
69 | |
class Union(Node):
    """Binary node holding two alternatives, printed as ``(op1|op2)``."""

    def __init__(self, op1, op2):
        self.op1, self.op2 = op1, op2

    def __repr__(self):
        """Return ``(Union:(<repr op1>|<repr op2>))``."""
        operands = (self.op1.__repr__(), self.op2.__repr__())
        return "(Union:(%s|%s))" % operands

    def __str__(self):
        """Return ``(<op1>|<op2>)`` using the operands' string forms."""
        left, right = self.op1, self.op2
        return "(%s|%s)" % (left, right)
81 | |
class Star(Node):
    """Unary node wrapping one operand, printed as ``(op)*``."""

    def __init__(self, op):
        self.op = op

    def __str__(self):
        """Return the operand's string form wrapped as ``(...)*``."""
        operand = self.op
        return "(%s)*" % operand
88 | |
class Qmark(Node):
    """Unary node wrapping one operand, printed as ``(op)?``."""

    def __init__(self, op):
        self.op = op

    def __str__(self):
        """Return the operand's string form wrapped as ``(...)?``."""
        operand = self.op
        return "(%s)?" % operand
95 | |
class Plus(Node):
    """Unary node wrapping one operand, printed as ``(op)+``."""

    def __init__(self, op):
        self.op = op

    def __str__(self):
        """Return the operand's string form wrapped as ``(...)+``."""
        operand = self.op
        return "(%s)+" % operand
102 | |
103 """ | |
104 The following Nodes are not converted into individual NFA/DFA states; | |
105 an InputNode remains as an input symbol that is decided at matching time. | |
106 """ | |
107 | |
108 """ | |
109 basic elements. | |
110 Character, MBCharacter | |
111 """ | |
112 | |
class Singleton(type):
    """Metaclass that hands out one instance per distinct argument tuple.

    Each class created through this metaclass receives its own ``instances``
    dictionary mapping the positional-argument tuple of a constructor call to
    the object built for it.  The arguments must therefore be hashable.
    """

    def __new__(mcs, name, bases, dict):
        # Give every class (subclasses included) a cache of its own.
        dict['instances'] = {}
        return type.__new__(mcs, name, bases, dict)

    def __call__(cls, *args):
        """Return the cached instance for ``args``, building it on first use."""
        cache = cls.instances
        if args not in cache:
            cache[args] = type.__call__(cls, *args)
        return cache[args]
122 | |
class InputNode(Node):
    """Base class of the nodes that remain as input symbols.

    Instances are hashed by their string form and ordered by comparing those
    hash values.
    """
    __metaclass__ = Singleton

    def __hash__(self):
        """Return a hash derived from the node's string form.

        The hash is taken from the *contents* of the string.  The identity
        (``id``) of the string object is unsuitable: ``__str__`` may build a
        new string on every call, which would make the hash change between
        calls and break dictionary and set lookups.
        """
        return hash(self.__str__())

    def __cmp__(self, other):
        """Three-way comparison on hash values: 0 if equal, else 1 or -1.

        NOTE: ``__cmp__`` is only consulted by Python 2.
        """
        mine = self.__hash__()
        theirs = other.__hash__()
        if mine == theirs:
            return 0
        return 1 if mine > theirs else -1
136 | |
class SpecialInputNode(InputNode):
    """Base class for the special input nodes (e.g. anchors, repetitions)."""

    __metaclass__ = Singleton
139 | |
class Character(InputNode):
    """Input node for a single character, stored as its ordinal in ``char``."""

    import curses.ascii as ascii

    # Display name for every code 0-255: the curses control-character
    # mnemonics, the quoted printable characters 33-126, 'DEL' for 127 and a
    # \xNN escape for each code from 128 upwards.
    ASCII = (
        ascii.controlnames
        + ["'" + chr(c) + "'" for c in range(33, 127)]
        + ['DEL']
        + [r"\x%x" % c for c in range(128, 256)]
    )

    def __init__(self, char):
        code = ord(char)
        self.char = code

    def __str__(self):
        """Return the display name of this character from ``ASCII``."""
        table = self.ASCII
        return table[self.char]

    def __hash__(self):
        """Hash by the character's ordinal."""
        return hash(self.char)

    @classmethod
    def ascii(cls, c):
        """Return the display name of character ``c`` without building a node."""
        index = ord(c)
        return cls.ASCII[index]
158 | |
class MBCharacter(Character):
    """Character that also records the ordinals of ``str(mbchar)``.

    ``bytes`` holds ``ord()`` of every element of ``str(mbchar)`` as a list.
    """

    def __init__(self, mbchar):
        # __init__ returns None, so there is nothing worth keeping from it.
        Character.__init__(self, mbchar)
        # Built as a real list so it can be indexed and iterated repeatedly
        # regardless of whether map() is lazy on the running interpreter.
        self.bytes = [ord(b) for b in str(mbchar)]
163 | |
class EscapeCharacter(Character):
    """Character subclass for escaped characters; adds no state of its own."""

    def __init__(self, char):
        # Plain delegation to the Character initialiser.
        Character.__init__(self, char)
167 | |
168 """ | |
169 Anchor is a Special-Input rule that matches a specific text position. | |
170 BegLine, EndLine, | |
171 """ | |
172 | |
class Anchor(SpecialInputNode):
    """Base class of the anchor nodes (see ``BegLine`` and ``EndLine``)."""
175 | |
class BegLine(Anchor):
    """Anchor node printed as ``^``."""

    def __str__(self):
        text = "^"
        return text
179 | |
class EndLine(Anchor):
    """Anchor node printed as ``$``."""

    def __str__(self):
        text = "$"
        return text
183 | |
184 """ | |
185 other Special Inputs. | |
186 AnyChar, CharClass | |
187 """ | |
188 | |
class AnyChar(InputNode):
    """Input node printed as ``.``."""

    def __str__(self):
        text = "."
        return text
192 | |
class CharClass(InputNode):
    """Character class node, printed as ``[...]`` or ``[^...]``.

    ``factor`` is an iterable of members: ``Range`` nodes and nodes that carry
    a ``char`` ordinal.  ``inverse`` selects the negated form.
    """

    def __init__(self, factor, inverse=False):
        self.inverse = inverse
        self.factor = factor

    def get_chars(self):
        """Return the set of character codes covered by this class.

        Range members contribute every code they iterate over; any other
        member contributes its ``char``.  For an inverted class the result is
        the complement within 0-255.
        """
        chars = set()
        for member in self.factor:
            # isinstance (rather than an exact type comparison) also accepts
            # subclasses of Range.
            if isinstance(member, Range):
                chars.update(member)
            else:
                chars.add(member.char)
        if self.inverse:
            chars = set(range(256)) - chars
        return chars

    def __repr__(self):
        """Return ``<ClassName>[<repr of each member, comma separated>]``."""
        members = ",".join(s.__repr__() for s in self.factor)
        return self.__class__.__name__ + "[%s]" % members

    def __str__(self):
        """Return the bracket notation built from the members' string forms."""
        members = "".join(map(str, self.factor))
        if self.inverse:
            return "[^%s]" % members
        return "[%s]" % members
219 | |
class Range(InputNode):
    """Inclusive character range between two nodes, printed as ``lower-upper``.

    ``lower`` and ``upper`` are expected to expose their ordinal as ``char``.
    """

    def __init__(self, lower, upper):
        self.lower = lower
        self.upper = upper

    def __iter__(self):
        """Yield every character code from ``lower.char`` to ``upper.char``."""
        first, last = self.lower.char, self.upper.char
        for code in range(first, last + 1):
            yield code

    def __contains__(self, input_node):
        """Return True when ``input_node`` is a Character inside the range.

        The bounds are inclusive, matching ``__iter__``.  Anything that is
        not a ``Character`` is reported as not contained.
        """
        if isinstance(input_node, Character):
            return self.lower.char <= input_node.char <= self.upper.char
        return False

    def __str__(self):
        """Return ``<lower>-<upper>`` using the bounds' string forms."""
        return "%s-%s" % (self.lower, self.upper)
235 | |
class RepMN(SpecialInputNode):
    """Counted-repetition node, printed in ``op{min,max}`` style notation.

    ``max`` may be ``None``, in which case the open form ``op{min,}`` is
    printed.
    """

    def __init__(self, min, max, op):
        self.op = op
        self.min = min
        self.max = max

    def __str__(self):
        """Return ``op{n}``, ``op{n,}`` or ``op{n, m}`` depending on the bounds."""
        if self.max == self.min:
            return "%s{%d}" % (self.op, self.min)
        elif self.max is None:  # identity test is the idiomatic None check
            return "%s{%d,}" % (self.op, self.min)
        else:
            return "%s{%d, %d}" % (self.op, self.min, self.max)

    def __hash__(self):
        """Return the sum of the hashes of ``op``, ``min`` and ``max``."""
        return self.op.__hash__() + self.min.__hash__() + self.max.__hash__()