comparison pyrect/pyrect/regexp/ast.py @ 9:493c96d030c0

add pyrect
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Tue, 14 Jun 2011 17:24:03 +0900
parents
children
comparison
equal deleted inserted replaced
8:515c780e1c13 9:493c96d030c0
1 #!/usr/bin/env python
2 #-*- encoding: utf-8 -*-
3
4 """
5 General node set. The parser creates an AST (composed of Nodes) from a regexp.
6 Nodes are printable, and keywords are countable (kwset_node).
7 """
8
class ASTWalker(object):
    """Visitor skeleton that walks an AST by calling ``accept`` on operands.

    The unary handlers return whatever the operand's ``accept`` returns.
    The binary handlers visit ``op1`` and then ``op2`` and combine the two
    results with ``concat`` / ``union``.  ``visit``, ``union`` and
    ``concat`` return ``None`` here; they are the hooks a subclass is
    expected to override.
    """

    def visit(self, ast):
        """Generic handler for a node; returns ``None``."""
        return None

    def visit_Star(self, star):
        """Walk into the operand of a ``Star`` node."""
        operand = star.op
        return operand.accept(self)

    def visit_Plus(self, plus):
        """Walk into the operand of a ``Plus`` node."""
        operand = plus.op
        return operand.accept(self)

    def visit_Qmark(self, qmark):
        """Walk into the operand of a ``Qmark`` node."""
        operand = qmark.op
        return operand.accept(self)

    def visit_Concat(self, concat):
        """Walk both operands (left first) and merge with ``concat``."""
        left = concat.op1.accept(self)
        right = concat.op2.accept(self)
        return self.concat(left, right)

    def visit_Union(self, union):
        """Walk both operands (left first) and merge with ``union``."""
        left = union.op1.accept(self)
        right = union.op2.accept(self)
        return self.union(left, right)

    def union(self, r1, r2):
        """Combine the results of a union's operands; returns ``None``."""
        return None

    def concat(self, r1, r2):
        """Combine the results of a concatenation's operands; returns ``None``."""
        return None
37
38 # AST-Nodes
class Node(object):
    """Root class of the AST nodes: printable and visitable."""

    def __init__(self):
        """Nothing to initialise on the base node."""

    def __str__(self):
        """Return the string form of the node's class object."""
        cls = self.__class__
        return str(cls)

    def __repr__(self):
        """Return ``(<ClassName>:<str(self)>)``."""
        return "(%s:%s)" % (self.__class__.__name__, str(self))

    def accept(self, visitor):
        """Dispatch to ``visitor.visit_<ClassName>`` or to ``visitor.visit``.

        ``visitor.visit`` is looked up unconditionally, so the visitor
        must provide it even when a specific handler exists.
        """
        method_name = "visit_" + self.__class__.__name__
        fallback = visitor.visit
        handler = getattr(visitor, method_name, fallback)
        return handler(self)
52
53 """
54 NFA basic elements.
55 Concat, Union, Star, Qmark, Plus
56 """
57
class Concat(Node):
    """Concatenation of two sub-expressions, ``op1`` followed by ``op2``."""

    def __init__(self, op1, op2):
        self.op1, self.op2 = op1, op2

    def __repr__(self):
        """Return ``<ClassName>(<repr op1>.<repr op2>)``."""
        left = repr(self.op1)
        right = repr(self.op2)
        return "%s(%s.%s)" % (self.__class__.__name__, left, right)

    def __str__(self):
        """Return ``(<op1>.<op2>)``."""
        operands = (self.op1, self.op2)
        return "(%s.%s)" % operands
69
class Union(Node):
    """Alternation between two sub-expressions, ``op1`` or ``op2``."""

    def __init__(self, op1, op2):
        self.op1, self.op2 = op1, op2

    def __repr__(self):
        """Return ``(Union:(<repr op1>|<repr op2>))``."""
        left = repr(self.op1)
        right = repr(self.op2)
        return "(Union:(%s|%s))" % (left, right)

    def __str__(self):
        """Return ``(<op1>|<op2>)``."""
        operands = (self.op1, self.op2)
        return "(%s|%s)" % operands
81
class Star(Node):
    """Unary ``*`` node wrapping the operand ``op``."""

    def __init__(self, op):
        self.op = op

    def __str__(self):
        """Return ``(<op>)*``."""
        operand = self.op
        return "(%s)*" % operand
88
class Qmark(Node):
    """Unary ``?`` node wrapping the operand ``op``."""

    def __init__(self, op):
        self.op = op

    def __str__(self):
        """Return ``(<op>)?``."""
        operand = self.op
        return "(%s)?" % operand
95
class Plus(Node):
    """Unary ``+`` node wrapping the operand ``op``."""

    def __init__(self, op):
        self.op = op

    def __str__(self):
        """Return ``(<op>)+``."""
        operand = self.op
        return "(%s)+" % operand
102
103 """
104 The following Nodes are not converted into individual NFA/DFA states;
105 an InputNode remains as an input that is decided at matching time.
106 """
107
108 """
109 basic elements.
110 Character, MBCharacter
111 """
112
class Singleton(type):
    """Metaclass that hands out one shared instance per argument tuple.

    Every class created with this metaclass gets its own ``instances``
    dict mapping the positional-argument tuple to the instance built
    from it.  Calling the class again with equal arguments returns the
    cached instance instead of constructing a new one.  The arguments
    are used as a dict key and therefore must be hashable; keyword
    arguments are not accepted.
    """

    def __new__(mcs, name, bases, namespace):
        # A fresh cache per class, so subclasses never share instances
        # with their parents.
        namespace['instances'] = {}
        return type.__new__(mcs, name, bases, namespace)

    def __call__(cls, *args):
        if args not in cls.instances:
            cls.instances[args] = type.__call__(cls, *args)
        return cls.instances[args]
122
class InputNode(Node):
    """Base class of the nodes that are kept as inputs.

    Instances are created through the ``Singleton`` metaclass (Python 2
    ``__metaclass__`` hook).  Hashing is based on the node's string
    form, and ``__cmp__`` orders and equates nodes by comparing hashes.
    """
    __metaclass__ = Singleton

    def __hash__(self):
        # Hash the text, not id() of the text: str(self) may build a new
        # string object on every call, so its id() is not stable between
        # calls and can be reused by an unrelated string.
        return hash(str(self))

    def __cmp__(self, other):
        """Three-way comparison (-1, 0, 1) of the two nodes' hashes."""
        mine = self.__hash__()
        theirs = other.__hash__()
        return (mine > theirs) - (mine < theirs)
136
class SpecialInputNode(InputNode):
    """Marker base class for the special input nodes."""

    __metaclass__ = Singleton
139
class Character(InputNode):
    """Input node for one character, stored as its ordinal in ``char``."""

    # Names of code points 0-32 (NUL .. US, SP).  This is the same list as
    # curses.ascii.controlnames, inlined so that importing this module
    # does not require the curses package (absent on some platforms).
    _CONTROL_NAMES = [
        "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
        "BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
        "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
        "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",
        "SP",
    ]

    # Display string for every code point 0-255: control names, quoted
    # printable characters, 'DEL', then \x.. escapes for 128-255.
    ASCII = _CONTROL_NAMES + \
        ["'" + chr(c) + "'" for c in range(33, 127)] \
        + ['DEL'] + [r"\x%x" % c for c in range(128, 256)]

    def __init__(self, char):
        self.char = ord(char)

    def __str__(self):
        """Return the display string of this character from ``ASCII``."""
        return self.ASCII[self.char]

    def __hash__(self):
        return hash(self.char)

    @classmethod
    def ascii(cls, c):
        """Return the display string of the character ``c``."""
        return cls.ASCII[ord(c)]
158
class MBCharacter(Character):
    """Character that also records the ordinals of ``str(mbchar)``.

    NOTE(review): ``Character.__init__`` calls ``ord(mbchar)``, which
    only accepts a single character -- confirm what callers pass here.
    """

    def __init__(self, mbchar):
        Character.__init__(self, mbchar)
        # Built eagerly so ``bytes`` is a real list on every Python
        # version (``map`` returns a lazy iterator on Python 3).
        self.bytes = [ord(c) for c in str(mbchar)]
163
class EscapeCharacter(Character):
    """Character subclass used for escaped characters; adds no behaviour."""

    def __init__(self, char):
        super(EscapeCharacter, self).__init__(char)
167
168 """
169 Anchor is a Special-Input rule that matches a specific text position.
170 BegLine, EndLine,
171 """
172
class Anchor(SpecialInputNode):
    """Common base class of the anchor nodes (``BegLine``, ``EndLine``)."""
175
class BegLine(Anchor):
    """Anchor printed as ``^``."""

    def __str__(self):
        """Return the anchor's symbol."""
        symbol = "^"
        return symbol
179
class EndLine(Anchor):
    """Anchor printed as ``$``."""

    def __str__(self):
        """Return the anchor's symbol."""
        symbol = "$"
        return symbol
183
184 """
185 other Special Inputs.
186 AnyChar, CharClass
187 """
188
class AnyChar(InputNode):
    """Input node printed as ``.``."""

    def __str__(self):
        """Return the node's symbol."""
        symbol = "."
        return symbol
192
class CharClass(InputNode):
    """Bracket expression ``[...]`` / ``[^...]`` over a sequence of factors.

    ``factor`` is iterated as a sequence whose items are either ``Range``
    nodes or objects with a ``char`` ordinal; ``inverse`` selects the
    negated form.
    """

    def __init__(self, factor, inverse=False):
        self.inverse = inverse
        self.factor = factor

    def get_chars(self):
        """Return the set of ordinals this class covers.

        With ``inverse`` set, the result is the complement within 0-255.
        """
        chars = set()
        for item in self.factor:
            if isinstance(item, Range):
                # Iterating a Range yields every ordinal it spans.
                chars.update(item)
            else:
                chars.add(item.char)
        if self.inverse:
            chars = set(range(256)) - chars
        return chars

    def __repr__(self):
        return self.__class__.__name__ + "[%s]" \
            % ",".join(repr(item) for item in self.factor)

    def __str__(self):
        body = "".join(map(str, self.factor))
        if self.inverse:
            return "[^%s]" % body
        return "[%s]" % body
219
class Range(InputNode):
    """Inclusive character range ``lower-upper`` inside a character class.

    ``lower`` and ``upper`` are expected to expose a ``char`` ordinal.
    """

    def __init__(self, lower, upper):
        self.lower = lower
        self.upper = upper

    def __iter__(self):
        """Yield every ordinal from ``lower.char`` to ``upper.char`` inclusive."""
        for c in range(self.lower.char, self.upper.char + 1):
            yield c

    def __contains__(self, input_node):
        """Return True when ``input_node`` falls inside the range.

        Accepts a ``Character`` (compared by its ``char`` ordinal) or a
        plain integer ordinal, matching what ``__iter__`` yields.  Any
        other object is reported as not contained.
        """
        if isinstance(input_node, Character):
            code = input_node.char
        elif isinstance(input_node, int):
            code = input_node
        else:
            return False
        return self.lower.char <= code <= self.upper.char

    def __str__(self):
        return "%s-%s" % (self.lower, self.upper)
235
class RepMN(SpecialInputNode):
    """Bounded repetition ``op{min}``, ``op{min,}`` or ``op{min, max}``.

    ``max`` is ``None`` for the open-ended form.
    """

    def __init__(self, min, max, op):
        self.op = op
        self.min = min
        self.max = max

    def __str__(self):
        if self.max == self.min:
            return "%s{%d}" % (self.op, self.min)
        elif self.max is None:
            return "%s{%d,}" % (self.op, self.min)
        else:
            return "%s{%d, %d}" % (self.op, self.min, self.max)

    def __hash__(self):
        # Hash the fields as an ordered tuple.  Adding the three hashes
        # made e.g. {1,3} and {2,2} on the same op hash equal, and the
        # inherited hash-based __cmp__ then treated them as equal.
        return hash((self.op, self.min, self.max))