9
|
1 #!/usr/bin/env python
|
|
2
|
|
3 """
|
|
4 Extract Character from AST. Keywords,
|
|
5 """
|
|
6
|
|
7 from pyrect.regexp.parser import Parser
|
|
8 from pyrect.regexp.ast import ASTWalker
|
|
9
|
|
class CharCollector(ASTWalker):
    r"""Collect the characters occurring in a regexp AST (Visitor pattern).

    The tree is traversed through ``ast.accept(self)``.  Every ``visit*``
    method returns a list with the characters found in that node, and
    ``union`` / ``concat`` join two such lists with ``+``.

    This docstring is a raw string on purpose: the doctest below contains
    backslashes (``\*``, ``\[``) that must reach doctest verbatim and would
    otherwise be flagged as invalid escape sequences.

    >>> prs = Parser()
    >>> an = CharCollector()
    >>> an.analyze(prs.parse('.*(a|b)e'))
    ['a', 'b', (AnyChar:.), 'e']
    >>> an.analyze(prs.parse('\*+ \[\['))
    [' ', '*', '[']
    """

    def __init__(self):
        # Result of the most recent analyze() call; None until then.
        self.chars = None

    def analyze(self, ast=None):
        """Return the distinct characters of *ast* as a sorted list.

        When *ast* is truthy, the result of ``ast.accept(self)`` is
        de-duplicated, sorted and stored in ``self.chars``.  Otherwise
        nothing is recomputed and the previously stored value is returned
        (``None`` if no analysis has run yet).
        """
        if ast:
            self.chars = sorted(set(ast.accept(self)))
        return self.chars

    def visit(self, ast):
        """Return ``[str(ast)]`` for a generic node.

        Original note: the following classes contain no keywords --
        Union, Star.

        NOTE(review): presumably this is the fallback ASTWalker uses for
        node types without a dedicated ``visit_*`` method -- confirm
        against ASTWalker.
        """
        return [str(ast)]

    def visit_AnyChar(self, anychar):
        """Return the AnyChar node itself (not a string) in a list."""
        return [anychar]

    def visit_CharClass(self, cclass):
        """Return one character per code yielded by ``cclass.get_chars()``."""
        return [chr(code) for code in cclass.get_chars()]

    def visit_Character(self, char):
        """Return the single character whose code is ``char.char``."""
        return [chr(char.char)]

    def union(self, r1, r2):
        """Join the results of two alternatives: ``r1 + r2``."""
        return r1 + r2

    def concat(self, r1, r2):
        """Join the results of two concatenated parts: ``r1 + r2``."""
        return r1 + r2
|
|
51
|
|
def test():
    """Run the doctests embedded in this module's docstrings."""
    from doctest import testmod

    testmod()
|
|
55
|
|
# Executed as a script: run the module's doctests.
if "__main__" == __name__:
    test()
|