annotate pyrect/pyrect/regexp/char_collector.py @ 9:493c96d030c0

add pyrect
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Tue, 14 Jun 2011 17:24:03 +0900
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
493c96d030c0 add pyrect
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 #!/usr/bin/env python
493c96d030c0 add pyrect
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2
493c96d030c0 add pyrect
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
3 """
493c96d030c0 add pyrect
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
4 Extract Characters from AST. Keywords,
493c96d030c0 add pyrect
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
5 """
493c96d030c0 add pyrect
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
6
493c96d030c0 add pyrect
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
7 from pyrect.regexp.parser import Parser
493c96d030c0 add pyrect
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
8 from pyrect.regexp.ast import ASTWalker
493c96d030c0 add pyrect
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
9
493c96d030c0 add pyrect
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
class CharCollector(ASTWalker):
    r"""Collect the characters that appear in a regular-expression AST.

    Implemented as a visitor over the node tree: ``analyze`` hands this
    object to ``ast.accept`` and post-processes the list that comes back.
    NOTE(review): the dispatch to the ``visit*`` methods and the calls to
    ``union``/``concat`` presumably happen in ``ASTWalker`` and the node
    classes, which are not visible here -- confirm against those modules.

    The docstring is a raw string so the backslashes in the second
    example reach doctest unchanged and are not read as escape sequences.

    >>> prs = Parser()
    >>> an = CharCollector()
    >>> an.analyze(prs.parse('.*(a|b)e'))
    ['a', 'b', (AnyChar:.), 'e']
    >>> an.analyze(prs.parse('\*+ \[\['))
    [' ', '*', '[']
    """

    def __init__(self):
        # Result of the most recent analyze() call; None until then.
        self.chars = None

    def analyze(self, ast=None):
        """Return the sorted, de-duplicated items collected from *ast*.

        ast -- root node to walk. When it is None nothing is recomputed
               and the result of the previous call is returned (None if
               there was no previous call).

        The result is also stored in ``self.chars``.
        """
        # Test against None explicitly: gating on the node's truth value
        # would skip any node object that happens to evaluate as false.
        if ast is not None:
            self.chars = sorted(set(ast.accept(self)))
        return self.chars

    def visit(self, ast):
        """Generic handler: return ``[str(ast)]``.

        Original author's note: the following classes contain no
        keywords -- Union, Star.
        """
        return [str(ast)]

    def visit_AnyChar(self, anychar):
        """Return the node itself (not a string) in a one-element list."""
        return [anychar]

    def visit_CharClass(self, cclass):
        """Return ``chr(x)`` for every value from ``cclass.get_chars()``."""
        return [chr(code) for code in cclass.get_chars()]

    def visit_Character(self, char):
        """Return ``chr(char.char)`` in a one-element list."""
        return [chr(char.char)]

    def union(self, r1, r2):
        """Merge two partial results by list concatenation."""
        return r1 + r2

    def concat(self, r1, r2):
        """Merge two partial results by list concatenation."""
        return r1 + r2
493c96d030c0 add pyrect
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
51
493c96d030c0 add pyrect
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
def test():
    """Run ``doctest.testmod()`` with its default arguments."""
    import doctest as doctest_runner

    doctest_runner.testmod()
493c96d030c0 add pyrect
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
55
493c96d030c0 add pyrect
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
# Run the doctests only when this file is executed as a script.
if __name__ == "__main__":
    test()