diff pyrect/pyrect/regexp/char_collector.py @ 9:493c96d030c0

add pyrect
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Tue, 14 Jun 2011 17:24:03 +0900
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyrect/pyrect/regexp/char_collector.py	Tue Jun 14 17:24:03 2011 +0900
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+
+"""
+Extract the characters (keywords) from an AST.
+"""
+
+from pyrect.regexp.parser import Parser
+from pyrect.regexp.ast import ASTWalker
+
+class CharCollector(ASTWalker):
+    """ Extract with Visitor-Pattern.
+    AST (ast), is represented by Node-Tree.
+    >>> prs = Parser()
+    >>> an  = CharCollector()
+    >>> an.analyze(prs.parse('.*(a|b)e'))
+    ['a', 'b', (AnyChar:.), 'e']
+    >>> an.analyze(prs.parse('\*+ \[\['))
+    [' ', '*', '[']
+    """
+
+    def __init__(self):
+        self.chars = None
+
+    def analyze(self, ast=None):
+        if ast:
+            self.chars = list(set(ast.accept(self)))
+            self.chars.sort()
+        return self.chars
+
+    def visit(self, ast):
+        """Following Classes contain no-Keywords.
+        Union, Star
+        """
+        return [str(ast)]
+
+    def visit_AnyChar(self, anychar):
+        return [anychar]
+
+    def visit_CharClass(self, cclass):
+        chars = [chr(x) for x in cclass.get_chars()]
+        return chars
+
+    def visit_Character(self, char):
+        return [chr(char.char)]
+
+    def union(self, r1, r2):
+        return r1 + r2
+
+    def concat(self, r1, r2):
+        return r1 + r2
+
+def test():
+    import doctest
+    doctest.testmod()
+
+if __name__ == "__main__": test()