view pyrect/pyrect/regexp/char_collector.py @ 9:493c96d030c0

add pyrect
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Tue, 14 Jun 2011 17:24:03 +0900
parents
children
line wrap: on
line source

#!/usr/bin/env python

"""
Extract the characters (keywords) from an AST.
"""

from pyrect.regexp.parser import Parser
from pyrect.regexp.ast import ASTWalker

class CharCollector(ASTWalker):
    r"""Collect the characters that occur in a regexp AST (visitor pattern).

    The AST is represented as a tree of nodes.  Every ``visit*`` method
    returns a list with the entries contributed by its node; ``union`` and
    ``concat`` join two such lists.

    The docstring is a raw string so that the backslashes in the example
    below are not treated as (invalid) escape sequences.

    >>> prs = Parser()
    >>> an  = CharCollector()
    >>> an.analyze(prs.parse('.*(a|b)e'))
    ['a', 'b', (AnyChar:.), 'e']
    >>> an.analyze(prs.parse(r'\*+ \[\['))
    [' ', '*', '[']
    """

    def __init__(self):
        # Result of the most recent analyze() call; None until the first one.
        self.chars = None

    def analyze(self, ast=None):
        """Return the sorted, de-duplicated entries collected from *ast*.

        The result is also stored in ``self.chars``.  When *ast* is None
        nothing is recomputed and the stored result of the previous call
        (None if there was none) is returned.
        """
        # Compare against None explicitly: a node that merely evaluates as
        # false must still be analyzed instead of being silently skipped.
        if ast is not None:
            self.chars = sorted(set(ast.accept(self)))
        return self.chars

    def visit(self, ast):
        """Generic visitor: return ``str(ast)`` as a single-entry list.

        NOTE(review): the original comment names Union and Star as classes
        that contain no keywords; presumably ASTWalker routes node types
        without a dedicated ``visit_*`` method here -- confirm against
        ASTWalker.
        """
        return [str(ast)]

    def visit_AnyChar(self, anychar):
        """Return the AnyChar node itself as the only entry."""
        return [anychar]

    def visit_CharClass(self, cclass):
        """Return one character per code yielded by ``cclass.get_chars()``."""
        return [chr(code) for code in cclass.get_chars()]

    def visit_Character(self, char):
        """Return the character for the code stored in ``char.char``."""
        return [chr(char.char)]

    def union(self, r1, r2):
        """Join the two partial results *r1* and *r2* with ``+``."""
        return r1 + r2

    def concat(self, r1, r2):
        """Join the two partial results *r1* and *r2* with ``+``."""
        return r1 + r2

def test():
    """Run ``doctest.testmod()`` with its default arguments."""
    from doctest import testmod
    testmod()

# Run the doctests when this file is executed as a script.
if __name__ == "__main__":
    test()