view pyrect/pyrect/regexp/ast.py @ 9:493c96d030c0

add pyrect
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Tue, 14 Jun 2011 17:24:03 +0900
parents
children
line wrap: on
line source

#!/usr/bin/env python
#-*- encoding: utf-8 -*-

"""
General node set. The parser creates an AST (composed of Nodes) from a regexp.
Nodes are printable, and keywords are countable (kwset_node).
"""

class ASTWalker(object):
    """Base visitor for the regexp AST.

    Unary nodes (Star, Plus, Qmark) are walked by visiting their single
    operand ``op``.  Binary nodes (Concat, Union) visit ``op1`` and then
    ``op2`` and hand both results to the ``concat`` / ``union`` hook.
    The hooks and the fallback ``visit`` return None here; subclasses
    override them to compute something useful.
    """

    def visit(self, ast):
        """Fallback handler; produces None for any node."""
        return None

    # -- combination hooks -------------------------------------------------

    def concat(self, r1, r2):
        """Combine the results of a Concat's two operands (None by default)."""
        return None

    def union(self, r1, r2):
        """Combine the results of a Union's two operands (None by default)."""
        return None

    # -- unary operators: forward to the operand ---------------------------

    def visit_Star(self, star):
        """Visit the operand of ``star`` and return its result."""
        operand = star.op
        return operand.accept(self)

    def visit_Plus(self, plus):
        """Visit the operand of ``plus`` and return its result."""
        operand = plus.op
        return operand.accept(self)

    def visit_Qmark(self, qmark):
        """Visit the operand of ``qmark`` and return its result."""
        operand = qmark.op
        return operand.accept(self)

    # -- binary operators: left operand first, then right -------------------

    def visit_Concat(self, concat):
        """Visit both operands of ``concat`` and merge them via ``self.concat``."""
        left = concat.op1.accept(self)
        right = concat.op2.accept(self)
        return self.concat(left, right)

    def visit_Union(self, union):
        """Visit both operands of ``union`` and merge them via ``self.union``."""
        left = union.op1.accept(self)
        right = union.op2.accept(self)
        return self.union(left, right)

# AST-Nodes
class Node(object):
    """Root class of all AST nodes: default printing plus visitor dispatch."""

    def __init__(self):
        pass

    def __repr__(self):
        """Return ``(<ClassName>:<str(self)>)``."""
        return "(%s:%s)" % (self.__class__.__name__, str(self))

    def __str__(self):
        """Default text form: the string form of the node's class."""
        return str(self.__class__)

    def accept(self, visitor):
        """Call ``visitor.visit_<ClassName>(self)``, or ``visitor.visit(self)``.

        The fallback ``visitor.visit`` is looked up before the specific
        handler, so the visitor must always provide it.
        """
        method_name = "visit_" + self.__class__.__name__
        fallback = visitor.visit
        handler = getattr(visitor, method_name, fallback)
        return handler(self)

"""
NFA basic elements.
Concat, Union, Star, Qmark, Plus
"""

class Concat(Node):
    """Binary AST node for concatenation: ``op1`` followed by ``op2``."""

    def __init__(self, op1, op2):
        self.op1, self.op2 = op1, op2

    def __str__(self):
        """Return ``(<op1>.<op2>)`` built from the operands' str forms."""
        operands = (self.op1, self.op2)
        return "(%s.%s)" % operands

    def __repr__(self):
        """Return ``<ClassName>(<repr op1>.<repr op2>)``."""
        inner = "(%s.%s)" % (repr(self.op1), repr(self.op2))
        return self.__class__.__name__ + inner

class Union(Node):
    """Binary AST node for alternation: ``op1`` or ``op2``."""

    def __init__(self, op1, op2):
        self.op1, self.op2 = op1, op2

    def __str__(self):
        """Return ``(<op1>|<op2>)`` built from the operands' str forms."""
        operands = (self.op1, self.op2)
        return "(%s|%s)" % operands

    def __repr__(self):
        """Return ``(Union:(<repr op1>|<repr op2>))``."""
        left, right = repr(self.op1), repr(self.op2)
        return "(Union:(%s|%s))" % (left, right)

class Star(Node):
    """Unary AST node for the ``*`` operator applied to ``op``."""

    def __init__(self, op):
        self.op = op

    def __str__(self):
        """Return the operand in parentheses followed by ``*``."""
        wrapped = "(%s)" % self.op
        return wrapped + "*"

class Qmark(Node):
    """Unary AST node for the ``?`` operator applied to ``op``."""

    def __init__(self, op):
        self.op = op

    def __str__(self):
        """Return the operand in parentheses followed by ``?``."""
        wrapped = "(%s)" % self.op
        return wrapped + "?"

class Plus(Node):
    """Unary AST node for the ``+`` operator applied to ``op``."""

    def __init__(self, op):
        self.op = op

    def __str__(self):
        """Return the operand in parentheses followed by ``+``."""
        wrapped = "(%s)" % self.op
        return wrapped + "+"

"""
The following nodes are not converted into individual NFA/DFA states;
an InputNode remains an input symbol that is resolved at matching time.
"""

"""
basic elements.
Character, MBCharacter
"""

class Singleton(type):
    """Metaclass that hands out one shared instance per distinct argument list.

    Each class created through this metaclass gets its own ``instances``
    dictionary.  Calling the class again with equal arguments returns the
    cached object instead of constructing a new one, so every argument must
    be hashable (an unhashable argument raises TypeError on lookup).

    Positional-only calls are cached under the plain ``args`` tuple.  Calls
    that use keyword arguments are cached under a separate key, so
    ``C(1, 2)`` and ``C(1, y=2)`` yield two different objects.
    """

    # Placed between the positional and the keyword part of a cache key so a
    # keyword call can never collide with a purely positional one.
    _KWARGS_MARK = object()

    def __new__(self, name, bases, dict):
        """Create the class and give it a fresh, empty ``instances`` cache.

        ``self`` is the metaclass here; the parameter names are kept as they
        were for compatibility.
        """
        dict['instances'] = {}
        return type.__new__(self, name, bases, dict)

    def __call__(self, *args, **kwargs):
        """Return the cached instance for these arguments, creating it once."""
        if kwargs:
            # Sort by keyword name so the key does not depend on call order.
            key = args + (Singleton._KWARGS_MARK,) + tuple(sorted(kwargs.items()))
        else:
            key = args
        if key not in self.instances:
            self.instances[key] = type.__call__(self, *args, **kwargs)
        return self.instances[key]

class InputNode(Node):
    """Base class of nodes that stay as input symbols.

    Instances are created through the Singleton metaclass.  Hashing and the
    Python 2 ``__cmp__`` ordering are both derived from the node's string
    form, so two nodes that print identically hash (and compare) alike.
    """
    __metaclass__ = Singleton

    def __hash__(self):
        """Return a hash of the node's string form.

        The contents of the string are hashed rather than ``id()`` of the
        string object: ``__str__`` may build a new string on every call, and
        the address of such a temporary differs from call to call, which
        would make the hash unstable.
        """
        return hash(self.__str__())

    def __cmp__(self, other):
        """Order nodes by hash value: 0 if equal, 1 if greater, -1 if smaller."""
        mine = self.__hash__()
        theirs = other.__hash__()
        return (mine > theirs) - (mine < theirs)

class SpecialInputNode(InputNode):
    """Marker base class for special input nodes (Anchor and RepMN derive from it)."""
    # Restates the metaclass that InputNode already declares.
    __metaclass__ = Singleton

class Character(InputNode):
    """Input node for one character, stored as its ordinal in ``char``."""

    import curses.ascii as ascii
    # Display names indexed by ordinal: the curses control-character names
    # first, then the quoted characters 33..126, 'DEL', and finally a
    # ``\xNN`` escape for each of 128..255.
    ASCII = (
        ascii.controlnames
        + ["'" + chr(c) + "'" for c in range(33, 127)]
        + ['DEL']
        + [r"\x%x" % c for c in range(128, 256)]
    )

    def __init__(self, char):
        code = ord(char)
        self.char = code

    def __str__(self):
        """Return the display name of this character from ``ASCII``."""
        table = self.ASCII
        return table[self.char]

    def __hash__(self):
        """Hash by ordinal value."""
        return hash(self.char)

    @classmethod
    def ascii(cls, c):
        """Return the display name for the character ``c`` without creating a node."""
        index = ord(c)
        return cls.ASCII[index]

class MBCharacter(Character):
    """Character node that additionally records ordinals in ``bytes``.

    ``bytes`` is a list holding ``ord()`` of every element of ``str(mbchar)``.
    """

    def __init__(self, mbchar):
        # NOTE(review): Character.__init__ applies ord() to the whole value,
        # so mbchar must be something ord() accepts -- confirm with the caller.
        Character.__init__(self, mbchar)
        # Build a real list: map() would be a one-shot iterator on Python 3.
        self.bytes = [ord(unit) for unit in str(mbchar)]

class EscapeCharacter(Character):
    """Character subclass used as a distinct node type; adds no state of its own."""

    def __init__(self, char):
        # Plain delegation to the Character initialiser.
        super(EscapeCharacter, self).__init__(char)

"""
Anchors are special-input rules that match a specific text position.
BegLine, EndLine
"""

class Anchor(SpecialInputNode):
    """Common base class of the anchor nodes BegLine and EndLine; adds no behaviour."""
    pass

class BegLine(Anchor):
    """Anchor node whose text form is ``^``."""
    def __str__(self):
        return "^"

class EndLine(Anchor):
    """Anchor node whose text form is ``$``."""
    def __str__(self):
        return "$"

"""
other Special Inputs.
AnyChar, CharClass
"""

class AnyChar(InputNode):
    """Input node whose text form is ``.``."""
    def __str__(self):
        return "."

class CharClass(InputNode):
    """Input node for a bracketed character class such as ``[a-z]`` or ``[^0-9]``.

    ``factor`` is an iterable of members: Range nodes and nodes that carry a
    ``char`` ordinal.  ``inverse`` marks a negated class.
    """

    def __init__(self, factor, inverse=False):
        self.inverse = inverse
        self.factor = factor

    def get_chars(self):
        """Return the set of ordinals this class covers.

        Range members contribute every ordinal they iterate over; any other
        member contributes its ``char``.  For a negated class the result is
        the complement within 0..255.
        """
        chars = set()
        for member in self.factor:
            # isinstance rather than an exact type test, so subclasses of
            # Range are expanded too instead of being read through ``.char``.
            if isinstance(member, Range):
                chars.update(member)
            else:
                chars.add(member.char)
        if self.inverse:
            return set(range(256)) - chars
        return chars

    def __repr__(self):
        """Return ``<ClassName>[<repr of each member, comma separated>]``."""
        members = ",".join(repr(member) for member in self.factor)
        return self.__class__.__name__ + "[%s]" % members

    def __str__(self):
        """Return ``[...]``, or ``[^...]`` for a negated class."""
        negation = "^" if self.inverse else ""
        return "[%s%s]" % (negation, "".join(map(str, self.factor)))

class Range(InputNode):
    """Input node for an inclusive character range ``lower-upper``.

    ``lower`` and ``upper`` are nodes that expose their ordinal as ``char``.
    """

    def __init__(self, lower, upper):
        self.lower = lower
        self.upper = upper

    def __iter__(self):
        """Yield every ordinal from ``lower.char`` to ``upper.char`` inclusive."""
        for code in range(self.lower.char, self.upper.char + 1):
            yield code

    def __contains__(self, input_node):
        """Return True if ``input_node`` is a Character inside the range.

        Only Character nodes are recognised; anything else is reported as
        not contained.  The previous body evaluated ``self.lower`` and fell
        through, so membership was always false.
        """
        if not isinstance(input_node, Character):
            return False
        return self.lower.char <= input_node.char <= self.upper.char

    def __str__(self):
        """Return ``<lower>-<upper>``."""
        return "%s-%s" % (self.lower, self.upper)

class RepMN(SpecialInputNode):
    """Special input node for bounded repetition ``op{min,max}``.

    ``min`` and ``max`` are the repetition counts; ``max`` may be None, which
    is printed as an open upper bound.
    """

    def __init__(self, min, max, op):
        self.op = op
        self.min = min
        self.max = max

    def __str__(self):
        """Return ``op{n}``, ``op{n,}`` or ``op{n, m}`` depending on the bounds."""
        if self.max == self.min:
            return "%s{%d}" % (self.op, self.min)
        # Identity test: None is a singleton and must not go through __eq__.
        if self.max is None:
            return "%s{%d,}" % (self.op, self.min)
        return "%s{%d, %d}" % (self.op, self.min, self.max)

    def __hash__(self):
        """Return the sum of the hashes of ``op``, ``min`` and ``max``."""
        return hash(self.op) + hash(self.min) + hash(self.max)