view pyrect/regexp/ast.py @ 62:a05baa7dc7ba

modify I/O routine. use mmap. it's really faster than fgets ;-)
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Fri, 05 Nov 2010 01:37:35 +0900
parents fd3d0b8326fe
children 4de11d799dee
line wrap: on
line source

#!/usr/bin/env python
#-*- encoding: utf-8 -*-

"""
General node set. The parser creates an AST (composed of Nodes) from a regexp.
Nodes are printable, and keywords are countable (kwset_node).
"""

class ASTWalker(object):
    """Base visitor for AST nodes.

    ``visit`` is the generic hook that ``Node.accept`` falls back to when a
    visitor has no ``visit_<ClassName>`` method; here it does nothing.
    """

    def visit(self, ast):
        """Generic visit hook: ignore ``ast`` and return None."""
        return None

# AST-Nodes
class Node(object):
    """Common base class of the AST nodes.

    Supplies a default printable form and the ``accept`` entry point used
    for visitor dispatch.
    """

    def __init__(self):
        """Nothing to set up: the base node carries no state."""

    def __str__(self):
        """Return the string form of this node's class object."""
        return str(type(self))

    def __repr__(self):
        """Return ``(<ClassName>:<str(self)>)``."""
        return "(%s:%s)" % (type(self).__name__, str(self))

    def accept(self, visitor):
        """Dispatch this node to ``visitor`` and return the handler's result.

        The handler is ``visitor.visit_<ClassName>`` when it exists, else
        ``visitor.visit``.  The fallback is looked up unconditionally, so
        the visitor must always provide a ``visit`` attribute.
        """
        handler_name = "visit_%s" % type(self).__name__
        handler = getattr(visitor, handler_name, visitor.visit)
        return handler(self)

"""
NFA basic elements.
Concat, Union, Star, Qmark, Plus
"""

class Concat(Node):
    """Concatenation node holding two operands, ``op1`` and ``op2``."""

    def __init__(self, op1, op2):
        self.op1, self.op2 = op1, op2

    def __repr__(self):
        """Return ``<ClassName>(<repr of op1>.<repr of op2>)``."""
        operands = (repr(self.op1), repr(self.op2))
        return type(self).__name__ + "(%s.%s)" % operands

    def __str__(self):
        """Return ``(<op1>.<op2>)`` built from the operands' string forms."""
        left, right = self.op1, self.op2
        return "(%s.%s)" % (left, right)

class Union(Node):
    """Alternation node holding two operands, ``op1`` and ``op2``."""

    def __init__(self, op1, op2):
        self.op1, self.op2 = op1, op2

    def __repr__(self):
        """Return ``(Union:(<repr of op1>|<repr of op2>))``."""
        left, right = repr(self.op1), repr(self.op2)
        return "(Union:(" + left + "|" + right + "))"

    def __str__(self):
        """Return ``(<op1>|<op2>)`` built from the operands' string forms."""
        left, right = self.op1, self.op2
        return "(%s|%s)" % (left, right)

class Star(Node):
    """Node wrapping a single operand, printed with a trailing ``*``."""

    def __init__(self, op):
        self.op = op

    def __str__(self):
        """Return ``(<op>)*``."""
        return "(" + str(self.op) + ")*"

class Qmark(Node):
    """Node wrapping a single operand, printed with a trailing ``?``."""

    def __init__(self, op):
        self.op = op

    def __str__(self):
        """Return ``(<op>)?``."""
        return "(" + str(self.op) + ")?"

class Plus(Node):
    """Node wrapping a single operand, printed with a trailing ``+``."""

    def __init__(self, op):
        self.op = op

    def __str__(self):
        """Return ``(<op>)+``."""
        return "(" + str(self.op) + ")+"

"""
The following Nodes are not converted into individual NFA/DFA states;
an InputNode remains as an input, which is decided at matching time.
"""

"""
basic elements.
Character, MBCharacter
"""

class Singleton(type):
    """Metaclass that returns one shared instance per set of constructor
    arguments.

    Every class created through this metaclass gets its own ``instances``
    dictionary.  Calling the class looks the arguments up in that
    dictionary and only constructs a new object on a miss, so all arguments
    must be hashable (an unhashable argument raises ``TypeError``).
    """

    # Separates positional from keyword arguments inside a cache key so a
    # keyword call can never collide with a purely positional one.
    _KWARGS_MARK = object()

    def __new__(mcs, name, bases, dict):
        """Create the class and give it a fresh, empty ``instances`` cache."""
        # ``dict`` shadows the builtin; the name mirrors the
        # ``type(name, bases, dict)`` signature and is kept for compatibility.
        dict['instances'] = {}
        return type.__new__(mcs, name, bases, dict)

    def __call__(cls, *args, **kwargs):
        """Return the cached instance for these arguments, creating it once.

        Positional-only calls are keyed by the ``args`` tuple itself.
        Keyword arguments (previously rejected with ``TypeError``) are
        appended to the key in sorted order so that argument order does not
        matter.
        """
        key = args
        if kwargs:
            key += (Singleton._KWARGS_MARK,) + tuple(sorted(kwargs.items()))
        try:
            return cls.instances[key]
        except KeyError:
            pass
        # Construct outside the ``except`` block so a failing constructor
        # is not reported as happening while handling the KeyError.
        instance = type.__call__(cls, *args, **kwargs)
        cls.instances[key] = instance
        return instance

class InputNode(Node):
    """Base class of the nodes that stay as inputs decided at matching time.

    Instances are created through the ``Singleton`` metaclass.  Hashing and
    ordering are both derived from the node's string form, so two nodes
    compare equal exactly when their hashes are equal.  Subclasses that do
    not override ``__str__`` inherit ``Node.__str__`` (the class's string
    form) and therefore all hash alike within one class.
    """
    __metaclass__ = Singleton

    def __add__(self, other):
        """Combine this node with ``other`` into a ``FixedString``."""
        # NOTE(review): FixedString.__init__ as written in this file takes a
        # single argument, so this two-argument call looks like it cannot
        # succeed -- confirm the intended constructor signature.
        return FixedString(self, other)

    def __hash__(self):
        """Return a hash derived from the node's string form.

        The value must be the hash of the string, not ``id()`` of it: the
        string is usually rebuilt on every call, so its identity is not
        stable between calls and cannot serve as a hash.
        """
        return hash(self.__str__())

    def __cmp__(self, other):
        """Three-way comparison by hash value: -1, 0 or 1."""
        mine, theirs = hash(self), hash(other)
        return (mine > theirs) - (mine < theirs)

class Character(InputNode):
    """Input node for a single character, stored as its ordinal ``char``.

    ``ASCII`` maps ordinals 0..255 to a printable name: the mnemonics from
    ``curses.ascii.controlnames``, then quoted printable characters for
    33..126, ``DEL`` for 127 and ``\\x..`` escapes for 128..255.
    """
    import curses.ascii as ascii
    ASCII = ascii.controlnames + \
            ["'"+chr(c)+"'" for c in range(33, 127)]\
            + ['DEL'] + [r"\x%x" % c for c in range(128, 256)]

    def __init__(self, char):
        """Store the ordinal of ``char`` (a single character)."""
        self.char = ord(char)

    def __str__(self):
        """Return the printable name of the character.

        Ordinals beyond the table (possible when ``char`` was a unicode
        character) used to raise ``IndexError``; they are now rendered with
        the same ``\\x<hex>`` form the table uses for 128..255.
        """
        if self.char < len(self.ASCII):
            return self.ASCII[self.char]
        return r"\x%x" % self.char

    def __hash__(self):
        """Hash by ordinal, so equal characters hash alike."""
        return hash(self.char)

class MBCharacter(Character):
    """Character that additionally records its byte values in ``bytes``."""

    def __init__(self, mbchar):
        """Initialise as a ``Character`` and store the byte values.

        ``bytes`` is always a list of integers.  A non-ASCII unicode
        character used to fail in ``str(mbchar)`` with
        ``UnicodeEncodeError``; it is now encoded explicitly instead.
        """
        Character.__init__(self, mbchar)
        try:
            encoded = str(mbchar)
        except UnicodeEncodeError:
            # NOTE(review): UTF-8 is assumed here to match this file's
            # coding line -- confirm it is the encoding the matcher expects.
            encoded = mbchar.encode("utf-8")
        self.bytes = [ord(unit) for unit in encoded]

class EscapeCharacter(Character):
    """Character subclass, presumably tagging escape characters (judging by
    the name); construction is delegated unchanged to ``Character``.
    """

    def __init__(self, char):
        super(EscapeCharacter, self).__init__(char)

class FixedString(InputNode):
    """Input node holding a sequence of inputs in the list ``string``."""

    def __init__(self, char):
        # NOTE(review): ``char`` is accepted but never stored; the sequence
        # always starts out empty -- confirm whether that is intended.
        self.string = []

    def appfront(self, input_):
        """Put ``input_`` at the front of ``string`` and return ``self``."""
        self.string[:0] = [input_]
        return self

"""
Anchors are special input rules that match a specific text position.
BegLine, EndLine
"""

class Anchor(InputNode):
    """Common base class of the anchor nodes; adds nothing to InputNode."""

class BegLine(Anchor):
    """Anchor node whose printable form is ``^``."""
    def __str__(self):
        """Return the constant string ``^``."""
        return "^"

class EndLine(Anchor):
    """Anchor node whose printable form is ``$``."""
    def __str__(self):
        """Return the constant string ``$``."""
        return "$"

"""
other Special Inputs.
AnyChar, CharClass
"""

class AnyChar(InputNode):
    """Input node whose printable form is ``.``."""
    def __str__(self):
        """Return the constant string ``.``."""
        return "."

class CharClass(InputNode):
    """Input node for a bracketed class built from the items of ``factor``.

    ``inverse`` selects the ``[^...]`` printable form instead of ``[...]``.
    """

    def __init__(self, factor, inverse=False):
        self.factor, self.inverse = factor, inverse

    def __repr__(self):
        """Return ``<ClassName>[<repr>,<repr>,...]`` over the factor items."""
        members = ",".join(map(repr, self.factor))
        return type(self).__name__ + "[%s]" % members

    def __str__(self):
        """Return ``[...]`` or, when inverted, ``[^...]``."""
        body = "".join(str(item) for item in self.factor)
        opener = "[^" if self.inverse else "["
        return opener + body + "]"

class Range(InputNode):
    """Input node for a range running from ``lower`` to ``upper``."""

    def __init__(self, lower, upper):
        self.lower = lower
        self.upper = upper

    def __contains__(self, input_node):
        """Tell whether ``input_node`` falls inside the range (inclusive).

        The check compares ``Character.char`` ordinals.  It answers False
        unless ``input_node`` and both bounds are ``Character`` instances,
        which keeps the previous always-False result for every other kind
        of input.
        """
        # NOTE(review): the bounds are assumed to be Character nodes when a
        # membership test is meaningful -- confirm against the parser.
        for node in (self.lower, input_node, self.upper):
            if not isinstance(node, Character):
                return False
        return self.lower.char <= input_node.char <= self.upper.char

    def __str__(self):
        """Return ``<lower>-<upper>``."""
        return "%s-%s" % (self.lower, self.upper)