view pyrect/regexp/nfa_translator.py @ 98:4d498b002de5

refactor for data-structure (dict -> Transition).
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Sun, 12 Dec 2010 23:04:31 +0900
parents 5db856953793
children
line wrap: on
line source

#!/usr/bin/env python
#-*- encoding: utf-8 -*-

from pyrect.regexp.parser import Parser
from pyrect.regexp.ast import *
from pyrect.regexp.nfa import *

class NFATranslator(ASTWalker):
    """ Create NFA from AST with Visitor-Pattern.
    AST (ast), is represented by Node-Tree.
    >>> prs  = Parser()
    >>> nfat = NFATranslator()
    >>> nfa = nfat.translate(prs.parse('(A|B)*C'))
    >>> print nfa.transition
    Transition: 0 x 'A' -> set([3]), 1 x 'B' -> set([3]), 2 x  -> set([0, 1]), 3 x  -> set([2, 4]), 4 x 'C' -> set([5]),
    >>> nfa = nfat.translate(prs.parse('[A-C]{2}D{3,}E{4,5}'))
    >>> print nfa.transition
    Transition: 0 x 'A' -> set([5]), 1 x 'B' -> set([5]), 2 x  -> set([0, 1]), 3 x 'C' -> set([5]), 4 x  -> set([2, 3]), 5 x REPEAT -> set([4]), 5 x ['A'-'C']{2} -> set([7]), 7 x 'D' -> set([8]), 8 x REPEAT -> set([7]), 8 x 'D'{3,} -> set([10]), 10 x 'E' -> set([11]), 11 x REPEAT -> set([10]), 11 x 'E'{4, 5} -> set([13]),
    """

    def __init__(self):
        self.ast = None
        self.nfa = None

    def get_state_id(self):
        self.__state_id += 1
        return self.__state_id

    state_id = property(get_state_id)

    def translate(self, ast=None):
        if ast:
            self.ast = ast
        if self.ast:
            self.__state_id = -1
            frag = ast.accept(self)
            frag.set_accept(self.state_id)
            self.nfa = frag.build()
            self.nfa.states = range(self.__state_id + 1)
            self.nfa.accepts = [self.__state_id]
        return self.nfa

    def visit(self, input_node):
        frag = NFAFragment()
        s = self.state_id
        frag.start = s
        frag.accept_to(s, input_node)
        return frag

    def visit_CharClass(self, charclass):
        # CharClass be converted Union in NFA
        factor = list(charclass.factor)
        if len(factor) == 1:
            return factor[0].accept(self)

        union = factor[0]
        for op in factor[1:]:
            union = Union(union, op)

        return union.accept(self)

    def visit_Union(self, union):
        frag1 = union.op1.accept(self)
        frag2 = union.op2.accept(self)

        frag = frag1 | frag2

        s = self.state_id
        frag.connect(s, '', frag1.start)
        frag.connect(s, '', frag2.start)
        frag.start = s
        frag.accepts = frag1.accepts | frag2.accepts
        return frag

    def visit_Concat(self, concat):
        frag1 = concat.op1.accept(self)
        frag2 = concat.op2.accept(self)

        frag1.set_accept(frag2.start)
        frag = frag1 | frag2

        frag.start   = frag1.start
        frag.accepts = frag2.accepts
        return frag

    def visit_Star(self, star):
        frag = star.op.accept(self)

        s = self.state_id
        frag.connect(s, '', frag.start)
        frag.set_accept(s)
        frag.accept_to(s, '')
        frag.start = s

        return frag

    def visit_Qmark(self, qmark):
        frag = qmark.op.accept(self)

        s = self.state_id
        frag.connect(s, '', frag.start)
        frag.accept_to(s, '')
        frag.start = s

        return frag

    def visit_Plus(self, plus):
        frag = plus.op.accept(self)

        s = self.state_id
        frag.set_accept(s)
        frag.connect(s, '', frag.start)
        frag.accept_to(s, '')

        return frag

    def visit_Range(self, range_):
        elems = range(range_.lower.char, range_.upper.char+1)
        elems = map(chr, elems)
        elems = map(Character, elems)
        union = reduce(Union, elems)
        return union.accept(self)

    def visit_RepMN(self, repmn):
        frag = repmn.op.accept(self)
        s = self.state_id
        frag.set_accept(s)
        frag.connect(s, "REPEAT", frag.start)
        s_ = self.state_id
        frag.set_accept(s_)
        frag.accept_to(s, repmn)

        return frag

def test():
    import doctest
    doctest.testmod()

if __name__ == "__main__": test()