view pyrect/regexp/nfa_translator.py @ 87:d23f12ce0369

add suffix-dfa, it's used to be parallel-matching-algorithm (not implement, yet).
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Sun, 14 Nov 2010 04:16:12 +0900
parents fd3d0b8326fe
children 82a8232625c3
line wrap: on
line source

#!/usr/bin/env python
#-*- encoding: utf-8 -*-

from pyrect.regexp.parser import Parser
from pyrect.regexp.ast import *
from pyrect.regexp.nfa import *

class NFATranslator(ASTWalker):
    """ Create NFA from AST with Visitor-Pattern.
    AST (ast), is represented by Node-Tree.
    >>> prs  = Parser()
    >>> nfat = NFATranslator()
    >>> nfa = nfat.translate(prs.parse('(A|B)*C'))
    >>> nfa.map
    {(3, ''): set([2, 4]), (2, ''): set([0, 1]), (1, (Character:'B')): set([3]), (4, (Character:'C')): set([5]), (0, (Character:'A')): set([3])}
    >>> anfa = ANFA(nfa)
    >>> anfa.map
    {(1, (Character:'B')): set([3]), (0, (Character:'A')): set([3]), (3, ''): set([2, 4]), (6, ''): set([0, 1, 2, 3, 4, 5]), (2, ''): set([0, 1]), (4, (Character:'C')): set([5])}
    """

    def __init__(self):
        self.ast = None
        self.nfa = None

    def get_state_id(self):
        self.__state_id += 1
        return self.__state_id

    state_id = property(get_state_id)

    def translate(self, ast=None):
        if ast:
            self.ast = ast
        if self.ast:
            self.__state_id = -1
            frag = ast.accept(self)
            frag.set_accept(self.state_id)
            self.nfa = frag.build()
            self.nfa.states = range(self.__state_id + 1)
            self.nfa.accepts = [self.__state_id]
        return self.nfa

    def visit(self, input_node):
        frag = NFAFragment()
        s = self.state_id
        frag.start = s
        frag.accept_to(s, input_node)
        return frag

    def visit_CharClass(self, charclass):
        # CharClass be converted Union in NFA
        factor = list(charclass.factor)
        if len(factor) == 1:
            return factor[0].accept(self)

        union = factor[0]
        for op in factor[1:]:
            union = Union(union, op)

        return union.accept(self)

    def visit_Union(self, union):
        frag1 = union.op1.accept(self)
        frag2 = union.op2.accept(self)

        frag = frag1 | frag2

        s = self.state_id
        frag.connect(s, '', frag1.start)
        frag.connect(s, '', frag2.start)
        frag.start = s
        frag.accepts = frag1.accepts | frag2.accepts
        return frag

    def visit_Concat(self, concat):
        frag1 = concat.op1.accept(self)
        frag2 = concat.op2.accept(self)

        frag1.set_accept(frag2.start)
        frag = frag1 | frag2

        frag.start   = frag1.start
        frag.accepts = frag2.accepts
        return frag

    def visit_Star(self, star):
        frag = star.op.accept(self)

        s = self.state_id
        frag.connect(s, '', frag.start)
        frag.set_accept(s)
        frag.accept_to(s, '')
        frag.start = s

        return frag

    def visit_Qmark(self, qmark):
        frag = qmark.op.accept(self)

        s = self.state_id
        frag.connect(s, '', frag.start)
        frag.accept_to(s, '')
        frag.start = s

        return frag

    def visit_Plus(self, plus):
        frag = plus.op.accept(self)

        s = self.state_id
        frag.set_accept(s)
        frag.connect(s, '', frag.start)
        frag.accept_to(s, '')

        return frag

def test():
    import doctest
    doctest.testmod()

if __name__ == "__main__": test()