view pyrect/pyrect/regexp/dfa_translator.py @ 9:493c96d030c0

add pyrect
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Tue, 14 Jun 2011 17:24:03 +0900
parents
children
line wrap: on
line source

#!/usr/bin/env python
#-*- encoding: utf-8 -*-

from pyrect.regexp.parser import Parser
from pyrect.regexp.ast import ASTWalker, AnyChar, Range
from pyrect.regexp.fa import Transition
from pyrect.regexp.nfa_translator import NFATranslator
from pyrect.regexp.nfa import NFA, SuffixNFA, SuffixTrieNFA
from pyrect.regexp.dfa import DFA, SuffixDFA

class DFATranslator(object):
    """ Create DFA from AST with Visitor-Pattern.
    actually DFATranslator use NFATranslator and it convert NFA into DFA.
    >>> prs  = Parser()
    >>> dfat = DFATranslator()
    >>> print(dfat.translate(prs.parse('(A|B)*C')).transition)
    Transition: 0 x 'A' -> 0, 0 x 'B' -> 0, 0 x 'C' -> 1,
    >>> dfat.translate(prs.parse('(A|B)*C')).accepts
    [1]
    >>> print(dfat.translate(prs.parse('ほ?げ+')).transition)
    Transition: 0 x \\x81 -> 1, 1 x \\x92 -> 5, 2 x \\x92 -> 5, 2 x \\xbb -> 3, 3 x \\xe3 -> 0, 4 x \\xe3 -> 6, 5 x \\xe3 -> 0, 6 x \\x81 -> 2,
    """

    def __init__(self):
        self.dfa = None
        self.nfa = None

    def translate(self, arg=None):
        if isinstance(arg, NFA):
            self.nfa = arg
            self.dfa = self.convert(self.nfa)
        elif arg:
            self.nfa = NFATranslator().translate(arg)
            self.dfa = self.convert(self.nfa)
        return self.dfa

    def convert(self, nfa):
        transition = Transition()
        que =  set()
        done = set()
        que.add(nfa.epsilon_expand(nfa.start))

        while que:
            states = que.pop()

            anys = set()
            for state in states:
                trans = nfa.transition[state]
                if trans and trans.has_key(AnyChar()):
                    anys.update(nfa.epsilon_expand(trans[AnyChar()]))

            for state in states:
                trans = nfa.transition[state]
                if trans == None: continue
                for i, n in trans.iteritems():
                    if i != '':
                        default = set(anys)
                        if transition[states, i] == None:
                            slot = default
                            transition[states, i] = slot
                        else:
                            slot = transition[states, i]
                        slot.update(nfa.epsilon_expand(n))

            done.add(states)

            for (_, _), n in transition.itertrans():
                if not n in done:
                    que.add(frozenset(n))

        assoc = dict()
        states = set()
        states.add(nfa.epsilon_expand(nfa.start))

        for _, state in transition.itertrans():
            states.add(frozenset(state))
        for index, state in enumerate(states):
            assoc[state] = index
        states = list()
        for state in assoc.itervalues():
            states.append(state)

        start = assoc[nfa.epsilon_expand(nfa.start)]

        accepts = list()
        for state in assoc.iterkeys():
            if state & set(nfa.accepts):
                accepts.append(assoc[state])

        transition_ = Transition()

        for (s, i), n in transition.itertrans():
            transition_[(assoc[s], i)] = assoc[frozenset(n)]
        for k, v in assoc.items():
            assoc[v] = k

        return DFA(
            transition_,
            start,
            accepts,
            states,
            assoc
            )

class SuffixDFATranslator(DFATranslator):
    def __init__(self):
        self.sdfa = None

    def translate(self, fa=None):
        if fa:
            self.sdfa = SuffixDFA(DFATranslator.translate(self, SuffixNFA(fa)), fa)
        return self.sdfa

class SuffixTrieTranslator(DFATranslator):
    def __init__(self):
        self.trie = None

    def translate(self, fa=None):
        if fa:
            self.trie = DFATranslator().translate(SuffixTrieNFA(fa))
        return self.trie

def test():
    import doctest
    doctest.testmod()

if __name__ == '__main__' : test()