view pyrect/regexp/dfa_translator.py @ 87:d23f12ce0369

add suffix-dfa, it's used to be parallel-matching-algorithm (not implement, yet).
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Sun, 14 Nov 2010 04:16:12 +0900
parents fd3d0b8326fe
children 4d498b002de5
line wrap: on
line source

#!/usr/bin/env python
#-*- encoding: utf-8 -*-

from pyrect.regexp.parser import Parser
from pyrect.regexp.ast import ASTWalker, AnyChar
from pyrect.regexp.nfa_translator import NFATranslator
from pyrect.regexp.nfa import NFA, SuffixNFA, SuffixTrieNFA
from pyrect.regexp.dfa import DFA, SuffixDFA

class DFATranslator(object):
    """ Create DFA from AST with Visitor-Pattern.
    actually DFATranslator use NFATranslator and it convert NFA into DFA.
    >>> prs  = Parser()
    >>> dfat = DFATranslator()
    >>> dfat.translate(prs.parse('(A|B)*C')).map
    {(0, (Character:'C')): 1, (0, (Character:'A')): 0, (0, (Character:'B')): 0}
    >>> dfat.translate(prs.parse('(A|B)*C')).accepts
    [1]
    >>> dfat.translate(prs.parse('ほ?げ+')).map
    {(2, (MBCharacter:'げ')): 2, (0, (MBCharacter:'げ')): 2, (0, (MBCharacter:'ほ')): 1, (1, (MBCharacter:'げ')): 2}
    """

    def __init__(self):
        self.dfa = None
        self.nfa = None

    def translate(self, arg=None):
        if isinstance(arg, NFA):
            self.nfa = arg
            self.dfa = self.convert(self.nfa)
        elif arg:
            self.nfa = NFATranslator().translate(arg)
            self.dfa = self.convert(self.nfa)
        return self.dfa

    def convert(self, nfa):
        map_ = dict()
        que =  set(frozenset([nfa.epsilon_expand(set([nfa.start]))]))
        done = set()

        while que:
            states = que.pop()
            anys = set()
            for state in states:
                for k, v in nfa.map.iteritems():
                    if state == k[0] and k[1] == AnyChar():
                        anys.update(nfa.epsilon_expand(v))

            for state in states:
                for k, v in nfa.map.iteritems():
                    if state == k[0] and k[1] != '':
                        default = set(anys)
                        slot = map_.setdefault((states, k[1]), default)
                        slot.update(nfa.epsilon_expand(v))


            done.add(states)

            for v in map_.itervalues():
                if not v in done:
                    que.add(frozenset(v))

        assoc = dict()
        states = set()

        states.add(nfa.epsilon_expand(frozenset([nfa.start])))
        for state in map_.itervalues():
            states.add(frozenset(state))
        for index, state in enumerate(states):
            assoc[frozenset(state)] = index
        states = list()
        for state in assoc.itervalues():
            states.append(state)

        start = assoc[nfa.epsilon_expand(frozenset([nfa.start]))]

        accepts = list()
        for state in assoc.iterkeys():
            if state & set(nfa.accepts):
                accepts.append(assoc[state])

        map__ = dict()
        for (s, c), v in map_.iteritems():
            map__[(assoc[frozenset(s)], c)] = assoc[frozenset(v)]
        for k, v in assoc.items():
            assoc[v] = k

        return DFA(
            start,
            accepts,
            map__,
            states,
            assoc
            )

class SuffixDFATranslator(DFATranslator):
    def __init__(self):
        self.sdfa = None

    def translate(self, fa=None):
        if fa:
            self.sdfa = SuffixDFA(DFATranslator.translate(self, SuffixNFA(fa)), fa)
        return self.sdfa

class SuffixTrieTranslator(DFATranslator):
    def __init__(self):
        self.trie = None

    def translate(self, fa=None):
        if fa:
            self.trie = DFATranslator().translate(SuffixTrieNFA(fa))
        return self.trie

def test():
    import doctest
    doctest.testmod()

if __name__ == '__main__' : test()