view pyrect/regexp/dfa.py @ 87:d23f12ce0369

add suffix-dfa, it's used to be parallel-matching-algorithm (not implement, yet).
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Sun, 14 Nov 2010 04:16:12 +0900
parents 83c69d42faa8
children 4d498b002de5
line wrap: on
line source

#!/usr/bin/env python

import curses.ascii as ascii
import copy, collections
from fa import FA

class DFA(FA):
    def __init__(self, start, accepts, map_, states, assoc):
        def transition(state, input):
            return map_[state, input]
        FA.__init__(self, transition, start, accepts, map_, states)
        self.assoc = assoc

    def copy_from(self, dfa):
        self.start = dfa.start
        self.map = copy.deepcopy(dfa.map)
        self.accepts = copy.deepcopy(dfa.accepts)
        self.states = copy.deepcopy(dfa.states)
        self.assoc = copy.deepcopy(dfa.assoc)

    @classmethod
    def copy(cls, dfa_):
        dfa = DFA(dfa_)
        dfa.copy_from(dfa)
        return dfa

    def get_runtime(self):
        return DFARuntime(self)

class SuffixDFA(DFA):
    def __init__(self, dfa, pdfa):
        self.copy_from(dfa)
        self.pdfa = pdfa
        self.acceptable = dict()
        self.make_acceptable()

    def setdict(self):
        return collections.defaultdict(set)

    def make_acceptable(self):
        self.acceptable = collections.defaultdict(self.setdict)

        def search_acceptable(pcur, scur, searched, history, pstart):
            for (pcur_, i), pnext in self.pdfa.map.iteritems():
                if pcur_ != pcur or (pnext, i) in searched: continue
                searched_ = searched + [(pnext, i)]
                snext = self.map[(scur, i)]
                if scur  not in history:
                    history_ = history + (scur,)
                else:
                    history_ = history
                if pnext in self.pdfa.accepts:
                    self.acceptable[snext][history_].add(pstart)
                search_acceptable(pnext, snext, searched_, history_, pstart)

        for s in self.pdfa.states:
            search_acceptable(s, self.start, [], (), s)

class DFARuntime(object):
    def __init__(self, DFA):
        self.DFA = DFA
        self.curState = self.DFA.start

    def do_transition(self, char):
        self.curState = self.DFA.transition(
            self.curState, char)

    def is_accept_state(self):
        return self.curState in self.DFA.accepts

    def accept(self, string):
        try:
            for alphabet in string:
                self.do_transition(alphabet)
        except KeyError:
            return False
        return self.is_accept_state()