view pyrect/regexp/nfa.py @ 98:4d498b002de5

refactor for data-structure (dict -> Transition).
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Sun, 12 Dec 2010 23:04:31 +0900
parents 19a88707bd29
children 2632b963e441
line wrap: on
line source

#!/usr/bin/env python

import curses.ascii as ascii
import copy
import collections
from pyrect.regexp.fa import FA, Transition

class NFAFragment(object):
    def __init__(self):
        self.start   = None    # integer
        self.accepts = set()   # frozenset
        self.transition     = Transition()  # transition, char or special -> frozenset([states])

    def connect(self, from_, char, to):
        if self.transition[from_, char]:
            slot = self.transition[from_, char]
        else:
            slot = set()
            self.transition[from_, char] = slot
        slot.add(to)

    def accept_to(self, from_, char):
        self.accepts.add((from_, char))

    def set_accept(self, to):
        for from_, char in self.accepts:
            self.connect(from_, char, to)
        self.accepts = set()

    def new_skelton(self):
        # copy fragment (self), and it return
        newFrag = NFAFragment();
        newFrag.transition = copy.deepcopy(self.transition)
        return newFrag

    def __or__(self, frag):
        newFrag = self.new_skelton()
        for k, v in frag.transition.iterstates():
            newFrag.transition[k] = copy.deepcopy(v)
        return newFrag

    def __str__(self):
        return ("NFA: delta -> %s, start -> %s, accepts -> %s" %
                (self.transition, self.start, self.accepts))

    def build(self):
        states = set()
        for s in self.transition.iterkeys():
            states.add(s)

        return NFA(
            self.transition,
            self.start,
            self.accepts,
            sorted(states)
            )

class NFA(FA):
    def epsilon_expand(self, states):
        if not isinstance(states, set) and \
        not isinstance(states, list):
            states = set([states])
        states = copy.deepcopy(states)
        done = set()
        while states:
            stat = states.pop()
            done.add(stat)
            if self.transition[stat, ""] != None:
                for n in self.transition[stat, ""]:
                    if not n in done: states.add(n)
        return frozenset(done)

class SuffixNFA(NFA):
    def __init__(self, nfa):
        self.start = nfa.start
        self.accepts = copy.deepcopy(nfa.accepts)
        self.states = copy.deepcopy(nfa.states)
        self.transition = self.nfa_trans(nfa)
        start = set(self.states)-set([self.start])
        if self.map.has_key((self.start, '')):
            self.map[(self.start, '')].add(start)
        else:
            self.map[(self.start, '')] = start

    def nfa_trans(self, fa):
        if type(fa) == NFA:
            return copy.deepcopy(fa.transition)
        ntrans = dict()
        for k, v in fa.map.itertrans():
            ntrans[k] = set([v])
        return ntrans

class SuffixTrieNFA(NFA):
    def __init__(self, dfa):
        self.start = dfa.start
        self.pdfa = dfa
        self.states = copy.deepcopy(dfa.states)
        self.acceptable = collections.defaultdict(set)
        self.accepts = copy.deepcopy(set(dfa.accepts))
        states = [self.start] + list(set(self.states)-set((self.start,)))
        self.make_trie_trans(states)

    def make_trie_trans(self, states):
        add_states = set()
        offset = [x * len(states) for x in range(len(states))]
        for s, ind in zip(states, offset):
            for (s_, i), n in self.pdfa.trans.itertrans():
                sind = s_ + ind
                nind = n + ind
                add_states.add((sind, nind))
                if n in self.pdfa.accepts:
                    self.accepts.add(nind)
                    self.acceptable[nind].add(s)
                self.map[(sind, i)] = set((nind,))

        starts = set()
        for i in range(1, len(states)):
            starts.add(self.start + i + offset[i])

        self.states = list(add_states)
        self.trans[(self.start, '')] = starts