view pyrect/regexp/nfa.py @ 87:d23f12ce0369

add suffix-dfa, it's used to be parallel-matching-algorithm (not implement, yet).
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Sun, 14 Nov 2010 04:16:12 +0900
parents 701beabd7d97
children 933d422f21f0
line wrap: on
line source

#!/usr/bin/env python

import curses.ascii as ascii
import copy
import collections
from pyrect.regexp.fa import FA

class NFAFragment(object):
    def __init__(self):
        self.start   = None    # integer
        self.accepts = set()   # frozenset
        self.map     = dict()  # transition, char or special -> frozenset([states])

    def connect(self, from_, char, to):
        slot = self.map.setdefault((from_, char), set())
        slot.add(to)

    def accept_to(self, from_, char):
        self.accepts.add((from_, char))

    def set_accept(self, to):
        for from_, char in self.accepts:
            self.connect(from_, char, to)
        self.accepts = set()

    def new_skelton(self):
        # copy fragment (self), and it return
        newFrag = NFAFragment();
        newFrag.map = copy.deepcopy(self.map)
        return newFrag

    def __or__(self, frag):
        newFrag = self.new_skelton()
        for k, v in frag.map.iteritems():
            newFrag.map[k] = v.copy()
        return newFrag

    def __str__(self):
        return ("NFA: delta -> %s, start -> %s, accepts -> %s" %
                (self.map, self.start, self.accepts))

    def build(self):
        map_ = self.map
        def transition(state, char):
            return frozenset(map_.get((state, char), []))

        states = set()
        for (s, _) in self.map.iterkeys():
            states.add(s)

        return NFA(
            transition,
            self.start,
            self.accepts,
            self.map,
            sorted(states)
            )

class NFA(FA):
    def epsilon_expand(self, set_):
        que = set(set_)
        done = set()
        while que:
            stat = que.pop()
            nexts = self.transition(stat, "")
            done.add(stat)
            for nextStat in nexts:
                if not nextStat in done: que.add(nextStat)
        return frozenset(done)

class SuffixNFA(NFA):
    def __init__(self, nfa):
        self.start = nfa.start
        self.accepts = copy.deepcopy(nfa.accepts)
        self.states = copy.deepcopy(nfa.states)
        self.map = self.nfa_map(nfa)
        start = set(self.states)-set([self.start])
        if self.map.has_key((self.start, '')):
            self.map[(self.start, '')].add(start)
        else:
            self.map[(self.start, '')] = start

    def nfa_map(self, fa):
        if type(fa) == NFA:
            return copy.deepcopy(fa.map)
        nmap = dict()
        for k, v in fa.map.items():
            nmap[k] = set([v])
        return nmap

    def transition(self, state, char):
        return frozenset(self.map.get((state, char), []))

#TODO: bug-fix
class SuffixTrieNFA(NFA):
    def __init__(self, dfa):
        self.start = dfa.start
        self.pdfa = dfa
        self.map = dict()
        self.states = copy.deepcopy(dfa.states)
        self.acceptable = collections.defaultdict(set)
        self.accepts = copy.deepcopy(set(dfa.accepts))
        states = [self.start] + list(set(self.states)-set((self.start,)))
        self.make_trie_map(states)

    def make_trie_map(self, states):
        add_states = set()
        offset = [x * len(states) for x in range(len(states))]
        for s, ind in zip(states, offset):
            for (s_, i), n in self.pdfa.map.iteritems():
                if s_ != s: continue
                sind = s + ind
                nind = n + ind
                add_states.add((sind, nind))
                if n in self.pdfa.accepts:
                    self.accepts.add(nind)
                    self.acceptable[nind].add(s)
                self.map[(sind, i)] = set((nind,))

        self.states = list(add_states)
        self.map[(self.start, '')] = add_states.difference((self.start, ))
        print self.map

    def transition(self, state, char):
        return frozenset(self.map.get((state, char), []))