Mercurial > hg > Members > shinya > pyrect
view pyrect/regexp/nfa.py @ 98:4d498b002de5
refactor for data-structure (dict -> Transition).
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 12 Dec 2010 23:04:31 +0900 |
parents | 19a88707bd29 |
children | 2632b963e441 |
line wrap: on
line source
#!/usr/bin/env python import curses.ascii as ascii import copy import collections from pyrect.regexp.fa import FA, Transition class NFAFragment(object): def __init__(self): self.start = None # integer self.accepts = set() # frozenset self.transition = Transition() # transition, char or special -> frozenset([states]) def connect(self, from_, char, to): if self.transition[from_, char]: slot = self.transition[from_, char] else: slot = set() self.transition[from_, char] = slot slot.add(to) def accept_to(self, from_, char): self.accepts.add((from_, char)) def set_accept(self, to): for from_, char in self.accepts: self.connect(from_, char, to) self.accepts = set() def new_skelton(self): # copy fragment (self), and it return newFrag = NFAFragment(); newFrag.transition = copy.deepcopy(self.transition) return newFrag def __or__(self, frag): newFrag = self.new_skelton() for k, v in frag.transition.iterstates(): newFrag.transition[k] = copy.deepcopy(v) return newFrag def __str__(self): return ("NFA: delta -> %s, start -> %s, accepts -> %s" % (self.transition, self.start, self.accepts)) def build(self): states = set() for s in self.transition.iterkeys(): states.add(s) return NFA( self.transition, self.start, self.accepts, sorted(states) ) class NFA(FA): def epsilon_expand(self, states): if not isinstance(states, set) and \ not isinstance(states, list): states = set([states]) states = copy.deepcopy(states) done = set() while states: stat = states.pop() done.add(stat) if self.transition[stat, ""] != None: for n in self.transition[stat, ""]: if not n in done: states.add(n) return frozenset(done) class SuffixNFA(NFA): def __init__(self, nfa): self.start = nfa.start self.accepts = copy.deepcopy(nfa.accepts) self.states = copy.deepcopy(nfa.states) self.transition = self.nfa_trans(nfa) start = set(self.states)-set([self.start]) if self.map.has_key((self.start, '')): self.map[(self.start, '')].add(start) else: self.map[(self.start, '')] = start def nfa_trans(self, fa): if type(fa) == NFA: return copy.deepcopy(fa.transition) ntrans = dict() for k, v in fa.map.itertrans(): ntrans[k] = set([v]) return ntrans class SuffixTrieNFA(NFA): def __init__(self, dfa): self.start = dfa.start self.pdfa = dfa self.states = copy.deepcopy(dfa.states) self.acceptable = collections.defaultdict(set) self.accepts = copy.deepcopy(set(dfa.accepts)) states = [self.start] + list(set(self.states)-set((self.start,))) self.make_trie_trans(states) def make_trie_trans(self, states): add_states = set() offset = [x * len(states) for x in range(len(states))] for s, ind in zip(states, offset): for (s_, i), n in self.pdfa.trans.itertrans(): sind = s_ + ind nind = n + ind add_states.add((sind, nind)) if n in self.pdfa.accepts: self.accepts.add(nind) self.acceptable[nind].add(s) self.map[(sind, i)] = set((nind,)) starts = set() for i in range(1, len(states)): starts.add(self.start + i + offset[i]) self.states = list(add_states) self.trans[(self.start, '')] = starts