Mercurial > hg > Members > shinya > pyrect
view pyrect/regexp/nfa.py @ 87:d23f12ce0369
add suffix-dfa, it's used to be parallel-matching-algorithm (not implement, yet).
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 14 Nov 2010 04:16:12 +0900 |
parents | 701beabd7d97 |
children | 933d422f21f0 |
line wrap: on
line source
#!/usr/bin/env python import curses.ascii as ascii import copy import collections from pyrect.regexp.fa import FA class NFAFragment(object): def __init__(self): self.start = None # integer self.accepts = set() # frozenset self.map = dict() # transition, char or special -> frozenset([states]) def connect(self, from_, char, to): slot = self.map.setdefault((from_, char), set()) slot.add(to) def accept_to(self, from_, char): self.accepts.add((from_, char)) def set_accept(self, to): for from_, char in self.accepts: self.connect(from_, char, to) self.accepts = set() def new_skelton(self): # copy fragment (self), and it return newFrag = NFAFragment(); newFrag.map = copy.deepcopy(self.map) return newFrag def __or__(self, frag): newFrag = self.new_skelton() for k, v in frag.map.iteritems(): newFrag.map[k] = v.copy() return newFrag def __str__(self): return ("NFA: delta -> %s, start -> %s, accepts -> %s" % (self.map, self.start, self.accepts)) def build(self): map_ = self.map def transition(state, char): return frozenset(map_.get((state, char), [])) states = set() for (s, _) in self.map.iterkeys(): states.add(s) return NFA( transition, self.start, self.accepts, self.map, sorted(states) ) class NFA(FA): def epsilon_expand(self, set_): que = set(set_) done = set() while que: stat = que.pop() nexts = self.transition(stat, "") done.add(stat) for nextStat in nexts: if not nextStat in done: que.add(nextStat) return frozenset(done) class SuffixNFA(NFA): def __init__(self, nfa): self.start = nfa.start self.accepts = copy.deepcopy(nfa.accepts) self.states = copy.deepcopy(nfa.states) self.map = self.nfa_map(nfa) start = set(self.states)-set([self.start]) if self.map.has_key((self.start, '')): self.map[(self.start, '')].add(start) else: self.map[(self.start, '')] = start def nfa_map(self, fa): if type(fa) == NFA: return copy.deepcopy(fa.map) nmap = dict() for k, v in fa.map.items(): nmap[k] = set([v]) return nmap def transition(self, state, char): return frozenset(self.map.get((state, char), [])) #TODO: bug-fix class SuffixTrieNFA(NFA): def __init__(self, dfa): self.start = dfa.start self.pdfa = dfa self.map = dict() self.states = copy.deepcopy(dfa.states) self.acceptable = collections.defaultdict(set) self.accepts = copy.deepcopy(set(dfa.accepts)) states = [self.start] + list(set(self.states)-set((self.start,))) self.make_trie_map(states) def make_trie_map(self, states): add_states = set() offset = [x * len(states) for x in range(len(states))] for s, ind in zip(states, offset): for (s_, i), n in self.pdfa.map.iteritems(): if s_ != s: continue sind = s + ind nind = n + ind add_states.add((sind, nind)) if n in self.pdfa.accepts: self.accepts.add(nind) self.acceptable[nind].add(s) self.map[(sind, i)] = set((nind,)) self.states = list(add_states) self.map[(self.start, '')] = add_states.difference((self.start, )) print self.map def transition(self, state, char): return frozenset(self.map.get((state, char), []))