Mercurial > hg > Members > shinya > pyrect
view pyrect/regexp/dfa_translator.py @ 98:4d498b002de5
refactor for data-structure (dict -> Transition).
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 12 Dec 2010 23:04:31 +0900 |
parents | d23f12ce0369 |
children | 5e509a9c951c |
line wrap: on
line source
#!/usr/bin/env python #-*- encoding: utf-8 -*- from pyrect.regexp.parser import Parser from pyrect.regexp.ast import ASTWalker, AnyChar from pyrect.regexp.fa import Transition from pyrect.regexp.nfa_translator import NFATranslator from pyrect.regexp.nfa import NFA, SuffixNFA, SuffixTrieNFA from pyrect.regexp.dfa import DFA, SuffixDFA class DFATranslator(object): """ Create DFA from AST with Visitor-Pattern. actually DFATranslator use NFATranslator and it convert NFA into DFA. >>> prs = Parser() >>> dfat = DFATranslator() >>> print(dfat.translate(prs.parse('(A|B)*C')).transition) Transition: 0 x 'A' -> 0, 0 x 'B' -> 0, 0 x 'C' -> 1, >>> dfat.translate(prs.parse('(A|B)*C')).accepts [1] >>> print(dfat.translate(prs.parse('ほ?げ+')).transition) Transition: 0 x \\x81 -> 1, 1 x \\x92 -> 5, 2 x \\x92 -> 5, 2 x \\xbb -> 3, 3 x \\xe3 -> 0, 4 x \\xe3 -> 6, 5 x \\xe3 -> 0, 6 x \\x81 -> 2, """ def __init__(self): self.dfa = None self.nfa = None def translate(self, arg=None): if isinstance(arg, NFA): self.nfa = arg self.dfa = self.convert(self.nfa) elif arg: self.nfa = NFATranslator().translate(arg) self.dfa = self.convert(self.nfa) return self.dfa def convert(self, nfa): transition = Transition() que = set() done = set() que.add(nfa.epsilon_expand(nfa.start)) while que: states = que.pop() anys = set() for state in states: trans = nfa.transition[state] if trans == None: continue for i, n in trans.iteritems(): if i == AnyChar(): anys.update(nfa.epsilon_expand(n)) for state in states: trans = nfa.transition[state] if trans == None: continue for i, n in trans.iteritems(): if i != '': default = set(anys) if transition[states, i] == None: slot = default transition[states, i] = slot else: slot = transition[states, i] slot.update(nfa.epsilon_expand(n)) done.add(states) for (_, _), n in transition.itertrans(): if not n in done: que.add(frozenset(n)) assoc = dict() states = set() states.add(nfa.epsilon_expand(nfa.start)) for _, state in transition.itertrans(): states.add(frozenset(state)) for index, state in enumerate(states): assoc[state] = index states = list() for state in assoc.itervalues(): states.append(state) start = assoc[nfa.epsilon_expand(nfa.start)] accepts = list() for state in assoc.iterkeys(): if state & set(nfa.accepts): accepts.append(assoc[state]) transition_ = Transition() for (s, i), n in transition.itertrans(): transition_[(assoc[s], i)] = assoc[frozenset(n)] for k, v in assoc.items(): assoc[v] = k return DFA( transition_, start, accepts, states, assoc ) class SuffixDFATranslator(DFATranslator): def __init__(self): self.sdfa = None def translate(self, fa=None): if fa: self.sdfa = SuffixDFA(DFATranslator.translate(self, SuffixNFA(fa)), fa) return self.sdfa class SuffixTrieTranslator(DFATranslator): def __init__(self): self.trie = None def translate(self, fa=None): if fa: self.trie = DFATranslator().translate(SuffixTrieNFA(fa)) return self.trie def test(): import doctest doctest.testmod() if __name__ == '__main__' : test()