Mercurial > hg > Members > nobuyasu > test
view pyrect/pyrect/regexp/dfa_translator.py @ 9:493c96d030c0
add pyrect
author | nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 14 Jun 2011 17:24:03 +0900 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python #-*- encoding: utf-8 -*- from pyrect.regexp.parser import Parser from pyrect.regexp.ast import ASTWalker, AnyChar, Range from pyrect.regexp.fa import Transition from pyrect.regexp.nfa_translator import NFATranslator from pyrect.regexp.nfa import NFA, SuffixNFA, SuffixTrieNFA from pyrect.regexp.dfa import DFA, SuffixDFA class DFATranslator(object): """ Create DFA from AST with Visitor-Pattern. actually DFATranslator use NFATranslator and it convert NFA into DFA. >>> prs = Parser() >>> dfat = DFATranslator() >>> print(dfat.translate(prs.parse('(A|B)*C')).transition) Transition: 0 x 'A' -> 0, 0 x 'B' -> 0, 0 x 'C' -> 1, >>> dfat.translate(prs.parse('(A|B)*C')).accepts [1] >>> print(dfat.translate(prs.parse('ほ?げ+')).transition) Transition: 0 x \\x81 -> 1, 1 x \\x92 -> 5, 2 x \\x92 -> 5, 2 x \\xbb -> 3, 3 x \\xe3 -> 0, 4 x \\xe3 -> 6, 5 x \\xe3 -> 0, 6 x \\x81 -> 2, """ def __init__(self): self.dfa = None self.nfa = None def translate(self, arg=None): if isinstance(arg, NFA): self.nfa = arg self.dfa = self.convert(self.nfa) elif arg: self.nfa = NFATranslator().translate(arg) self.dfa = self.convert(self.nfa) return self.dfa def convert(self, nfa): transition = Transition() que = set() done = set() que.add(nfa.epsilon_expand(nfa.start)) while que: states = que.pop() anys = set() for state in states: trans = nfa.transition[state] if trans and trans.has_key(AnyChar()): anys.update(nfa.epsilon_expand(trans[AnyChar()])) for state in states: trans = nfa.transition[state] if trans == None: continue for i, n in trans.iteritems(): if i != '': default = set(anys) if transition[states, i] == None: slot = default transition[states, i] = slot else: slot = transition[states, i] slot.update(nfa.epsilon_expand(n)) done.add(states) for (_, _), n in transition.itertrans(): if not n in done: que.add(frozenset(n)) assoc = dict() states = set() states.add(nfa.epsilon_expand(nfa.start)) for _, state in transition.itertrans(): states.add(frozenset(state)) for index, state in enumerate(states): assoc[state] = index states = list() for state in assoc.itervalues(): states.append(state) start = assoc[nfa.epsilon_expand(nfa.start)] accepts = list() for state in assoc.iterkeys(): if state & set(nfa.accepts): accepts.append(assoc[state]) transition_ = Transition() for (s, i), n in transition.itertrans(): transition_[(assoc[s], i)] = assoc[frozenset(n)] for k, v in assoc.items(): assoc[v] = k return DFA( transition_, start, accepts, states, assoc ) class SuffixDFATranslator(DFATranslator): def __init__(self): self.sdfa = None def translate(self, fa=None): if fa: self.sdfa = SuffixDFA(DFATranslator.translate(self, SuffixNFA(fa)), fa) return self.sdfa class SuffixTrieTranslator(DFATranslator): def __init__(self): self.trie = None def translate(self, fa=None): if fa: self.trie = DFATranslator().translate(SuffixTrieNFA(fa)) return self.trie def test(): import doctest doctest.testmod() if __name__ == '__main__' : test()