Mercurial > hg > Members > shinya > pyrect
view pyrect/regexp/dfa_translator.py @ 87:d23f12ce0369
add suffix-dfa, it's used to be parallel-matching-algorithm (not implement, yet).
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 14 Nov 2010 04:16:12 +0900 |
parents | fd3d0b8326fe |
children | 4d498b002de5 |
line wrap: on
line source
#!/usr/bin/env python #-*- encoding: utf-8 -*- from pyrect.regexp.parser import Parser from pyrect.regexp.ast import ASTWalker, AnyChar from pyrect.regexp.nfa_translator import NFATranslator from pyrect.regexp.nfa import NFA, SuffixNFA, SuffixTrieNFA from pyrect.regexp.dfa import DFA, SuffixDFA class DFATranslator(object): """ Create DFA from AST with Visitor-Pattern. actually DFATranslator use NFATranslator and it convert NFA into DFA. >>> prs = Parser() >>> dfat = DFATranslator() >>> dfat.translate(prs.parse('(A|B)*C')).map {(0, (Character:'C')): 1, (0, (Character:'A')): 0, (0, (Character:'B')): 0} >>> dfat.translate(prs.parse('(A|B)*C')).accepts [1] >>> dfat.translate(prs.parse('ほ?げ+')).map {(2, (MBCharacter:'げ')): 2, (0, (MBCharacter:'げ')): 2, (0, (MBCharacter:'ほ')): 1, (1, (MBCharacter:'げ')): 2} """ def __init__(self): self.dfa = None self.nfa = None def translate(self, arg=None): if isinstance(arg, NFA): self.nfa = arg self.dfa = self.convert(self.nfa) elif arg: self.nfa = NFATranslator().translate(arg) self.dfa = self.convert(self.nfa) return self.dfa def convert(self, nfa): map_ = dict() que = set(frozenset([nfa.epsilon_expand(set([nfa.start]))])) done = set() while que: states = que.pop() anys = set() for state in states: for k, v in nfa.map.iteritems(): if state == k[0] and k[1] == AnyChar(): anys.update(nfa.epsilon_expand(v)) for state in states: for k, v in nfa.map.iteritems(): if state == k[0] and k[1] != '': default = set(anys) slot = map_.setdefault((states, k[1]), default) slot.update(nfa.epsilon_expand(v)) done.add(states) for v in map_.itervalues(): if not v in done: que.add(frozenset(v)) assoc = dict() states = set() states.add(nfa.epsilon_expand(frozenset([nfa.start]))) for state in map_.itervalues(): states.add(frozenset(state)) for index, state in enumerate(states): assoc[frozenset(state)] = index states = list() for state in assoc.itervalues(): states.append(state) start = assoc[nfa.epsilon_expand(frozenset([nfa.start]))] accepts = list() for state in assoc.iterkeys(): if state & set(nfa.accepts): accepts.append(assoc[state]) map__ = dict() for (s, c), v in map_.iteritems(): map__[(assoc[frozenset(s)], c)] = assoc[frozenset(v)] for k, v in assoc.items(): assoc[v] = k return DFA( start, accepts, map__, states, assoc ) class SuffixDFATranslator(DFATranslator): def __init__(self): self.sdfa = None def translate(self, fa=None): if fa: self.sdfa = SuffixDFA(DFATranslator.translate(self, SuffixNFA(fa)), fa) return self.sdfa class SuffixTrieTranslator(DFATranslator): def __init__(self): self.trie = None def translate(self, fa=None): if fa: self.trie = DFATranslator().translate(SuffixTrieNFA(fa)) return self.trie def test(): import doctest doctest.testmod() if __name__ == '__main__' : test()