Mercurial > hg > Members > shinya > pyrect
view pyrect/regexp/nfa_translator.py @ 98:4d498b002de5
refactor for data-structure (dict -> Transition).
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 12 Dec 2010 23:04:31 +0900 |
parents | 5db856953793 |
children |
line wrap: on
line source
#!/usr/bin/env python #-*- encoding: utf-8 -*- from pyrect.regexp.parser import Parser from pyrect.regexp.ast import * from pyrect.regexp.nfa import * class NFATranslator(ASTWalker): """ Create NFA from AST with Visitor-Pattern. AST (ast), is represented by Node-Tree. >>> prs = Parser() >>> nfat = NFATranslator() >>> nfa = nfat.translate(prs.parse('(A|B)*C')) >>> print nfa.transition Transition: 0 x 'A' -> set([3]), 1 x 'B' -> set([3]), 2 x -> set([0, 1]), 3 x -> set([2, 4]), 4 x 'C' -> set([5]), >>> nfa = nfat.translate(prs.parse('[A-C]{2}D{3,}E{4,5}')) >>> print nfa.transition Transition: 0 x 'A' -> set([5]), 1 x 'B' -> set([5]), 2 x -> set([0, 1]), 3 x 'C' -> set([5]), 4 x -> set([2, 3]), 5 x REPEAT -> set([4]), 5 x ['A'-'C']{2} -> set([7]), 7 x 'D' -> set([8]), 8 x REPEAT -> set([7]), 8 x 'D'{3,} -> set([10]), 10 x 'E' -> set([11]), 11 x REPEAT -> set([10]), 11 x 'E'{4, 5} -> set([13]), """ def __init__(self): self.ast = None self.nfa = None def get_state_id(self): self.__state_id += 1 return self.__state_id state_id = property(get_state_id) def translate(self, ast=None): if ast: self.ast = ast if self.ast: self.__state_id = -1 frag = ast.accept(self) frag.set_accept(self.state_id) self.nfa = frag.build() self.nfa.states = range(self.__state_id + 1) self.nfa.accepts = [self.__state_id] return self.nfa def visit(self, input_node): frag = NFAFragment() s = self.state_id frag.start = s frag.accept_to(s, input_node) return frag def visit_CharClass(self, charclass): # CharClass be converted Union in NFA factor = list(charclass.factor) if len(factor) == 1: return factor[0].accept(self) union = factor[0] for op in factor[1:]: union = Union(union, op) return union.accept(self) def visit_Union(self, union): frag1 = union.op1.accept(self) frag2 = union.op2.accept(self) frag = frag1 | frag2 s = self.state_id frag.connect(s, '', frag1.start) frag.connect(s, '', frag2.start) frag.start = s frag.accepts = frag1.accepts | frag2.accepts return frag def visit_Concat(self, concat): frag1 = concat.op1.accept(self) frag2 = concat.op2.accept(self) frag1.set_accept(frag2.start) frag = frag1 | frag2 frag.start = frag1.start frag.accepts = frag2.accepts return frag def visit_Star(self, star): frag = star.op.accept(self) s = self.state_id frag.connect(s, '', frag.start) frag.set_accept(s) frag.accept_to(s, '') frag.start = s return frag def visit_Qmark(self, qmark): frag = qmark.op.accept(self) s = self.state_id frag.connect(s, '', frag.start) frag.accept_to(s, '') frag.start = s return frag def visit_Plus(self, plus): frag = plus.op.accept(self) s = self.state_id frag.set_accept(s) frag.connect(s, '', frag.start) frag.accept_to(s, '') return frag def visit_Range(self, range_): elems = range(range_.lower.char, range_.upper.char+1) elems = map(chr, elems) elems = map(Character, elems) union = reduce(Union, elems) return union.accept(self) def visit_RepMN(self, repmn): frag = repmn.op.accept(self) s = self.state_id frag.set_accept(s) frag.connect(s, "REPEAT", frag.start) s_ = self.state_id frag.set_accept(s_) frag.accept_to(s, repmn) return frag def test(): import doctest doctest.testmod() if __name__ == "__main__": test()