Mercurial > hg > Members > shinya > pyrect
view pyrect/regexp/nfa_translator.py @ 87:d23f12ce0369
add suffix-dfa, it's used to be parallel-matching-algorithm (not implement, yet).
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 14 Nov 2010 04:16:12 +0900 |
parents | fd3d0b8326fe |
children | 82a8232625c3 |
line wrap: on
line source
#!/usr/bin/env python #-*- encoding: utf-8 -*- from pyrect.regexp.parser import Parser from pyrect.regexp.ast import * from pyrect.regexp.nfa import * class NFATranslator(ASTWalker): """ Create NFA from AST with Visitor-Pattern. AST (ast), is represented by Node-Tree. >>> prs = Parser() >>> nfat = NFATranslator() >>> nfa = nfat.translate(prs.parse('(A|B)*C')) >>> nfa.map {(3, ''): set([2, 4]), (2, ''): set([0, 1]), (1, (Character:'B')): set([3]), (4, (Character:'C')): set([5]), (0, (Character:'A')): set([3])} >>> anfa = ANFA(nfa) >>> anfa.map {(1, (Character:'B')): set([3]), (0, (Character:'A')): set([3]), (3, ''): set([2, 4]), (6, ''): set([0, 1, 2, 3, 4, 5]), (2, ''): set([0, 1]), (4, (Character:'C')): set([5])} """ def __init__(self): self.ast = None self.nfa = None def get_state_id(self): self.__state_id += 1 return self.__state_id state_id = property(get_state_id) def translate(self, ast=None): if ast: self.ast = ast if self.ast: self.__state_id = -1 frag = ast.accept(self) frag.set_accept(self.state_id) self.nfa = frag.build() self.nfa.states = range(self.__state_id + 1) self.nfa.accepts = [self.__state_id] return self.nfa def visit(self, input_node): frag = NFAFragment() s = self.state_id frag.start = s frag.accept_to(s, input_node) return frag def visit_CharClass(self, charclass): # CharClass be converted Union in NFA factor = list(charclass.factor) if len(factor) == 1: return factor[0].accept(self) union = factor[0] for op in factor[1:]: union = Union(union, op) return union.accept(self) def visit_Union(self, union): frag1 = union.op1.accept(self) frag2 = union.op2.accept(self) frag = frag1 | frag2 s = self.state_id frag.connect(s, '', frag1.start) frag.connect(s, '', frag2.start) frag.start = s frag.accepts = frag1.accepts | frag2.accepts return frag def visit_Concat(self, concat): frag1 = concat.op1.accept(self) frag2 = concat.op2.accept(self) frag1.set_accept(frag2.start) frag = frag1 | frag2 frag.start = frag1.start frag.accepts = frag2.accepts return frag def visit_Star(self, star): frag = star.op.accept(self) s = self.state_id frag.connect(s, '', frag.start) frag.set_accept(s) frag.accept_to(s, '') frag.start = s return frag def visit_Qmark(self, qmark): frag = qmark.op.accept(self) s = self.state_id frag.connect(s, '', frag.start) frag.accept_to(s, '') frag.start = s return frag def visit_Plus(self, plus): frag = plus.op.accept(self) s = self.state_id frag.set_accept(s) frag.connect(s, '', frag.start) frag.accept_to(s, '') return frag def test(): import doctest doctest.testmod() if __name__ == "__main__": test()