Mercurial > hg > Members > shinya > pyrect
view pyrect/dfa_translator.py @ 43:83c69d42faa8
Replace the conversion flow: module dfareg is replaced with module regexp. This is a substantial change in the implementation.
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 03 Aug 2010 05:35:38 +0900 |
parents | ef2928cdbdb6 |
children | d29d3470fde7 |
line wrap: on
line source
#!/usr/bin/env python

from pyrect.grep_translator import GREPTranslator
from pyrect.regexp import Regexp


class GNUGREPTranslator(GREPTranslator):
    """GNUGREPTranslator
    Translate a DFA into C source exposing ``size_t DFA(char* s)``,
    which is meant to be entirely equivalent to dfaexec(..) in GNU grep
    (see src/dfa.c).

    * Known issue: this does not work correctly yet. When searching a
    * large file, fewer lines are accepted than with grep's dfaexec.
    * The cause is probably related to buffering.

    >>> string = '(build|fndecl|gcc)'
    >>> reg = Regexp(string)
    >>> tje = GNUGREPTranslator(reg)
    >>> tje.translate()
    """

    def __init__(self, regexp):
        """Initialise the base translator and select size_t-style output.

        regexp -- the regular expression object handed to GREPTranslator.

        funType is the C return-type prefix used for every function this
        class emits. callType and breakStatement are not read in this
        class; presumably the inherited emit helpers consume them
        (NOTE(review): confirm against GREPTranslator).
        """
        GREPTranslator.__init__(self, regexp)
        self.funType = 'size_t '
        self.callType = 'return '
        self.breakStatement = ''

    def emit_initialization(self):
        """Emit C prototypes for each state function, accept and reject."""
        # Iterating the mapping directly yields its keys on Python 2 and
        # Python 3 alike, whereas iterkeys() exists only on Python 2.
        # NOTE(review): assumes self.cg.map is a dict-like mapping.
        for state in self.cg.map:
            self.emit(self.funType + self.modify_state_name(state)
                      + "(char* s);\n")
        self.emit(self.funType + 'accept(char* s);\n')
        self.emit(self.funType + 'reject(char* s);\n')

    def emit_accept_state(self):
        """Emit the C function accept(), which always returns 1."""
        self.emit("""
%saccept(char* s) {
\treturn 1;
}\n""" % self.funType)

    def emit_reject_state(self):
        """Emit the C function reject(), which always returns 0."""
        self.emit("""
%sreject(char* s) {
\treturn 0;
}\n""" % self.funType)

    def emit_driver(self):
        """Emit the C entry point DFA(char *s).

        The generated function calls the start state at successive
        positions of s until a newline or NUL is reached. It returns the
        offset of the first position where the start state reports a
        match, or (size_t) -1 when there is none.
        """
        # The pattern is pasted into a /* ... */ comment; a literal "*/"
        # inside it would end the comment early and break the generated C.
        # Formatting with '%s' first keeps the original conversion rules.
        pattern = ('%s' % self.regexp.regexp).replace('*/', '* /')
        self.emit("""
/* This DFA accept only \'%s\'*/
%sDFA(char *s) {
  char *begin = s;
  do {
    if (%s(s)) { //(matchhere(regexp+1, text))
      return (char const *) s - begin;
    }
  } while (*s != '\\n' && *s++ != '\\0');
  return (size_t) -1;
}\n\n""" % (pattern, self.funType,
            self.modify_state_name(self.cg.start)))

    def emit_state(self, cur_state, transition):
        """Emit the C function implementing one automaton state.

        cur_state  -- the state whose function is generated.
        transition -- the outgoing transitions of cur_state, passed on to
                      the inherited emit_switch().

        Accepting states return accept(s) immediately. Other states
        dispatch through emit_switch(); for a DFA, input not covered by
        any transition is routed to reject.
        """
        self.emit(self.funType + self.modify_state_name(cur_state)
                  + "(char* s) {\n")
        if cur_state in self.cg.accepts:
            self.emit("\treturn accept(s);\n")
        elif not transition:
            # A non-accepting state without outgoing transitions can never
            # match. Return explicitly so the generated non-void C function
            # does not fall off its end (undefined behaviour when the
            # caller uses the value).
            self.emit("\treturn reject(s);\n")
        elif self.cg.type == "DFA":
            self.emit_switch(transition, default="reject")
        else:
            self.emit_switch(transition)
        self.emit("}\n\n")


def test():
    """Run the doctests embedded in this module."""
    import doctest
    doctest.testmod()


if __name__ == '__main__':
    test()