Mercurial > hg > Members > shinya > pyrect
view src/cTranslator.py @ 12:41391400fe68
add GREPTranslator(Translator) and implement jit-compile-grep,
which faster than grep!! in case of regular expression search in large files.
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 04 Jul 2010 08:40:59 +0900 |
parents | 94984eaa03e2 |
children | 55684cb51347 |
line wrap: on
line source
#!/Usr/bin/env python from dfareg import Regexp, CallGraph from translator import Translator class CTranslator(Translator): """CTranslator This Calss can translate from DFA or NFA into C source code. DFA: A simple state transition as tail call (also can implement with CbC). NFA: using stack, deepening depth-first search. >>> string = \"(A|B)*C\" >>> reg = Regexp(string) >>> dfacg = CallGraph(reg.dfa) >>> nfacg = CallGraph(reg.nfa) >>> CTranslator(string, dfacg).translate() >>> CTranslator(string, nfacg).translate() """ def __init__(self, regexp, cg): Translator.__init__(self, regexp, cg) self.funType = 'void ' self.callType = '' self.breakStatement = '\t\t\tbreak;' self.debug = False if self.cg.type is "DFA": self.name_hash = self.create_name_hash() def modify_state_name(self, state_name): if state_name is "accept" or state_name is "reject": return state_name if self.cg.type is "DFA": return "state_"+self.name_hash[state_name] else: return "state_"+state_name def emit_accept_state(self): self.emit (""" %saccept(char* s) { \tprintf(\"\\nstring matches regexp. \\n\\n\"); }\n""" % self.funType) def emit_reject_state(self): self.emit (""" %sreject(char* s) { \tprintf(\"\\nstring does not match regexp. \\n\\n\"); }\n""" % self.funType) def emit_driver(self): self.emit(""" int main(int argc, char* argv[]) { \tputs(\"regexp: %s\"); \tputs(\"number of state: %d\"); \tprintf(\"string: %%s\\n\", argv[1]); \t%s%s(argv[1]); """ % (self.regexp, len(self.cg.states), self.callType, self.modify_state_name(self.cg.start))) if self.cg.type is "NFA": self.emit("\treject(argv[1]);\n") self.emit(""" \treturn 0; }\n\n""") def emit_switch(self, case, default=None): if self.cg.type is "NFA": sLocal = "s_local" else: sLocal = "s" self.emit("\tswitch(*%s++) {\n" % (sLocal)) for input, nextStates in case.iteritems(): if input != '': self.emit("\t\tcase '%s': \n" % (input)) for nextState in nextStates: self.emit("\t\t\t%s%s(%s);\n" % (self.callType, self.modify_state_name(nextState), sLocal)) if self.breakStatement != '': self.emit(self.breakStatement+'\n') if default: self.emit( """\t\tdefault:\n\t\t\t%s%s(NULL);\n""" % (self.callType, default)) self.emit("\t}\n") def emit_state(self, cur_state, transition): self.emit(self.funType + self.modify_state_name(cur_state) + "(char* s) {\n") if self.debug: self.emit("\tprintf(\"state: %s, input: %%s\\n\", s);\n" % (cur_state)) if self.cg.type is "NFA": sLocal = "s_local" self.emit("\tchar* %s = s;\n" % (sLocal)) if '' in transition: epsilon_transition = transition.pop('') for n in epsilon_transition: self.emit("\t%s%s(%s);\n" % (self.callType, self.modify_state_name(n), sLocal)) else: sLocal = "s" if cur_state in self.cg.accepts: transition['\\0'] = ["accept"] if transition: if self.cg.type is "DFA": self.emit_switch(transition, default="reject") else: self.emit_switch(transition) self.emit("}\n\n") def emit_initialization(self): self.emit("#include <stdio.h>\n\n") for state in self.cg.map.iterkeys(): self.emit(self.funType + self.modify_state_name(state) + "(char* s);\n") self.emit(self.funType + 'accept(char* s);\n') self.emit(self.funType + 'reject(char* s);\n') def emit_from_callgraph(self): # self.emit C-source code self.emit_initialization() self.emit_driver() for cur_state, transition in self.cg.map.iteritems(): self.emit_state(cur_state, transition) self.emit_accept_state() self.emit_reject_state() def test(): import doctest doctest.testmod() ''' reg = Regexp("(A|B)*C") ct = CTranslator(reg.regexp, CallGraph(reg.dfa)) ct.translate() ''' if __name__ == '__main__': test()