Mercurial > hg > Members > shinya > pyrect
changeset 42:84059ea1a2db
add goto_grep_translator.py, this is label-based grep implimentation.
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 02 Aug 2010 04:03:23 +0900 |
parents | ffbbdd33881d |
children | 83c69d42faa8 |
files | pyrect/goto_grep_translator.py pyrect/jitgrep.py pyrect/regexp/__init__.py pyrect/template/grep.label |
diffstat | 3 files changed, 172 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/goto_grep_translator.py Mon Aug 02 04:03:23 2010 +0900 @@ -0,0 +1,124 @@ +#!/usr/bin/env python + +from c_translator import CTranslator +from dfareg import Regexp, CallGraph + +class GREPTranslateExeption(Exception): + pass + +class GoToGREPTranslator(CTranslator): + """GoToGREPTranslator + This Class can translate form DFA into grep source-code. + which based on (beautiful) mini-grep introduced \"The Practice of Programming\" + written by Rob Pike & Brian W. Kernighan. (see template/grep.label) + >>> string = \"(build|fndecl|gcc)\" + >>> reg = Regexp(string) + >>> dfacg = CallGraph(reg.dfa) + >>> tje = GoToGREPTranslator(string, dfacg) + >>> tje.translate() + """ + + def __init__(self, regexp, cg): + if cg.type == "NFA": raise GREPTranslateExeption("can't translate grep from NFA") + CTranslator.__init__(self, regexp, cg) + self.funType = 'void ' + self.callType = 'return ' + self.breakStatement = '' + self.begline = False + self.__bufsize = 1024 + + def getbufsize(self,): + return self.__bufsize + def setbufsize(self, bufsize): + self.__bufsize = abs(bufsize) + + bufsize = property(getbufsize, setbufsize) + + def emit_accept_state(self): + self.emit (""" +\taccept: +\t\tprintf(\"%s\\n\", buf); +\t\treturn; +""") + + def emit_reject_state(self): + self.emit("\treject:\n") + if not self.begline: + self.emit(""" +\t\tif (*cur++ == '\\0') +\t\t\treturn; +\t\ts = cur; +\t\tgoto %s; +""" % self.modify_state_name(self.cg.start)) + self.emit("return;\n") + + + def emit_initialization(self): + self.emit("#include <stdio.h>\n") + self.emit("#include <stdlib.h>\n") + self.emit("#include <string.h>\n\n") + + self.emit("#define LINEBUFSIZE 1024\n") + self.emit("#define READBUFSIZE %d\n\n" % (self.bufsize)) + self.emit("char readbuf[%d];\n\n" % (self.bufsize)) + + self.emit("%sDFA(char* s, char *buf, char* filename);\n" % self.funType) + grepsource = open("template/grep.label") + self.emit(grepsource.read()) + + def emit_filter(self): + pass + + def emit_driver(self): + self.emit(""" +int main(int argc, char* argv[]) { +\treturn grepmain(argc, argv); +}\n\n""") + + def emit_switch(self, case, default=None): + self.emit("\t\tswitch(*s++) {\n") + for input, next_states in case.iteritems(): + if input != '': + self.emit("\t\t\tcase '%s': \n" % (input)) + for next_state in next_states: + self.emit("\t\t\t\tgoto %s;\n" % (self.modify_state_name(next_state))) + if self.breakStatement != '': self.emit(self.breakStatement+'\n') + + if default: + self.emit( """\t\t\tdefault:\n\t\t\tgoto %s;\n""" % default) + self.emit("\t\t}\n") + + def emit_state(self, cur_state, transition): + self.emit("\t" + self.modify_state_name(cur_state) + ":\n") + if cur_state in self.cg.accepts: + self.emit("\t\tgoto accept;\n") + else: + if transition: + if self.cg.type == "DFA": + self.emit_switch(transition, default="reject") + else: + self.emit_switch(transition) + self.emit("\n") + + def emit_from_callgraph(self): + # self.emit C-source code + self.emit_initialization() + self.emit_driver() + + self.emit("void DFA(char *s, char *buf, char* filename) {\n") + self.emit("\tchar *cur = s;\n") + self.emit("\tgoto %s;\n" % self.modify_state_name(self.cg.start)) + + for cur_state, transition in self.cg.map.iteritems(): + self.emit_state(cur_state, transition) + + self.emit_accept_state() + self.emit_reject_state() + + self.emit("}\n\n") + +def test(): + import doctest + doctest.testmod() + +if __name__ == '__main__': test()
--- a/pyrect/jitgrep.py Mon Aug 02 04:02:19 2010 +0900 +++ b/pyrect/jitgrep.py Mon Aug 02 04:03:23 2010 +0900 @@ -6,6 +6,7 @@ import time from optparse import OptionParser from grep_translator import GREPTranslator +from goto_grep_translator import GoToGREPTranslator from cbcgrep_translator import CbCGREPTranslator from dfareg import Regexp, CallGraph @@ -25,6 +26,7 @@ psr.add_option("--CFLAGS", action="store", type="string", dest="cflags", default="-O3 -fomit-frame-pointer", help="Print compile/matching time.") psr.add_option("--time", action="store_true", dest="time", default=False, help="Print compile/matching time.") psr.add_option("--debug", action="store_true", dest="debug", default=False, help="Dump commands, not evalute matching (except interactive mode).") + psr.add_option("--label", action="store_true", dest="label", default=False, help="label implimentation in C.") psr.add_option("--dump", action="store_true", dest="dump", default=False, help="Dump generated grep-source.") psr.add_option("--out", action="store", type="string", dest="out", default="", metavar="FILE", help="Output file.") @@ -67,6 +69,8 @@ dfacg = CallGraph(reg.dfa) if cbc: grept = CbCGREPTranslator(string, dfacg) + elif opts.label: + grept = GoToGREPTranslator(string, dfacg) else: grept = GREPTranslator(string, dfacg) grept.begline = begline
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/template/grep.label Mon Aug 02 04:03:23 2010 +0900 @@ -0,0 +1,44 @@ +/* Excerpted from 'The Practice of Programming' */ +/* by Brian W. Kernighan and Rob Pike */ + +int grep(char * regexp, FILE *f, char *name) { + int n; + char buf[LINEBUFSIZE]; + while (fgets(buf, sizeof buf, f) != NULL) { + n = strlen(buf); + if (n > 0 && buf[n-1] == '\n') + buf[n-1] = '\0'; + DFA(buf, buf, name); + } + return 1; +} + +int grepmain(int argc, char* argv[]) { + int i, nmatch; + FILE *f; + + if (argc < 2) { + fprintf(stderr, "usage: grep regexp [file ...]"); + exit(0); + } + nmatch = 0; + if (argc == 2) { + if (grep(argv[1], stdin, NULL)) + nmatch; + } else { + for (i = 2; i < argc; i++) { + f = fopen(argv[i], "r"); + if (f == NULL) { + fprintf(stderr, "can't open %s:", argv[i]); + continue; + } + if (READBUFSIZE > 0) + setvbuf(f, readbuf, _IOFBF, READBUFSIZE); + if (grep(argv[1], f, argc > 3 ? argv[i] : NULL) > 0) + nmatch++; + fclose(f); + } + } + + return nmatch; +}