Mercurial > hg > Members > shinya > pyrect
changeset 40:962ae4154724
add CbCGREPTranslator.
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 20 Jul 2010 17:26:54 +0900 |
parents | 43b277a00905 |
children | ffbbdd33881d |
files | code/graph/makegraph.sh code/graph/regdfa.pdf pyrect/cbcgrep_translator.py pyrect/grep_bench.sh pyrect/jitgrep.py pyrect/template/grep.cbc |
diffstat | 6 files changed, 171 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/code/graph/makegraph.sh Tue Jul 13 07:53:28 2010 +0900 +++ b/code/graph/makegraph.sh Tue Jul 20 17:26:54 2010 +0900 @@ -5,4 +5,4 @@ dvips $1.dvi dvipdf $1.dvi convert $1.pdf $1.png -rm -f $1.dvi $1.tex $1.dvi $1.aux $1.ps $1.log +rm -f $1.dvi $1.aux $1.ps $1.log
#!/usr/bin/env python
# pyrect/cbcgrep_translator.py
"""Translate a DFA call graph into grep source code written in CbC."""

from grep_translator import GREPTranslator
from dfareg import Regexp, CallGraph


class CbCGREPTranslateExeption(Exception):
    """Raised when a call graph cannot be translated into CbC grep source."""


# Correctly spelled alias.  The original (misspelled) name is kept so that
# existing callers which catch it continue to work.
CbCGREPTranslateException = CbCGREPTranslateExeption


class CbCGREPTranslator(GREPTranslator):
    """CbCGREPTranslator
    This class translates a DFA into grep source code written in CbC
    (functions are emitted as ``__code`` segments and state transitions as
    ``goto`` calls).  The generated program is based on the (beautiful)
    mini-grep introduced in "The Practice of Programming" written by
    Rob Pike & Brian W. Kernighan. (see template/grep.cbc)
    >>> string = "(build|fndecl|gcc)"
    >>> reg = Regexp(string)
    >>> dfacg = CallGraph(reg.dfa)
    >>> tje = CbCGREPTranslator(string, dfacg)
    >>> tje.translate()
    """

    def __init__(self, regexp, cg):
        """Set up a translator for ``regexp`` using the call graph ``cg``.

        Raises CbCGREPTranslateExeption when ``cg.type`` is "NFA".
        """
        if cg.type == "NFA":
            raise CbCGREPTranslateExeption("can't translate grep from NFA")
        GREPTranslator.__init__(self, regexp, cg)
        self.funType = '__code '
        self.callType = 'goto '
        self.breakStatement = ''
        # When true, matching lines are printed with a "filename: " prefix.
        self.print_file = False
        self.__bufsize = 1024

    def getbufsize(self):
        """Return the size used for the READBUFSIZE define."""
        return self.__bufsize

    def setbufsize(self, bufsize):
        """Store the absolute value of ``bufsize`` as the read buffer size."""
        self.__bufsize = abs(bufsize)

    bufsize = property(getbufsize, setbufsize)

    def emit_accept_state(self):
        """Emit the ``accept`` code segment: print the line, go to the next."""
        self.emit("__code accept(char* s) {\n")
        if self.print_file:
            self.emit("  printf(\"%s: %s\\n\", filename, buf);\n")
        else:
            self.emit("  printf(\"%s\\n\", buf);\n")
        # next_line is declared and defined without parameters, so it must
        # be entered without arguments (as next_ptr already does).
        self.emit("  goto next_line();\n}\n\n")

    def emit_reject_state(self):
        """Emit the ``reject`` code segment, which retries from the next char."""
        self.emit("""
__code reject(char* s) {
  goto next_ptr();
}
""")

    def emit_next_state(self):
        """Emit the ``next_ptr``, ``next_line`` and ``returner`` code segments."""
        # next_ptr: advance the cursor within the current line; at the end of
        # the line move on to the next line, otherwise restart the DFA.
        self.emit("""
__code next_ptr() {
  if(*cur++ == '\\0')
    goto next_line();
  goto DFA(cur);
}
""")

        # next_line: read one line from the global stream `f`, strip the
        # trailing newline and restart the DFA at the beginning of the line.
        self.emit("""
__code next_line() {
  if(fgets(buf, sizeof buf, f) == NULL)
    goto returner();
  int n = strlen(buf);
  if (n > 0 && buf[n-1] == '\\n')
    buf[n-1] = '\\0';
  cur = buf;
  goto DFA(buf);
}
""")
        self.emit("""
__code returner() {
  return;
}
""")

    def emit_initialization(self):
        """Emit includes, globals, prototypes, the grep template and helpers."""
        self.emit("#include <stdio.h>\n")
        self.emit("#include <stdlib.h>\n")
        self.emit("#include <string.h>\n\n")
        self.emit("#define LINEBUFSIZE 1024\n")
        self.emit("#define READBUFSIZE %d\n\n" % self.bufsize)
        self.emit("char readbuf[READBUFSIZE], buf[LINEBUFSIZE];\n")
        self.emit("char *cur, *filename;\n\n")
        # Terminate the line so the declaration does not run into the
        # prototype emitted next.
        self.emit("FILE* f;\n\n")

        # Forward declarations for every code segment of the generated file.
        self.emit("%sDFA(char* s);\n" % self.funType)
        for state in self.cg.map:
            self.emit(self.funType + self.modify_state_name(state) + "(char* s);\n")
        self.emit(self.funType + 'accept(char* s);\n')
        self.emit(self.funType + 'reject(char* s);\n')
        self.emit(self.funType + 'next_ptr();\n')
        self.emit(self.funType + 'next_line();\n')
        self.emit(self.funType + 'returner();\n')
        # Close the template file deterministically instead of leaking the
        # handle until garbage collection.
        with open("template/grep.cbc") as grepsource:
            self.emit(grepsource.read())
        self.emit_next_state()

    def emit_filter(self):
        """Emit nothing; this translator generates no filter."""
        pass

    def emit_driver(self):
        """Emit ``main`` and the ``DFA`` entry segment jumping to the start state."""
        # main is declared int, so it must return a value.
        self.emit("""
int main(int argc, char* argv[]) {
  grepmain(argc, argv);
  return 0;
}
""")
        self.emit("""
%sDFA(char* s) {
  goto %s(s);
}
""" % (self.funType, self.modify_state_name(self.cg.start)))

    def emit_state(self, cur_state, transition):
        """Emit the code segment for ``cur_state``.

        Accepting states jump to ``accept``; states with transitions get a
        switch over them; states with neither jump to ``reject`` so that the
        emitted code segment always ends in a continuation.
        """
        self.emit(self.funType + self.modify_state_name(cur_state) + "(char* s) {\n")
        if cur_state in self.cg.accepts:
            self.emit("\tgoto accept(s);\n")
        elif transition:
            if self.cg.type == "DFA":
                self.emit_switch(transition, default="reject")
            else:
                self.emit_switch(transition)
        else:
            self.emit("\tgoto reject(s);\n")
        self.emit("}\n\n")


def test():
    """Run the doctests of this module."""
    import doctest
    doctest.testmod()


if __name__ == '__main__':
    test()
--- a/pyrect/grep_bench.sh Tue Jul 13 07:53:28 2010 +0900 +++ b/pyrect/grep_bench.sh Tue Jul 20 17:26:54 2010 +0900 @@ -12,7 +12,7 @@ #time /tmp/jitgrep $@ > $jitgrepout echo "\n[llgrep]" -time ./llgrep.py $@ 2> /dev/null > $llgrepout +time ./llgrep.py -O $@ 2> /dev/null > $llgrepout echo "\n[cgrep]" time cgrep -E $@ > $cgrepout
--- a/pyrect/jitgrep.py Tue Jul 13 07:53:28 2010 +0900 +++ b/pyrect/jitgrep.py Tue Jul 20 17:26:54 2010 +0900 @@ -6,6 +6,7 @@ import time from optparse import OptionParser from grep_translator import GREPTranslator +from cbcgrep_translator import CbCGREPTranslator from dfareg import Regexp, CallGraph def main(argv): @@ -35,7 +36,8 @@ if opts.cc == "cbc": cbc = True - opts.cc = "gcc" + opts.cc = "$CBCROOT/INSTALL_DIR/bin/gcc" + opts.cflags += " -L$CBCROOT/gcc" else: cbc = False @@ -63,7 +65,10 @@ if opts.time : start_time = time.time() reg = Regexp(string) dfacg = CallGraph(reg.dfa) - grept = GREPTranslator(string, dfacg) + if cbc: + grept = CbCGREPTranslator(string, dfacg) + else: + grept = GREPTranslator(string, dfacg) grept.begline = begline grept.bufsize = bufsize
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/template/grep.cbc Tue Jul 20 17:26:54 2010 +0900 @@ -0,0 +1,31 @@ +void grep(char * regexp, FILE *f) { + goto next_line(); + return; +} + +void grepmain(int argc, char* argv[]) { + int i; + + if (argc < 2) { + fprintf(stderr, "usage: grep regexp [file ...]"); + exit(0); + } + if (argc == 2) { + grep(argv[1], stdin); + } else { + for (i = 2; i < argc; i++) { + filename = argv[i]; + f = fopen(filename, "r"); + if (f == NULL) { + fprintf(stderr, "can't open %s:", filename); + continue; + } + if (READBUFSIZE > 0) + setvbuf(f, readbuf, _IOFBF, READBUFSIZE); + grep(argv[1], f); + fclose(f); + } + } + + return; +}