changeset 40:962ae4154724

add CbCGREPTranslator.
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Tue, 20 Jul 2010 17:26:54 +0900
parents 43b277a00905
children ffbbdd33881d
files code/graph/makegraph.sh code/graph/regdfa.pdf pyrect/cbcgrep_translator.py pyrect/grep_bench.sh pyrect/jitgrep.py pyrect/template/grep.cbc
diffstat 6 files changed, 171 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/code/graph/makegraph.sh	Tue Jul 13 07:53:28 2010 +0900
+++ b/code/graph/makegraph.sh	Tue Jul 20 17:26:54 2010 +0900
@@ -5,4 +5,4 @@
 dvips $1.dvi
 dvipdf $1.dvi
 convert $1.pdf $1.png
-rm -f $1.dvi $1.tex $1.dvi $1.aux $1.ps $1.log
+rm -f $1.dvi $1.aux $1.ps $1.log
Binary file code/graph/regdfa.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyrect/cbcgrep_translator.py	Tue Jul 20 17:26:54 2010 +0900
@@ -0,0 +1,131 @@
+#!/usr/bin/env python
+
+from grep_translator import GREPTranslator
+from dfareg import Regexp, CallGraph
+
+class CbCGREPTranslateExeption(Exception):
+    pass
+
+class CbCGREPTranslator(GREPTranslator):
+    """CbCGREPTranslator
+    This class can translate from a DFA into grep source code,
+    which is based on the (beautiful) mini-grep introduced in \"The Practice of Programming\"
+    written by Rob Pike & Brian W. Kernighan. (see template/grep.c)
+    >>> string = \"(build|fndecl|gcc)\"
+    >>> reg = Regexp(string)
+    >>> dfacg = CallGraph(reg.dfa)
+    >>> tje = CbCGREPTranslator(string, dfacg)
+    >>> tje.translate()
+    """
+
+    def __init__(self, regexp, cg):
+        if cg.type == "NFA": raise CbCGREPTranslateExeption("can't translate grep from NFA")
+        GREPTranslator.__init__(self, regexp, cg)
+        self.funType = '__code '
+        self.callType = 'goto '
+        self.breakStatement = ''
+        self.print_file = False
+        self.__bufsize = 1024
+
+    def getbufsize(self,):
+        return self.__bufsize
+    def setbufsize(self, bufsize):
+        self.__bufsize = abs(bufsize)
+
+    bufsize = property(getbufsize, setbufsize)
+
+    def emit_accept_state(self):
+        self.emit("__code accept(char* s) {\n")
+        if self.print_file:
+            self.emit("  printf(\"%s: %s\\n\", filename, buf);\n")
+        else:
+            self.emit("  printf(\"%s\\n\", buf);\n")
+        self.emit("    goto next_line(s);\n}\n\n")
+
+    def emit_reject_state(self):
+        self.emit("""
+__code reject(char* s) {
+  goto next_ptr();
+}
+""")
+
+    def emit_next_state(self):
+        self.emit ("""
+__code next_ptr() {
+  if(*cur++ == '\\0')
+    goto next_line();
+  goto DFA(cur);
+}
+""")
+
+        self.emit("""
+__code next_line() {
+  if(fgets(buf, sizeof buf, f) == NULL)
+    goto returner();
+  int n = strlen(buf);
+  if (n > 0 && buf[n-1] == '\\n')
+    buf[n-1] = '\\0';
+  cur = buf;
+  goto DFA(buf);
+}
+""")
+        self.emit("""
+__code returner() {
+  return;
+}""")
+
+    def emit_initialization(self):
+        self.emit("#include <stdio.h>\n")
+        self.emit("#include <stdlib.h>\n")
+        self.emit("#include <string.h>\n\n")
+        self.emit("#define LINEBUFSIZE 1024\n")
+        self.emit("#define READBUFSIZE %d\n\n" % self.bufsize)
+        self.emit("char readbuf[READBUFSIZE], buf[LINEBUFSIZE];\n")
+        self.emit("char *cur, *filename;\n\n")
+        self.emit("FILE* f;")
+
+        self.emit("%sDFA(char* s);\n" % self.funType)
+        for state in self.cg.map.iterkeys():
+            self.emit(self.funType + self.modify_state_name(state) + "(char* s);\n")
+        self.emit(self.funType + 'accept(char* s);\n')
+        self.emit(self.funType + 'reject(char* s);\n')
+        self.emit(self.funType + 'next_ptr();\n')
+        self.emit(self.funType + 'next_line();\n')
+        self.emit(self.funType + 'returner();\n')
+        grepsource = open("template/grep.cbc")
+        self.emit(grepsource.read())
+        self.emit_next_state()
+
+    def emit_filter(self):
+        pass
+
+    def emit_driver(self):
+        self.emit("""
+int main(int argc, char* argv[]) {
+  grepmain(argc, argv);
+  return;
+}
+""")
+        self.emit("""
+%sDFA(char* s) {
+  goto %s(s);
+}
+""" % (self.funType, self.modify_state_name(self.cg.start)))
+
+    def emit_state(self, cur_state, transition):
+        self.emit(self.funType + self.modify_state_name(cur_state) + "(char* s) {\n")
+        if cur_state in self.cg.accepts:
+            self.emit("\tgoto accept(s);\n")
+        else:
+            if transition:
+                if self.cg.type == "DFA":
+                    self.emit_switch(transition, default="reject")
+                else:
+                    self.emit_switch(transition)
+        self.emit("}\n\n")
+
+def test():
+    import doctest
+    doctest.testmod()
+
+if __name__ == '__main__': test()
--- a/pyrect/grep_bench.sh	Tue Jul 13 07:53:28 2010 +0900
+++ b/pyrect/grep_bench.sh	Tue Jul 20 17:26:54 2010 +0900
@@ -12,7 +12,7 @@
 #time /tmp/jitgrep $@ > $jitgrepout
 
 echo "\n[llgrep]"
-time ./llgrep.py $@ 2> /dev/null > $llgrepout
+time ./llgrep.py -O $@ 2> /dev/null > $llgrepout
 
 echo "\n[cgrep]"
 time cgrep -E $@ > $cgrepout
--- a/pyrect/jitgrep.py	Tue Jul 13 07:53:28 2010 +0900
+++ b/pyrect/jitgrep.py	Tue Jul 20 17:26:54 2010 +0900
@@ -6,6 +6,7 @@
 import time
 from optparse import OptionParser
 from grep_translator import GREPTranslator
+from cbcgrep_translator import CbCGREPTranslator
 from dfareg import Regexp, CallGraph
 
 def main(argv):
@@ -35,7 +36,8 @@
 
     if opts.cc == "cbc":
         cbc = True
-        opts.cc = "gcc"
+        opts.cc = "$CBCROOT/INSTALL_DIR/bin/gcc"
+        opts.cflags += " -L$CBCROOT/gcc"
     else:
         cbc = False
 
@@ -63,7 +65,10 @@
     if opts.time : start_time = time.time()
     reg = Regexp(string)
     dfacg = CallGraph(reg.dfa)
-    grept = GREPTranslator(string, dfacg)
+    if cbc:
+        grept = CbCGREPTranslator(string, dfacg)
+    else:
+        grept = GREPTranslator(string, dfacg)
     grept.begline = begline
     grept.bufsize = bufsize
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyrect/template/grep.cbc	Tue Jul 20 17:26:54 2010 +0900
@@ -0,0 +1,31 @@
+void grep(char * regexp, FILE *f) {
+  goto next_line();
+  return;
+}
+
+void grepmain(int argc, char* argv[]) {
+  int i;
+
+  if (argc < 2) {
+    fprintf(stderr, "usage: grep regexp [file ...]");
+    exit(0);
+  }
+  if (argc == 2) {
+    grep(argv[1], stdin);
+  } else {
+    for (i = 2; i < argc; i++) {
+      filename = argv[i];
+      f = fopen(filename, "r");
+      if (f == NULL) {
+        fprintf(stderr, "can't open %s:", filename);
+        continue;
+      }
+      if (READBUFSIZE > 0)
+        setvbuf(f, readbuf, _IOFBF, READBUFSIZE);
+      grep(argv[1], f);
+      fclose(f);
+    }
+  }
+
+  return;
+}