changeset 27:3db85244784b

Modify jitgrep: pre-compile the grep main routine into libgrep.so, so that only the required DFA transitions need to be JIT-compiled.
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Thu, 08 Jul 2010 06:35:39 +0900
parents 7481ce2bcc42
children 0e90ae1a2d9b
files src/grep_bench.sh src/grep_translator.py src/jitgrep.py src/template/grep.c
diffstat 4 files changed, 36 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
--- a/src/grep_bench.sh	Tue Jul 06 15:12:39 2010 +0900
+++ b/src/grep_bench.sh	Thu Jul 08 06:35:39 2010 +0900
@@ -12,19 +12,19 @@
 echo "\n[cgrep]"
 time cgrep -E $@ > $cgrepout
 
+echo "\n[egrep]"
+time egrep    $@ > $egrepout
+
 echo "\n[agrep]"
 time agrep $@ > $agrepout
 
-echo "\n[egrep]"
-time egrep    $@ > $egrepout
-
 echo "\n[diff egrep jitgrep]"
 diff $egrepout $jitgrepout
 
+echo "[diff cgrep jitgrep]"
+diff $cgrepout $jitgrepout
+
 echo "[diff agrep jitgrep]"
 diff $agrepout $jitgrepout
 
-echo "[diff cgrep jitgrep]"
-diff $cgrepout $jitgrepout
-
 #rm -f $egrepout $jitgrepout $agrepout $cgrepout
--- a/src/grep_translator.py	Tue Jul 06 15:12:39 2010 +0900
+++ b/src/grep_translator.py	Thu Jul 08 06:35:39 2010 +0900
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 from c_translator import CTranslator
 from dfareg import Regexp, CallGraph
 
@@ -6,7 +8,7 @@
 
 class GREPTranslator(CTranslator):
     """GREPTranslator
-    >>> string = \"(A|B)*C\"
+    >>> string = \"(build|fndecl|gcc)\"
     >>> reg = Regexp(string)
     >>> dfacg = CallGraph(reg.dfa)
     >>> tje = GREPTranslator(string, dfacg)
@@ -33,10 +35,8 @@
 }\n""" % self.funType)
 
     def emit_initialization(self):
-        self.emit("#include <stdio.h>\n")
-        self.emit("#include <stdlib.h>\n")
-        self.emit("#include <string.h>\n\n")
-        self.emit("#define BUFSIZE 1024\n\n")
+        self.emit("#define NULL s\n\n")
+        self.emit("int DFA(char* s);\n")
         for state in self.cg.map.iterkeys():
             self.emit(self.funType + self.modify_state_name(state) + "(char* s);\n")
         self.emit(self.funType + 'accept(char* s);\n')
@@ -44,15 +44,9 @@
 
     def emit_driver(self):
         self.emit("""
-int match(char *text) {
-  do {
-    if (%s(text))
-      return 1;
-  } while (*text++ != '\\0');
-  return 0;
+int DFA(char *s) {
+  return %s(s);
 }\n\n""" % (self.modify_state_name(self.cg.start)))
-        self.emit(open("template/grep.template", "r").read())
-        self.emit("\n")
 
     def emit_state(self, cur_state, transition):
         self.emit(self.funType + self.modify_state_name(cur_state) + "(char* s) {\n")
--- a/src/jitgrep.py	Tue Jul 06 15:12:39 2010 +0900
+++ b/src/jitgrep.py	Thu Jul 08 06:35:39 2010 +0900
@@ -14,8 +14,9 @@
 
     optimize = ""
     redirect = ""
-    srcpath = "/tmp/jitgrep_emit.c"
-    binpath = "/tmp/jitgrep_emit"
+    srcpath = "/tmp/jitgrep_dfa.c"
+    binpath = "/tmp/jitgrep"
+    libgrep = "template/libgrep.so"
 
     argv_iter = argv
     for args in argv_iter:
@@ -33,6 +34,10 @@
 
     (opts, args) = psr.parse_args(argv)
 
+    if len(args) < 2:
+        psr.print_usage()
+        return
+
     if opts.compiler == "cbc":
         cbc = True
         opts.compiler = "gcc"
@@ -55,7 +60,14 @@
         end_time = time.time()
         print("Translation: " + str(end_time - start_time) + " Sec.")
 
-    cmd = " ".join([opts.compiler, optimize, srcpath, "-o", binpath])
+    if not os.path.exists(libgrep):
+        cmd = "gcc -O3 -c -fPIC -shared template/grep.c -o " + libgrep
+        if opts.debug:
+            print cmd
+        else:
+            os.system(cmd)
+
+    cmd = " ".join([opts.compiler, optimize, srcpath, libgrep, "-o", binpath])
     if opts.debug:
         print("compile command", cmd)
     else:
@@ -63,7 +75,7 @@
         os.system(cmd)
         if (opts.time):
             end_time = time.time()
-            print("Compiling: " + str(end_time - start_time) + " Sec.")
+            print("Compiling  : " + str(end_time - start_time) + " Sec.")
 
     if opts.debug:
         print("argv=" + argv)
@@ -87,7 +99,7 @@
             os.system(cmd)
             if (opts.time):
                 end_time = time.time()
-                print("Matching: " + str(end_time - start_time) + " Sec.")
+                print("Matching   : " + str(end_time - start_time) + " Sec.")
 
     if not opts.debug:
         #os.remove(srcpath)
--- a/src/template/grep.c	Tue Jul 06 15:12:39 2010 +0900
+++ b/src/template/grep.c	Thu Jul 08 06:35:39 2010 +0900
@@ -7,16 +7,19 @@
 
 #define BUFSIZE 1024
 
+extern int DFA(char *text);
+
 int match(char *regexp, char *text) {
   if (regexp[0] == '^')
-    return matchhere(regexp+1, text);
+    return DFA(text);
   do {
-    if (matchhere(regexp+1, text))
+    if  (DFA(text)) //(matchhere(regexp+1, text))
       return 1;
   } while (*text++ != '\0');
   return 0;
 }
 
+/*
 int matchhere(char *regexp, char *text) {
   if (regexp[0] == '\0')
     return 1;
@@ -36,6 +39,7 @@
   } while (*text != '\0' && (*text++ == c || c == '.'));
   return 0;
 }
+*/
 
 int grep(char * regexp, FILE *f, char *name) {
   int n, nmatch;