changeset 52:abb0691e792a

bug fix. remove unnecessary files.
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Mon, 23 Aug 2010 20:00:04 +0900
parents c48284580d5a
children 1f8c474ca8b3
files pyrect/grep_bench.sh pyrect/jitgrep.py pyrect/regexp/lexer.py pyrect/translator/c_translator.py pyrect/translator/cbc_grep_translator.py pyrect/translator/cbc_translator.py pyrect/translator/dot_translator.py pyrect/translator/grep_translator.py
diffstat 8 files changed, 214 insertions(+), 79 deletions(-) [+]
line wrap: on
line diff
--- a/pyrect/grep_bench.sh	Tue Aug 10 15:56:23 2010 +0900
+++ b/pyrect/grep_bench.sh	Mon Aug 23 20:00:04 2010 +0900
@@ -7,28 +7,39 @@
 cgrepout="/tmp/cgrep.out"
 dgrepout="/tmp/dgrep.out"
 
-echo "[jitgrep]"
-time ./jitgrep.py $@ > $jitgrepout
+echo "[jitgrep - compiling]"
+time ./jitgrep.py -c $@ > /dev/null
 
-echo "[jitgrep - with out compiling]"
+echo
+echo "[jitgrep - matching with out compiling]"
+time /tmp/jitgrep $@ > $jitgrepout
+
+echo
+echo "[jitgrep - cbc matching with out compiling]"
 time /tmp/jitgrep $@ > /dev/null
 
-#echo "\n[llgrep]"
+#echo
+#echo "[llgrep]"
 #time ./llgrep.py -O $@ 2> /dev/null > $llgrepout
 
-echo "\n[cgrep]"
+echo
+echo "[cgrep]"
 time cgrep -E $@ > $cgrepout
 
-echo "\n[egrep]"
+echo
+echo "[egrep]"
 time egrep    $@ > $egrepout
 
-#echo "\n[dgrep (non-filter grep)]"
+#echo
+#echo "[dgrep (non-filter grep)]"
 #time dgrep -E $@ > $dgrepout
 
-#echo "\n[agrep]"
+#echo
+#echo "[agrep]"
 #time agrep $@ > $agrepout
 
-echo "\n[diff egrep jitgrep]"
+echo
+echo "[diff egrep jitgrep]"
 diff $egrepout $jitgrepout
 
 #echo "[diff egrep llgrep]"
@@ -40,10 +51,7 @@
 #echo "[diff cgrep llgrep]"
 #diff $cgrepout $llgrepout
 
-echo "\n[matches]"
+echo "[matches]"
 wc $egrepout
 
-#echo "[diff agrep jitgrep]"
-#diff $agrepout $jitgrepout
-
 #rm -f $egrepout $jitgrepout $agrepout $cgrepout $llgrepout
--- a/pyrect/jitgrep.py	Tue Aug 10 15:56:23 2010 +0900
+++ b/pyrect/jitgrep.py	Mon Aug 23 20:00:04 2010 +0900
@@ -10,7 +10,7 @@
 
 def main(argv):
     myusage = """%prog [--buf-size=size] [--dump]
-                  [--time] [--debug] [--cc=compiler]
+                  [--time] [--debug] [--cc=compiler] [-c]
                   [-Olevel] regexp [file..] [--out=file]"""
     psr = OptionParser(usage=myusage)
 
@@ -20,8 +20,9 @@
 
     psr.add_option("--cc", action="store", type="string", dest="cc", default="gcc", metavar="FILE",
                    help="Choose compiler (default is gcc).")
+    psr.add_option("-c", action="store_true", dest="compile", default=False , help="compile only.")
     psr.add_option("--buf-size=size", action="store", type="string", dest="bufsize", default="1M" , help="Set read-buffer size (e.x. 1024, 1024K, 2M)")
-    psr.add_option("--CFLAGS", action="store", type="string", dest="cflags", default="-O3 -fomit-frame-pointer", help="Print compile/matching time.")
+    psr.add_option("--CFLAGS", action="store", type="string", dest="cflags", default="-O3", help="Print compile/matching time.")
     psr.add_option("--time", action="store_true", dest="time", default=False, help="Print compile/matching time.")
     psr.add_option("--debug", action="store_true", dest="debug", default=False, help="Dump commands, not evalute matching (except interactive mode).")
     psr.add_option("--label", action="store_true", dest="label", default=False, help="label implimentation in C.")
@@ -32,12 +33,12 @@
 
     if len(args) < 2:
         psr.print_usage()
-        exit(0)
+        return
 
     if opts.cc == "cbc":
         cbc = True
         opts.cc = "$CBCROOT/INSTALL_DIR/bin/gcc"
-        opts.cflags += " -L$CBCROOT/gcc"
+        opts.cflags += " -L$CBCROOT/gcc -w"
     else:
         cbc = False
 
@@ -60,7 +61,7 @@
             bufsize = int(opts.bufsize)
     except ValueError:
         psr.print_usage()
-        exit(0)
+        return
 
     if opts.time : start_time = time.time()
     reg = Regexp(string)
@@ -75,7 +76,7 @@
 
     if opts.dump:
         grept.translate()
-        exit(0)
+        return
     else:
         tmpsrc = open(srcpath, 'w')
         grept.translate(tmpsrc)
@@ -100,6 +101,9 @@
         print("args=", args)
         print("opts=", opts)
 
+    if opts.compile:
+        return
+
     if len(args) == 2 and not opts.debug:
         while True:
             try:
--- a/pyrect/regexp/lexer.py	Tue Aug 10 15:56:23 2010 +0900
+++ b/pyrect/regexp/lexer.py	Mon Aug 23 20:00:04 2010 +0900
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# -*- encoding: utf-8 -*-
+#-*- encoding: utf-8 -*-
 
 from ply import lex
 
--- a/pyrect/translator/c_translator.py	Tue Aug 10 15:56:23 2010 +0900
+++ b/pyrect/translator/c_translator.py	Mon Aug 23 20:00:04 2010 +0900
@@ -140,7 +140,7 @@
             if '' in transition:
                 epsilon_transition = transition.pop('')
                 for n in epsilon_transition:
-                    self.emit("\t%s%s(s);\n" % (self.callType, self.state_name(n)))
+                    self.emit("return %s(s);\n" % self.state_name(n))
         else:
             default = "reject"
 
@@ -197,7 +197,7 @@
             self._emit("/* %s */" % input_node.__repr__())
 
         def visit_Character(self, char):
-            self._emit("case %d:" % char.char)
+            self._emit("case %d: /* match %s */" % (char.char, chr(char.char)))
             self._emit("  return %s(s);" % self.next)
 
         def visit_EndLine(self, endline):
--- a/pyrect/translator/cbc_grep_translator.py	Tue Aug 10 15:56:23 2010 +0900
+++ b/pyrect/translator/cbc_grep_translator.py	Mon Aug 23 20:00:04 2010 +0900
@@ -1,12 +1,13 @@
 #!/usr/bin/env python
 
-from grep_translator import GREPTranslator
+import os
 from pyrect.regexp import Regexp
+from pyrect.translator import CbCTranslator
 
 class CbCGREPTranslateExeption(Exception):
     pass
 
-class CbCGREPTranslator(GREPTranslator):
+class CbCGREPTranslator(CbCTranslator):
     """CbCGREPTranslator
     This Class can translate form DFA into grep source-code.
     which based on (beautiful) mini-grep introduced  \"The Practice of Programming\"
@@ -17,15 +18,15 @@
     >>> tje.translate()
     """
 
+    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+
     def __init__(self, regexp):
-        GREPTranslator.__init__(self, regexp)
-        self.funType = '__code '
-        self.interface = "char *s, char* cur, char* buf, FILE *f, char* filename"
+        CbCTranslator.__init__(self, regexp)
+        self.interface = "unsigned char *s, unsigned char* cur, unsigned char* buf, FILE *f, char* filename"
         self.args = "s, cur, buf, f, filename"
-        self.callType = 'goto '
-        self.breakStatement = ''
         self.print_file = False
-        self.__bufsize = 1024
+        self.__bufsize = 1024 * 1024
+        self.trans_stmt = self._trans_stmt(self.emit, self.args)
 
     def getbufsize(self):
         return self.__bufsize
@@ -34,6 +35,13 @@
 
     bufsize = property(getbufsize, setbufsize)
 
+    def state_name(self, state):
+        if state in ("accept", "reject", "next_ptr", "next_line", "returner"):
+            return state
+        else:
+            return "state_" + state
+
+
     def emit_accept_state(self):
         self.emit("__code accept(%s) {\n" % self.interface)
         if self.print_file:
@@ -62,7 +70,7 @@
         self.emit("""
 __code next_line(%s) {
   if(fgets(buf, LINEBUFSIZE, f) == NULL) {
-    goto returner();
+    goto returner(%s);
   }
   int n = strlen(buf);
   if (n > 0 && buf[n-1] == '\\n')
@@ -71,28 +79,24 @@
   s   = cur;
   goto DFA(%s);
 }
-""" % (self.interface, self.args))
+""" % (self.interface, self.args, self.args))
         self.emit("""
-__code returner() {
+__code returner(%s) {
   return;
-}""")
+}""" % self.interface)
 
     def emit_initialization(self):
-        self.emit("#include <stdio.h>\n")
-        self.emit("#include <stdlib.h>\n")
-        self.emit("#include <string.h>\n\n")
-        self.emit("#define LINEBUFSIZE 1024\n")
-        self.emit("#define READBUFSIZE %d\n\n" % self.bufsize)
+        self.emit("#include <stdio.h>")
+        self.emit("#include <stdlib.h>")
+        self.emit("#include <string.h>", 2)
+        self.emit("#define LINEBUFSIZE 1024")
+        self.emit("#define READBUFSIZE %d" % self.bufsize, 2)
 
-        self.emit("%sDFA(%s);\n" % (self.funType, self.interface))
-        for state in self.cg.map.iterkeys():
-            self.emit(self.funType + self.state_name(state) + "(" + self.interface + ");\n")
-        self.emit(self.funType + 'accept(%s);\n' % self.interface)
-        self.emit(self.funType + 'reject(%s);\n' % self.interface)
-        self.emit(self.funType + 'next_ptr(%s);\n' % self.interface)
-        self.emit(self.funType + 'next_line(%s);\n' % self.interface)
-        self.emit(self.funType + 'returner();\n\n')
-        grepsource = open("template/grep.cbc")
+        self.emit("__code DFA(%s);\n" % self.interface)
+        for state in self.cg.map.keys() + ["accept", "reject", "next_ptr", "next_line", "returner"]:
+            self.emit("__code %s(%s);" % (self.state_name(state), self.interface))
+        self.emit()
+        grepsource = open(self.BASE_DIR + "/template/grep.cbc")
         self.emit(grepsource.read())
         self.emit_next_state()
 
@@ -107,34 +111,18 @@
 }
 """)
         self.emit("""
-%sDFA(%s) {
+__code DFA(%s) {
   goto %s(%s);
 }
-""" % (self.funType, self.interface, self.state_name(self.cg.start), self.args))
-
-    def emit_switch(self, case, default=None):
-        self.emit("\tswitch(*s++) {\n")
-        for input, next_state in case.iteritems():
-            if input != '':
-                self.emit("\t\tcase '%s': \n" % (input))
-                self.emit("\t\t\t%s%s(%s);\n" % (self.callType, self.state_name(next_state), self.args))
-                if self.breakStatement != '': self.emit(self.breakStatement+'\n')
-
-        if default:
-            self.emit( """\t\tdefault:\n\t\t\t%s%s(%s);\n""" % (self.callType, default, self.args))
-        self.emit("\t}\n")
+""" % (self.interface, self.state_name(self.cg.start), self.args))
 
     def emit_state(self, cur_state, transition):
-        self.emit(self.funType + self.state_name(cur_state) + "(" + self.interface + ") {\n")
         if cur_state in self.cg.accepts:
-            self.emit("\tgoto accept(%s);\n" % self.args)
+            self.emiti("__code %s(%s) {" % (self.state_name(cur_state), self.interface))
+            self.emit(   "goto accept(%s);" % self.args)
+            self.emitd("}")
         else:
-            if transition:
-                if self.cg.type == "DFA":
-                    self.emit_switch(transition, default="reject")
-                else:
-                    self.emit_switch(transition)
-        self.emit("}\n\n")
+            CbCTranslator.emit_state(self, cur_state, transition)
 
 def test():
     import doctest
--- a/pyrect/translator/cbc_translator.py	Tue Aug 10 15:56:23 2010 +0900
+++ b/pyrect/translator/cbc_translator.py	Mon Aug 23 20:00:04 2010 +0900
@@ -1,6 +1,8 @@
 #!/usr/bin/env python
 
 from pyrect.regexp import Regexp
+from pyrect.regexp.ast import *
+from translator import Translator
 from c_translator import CTranslator
 
 class CbCTranslator(CTranslator):
@@ -10,14 +12,147 @@
     >>> reg = Regexp(string)
     >>> ct = CbCTranslator(reg)
     >>> ct.translate()
-    >>> ct.debug = True
-    >>> ct.translate()
     """
     def __init__(self, regexp):
-        CTranslator.__init__(self, regexp)
-        self.funType = '__code '
-        self.callType = 'goto '
-        self.breakStatement = ''
+        Translator.__init__(self, regexp)
+        self.special_rule = (Range, BegLine, MBCharacter)
+        self.cg = regexp.dfacg
+        self.debug = False
+        self.interface = "unsigned char *s"
+        self.args = "s"
+        self.trans_stmt = self._trans_stmt(self.emit, self.args)
+
+    def emit_accept_state(self):
+        self.emiti("__code accept(%s) {" % self.interface)
+        self.emit(   "return;")
+        self.emitd("}", 2)
+
+    def emit_reject_state(self):
+        self.emiti("__code reject(%s) {" % self.interface)
+        self.emit(   "return;")
+        self.emitd("}", 2)
+
+    def emit_driver(self):
+        self.emiti("int main(int argc, unsigned char* argv[]) {")
+        self.emit(   'buf = argv[1];')
+        self.emit(   'puts("regexp: %s");' % self.regexp.regexp)
+        self.emit(   'puts("number of state: %d");' % len(self.cg.states))
+        self.emit(  r'printf("string: %s\n", argv[1]);')
+        self.emit0(  "goto %s((unsigned char*)argv[1]);" % self.state_name(self.cg.start))
+        self.emit(   "return 0;")
+        self.emitd("}", 2)
+
+    def emit_switch(self, case, default=None):
+        if not case:
+            if default:
+                self.emit("goto %s(%s);" % (default, self.args))
+            return
+        self.emiti("switch(*s++) {")
+        for case, next_ in case.iteritems():
+            self.trans_stmt.emit(case, self.state_name(next_))
+        if default:
+            self.emit("default: goto %s(%s);" % (default, self.args))
+        self.emitd("}")
+
+    def emit_state(self, cur_state, transition):
+        self.emiti("__code %s(%s) {" % (self.state_name(cur_state), self.interface))
+
+        if self.debug:
+            self.emit(r'printf("state: %s, input: %%s\n", s);' % cur_state)
+        if self.cg.type == "NFA":
+            default = None
+            if '' in transition:
+                epsilon_transition = transition.pop('')
+                for n in epsilon_transition:
+                    self.emit("goto %s(%s);\n" % (self.state_name(n), self.args))
+        else:
+            default = "reject"
+
+        any_ = None
+
+        for input_ in transition.keys():
+            if type(input_) in self.special_rule:
+                self.trans_stmt.emit(input_, self.state_name(transition.pop(input_)))
+            elif type(input_) is AnyChar:
+                any_ = (input_, self.state_name(transition.pop(input_)))
+                default = None
+
+        if cur_state in self.cg.accepts:
+            eol = Character('\0')
+            transition[eol] = "accept"
+
+        self.emit_switch(transition, default)
+
+        if any_:
+            self.trans_stmt.emit(any_[0], any_[1])
+
+        self.emitd("}", 2)
+
+    def emit_initialization(self):
+        self.emit("#include <stdio.h>")
+        for state in self.cg.map.keys() + ["accept", "reject"]:
+            self.emit("__code %s(%s);" % (self.state_name(state), self.interface))
+        self.emit('unsigned char* buf;')
+        self.emit_skip()
+
+    def emit_from_callgraph(self):
+        # self.emit C-source code
+        self.emit_initialization()
+        self.emit_driver()
+
+        for cur_state, transition in self.cg.map.iteritems():
+            self.emit_state(cur_state, transition)
+
+        self.emit_accept_state()
+        self.emit_reject_state()
+
+    class _trans_stmt(CTranslator._trans_stmt):
+        def __init__(self, emit, args):
+            CTranslator._trans_stmt.__init__(self, emit)
+            self.args = args
+
+        def visit_Character(self, char):
+            self._emit("case %d: /* match %s */" % (char.char, chr(char.char)))
+            self._emit("  goto %s(%s);" % (self.next, self.args))
+
+        def visit_EndLine(self, endline):
+            self._emit(r"case '\0':")
+            self._emit("  goto %s(%s);" % (self.next, self.args))
+
+        # Special Rule
+
+        def visit_MBCharacter(self, mbchar):
+            self._emit("/* match %s  */" % mbchar)
+            bytes = mbchar.bytes
+            self._emit("  if(%s)" % \
+                       " && ".join(["*(s+%d) == 0x%x" % (d, x) for d, x in enumerate(bytes)]))
+            self._emit("    s += %d;" % len(bytes))
+            self._emit("    goto %s(%s);" % (self.next, self.args), 2)
+
+        def visit_BegLine(self, begline):
+            self._emit("if (s == buf)")
+            self._emit("  goto %s(%s);" % (self.next, self.args), 2)
+
+        def visit_Range(self, range):
+            if isinstance(range.lower, MBCharacter) and not \
+               isinstance(range.upper, MBCharacter) or  \
+               isinstance(range.upper, MBCharacter) and not \
+               isinstance(range.lower, MBCharacter):
+                return
+
+            if isinstance(range.lower, MBCharacter):
+                self.visit(range)
+            else:
+                self._emit("if ('%s' <= *s && *s <= '%s')" % (range.lower.char, range.upper.char))
+                self._emit("  s++;")
+                self._emit("  goto %s(%s);" % (self.next, self.args), 2)
+
+        def visit_AnyChar(self, anychar):
+            self._emit(r"if (*s != '\0') {")
+            self._emit("  s = SKIP(s);")
+            self._emit("  goto %s(%s);" % (self.next, self.args), 2)
+            self._emit("}")
+            self._emit("goto reject(%s);" % self.args)
 
 def test():
     import doctest
--- a/pyrect/translator/dot_translator.py	Tue Aug 10 15:56:23 2010 +0900
+++ b/pyrect/translator/dot_translator.py	Mon Aug 23 20:00:04 2010 +0900
@@ -22,8 +22,8 @@
             self.cg = regexp.nfacg
         else:
             self.cg = regexp.dfacg
-        self.fill_color = "lightsteelblue1"
-        self.frame_color = "navyblue"
+        self.fill_color = "white" #"lightsteelblue1"
+        self.frame_color = "black" #"navyblue"
 
     def state_name(self, name):
         return "q"+name
--- a/pyrect/translator/grep_translator.py	Tue Aug 10 15:56:23 2010 +0900
+++ b/pyrect/translator/grep_translator.py	Mon Aug 23 20:00:04 2010 +0900
@@ -51,7 +51,7 @@
         self.emiti(  "if(%s(text))" % self.state_name(self.cg.start))
         self.emit(     "return 1;")
         self.emitd( r"} while (*text++ != '\0');")
-        self.emit("return 0;")
+        self.emitd("return 0;")
         self.emitd("}", 2)
 
     def emit_state(self, cur_state, transition):