changeset 109:d591da6e2988

Add memchr filter; fix the memchr length argument emitted in accept() and the iemitd helper.
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Sat, 12 Feb 2011 16:41:25 +0900
parents 2632b963e441
children 68b616dbe2c9 92507d269615
files pyrect/jitgrep.py pyrect/translator/grep_translator.py pyrect/translator/translator.py
diffstat 3 files changed, 44 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/pyrect/jitgrep.py	Thu Dec 30 17:18:40 2010 +0900
+++ b/pyrect/jitgrep.py	Sat Feb 12 16:41:25 2011 +0900
@@ -31,7 +31,7 @@
     psr.add_option("--thread", action="store", type="string", dest="thread", default="0", metavar="FILE", help="number of thread.")
     psr.add_option("--disable-booster", action="store_true", dest="no_boost", default=False, help="disable boosetr (default: use booster).")
     psr.add_option("--enable-table-lookup", action="store_true", dest="table_lookup", default=False, help="use table-lookup in first-state's transition.")
-    psr.add_option("--filter", action="store", type="string", dest="filter", default="", help="chose filtering-algorithm bmh(default), quick, or none.")
+    psr.add_option("--filter", action="store", type="string", dest="filter", default="", help="chose filtering-algorithm bmh(default), quick, memchr, or none.")
     psr.add_option("--debug", action="store_true", dest="debug", default=False, help="Dump commands, not evalute matching (except interactive mode).")
     psr.add_option("--label", action="store_true", dest="label", default=False, help="label implimentation in C.")
     psr.add_option("--dump", action="store_true", dest="dump", default=False, help="Dump generated grep-source.")
--- a/pyrect/translator/grep_translator.py	Thu Dec 30 17:18:40 2010 +0900
+++ b/pyrect/translator/grep_translator.py	Sat Feb 12 16:41:25 2011 +0900
@@ -61,7 +61,7 @@
 
         key = None
 
-        if (self.filter == "bmh" or self.filter == "quick")\
+        if (self.filter == "bmh" or self.filter == "quick" or self.filter == "memchr")\
                and self.regexp.must_words:
             key = max(self.regexp.must_words, key=len)
             if len(self.regexp.must_words) == 1 and len(key) == self.regexp.min_len:
@@ -76,8 +76,15 @@
 
         if self.filter == "bmh":
             self.emit_bmh_filter(key)
+        elif self.filter == "memchr":
+            self.emit_memchr_filter(key)
+        elif self.filter == "quick":
+            self.emit_quick_filter(key)
         elif self.filter:
-            self.emit_quick_filter(key)
+            if len(key) > 5:
+                self.emit_quick_filter(key)
+            else:
+                self.emit_memchr_filter(key)
 
         if self.skip_boost and not self.filter_only and \
                not AnyChar() in self.regexp.chars and \
@@ -89,6 +96,38 @@
         grepsource = open(self.BASE_DIR + "/template/grep.c")
         self.emit(grepsource.read())
 
+    def emit_memchr_filter(self, key):
+        """Emit C memchr_filter(): memchr() finds key[0], then a loop checks the rest of key."""
+        lit = key.replace("\\", "\\\\").replace('"', '\\"')  # keep the C string literal valid
+        def emit_next():
+            if self.filter_only:
+                self.emit("return accept(%s);" % self.args)
+            elif self.filter_prefix:
+                self.emit("buf++;")
+                self.emit("return %s(%s);" % (self.state_name(self.fa.start), self.args))
+            else:
+                self.emit("beg = get_line_beg(buf, beg);")
+                self.emit("buf = beg;")
+                self.emit("return %s(%s);" % (self.state_name(self.fa.start), self.args))
+        self.emit("UCHARP get_line_beg(UCHARP p, UCHARP beg);", 2)
+        self.emiti("void memchr_filter(%s) {" % self.interface)
+        self.emit('static const UCHAR key[] = "%s";' % lit)
+        self.emit("int i, len = %d;" % len(key))
+        self.emiti("while ((buf = (UCHARP)memchr(buf, key[0], end - buf)) != NULL) {")
+        # Fewer than len bytes remain: no match is possible, and buf[i] would read past end.
+        self.emit(   "if (end - buf < len) return;")
+        self.emiti(  "for (i = 1; i < len; i++) {")
+        self.iemitd(  "if (key[i] != buf[i]) goto retry;")
+        self.demit(  "}")
+        self.emit(   "goto next;")
+        self.demiti("retry:")
+        self.emit(   "buf++;")
+        self.demit("}")
+        self.emit( "return;")
+        self.emit( "next:")
+        emit_next()
+        self.demit("}", 2)
+
     def emit_bmh_filter(self, key):
         l = len(key)
         def emit_next():
@@ -225,7 +264,7 @@
 
     def emit_accept_state(self):
         self.emiti("void accept(%s) {" % self.interface)
-        self.emit(   "UCHARP ret = (UCHARP)memchr(buf, '\\n', (buf - end));")
+        self.emit(   "UCHARP ret = (UCHARP)memchr(buf, '\\n', (end - buf));")
         if self.skip_boost or self.filter:
             self.emit(   "beg = get_line_beg(buf, beg);")
         self.emiti(  "if (ret == NULL) {")
--- a/pyrect/translator/translator.py	Thu Dec 30 17:18:40 2010 +0900
+++ b/pyrect/translator/translator.py	Sat Feb 12 16:41:25 2011 +0900
@@ -36,7 +36,7 @@
         self.emit(*arg)
 
     def iemitd(self, *arg):
-        self.iemit0(*arg)
+        self.iemit(*arg)
         self.dedent()
 
     def demiti(self, *arg):