Mercurial > hg > Members > shinya > pyrect
diff pyrect/translator/grep_translator.py @ 57:81b44ae1cd73
add fixed-string filter(Boyer-Moore), and add option '--disable-filter'.
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 01 Nov 2010 14:41:03 +0900 |
parents | ee9945561f80 |
children | 81337db23999 |
line wrap: on
line diff
--- a/pyrect/translator/grep_translator.py Wed Oct 27 20:46:41 2010 +0900 +++ b/pyrect/translator/grep_translator.py Mon Nov 01 14:41:03 2010 +0900 @@ -2,7 +2,7 @@ import os from c_translator import CTranslator -from pyrect.regexp import Regexp +from pyrect.regexp import Regexp, Analyzer class GREPTranslateExeption(Exception): pass @@ -25,6 +25,7 @@ self.__bufsize = 1024 * 1024 self.parallel_match = False self.thread_num = 0 + self.filter = True def getbufsize(self,): return self.__bufsize @@ -35,24 +36,71 @@ def emit_initialization(self): CTranslator.emit_initialization(self) + + if self.thread_num > 1: + self.emit("#define GREP paragrep") + else: + self.emit("#define GREP grep") + self.emit("#define LINEBUFSIZE %d" % self.bufsize) self.emit("#define READBUFSIZE %d" % self.bufsize) self.emit('#define THREAD_NUM %d' % self.thread_num) self.emit('#define THREAD_BUF %d' % 3) self.emit('#include <pthread.h>') - if self.thread_num > 1: - self.emit("#define GREP paragrep") - else: - self.emit("#define GREP grep") self.emit("#include <stdlib.h>") self.emit("#include <string.h>") self.emit("char readbuf[%d];" % (self.bufsize)) self.emit("int DFA(unsigned char* s);", 2) + + if self.filter and self.regexp.must_words: + self.emit_filter(self.regexp.must_words) + grepsource = open(self.BASE_DIR + "/template/grep.c") self.emit(grepsource.read()) - def emit_filter(self): - pass + def emit_filter(self, words): + def longest(s1, s2): + return s1 if len(s1) >= len(s2) else s2 + key = reduce(longest, words) + + if len(words) == 1: + if len(key) == self.regexp.min_len: + self.emit("#define FILTER_ONLY 1", 1) + else: + self.emit("#define WITH_FILTER 1", 1) + + self.emiti("int FILTER(unsigned char* text, int n) {") + l = len(key) + if l == 1: + self.emit(" return (strchr(text, %d) != NULL)" % ord(key)) + self.emitd("}", 2) + return + + skip = [str(l)] * 256 + for i in range(l - 1): + skip[ord(key[i])] = str(l-1-i) + + self.emit('static unsigned char key[] = "%s";' % key) + self.emiti( "static int skip[256] = {") + for i in range(8): + i = i * 32 + self.emit(",".join(skip[i:i+32]) + ",") + self.emitd( "};") + + self.emit("int i = %d, j, k, len = %d;" % (l-1 ,l)) + self.emit("unsigned char c, tail = %d; //'%c'" % (ord(key[l-1]), key[l-1]), 2) + self.emiti("while (i < n) {") + self.emit( "c = text[i];") + self.emiti( "if (c == tail) {") + self.emit( "j = len - 1; k = i;") + self.emiti( "while (key[--j] == text[--k]) {") + self.emit( "if (j == 0) return 1;") + self.emitd( "}") + self.emitd( "}") + self.emit( "i += skip[c];") + self.emitd("}") + self.emit( "return 0;") + self.emitd("}", 2) def emit_driver(self): self.emiti("int DFA(unsigned char *text) {")