Mercurial > hg > Members > shinya > pyrect
view pyrect/translator/grep_translator.py @ 58:81337db23999
modify ternary operator (ex: return s1 if ~~ else s2). for python2.4 ;-(
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 01 Nov 2010 14:50:52 +0900 |
parents | 81b44ae1cd73 |
children | fd3d0b8326fe |
line wrap: on
line source
#!/usr/bin/env python

import os
from c_translator import CTranslator
from pyrect.regexp import Regexp, Analyzer


class GREPTranslateExeption(Exception):
    """Error type reserved for the grep translator (name kept as-is for callers)."""
    pass


class GREPTranslator(CTranslator):
    """GREPTranslator
    This class can translate from DFA into grep source-code,
    which is based on the (beautiful) mini-grep introduced in
    "The Practice of Programming" written by Rob Pike & Brian W. Kernighan.
    (see template/grep.c)
    >>> string = "(build|fndecl|gcc)"
    >>> reg = Regexp(string)
    >>> tje = GREPTranslator(reg)
    >>> tje.translate()
    """

    # Directory containing this module; template/grep.c is looked up below it.
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))

    def __init__(self, regexp):
        """Set up a DFA-based translator for *regexp* with default options."""
        CTranslator.__init__(self, regexp, fa="DFA")
        self.__bufsize = 1024 * 1024
        self.parallel_match = False
        # A value greater than 1 makes the generated code use `paragrep`.
        self.thread_num = 0
        # When true (and the regexp has must-words) a FILTER() is generated.
        self.filter = True

    def getbufsize(self,):
        """Return the buffer size used for the generated line/read buffers."""
        return self.__bufsize

    def setbufsize(self, bufsize):
        """Store the buffer size; a negative value is replaced by its absolute value."""
        self.__bufsize = abs(bufsize)

    bufsize = property(getbufsize, setbufsize)

    def emit_initialization(self):
        """Emit the C prologue: macros, includes, buffers, filter and grep template.

        After the base-class initialization this emits the GREP/LINEBUFSIZE/
        READBUFSIZE/THREAD_* macros, the required #include lines, the read
        buffer and the DFA() prototype, then (optionally) the FILTER()
        function, and finally the contents of template/grep.c.
        """
        CTranslator.emit_initialization(self)

        if self.thread_num > 1:
            self.emit("#define GREP paragrep")
        else:
            self.emit("#define GREP grep")

        self.emit("#define LINEBUFSIZE %d" % self.bufsize)
        self.emit("#define READBUFSIZE %d" % self.bufsize)
        self.emit('#define THREAD_NUM %d' % self.thread_num)
        self.emit('#define THREAD_BUF %d' % 3)
        self.emit('#include <pthread.h>')
        self.emit("#include <stdlib.h>")
        self.emit("#include <string.h>")
        self.emit("char readbuf[%d];" % (self.bufsize))
        self.emit("int DFA(unsigned char* s);", 2)

        if self.filter and self.regexp.must_words:
            self.emit_filter(self.regexp.must_words)

        # try/finally (not `with`) keeps this usable on Python 2.4 while
        # making sure the template file handle is always closed.
        grepsource = open(os.path.join(self.BASE_DIR, "template", "grep.c"))
        try:
            self.emit(grepsource.read())
        finally:
            grepsource.close()

    def emit_filter(self, words):
        """Emit a C function `int FILTER(unsigned char* text, int n)`.

        The longest entry of *words* (the first one on ties) is used as the
        search key.  A one-character key is searched with strchr(); longer
        keys get a Horspool-style scan driven by a 256-entry skip table.
        Nothing is emitted when *words* is empty.
        """
        if not words:
            # reduce() on an empty sequence would raise TypeError.
            return

        def longest(s1, s2):
            if len(s1) >= len(s2):
                return s1
            else:
                return s2

        key = reduce(longest, words)
        key_len = len(key)

        # NOTE(review): exactly one of the two macros is always defined so the
        # emitted FILTER() is never dead code; FILTER_ONLY is chosen when the
        # single must-word is as long as the regexp's minimum length.
        # Confirm against how template/grep.c consumes these macros.
        if len(words) == 1 and key_len == self.regexp.min_len:
            self.emit("#define FILTER_ONLY 1", 1)
        else:
            self.emit("#define WITH_FILTER 1", 1)

        self.emiti("int FILTER(unsigned char* text, int n) {")

        if key_len == 1:
            # The trailing ';' is required for the generated C to compile.
            self.emit(" return (strchr(text, %d) != NULL);" % ord(key))
            self.emitd("}", 2)
            return

        # Skip table: default shift is the key length; every character of the
        # key except the last one shifts by its distance to the key's end
        # (later occurrences overwrite earlier ones, giving the smaller shift).
        skip = [str(key_len)] * 256
        for pos, char in enumerate(key[:-1]):
            skip[ord(char)] = str(key_len - 1 - pos)

        self.emit('static unsigned char key[] = "%s";' % key)
        self.emiti("static int skip[256] = {")
        # 8 rows of 32 entries each.
        for start in range(0, 256, 32):
            self.emit(",".join(skip[start:start + 32]) + ",")
        self.emitd("};")

        last = key[key_len - 1]
        self.emit("int i = %d, j, k, len = %d;" % (key_len - 1, key_len))
        self.emit("unsigned char c, tail = %d; //'%c'" % (ord(last), last), 2)
        self.emiti("while (i < n) {")
        self.emit("c = text[i];")
        self.emiti("if (c == tail) {")
        self.emit("j = len - 1; k = i;")
        self.emiti("while (key[--j] == text[--k]) {")
        self.emit("if (j == 0) return 1;")
        self.emitd("}")
        self.emitd("}")
        self.emit("i += skip[c];")
        self.emitd("}")
        self.emit("return 0;")
        self.emitd("}", 2)

    def emit_driver(self):
        """Emit `int DFA(unsigned char *text)`.

        The generated function calls the start-state function on every
        suffix of *text* (including the empty one at the terminating NUL)
        and returns 1 on the first success, 0 otherwise.
        """
        self.emiti("int DFA(unsigned char *text) {")
        self.emiti("do {")
        self.emiti("if(%s(text))" % self.state_name(self.cg.start))
        self.emit("return 1;")
        self.emitd(r"} while (*text++ != '\0');")
        self.emitd("return 0;")
        self.emitd("}", 2)

    def emit_state(self, cur_state, transition):
        """Emit the C function for *cur_state*.

        Accepting states become a function that immediately returns
        accept(s); every other state is delegated to CTranslator.
        """
        if cur_state in self.cg.accepts:
            self.emiti("int %s(unsigned char* s) {" % self.state_name(cur_state))
            self.emit("return accept(s);")
            self.emitd("}")
        else:
            CTranslator.emit_state(self, cur_state, transition)


def test():
    """Run this module's doctests."""
    import doctest
    doctest.testmod()


if __name__ == '__main__':
    test()