view pyrect/translator/grep_translator.py @ 58:81337db23999

Replace the ternary operator (e.g. `return s1 if ~~ else s2`) with an if/else statement, for Python 2.4 compatibility ;-(
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Mon, 01 Nov 2010 14:50:52 +0900
parents 81b44ae1cd73
children fd3d0b8326fe
line wrap: on
line source

#!/usr/bin/env python

import os
from c_translator import CTranslator
from pyrect.regexp import Regexp, Analyzer

class GREPTranslateExeption(Exception):
    """Exception type declared for the grep translator.

    Nothing in the code visible here raises it. The misspelled name
    ("Exeption") is kept as-is because it is the public identifier.
    """

class GREPTranslator(CTranslator):
    """Translate a regular expression's DFA into grep-style C source code.

    The generated program is based on the (beautiful) mini-grep introduced in
    "The Practice of Programming" by Rob Pike & Brian W. Kernighan
    (see template/grep.c).

    >>> string = "(build|fndecl|gcc)"
    >>> reg = Regexp(string)
    >>> tje = GREPTranslator(reg)
    >>> tje.translate()
    """

    # Directory containing this module; template/grep.c is looked up below it.
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))

    def __init__(self, regexp):
        """Create a DFA-based translator for *regexp* with default options."""
        CTranslator.__init__(self, regexp, fa="DFA")
        # Size used for LINEBUFSIZE, READBUFSIZE and the readbuf array.
        self.__bufsize = 1024 * 1024
        # Not read anywhere in this class.
        self.parallel_match = False
        # Emitted as THREAD_NUM; a value > 1 selects "paragrep" as GREP.
        self.thread_num = 0
        # When true (and the regexp has must_words) a FILTER function is
        # emitted.
        self.filter = True

    def getbufsize(self,):
        """Return the buffer size written into the generated source."""
        return self.__bufsize

    def setbufsize(self, bufsize):
        """Store the absolute value of *bufsize* as the buffer size."""
        self.__bufsize = abs(bufsize)

    bufsize = property(getbufsize, setbufsize)

    @staticmethod
    def _c_escape(text):
        """Return *text* escaped for use inside a C string literal."""
        escaped = []
        for ch in text:
            code = ord(ch)
            if ch == '"' or ch == "\\":
                escaped.append("\\" + ch)
            elif 32 <= code < 127:
                escaped.append(ch)
            else:
                # Always three octal digits so a following digit in the key
                # cannot be absorbed into the escape sequence.
                escaped.append("\\%03o" % code)
        return "".join(escaped)

    def emit_initialization(self):
        """Emit the preamble of the generated C file.

        After the base-class initialization this writes the GREP /
        buffer-size / thread macros, the required #include lines, the read
        buffer and the DFA prototype, optionally the FILTER function, and
        finally the contents of template/grep.c.
        """
        CTranslator.emit_initialization(self)

        if self.thread_num > 1:
            self.emit("#define GREP paragrep")
        else:
            self.emit("#define GREP grep")

        self.emit("#define LINEBUFSIZE %d" % self.bufsize)
        self.emit("#define READBUFSIZE %d" % self.bufsize)
        self.emit('#define THREAD_NUM %d' % self.thread_num)
        self.emit('#define THREAD_BUF %d' % 3)
        self.emit('#include <pthread.h>')
        self.emit("#include <stdlib.h>")
        self.emit("#include <string.h>")
        self.emit("char readbuf[%d];" % (self.bufsize))
        self.emit("int DFA(unsigned char* s);", 2)

        if self.filter and self.regexp.must_words:
            self.emit_filter(self.regexp.must_words)

        # try/finally (rather than "with") keeps Python 2.4 compatibility
        # while guaranteeing the template file handle is closed.
        grepsource = open(os.path.join(self.BASE_DIR, "template", "grep.c"))
        try:
            self.emit(grepsource.read())
        finally:
            grepsource.close()

    def emit_filter(self, words):
        """Emit a C function FILTER(text, n) that searches for a fixed word.

        The search key is the longest entry of *words* (the earliest one on
        ties).  A one-character key is searched with strchr(); longer keys
        use a 256-entry skip table and compare backwards from the key's last
        character (Horspool-style search).

        *words* must be non-empty; the only caller in this class guarantees
        that.
        """
        def longest(s1, s2):
            # Plain if/else instead of a conditional expression: the latter
            # is not available in Python 2.4.
            if len(s1) >= len(s2):
                return s1
            else:
                return s2

        key = reduce(longest, words)

        # NOTE(review): the "else" pairs with the outer "if", so a single
        # word whose length differs from min_len emits neither macro.
        # Behaviour is kept as-is; confirm against template/grep.c.
        if len(words) == 1:
            if len(key) == self.regexp.min_len:
                self.emit("#define FILTER_ONLY 1", 1)
        else:
            self.emit("#define WITH_FILTER 1", 1)

        self.emiti("int FILTER(unsigned char* text, int n) {")
        key_len = len(key)
        if key_len == 1:
            # The generated statement needs its terminating ';' to be valid C.
            self.emit("   return (strchr(text, %d) != NULL);" % ord(key))
            self.emitd("}", 2)
            return

        # Default shift is the whole key length; characters occurring in the
        # key (except its last position) shift by their distance to the end.
        skip = [str(key_len)] * 256
        for i in range(key_len - 1):
            skip[ord(key[i])] = str(key_len - 1 - i)

        # Escape the key so quotes, backslashes and control characters
        # cannot break the generated string literal.
        self.emit('static unsigned char key[] = "%s";' % self._c_escape(key))
        self.emiti("static int skip[256] = {")
        for row in range(8):
            start = row * 32
            self.emit(",".join(skip[start:start + 32]) + ",")
        self.emitd("};")

        last = key[key_len - 1]
        self.emit("int i = %d, j, k, len = %d;" % (key_len - 1, key_len))
        self.emit("unsigned char c, tail = %d; //'%s'"
                  % (ord(last), self._c_escape(last)), 2)
        self.emiti("while (i < n) {")
        self.emit("c = text[i];")
        self.emiti("if (c == tail) {")
        self.emit("j = len - 1; k = i;")
        self.emiti("while (key[--j] == text[--k]) {")
        self.emit("if (j == 0) return 1;")
        self.emitd("}")
        self.emitd("}")
        self.emit("i += skip[c];")
        self.emitd("}")
        self.emit("return 0;")
        self.emitd("}", 2)

    def emit_driver(self):
        """Emit the C function DFA(text).

        The generated function calls the start state's function at each
        successive position of *text*, returning 1 on the first success and
        0 once the terminating NUL has been tried.
        """
        self.emiti("int DFA(unsigned char *text) {")
        self.emiti("do {")
        self.emiti("if(%s(text))" % self.state_name(self.cg.start))
        self.emit("return 1;")
        self.emitd(r"} while (*text++ != '\0');")
        self.emitd("return 0;")
        self.emitd("}", 2)

    def emit_state(self, cur_state, transition):
        """Emit the C function for one DFA state.

        Accepting states become a function that just returns accept(s);
        every other state is delegated to CTranslator.emit_state.
        """
        if cur_state in self.cg.accepts:
            self.emiti("int %s(unsigned char* s) {" % self.state_name(cur_state))
            self.emit("return accept(s);")
            self.emitd("}")
        else:
            CTranslator.emit_state(self, cur_state, transition)

def test():
    """Run doctest.testmod() with its default target (the __main__ module)."""
    from doctest import testmod
    testmod()


if __name__ == '__main__':
    test()