view pyrect/cbcgrep_translator.py @ 40:962ae4154724

add CbCGREPTranslator.
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Tue, 20 Jul 2010 17:26:54 +0900
parents
children ffbbdd33881d
line wrap: on
line source

#!/usr/bin/env python

from grep_translator import GREPTranslator
from dfareg import Regexp, CallGraph

class CbCGREPTranslateExeption(Exception):
    """Error raised when a call graph can't be translated into CbC grep code."""

class CbCGREPTranslator(GREPTranslator):
    """CbCGREPTranslator
    This class translates a DFA call graph into CbC source code for grep,
    based on the (beautiful) mini-grep introduced in "The Practice of
    Programming" written by Rob Pike & Brian W. Kernighan.
    (see template/grep.cbc)

    Every DFA state is emitted as a ``__code`` segment and control is
    transferred between segments with ``goto``.

    >>> string = "(build|fndecl|gcc)"
    >>> reg = Regexp(string)
    >>> dfacg = CallGraph(reg.dfa)
    >>> tje = CbCGREPTranslator(string, dfacg)
    >>> tje.translate()
    """

    def __init__(self, regexp, cg):
        """Set up the translator for the call graph ``cg`` of ``regexp``.

        Raises CbCGREPTranslateExeption when ``cg.type`` is "NFA"; every
        other call graph is handed to GREPTranslator.__init__ unchanged.
        """
        if cg.type == "NFA":
            raise CbCGREPTranslateExeption("can't translate grep from NFA")
        GREPTranslator.__init__(self, regexp, cg)
        # Keywords used when emitting state functions and transitions.
        self.funType = '__code '
        self.callType = 'goto '
        self.breakStatement = ''
        # When true, accepted lines are printed as "<filename>: <line>".
        self.print_file = False
        self.__bufsize = 1024

    def getbufsize(self):
        """Return the value emitted for the READBUFSIZE macro."""
        return self.__bufsize

    def setbufsize(self, bufsize):
        """Store ``abs(bufsize)`` as the READBUFSIZE value."""
        self.__bufsize = abs(bufsize)

    bufsize = property(getbufsize, setbufsize)

    def emit_accept_state(self):
        """Emit ``accept``: print the matched line, then go to ``next_line``."""
        self.emit("__code accept(char* s) {\n")
        if self.print_file:
            self.emit("  printf(\"%s: %s\\n\", filename, buf);\n")
        else:
            self.emit("  printf(\"%s\\n\", buf);\n")
        self.emit("    goto next_line(s);\n}\n\n")

    def emit_reject_state(self):
        """Emit ``reject``: retry the match from the next position via ``next_ptr``."""
        self.emit("""
__code reject(char* s) {
  goto next_ptr();
}
""")

    def emit_next_state(self):
        """Emit the line-scanning segments ``next_ptr``, ``next_line`` and ``returner``."""
        # next_ptr: advance ``cur`` by one character and restart the DFA
        # there, or fetch the next line once the terminating NUL is passed.
        self.emit("""
__code next_ptr() {
  if(*cur++ == '\\0')
    goto next_line();
  goto DFA(cur);
}
""")

        # next_line: read a line into ``buf``, strip its trailing newline
        # and start the DFA at the beginning of the line.
        self.emit("""
__code next_line() {
  if(fgets(buf, sizeof buf, f) == NULL)
    goto returner();
  int n = strlen(buf);
  if (n > 0 && buf[n-1] == '\\n')
    buf[n-1] = '\\0';
  cur = buf;
  goto DFA(buf);
}
""")
        # returner: reached when fgets() reports end of input.
        self.emit("""
__code returner() {
  return;
}""")

    def emit_initialization(self):
        """Emit includes, globals, prototypes, the grep template and scanner.

        The template is read from "template/grep.cbc", a path relative to
        the current working directory.
        """
        self.emit("#include <stdio.h>\n")
        self.emit("#include <stdlib.h>\n")
        self.emit("#include <string.h>\n\n")
        # LINEBUFSIZE is fixed; only READBUFSIZE follows self.bufsize.
        self.emit("#define LINEBUFSIZE 1024\n")
        self.emit("#define READBUFSIZE %d\n\n" % self.bufsize)
        self.emit("char readbuf[READBUFSIZE], buf[LINEBUFSIZE];\n")
        self.emit("char *cur, *filename;\n\n")
        # Terminate the line so the first prototype is not glued onto it.
        self.emit("FILE* f;\n\n")

        # Forward declarations for every code segment emitted later.
        self.emit("%sDFA(char* s);\n" % self.funType)
        for state in self.cg.map.iterkeys():
            self.emit(self.funType + self.modify_state_name(state) + "(char* s);\n")
        self.emit(self.funType + 'accept(char* s);\n')
        self.emit(self.funType + 'reject(char* s);\n')
        self.emit(self.funType + 'next_ptr();\n')
        self.emit(self.funType + 'next_line();\n')
        self.emit(self.funType + 'returner();\n')
        # Close the template file deterministically instead of leaking it.
        with open("template/grep.cbc") as grepsource:
            self.emit(grepsource.read())
        self.emit_next_state()

    def emit_filter(self):
        """Emit nothing; this translator generates no filter code."""
        pass

    def emit_driver(self):
        """Emit ``main`` and the ``DFA`` entry segment that jumps to the start state."""
        # ``return 0;``: a bare ``return;`` is not valid in a function
        # declared to return int.
        self.emit("""
int main(int argc, char* argv[]) {
  grepmain(argc, argv);
  return 0;
}
""")
        self.emit("""
%sDFA(char* s) {
  goto %s(s);
}
""" % (self.funType, self.modify_state_name(self.cg.start)))

    def emit_state(self, cur_state, transition):
        """Emit the code segment for ``cur_state``.

        An accepting state jumps straight to ``accept``.  Any other state
        with a non-empty ``transition`` gets a switch over it; for a DFA
        call graph, unmatched input falls through to ``reject``.
        """
        self.emit(self.funType + self.modify_state_name(cur_state) + "(char* s) {\n")
        if cur_state in self.cg.accepts:
            self.emit("\tgoto accept(s);\n")
        elif transition:
            if self.cg.type == "DFA":
                self.emit_switch(transition, default="reject")
            else:
                self.emit_switch(transition)
        self.emit("}\n\n")

def test():
    """Run the doctests of the ``__main__`` module (doctest.testmod default)."""
    from doctest import testmod
    testmod()


if __name__ == '__main__':
    test()