changeset 42:84059ea1a2db

add goto_grep_translator.py, this is label-based grep implimentation.
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Mon, 02 Aug 2010 04:03:23 +0900
parents ffbbdd33881d
children 83c69d42faa8
files pyrect/goto_grep_translator.py pyrect/jitgrep.py pyrect/regexp/__init__.py pyrect/template/grep.label
diffstat 3 files changed, 172 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyrect/goto_grep_translator.py	Mon Aug 02 04:03:23 2010 +0900
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+
+from c_translator import CTranslator
+from dfareg import Regexp, CallGraph
+
+class GREPTranslateExeption(Exception):
+    pass
+
+class GoToGREPTranslator(CTranslator):
+    """GoToGREPTranslator
+    This Class can translate form DFA into grep source-code.
+    which based on (beautiful) mini-grep introduced  \"The Practice of Programming\"
+    written by Rob Pike & Brian W. Kernighan. (see template/grep.label)
+    >>> string = \"(build|fndecl|gcc)\"
+    >>> reg = Regexp(string)
+    >>> dfacg = CallGraph(reg.dfa)
+    >>> tje = GoToGREPTranslator(string, dfacg)
+    >>> tje.translate()
+    """
+
+    def __init__(self, regexp, cg):
+        if cg.type == "NFA": raise GREPTranslateExeption("can't translate grep from NFA")
+        CTranslator.__init__(self, regexp, cg)
+        self.funType = 'void '
+        self.callType = 'return '
+        self.breakStatement = ''
+        self.begline = False
+        self.__bufsize = 1024
+
+    def getbufsize(self,):
+        return self.__bufsize
+    def setbufsize(self, bufsize):
+        self.__bufsize = abs(bufsize)
+
+    bufsize = property(getbufsize, setbufsize)
+
+    def emit_accept_state(self):
+        self.emit ("""
+\taccept:
+\t\tprintf(\"%s\\n\", buf);
+\t\treturn;
+""")
+
+    def emit_reject_state(self):
+        self.emit("\treject:\n")
+        if not self.begline:
+            self.emit("""
+\t\tif (*cur++ == '\\0')
+\t\t\treturn;
+\t\ts = cur;
+\t\tgoto %s;
+""" % self.modify_state_name(self.cg.start))
+        self.emit("return;\n")
+
+
+    def emit_initialization(self):
+        self.emit("#include <stdio.h>\n")
+        self.emit("#include <stdlib.h>\n")
+        self.emit("#include <string.h>\n\n")
+
+        self.emit("#define LINEBUFSIZE 1024\n")
+        self.emit("#define READBUFSIZE %d\n\n" % (self.bufsize))
+        self.emit("char readbuf[%d];\n\n" % (self.bufsize))
+
+        self.emit("%sDFA(char* s, char *buf, char* filename);\n" % self.funType)
+        grepsource = open("template/grep.label")
+        self.emit(grepsource.read())
+
+    def emit_filter(self):
+        pass
+
+    def emit_driver(self):
+        self.emit("""
+int main(int argc, char* argv[]) {
+\treturn grepmain(argc, argv);
+}\n\n""")
+
+    def emit_switch(self, case, default=None):
+        self.emit("\t\tswitch(*s++) {\n")
+        for input, next_states in case.iteritems():
+            if input != '':
+                self.emit("\t\t\tcase '%s': \n" % (input))
+                for next_state in next_states:
+                    self.emit("\t\t\t\tgoto %s;\n" % (self.modify_state_name(next_state)))
+                if self.breakStatement != '': self.emit(self.breakStatement+'\n')
+
+        if default:
+            self.emit( """\t\t\tdefault:\n\t\t\tgoto %s;\n""" % default)
+        self.emit("\t\t}\n")
+
+    def emit_state(self, cur_state, transition):
+        self.emit("\t" + self.modify_state_name(cur_state) + ":\n")
+        if cur_state in self.cg.accepts:
+            self.emit("\t\tgoto accept;\n")
+        else:
+            if transition:
+                if self.cg.type == "DFA":
+                    self.emit_switch(transition, default="reject")
+                else:
+                    self.emit_switch(transition)
+        self.emit("\n")
+
+    def emit_from_callgraph(self):
+        # self.emit C-source code
+        self.emit_initialization()
+        self.emit_driver()
+
+        self.emit("void DFA(char *s, char *buf, char* filename) {\n")
+        self.emit("\tchar *cur = s;\n")
+        self.emit("\tgoto %s;\n" % self.modify_state_name(self.cg.start))
+
+        for cur_state, transition in self.cg.map.iteritems():
+            self.emit_state(cur_state, transition)
+
+        self.emit_accept_state()
+        self.emit_reject_state()
+
+        self.emit("}\n\n")
+
+def test():
+    import doctest
+    doctest.testmod()
+
+if __name__ == '__main__': test()
--- a/pyrect/jitgrep.py	Mon Aug 02 04:02:19 2010 +0900
+++ b/pyrect/jitgrep.py	Mon Aug 02 04:03:23 2010 +0900
@@ -6,6 +6,7 @@
 import time
 from optparse import OptionParser
 from grep_translator import GREPTranslator
+from goto_grep_translator import GoToGREPTranslator
 from cbcgrep_translator import CbCGREPTranslator
 from dfareg import Regexp, CallGraph
 
@@ -25,6 +26,7 @@
     psr.add_option("--CFLAGS", action="store", type="string", dest="cflags", default="-O3 -fomit-frame-pointer", help="Print compile/matching time.")
     psr.add_option("--time", action="store_true", dest="time", default=False, help="Print compile/matching time.")
     psr.add_option("--debug", action="store_true", dest="debug", default=False, help="Dump commands, not evalute matching (except interactive mode).")
+    psr.add_option("--label", action="store_true", dest="label", default=False, help="label implimentation in C.")
     psr.add_option("--dump", action="store_true", dest="dump", default=False, help="Dump generated grep-source.")
     psr.add_option("--out", action="store", type="string", dest="out", default="", metavar="FILE", help="Output file.")
 
@@ -67,6 +69,8 @@
     dfacg = CallGraph(reg.dfa)
     if cbc:
         grept = CbCGREPTranslator(string, dfacg)
+    elif opts.label:
+        grept = GoToGREPTranslator(string, dfacg)
     else:
         grept = GREPTranslator(string, dfacg)
     grept.begline = begline
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyrect/template/grep.label	Mon Aug 02 04:03:23 2010 +0900
@@ -0,0 +1,44 @@
+/* Excerpted from 'The Practice of Programming' */
+/* by Brian W. Kernighan and Rob Pike */
+
+int grep(char * regexp, FILE *f, char *name) {
+  int n;
+  char buf[LINEBUFSIZE];
+  while (fgets(buf, sizeof buf, f) != NULL) {
+    n = strlen(buf);
+    if (n > 0 && buf[n-1] == '\n')
+      buf[n-1] = '\0';
+    DFA(buf, buf, name);
+  }
+  return 1;
+}
+
+int grepmain(int argc, char* argv[]) {
+  int i, nmatch;
+  FILE *f;
+
+  if (argc < 2) {
+    fprintf(stderr, "usage: grep regexp [file ...]");
+    exit(0);
+  }
+  nmatch = 0;
+  if (argc == 2) {
+    if (grep(argv[1], stdin, NULL))
+      nmatch;
+  } else {
+    for (i = 2; i < argc; i++) {
+      f = fopen(argv[i], "r");
+      if (f == NULL) {
+        fprintf(stderr, "can't open %s:", argv[i]);
+        continue;
+      }
+      if (READBUFSIZE > 0)
+        setvbuf(f, readbuf, _IOFBF, READBUFSIZE);
+      if (grep(argv[1], f, argc > 3 ? argv[i] : NULL) > 0)
+        nmatch++;
+      fclose(f);
+    }
+  }
+
+  return nmatch;
+}