changeset 28:0e90ae1a2d9b

Add DFATranslator(GREPTranslator), which can translate into GNU grep's DFA-based matching function.
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Thu, 08 Jul 2010 20:02:42 +0900
parents 3db85244784b
children b833746d9d92
files src/c_translator.py src/dfa_translator.py
diffstat 2 files changed, 78 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/src/c_translator.py	Thu Jul 08 06:35:39 2010 +0900
+++ b/src/c_translator.py	Thu Jul 08 20:02:42 2010 +0900
@@ -21,6 +21,7 @@
         self.callType = ''
         self.breakStatement = '\t\t\tbreak;'
         self.debug = False
+        self.eols = ('\\0', '\\n')
         if self.cg.type == "DFA":
             self.name_hash = self.create_name_hash()
 
@@ -68,7 +69,7 @@
                 if self.breakStatement != '': self.emit(self.breakStatement+'\n')
 
         if default:
-            self.emit( """\t\tdefault:\n\t\t\t%s%s(NULL);\n""" % (self.callType, default))
+            self.emit( """\t\tdefault:\n\t\t\t%s%s(s);\n""" % (self.callType, default))
         self.emit("\t}\n")
 
 
@@ -83,7 +84,8 @@
                     self.emit("\t%s%s(s);\n" % (self.callType, self.modify_state_name(n)))
 
         if cur_state in self.cg.accepts:
-            transition['\\0'] = ["accept"]
+            for eol in self.eols:
+                transition[eol] = ["accept"]
 
         if transition:
             if self.cg.type == "DFA":
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/dfa_translator.py	Thu Jul 08 20:02:42 2010 +0900
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+
+from grep_translator import GREPTranslator
+from dfareg import Regexp, CallGraph
+
+'''(build|fndecl|gcc)'''
+class DFATranslator(GREPTranslator):
+    """DFATranslator
+    This class translates a DFA into size_t DFA(char* s),
+    which is meant to be entirely equivalent to dfaexec(..) in GNU grep (see src/dfa.c).
+    * However, it does not work correctly yet: when searching a large file,
+    * fewer lines are accepted than with grep's dfaexec.
+    * The cause is probably a problem with buffering.
+    >>> string = '(build|fndecl|gcc)'
+    >>> reg = Regexp(string)
+    >>> dfacg = CallGraph(reg.dfa)
+    >>> tje = DFATranslator(string, dfacg)
+    >>> tje.translate()
+    """
+
+    def __init__(self, regexp, cg):
+        GREPTranslator.__init__(self, regexp, cg)
+        self.funType = 'size_t '
+        self.callType = 'return '
+        self.breakStatement = ''
+
+    def emit_initialization(self):
+        for state in self.cg.map.iterkeys():
+            self.emit(self.funType + self.modify_state_name(state) + "(char* s);\n")
+        self.emit(self.funType + 'accept(char* s);\n')
+        self.emit(self.funType + 'reject(char* s);\n')
+
+    def emit_accept_state(self):
+        self.emit ("""
+%saccept(char* s) {
+\treturn 1;
+}\n""" % self.funType)
+
+    def emit_reject_state(self):
+        self.emit ("""
+%sreject(char* s) {
+\treturn 0;
+}\n""" % self.funType)
+
+    def emit_driver(self):
+        self.emit("""
+/* This DFA accept only \'%s\'*/
+%sDFA(char *s) {
+  char *begin = s;
+  do {
+    if  (%s(s)) { //(matchhere(regexp+1, text))
+        return (char const *) s - begin;
+      }
+  } while (*s != '\\n' && *s++ != '\\0');
+  return (size_t) -1;
+}\n\n""" % (self.regexp, self.funType, self.modify_state_name(self.cg.start)))
+
+    def emit_state(self, cur_state, transition):
+        self.emit(self.funType + self.modify_state_name(cur_state) + "(char* s) {\n")
+        if cur_state in self.cg.accepts:
+            self.emit("\treturn accept(s);\n")
+        else:
+            if transition:
+                if self.cg.type == "DFA":
+                    self.emit_switch(transition, default="reject")
+                else:
+                    self.emit_switch(transition)
+        self.emit("}\n\n")
+
+def test():
+    import doctest
+    doctest.testmod()
+
+if __name__ == '__main__': test()