view pyrect/dfa_translator.py @ 43:83c69d42faa8

Replace module dfareg with module regexp in the conversion flow. This is a substantial change in the implementation.
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Tue, 03 Aug 2010 05:35:38 +0900
parents ef2928cdbdb6
children d29d3470fde7
line wrap: on
line source

#!/usr/bin/env python

from pyrect.grep_translator import GREPTranslator
from pyrect.regexp import Regexp

class GNUGREPTranslator(GREPTranslator):
    """Translate a regexp's automaton into C shaped like GNU grep's dfaexec.

    The generated C source exposes ``size_t DFA(char* s)``, which is intended
    to be equivalent to ``dfaexec(..)`` in GNU grep (see src/dfa.c).

    Known issue: the generated matcher does not work correctly yet. When
    searching a large file it accepts fewer lines than grep's own dfaexec;
    the cause is probably related to buffering.

    >>> string = '(build|fndecl|gcc)'
    >>> reg = Regexp(string)
    >>> tje = GNUGREPTranslator(reg)
    >>> tje.translate()
    """

    def __init__(self, regexp):
        """Initialise the base translator and the C fragments used on emit.

        ``regexp`` is handed unchanged to ``GREPTranslator.__init__``.
        """
        GREPTranslator.__init__(self, regexp)
        # Return-type prefix placed in front of every generated C function.
        self.funType = 'size_t '
        # NOTE(review): callType/breakStatement are not read in this class;
        # presumably the inherited switch emitter consumes them -- confirm.
        self.callType = 'return '
        self.breakStatement = ''

    def emit_initialization(self):
        """Emit C forward declarations for every state and accept/reject."""
        signature_tail = "(char* s);\n"
        # Iterating the mapping directly yields its keys on both Python 2
        # and Python 3, unlike the Python-2-only iterkeys().
        for state in self.cg.map:
            self.emit(self.funType + self.modify_state_name(state)
                      + signature_tail)
        self.emit(self.funType + 'accept' + signature_tail)
        self.emit(self.funType + 'reject' + signature_tail)

    def emit_accept_state(self):
        """Emit the C ``accept`` function, which always returns 1."""
        self.emit("""
%saccept(char* s) {
\treturn 1;
}\n""" % self.funType)

    def emit_reject_state(self):
        """Emit the C ``reject`` function, which always returns 0."""
        self.emit("""
%sreject(char* s) {
\treturn 0;
}\n""" % self.funType)

    def emit_driver(self):
        """Emit the C ``DFA`` entry point.

        The generated function tries the start state at successive positions
        of ``s`` and returns the offset of the first position where the start
        state reports a match, or ``(size_t) -1`` when the scan stops at a
        newline or NUL without one.
        """
        # The pattern is pasted into a C block comment. A literal "*/" in it
        # (e.g. the regexp '.*/foo') would close the comment early and leave
        # the rest of the pattern as invalid C, so break that sequence up.
        pattern = ("%s" % (self.regexp.regexp,)).replace("*/", "* /")
        start_state = self.modify_state_name(self.cg.start)
        self.emit("""
/* This DFA accept only \'%s\'*/
%sDFA(char *s) {
  char *begin = s;
  do {
    if  (%s(s)) { //(matchhere(regexp+1, text))
        return (char const *) s - begin;
      }
  } while (*s != '\\n' && *s++ != '\\0');
  return (size_t) -1;
}\n\n""" % (pattern, self.funType, start_state))

    def emit_state(self, cur_state, transition):
        """Emit the C function for one automaton state.

        An accepting state simply returns ``accept(s)``. Any other state with
        transitions gets a switch; when the automaton type is "DFA" the
        switch is emitted with ``reject`` as its default target.
        """
        self.emit(self.funType + self.modify_state_name(cur_state)
                  + "(char* s) {\n")
        if cur_state in self.cg.accepts:
            self.emit("\treturn accept(s);\n")
        elif transition:
            if self.cg.type == "DFA":
                self.emit_switch(transition, default="reject")
            else:
                self.emit_switch(transition)
        # NOTE(review): a non-accepting state with no transitions gets an
        # empty body, so the generated size_t function returns no value --
        # confirm whether such states can occur.
        self.emit("}\n\n")

def test():
    """Run the doctests embedded in this module's docstrings."""
    from doctest import testmod
    testmod()

# Run the module's doctests when this file is executed as a script.
if __name__ == '__main__':
    test()