view pyrect/translator/dfa_translator.py @ 47:701beabd7d97

Add input rules, Range, CharacterClass, Anchor and multi-byte character support (the last does not work yet), and further simplify the NFA (a global improvement).
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Sun, 08 Aug 2010 04:13:14 +0900
parents pyrect/dfa_translator.py@d29d3470fde7
children
line wrap: on
line source

#!/usr/bin/env python

from grep_translator import GREPTranslator
from pyrect.regexp import Regexp

class GNUGREPTranslator(GREPTranslator):
    """Translate a regular expression's automaton into GNU-grep style C code.

    The emitted C source defines ``size_t DFA(char* s)``, which is meant to
    be equivalent to ``dfaexec(..)`` in GNU grep (see src/dfa.c).

    Known limitation: this does not work correctly yet.  When searching a
    large file, fewer lines are accepted than with grep's own dfaexec;
    the cause is probably a buffering problem.

    >>> string = '(build|fndecl|gcc)'
    >>> reg = Regexp(string)
    >>> tje = GNUGREPTranslator(reg)
    >>> tje.translate()
    """

    def __init__(self, regexp):
        """Initialise the translator for *regexp*.

        regexp -- regular expression object, passed on unchanged to
                  GREPTranslator.__init__.
        """
        GREPTranslator.__init__(self, regexp)
        # Return type prefix of every C function emitted by this class.
        self.funType = 'size_t '
        # NOTE(review): callType/breakStatement are not read in this class;
        # presumably the inherited switch emitter uses them -- confirm.
        self.callType = 'return '
        self.breakStatement = ''

    def emit_initialization(self):
        """Emit C prototypes for every state function, accept() and reject()."""
        for state in self.cg.map.iterkeys():
            self.emit(self.funType + self.state_name(state) + "(char* s);\n")
        self.emit(self.funType + 'accept(char* s);\n')
        self.emit(self.funType + 'reject(char* s);\n')

    def emit_accept_state(self):
        """Emit the C function accept(), which always returns 1."""
        self.emit("""
%saccept(char* s) {
\treturn 1;
}\n""" % self.funType)

    def emit_reject_state(self):
        """Emit the C function reject(), which always returns 0."""
        self.emit("""
%sreject(char* s) {
\treturn 0;
}\n""" % self.funType)

    def emit_driver(self):
        """Emit the C entry point ``DFA(char *s)``.

        The generated function runs the start state at every position from
        ``s`` up to and including the first newline or NUL, and returns the
        offset of the first position where the start state returns non-zero,
        or ``(size_t) -1`` when no position matches.
        """
        # The pattern is pasted into a C block comment.  A literal "*/" in
        # the pattern would close that comment early and corrupt the
        # generated source, so it is written as "*\/" instead.  '%s'
        # formatting is kept so the pattern is converted to text exactly as
        # before.
        pattern = ('%s' % (self.regexp.regexp,)).replace('*/', '*\\/')
        self.emit("""
/* This DFA accept only \'%s\'*/
%sDFA(char *s) {
  char *begin = s;
  do {
    if  (%s(s)) { //(matchhere(regexp+1, text))
        return (char const *) s - begin;
      }
  } while (*s != '\\n' && *s++ != '\\0');
  return (size_t) -1;
}\n\n""" % (pattern, self.funType, self.state_name(self.cg.start)))

    def emit_state(self, cur_state, transition):
        """Emit the C function implementing one automaton state.

        cur_state  -- the state to emit; accepting states return accept(s).
        transition -- the state's outgoing transitions, handed to
                      emit_switch; may be empty.
        """
        self.emit(self.funType + self.state_name(cur_state) + "(char* s) {\n")
        if cur_state in self.cg.accepts:
            self.emit("\treturn accept(s);\n")
        elif transition:
            if self.cg.type == "DFA":
                self.emit_switch(transition, default="reject")
            else:
                self.emit_switch(transition)
        else:
            # A non-accepting state without transitions can never match.
            # Return explicitly: falling off the end of a size_t function
            # whose result the caller uses is undefined behaviour in C.
            self.emit("\treturn reject(s);\n")
        self.emit("}\n\n")

def test():
    """Run the doctests of the module currently executing as ``__main__``."""
    from doctest import testmod
    testmod()

# Run the embedded doctests when this file is executed as a script.
if __name__ == '__main__':
    test()