view src/cTranslator.py @ 12:41391400fe68

add GREPTranslator(Translator) and implement jit-compile-grep, which faster than grep!! in case of regular expression search in large files.
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Sun, 04 Jul 2010 08:40:59 +0900
parents 94984eaa03e2
children 55684cb51347
line wrap: on
line source

#!/Usr/bin/env python

from dfareg import Regexp, CallGraph
from translator import Translator

class CTranslator(Translator):
    """CTranslator
    This Calss can translate from DFA or NFA into C source code.
    DFA: A simple state transition as tail call (also can implement with CbC).
    NFA: using stack, deepening depth-first search.
    >>> string = \"(A|B)*C\"
    >>> reg = Regexp(string)
    >>> dfacg = CallGraph(reg.dfa)
    >>> nfacg = CallGraph(reg.nfa)
    >>> CTranslator(string, dfacg).translate()
    >>> CTranslator(string, nfacg).translate()
    """
    def __init__(self, regexp, cg):
        Translator.__init__(self, regexp, cg)
        self.funType = 'void '
        self.callType = ''
        self.breakStatement = '\t\t\tbreak;'
        self.debug = False
        if self.cg.type is "DFA":
            self.name_hash = self.create_name_hash()

    def modify_state_name(self, state_name):
        if state_name is "accept" or state_name is "reject":
            return state_name
        if self.cg.type is "DFA":
            return "state_"+self.name_hash[state_name]
        else:
            return "state_"+state_name

    def emit_accept_state(self):
        self.emit ("""
%saccept(char* s) {
\tprintf(\"\\nstring matches regexp. \\n\\n\");
}\n""" % self.funType)

    def emit_reject_state(self):
        self.emit ("""
%sreject(char* s) {
\tprintf(\"\\nstring does not match regexp. \\n\\n\");
}\n""" % self.funType)

    def emit_driver(self):
        self.emit("""
int main(int argc, char* argv[]) {
\tputs(\"regexp: %s\");
\tputs(\"number of state: %d\");
\tprintf(\"string: %%s\\n\", argv[1]);
\t%s%s(argv[1]);
""" % (self.regexp, len(self.cg.states), self.callType, self.modify_state_name(self.cg.start)))
        if self.cg.type is "NFA":
            self.emit("\treject(argv[1]);\n")
        self.emit("""
\treturn 0;
}\n\n""")

    def emit_switch(self, case, default=None):
        if self.cg.type is "NFA":
            sLocal = "s_local"
        else:
            sLocal = "s"

        self.emit("\tswitch(*%s++) {\n" % (sLocal))

        for input, nextStates in case.iteritems():
            if input != '':
                self.emit("\t\tcase '%s': \n" % (input))
                for nextState in nextStates:
                    self.emit("\t\t\t%s%s(%s);\n" % (self.callType, self.modify_state_name(nextState), sLocal))
                if self.breakStatement != '': self.emit(self.breakStatement+'\n')

        if default:
            self.emit( """\t\tdefault:\n\t\t\t%s%s(NULL);\n""" % (self.callType, default))
        self.emit("\t}\n")


    def emit_state(self, cur_state, transition):
        self.emit(self.funType + self.modify_state_name(cur_state) + "(char* s) {\n")
        if self.debug:
            self.emit("\tprintf(\"state: %s, input: %%s\\n\", s);\n" % (cur_state))
        if self.cg.type is "NFA":
            sLocal = "s_local"
            self.emit("\tchar* %s = s;\n" % (sLocal))
            if '' in transition:
                epsilon_transition = transition.pop('')
                for n in epsilon_transition:
                    self.emit("\t%s%s(%s);\n" % (self.callType, self.modify_state_name(n), sLocal))
        else:
            sLocal = "s"

        if cur_state in self.cg.accepts:
            transition['\\0'] = ["accept"]

        if transition:
            if self.cg.type is "DFA":
                self.emit_switch(transition, default="reject")
            else:
                self.emit_switch(transition)
        self.emit("}\n\n")

    def emit_initialization(self):
        self.emit("#include <stdio.h>\n\n")
        for state in self.cg.map.iterkeys():
            self.emit(self.funType + self.modify_state_name(state) + "(char* s);\n")
        self.emit(self.funType + 'accept(char* s);\n')
        self.emit(self.funType + 'reject(char* s);\n')

    def emit_from_callgraph(self):
        # self.emit C-source code
        self.emit_initialization()
        self.emit_driver()

        for cur_state, transition in self.cg.map.iteritems():
            self.emit_state(cur_state, transition)

        self.emit_accept_state()
        self.emit_reject_state()

def test():
    import doctest
    doctest.testmod()
    '''
    reg = Regexp("(A|B)*C")
    ct = CTranslator(reg.regexp, CallGraph(reg.dfa))
    ct.translate()
    '''

if __name__ == '__main__': test()