changeset 5:11fba907c0af

add Translater(object), that can translate C/CbC source code from NFA or DFA(CbC can translate from DFA). and refactoring, refactoring, refactoring... not implimented translation-DotFile.
author ryoma <e075725@ie.u-ryukyu.ac.jp>
date Thu, 01 Jul 2010 00:40:51 +0900
parents 7e47839a54ad
children 168d60b03e2c
files code/c/dfa.c code/c/nfa.c src/cTranslator.py src/cbcTranslator.py src/converter.py src/dfareg.py src/translator.py
diffstat 7 files changed, 419 insertions(+), 96 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/code/c/dfa.c	Thu Jul 01 00:40:51 2010 +0900
@@ -0,0 +1,49 @@
+#include <stdio.h>
+void state_1_3(char* s);
+void state_1_2(char* s);
+void accept(char* s);
+void reject(char* s);
+
+int main(int argc, char* argv[]) {
+	puts("regexp: A*");
+	puts("number of state: 2");
+	printf("string: %s\n", argv[1]);
+	state_1_3(argv[1]);
+
+	return 0;
+}
+
+void state_1_3(char* s) {
+	printf("state: 1_3, input: %s\n", s);
+	switch(*s++) {
+		case 'A': 
+			state_1_2(s);
+			break;
+		case '\0':
+			accept(s);
+			break;
+		default: reject(s);
+	}
+}
+
+void state_1_2(char* s) {
+	printf("state: 1_2, input: %s\n", s);
+	switch(*s++) {
+		case 'A': 
+			state_1_2(s);
+			break;
+		case '\0':
+			accept(s);
+			break;
+		default: reject(s);
+	}
+}
+
+
+void accept(char* s) {
+	printf("\nstring matches regexp. \n\n");
+}
+
+void reject(char* s) {
+	printf("\nstring does not match regexp. \n\n");
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/code/c/nfa.c	Thu Jul 01 00:40:51 2010 +0900
@@ -0,0 +1,106 @@
+#include <stdio.h>
+void state_1(char* s);
+void state_3(char* s);
+void state_2(char* s);
+void state_5(char* s);
+void state_4(char* s);
+void state_7(char* s);
+void state_6(char* s);
+void state_8(char* s);
+void accept(char* s);
+void reject(char* s);
+
+int main(int argc, char* argv[]) {
+	puts("regexp: (A|B)*C");
+	puts("number of state: 8");
+	printf("string: %s\n", argv[1]);
+	state_6(argv[1]);
+	reject(argv[1]);
+
+	return 0;
+}
+
+void state_1(char* s) {
+	printf("state: 1, input: %s\n", s);
+	char* s_local = s;
+	switch(*s_local++) {
+		case 'A': 
+			state_2(s_local);
+			break;
+	}
+}
+
+void state_3(char* s) {
+	printf("state: 3, input: %s\n", s);
+	char* s_local = s;
+	switch(*s_local++) {
+		case 'B': 
+			state_4(s_local);
+			break;
+	}
+}
+
+void state_2(char* s) {
+	printf("state: 2, input: %s\n", s);
+	char* s_local = s;
+	state_5(s_local);
+	state_7(s_local);
+	switch(*s_local++) {
+	}
+}
+
+void state_5(char* s) {
+	printf("state: 5, input: %s\n", s);
+	char* s_local = s;
+	state_1(s_local);
+	state_3(s_local);
+	switch(*s_local++) {
+	}
+}
+
+void state_4(char* s) {
+	printf("state: 4, input: %s\n", s);
+	char* s_local = s;
+	state_5(s_local);
+	state_7(s_local);
+	switch(*s_local++) {
+	}
+}
+
+void state_7(char* s) {
+	printf("state: 7, input: %s\n", s);
+	char* s_local = s;
+	switch(*s_local++) {
+		case 'C': 
+			state_8(s_local);
+			break;
+	}
+}
+
+void state_6(char* s) {
+	printf("state: 6, input: %s\n", s);
+	char* s_local = s;
+	state_5(s_local);
+	state_7(s_local);
+	switch(*s_local++) {
+	}
+}
+
+void state_8(char* s) {
+	printf("state: 8, input: %s\n", s);
+	char* s_local = s;
+	switch(*s_local++) {
+		case '\0':
+			accept(s_local);
+			break;
+	}
+}
+
+
+void accept(char* s) {
+	printf("\nstring matches regexp. \n\n");
+}
+
+void reject(char* s) {
+	printf("\nstring does not match regexp. \n\n");
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cTranslator.py	Thu Jul 01 00:40:51 2010 +0900
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+
+import sys
+from dfareg import Regexp, CallGraph
+from translator import Translator
+
+class CTranslator(Translator):
+    """
+    CTranslator
+    >>> string = \"(A|B)*C\"
+    >>> reg = Regexp(string)
+    >>> dfacg = CallGraph(reg.dfa)
+    >>> nfacg = CallGraph(reg.nfa)
+    >>> CTranslator(string, dfacg).translate()
+    >>> CTranslator(string, nfacg).translate()
+    """
+    def __init__(self, regexp, cg, stream=sys.stdout):
+        self.regexp = regexp
+        self.cg = cg
+        self.stream = stream
+        self.funType = 'void '
+        self.callType = ''
+        self.breakStatement = '\t\t\tbreak;'
+        self.debug = False
+
+    def translateFromCallGraph(self):
+        # self.emit C-source code
+        self.emit("#include <stdio.h>\n")
+        for k in self.cg.map.iterkeys():
+            self.emit(self.funType + "state_" + k + "(char* s);\n")
+        self.emit(self.funType + 'accept(char* s);\n')
+        self.emit(self.funType + 'reject(char* s);\n')
+        self.emit("""
+int main(int argc, char* argv[]) {
+\tputs(\"regexp: %s\");
+\tputs(\"number of state: %d\");
+\tprintf(\"string: %%s\\n\", argv[1]);
+\t%s%s(argv[1]);
+""" % (self.regexp, len(self.cg.states), self.callType, "state_"+self.cg.start))
+        if self.cg.type is "NFA" : self.emit("\treject(argv[1]);\n")
+        self.emit("""
+\treturn 0;
+}\n\n""")
+
+        for k, v in self.cg.map.iteritems():
+            self.emit(self.funType + "state_" + k + "(char* s) {\n")
+            if self.debug: self.emit("\tprintf(\"state: %s, input: %%s\\n\", s);\n" % (k))
+            if self.cg.type is "NFA":
+                sLocal = "s_local"
+                self.emit("\tchar* %s = s;\n" % (sLocal))
+                # epsilon-transition
+                for ss in (ss for i,ss in v.iteritems() if i == ''):
+                    for s in ss:
+                        self.emit("\t%s%s(%s);\n" % (self.callType, "state_"+s, sLocal))
+            else:
+                sLocal = "s"
+
+            self.emit("\tswitch(*%s++) {\n" % (sLocal))
+
+            for input, nextStates in v.iteritems():
+                if input != '':
+                    self.emit("\t\tcase '%s': \n" % (input))
+                    for nextState in nextStates:
+                        self.emit("\t\t\t%s%s(%s);\n" % (self.callType, "state_"+nextState, sLocal))
+                    if self.breakStatement != '' : self.emit(self.breakStatement+'\n')
+
+            if k in self.cg.accepts :
+                self.emit( """\t\tcase '\\0':\n\t\t\t%saccept(%s);\n%s\n""" % (self.callType, sLocal, self.breakStatement))
+            if self.cg.type is "DFA":
+                self.emit("\t\tdefault: %sreject(%s);\n\t}\n" % (self.callType, sLocal))
+            else:
+                self.emit("\t}\n")
+            self.emit("}\n\n")
+
+        self.emit ("""
+%saccept(char* s) {
+\tprintf(\"\\nstring matches regexp. \\n\\n\");
+}\n""" % self.funType)
+        self.emit ("""
+%sreject(char* s) {
+\tprintf(\"\\nstring does not match regexp. \\n\\n\");
+}\n""" % self.funType)
+
+def main():
+    import doctest
+    doctest.testmod()
+    '''
+    reg = Regexp("(A|B)*C")
+    ct = CTranslator(reg.regexp, CallGraph(reg.dfa))
+    ct.translate()
+    '''
+
+if __name__ == '__main__' : main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cbcTranslator.py	Thu Jul 01 00:40:51 2010 +0900
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+
+import sys
+from dfareg import Regexp, CallGraph
+from cTranslator import CTranslator
+
+class CbCTranslateExeption(Exception):
+    pass
+
+class CbCTranslator(CTranslator):
+    def __init__(self, regexp, cg, stream=sys):
+        if cg.type is "NFA": raise CbCTranslateExeption("can't translate CbC from NFA")
+        self.regexp = regexp
+        self.cg = cg
+        self.stream = stream
+        self.funType = '__code '
+        self.callType = 'goto '
+        self.breakStatement = ''
+        self.debug = False
+
+def main():
+    reg = Regexp("(A|B)*C")
+    cbct = CbCTranslator(reg.regexp, CallGraph(reg.dfa))
+    cbct.translate()
+
+if __name__ == '__main__' : main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/converter.py	Thu Jul 01 00:40:51 2010 +0900
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+
+import sys
+from dfareg import Regexp, CallGraph
+from cTranslator import CTranslator
+from cbcTranslator import CbCTranslator
+from optparse import OptionParser
+
+def main(argv):
+    myusage = "%prog [-C] regexp"
+    psr = OptionParser(usage=myusage)
+    psr.add_option("-C", action="store_true", dest="emitC", default=False, help="emit C-source")
+    psr.add_option("--from-dfa", action="store_true", dest="dfa", default=True, help="translate from DFA")
+    psr.add_option("--from-nfa", action="store_true", dest="nfa", default=False, help="translate from NFA")
+    psr.add_option("-o", action="store", type="string", dest="output", default=False, help="output file", metavar="FILE")
+    psr.add_option("-g", action="store_true", dest="debug", default=False, help="embed debug info")
+    psr.add_option("-D", action="store_true", dest="emitDot", default=False, help="emit Dot file")
+    (opts, args) = psr.parse_args(sys.argv)
+    if len(args) == 1:
+        psr.print_help()
+        exit()
+    reg = Regexp(args[1])
+    if not opts.output:
+        output = sys.stdout
+    else:
+        output = open(opts.output,"w")
+
+    if opts.nfa:
+        fa = reg.nfa
+    else:
+        fa = reg.dfa
+
+    if opts.emitDot:
+        r.emitDot()
+    elif opts.emitC:
+        translator = CTranslator(reg.regexp, CallGraph(fa))
+        translator.debug = opts.debug
+    else:
+        translator = CbCTranslator(reg.regexp, CallGraph(fa))
+        translator.debug = opts.debug
+
+    translator.translate(output)
+
+if __name__ == '__main__' : main(sys.argv)
--- a/src/dfareg.py	Tue Jun 29 12:46:29 2010 +0900
+++ b/src/dfareg.py	Thu Jul 01 00:40:51 2010 +0900
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 import copy
-import sys, re
+import re
 from optparse import OptionParser
 
 class NondeterministicFiniteAutomaton(object):
@@ -324,101 +324,98 @@
 
 # create call graph (as dictionary)
 class CallGraph(object):
-    def __init__(self, dfa):
-        self.dfa = dfa
-        self.start = "state_"+self.set2name(dfa.start)
-        (self.map, self.accepts) = self.getCallGraph(self.dfa)
+    """
+    CallGraph is State Transition Diagram.
+    it's can be create from DFA or DFA.
+    >>> reg = Regexp(\"AA*|B\")
+    >>> dfacg = CallGraph(reg.dfa)
+    >>> nfacg = CallGraph(reg.nfa)
+    >>> dfacg.map
+    {'1_6_8': {'A': ['2_3_5'], 'B': ['7']}, '2_3_5': {'A': ['3_4']}, '7': {}, '3_4': {'A': ['3_4']}}
+    >>> dfacg.states
+    set(['1_6_8', '2_3_5', '7', '3_4'])
+    >>> dfacg.accepts
+    set(['2_3_5', '7', '3_4'])
+    >>> nfacg.map
+    {'1': {'A': ['2']}, '3': {'A': ['4']}, '2': {'': ['5']}, '5': {'': ['3']}, '4': {'': ['3']}, '7': {}, '6': {'B': ['7']}, '8': {'': ['1', '6']}}
+    >>> nfacg.states
+    set(['1', '3', '2', '5', '4', '7', '6', '8'])
+    >>> nfacg.accepts
+    set(['5', '4', '7'])
+    """
+
+    def __init__(self, fa):
+        self.fa = fa
+        if isinstance(fa, DeterministicFiniteAutomaton):
+            self.type = "DFA"
+            self.start = self.set2name(fa.start)
+        else:
+            self.type = "NFA"
+            self.start = str(fa.start)
+        self.inputs = set()
+        self.states = set()
+        (self.map, self.accepts) = self.getCallGraph(self.fa)
 
     # return state name from set
     def set2name(self, set_):
         l = list(set_)
         l.sort()
-        return '_'.join(str(x) for x in l)
-
-    def getCallGraph(self, dfa):
+        return ('_'.join(str(x) for x in l))
 
-        funcMap = dict()
-        funcMap[dfa.start] = set()
+    def getCallGraph(self, fa):
+        transitionCases = dict()
+        if self.type is "DFA":
+            transitionCases[fa.start] = set()
+        else:
+            transitionCases[frozenset([fa.start])] = set()
 
+        # transitionCases is hash table that binds a state and inputs,
+        # it's useful for transition definition
+
+        # fa is dfa or nfa
         # dfa.map => {(frozenset([1, 15]), 'd'): frozenset([16]), ....}
         #             : frozenset(1, 15) x 'd' -> frozenset([16])
-
-        for v in dfa.map.itervalues():
-            funcMap[frozenset(v)] = set()
+        # nfa.map => {(6, ''): set([5, 7]), (1, 'A'): set([2]),   ....}
+        #             : 6 x '' -> set([5, 7])
 
-        for key in dfa.map.iterkeys():
-            slot = funcMap.setdefault(key[0], set())
-            slot.add(key[1])
+        for nextState in fa.map.itervalues():
+            if self.type is "DFA":
+                transitionCases[frozenset(nextState)] = set()
+            else:
+                for nextState_ in nextState:
+                    transitionCases[frozenset([nextState_])] = set()
 
-        for v in dfa.map.itervalues():
-            funcMap[frozenset(v)] = set()
-
-        for key in dfa.map.iterkeys():
-            slot = funcMap.setdefault(key[0], set())
-            slot.add(key[1])
+        for (state, input) in fa.map.iterkeys():
+            if self.type is "NFA":
+                state = [state]
+            slot = transitionCases.setdefault(frozenset(state), set())
+            slot.add(input)
 
         callGraph = dict()
-        accepts = list()
-
-        # emit cbc code
-        for k in funcMap.iterkeys():
-            callGraph["state_"+self.set2name(k)] = set()
+        accepts = set()
 
-        for k, v in funcMap.iteritems():
-            caseMap = dict()
-            for case in v:
-                caseMap[case] = "state_"+self.set2name(dfa.map[(frozenset(k), case)])
-            if k in dfa.accepts:
-                accepts.append("state_"+self.set2name(k))
-                caseMap['\\0'] = "accept"
-            callGraph["state_"+self.set2name(k)] = caseMap
-
-        return (callGraph, accepts)
-
-# emit CbC-souce, from CallGraph
-def dfa2CbC(cg, regexp, emitCFlag):
-    if emitCFlag:
-        fun_type = 'void '
-        call_t = ''
-        break_s = '\n\t\t\tbreak;'
-    else:
-        fun_type = '__code '
-        call_t = 'goto '
-        break_s = ''
+        # create CallGraph
+        for state in transitionCases.iterkeys():
+            callGraph[self.set2name(state)] = set()
 
-    # emit cbc(or c) code
-    print "#include <stdio.h>\n"
-    for k in cg.map.iterkeys():
-        print fun_type + k + "(char* s);"
-    print fun_type + 'accept(char* s);'
-    print fun_type + 'reject(char* s);'
-    print """
-int main(int argc, char* argv[]) {
-\tputs(\"regexp: %s\");
-\tputs(\"number of state: %d\");
-\tprintf(\"string: %%s\\n\", argv[1]);
-\t%s%s(argv[1]);
-\treturn 0;
-}\n""" % (regexp, len(cg.map), call_t, cg.start)
-
-    for k, v in cg.map.iteritems():
-        print fun_type + k + "(char* s) {"
-        print "\tswitch(*s++) {"
-        for case, next_state in v.iteritems():
-            print "\t\tcase '%s': " % (case)
-            print "\t\t\t%s%s(s);%s" % (call_t, next_state, break_s)
-
-        print "\t\tdefault: %sreject(s);\n\t}" % call_t
-        print "}\n"
-
-    print """
-%saccept(char* s) {
-\tprintf(\"\\nstring matches regexp. \\n\\n\");
-}\n""" % fun_type
-    print """
-%sreject(char* s) {
-\tprintf(\"\\nstring does not match regexp. \\n\\n\");
-}\n""" % fun_type
+        for (state, input) in transitionCases.iteritems():
+            caseMap = dict()
+            self.states.add(self.set2name(state))
+            if self.type is "DFA":
+                for case in input:
+                    self.inputs.add(case)
+                    caseMap[case] = [self.set2name(fa.map[frozenset(state), case])]
+                if state in fa.accepts:
+                    accepts.add(self.set2name(state))
+            else:
+                self.states.add(str(list(state)[0]))
+                for case in input:
+                    self.inputs.add(case)
+                    caseMap[case] = [str(next_) for next_ in fa.map[list(state)[0], case]]
+                if list(state)[0] in fa.accepts:
+                    accepts.add(self.set2name(state))
+            callGraph[self.set2name(state)] = caseMap
+        return (callGraph, accepts)
 
 def dfa2Dot(cg, regexp):
     print """
@@ -473,19 +470,8 @@
 def compile(regexp):
     return Regexp(regexp)
 
-def main(argv):
-    myusage = "%prog [-C] regexp"
-    psr = OptionParser(usage=myusage)
-    psr.add_option("-C", action="store_true", dest="emitCFlag", default=False, help="emit C-source")
-    psr.add_option("-D", action="store_true", dest="emitDot", default=False, help="emit Dot file")
-    (opts, args) = psr.parse_args(sys.argv)
-    if len(args) == 1:
-        psr.print_help()
-        exit()
-    r = compile(args[1])
-    if opts.emitDot:
-        r.emitDot()
-    else:
-        r.emitCbC(opts.emitCFlag)
+def test(a):
+    import doctest
+    doctest.testmod()
 
-if __name__ == '__main__' : main(sys.argv)
+if __name__ == '__main__' : test()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/translator.py	Thu Jul 01 00:40:51 2010 +0900
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+
+import sys
+
+class Translator(object):
+    def __init__(self, regexp, cg, stream=sys.stdout):
+        self.regexp = regexp
+        self.cg = cg
+        self.stream = stream
+
+    def emit(self, string):
+        self.stream.write(string)
+
+    def translateFromCallGraph(self):
+        pass
+
+    def translate(self, stream=sys.stdout):
+        self.stream = stream
+        self.translateFromCallGraph()