changeset 10:2d00b46ce34d

modify CTranslator, improve emiting statement(if switch-statement unnecessary, not emit it.). and separate some code segment.
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Sun, 04 Jul 2010 00:33:15 +0900
parents 973b596bd124
children 94984eaa03e2
files code/graph/reg.dot code/graph/sample.dot src/benchReg.py src/cTranslator.py src/dfareg.py src/reg2llvm.py
diffstat 6 files changed, 100 insertions(+), 71 deletions(-) [+]
line wrap: on
line diff
--- a/code/graph/reg.dot	Sat Jul 03 03:49:41 2010 +0900
+++ b/code/graph/reg.dot	Sun Jul 04 00:33:15 2010 +0900
@@ -3,7 +3,7 @@
     d2ttikzedgelabels = true;
     d2tstyleonly = true;
     d2tdocpreamble = "\usetikzlibrary{automata}";
-    d2tfigpreamble = "  ikzstyle{every state}=    [draw=blue!50, shape=circle, very thick,fill=blue!20]";
+    d2tfigpreamble = "	ikzstyle{every state}=    [draw=blue!50, shape=circle, very thick,fill=blue!20]";
     edge [lblstyle="fill=blue!20", style="arrows=->", topath="bend left"];
     node [shape="circle", style="state"];
 
--- a/code/graph/sample.dot	Sat Jul 03 03:49:41 2010 +0900
+++ b/code/graph/sample.dot	Sun Jul 04 00:33:15 2010 +0900
@@ -1,5 +1,5 @@
 digraph G {
-        d2ttikzedgelabels = true;
+	d2ttikzedgelabels = true;
         d2tstyleonly = true;
         d2tdocpreamble = "\usetikzlibrary{automata}";
         d2tfigpreamble = "\tikzstyle{every state}=\
--- a/src/benchReg.py	Sat Jul 03 03:49:41 2010 +0900
+++ b/src/benchReg.py	Sun Jul 04 00:33:15 2010 +0900
@@ -22,7 +22,7 @@
 
 regLLVM = RegLLVM("regllvm", opts.regexp, opts.string, opts.implLabel, opts.optimizeFlag, opts.dumpFlag)
 
-if (opts.printFlag):   regLLVM.printModule()
+if (opts.printFlag):   regLLVM.print_module()
 
 if (opts.executeFlag):
     print "LLVM  :", Timer(setup='from __main__ import regLLVM', stmt='regLLVM.execute()').timeit(opts.number)
--- a/src/cTranslator.py	Sat Jul 03 03:49:41 2010 +0900
+++ b/src/cTranslator.py	Sun Jul 04 00:33:15 2010 +0900
@@ -25,16 +25,26 @@
             self.dfaStateNameHash = self.createDFAStateNameHash()
 
     def modifyStateName(self, stateName):
+        if stateName is "accept" or stateName is "reject":
+            return stateName
         if self.cg.type is "DFA":
             return "state_"+self.dfaStateNameHash[stateName]
         else:
             return "state_"+stateName
 
-    def translateFromCallGraph(self):
-        # self.emit C-source code
-        self.emit("#include <stdio.h>\n")
-        for k in self.cg.map.iterkeys():
-            self.emit(self.funType + self.modifyStateName(k) + "(char* s);\n")
+    def emit_accept_state(self):
+        self.emit ("""
+%saccept(char* s) {
+\tprintf(\"\\nstring matches regexp. \\n\\n\");
+}\n""" % self.funType)
+
+    def emit_reject_state(self):
+        self.emit ("""
+%sreject(char* s) {
+\tprintf(\"\\nstring does not match regexp. \\n\\n\");
+}\n""" % self.funType)
+
+    def emit_driver(self):
         self.emit(self.funType + 'accept(char* s);\n')
         self.emit(self.funType + 'reject(char* s);\n')
         self.emit("""
@@ -50,45 +60,63 @@
 \treturn 0;
 }\n\n""")
 
-        for k, v in self.cg.map.iteritems():
-            self.emit(self.funType + self.modifyStateName(k) + "(char* s) {\n")
-            if self.debug:
-                self.emit("\tprintf(\"state: %s, input: %%s\\n\", s);\n" % (k))
-            if self.cg.type is "NFA":
-                sLocal = "s_local"
-                self.emit("\tchar* %s = s;\n" % (sLocal))
-                # epsilon-transition
-                for ss in (ss for i,ss in v.iteritems() if i == ''):
-                    for s in ss:
-                        self.emit("\t%s%s(%s);\n" % (self.callType, self.modifyStateName(s), sLocal))
-            else:
-                sLocal = "s"
+    def emit_switch(self, case, default=None):
+        if self.cg.type is "NFA":
+            sLocal = "s_local"
+        else:
+            sLocal = "s"
+
+        self.emit("\tswitch(*%s++) {\n" % (sLocal))
 
-            self.emit("\tswitch(*%s++) {\n" % (sLocal))
+        for input, nextStates in case.iteritems():
+            if input != '':
+                self.emit("\t\tcase '%s': \n" % (input))
+                for nextState in nextStates:
+                    self.emit("\t\t\t%s%s(%s);\n" % (self.callType, self.modifyStateName(nextState), sLocal))
+                if self.breakStatement != '': self.emit(self.breakStatement+'\n')
+
+        if default:
+            self.emit( """\t\tdefault:\n\t\t\t%s%s(NULL);\n""" % (self.callType, default))
+        self.emit("\t}")
+
 
-            for input, nextStates in v.iteritems():
-                if input != '':
-                    self.emit("\t\tcase '%s': \n" % (input))
-                    for nextState in nextStates:
-                        self.emit("\t\t\t%s%s(%s);\n" % (self.callType, self.modifyStateName(nextState), sLocal))
-                    if self.breakStatement != '': self.emit(self.breakStatement+'\n')
+    def emit_state(self, cur_state, transition):
+        self.emit(self.funType + self.modifyStateName(cur_state) + "(char* s) {\n")
+        if self.debug:
+            self.emit("\tprintf(\"state: %s, input: %%s\\n\", s);\n" % (cur_state))
+        if self.cg.type is "NFA":
+            sLocal = "s_local"
+            self.emit("\tchar* %s = s;\n" % (sLocal))
+            if '' in transition:
+                epsilon_transition = transition.pop('')
+                for n in epsilon_transition:
+                    self.emit("\t%s%s(%s);\n" % (self.callType, self.modifyStateName(n), sLocal))
+        else:
+            sLocal = "s"
 
-            if k in self.cg.accepts:
-                self.emit( """\t\tcase '\\0':\n\t\t\t%saccept(%s);\n%s\n""" % (self.callType, sLocal, self.breakStatement))
+        if cur_state in self.cg.accepts:
+            transition['\\0'] = ["accept"]
+
+        if transition:
             if self.cg.type is "DFA":
-                self.emit("\t\tdefault: %sreject(%s);\n\t}\n" % (self.callType, sLocal))
+                self.emit_switch(transition, default="reject")
             else:
-                self.emit("\t}\n")
-            self.emit("}\n\n")
+                self.emit_switch(transition)
+        self.emit("\n}\n\n")
 
-        self.emit ("""
-%saccept(char* s) {
-\tprintf(\"\\nstring matches regexp. \\n\\n\");
-}\n""" % self.funType)
-        self.emit ("""
-%sreject(char* s) {
-\tprintf(\"\\nstring does not match regexp. \\n\\n\");
-}\n""" % self.funType)
+    def translateFromCallGraph(self):
+        # self.emit C-source code
+        self.emit("#include <stdio.h>\n")
+        for k in self.cg.map.iterkeys():
+            self.emit(self.funType + self.modifyStateName(k) + "(char* s);\n")
+
+        self.emit_driver()
+
+        for cur_state, transition in self.cg.map.iteritems():
+            self.emit_state(cur_state, transition)
+
+        self.emit_accept_state()
+        self.emit_reject_state()
 
 def test():
     import doctest
--- a/src/dfareg.py	Sat Jul 03 03:49:41 2010 +0900
+++ b/src/dfareg.py	Sun Jul 04 00:33:15 2010 +0900
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/Usr/bin/env python
 
 import copy
 import re
@@ -11,7 +11,7 @@
         self.accepts = accepts
         self.map = map_
 
-    def epsilonExpand(self, set_):
+    def epsilon_expand(self, set_):
         que = set(set_)
         done = set()
         while que:
@@ -28,7 +28,7 @@
         self.start = start
         self.accepts = accepts
         self.map = map_
-    def getRuntime(self):
+    def get_runtime(self):
         return DFARuntime(self)
 
 class DFARuntime(object):
@@ -36,17 +36,17 @@
         self.DFA = DFA
         self.curState = self.DFA.start
 
-    def doTransition(self, char):
+    def do_transition(self, char):
         self.curState = self.DFA.transition(
             self.curState, char)
 
-    def isAcceptState(self):
+    def is_accept_state(self):
         return self.curState in self.DFA.accepts
 
-    def doesAccept(self, input):
+    def does_accept(self, input):
         for alphabet in input:
-            self.doTransition(alphabet)
-        return self.isAcceptState()
+            self.do_transition(alphabet)
+        return self.is_accept_state()
 
 class Token(object):
     CHARACTER  = 0
@@ -174,7 +174,7 @@
     def __init__(self):
         self.stateCount = 0
 
-    def newState(self):
+    def new_state(self):
         self.stateCount += 1
         return self.stateCount
 
@@ -188,14 +188,14 @@
         slot = self.map.setdefault((from_, char), set())
         slot.add(to)
 
-    def newSkelton(self):
+    def new_skelton(self):
         # copy fragment (self), and it return
         newFrag = NFAFragment();
         newFrag.map = copy.deepcopy(self.map)
         return newFrag
 
     def __or__(self, frag):
-        newFrag = self.newSkelton()
+        newFrag = self.new_skelton()
         for k, v in frag.map.iteritems():
             newFrag.map[k] = v.copy()
         return newFrag
@@ -218,8 +218,8 @@
 
     def assemble(self, context):
         frag = NFAFragment()
-        s1 = context.newState()
-        s2 = context.newState()
+        s1 = context.new_state()
+        s2 = context.new_state()
         frag.connect(s1, self.char, s2)
         frag.start = s1
         frag.accepts = frozenset([s2])
@@ -234,7 +234,7 @@
         frag1 = self.op1.assemble(context)
         frag2 = self.op2.assemble(context)
         frag = frag1 | frag2
-        s = context.newState()
+        s = context.new_state()
         frag.connect(s, "", frag1.start)
         frag.connect(s, "", frag2.start)
         frag.start = s
@@ -264,12 +264,12 @@
 
     def assemble(self, context):
         fragOrig = self.op.assemble(context)
-        frag = fragOrig.newSkelton()
+        frag = fragOrig.new_skelton()
 
         for state in fragOrig.accepts:
             frag.connect(state, "", fragOrig.start)
 
-        s = context.newState()
+        s = context.new_state()
         frag.connect(s, "", fragOrig.start)
         frag.start = s
         frag.accepts = fragOrig.accepts | frozenset([s])
@@ -286,17 +286,17 @@
         ret = set()
         for elem in set_:
             ret |= nfa.transition(elem, alpha)
-        return nfa.epsilonExpand(frozenset(ret))
+        return nfa.epsilon_expand(frozenset(ret))
 
-    def stateSetTransition(stateSet, char):
+    def state_set_transition(stateSet, char):
         trans = set()
         for state in stateSet:
             t = nfa.map.setdefault((state, char), None)
-            if not t == None: trans.update(nfa.epsilonExpand(t))
+            if not t == None: trans.update(nfa.epsilon_expand(t))
         return frozenset(trans)
 
     map_ = dict()
-    que =  set(frozenset([nfa.epsilonExpand(set([nfa.start]))]))
+    que =  set(frozenset([nfa.epsilon_expand(set([nfa.start]))]))
     done = set()
 
     while que:
@@ -306,7 +306,7 @@
             for k, v in nfa.map.iteritems():
                 if state == k[0] and k[1] != '':
                     slot = map_.setdefault((stateSet, k[1]), set())
-                    slot.update(nfa.epsilonExpand(v))
+                    slot.update(nfa.epsilon_expand(v))
 
         done.add(stateSet)
 
@@ -316,7 +316,7 @@
 
     return DeterministicFiniteAutomaton(
         transition,
-        nfa.epsilonExpand(frozenset([nfa.start])),
+        nfa.epsilon_expand(frozenset([nfa.start])),
         NonDisjoinSets(nfa.accepts),
         map_
         )
@@ -352,7 +352,7 @@
             self.start = str(fa.start)
         self.inputs = set()
         self.states = set()
-        (self.map, self.accepts) = self.getCallGraph(self.fa)
+        (self.map, self.accepts) = self.create_call_graph(self.fa)
 
     # return state name from set
     def set2name(self, set_):
@@ -360,7 +360,7 @@
         l.sort()
         return ('_'.join(str(x) for x in l))
 
-    def getCallGraph(self, fa):
+    def create_call_graph(self, fa):
         transitionCases = dict()
         if self.type is "DFA":
             transitionCases[fa.start] = set()
@@ -430,8 +430,8 @@
         self.dfa = nfa2dfa(self.nfa)
 
     def matches(self, string):
-        runtime = self.dfa.getRuntime()
-        return runtime.doesAccept(string)
+        runtime = self.dfa.get_runtime()
+        return runtime.does_accept(string)
 
 def compile(regexp):
     return Regexp(regexp)
--- a/src/reg2llvm.py	Sat Jul 03 03:49:41 2010 +0900
+++ b/src/reg2llvm.py	Sun Jul 04 00:33:15 2010 +0900
@@ -108,8 +108,9 @@
         else:
             for state, transition in self.cg.map.iteritems():
                 cases = dict()
-                for case, next_state in transition.iteritems():
-                    cases[self.char_const(case)] = state_list[next_state]
+                for case, next_states in transition.iteritems():
+                    for next_state in next_states:
+                        cases[self.char_const(case)] = state_list[next_state]
                 state_fun = state_list[state]
                 emit = Builder.new(state_fun.append_basic_block("entry"))
                 ptr = emit.gep(self.matchp_str, (self.const_zero, state_fun.args[0]))
@@ -141,7 +142,7 @@
         self.ee = ExecutionEngine.new(self.mp)
         self.main = main
 
-    def getExecutionEngine(self):
+    def get_execution_engine(self):
         return self.ee
 
     def optimize(self):
@@ -161,7 +162,7 @@
         for fun in self.mod.functions:
             fp.run(fun)
 
-    def printModule(self):
+    def print_module(self):
         print self.mod
 
     def execute(self):