# HG changeset patch # User Ryoma SHINYA # Date 1278171195 -32400 # Node ID 2d00b46ce34d037837c8efe7e3242759fdda4fda # Parent 973b596bd1247c7a9b9ee39f66f1ec5ad34fbf6a modify CTranslator, improve emiting statement(if switch-statement unnecessary, not emit it.). and separate some code segment. diff -r 973b596bd124 -r 2d00b46ce34d code/graph/reg.dot --- a/code/graph/reg.dot Sat Jul 03 03:49:41 2010 +0900 +++ b/code/graph/reg.dot Sun Jul 04 00:33:15 2010 +0900 @@ -3,7 +3,7 @@ d2ttikzedgelabels = true; d2tstyleonly = true; d2tdocpreamble = "\usetikzlibrary{automata}"; - d2tfigpreamble = " ikzstyle{every state}= [draw=blue!50, shape=circle, very thick,fill=blue!20]"; + d2tfigpreamble = " ikzstyle{every state}= [draw=blue!50, shape=circle, very thick,fill=blue!20]"; edge [lblstyle="fill=blue!20", style="arrows=->", topath="bend left"]; node [shape="circle", style="state"]; diff -r 973b596bd124 -r 2d00b46ce34d code/graph/sample.dot --- a/code/graph/sample.dot Sat Jul 03 03:49:41 2010 +0900 +++ b/code/graph/sample.dot Sun Jul 04 00:33:15 2010 +0900 @@ -1,5 +1,5 @@ digraph G { - d2ttikzedgelabels = true; + d2ttikzedgelabels = true; d2tstyleonly = true; d2tdocpreamble = "\usetikzlibrary{automata}"; d2tfigpreamble = "\tikzstyle{every state}=\ diff -r 973b596bd124 -r 2d00b46ce34d src/benchReg.py --- a/src/benchReg.py Sat Jul 03 03:49:41 2010 +0900 +++ b/src/benchReg.py Sun Jul 04 00:33:15 2010 +0900 @@ -22,7 +22,7 @@ regLLVM = RegLLVM("regllvm", opts.regexp, opts.string, opts.implLabel, opts.optimizeFlag, opts.dumpFlag) -if (opts.printFlag): regLLVM.printModule() +if (opts.printFlag): regLLVM.print_module() if (opts.executeFlag): print "LLVM :", Timer(setup='from __main__ import regLLVM', stmt='regLLVM.execute()').timeit(opts.number) diff -r 973b596bd124 -r 2d00b46ce34d src/cTranslator.py --- a/src/cTranslator.py Sat Jul 03 03:49:41 2010 +0900 +++ b/src/cTranslator.py Sun Jul 04 00:33:15 2010 +0900 @@ -25,16 +25,26 @@ self.dfaStateNameHash = self.createDFAStateNameHash() def modifyStateName(self, stateName): + if stateName is "accept" or stateName is "reject": + return stateName if self.cg.type is "DFA": return "state_"+self.dfaStateNameHash[stateName] else: return "state_"+stateName - def translateFromCallGraph(self): - # self.emit C-source code - self.emit("#include \n") - for k in self.cg.map.iterkeys(): - self.emit(self.funType + self.modifyStateName(k) + "(char* s);\n") + def emit_accept_state(self): + self.emit (""" +%saccept(char* s) { +\tprintf(\"\\nstring matches regexp. \\n\\n\"); +}\n""" % self.funType) + + def emit_reject_state(self): + self.emit (""" +%sreject(char* s) { +\tprintf(\"\\nstring does not match regexp. \\n\\n\"); +}\n""" % self.funType) + + def emit_driver(self): self.emit(self.funType + 'accept(char* s);\n') self.emit(self.funType + 'reject(char* s);\n') self.emit(""" @@ -50,45 +60,63 @@ \treturn 0; }\n\n""") - for k, v in self.cg.map.iteritems(): - self.emit(self.funType + self.modifyStateName(k) + "(char* s) {\n") - if self.debug: - self.emit("\tprintf(\"state: %s, input: %%s\\n\", s);\n" % (k)) - if self.cg.type is "NFA": - sLocal = "s_local" - self.emit("\tchar* %s = s;\n" % (sLocal)) - # epsilon-transition - for ss in (ss for i,ss in v.iteritems() if i == ''): - for s in ss: - self.emit("\t%s%s(%s);\n" % (self.callType, self.modifyStateName(s), sLocal)) - else: - sLocal = "s" + def emit_switch(self, case, default=None): + if self.cg.type is "NFA": + sLocal = "s_local" + else: + sLocal = "s" + + self.emit("\tswitch(*%s++) {\n" % (sLocal)) - self.emit("\tswitch(*%s++) {\n" % (sLocal)) + for input, nextStates in case.iteritems(): + if input != '': + self.emit("\t\tcase '%s': \n" % (input)) + for nextState in nextStates: + self.emit("\t\t\t%s%s(%s);\n" % (self.callType, self.modifyStateName(nextState), sLocal)) + if self.breakStatement != '': self.emit(self.breakStatement+'\n') + + if default: + self.emit( """\t\tdefault:\n\t\t\t%s%s(NULL);\n""" % (self.callType, default)) + self.emit("\t}") + - for input, nextStates in v.iteritems(): - if input != '': - self.emit("\t\tcase '%s': \n" % (input)) - for nextState in nextStates: - self.emit("\t\t\t%s%s(%s);\n" % (self.callType, self.modifyStateName(nextState), sLocal)) - if self.breakStatement != '': self.emit(self.breakStatement+'\n') + def emit_state(self, cur_state, transition): + self.emit(self.funType + self.modifyStateName(cur_state) + "(char* s) {\n") + if self.debug: + self.emit("\tprintf(\"state: %s, input: %%s\\n\", s);\n" % (cur_state)) + if self.cg.type is "NFA": + sLocal = "s_local" + self.emit("\tchar* %s = s;\n" % (sLocal)) + if '' in transition: + epsilon_transition = transition.pop('') + for n in epsilon_transition: + self.emit("\t%s%s(%s);\n" % (self.callType, self.modifyStateName(n), sLocal)) + else: + sLocal = "s" - if k in self.cg.accepts: - self.emit( """\t\tcase '\\0':\n\t\t\t%saccept(%s);\n%s\n""" % (self.callType, sLocal, self.breakStatement)) + if cur_state in self.cg.accepts: + transition['\\0'] = ["accept"] + + if transition: if self.cg.type is "DFA": - self.emit("\t\tdefault: %sreject(%s);\n\t}\n" % (self.callType, sLocal)) + self.emit_switch(transition, default="reject") else: - self.emit("\t}\n") - self.emit("}\n\n") + self.emit_switch(transition) + self.emit("\n}\n\n") - self.emit (""" -%saccept(char* s) { -\tprintf(\"\\nstring matches regexp. \\n\\n\"); -}\n""" % self.funType) - self.emit (""" -%sreject(char* s) { -\tprintf(\"\\nstring does not match regexp. \\n\\n\"); -}\n""" % self.funType) + def translateFromCallGraph(self): + # self.emit C-source code + self.emit("#include \n") + for k in self.cg.map.iterkeys(): + self.emit(self.funType + self.modifyStateName(k) + "(char* s);\n") + + self.emit_driver() + + for cur_state, transition in self.cg.map.iteritems(): + self.emit_state(cur_state, transition) + + self.emit_accept_state() + self.emit_reject_state() def test(): import doctest diff -r 973b596bd124 -r 2d00b46ce34d src/dfareg.py --- a/src/dfareg.py Sat Jul 03 03:49:41 2010 +0900 +++ b/src/dfareg.py Sun Jul 04 00:33:15 2010 +0900 @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/Usr/bin/env python import copy import re @@ -11,7 +11,7 @@ self.accepts = accepts self.map = map_ - def epsilonExpand(self, set_): + def epsilon_expand(self, set_): que = set(set_) done = set() while que: @@ -28,7 +28,7 @@ self.start = start self.accepts = accepts self.map = map_ - def getRuntime(self): + def get_runtime(self): return DFARuntime(self) class DFARuntime(object): @@ -36,17 +36,17 @@ self.DFA = DFA self.curState = self.DFA.start - def doTransition(self, char): + def do_transition(self, char): self.curState = self.DFA.transition( self.curState, char) - def isAcceptState(self): + def is_accept_state(self): return self.curState in self.DFA.accepts - def doesAccept(self, input): + def does_accept(self, input): for alphabet in input: - self.doTransition(alphabet) - return self.isAcceptState() + self.do_transition(alphabet) + return self.is_accept_state() class Token(object): CHARACTER = 0 @@ -174,7 +174,7 @@ def __init__(self): self.stateCount = 0 - def newState(self): + def new_state(self): self.stateCount += 1 return self.stateCount @@ -188,14 +188,14 @@ slot = self.map.setdefault((from_, char), set()) slot.add(to) - def newSkelton(self): + def new_skelton(self): # copy fragment (self), and it return newFrag = NFAFragment(); newFrag.map = copy.deepcopy(self.map) return newFrag def __or__(self, frag): - newFrag = self.newSkelton() + newFrag = self.new_skelton() for k, v in frag.map.iteritems(): newFrag.map[k] = v.copy() return newFrag @@ -218,8 +218,8 @@ def assemble(self, context): frag = NFAFragment() - s1 = context.newState() - s2 = context.newState() + s1 = context.new_state() + s2 = context.new_state() frag.connect(s1, self.char, s2) frag.start = s1 frag.accepts = frozenset([s2]) @@ -234,7 +234,7 @@ frag1 = self.op1.assemble(context) frag2 = self.op2.assemble(context) frag = frag1 | frag2 - s = context.newState() + s = context.new_state() frag.connect(s, "", frag1.start) frag.connect(s, "", frag2.start) frag.start = s @@ -264,12 +264,12 @@ def assemble(self, context): fragOrig = self.op.assemble(context) - frag = fragOrig.newSkelton() + frag = fragOrig.new_skelton() for state in fragOrig.accepts: frag.connect(state, "", fragOrig.start) - s = context.newState() + s = context.new_state() frag.connect(s, "", fragOrig.start) frag.start = s frag.accepts = fragOrig.accepts | frozenset([s]) @@ -286,17 +286,17 @@ ret = set() for elem in set_: ret |= nfa.transition(elem, alpha) - return nfa.epsilonExpand(frozenset(ret)) + return nfa.epsilon_expand(frozenset(ret)) - def stateSetTransition(stateSet, char): + def state_set_transition(stateSet, char): trans = set() for state in stateSet: t = nfa.map.setdefault((state, char), None) - if not t == None: trans.update(nfa.epsilonExpand(t)) + if not t == None: trans.update(nfa.epsilon_expand(t)) return frozenset(trans) map_ = dict() - que = set(frozenset([nfa.epsilonExpand(set([nfa.start]))])) + que = set(frozenset([nfa.epsilon_expand(set([nfa.start]))])) done = set() while que: @@ -306,7 +306,7 @@ for k, v in nfa.map.iteritems(): if state == k[0] and k[1] != '': slot = map_.setdefault((stateSet, k[1]), set()) - slot.update(nfa.epsilonExpand(v)) + slot.update(nfa.epsilon_expand(v)) done.add(stateSet) @@ -316,7 +316,7 @@ return DeterministicFiniteAutomaton( transition, - nfa.epsilonExpand(frozenset([nfa.start])), + nfa.epsilon_expand(frozenset([nfa.start])), NonDisjoinSets(nfa.accepts), map_ ) @@ -352,7 +352,7 @@ self.start = str(fa.start) self.inputs = set() self.states = set() - (self.map, self.accepts) = self.getCallGraph(self.fa) + (self.map, self.accepts) = self.create_call_graph(self.fa) # return state name from set def set2name(self, set_): @@ -360,7 +360,7 @@ l.sort() return ('_'.join(str(x) for x in l)) - def getCallGraph(self, fa): + def create_call_graph(self, fa): transitionCases = dict() if self.type is "DFA": transitionCases[fa.start] = set() @@ -430,8 +430,8 @@ self.dfa = nfa2dfa(self.nfa) def matches(self, string): - runtime = self.dfa.getRuntime() - return runtime.doesAccept(string) + runtime = self.dfa.get_runtime() + return runtime.does_accept(string) def compile(regexp): return Regexp(regexp) diff -r 973b596bd124 -r 2d00b46ce34d src/reg2llvm.py --- a/src/reg2llvm.py Sat Jul 03 03:49:41 2010 +0900 +++ b/src/reg2llvm.py Sun Jul 04 00:33:15 2010 +0900 @@ -108,8 +108,9 @@ else: for state, transition in self.cg.map.iteritems(): cases = dict() - for case, next_state in transition.iteritems(): - cases[self.char_const(case)] = state_list[next_state] + for case, next_states in transition.iteritems(): + for next_state in next_states: + cases[self.char_const(case)] = state_list[next_state] state_fun = state_list[state] emit = Builder.new(state_fun.append_basic_block("entry")) ptr = emit.gep(self.matchp_str, (self.const_zero, state_fun.args[0])) @@ -141,7 +142,7 @@ self.ee = ExecutionEngine.new(self.mp) self.main = main - def getExecutionEngine(self): + def get_execution_engine(self): return self.ee def optimize(self): @@ -161,7 +162,7 @@ for fun in self.mod.functions: fp.run(fun) - def printModule(self): + def print_module(self): print self.mod def execute(self):