changeset 0:02656ea776f3

Regexp-Compiler with LLVM
author ryoma <e075725@ie.u-ryukyu.ac.jp>
date Tue, 15 Jun 2010 00:54:59 +0900
parents
children d5e81ef9afba
files __init__.py benchReg.py code/as/reg.gcc.s code/as/reg.ll.s code/as/regcbc.s code/c/reg.c code/c/regcbc.c code/ll/emit.ll code/ll/reg.ll code/ll/regllvm.label.ll code/ll/regllvm.label_optimized.ll dfareg.py ll reg2llvm.py test/test_DFARuntime.py test/test_NFA2DFA.py
diffstat 16 files changed, 2258 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/__init__.py	Tue Jun 15 00:54:59 2010 +0900
@@ -0,0 +1,2 @@
+def compile(regexp):
+    return Regexp(regexp)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/benchReg.py	Tue Jun 15 00:54:59 2010 +0900
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+
+import sys,re
+from optparse import OptionParser
+from timeit import Timer
+from dfareg import Regexp
+from reg2llvm import RegLLVM
+
+myusage = "%prog [-O] [-p] [-s string] [-r regexp] [-E]"
+psr = OptionParser(usage=myusage)
+psr.add_option("-O", action="store_true", dest="optimizeFlag", default=False, help="optimimzation")
+psr.add_option("-L", action="store_true", dest="implLabel", default=False, help="impliment with label")
+psr.add_option("-E", action="store_true", dest="executeFlag", default=False, help="execute Module")
+psr.add_option("-p", action="store_true", dest="printFlag", default=False, help="print module")
+psr.add_option("-d", action="store_true", dest="dumpFlag", default=False, help="add debug stmt to module")
+psr.add_option("-n", action="store", type="int", dest="number", default=1000, help="number of eval", metavar="INT")
+psr.add_option("-r", action="store", type="string", dest="regexp", default="(A|B)*C", help="regexp", metavar="FILE")
+psr.add_option("-s", action="store", type="string", dest="string", default="A"+"B"*500+"C", help="string", metavar="FILE")
+(opts, args) = psr.parse_args(sys.argv)
+
+print "regexp = \"%s\", string = \"%s\", loop = %d" % (opts.regexp, opts.string, opts.number)
+
+regLLVM = RegLLVM("regllvm", opts.regexp, opts.string, opts.implLabel, opts.optimizeFlag, opts.dumpFlag)
+
+if (opts.printFlag):   regLLVM.printModule()
+
+if (opts.executeFlag):
+    print "LLVM  :", Timer(setup='from __main__ import regLLVM', stmt='regLLVM.execute()').timeit(opts.number)
+
+    reg = re.compile("^%s$" % opts.regexp)
+    print "re    :", Timer(setup='from __main__ import reg', stmt='reg.search("%s")' % opts.string).timeit(opts.number)
+
+    reg = Regexp(opts.regexp)
+    print "dfareg:", Timer(setup='from __main__ import reg', stmt='reg.matches("%s")' % opts.string).timeit(opts.number)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/code/as/reg.gcc.s	Tue Jun 15 00:54:59 2010 +0900
@@ -0,0 +1,326 @@
+	.cstring
+LC0:
+	.ascii "\12string matches regexp. \12\0"
+	.align 2
+LC1:
+	.ascii "\12string does not match regexp. \12\0"
+	.text
+	.align 4,0x90
+.globl _state_8
+_state_8:
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%ebx
+	subl	$20, %esp
+	call	___i686.get_pc_thunk.bx
+"L00000000001$pb":
+	cmpb	$0, (%ecx)
+	jne	L5
+	leal	LC0-"L00000000001$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_puts$stub
+	addl	$20, %esp
+	popl	%ebx
+	popl	%ebp
+	ret
+	.align 4,0x90
+L5:
+	leal	LC1-"L00000000001$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_puts$stub
+	addl	$20, %esp
+	popl	%ebx
+	popl	%ebp
+	ret
+	.align 4,0x90
+.globl _state_1_2_3_5_7
+_state_1_2_3_5_7:
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%ebx
+	call	___i686.get_pc_thunk.bx
+"L00000000002$pb":
+	subl	$20, %esp
+L22:
+	movzbl	(%ecx), %eax
+	addl	$1, %ecx
+	cmpb	$66, %al
+	je	L22
+	cmpb	$67, %al
+	je	L18
+	cmpb	$65, %al
+	je	L22
+L26:
+	leal	LC1-"L00000000002$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_puts$stub
+	addl	$20, %esp
+	popl	%ebx
+	popl	%ebp
+	ret
+	.align 4,0x90
+L18:
+	cmpb	$0, (%ecx)
+	jne	L26
+	leal	LC0-"L00000000002$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_puts$stub
+	addl	$20, %esp
+	popl	%ebx
+	popl	%ebp
+	ret
+	.align 4,0x90
+.globl _state_1_3_4_5_7
+_state_1_3_4_5_7:
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%ebx
+	call	___i686.get_pc_thunk.bx
+"L00000000003$pb":
+	subl	$20, %esp
+L38:
+	movzbl	(%ecx), %eax
+	addl	$1, %ecx
+	cmpb	$66, %al
+	je	L38
+	cmpb	$67, %al
+	je	L41
+	cmpb	$65, %al
+	je	L48
+L46:
+	leal	LC1-"L00000000003$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_puts$stub
+	addl	$20, %esp
+	popl	%ebx
+	popl	%ebp
+	ret
+	.align 4,0x90
+L48:
+	addl	$20, %esp
+	popl	%ebx
+	popl	%ebp
+	jmp	_state_1_2_3_5_7
+	.align 4,0x90
+L41:
+	cmpb	$0, (%ecx)
+	jne	L46
+	leal	LC0-"L00000000003$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_puts$stub
+	addl	$20, %esp
+	popl	%ebx
+	popl	%ebp
+	ret
+	.align 4,0x90
+.globl _state_1_3_5_6_7
+_state_1_3_5_6_7:
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%ebx
+	subl	$20, %esp
+	movzbl	(%ecx), %eax
+	call	___i686.get_pc_thunk.bx
+"L00000000004$pb":
+	leal	1(%ecx), %edx
+	cmpb	$66, %al
+	je	L63
+	cmpb	$67, %al
+	je	L53
+L72:
+	cmpb	$65, %al
+	je	L73
+L65:
+	leal	LC1-"L00000000004$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_puts$stub
+	addl	$20, %esp
+	popl	%ebx
+	popl	%ebp
+	ret
+	.align 4,0x90
+L73:
+	addl	$20, %esp
+	movl	%edx, %ecx
+	popl	%ebx
+	popl	%ebp
+	jmp	_state_1_2_3_5_7
+	.align 4,0x90
+L63:
+	movzbl	(%edx), %eax
+	addl	$1, %edx
+	cmpb	$66, %al
+	je	L63
+	cmpb	$67, %al
+	jne	L72
+	cmpb	$0, (%edx)
+	jne	L65
+	leal	LC0-"L00000000004$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_puts$stub
+	.align 4,0x90
+L74:
+	addl	$20, %esp
+	popl	%ebx
+	popl	%ebp
+	ret
+	.align 4,0x90
+L53:
+	cmpb	$0, 1(%ecx)
+	jne	L65
+	leal	LC0-"L00000000004$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_puts$stub
+	jmp	L74
+	.cstring
+LC2:
+	.ascii "regexp: (A|B)*C\0"
+LC3:
+	.ascii "number of state: 4\0"
+LC4:
+	.ascii "string: %s\12\0"
+	.text
+	.align 4,0x90
+.globl _main
+_main:
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%esi
+	pushl	%ebx
+	call	___i686.get_pc_thunk.bx
+"L00000000005$pb":
+	subl	$16, %esp
+	movl	12(%ebp), %esi
+	leal	LC2-"L00000000005$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_puts$stub
+	leal	LC3-"L00000000005$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_puts$stub
+	movl	4(%esi), %eax
+	movl	%eax, 4(%esp)
+	leal	LC4-"L00000000005$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_printf$stub
+	movl	4(%esi), %eax
+	movzbl	(%eax), %edx
+	leal	1(%eax), %ecx
+	cmpb	$66, %dl
+	je	L89
+	cmpb	$67, %dl
+	je	L79
+	cmpb	$65, %dl
+	je	L97
+L91:
+	leal	LC1-"L00000000005$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_puts$stub
+	addl	$16, %esp
+	popl	%ebx
+	popl	%esi
+	popl	%ebp
+	ret
+	.align 4,0x90
+L89:
+	movzbl	(%ecx), %eax
+	addl	$1, %ecx
+	cmpb	$66, %al
+	je	L89
+	cmpb	$67, %al
+	je	L86
+	cmpb	$65, %al
+	jne	L91
+L97:
+	addl	$16, %esp
+	popl	%ebx
+	popl	%esi
+	popl	%ebp
+	jmp	_state_1_2_3_5_7
+	.align 4,0x90
+L79:
+	cmpb	$0, 1(%eax)
+	jne	L91
+L96:
+	leal	LC0-"L00000000005$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_puts$stub
+	addl	$16, %esp
+	popl	%ebx
+	popl	%esi
+	popl	%ebp
+	ret
+	.align 4,0x90
+L86:
+	cmpb	$0, (%ecx)
+	jne	L91
+	.p2align 4,,3
+	jmp	L96
+	.align 4,0x90
+.globl _accept
+_accept:
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%ebx
+	call	___i686.get_pc_thunk.bx
+"L00000000006$pb":
+	subl	$20, %esp
+	leal	LC0-"L00000000006$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_puts$stub
+	addl	$20, %esp
+	popl	%ebx
+	popl	%ebp
+	ret
+	.align 4,0x90
+.globl _reject
+_reject:
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%ebx
+	call	___i686.get_pc_thunk.bx
+"L00000000007$pb":
+	subl	$20, %esp
+	leal	LC1-"L00000000007$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_puts$stub
+	addl	$20, %esp
+	popl	%ebx
+	popl	%ebp
+	ret
+	.picsymbol_stub
+L_printf$stub:
+	.indirect_symbol _printf
+	call	LPC$1
+LPC$1:	popl	%eax
+	movl	L1$lz-LPC$1(%eax),%edx
+	jmp	*%edx
+L_printf$stub_binder:
+	lea	L1$lz-LPC$1(%eax),%eax
+	pushl	%eax
+	jmp	dyld_stub_binding_helper
+	.lazy_symbol_pointer
+L1$lz:
+	.indirect_symbol _printf
+	.long	L_printf$stub_binder
+	.picsymbol_stub
+L_puts$stub:
+	.indirect_symbol _puts
+	call	LPC$2
+LPC$2:	popl	%eax
+	movl	L2$lz-LPC$2(%eax),%edx
+	jmp	*%edx
+L_puts$stub_binder:
+	lea	L2$lz-LPC$2(%eax),%eax
+	pushl	%eax
+	jmp	dyld_stub_binding_helper
+	.lazy_symbol_pointer
+L2$lz:
+	.indirect_symbol _puts
+	.long	L_puts$stub_binder
+	.subsections_via_symbols
+	.section __TEXT,__textcoal_nt,coalesced,pure_instructions
+	.weak_definition	___i686.get_pc_thunk.bx
+	.private_extern	___i686.get_pc_thunk.bx
+___i686.get_pc_thunk.bx:
+	movl	(%esp), %ebx
+	ret
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/code/as/reg.ll.s	Tue Jun 15 00:54:59 2010 +0900
@@ -0,0 +1,98 @@
+        .text
+        .align  4,0x90
+        .globl  _main
+_main:
+Leh_func_begin1:
+Llabel1:
+        subl    $12, %esp
+        movl    $4, (%esp)
+        call    _malloc
+        movl    16(%esp), %ecx
+        movl    %ecx, (%eax)
+        .align  4,0x90
+LBB1_1: ## state_1_3_5_6_7
+        movl    (%eax), %ecx
+        leal    1(%ecx), %edx
+        movl    %edx, (%eax)
+        movb    __unnamed_1_0(%ecx), %cl
+        cmpb    $65, %cl
+        je      LBB1_1  ## state_1_3_5_6_7
+LBB1_2: ## state_1_3_5_6_7
+        cmpb    $66, %cl
+        je      LBB1_1  ## state_1_3_5_6_7
+LBB1_3: ## state_1_3_5_6_7
+        cmpb    $67, %cl
+        jne     LBB1_6  ## reject
+LBB1_4: ## state_8
+        movl    (%eax), %ecx
+        leal    1(%ecx), %edx
+        movl    %edx, (%eax)
+        movb    __unnamed_1_0(%ecx), %cl
+        testb   %cl, %cl
+        jne     LBB1_6  ## reject
+LBB1_5: ## accpet
+        movl    %eax, (%esp)
+        call    _free
+        movl    $1, %eax
+        addl    $12, %esp
+        ret
+LBB1_6: ## reject
+        movl    %eax, (%esp)
+        call    _free
+        xorl    %eax, %eax
+        addl    $12, %esp
+        ret
+Leh_func_end1:
+        .data
+        .globl __unnamed_1_0
+__unnamed_1_0:                          ##
+        .asciz  "ABBBBBBC"
+        .globl __unnamed_2_1
+        .align  4
+__unnamed_2_1:                          ##
+        .asciz  "state: %s, arg: %c(int %d)\n"
+
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EH_frame0:
+Lsection_eh_frame:
+Leh_frame_common:
+        .set    Lset1eh,Leh_frame_common_end-Leh_frame_common_begin
+        .long   Lset1eh
+Leh_frame_common_begin:
+        .long   0x0
+        .byte   0x1
+        .asciz  "zR"
+        .byte   0x1
+        .byte   0x7C
+        .byte   0x8
+        .byte   0x1
+        .byte   0x1B
+        .byte   0xC
+        .byte   0x5
+        .byte   0x4
+        .byte   0x88
+        .byte   0x1
+        .align  2
+Leh_frame_common_end:
+
+        .globl  _main.eh
+_main.eh:
+        .set    Lset2eh,Leh_frame_end1-Leh_frame_begin1
+        .long   Lset2eh
+Leh_frame_begin1:
+        .long   Leh_frame_begin1-Leh_frame_common
+        .long   Leh_func_begin1-.
+        .set    Lset3eh,Leh_func_end1-Leh_func_begin1
+        .long   Lset3eh
+        .byte   0x0
+        .byte   0xE
+        .byte   0x10
+        .byte   0x4
+        .set    Lset4eh,Llabel1-Leh_func_begin1
+        .long   Lset4eh
+        .byte   0xD
+        .byte   0x5
+        .align  2
+Leh_frame_end1:
+        .subsections_via_symbols
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/code/as/regcbc.s	Tue Jun 15 00:54:59 2010 +0900
@@ -0,0 +1,317 @@
+	.cstring
+	.align 2
+LC0:
+	.ascii "\12string does not match regexp. \12\0"
+	.text
+	.align 4,0x90
+.globl _reject
+_reject:
+	call	L3
+"L00000000001$pb":
+L3:
+	popl	%ecx
+	pushl	%ebp
+	movl	%esp, %ebp
+	leal	LC0-"L00000000001$pb"(%ecx), %eax
+	movl	%eax, 8(%ebp)
+	leave
+	jmp	L_puts$stub
+	.cstring
+LC1:
+	.ascii "\12string matches regexp. \12\0"
+	.text
+	.align 4,0x90
+.globl _accept
+_accept:
+	call	L6
+"L00000000002$pb":
+L6:
+	popl	%ecx
+	pushl	%ebp
+	movl	%esp, %ebp
+	leal	LC1-"L00000000002$pb"(%ecx), %eax
+	movl	%eax, 8(%ebp)
+	leave
+	jmp	L_puts$stub
+	.align 4,0x90
+.globl _state_14
+_state_14:
+	pushl	%ebp
+	movl	%esp, %ebp
+	movl	8(%ebp), %eax
+	call	L13
+"L00000000003$pb":
+L13:
+	popl	%ecx
+	cmpb	$0, (%eax)
+	jne	L8
+	leal	LC1-"L00000000003$pb"(%ecx), %eax
+	movl	%eax, 8(%ebp)
+	leave
+	jmp	L_puts$stub
+	.align 4,0x90
+L8:
+	leal	LC0-"L00000000003$pb"(%ecx), %eax
+	movl	%eax, 8(%ebp)
+	leave
+	jmp	L_puts$stub
+	.align 4,0x90
+.globl _state_12
+_state_12:
+	pushl	%ebp
+	movl	%esp, %ebp
+	movl	8(%ebp), %eax
+	call	L19
+"L00000000004$pb":
+L19:
+	popl	%ecx
+	cmpb	$0, (%eax)
+	jne	L15
+	leal	LC1-"L00000000004$pb"(%ecx), %eax
+	movl	%eax, 8(%ebp)
+	leave
+	jmp	L_puts$stub
+	.align 4,0x90
+L15:
+	leal	LC0-"L00000000004$pb"(%ecx), %eax
+	movl	%eax, 8(%ebp)
+	leave
+	jmp	L_puts$stub
+	.align 4,0x90
+.globl _state_9_11_13_15
+_state_9_11_13_15:
+	pushl	%ebp
+	movl	%esp, %ebp
+	movl	8(%ebp), %edx
+	call	L31
+"L00000000005$pb":
+L31:
+	popl	%ecx
+	movzbl	(%edx), %eax
+	cmpb	$79, %al
+	je	L22
+	cmpb	$111, %al
+	je	L22
+L21:
+	leal	LC0-"L00000000005$pb"(%ecx), %eax
+	movl	%eax, 8(%ebp)
+	leave
+	jmp	L_puts$stub
+	.align 4,0x90
+L22:
+	cmpb	$0, 1(%edx)
+	jne	L21
+	leal	LC1-"L00000000005$pb"(%ecx), %eax
+	movl	%eax, 8(%ebp)
+	leave
+	jmp	L_puts$stub
+	.align 4,0x90
+.globl _state_7_11_13_15
+_state_7_11_13_15:
+	pushl	%ebp
+	movl	%esp, %ebp
+	movl	8(%ebp), %edx
+	call	L43
+"L00000000006$pb":
+L43:
+	popl	%ecx
+	movzbl	(%edx), %eax
+	cmpb	$79, %al
+	je	L34
+	cmpb	$111, %al
+	je	L34
+L33:
+	leal	LC0-"L00000000006$pb"(%ecx), %eax
+	movl	%eax, 8(%ebp)
+	leave
+	jmp	L_puts$stub
+	.align 4,0x90
+L34:
+	cmpb	$0, 1(%edx)
+	jne	L33
+	leal	LC1-"L00000000006$pb"(%ecx), %eax
+	movl	%eax, 8(%ebp)
+	leave
+	jmp	L_puts$stub
+	.align 4,0x90
+.globl _state_4_6_8_10
+_state_4_6_8_10:
+	pushl	%ebp
+	movl	%esp, %ebp
+	movl	8(%ebp), %eax
+	call	L52
+"L00000000007$pb":
+L52:
+	popl	%ecx
+	leal	1(%eax), %edx
+	movzbl	(%eax), %eax
+	cmpb	$79, %al
+	je	L46
+	cmpb	$111, %al
+	je	L51
+	leal	LC0-"L00000000007$pb"(%ecx), %eax
+	movl	%eax, 8(%ebp)
+	leave
+	jmp	L_puts$stub
+	.align 4,0x90
+L46:
+	movl	%edx, 8(%ebp)
+	leave
+	jmp	_state_9_11_13_15
+	.align 4,0x90
+L51:
+	movl	%edx, 8(%ebp)
+	leave
+	jmp	_state_7_11_13_15
+	.align 4,0x90
+.globl _state_2_6_8_10
+_state_2_6_8_10:
+	pushl	%ebp
+	movl	%esp, %ebp
+	movl	8(%ebp), %eax
+	call	L60
+"L00000000008$pb":
+L60:
+	popl	%ecx
+	leal	1(%eax), %edx
+	movzbl	(%eax), %eax
+	cmpb	$79, %al
+	je	L55
+	cmpb	$111, %al
+	je	L59
+	leal	LC0-"L00000000008$pb"(%ecx), %eax
+	movl	%eax, 8(%ebp)
+	leave
+	jmp	L_puts$stub
+	.align 4,0x90
+L55:
+	movl	%edx, 8(%ebp)
+	leave
+	jmp	_state_9_11_13_15
+	.align 4,0x90
+L59:
+	movl	%edx, 8(%ebp)
+	leave
+	jmp	_state_7_11_13_15
+	.cstring
+LC2:
+	.ascii "regexp: (f|F)(o|O)(o|O)\0"
+LC3:
+	.ascii "number of state: 7\0"
+LC4:
+	.ascii "string: %s\12\0"
+	.text
+	.align 4,0x90
+.globl _main
+_main:
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%esi
+	pushl	%ebx
+	call	L75
+"L00000000009$pb":
+L75:
+	popl	%ebx
+	subl	$16, %esp
+	movl	12(%ebp), %esi
+	leal	LC2-"L00000000009$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_puts$stub
+	leal	LC3-"L00000000009$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_puts$stub
+	movl	4(%esi), %eax
+	movl	%eax, 4(%esp)
+	leal	LC4-"L00000000009$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_printf$stub
+	movl	4(%esi), %edx
+	movzbl	(%edx), %eax
+	cmpb	$70, %al
+	je	L64
+	cmpb	$102, %al
+	je	L64
+L69:
+	leal	LC0-"L00000000009$pb"(%ebx), %eax
+	movl	%eax, (%esp)
+	call	L_puts$stub
+	addl	$16, %esp
+	xorl	%eax, %eax
+	popl	%ebx
+	popl	%esi
+	leave
+	ret
+	.align 4,0x90
+L64:
+	movzbl	1(%edx), %eax
+	leal	2(%edx), %ecx
+	cmpb	$79, %al
+	je	L70
+	cmpb	$111, %al
+	jne	L69
+	movl	%ecx, (%esp)
+	call	_state_7_11_13_15
+	addl	$16, %esp
+	xorl	%eax, %eax
+	popl	%ebx
+	popl	%esi
+	leave
+	ret
+	.align 4,0x90
+L70:
+	movl	%ecx, (%esp)
+	call	_state_9_11_13_15
+	addl	$16, %esp
+	xorl	%eax, %eax
+	popl	%ebx
+	popl	%esi
+	leave
+	ret
+	.align 4,0x90
+.globl _state_1_3_5
+_state_1_3_5:
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%ebx
+	movl	8(%ebp), %edx
+	call	L91
+"L00000000010$pb":
+L91:
+	popl	%ebx
+	movzbl	(%edx), %eax
+	cmpb	$70, %al
+	je	L79
+	cmpb	$102, %al
+	je	L79
+L88:
+	leal	LC0-"L00000000010$pb"(%ebx), %eax
+	movl	%eax, 8(%ebp)
+	popl	%ebx
+	leave
+	jmp	L_puts$stub
+	.align 4,0x90
+L79:
+	movzbl	1(%edx), %eax
+	leal	2(%edx), %ecx
+	cmpb	$79, %al
+	je	L84
+	cmpb	$111, %al
+	jne	L88
+	movl	%ecx, 8(%ebp)
+	popl	%ebx
+	leave
+	jmp	_state_7_11_13_15
+	.align 4,0x90
+L84:
+	movl	%ecx, 8(%ebp)
+	popl	%ebx
+	leave
+	jmp	_state_9_11_13_15
+	.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5
+L_printf$stub:
+	.indirect_symbol _printf
+	hlt ; hlt ; hlt ; hlt ; hlt
+L_puts$stub:
+	.indirect_symbol _puts
+	hlt ; hlt ; hlt ; hlt ; hlt
+	.subsections_via_symbols
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/code/c/reg.c	Tue Jun 15 00:54:59 2010 +0900
@@ -0,0 +1,81 @@
+#include <stdio.h>
+
+__code state_8(char* s);
+__code state_1_3_5_6_7(char* s);
+__code state_1_3_4_5_7(char* s);
+__code state_1_2_3_5_7(char* s);
+__code accept(char* s);
+__code reject(char* s);
+
+int main(int argc, char* argv[]) {
+	puts("regexp: (A|B)*C");
+	puts("number of state: 4");
+	printf("string: %s\n", argv[1]);
+	goto state_1_3_5_6_7(argv[1]);
+	return 0;
+}
+
+__code state_8(char* s) {
+	switch(*s++) {
+		case '\0': 
+			goto accept(s);
+			break;
+		default: goto reject(s);
+	}
+}
+
+__code state_1_3_5_6_7(char* s) {
+	switch(*s++) {
+		case 'A': 
+			goto state_1_2_3_5_7(s);
+			break;
+		case 'C': 
+			goto state_8(s);
+			break;
+		case 'B': 
+			goto state_1_3_4_5_7(s);
+			break;
+		default: goto reject(s);
+	}
+}
+
+__code state_1_3_4_5_7(char* s) {
+	switch(*s++) {
+		case 'A': 
+			goto state_1_2_3_5_7(s);
+			break;
+		case 'C': 
+			goto state_8(s);
+			break;
+		case 'B': 
+			goto state_1_3_4_5_7(s);
+			break;
+		default: goto reject(s);
+	}
+}
+
+__code state_1_2_3_5_7(char* s) {
+	switch(*s++) {
+		case 'A': 
+			goto state_1_2_3_5_7(s);
+			break;
+		case 'C': 
+			goto state_8(s);
+			break;
+		case 'B': 
+			goto state_1_3_4_5_7(s);
+			break;
+		default: goto reject(s);
+	}
+}
+
+
+__code accept(char* s) {
+	printf("\nstring matches regexp. \n\n");
+}
+
+
+__code reject(char* s) {
+	printf("\nstring does not match regexp. \n\n");
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/code/c/regcbc.c	Tue Jun 15 00:54:59 2010 +0900
@@ -0,0 +1,108 @@
+#include <stdio.h>
+
+__code state_14(char* s);
+__code state_12(char* s);
+__code state_9_11_13_15(char* s);
+__code state_7_11_13_15(char* s);
+__code state_2_6_8_10(char* s);
+__code state_4_6_8_10(char* s);
+__code state_1_3_5(char* s);
+__code accept(char* s);
+__code reject(char* s);
+
+int main(int argc, char* argv[]) {
+	puts("regexp: (f|F)(o|O)(o|O)");
+	puts("number of state: 7");
+	printf("string: %s\n", argv[1]);
+	goto state_1_3_5(argv[1]);
+	return 0;
+}
+
+__code state_14(char* s) {
+	switch(*s++) {
+		case '\0': 
+			goto accept(s);
+			break;
+		default: goto reject(s);
+	}
+}
+
+__code state_12(char* s) {
+	switch(*s++) {
+		case '\0': 
+			goto accept(s);
+			break;
+		default: goto reject(s);
+	}
+}
+
+__code state_9_11_13_15(char* s) {
+	switch(*s++) {
+		case 'o': 
+			goto state_12(s);
+			break;
+		case 'O': 
+			goto state_14(s);
+			break;
+		default: goto reject(s);
+	}
+}
+
+__code state_7_11_13_15(char* s) {
+	switch(*s++) {
+		case 'o': 
+			goto state_12(s);
+			break;
+		case 'O': 
+			goto state_14(s);
+			break;
+		default: goto reject(s);
+	}
+}
+
+__code state_2_6_8_10(char* s) {
+	switch(*s++) {
+		case 'o': 
+			goto state_7_11_13_15(s);
+			break;
+		case 'O': 
+			goto state_9_11_13_15(s);
+			break;
+		default: goto reject(s);
+	}
+}
+
+__code state_4_6_8_10(char* s) {
+	switch(*s++) {
+		case 'O': 
+			goto state_9_11_13_15(s);
+			break;
+		case 'o': 
+			goto state_7_11_13_15(s);
+			break;
+		default: goto reject(s);
+	}
+}
+
+__code state_1_3_5(char* s) {
+	switch(*s++) {
+		case 'F': 
+			goto state_4_6_8_10(s);
+			break;
+		case 'f': 
+			goto state_2_6_8_10(s);
+			break;
+		default: goto reject(s);
+	}
+}
+
+
+__code accept(char* s) {
+	printf("\nstring matches regexp. \n\n");
+}
+
+
+__code reject(char* s) {
+	printf("\nstring does not match regexp. \n\n");
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/code/ll/emit.ll	Tue Jun 15 00:54:59 2010 +0900
@@ -0,0 +1,38 @@
+
+define fastcc void @driver(i32) {
+entry:
+        %1 = load i8* getelementptr ([4 x i8]* @0, i32 0, i32 0)                ; <i8> [#uses=1]
+        switch i8 %1, label %default [
+                i8 65, label %case0
+                i8 66, label %case1
+        ]
+
+return:         ; preds = %case1, %case0, %default
+        ret void
+
+default:                ; preds = %entry
+        call void @reject(i32 0)
+        br label %return
+
+case0:          ; preds = %entry
+        call void @accept(i32 1)
+        br label %return
+
+case1:          ; preds = %entry
+        call void @accept(i32 1)
+        br label %return
+}
+
+define fastcc void @accept(i32) {
+entry:
+        call void (i8*, ...)* @printf(i8* getelementptr ([24 x i8]* @1, i32 0, i32 0), i8* getelementptr ([5 x i8]* @2, i32 0, i32 0))
+        ret void
+}
+
+define fastcc void @reject(i32) {
+entry:
+        %1 = getelementptr [4 x i8]* @0, i32 0, i32 %0          ; <i8*> [#uses=1]
+        call void (i8*, ...)* @printf(i8* getelementptr ([30 x i8]* @3, i32 0, i32 0), i8* %1, i8* getelementptr ([5 x i8]* @4, i32 0, i32 0))
+        ret void
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/code/ll/reg.ll	Tue Jun 15 00:54:59 2010 +0900
@@ -0,0 +1,99 @@
+; ModuleID = 'regllvm'
+global [9 x i8] c"ABBBBBBC\00"		; <[9 x i8]*>:0 [#uses=4]
+global [28 x i8] c"state: %s, arg: %c(int %d)\0A\00"		; <[28 x i8]*>:1 [#uses=0]
+
+define i32 @main(i32 %index) {
+entry:
+	%0 = malloc i32		; <i32*> [#uses=11]
+	store i32 %index, i32* %0, align 8
+	br label %state_1_3_5_6_7
+
+state_1_3_5_6_7:		; preds = %entry
+	%1 = load i32* %0, align 8		; <i32> [#uses=2]
+	%2 = sext i32 %1 to i64		; <i64> [#uses=1]
+	%3 = getelementptr [9 x i8]* @0, i64 0, i64 %2		; <i8*> [#uses=1]
+	%4 = add i32 %1, 1		; <i32> [#uses=1]
+	store i32 %4, i32* %0, align 8
+	%5 = load i8* %3		; <i8> [#uses=1]
+	switch i8 %5, label %reject [
+		i8 65, label %state_1_3_5_6_7_case0
+		i8 67, label %state_1_3_5_6_7_case1
+		i8 66, label %state_1_3_5_6_7_case2
+	]
+
+reject:		; preds = %state_1_2_3_5_7, %state_1_3_4_5_7, %state_1_3_5_6_7, %state_8
+	free i32* %0
+	ret i32 0
+
+state_1_3_5_6_7_case0:		; preds = %state_1_3_5_6_7
+	br label %state_1_2_3_5_7
+
+state_1_2_3_5_7:		; preds = %state_1_2_3_5_7_case0, %state_1_3_4_5_7_case0, %state_1_3_5_6_7_case0
+	%6 = load i32* %0, align 8		; <i32> [#uses=2]
+	%7 = sext i32 %6 to i64		; <i64> [#uses=1]
+	%8 = getelementptr [9 x i8]* @0, i64 0, i64 %7		; <i8*> [#uses=1]
+	%9 = add i32 %6, 1		; <i32> [#uses=1]
+	store i32 %9, i32* %0, align 8
+	%10 = load i8* %8		; <i8> [#uses=1]
+	switch i8 %10, label %reject [
+		i8 65, label %state_1_2_3_5_7_case0
+		i8 67, label %state_1_2_3_5_7_case1
+		i8 66, label %state_1_2_3_5_7_case2
+	]
+
+state_1_2_3_5_7_case0:		; preds = %state_1_2_3_5_7
+	br label %state_1_2_3_5_7
+
+state_1_2_3_5_7_case1:		; preds = %state_1_2_3_5_7
+	br label %state_8
+
+state_8:		; preds = %state_1_2_3_5_7_case1, %state_1_3_4_5_7_case1, %state_1_3_5_6_7_case1
+	%11 = load i32* %0, align 8		; <i32> [#uses=2]
+	%12 = sext i32 %11 to i64		; <i64> [#uses=1]
+	%13 = getelementptr [9 x i8]* @0, i64 0, i64 %12		; <i8*> [#uses=1]
+	%14 = add i32 %11, 1		; <i32> [#uses=1]
+	store i32 %14, i32* %0, align 8
+	%15 = load i8* %13		; <i8> [#uses=1]
+	switch i8 %15, label %reject [
+		i8 0, label %state_8_case0
+	]
+
+state_8_case0:		; preds = %state_8
+	br label %accpet
+
+accpet:		; preds = %state_8_case0
+	free i32* %0
+	ret i32 1
+
+state_1_2_3_5_7_case2:		; preds = %state_1_2_3_5_7
+	br label %state_1_3_4_5_7
+
+state_1_3_4_5_7:		; preds = %state_1_2_3_5_7_case2, %state_1_3_4_5_7_case2, %state_1_3_5_6_7_case2
+	%16 = load i32* %0, align 8		; <i32> [#uses=2]
+	%17 = sext i32 %16 to i64		; <i64> [#uses=1]
+	%18 = getelementptr [9 x i8]* @0, i64 0, i64 %17		; <i8*> [#uses=1]
+	%19 = add i32 %16, 1		; <i32> [#uses=1]
+	store i32 %19, i32* %0, align 8
+	%20 = load i8* %18		; <i8> [#uses=1]
+	switch i8 %20, label %reject [
+		i8 65, label %state_1_3_4_5_7_case0
+		i8 67, label %state_1_3_4_5_7_case1
+		i8 66, label %state_1_3_4_5_7_case2
+	]
+
+state_1_3_4_5_7_case0:		; preds = %state_1_3_4_5_7
+	br label %state_1_2_3_5_7
+
+state_1_3_4_5_7_case1:		; preds = %state_1_3_4_5_7
+	br label %state_8
+
+state_1_3_4_5_7_case2:		; preds = %state_1_3_4_5_7
+	br label %state_1_3_4_5_7
+
+state_1_3_5_6_7_case1:		; preds = %state_1_3_5_6_7
+	br label %state_8
+
+state_1_3_5_6_7_case2:		; preds = %state_1_3_5_6_7
+	br label %state_1_3_4_5_7
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/code/ll/regllvm.label.ll	Tue Jun 15 00:54:59 2010 +0900
@@ -0,0 +1,196 @@
+; ModuleID = 'regllvm'
+global [6 x i8] c"ABBBC\00"		; <[6 x i8]*>:0 [#uses=8]
+global [28 x i8] c"state: %s, arg: %c(int %d)\0A\00"		; <[28 x i8]*>:1 [#uses=1]
+global [22 x i8] c"%s does match regexp\0A\00"		; <[22 x i8]*>:2 [#uses=1]
+global [26 x i8] c"%s does not match regexp\0A\00"		; <[26 x i8]*>:3 [#uses=1]
+global [16 x i8] c"state_1_2_3_5_7\00"		; <[16 x i8]*>:4 [#uses=1]
+global [16 x i8] c"state_1_2_3_5_7\00"		; <[16 x i8]*>:5 [#uses=1]
+global [16 x i8] c"state_1_2_3_5_7\00"		; <[16 x i8]*>:6 [#uses=1]
+global [16 x i8] c"state_1_2_3_5_7\00"		; <[16 x i8]*>:7 [#uses=1]
+
+define i32 @main(i32) {
+entry:
+	%1 = tail call i32 @state_1_3_5_6_7(i32 %0)		; <i32> [#uses=1]
+	ret i32 %1
+}
+
+define i32 @accept(i32) {
+entry:
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([22 x i8]* @2, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0))
+	ret i32 1
+}
+
+define i32 @reject(i32) {
+entry:
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @3, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0))
+	ret i32 0
+}
+
+define i32 @state_8(i32 %index) {
+entry:
+	%index1 = sext i32 %index to i64		; <i64> [#uses=1]
+	%0 = getelementptr [6 x i8]* @0, i64 0, i64 %index1		; <i8*> [#uses=1]
+	%1 = load i8* %0		; <i8> [#uses=3]
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([28 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @4, i32 0, i32 0), i8 %1, i8 %1)
+	switch i8 %1, label %default [
+		i8 0, label %case0
+	]
+
+default:		; preds = %entry
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @3, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0))
+	ret i32 0
+
+case0:		; preds = %entry
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([22 x i8]* @2, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0))
+	ret i32 1
+}
+
+define i32 @state_1_3_5_6_7(i32 %index) {
+entry:
+	%index1 = sext i32 %index to i64		; <i64> [#uses=1]
+	%0 = getelementptr [6 x i8]* @0, i64 0, i64 %index1		; <i8*> [#uses=1]
+	%1 = add i32 %index, 1		; <i32> [#uses=3]
+	%2 = load i8* %0		; <i8> [#uses=3]
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([28 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @5, i32 0, i32 0), i8 %2, i8 %2)
+	switch i8 %2, label %default [
+		i8 65, label %case0
+		i8 67, label %case1
+		i8 66, label %case2
+	]
+
+default:		; preds = %entry
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @3, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0))
+	ret i32 0
+
+case0:		; preds = %entry
+	%3 = tail call i32 @state_1_2_3_5_7(i32 %1)		; <i32> [#uses=1]
+	ret i32 %3
+
+case1:		; preds = %entry
+	%4 = sext i32 %1 to i64		; <i64> [#uses=1]
+	%5 = getelementptr [6 x i8]* @0, i64 0, i64 %4		; <i8*> [#uses=1]
+	%6 = load i8* %5		; <i8> [#uses=3]
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([28 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @4, i32 0, i32 0), i8 %6, i8 %6)
+	switch i8 %6, label %default.i [
+		i8 0, label %case0.i
+	]
+
+default.i:		; preds = %case1
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @3, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0))
+	br label %state_8.exit
+
+state_8.exit:		; preds = %default.i, %case0.i
+	%7 = phi i32 [ 1, %case0.i ], [ 0, %default.i ]		; <i32> [#uses=1]
+	ret i32 %7
+
+case0.i:		; preds = %case1
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([22 x i8]* @2, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0))
+	br label %state_8.exit
+
+case2:		; preds = %entry
+	%8 = tail call i32 @state_1_3_4_5_7(i32 %1)		; <i32> [#uses=1]
+	ret i32 %8
+}
+
+define i32 @state_1_3_4_5_7(i32 %index) {
+entry:
+	br label %tailrecurse
+
+tailrecurse:		; preds = %case2, %entry
+	%index.tr = phi i32 [ %index, %entry ], [ %1, %case2 ]		; <i32> [#uses=2]
+	%index1 = sext i32 %index.tr to i64		; <i64> [#uses=1]
+	%0 = getelementptr [6 x i8]* @0, i64 0, i64 %index1		; <i8*> [#uses=1]
+	%1 = add i32 %index.tr, 1		; <i32> [#uses=3]
+	%2 = load i8* %0		; <i8> [#uses=3]
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([28 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @6, i32 0, i32 0), i8 %2, i8 %2)
+	switch i8 %2, label %default [
+		i8 65, label %case0
+		i8 67, label %case1
+		i8 66, label %case2
+	]
+
+default:		; preds = %tailrecurse
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @3, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0))
+	ret i32 0
+
+case0:		; preds = %tailrecurse
+	%3 = tail call i32 @state_1_2_3_5_7(i32 %1)		; <i32> [#uses=1]
+	ret i32 %3
+
+case1:		; preds = %tailrecurse
+	%4 = sext i32 %1 to i64		; <i64> [#uses=1]
+	%5 = getelementptr [6 x i8]* @0, i64 0, i64 %4		; <i8*> [#uses=1]
+	%6 = load i8* %5		; <i8> [#uses=3]
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([28 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @4, i32 0, i32 0), i8 %6, i8 %6)
+	switch i8 %6, label %default.i [
+		i8 0, label %case0.i
+	]
+
+default.i:		; preds = %case1
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @3, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0))
+	br label %state_8.exit
+
+state_8.exit:		; preds = %default.i, %case0.i
+	%7 = phi i32 [ 1, %case0.i ], [ 0, %default.i ]		; <i32> [#uses=1]
+	ret i32 %7
+
+case0.i:		; preds = %case1
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([22 x i8]* @2, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0))
+	br label %state_8.exit
+
+case2:		; preds = %tailrecurse
+	br label %tailrecurse
+}
+
+define i32 @state_1_2_3_5_7(i32 %index) {
+entry:
+	br label %tailrecurse
+
+tailrecurse:		; preds = %case0, %entry
+	%index.tr = phi i32 [ %index, %entry ], [ %1, %case0 ]		; <i32> [#uses=2]
+	%index1 = sext i32 %index.tr to i64		; <i64> [#uses=1]
+	%0 = getelementptr [6 x i8]* @0, i64 0, i64 %index1		; <i8*> [#uses=1]
+	%1 = add i32 %index.tr, 1		; <i32> [#uses=3]
+	%2 = load i8* %0		; <i8> [#uses=3]
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([28 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @7, i32 0, i32 0), i8 %2, i8 %2)
+	switch i8 %2, label %default [
+		i8 65, label %case0
+		i8 67, label %case1
+		i8 66, label %case2
+	]
+
+default:		; preds = %tailrecurse
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @3, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0))
+	ret i32 0
+
+case0:		; preds = %tailrecurse
+	br label %tailrecurse
+
+case1:		; preds = %tailrecurse
+	%3 = sext i32 %1 to i64		; <i64> [#uses=1]
+	%4 = getelementptr [6 x i8]* @0, i64 0, i64 %3		; <i8*> [#uses=1]
+	%5 = load i8* %4		; <i8> [#uses=3]
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([28 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @4, i32 0, i32 0), i8 %5, i8 %5)
+	switch i8 %5, label %default.i [
+		i8 0, label %case0.i
+	]
+
+default.i:		; preds = %case1
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @3, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0))
+	br label %state_8.exit
+
+state_8.exit:		; preds = %default.i, %case0.i
+	%6 = phi i32 [ 1, %case0.i ], [ 0, %default.i ]		; <i32> [#uses=1]
+	ret i32 %6
+
+case0.i:		; preds = %case1
+	tail call void (i8*, ...)* @printf(i8* getelementptr ([22 x i8]* @2, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0))
+	br label %state_8.exit
+
+case2:		; preds = %tailrecurse
+	%7 = tail call i32 @state_1_3_4_5_7(i32 %1)		; <i32> [#uses=1]
+	ret i32 %7
+}
+
+declare void @printf(i8*, ...)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/code/ll/regllvm.label_optimized.ll	Tue Jun 15 00:54:59 2010 +0900
@@ -0,0 +1,97 @@
+; ModuleID = 'regllvm'
+global [6 x i8] c"ABBBC\00"		; <[6 x i8]*>:0 [#uses=4]
+global [28 x i8] c"state: %s, arg: %c(int %d)\0A\00"		; <[28 x i8]*>:1 [#uses=0]
+
+define i32 @main(i32) {
+entry:
+	%1 = malloc i32		; <i32*> [#uses=9]
+	store i32 %0, i32* %1, align 8
+	br label %state_1_3_5_6_7
+
+state_1_3_5_6_7:		; preds = %entry
+	%2 = load i32* %1, align 8		; <i32> [#uses=2]
+	%3 = sext i32 %2 to i64		; <i64> [#uses=1]
+	%4 = getelementptr [6 x i8]* @0, i64 0, i64 %3		; <i8*> [#uses=1]
+	%5 = add i32 %2, 1		; <i32> [#uses=1]
+	store i32 %5, i32* %1, align 8
+	%6 = load i8* %4		; <i8> [#uses=1]
+	switch i8 %6, label %reject [
+		i8 65, label %state_1_3_5_6_7_case0
+		i8 67, label %state_1_3_5_6_7_case1
+		i8 66, label %state_1_3_5_6_7_case2
+	]
+
+reject:		; preds = %state_1_2_3_5_7, %state_1_3_4_5_7, %state_1_3_5_6_7, %state_8
+	ret i32 0
+
+state_1_3_5_6_7_case0:		; preds = %state_1_3_5_6_7
+	br label %state_1_2_3_5_7
+
+state_1_2_3_5_7:		; preds = %state_1_2_3_5_7_case0, %state_1_3_4_5_7_case0, %state_1_3_5_6_7_case0
+	%7 = load i32* %1, align 8		; <i32> [#uses=2]
+	%8 = sext i32 %7 to i64		; <i64> [#uses=1]
+	%9 = getelementptr [6 x i8]* @0, i64 0, i64 %8		; <i8*> [#uses=1]
+	%10 = add i32 %7, 1		; <i32> [#uses=1]
+	store i32 %10, i32* %1, align 8
+	%11 = load i8* %9		; <i8> [#uses=1]
+	switch i8 %11, label %reject [
+		i8 65, label %state_1_2_3_5_7_case0
+		i8 67, label %state_1_2_3_5_7_case1
+		i8 66, label %state_1_2_3_5_7_case2
+	]
+
+state_1_2_3_5_7_case0:		; preds = %state_1_2_3_5_7
+	br label %state_1_2_3_5_7
+
+state_1_2_3_5_7_case1:		; preds = %state_1_2_3_5_7
+	br label %state_8
+
+state_8:		; preds = %state_1_2_3_5_7_case1, %state_1_3_4_5_7_case1, %state_1_3_5_6_7_case1
+	%12 = load i32* %1, align 8		; <i32> [#uses=2]
+	%13 = sext i32 %12 to i64		; <i64> [#uses=1]
+	%14 = getelementptr [6 x i8]* @0, i64 0, i64 %13		; <i8*> [#uses=1]
+	%15 = add i32 %12, 1		; <i32> [#uses=1]
+	store i32 %15, i32* %1, align 8
+	%16 = load i8* %14		; <i8> [#uses=1]
+	switch i8 %16, label %reject [
+		i8 0, label %state_8_case0
+	]
+
+state_8_case0:		; preds = %state_8
+	br label %accpet
+
+accpet:		; preds = %state_8_case0
+	ret i32 1
+
+state_1_2_3_5_7_case2:		; preds = %state_1_2_3_5_7
+	br label %state_1_3_4_5_7
+
+state_1_3_4_5_7:		; preds = %state_1_2_3_5_7_case2, %state_1_3_4_5_7_case2, %state_1_3_5_6_7_case2
+	%17 = load i32* %1, align 8		; <i32> [#uses=2]
+	%18 = sext i32 %17 to i64		; <i64> [#uses=1]
+	%19 = getelementptr [6 x i8]* @0, i64 0, i64 %18		; <i8*> [#uses=1]
+	%20 = add i32 %17, 1		; <i32> [#uses=1]
+	store i32 %20, i32* %1, align 8
+	%21 = load i8* %19		; <i8> [#uses=1]
+	switch i8 %21, label %reject [
+		i8 65, label %state_1_3_4_5_7_case0
+		i8 67, label %state_1_3_4_5_7_case1
+		i8 66, label %state_1_3_4_5_7_case2
+	]
+
+state_1_3_4_5_7_case0:		; preds = %state_1_3_4_5_7
+	br label %state_1_2_3_5_7
+
+state_1_3_4_5_7_case1:		; preds = %state_1_3_4_5_7
+	br label %state_8
+
+state_1_3_4_5_7_case2:		; preds = %state_1_3_4_5_7
+	br label %state_1_3_4_5_7
+
+state_1_3_5_6_7_case1:		; preds = %state_1_3_5_6_7
+	br label %state_8
+
+state_1_3_5_6_7_case2:		; preds = %state_1_3_5_6_7
+	br label %state_1_3_4_5_7
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dfareg.py	Tue Jun 15 00:54:59 2010 +0900
@@ -0,0 +1,456 @@
+#!/usr/bin/env python
+
+import copy
+import sys
+from optparse import OptionParser
+
+class NondeterministicFiniteAutomaton(object):
+    def __init__(self, transition, start, accepts, map_):
+        self.transition = transition
+        self.start = start
+        self.accepts = accepts
+        self.map = map_
+
+    def epsilonExpand(self, set_):
+        que = set(set_)
+        done = set()
+        while que:
+            stat = que.pop()
+            nexts = self.transition(stat, "")
+            done.add(stat)
+            for nextStat in nexts:
+                if not nextStat in done: que.add(nextStat)
+        return frozenset(done)
+
+class DeterministicFiniteAutomaton(object):
+    def __init__(self, transition, start, accepts, map_):
+        self.transition = transition
+        self.start = start
+        self.accepts = accepts
+        self.map = map_
+    def getRuntime(self):
+        return DFARuntime(self)
+
+class DFARuntime(object):
+    def __init__(self, DFA):
+        self.DFA = DFA
+        self.curState = self.DFA.start
+
+    def doTransition(self, char):
+        self.curState = self.DFA.transition(
+            self.curState, char)
+
+    def isAcceptState(self):
+        return self.curState in self.DFA.accepts
+
+    def doesAccept(self, input):
+        for alphabet in input:
+            self.doTransition(alphabet)
+        return self.isAcceptState()
+
+class Token(object):
+    CHARACTER  = 0
+    OPE_UNION  = 1
+    OPE_STAR   = 2
+    LPAREN     = 3
+    RPAREN     = 4
+    EOF        = 5
+
+    def __init__(self, value, kind):
+        self.value = value
+        self.kind  = kind
+
+
+class Lexer(object):
+    def __init__(self, str):
+        self.stringList = list(str)
+
+    def scan(self):
+        if not self.stringList:
+            return Token(None, Token.EOF)
+
+        ch = self.stringList.pop(0)
+        if   ch == '\\':
+            return Token(self.stringList.pop(0), Token.CHARACTER)
+        elif ch == '|':
+            return Token(ch, Token.OPE_UNION)
+        elif ch == '(':
+            return Token(ch, Token.LPAREN)
+        elif ch == ')':
+            return Token(ch, Token.RPAREN)
+        elif ch == '*':
+            return Token(ch, Token.OPE_STAR)
+        else:
+            return Token(ch, Token.CHARACTER)
+
+"""
+ Context-free Grammer:
+ (A) expression -> subexpr EOF
+ (B) subexpr -> seq '|' subexpr | seq
+ (C) seq -> subseq | ''
+ (D) subseq -> star subseq | star
+ (E) star -> factor '*' | factor
+ (F) factor -> '(' subexpr ')' | CHARACTER
+"""
+
+class Parser(object):
+    def __init__(self, lexer):
+        self.lexer = lexer
+        self.look  = None
+        self.move()
+
+    def match(self, tag):
+        if self.look.kind != tag:
+            raise Exeption("syntax error")
+        self.move()
+
+    def move(self):
+        self.look = self.lexer.scan()
+
+    def factor(self):
+        if self.look.kind == Token.LPAREN:
+            # factor -> '(' subexpr ')'
+            self.match(Token.LPAREN)
+            node = self.subexpr()
+            self.match(Token.RPAREN)
+            return node
+        else:
+            # factor -> CHARACTER
+            node = Character(self.look.value)
+            self.match(Token.CHARACTER)
+            return node
+
+    def star(self):
+        # star -> factor '*' | factor
+        node = self.factor()
+        if self.look.kind == Token.OPE_STAR:
+            self.match(Token.OPE_STAR)
+            node = Star(node)
+        return node
+
+    def seq(self):
+        if self.look.kind == Token.LPAREN \
+               or self.look.kind == Token.CHARACTER:
+            # seq -> subseq
+            return self.subseq()
+        else:
+            # seq -> ''
+            return Character("")
+
+    def subseq(self):
+        node1 = self.star()
+        if self.look.kind == Token.LPAREN \
+               or self.look.kind == Token.CHARACTER:
+            # subseq -> star subseq
+            node2 = self.subseq()
+            node  = Concat(node1, node2)
+            return node
+        else:
+            # subseq -> star
+            return node1
+
+    def subexpr(self):
+        node1 = self.seq()
+        if self.look.kind == Token.OPE_UNION:
+            self.match(Token.OPE_UNION)
+            # subexpr    -> seq '|' subexpr
+            node2 = self.subexpr()
+            node  = Union(node1, node2)
+            return node
+        else:
+            # subexpr    -> seq
+            return node1
+
+    def expression(self):
+        # expression -> subexpr EOF
+        node     = self.subexpr()
+        self.match(Token.EOF)
+
+        # Create TREE, NFA
+        context  = Context()
+        fragment = node.assemble(context)
+        return fragment.build()
+
+class Context(object):
+    def __init__(self):
+        self.stateCount = 0
+
+    def newState(self):
+        self.stateCount += 1
+        return self.stateCount
+
+class NFAFragment(object):
+    def __init__(self):
+        self.start   = None    # integer
+        self.accepts = None    # frozenset
+        self.map     = dict()  # transition
+
+    def connect(self, from_, char, to):
+        slot = self.map.setdefault((from_, char), set())
+        slot.add(to)
+
+    def newSkelton(self):
+        # copy fragment (self), and it return
+        newFrag = NFAFragment();
+        newFrag.map = copy.deepcopy(self.map)
+        return newFrag
+
+    def __or__(self, frag):
+        newFrag = self.newSkelton()
+        for k, v in frag.map.iteritems():
+            newFrag.map[k] = v.copy()
+        return newFrag
+
+    def build(self):
+        map_ = self.map
+        def transition(state, char):
+            return frozenset(map_.get((state, char), []))
+
+        return NondeterministicFiniteAutomaton(
+            transition,
+            self.start,
+            self.accepts,
+            self.map
+            )
+
+class Character(object):
+    def __init__(self, char):
+        self.char = char
+
+    def assemble(self, context):
+        frag = NFAFragment()
+        s1 = context.newState()
+        s2 = context.newState()
+        frag.connect(s1, self.char, s2)
+        frag.start = s1
+        frag.accepts = frozenset([s2])
+        return frag
+
+class Union(object):
+    def __init__(self, op1, op2):
+        self.op1 = op1
+        self.op2 = op2
+
+    def assemble(self, context):
+        frag1 = self.op1.assemble(context)
+        frag2 = self.op2.assemble(context)
+        frag = frag1 | frag2
+        s = context.newState()
+        frag.connect(s, "", frag1.start)
+        frag.connect(s, "", frag2.start)
+        frag.start = s
+        frag.accepts = frag1.accepts | frag2.accepts
+        return frag
+
+class Concat(object):
+    def __init__(self, op1, op2):
+        self.op1 = op1
+        self.op2 = op2
+
+    def assemble(self, context):
+        frag1 = self.op1.assemble(context)
+        frag2 = self.op2.assemble(context)
+        frag = frag1 | frag2
+
+        for state in frag1.accepts:
+            frag.connect(state, "", frag2.start)
+
+        frag.start   = frag1.start
+        frag.accepts = frag2.accepts
+        return frag
+
+class Star(object):
+    def __init__(self, op):
+        self.op = op
+
+    def assemble(self, context):
+        fragOrig = self.op.assemble(context)
+        frag = fragOrig.newSkelton()
+
+        for state in fragOrig.accepts:
+            frag.connect(state, "", fragOrig.start)
+
+        s = context.newState()
+        frag.connect(s, "", fragOrig.start)
+        frag.start = s
+        frag.accepts = fragOrig.accepts | frozenset([s])
+        return frag
+
+class NonDisjoinSets(object):
+    def __init__(self, sub):
+        self.sub = sub
+    def __contains__(self, a_set):
+        return a_set & self.sub
+
+def nfa2dfa(nfa):
+    def transition(set_, alpha):
+        ret = set()
+        for elem in set_:
+            ret |= nfa.transition(elem, alpha)
+        return nfa.epsilonExpand(frozenset(ret))
+
+    def stateSetTransition(stateSet, char):
+        trans = set()
+        for state in stateSet:
+            t = nfa.map.setdefault((state, char), None)
+            if not t == None: trans.update(nfa.epsilonExpand(t))
+        return frozenset(trans)
+
+    map_ = dict()
+    que =  set(frozenset([nfa.epsilonExpand(set([nfa.start]))]))
+    done = set()
+
+    while que:
+        stateSet = que.pop()
+
+        for state in stateSet:
+            for k, v in nfa.map.iteritems():
+                if state == k[0] and k[1] != '':
+                    slot = map_.setdefault((stateSet, k[1]), set())
+                    slot.update(nfa.epsilonExpand(v))
+
+        done.add(stateSet)
+
+        for v in map_.itervalues():
+            if not v in done:
+                que.add(frozenset(v))
+
+    return DeterministicFiniteAutomaton(
+        transition,
+        nfa.epsilonExpand(frozenset([nfa.start])),
+        NonDisjoinSets(nfa.accepts),
+        map_
+        )
+
+# create call graph (as dictionary)
+class CallGraph(object):
+    def __init__(self, dfa):
+        self.dfa = dfa
+        self.start = "state_"+self.set2name(dfa.start)
+        (self.map, self.accepts) = self.getCallGraph(self.dfa)
+
+    # return state name from set
+    def set2name(self, set_):
+        l = list(set_)
+        l.sort()
+        return '_'.join(str(x) for x in l)
+
+    def getCallGraph(self, dfa):
+
+        funcMap = dict()
+        funcMap[dfa.start] = set()
+
+        # dfa.map => {(frozenset([1, 15]), 'd'): frozenset([16]), ....}
+        #             : frozenset(1, 15) x 'd' -> frozenset([16])
+
+        for v in dfa.map.itervalues():
+            funcMap[frozenset(v)] = set()
+
+        for key in dfa.map.iterkeys():
+            slot = funcMap.setdefault(key[0], set())
+            slot.add(key[1])
+
+        for v in dfa.map.itervalues():
+            funcMap[frozenset(v)] = set()
+
+        for key in dfa.map.iterkeys():
+            slot = funcMap.setdefault(key[0], set())
+            slot.add(key[1])
+
+        callGraph = dict()
+        accepts = list()
+
+        # emit cbc code
+        for k in funcMap.iterkeys():
+            callGraph["state_"+self.set2name(k)] = set()
+
+        for k, v in funcMap.iteritems():
+            caseMap = dict()
+            for case in v:
+                caseMap[case] = "state_"+self.set2name(dfa.map[(frozenset(k), case)])
+            if k in dfa.accepts:
+                accepts.append("state_"+self.set2name(k))
+                caseMap['\\0'] = "accept"
+            callGraph["state_"+self.set2name(k)] = caseMap
+
+        return (callGraph, accepts)
+
+# emit CbC-souce, from CallGraph
+def dfa2CbC(cg, regexp, emitCFlag):
+    if emitCFlag:
+        fun_type = 'void '
+        call_t = ''
+    else:
+        fun_type = '__code '
+        call_t = 'goto '
+
+    # emit cbc(or c) code
+    print "#include <stdio.h>\n"
+    for k in cg.map.iterkeys():
+        print fun_type + k + "(char* s);"
+    print fun_type + 'accept(char* s);'
+    print fun_type + 'reject(char* s);'
+    print """
+int main(int argc, char* argv[]) {
+\tputs(\"regexp: %s\");
+\tputs(\"number of state: %d\");
+\tprintf(\"string: %%s\\n\", argv[1]);
+\t%s%s(argv[1]);
+\treturn 0;
+}\n""" % (regexp, len(cg.map), call_t, cg.start)
+
+    for k, v in cg.map.iteritems():
+        print fun_type + k + "(char* s) {"
+        print "\tswitch(*s++) {"
+        for case, next_state in v.iteritems():
+            print "\t\tcase '%s': " % (case)
+            print "\t\t\t%s%s(s);\n\t\t\tbreak;" % (call_t, next_state)
+
+        print "\t\tdefault: %sreject(s);\n\t}" % call_t
+        print "}\n"
+
+    print """
+%saccept(char* s) {
+\tprintf(\"\\nstring matches regexp. \\n\\n\");
+}\n""" % fun_type
+    print """
+%sreject(char* s) {
+\tprintf(\"\\nstring does not match regexp. \\n\\n\");
+}\n""" % fun_type
+
+class Regexp(object):
+    def __init__(self, regexp):
+        self.regexp = regexp
+        self.nfa    = None
+        self.dfa    = None
+        self._compile()
+
+    def _compile(self):
+        lexer_  = Lexer(self.regexp)
+        parser_ = Parser(lexer_)
+        self.nfa = parser_.expression()
+        self.dfa = nfa2dfa(self.nfa)
+
+    def emitCbC(self, emitCFlag=False):
+        cg = CallGraph(self.dfa)
+        dfa2CbC(cg, self.regexp, emitCFlag)
+
+    def matches(self, string):
+        runtime = self.dfa.getRuntime()
+        return runtime.doesAccept(string)
+
+def compile(regexp):
+    return Regexp(regexp)
+
+def main(argv):
+    myusage = "%prog [-C] regexp"
+    psr = OptionParser(usage=myusage)
+    psr.add_option("-C", action="store_true", dest="emitCFlag", default=False, help="emit C-source")
+    (opts, args) = psr.parse_args(sys.argv)
+    if len(args) == 1:
+        psr.print_help()
+        exit()
+    r = compile(args[1])
+    r.emitCbC(opts.emitCFlag)
+
+if __name__ == '__main__' : main(sys.argv)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ll	Tue Jun 15 00:54:59 2010 +0900
@@ -0,0 +1,146 @@
+; ModuleID = 'dfareg'
+global [2 x i8] c"C\00"		; <[2 x i8]*>:0 [#uses=5]
+global [20 x i8] c"state: %s, arg: %s\0A\00"		; <[20 x i8]*>:1 [#uses=1]
+global [24 x i8] c"%s does match regexp. \0A\00"		; <[24 x i8]*>:2 [#uses=1]
+global [28 x i8] c"%s does not match regexp. \0A\00"		; <[28 x i8]*>:3 [#uses=1]
+global [8 x i8] c"state_8\00"		; <[8 x i8]*>:4 [#uses=1]
+global [16 x i8] c"state_1_3_5_6_7\00"		; <[16 x i8]*>:5 [#uses=1]
+global [16 x i8] c"state_1_3_4_5_7\00"		; <[16 x i8]*>:6 [#uses=1]
+global [16 x i8] c"state_1_2_3_5_7\00"		; <[16 x i8]*>:7 [#uses=1]
+
+define fastcc void @accept(i32 %index) {
+entry:
+	call void (i8*, ...)* @printf(i8* getelementptr ([24 x i8]* @2, i32 0, i32 0), i8* getelementptr ([2 x i8]* @0, i32 0, i32 0))
+	ret void
+}
+
+define fastcc void @reject(i32 %index) {
+entry:
+	call void (i8*, ...)* @printf(i8* getelementptr ([28 x i8]* @3, i32 0, i32 0), i8* getelementptr ([2 x i8]* @0, i32 0, i32 0))
+	ret void
+}
+
+define fastcc void @state_8(i32 %index) {
+entry:
+	%0 = getelementptr [2 x i8]* @0, i32 0, i32 %index		; <i8*> [#uses=2]
+	%1 = add i32 %index, 1		; <i32> [#uses=2]
+	call void (i8*, ...)* @printf(i8* getelementptr ([20 x i8]* @1, i32 0, i32 0), i8* getelementptr ([8 x i8]* @4, i32 0, i32 0), i8* %0)
+	%2 = load i8* %0		; <i8> [#uses=1]
+	switch i8 %2, label %default [
+		i8 0, label %case0
+	]
+
+return:		; preds = %case0, %default
+	ret void
+
+default:		; preds = %entry
+	call void @reject(i32 %1)
+	br label %return
+
+case0:		; preds = %entry
+	call void @accept(i32 %1)
+	br label %return
+}
+
+define fastcc void @state_1_3_5_6_7(i32 %index) {
+entry:
+	%0 = getelementptr [2 x i8]* @0, i32 0, i32 %index		; <i8*> [#uses=2]
+	%1 = add i32 %index, 1		; <i32> [#uses=4]
+	call void (i8*, ...)* @printf(i8* getelementptr ([20 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @5, i32 0, i32 0), i8* %0)
+	%2 = load i8* %0		; <i8> [#uses=1]
+	switch i8 %2, label %default [
+		i8 65, label %case0
+		i8 67, label %case1
+		i8 66, label %case2
+	]
+
+return:		; preds = %case2, %case1, %case0, %default
+	ret void
+
+default:		; preds = %entry
+	call void @reject(i32 %1)
+	br label %return
+
+case0:		; preds = %entry
+	call void @state_1_2_3_5_7(i32 %1)
+	br label %return
+
+case1:		; preds = %entry
+	call void @state_8(i32 %1)
+	br label %return
+
+case2:		; preds = %entry
+	call void @state_1_3_4_5_7(i32 %1)
+	br label %return
+}
+
+define fastcc void @state_1_3_4_5_7(i32 %index) {
+entry:
+	%0 = getelementptr [2 x i8]* @0, i32 0, i32 %index		; <i8*> [#uses=2]
+	%1 = add i32 %index, 1		; <i32> [#uses=4]
+	call void (i8*, ...)* @printf(i8* getelementptr ([20 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @6, i32 0, i32 0), i8* %0)
+	%2 = load i8* %0		; <i8> [#uses=1]
+	switch i8 %2, label %default [
+		i8 65, label %case0
+		i8 67, label %case1
+		i8 66, label %case2
+	]
+
+return:		; preds = %case2, %case1, %case0, %default
+	ret void
+
+default:		; preds = %entry
+	call void @reject(i32 %1)
+	br label %return
+
+case0:		; preds = %entry
+	call void @state_1_2_3_5_7(i32 %1)
+	br label %return
+
+case1:		; preds = %entry
+	call void @state_8(i32 %1)
+	br label %return
+
+case2:		; preds = %entry
+	call void @state_1_3_4_5_7(i32 %1)
+	br label %return
+}
+
+define fastcc void @state_1_2_3_5_7(i32 %index) {
+entry:
+	%0 = getelementptr [2 x i8]* @0, i32 0, i32 %index		; <i8*> [#uses=2]
+	%1 = add i32 %index, 1		; <i32> [#uses=4]
+	call void (i8*, ...)* @printf(i8* getelementptr ([20 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @7, i32 0, i32 0), i8* %0)
+	%2 = load i8* %0		; <i8> [#uses=1]
+	switch i8 %2, label %default [
+		i8 65, label %case0
+		i8 67, label %case1
+		i8 66, label %case2
+	]
+
+return:		; preds = %case2, %case1, %case0, %default
+	ret void
+
+default:		; preds = %entry
+	call void @reject(i32 %1)
+	br label %return
+
+case0:		; preds = %entry
+	call void @state_1_2_3_5_7(i32 %1)
+	br label %return
+
+case1:		; preds = %entry
+	call void @state_8(i32 %1)
+	br label %return
+
+case2:		; preds = %entry
+	call void @state_1_3_4_5_7(i32 %1)
+	br label %return
+}
+
+declare void @printf(i8*, ...)
+
+start: state_1_3_5_6_7
+state: state_1_3_5_6_7, arg: C
+state: state_8, arg: 
+C does match regexp. 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/reg2llvm.py	Tue Jun 15 00:54:59 2010 +0900
@@ -0,0 +1,202 @@
+#!/usr/bin/env python
+
+# Import the llvm-py modules.
+# encodingfrom llvm import *
+from llvm.core import *
+from llvm.passes import *
+from llvm.ee import *  # new import: ee = Execution Engine
+from dfareg import *
+
+class RegLLVM(object):
+    # define llvm core types, and const
+    int_t = Type.int()
+    char_t = Type.int(8)
+    charptr_t = Type.pointer(char_t)
+    charptrptr_t = Type.pointer(charptr_t)
+    const_zero = Constant.int(int_t, 0)
+    const_one = Constant.int(int_t, 1)
+    llvm.GuaranteedTailCallOpt = True
+
+    def __init__(self, modName, regexp, string, impl_label, optimized, dump):
+        # Create a module
+        self.mod = Module.new(modName)
+        self.regexp = regexp
+        self.optimized = optimized
+        self.dump = dump
+        self.impl_label = impl_label
+
+        self.matchp_str = self.new_str_const(string)
+        self.dump_str = self.new_str_const("state: %s, arg: %c(int %d)\n")
+
+        def optional_func_decl(fun):
+            fun.calling_convertion = CC_X86_FASTCALL
+            fun.args[0].name = "index"
+
+        state_list = dict()
+        main = self.mod.add_function(
+            Type.function(self.int_t, (self.int_t,)), "main")
+        optional_func_decl(main)
+        main_entry = main.append_basic_block("entry")
+
+
+        if (impl_label):
+            accept_state = main.append_basic_block("accpet")
+            reject_state = main.append_basic_block("reject")
+            index_ptr = Builder.new(main_entry).malloc(self.int_t)
+            Builder.new(accept_state).free(index_ptr)
+            Builder.new(reject_state).free(index_ptr)
+        else:
+            # Create function - accept and reject (final state).
+            accept_state = self.mod.add_function(
+                Type.function(self.int_t, (self.int_t,)), "accept")
+            optional_func_decl(accept_state)
+            reject_state = self.mod.add_function(
+                Type.function(self.int_t, (self.int_t,)), "reject")
+            optional_func_decl(reject_state)
+
+        state_list["accept"] = accept_state
+        state_list["reject"] = reject_state
+
+        # create Regexp Object (Regexp has dfa)
+        r = Regexp(regexp)
+        self.cg = CallGraph(r.dfa)
+
+        # add state to module, (as function or label).
+        if (impl_label):
+            for state in self.cg.map.iterkeys():
+                label = main.append_basic_block(state)
+                state_list[state] = label
+        else:
+            for state in self.cg.map.iterkeys():
+                fun = self.mod.add_function(
+                    Type.function(self.int_t, (self.int_t,)), state)
+                optional_func_decl(fun)
+                state_list[state] = fun
+
+        # emit instructions
+        if (impl_label): emit = Builder.new(accept_state)
+        else:            emit = Builder.new(accept_state.append_basic_block("entry"))
+
+        if (dump): self.emit_call_printf(emit, "%s does match regexp\n", self.gep_first(emit, self.matchp_str))
+        emit.ret(self.const_one)
+
+        if (impl_label): emit = Builder.new(reject_state)
+        else:            emit = Builder.new(reject_state.append_basic_block("entry"))
+        if (dump): self.emit_call_printf(emit, "%s does not match regexp\n", self.gep_first(emit, self.matchp_str))
+        emit.ret(self.const_zero)
+
+        if (impl_label):
+            # emit transition instruction with jump instruction
+            emit = Builder.new(main_entry)
+            emit.store(main.args[0], index_ptr)
+            emit.branch(state_list[self.cg.start])
+
+            for state, transition in self.cg.map.iteritems():
+                emit = Builder.new(state_list[state])
+                index = emit.load(index_ptr)
+                ptr = emit.gep(self.matchp_str, (self.const_zero, index))
+                emit.store(emit.add(self.const_one, index), index_ptr)
+                char = emit.load(ptr)
+                si = emit.switch(char, state_list['reject'], len(transition))
+                local_label = 0
+                for case, next_state in transition.iteritems():
+                    bb = main.append_basic_block("%s_case%d" % (state, local_label))   #create default bb
+                    emit = Builder.new(bb)
+                    emit.branch(state_list[next_state])
+                    si.add_case(self.char_const(case), bb)
+                    local_label += 1
+        else:
+            for state, transition in self.cg.map.iteritems():
+                cases = dict()
+                for case, next_state in transition.iteritems():
+                    cases[self.char_const(case)] = state_list[next_state]
+                state_fun = state_list[state]
+                emit = Builder.new(state_fun.append_basic_block("entry"))
+                ptr = emit.gep(self.matchp_str, (self.const_zero, state_fun.args[0]))
+                next_index = emit.add(state_fun.args[0], self.const_one)
+                char = emit.load(ptr)
+
+                if (dump): self.emit_call_printf(emit, self.dump_str, self.gep_first(emit, self.new_str_const(fun.name)), char, char)
+
+                label = 0
+                default_bb = state_fun.append_basic_block("default") #create default bb
+                builder = Builder.new(default_bb)              # default is reject.
+                ret = builder.call(reject_state, (next_index,))
+                builder.ret(ret)
+
+                si = emit.switch(char, default_bb, len(cases)) # create switch instruction with deafult case.
+                for case, nextFun in cases.iteritems():
+                    bb = state_fun.append_basic_block("case%d" % label)   #create default bb
+                    builder = Builder.new(bb)
+                    ret = builder.call(nextFun, (next_index,))
+                    builder.ret(ret)
+                    si.add_case(case, bb)
+                    label += 1
+            emit = Builder.new(main_entry)
+            ret = emit.call(state_list[self.cg.start], (main.args[0],))
+            emit.ret(ret)
+
+        self.mp = ModuleProvider.new(self.mod)
+        if (optimized): self.optimize()
+        self.ee = ExecutionEngine.new(self.mp)
+        self.main = main
+
+    def getExecutionEngine(self):
+        return self.ee
+
+    def optimize(self):
+        #optimization passes
+        pm = PassManager.new()
+        pm.add(TargetData.new(''))
+        pm.add(PASS_FUNCTION_INLINING)
+        pm.run(self.mod)
+        fp = FunctionPassManager.new(self.mp)
+        fp.add(TargetData.new(''))
+        fp.add(PASS_BLOCK_PLACEMENT)
+        fp.add(PASS_INSTRUCTION_COMBINING)
+        fp.add(PASS_TAIL_CALL_ELIMINATION)
+        fp.add(PASS_AGGRESSIVE_DCE)
+        fp.add(PASS_DEAD_INST_ELIMINATION)
+        fp.add(PASS_DEAD_CODE_ELIMINATION)
+        for fun in self.mod.functions:
+            fp.run(fun)
+
+    def printModule(self):
+        print self.mod
+
+    def execute(self):
+        return self.ee.run_function(self.main,
+                                    (GenericValue.int(self.int_t, 0),))
+
+    def new_str_const(self, val):
+        '''create string(array of int) as a global value '''
+        str = self.mod.add_global_variable(Type.array(self.char_t, len(val) + 1), "")
+        str.initializer = Constant.stringz(val)
+        return str
+
+    def gep_first(self, emit, val):
+        '''get pointer of array'''
+        return emit.gep(val, (self.const_zero, self.const_zero))
+
+    def char_const(self, val):
+        '''create constant int value'''
+        if isinstance(val, str):
+            if val == '\\0':
+                return Constant.int(self.char_t, 0)
+            else:
+                return Constant.int(self.char_t, ord(val))
+        else:
+            exit('char_const: invalid argument.', val)
+
+    def emit_call_printf(self, emit, string, *args):
+        '''emit libc printf function call instruction'''
+        try:
+            printf = self.mod.get_function_named("printf")
+        except llvm.LLVMException:
+            printf = self.mod.add_function(
+                Type.function(Type.void(),
+                              (Type.pointer(self.char_t, 0),), 1), "printf")
+        if isinstance(string, str):
+            string = self.new_str_const(string)
+        emit.call(printf,
+                  [self.gep_first(emit, string)]+list(args))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/test_DFARuntime.py	Tue Jun 15 00:54:59 2010 +0900
@@ -0,0 +1,21 @@
+from dfareg import *
+
+# -*- coding: utf-8 -*-
+
+def transition(stat, char):
+    if stat == 1 and char == 'a': return 2
+    if stat == 2 and char == 'b': return 3
+    return 0
+
+dfa = DeterministicFiniteAutomaton(
+    transition,
+    1,
+    frozenset([3]),
+    )
+
+for str in (u'ab', u'ba'):
+    runtime = dfa.getRuntime()
+    if runtime.doesAccept(str):
+        print u"%s is accepted" % str
+    else:
+        print u"%s is not accepted" % str
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/test_NFA2DFA.py	Tue Jun 15 00:54:59 2010 +0900
@@ -0,0 +1,36 @@
+from dfareg import *
+
+def transition(state, char):
+    if state == 0 and char == u"a": return frozenset([1, 2])
+    if state == 1 and char == u"b": return frozenset([2])
+    if state == 2 and char == u"" : return frozenset([0])
+    return frozenset([])
+
+nfa = NondeterministicFiniteAutomaton(
+    transition,
+    0,
+    frozenset([2])
+    )
+
+class Token(object):
+    CHARACTER = 0
+    OPE_UNION = 1
+    OPE_STAR  = 2
+    LPAREN    = 3
+    RPAREN    = 4
+    EOF       = 5
+
+# Convert NFA into DFA
+
+def nfa2dfa(nfa):
+    def transition(set_, alpha):
+        ret = set()
+        for elem in set_:
+            ret |= nfa.transition(elem, alpha)
+        return nfa.epsilonExpand(frozenset(ret))
+    return DeterministicFiniteAutomaton(
+        transition,
+        nfa.epsilonExpand(frozenset([nfa.start])),
+        NondeterministicFiniteAutomaton(nfa.accepts)
+        )
+