view pyrect/jitgrep.py @ 88:dafb393108f3

Implement goto-based grep. (icc's tail-call optimization is poor!)
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Sun, 14 Nov 2010 07:56:35 +0900
parents f5c4193913a1
children 5e509a9c951c
line wrap: on
line source

#!/usr/bin/env python

import sys
import os
import re
import time
from optparse import OptionParser
from pyrect.translator import *
from pyrect.regexp import Regexp, CharCollector

def _parse_bufsize(text):
    """Convert a size such as "1024", "1024K" or "2M" into a byte count.

    Raises ValueError when ``text`` is empty or is not an integer with an
    optional trailing ``K`` (2**10) or ``M`` (2**20) suffix.
    """
    multipliers = {'K': 2**10, 'M': 2**20}
    if text and text[-1] in multipliers:
        return int(text[:-1]) * multipliers[text[-1]]
    # int("") raises ValueError, so an empty value is rejected here as well.
    return int(text)


def main(argv):
    """Command-line driver: translate a regexp, compile it, and run the result.

    ``argv`` is the full argument vector including the program name
    (``argv[0]``); the first positional argument after it is the regexp and
    any remaining ones are forwarded to the compiled binary.

    Steps performed:
      1. Parse options and validate ``--buf-size`` / ``--thread``.
      2. Build a translator for ``".*" + regexp`` and have it write the
         generated source to /tmp/jitgrep_dfa.c (or dump it when ``--dump``).
      3. Compile that source to /tmp/jitgrep with the selected compiler.
      4. Unless ``-c`` was given, run the binary: on the given files, or,
         when no file was given, once per line read from standard input.

    Always returns None. Invalid usage prints the usage text and returns.
    """
    myusage = """%prog [--buf-size=size] [--dump]
                  [--time] [--debug] [--cc=compiler] [-c]
                  [-Olevel] regexp [file..] [--out=file]
                  [--thread=thread_num] [--filter=algorithm]
                  [--disable-booster] [--functional-transition]"""
    psr = OptionParser(usage=myusage)

    redirect = ""
    srcpath = "/tmp/jitgrep_dfa.c"
    binpath = "/tmp/jitgrep"

    psr.add_option("--cc", action="store", type="string", dest="cc", default="gcc", metavar="FILE",
                   help="Choose compiler (default is gcc).")
    psr.add_option("-c", action="store_true", dest="compile", default=False , help="compile only.")
    # Register the real option name; the "=size" part belongs in metavar.
    psr.add_option("--buf-size", action="store", type="string", dest="bufsize", default="1M", metavar="size",
                   help="Set read-buffer size (e.x. 1024, 1024K, 2M)")
    psr.add_option("--cflags", action="store", type="string", dest="cflags", default="-O3",
                   help="Set compiler flags (default is -O3).")
    psr.add_option("--time", action="store_true", dest="time", default=False, help="Print compile/matching time.")
    psr.add_option("--thread", action="store", type="string", dest="thread", default="0", metavar="NUM", help="number of thread.")
    psr.add_option("--disable-booster", action="store_true", dest="no_boost", default=False, help="disable boosetr (default: use booster).")
    psr.add_option("--enable-table-lookup", action="store_true", dest="table_lookup", default=False, help="use table-lookup in first-state's transition.")
    psr.add_option("--filter", action="store", type="string", dest="filter", default="", help="chose filtering-algorithm bmh(default), quick, or none.")
    psr.add_option("--debug", action="store_true", dest="debug", default=False, help="Dump commands, not evalute matching (except interactive mode).")
    psr.add_option("--label", action="store_true", dest="label", default=False, help="label implimentation in C.")
    psr.add_option("--dump", action="store_true", dest="dump", default=False, help="Dump generated grep-source.")
    psr.add_option("--out", action="store", type="string", dest="out", default="", metavar="FILE", help="Output file.")

    (opts, args) = psr.parse_args(argv)

    # args[0] is the program name, so a regexp requires at least two entries.
    if len(args) < 2:
        psr.print_usage()
        return

    # "cbc" is a shorthand for the compiler under $CBCROOT; the variable is
    # expanded by the shell when the compile command is run via os.system.
    cbc = opts.cc == "cbc"
    if cbc:
        opts.cc = "$CBCROOT/INSTALL_DIR/bin/gcc"
        opts.cflags += " -L$CBCROOT/gcc -w"

    if opts.debug: print("option", opts)
    if opts.debug: print("args", args)

    string = args[1]

    # Validate numeric options before doing any translation work.
    thread_line = 0
    try:
        bufsize = _parse_bufsize(opts.bufsize)
        if not cbc:
            # The thread count is only consumed by the non-CbC translators.
            thread_line = int(opts.thread)
    except ValueError:
        psr.print_usage()
        return

    if opts.time : start_time = time.time()

    # Prefix with ".*" so the automaton matches the pattern anywhere in a
    # line; character set and length analysis use the unprefixed pattern.
    reg = Regexp(".*"+string)
    reg.chars = Regexp.get_chars(string)
    (reg.max_len, _, _) = Regexp.get_analyze(string)

    if cbc:
        grept = CbCGREPTranslator(reg)
    else:
        if opts.label:
            grept = GOTOGREPTranslator(reg)
        else:
            grept = GREPTranslator(reg)
        if opts.filter: grept.filter = opts.filter
        grept.skip_boost = not opts.no_boost
        grept.table_lookup = opts.table_lookup
        grept.thread_line = thread_line

    grept.bufsize = bufsize

    if opts.dump:
        grept.translate()
        return

    # The context manager closes the file even if translate() raises.
    with open(srcpath, 'w') as tmpsrc:
        grept.translate(tmpsrc)

    if (opts.time):
        end_time = time.time()
        print("Translation: " + str(end_time - start_time) + " Sec.")

    cmd = " ".join([opts.cc, opts.cflags, srcpath, "-o", binpath])
    if opts.debug:
        print("compile command", cmd)
    else:
        if (opts.time): start_time = time.time()
        status = os.system(cmd)
        if (opts.time):
            end_time = time.time()
            print("Compiling  : " + str(end_time - start_time) + " Sec.")
        if status != 0:
            # Do not fall through and run a stale binary from an earlier run.
            sys.stderr.write("compile failed: " + cmd + "\n")
            return

    if opts.debug:
        print("argv=",  argv)
        print("args=", args)
        print("opts=", opts)

    if opts.compile:
        return

    if len(args) == 2 and not opts.debug:
        # Interactive mode: every input line is appended verbatim to the
        # binary's command line until EOF or Ctrl-C.
        try:
            read_line = raw_input   # Python 2
        except NameError:
            read_line = input       # Python 3
        while True:
            try:
                os.system(binpath + ' ' + read_line())
            except (KeyboardInterrupt, EOFError):
                break
    else:
        if (opts.time): redirect = "> /dev/null"
        if (opts.out):  redirect = ">" + opts.out
        # NOTE(review): "dummy" presumably fills the argument slot where the
        # generated program expects a pattern -- confirm against the translator.
        cmd = ' '.join([binpath, "dummy"] + args[2:] + [redirect])
        if opts.debug:
            print("exec command", cmd)
        else:
            if (opts.time): start_time = time.time()
            os.system(cmd)
            if (opts.time):
                end_time = time.time()
                print("Matching   : " + str(end_time - start_time) + " Sec.")

    # The generated source (srcpath) and binary (binpath) are intentionally
    # left in place after the run; nothing is cleaned up here.

# Script entry point: hand the full argument vector (program name included)
# to main() only when this file is executed directly.
if __name__ == '__main__':
    main(sys.argv)