view regexParser/threadedSearch.cc @ 266:e51cac73e42a

CeriumGrep start
author masa
date Thu, 28 Jan 2016 21:14:34 +0900
parents ef95a7f1bc03
children 0e423d9f9647
line wrap: on
line source

#include <stdio.h>
#include <stdlib.h>

#include "regexParser.h"
#include "threadedSearch.h"
#include "subsetConstruction.h"

// Forward declaration of the main search loop, defined later in this file.
void tSearch(TSValue tsv);

/*
 * No-op state handler: returns the search value unchanged.
 * generateTState() installs it as the stateContinue handler of every state.
 */
TSValue stateNothing(TSValue tsv) {
    return tsv;
}

/*
 * Mismatch handler: drop the bytes consumed so far and restart matching.
 *
 * Moves matchBegin to the current read position, so a later match is
 * reported from here, and rewinds the automaton to the start state so the
 * next byte is not tested against the transitions of a half-matched state.
 *
 * Returns the updated search value (TSValue is passed and returned by value).
 */
TSValue stateSkip(TSValue tsv) {
    tsv.buff.matchBegin = tsv.buff.buffptr;
    // Restart from the same start state that stateMatch() restarts from.
    tsv.current = tsv.tg->stateList->tState;
    return tsv;
}

/*
 * Match handler: print the matched text on its own line and restart the
 * search from the start state.
 *
 * The printed span runs from matchBegin up to, but not including, the most
 * recently consumed byte. That byte is then pushed back (buffptr is
 * decremented) so the main loop reads it again from the start state.
 *
 * Returns the updated search value.
 */
TSValue stateMatch(TSValue tsv) {
    // Length of the match, excluding the last byte that was consumed.
    size_t matchedLen = tsv.buff.buffptr - tsv.buff.matchBegin - 1;
    fwrite(tsv.buff.matchBegin, matchedLen, 1, stdout);
    puts("");

    // Un-read the last byte and go back to the start state.
    tsv.buff.buffptr--;
    tsv.current = tsv.tg->stateList->tState;

    // stateSkip() moves matchBegin to the (rewound) read position.
    return stateSkip(tsv);
}

/*
 * Build the threaded-search state (TState) for an automaton state.
 *
 * The new TState is linked both ways with `state` (tState->state and
 * state->tState). Its character-class list is flattened into the array
 * tState->ccv of tState->ccvSize entries; each entry records the range
 * [begin, end], the word condition, the target state's bit vector, and a
 * tState pointer that starts as NULL and is filled in lazily by tSearch().
 *
 * Parameters:
 *   state - automaton state to wrap; its tState field is overwritten.
 *   tg    - transition generator supplying the stateMatch / stateSkip /
 *           stateNothing handlers; they must already be set.
 *
 * Returns the newly allocated TState. When the state has no outgoing
 * character classes, ccv is NULL and ccvSize is 0.
 */
TStatePtr generateTState(StatePtr state, TransitionGeneratorPtr tg) {
    TStatePtr tState = NEW(TState);
    tState->state = state;
    state->tState = tState;

    // Install the handlers before any early return: tSearch() calls
    // stateSkip even on states that have no transitions at all.
    if (state->accept) {
        tState->stateSkip = tg->stateMatch;
    } else {
        tState->stateSkip = tg->stateSkip;
    }
    tState->stateContinue = tg->stateNothing;

    // Pass 1: count the character classes so the array can be sized.
    int ccvSize = 0;
    CharClassWalkerPtr ccw = createCharClassWalker(state->cc);
    while (hasNext(ccw)) {
        getNext(ccw);
        ccvSize++;
    }
    free(ccw);  // the counting walker is no longer needed

    tState->ccvSize = ccvSize;
    tState->ccv = NULL;
    if (ccvSize == 0) return tState;

    tState->ccv = (ccv*)malloc(sizeof(ccv)*ccvSize);

    // Pass 2: copy each character class into the array, in walker order.
    ccw = createCharClassWalker(state->cc);
    int i = 0;
    while (hasNext(ccw)) {
        CharClassPtr cc = getNext(ccw);
        struct ccv *entry = &tState->ccv[i++];
        entry->begin = cc->cond.range.begin;
        entry->end = cc->cond.range.end;
        entry->tState = NULL;          // resolved on first use by tSearch()
        entry->state = cc->nextState;
        entry->w = cc->cond.w;
    }
    free(ccw);
    return tState;
}

/*
 * Main search loop: consume the buffer one byte at a time and drive the
 * automaton held in tsv.current.
 *
 * For each byte, the current state's ccv entries are scanned in array order:
 *   - byte below an entry's begin: treated as a mismatch, and the state's
 *     stateSkip handler runs (this assumes the entries are sorted by
 *     ascending range - TODO confirm against the CharClass walker order);
 *   - byte within [begin, end]: follow the transition, creating the target
 *     State and/or TState on first use and caching it in the entry;
 *   - byte above every range: the stateSkip handler runs.
 *
 * NOTE(review): the loop simply stops at buffend; no handler is invoked for
 * a state that is still active when the input runs out.
 */
void tSearch(TSValue tsv) {
    next: while (tsv.buff.buffptr < tsv.buff.buffend) {
        unsigned char c = *tsv.buff.buffptr++;
        for (int i = 0; i < tsv.current->ccvSize; i++) {
            CCVPtr ccv = &tsv.current->ccv[i];
            if (c<ccv->begin) {
                tsv = tsv.current->stateSkip(tsv);
                goto next;
            } else if (c<=ccv->end) {
                // range matched.
                if (ccv->w.word) {
                    // TODO: match the word; on failure, continue with the
                    // next entry. Not implemented yet.
                }
                if (ccv->tState == NULL) {
                    // First use of this transition: resolve its target.
                    StatePtr state = tsv.tg->stateArray[ccv->state.bitContainer];
                    if (state == NULL) {
                        // on the fly subset construction: build the state
                        // from the transition's bit vector (state itself is
                        // NULL here and must not be dereferenced).
                        state = createState(tsv.tg,ccv->state);
                        tsv.tg->stateArray[state->bitState.bitContainer] = state;
                        determinize(state,tsv.tg);
                    }
                    if (state->tState == NULL) {
                        ccv->tState = generateTState(state,tsv.tg);
                    } else {
                        ccv->tState = state->tState;
                    }
                }
                tsv.current = ccv->tState;
                // The stateContinue handler is not invoked here yet.
                goto next;
            }
        }
        // c is above every range of the current state (or it has none).
        tsv = tsv.current->stateSkip(tsv);
    }
}

/*
 * Entry point of the threaded search: run the automaton described by `tg`
 * over `buff`.
 *
 * Parameters:
 *   tg   - transition generator; its stateSkip / stateMatch / stateNothing
 *          handler slots are overwritten with this file's handlers, and
 *          tg->stateList is used as the start state.
 *   buff - buffer to search, copied by value into the search value.
 */
void threadedSearch(TransitionGeneratorPtr tg, Buffer buff) {
    // Handlers must be installed before generateTState(), which copies them
    // from tg into the TState it builds.
    tg->stateSkip = stateSkip;
    tg->stateMatch = stateMatch;
    tg->stateNothing = stateNothing;

    TSValue tsv;
    tsv.buff = buff;
    tsv.tg = tg;
    tsv.result = NULL;
    tsv.current = generateTState(tg->stateList, tg);
    tSearch(tsv);
}