view regexParser/threadedSearch.cc @ 324:879dc5d1cb6a default tip

fix
author mir3636
date Fri, 27 May 2016 21:21:09 +0900
parents 62f4628d2c0d
children
line wrap: on
line source

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "regexParser.h"
#include "CharClass.h"
#include "threadedSearch.h"
#include "subsetConstruction.h"
#include "bmSearch.h"

#define max(a,b)((a)>(b)?a:b) 
#define min(a,b)((a)<(b)?a:b)

TStatePtr nextTState(BitVector bi,TransitionGeneratorPtr tg);

static
TSValue stateNothing(TSValue tsv) {
    return tsv;
}

static
TSValue stateSkip(TSValue tsv) {
    if (tsv.matchEnd) {
        fwrite(tsv.matchBegin,tsv.matchEnd-tsv.matchBegin,1,stdout);
        puts("");
        tsv.matchEnd = NULL;
    }
    tsv.matchBegin = tsv.buff.buffptr;  // next char may be matchBegin
    // if possible use bmsearch
    if (!tsv.current || !tsv.current->ccv[0].w.bm ) return tsv;
    while (tsv.buff.buffptr < tsv.buff.buffend) {
        long skip = tsv.tg->maxWordLen;
        for (int k = 0; k < tsv.current->ccvSize; k++) {
            CCVPtr ccv = &tsv.current->ccv[k];
            if (ccv->w.bm) {
                int i = ccv->w.length - 1;
                while (tsv.buff.buffptr[i] == ccv->w.word[i]) {
                    if (i == 0) {
                        if (ccv->tState) {
                            tsv.current = ccv->tState;
                         } else {
                            tsv.current = nextTState(ccv->state,tsv.tg);
                            ccv->tState = tsv.current;
                        }
                        tsv.matchBegin = tsv.buff.buffptr;
                        tsv.buff.buffptr += ccv->w.length;
                        return tsv; 
                    }
                    --i;
                }
                skip = min(skip,max(ccv->w.bm->skip_table[tsv.buff.buffptr[i]],ccv->w.length - i));
            }
        }
        tsv.buff.buffptr += skip;
    }
    tsv.matchBegin = tsv.buff.buffptr;
    return tsv;
}

static
TSValue stateMatch(TSValue tsv) {
    tsv.matchEnd = tsv.buff.buffptr;    // next char of the match
    return tsv;
}

TStatePtr generateTState(StatePtr state, TransitionGeneratorPtr tg) {
    TStatePtr tState = NEW(TState);
    tState->state = state;
    int ccvSize = 0;
    CharClassWalkerPtr ccw = createCharClassWalker(state->cc);
    while (hasNext(ccw)) {
        getNext(ccw);
        ccvSize++;
    }
    tState->ccvSize = ccvSize;
    if (state->accept) {
        tState->stateMatch  = tg->stateMatch;
        tState->stateSkip  = tg->stateSkip;
    } else {
        tState->stateMatch  = tg->stateNothing;
        tState->stateSkip  = tg->stateSkip;
    }
    if (ccvSize == 0) {
        tState->ccv = NULL;
        state->tState = tState;
        return tState;
    } else tState->ccv = (ccv*)malloc(sizeof(ccv)*(ccvSize+1));
    ccw = createCharClassWalker(state->cc);
    int i = 0;
    while (hasNext(ccw)) {
        CharClassPtr cc = getNext(ccw);
        unsigned long begin = cc->cond.range.begin;
        unsigned long end = cc->cond.range.end;
        struct ccv *ccv = &tState->ccv[i++];
        ccv->begin = begin;
        ccv->end = end;
        ccv->tState = NULL;
        ccv->state = cc->nextState;
        ccv->w = cc->cond.w;
    }
    struct ccv *ccv = &tState->ccv[i];
    ccv->begin = MAXCHAR+1;
    ccv->end = ccv->begin;
    ccv->tState = NULL;
    free(ccw);
    state->tState = tState;
    return tState;
}

TStatePtr nextTState(BitVector bi,TransitionGeneratorPtr tg) {
    // create tSearch in next state.
    StatePtr state = tg->stateArray[bi.bitContainer];
    if (state == NULL) {
        // on the fly subset construction.
        state = createState(tg,bi);
        determinize(state,tg);
        tg->stateArray[bi.bitContainer] = state;
    }
    if (state->tState == NULL) {
        generateTState(state,tg);
    }
    return state->tState;
}

#define DEBUG 0

TSValue tSearch(TSValue tsv) {
    next: while (tsv.buff.buffptr < tsv.buff.buffend) {
        tsv = tsv.current->stateMatch(tsv);
        if (tsv.current->ccvSize==0) {
            // matched start again
            tsv.current = tsv.tg->stateStart->tState;
        }
        unsigned char c = *tsv.buff.buffptr++;
//        printState(tsv.current->state);
        CCVPtr ccv = &tsv.current->ccv[0];
        for (;;) {
            if (c<ccv[0].begin) goto noMatch;
            else if (c<=ccv[0].end) goto match;
            if (c<ccv[1].begin) goto noMatch;
            else if (c<=ccv[1].end) goto match;
            if (c<ccv[2].begin) goto noMatch;
            else if (c<=ccv[2].end) goto match;
            if (c<ccv[3].begin) goto noMatch;
            else if (c<=ccv[3].end) goto match;
            if (c<ccv[4].begin) goto noMatch;
            else if (c<=ccv[4].end) goto match;
            ccv += 5;
        }
        match:
        // range matched.
            if (ccv->w.word) {
                WordPtr w;
                for (w = &ccv->w;w;w = w->next) {
                    // match the word.
                    if (strncmp((const char *)w->word,(const char *)tsv.buff.buffptr-1,w->length)==0) break;
                }
                if (!w) continue; // if (not match) continue;
                tsv.buff.buffptr += w->length - 1;
            }
            if (ccv->tState) {
                tsv.current = ccv->tState;
            } else {
                tsv.current = nextTState(ccv->state,tsv.tg);
                ccv->tState = tsv.current;
            }
            goto next;
        noMatch:
            tsv.current = tsv.tg->stateStart->tState;
            tsv = tsv.current->stateSkip(tsv);
    }
    return tsv;
}

TSValue
createTSValue(TransitionGeneratorPtr tg, Buffer buff) {
    TSValue tsv;
    if (!tg) {
        tg = createTransitionGenerator();
    }
    tsv.buff = buff;
    tsv.tg = tg;
    tsv.blk = NULL;
    tsv.matchBegin = buff.buffptr;
    tsv.matchEnd = NULL;
    tsv.tg->stateSkip = stateSkip;
    tsv.tg->stateMatch = stateMatch;
    tsv.tg->stateNothing = stateNothing;
    tsv.current = NULL;
    return tsv;
}


void threadedSearch(TransitionGeneratorPtr tg, Buffer buff) {
    TSValue tsv = createTSValue(tg,buff);
    generateTState(tg->stateList,tg);
    tg->stateStart = NEW(State);
    *tg->stateStart = *tg->stateList;
    tg->stateStart->accept = false; // Start state never accept
    StatePtr state = tg->stateStart;
    checkBMSearch(state->cc);
    tsv.current = generateTState(tg->stateStart,tg);
    tSearch(tsv);
}

/* end */