view regexParser/grepWalk.cc @ 319:7b8234c090f7

bmSearch
author mir3636
date Sun, 08 May 2016 22:53:20 +0900
parents fa590a7272ae
children
line wrap: on
line source

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "grepWalk.h"
#include "subsetConstruction.h"
#include "CharClass.h"
#include "threadedSearch.h"
#include "bmSearch.h"

/*
 * Look up the state indexed by bi.bitContainer in tg->stateArray.
 * When the slot is still NULL the state is built on demand with
 * createState() + determinize() and stored in the slot before returning.
 */
StatePtr nextState(BitVector bi,TransitionGeneratorPtr tg) {
    StatePtr cached = tg->stateArray[bi.bitContainer];
    if (cached != NULL) {
        // already constructed by an earlier transition
        return cached;
    }

    // on the fly subset construction, then remember the result.
    StatePtr fresh = createState(tg,bi);
    determinize(fresh,tg);
    tg->stateArray[bi.bitContainer] = fresh;
    return fresh;
}

/*
 * Walk the input one byte at a time, following the transitions of the
 * automaton held in tg and creating missing states lazily via nextState().
 *
 * tg   - transition generator. tg->stateStart is replaced here by a fresh
 *        copy of *tg->stateList whose accept flag is cleared.
 *        tg->stateMatch is called when the current state is accepting and
 *        tg->stateSkip when the current byte has no transition; both take
 *        and return the TSValue that carries the scan position.
 * buff - handed to createTSValue(); the loop then walks tsv.buff from
 *        buffptr up to (not including) buffend.
 *
 * NOTE(review): the accept test only runs at the top of the loop, so a state
 * reached by consuming the very last byte of the buffer is never passed to
 * stateMatch. Confirm whether callers guarantee trailing input (for example
 * a final newline) that makes this unreachable.
 */
void grepWalk(TransitionGeneratorPtr tg,Buffer buff) {
    TSValue tsv = createTSValue(tg,buff);
    tg->stateStart = NEW(State);
    *tg->stateStart = *tg->stateList;
    tg->stateStart->accept = false; // Start state never accept
    StatePtr state = tg->stateStart;
    checkBMSearch(state->cc);

#if DEBUG
    TSValuePtr tsvp = &tsv;   // make tsv visible in lldb
#endif
    // Every `goto next` re-enters here so the end-of-buffer test is repeated.
    next: while (tsv.buff.buffptr < tsv.buff.buffend) {
        if (state->accept) {
            tsv = tg->stateMatch(tsv);
        }
        CharClassWalkerPtr ccw = createCharClassWalker(state->cc);
        if (!hasNext(ccw)) {
            // matched start again
            state = tg->stateStart;
            ccw = createCharClassWalker(state->cc);
        }
        unsigned char c = *tsv.buff.buffptr++;
        while (hasNext(ccw)) {
            CharClassPtr cc = getNext(ccw);
            if (c<cc->cond.range.begin) {
                // c is below this range: give up on this byte and restart.
                state = tg->stateStart;
                tsv = tg->stateSkip(tsv);
                goto next;
            } else if (c<=cc->cond.range.end) {
                // range matched.
                if (cc->cond.w.word) {
                    // Bytes still available, counting the one already read into c.
                    long remain = (long)(tsv.buff.buffend - (tsv.buff.buffptr - 1));
                    WordPtr w;
                    for (w = &cc->cond.w;w;w = w->next) {
                        // A word longer than the remaining input cannot match;
                        // comparing it anyway would read past buffend.
                        if ((long)w->length > remain) continue;
                        // match the word.
                        if (strncmp((const char *)w->word,(const char *)tsv.buff.buffptr-1,w->length)==0) break;
                    }
                    if (!w) continue; // if (not match) continue;
                    // c was already consumed, so skip the rest of the word.
                    tsv.buff.buffptr += w->length - 1;
                }
                state = nextState(cc->nextState,tg);
                goto next;
            }
        }
        // No range covered c: restart from the start state.
        state = tg->stateStart;
        tsv = tg->stateSkip(tsv);
    }
#if DEBUG
    // Self-assignment through tsvp; presumably keeps tsvp referenced so the
    // debugger can still see it at the end of the scan.
    *tsvp = tsv;
#endif
}