view regexParser/grepWalk.cc @ 315:66012db6a717

add wordMode in grepWalk
author mir3636
date Sat, 07 May 2016 19:18:43 +0900
parents 1188debbef10
children c4d33b7c3ccd
line wrap: on
line source

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "grepWalk.h"
#include "subsetConstruction.h"
#include "CharClass.h"
#include "threadedSearch.h"

/*
 * Return the state stored in tg->stateArray at index bi.bitContainer.
 *
 * When that slot is still NULL the state is built on the fly (subset
 * construction): it is created from `bi`, determinized, and then stored in
 * the slot so later lookups with the same bit vector reuse it.
 */
StatePtr nextState(BitVector bi,TransitionGeneratorPtr tg) {
    StatePtr cached = tg->stateArray[bi.bitContainer];
    if (cached != NULL) {
        // already constructed by an earlier lookup
        return cached;
    }

    // first visit: construct, determinize, then remember the new state
    StatePtr fresh = createState(tg,bi);
    determinize(fresh,tg);
    tg->stateArray[bi.bitContainer] = fresh;
    return fresh;
}

/*
 * Walk the buffer `buff` one byte at a time, following transitions of the
 * automaton held in `tg`. Successor states are obtained through nextState(),
 * which builds them on demand.
 *
 * Side effects on tg:
 *   - tg->stateStart is replaced by a freshly allocated copy of *tg->stateList
 *     whose accept flag is cleared.
 *
 * Callbacks:
 *   - tg->stateMatch(tsv) is called at the top of an iteration when the
 *     current state is accepting.
 *   - tg->stateSkip(tsv) is called when the current byte has no transition
 *     from the current state; the walk then restarts from tg->stateStart.
 *
 * Word mode: when the matched char class carries a word (cc->cond.w.word is
 * non-NULL), the bytes starting at the current byte must equal one of the
 * words in the list before the transition is taken; on success the rest of
 * the word is consumed as well.
 */
void grepWalk(TransitionGeneratorPtr tg,Buffer buff) {
    TSValue tsv = createTSValue(tg,buff);
    tg->stateStart = NEW(State);
    *tg->stateStart = *tg->stateList;
    tg->stateStart->accept = false; // Start state never accept
    StatePtr state = tg->stateStart;

#if DEBUG
    TSValuePtr tsvp = &tsv;   // make tsv visible in lldb
#endif
    next: while (tsv.buff.buffptr < tsv.buff.buffend) {
        if (state->accept) {
            tsv = tg->stateMatch(tsv);
        }
        CharClassWalkerPtr ccw = createCharClassWalker(state->cc);
        if (!hasNext(ccw)) {
            // matched start again
            state = tg->stateStart;
            ccw = createCharClassWalker(state->cc);
        }
        unsigned char c = *tsv.buff.buffptr++;
//        printState(tsv.current->state);
        while (hasNext(ccw)) {
            CharClassPtr cc = getNext(ccw);
            if (c<cc->cond.range.begin) {
                // c lies below this range: no transition, restart
                state = tg->stateStart;
                tsv = tg->stateSkip(tsv);
                goto next;
            } else if (c<=cc->cond.range.end) {
                // range matched.
                if (cc->cond.w.word) {
                    // NOTE(review): assumes cc->cond.w is the head Word node
                    // of a list linked through ->next -- confirm against the
                    // Condition declaration.
                    // c was already consumed, so the candidate word starts at
                    // buffptr-1; `remaining` counts the bytes from there to
                    // the end of the buffer.
                    long remaining = tsv.buff.buffend - (tsv.buff.buffptr - 1);
                    WordPtr w;
                    for (w = &cc->cond.w;w;w = w->next) {
                        // stop at the first word that fits in the buffer and
                        // matches; otherwise try the next word in the list.
                        if (w->length <= remaining &&
                            memcmp(w->word,tsv.buff.buffptr-1,w->length) == 0) break;
                    }
                    if (!w) continue;   // no word matched: try next char class
                    // first byte of the word was consumed when reading c
                    tsv.buff.buffptr += w->length - 1;
                }
                state = nextState(cc->nextState,tg);
                goto next;
            }
        }
        // no char class accepted c
        state = tg->stateStart;
        tsv = tg->stateSkip(tsv);
    }
#if DEBUG
    *tsvp = tsv;
#endif
}