Mercurial > hg > Applications > Grep
view regexParser/threadedSearch.cc @ 324:879dc5d1cb6a default tip
fix
author | mir3636 |
---|---|
date | Fri, 27 May 2016 21:21:09 +0900 |
parents | 62f4628d2c0d |
children |
line wrap: on
line source
#include <stdio.h> #include <stdlib.h> #include <string.h> #include "regexParser.h" #include "CharClass.h" #include "threadedSearch.h" #include "subsetConstruction.h" #include "bmSearch.h" #define max(a,b)((a)>(b)?a:b) #define min(a,b)((a)<(b)?a:b) TStatePtr nextTState(BitVector bi,TransitionGeneratorPtr tg); static TSValue stateNothing(TSValue tsv) { return tsv; } static TSValue stateSkip(TSValue tsv) { if (tsv.matchEnd) { fwrite(tsv.matchBegin,tsv.matchEnd-tsv.matchBegin,1,stdout); puts(""); tsv.matchEnd = NULL; } tsv.matchBegin = tsv.buff.buffptr; // next char may be matchBegin // if possible use bmsearch if (!tsv.current || !tsv.current->ccv[0].w.bm ) return tsv; while (tsv.buff.buffptr < tsv.buff.buffend) { long skip = tsv.tg->maxWordLen; for (int k = 0; k < tsv.current->ccvSize; k++) { CCVPtr ccv = &tsv.current->ccv[k]; if (ccv->w.bm) { int i = ccv->w.length - 1; while (tsv.buff.buffptr[i] == ccv->w.word[i]) { if (i == 0) { if (ccv->tState) { tsv.current = ccv->tState; } else { tsv.current = nextTState(ccv->state,tsv.tg); ccv->tState = tsv.current; } tsv.matchBegin = tsv.buff.buffptr; tsv.buff.buffptr += ccv->w.length; return tsv; } --i; } skip = min(skip,max(ccv->w.bm->skip_table[tsv.buff.buffptr[i]],ccv->w.length - i)); } } tsv.buff.buffptr += skip; } tsv.matchBegin = tsv.buff.buffptr; return tsv; } static TSValue stateMatch(TSValue tsv) { tsv.matchEnd = tsv.buff.buffptr; // next char of the match return tsv; } TStatePtr generateTState(StatePtr state, TransitionGeneratorPtr tg) { TStatePtr tState = NEW(TState); tState->state = state; int ccvSize = 0; CharClassWalkerPtr ccw = createCharClassWalker(state->cc); while (hasNext(ccw)) { getNext(ccw); ccvSize++; } tState->ccvSize = ccvSize; if (state->accept) { tState->stateMatch = tg->stateMatch; tState->stateSkip = tg->stateSkip; } else { tState->stateMatch = tg->stateNothing; tState->stateSkip = tg->stateSkip; } if (ccvSize == 0) { tState->ccv = NULL; state->tState = tState; return tState; } else tState->ccv = (ccv*)malloc(sizeof(ccv)*(ccvSize+1)); ccw = createCharClassWalker(state->cc); int i = 0; while (hasNext(ccw)) { CharClassPtr cc = getNext(ccw); unsigned long begin = cc->cond.range.begin; unsigned long end = cc->cond.range.end; struct ccv *ccv = &tState->ccv[i++]; ccv->begin = begin; ccv->end = end; ccv->tState = NULL; ccv->state = cc->nextState; ccv->w = cc->cond.w; } struct ccv *ccv = &tState->ccv[i]; ccv->begin = MAXCHAR+1; ccv->end = ccv->begin; ccv->tState = NULL; free(ccw); state->tState = tState; return tState; } TStatePtr nextTState(BitVector bi,TransitionGeneratorPtr tg) { // create tSearch in next state. StatePtr state = tg->stateArray[bi.bitContainer]; if (state == NULL) { // on the fly subset construction. state = createState(tg,bi); determinize(state,tg); tg->stateArray[bi.bitContainer] = state; } if (state->tState == NULL) { generateTState(state,tg); } return state->tState; } #define DEBUG 0 TSValue tSearch(TSValue tsv) { next: while (tsv.buff.buffptr < tsv.buff.buffend) { tsv = tsv.current->stateMatch(tsv); if (tsv.current->ccvSize==0) { // matched start again tsv.current = tsv.tg->stateStart->tState; } unsigned char c = *tsv.buff.buffptr++; // printState(tsv.current->state); CCVPtr ccv = &tsv.current->ccv[0]; for (;;) { if (c<ccv[0].begin) goto noMatch; else if (c<=ccv[0].end) goto match; if (c<ccv[1].begin) goto noMatch; else if (c<=ccv[1].end) goto match; if (c<ccv[2].begin) goto noMatch; else if (c<=ccv[2].end) goto match; if (c<ccv[3].begin) goto noMatch; else if (c<=ccv[3].end) goto match; if (c<ccv[4].begin) goto noMatch; else if (c<=ccv[4].end) goto match; ccv += 5; } match: // range matched. if (ccv->w.word) { WordPtr w; for (w = &ccv->w;w;w = w->next) { // match the word. if (strncmp((const char *)w->word,(const char *)tsv.buff.buffptr-1,w->length)==0) break; } if (!w) continue; // if (not match) continue; tsv.buff.buffptr += w->length - 1; } if (ccv->tState) { tsv.current = ccv->tState; } else { tsv.current = nextTState(ccv->state,tsv.tg); ccv->tState = tsv.current; } goto next; noMatch: tsv.current = tsv.tg->stateStart->tState; tsv = tsv.current->stateSkip(tsv); } return tsv; } TSValue createTSValue(TransitionGeneratorPtr tg, Buffer buff) { TSValue tsv; if (!tg) { tg = createTransitionGenerator(); } tsv.buff = buff; tsv.tg = tg; tsv.blk = NULL; tsv.matchBegin = buff.buffptr; tsv.matchEnd = NULL; tsv.tg->stateSkip = stateSkip; tsv.tg->stateMatch = stateMatch; tsv.tg->stateNothing = stateNothing; tsv.current = NULL; return tsv; } void threadedSearch(TransitionGeneratorPtr tg, Buffer buff) { TSValue tsv = createTSValue(tg,buff); generateTState(tg->stateList,tg); tg->stateStart = NEW(State); *tg->stateStart = *tg->stateList; tg->stateStart->accept = false; // Start state never accept StatePtr state = tg->stateStart; checkBMSearch(state->cc); tsv.current = generateTState(tg->stateStart,tg); tSearch(tsv); } /* end */