view regexParser/regexParser.h @ 319:7b8234c090f7

bmSearch
author mir3636
date Sun, 08 May 2016 22:53:20 +0900
parents a4484c02cba5
children
line wrap: on
line source

// Allocate uninitialized storage for one object of `type` with malloc() and
// yield the result cast to `type*`. The value is NULL when malloc() fails;
// storage obtained this way is released with free(). malloc() must already be
// declared (<stdlib.h>) wherever the macro is expanded.
// The whole expansion is parenthesized so that postfix operators bind to the
// cast result, e.g. NEW(T)->field, instead of to the malloc() call.
#define NEW(type) ((type*)malloc(sizeof(type)))

#ifndef INCLUDED_STRUCT
#define INCLUDED_STRUCT

// NOTE(review): BITBLOCK is presumably the number of bits available in
// bitVector.bitContainer. unsigned long is only guaranteed to hold 32 bits,
// so confirm the assumption on targets where long is not 64 bits wide.
#define BITBLOCK 64

// Bit set stored in a single unsigned long.
struct bitVector {
    unsigned long bitContainer;   // the raw bits
};
typedef struct bitVector BitVector;
typedef struct bitVector *BitVectorPtr;

// Boyer-Moore search data: a skip table together with the word it belongs to.
// Records can be chained through `next`.
struct bm {
    int *skip_table;              // skip table of the Boyer-Moore search
    unsigned char *search_word;   // bytes of the search word
    int search_word_len;          // presumably the byte length of search_word; confirm
    struct bm *next;              // link to another BM record
};
typedef struct bm BM;
typedef struct bm *BMPtr;

// A byte string with an explicit length and an attached BM record.
// Records can be chained through `next`.
struct word {
    unsigned char *word;   // bytes of the word
    int length;            // presumably the number of bytes in `word`; confirm
    BMPtr bm;              // Boyer-Moore data associated with this word
    struct word *next;     // link to another Word record
};
typedef struct word Word;
typedef struct word *WordPtr;

// Pair of unsigned long bounds.
// NOTE(review): the tag name suggests the bounds are UTF-8 character values;
// whether `end` is inclusive is not visible here — confirm against the users.
struct utf8Range {
    unsigned long begin;   // lower bound
    unsigned long end;     // upper bound
};
typedef struct utf8Range RangeList;
typedef struct utf8Range *RangeListPtr;

// Holds one RangeList and one Word, both by value.
// Note that ConditionList is simply a pointer to Condition.
struct condition {
    RangeList range;   // value range part of the condition
    Word w;            // word part of the condition
};
typedef struct condition Condition;
typedef struct condition *ConditionList;

// Character-class node carrying a Condition and two links (`left`, `right`)
// to other CharClass nodes.
struct charClass {
    struct charClass *left;    // first child/neighbour link
    struct charClass *right;   // second child/neighbour link
    Condition cond;            // condition stored by value
    int stateNum;              // state number attached to this node
    BitVector nextState;       // bit set; presumably the states reached on a match — confirm
};
typedef struct charClass CharClass;
typedef struct charClass *CharClassPtr;

// Forward declaration: struct state only stores a pointer to struct node.
struct node;

// One state record. Records can be chained through `next`.
struct state {
    int stateNum;            // number of this state
    BitVector bitState;      // bit set stored by value
    CharClassPtr cc;         // character class attached to the state
    bool accept;             // accept flag
    struct tState *tState;   // link to a struct tState
    struct node *node;       // link to a struct node
    struct state *next;      // link to another State record
};
typedef struct state State;
typedef struct state *StatePtr;

// Forward declaration of struct tsValue, whose full definition appears further
// down in this header.
struct tsValue;

// Flat record combining a [begin, end] pair, a Word, a bit set and a link to a
// struct tState.
// NOTE(review): presumably a flattened form of CharClass used at match time;
// confirm against the code that fills it in.
struct ccv {
    unsigned long begin;     // lower bound
    unsigned long end;       // upper bound
    Word w;                  // word stored by value
    BitVector state;         // bit set stored by value
    struct tState *tState;   // link to a struct tState
};
typedef struct ccv CCV;
typedef struct ccv *CCVPtr;

// Record that points at a State, carries two function pointers that take and
// return a tsValue by value, and refers to CCV data.
struct tState {
    State *state;                     // the State this record belongs to
    tsValue (*stateSkip)(tsValue);    // "skip" callback
    tsValue (*stateMatch)(tsValue);   // "match" callback
    int ccvSize;                      // presumably the number of CCV entries behind `ccv`; confirm
    CCVPtr ccv;                       // pointer to CCV data
};
typedef struct tState TState;
typedef struct tState *TStatePtr;

// One result entry delimited by two byte pointers. Entries can be chained
// through `next`.
struct result {
    unsigned char *begin;   // start of the span
    unsigned char *end;     // end of the span
    bool continued;         // NOTE(review): presumably marks a span that continues elsewhere; confirm
    struct result *next;    // link to another Result entry
};
typedef struct result Result;
typedef struct result *ResultPtr;

// Node with two links (`left`, `right`) to other nodes, a token type, a
// character class and state information.
struct node {
    unsigned char tokenType;   // kind of token this node represents
    CharClassPtr cc;           // character class attached to the node
    int stateNum;              // state number of the node
    int nextStateNum;          // number of the following state
    StatePtr state;            // State record for stateNum (presumably; confirm)
    StatePtr nextState;        // State record for nextStateNum (presumably; confirm)
    struct node *left;         // first child link
    struct node *right;        // second child link
};
typedef struct node Node;
typedef struct node *NodePtr;

// Singly linked stack entry holding one BitVector by value.
struct stateStack {
    BitVector state;           // stored bit set
    struct stateStack *next;   // link to another entry
};
typedef struct stateStack StateStack;
typedef struct stateStack *StateStackPtr;

// Aggregate of counters, State pointers and three callbacks.
// NOTE(review): presumably the shared context used while generating state
// transitions; the exact meaning of each counter should be confirmed against
// the code that updates it.
struct transitionGenerator {
    long totalStateCount;                   // counter of states
    long totalBasicState;                   // counter of "basic" states
    long maxWordLen;                        // maximum word length
    StateStackPtr stack;                    // head of a StateStack chain
    StatePtr stateEnd;                      // end state
    StatePtr stateStart;                    // start state without accept flag
    StatePtr *stateArray;                   // pointer to StatePtr elements
    StatePtr stateList;                     // head of a State chain
    StatePtr anyState;                      // NOTE(review): presumably the "match anything" state; confirm
    tsValue (*stateSkip)(tsValue tsv);      // "skip" callback, tsValue in and out by value
    tsValue (*stateMatch)(tsValue tsv);     // "match" callback, tsValue in and out by value
    tsValue (*stateNothing)(tsValue tsv);   // "nothing" callback, tsValue in and out by value
};
typedef struct transitionGenerator TransitionGenerator;
typedef struct transitionGenerator *TransitionGeneratorPtr;

// Three byte pointers describing a buffer.
// NOTE(review): presumably start, current position and end of the same
// memory region; whether `buffend` is one-past-the-end is not visible here.
struct buffer {
    unsigned char *buff;      // buffer start
    unsigned char *buffptr;   // position inside the buffer
    unsigned char *buffend;   // buffer end
};
typedef struct buffer Buffer;
typedef struct buffer *BufferPtr;

// Output record for one block: a Result chain plus two TState pointers.
struct blockOutput {
    ResultPtr result;       // head of the Result chain
    ResultPtr *resultEnd;   // pointer to a ResultPtr; presumably the slot where the next Result is appended — confirm
    TState *blockBegin;     // TState associated with the beginning of the block
    TState *blockEnd;       // TState associated with the end of the block
};
typedef struct blockOutput BlockOutput;
typedef struct blockOutput *BlockOutputPtr;

// Value passed to and returned from the stateSkip/stateMatch callbacks
// declared in this header. It embeds a Buffer by value, so copying a TSValue
// copies the three buffer pointers as well.
struct tsValue {
    TState *current;             // current TState
    Buffer buff;                 // buffer pointers, stored by value
    unsigned char *matchBegin;   // start of a match
    unsigned char *matchEnd;     // end of a match
    BlockOutputPtr blk;          // output record
    TransitionGeneratorPtr tg;   // transition generator
};
typedef struct tsValue TSValue;
typedef struct tsValue *TSValuePtr;

// Bundle of State pointers describing one expression, plus the transition
// generator they belong to.
struct tgValue {
    StatePtr asterisk;           // last * state of the expression
    StatePtr startState;         // startState of the expression
    StatePtr endState;           // end state of the expression
    TransitionGeneratorPtr tg;   // transition generator
};
typedef struct tgValue TGValue;
typedef struct tgValue *TGValuePtr;

// Three-valued marker stored in the `turn` field of CharClassStack and
// CharClassWalker. The numeric values are the implicit defaults, written out.
enum charClassStackState {
    LEFT  = 0,
    SELF  = 1,
    RIGHT = 2
};

// Singly linked stack entry pairing a CharClass pointer with a turn marker.
struct charClassStack {
    charClassStackState turn;      // LEFT, SELF or RIGHT
    CharClassPtr cc;               // character class stored in this entry
    struct charClassStack *next;   // link to another entry
};
typedef struct charClassStack CharClassStack;
typedef struct charClassStack *CharClassStackPtr;

// Cursor over CharClass nodes: a CharClassStack, a turn marker and a
// CharClass pointer.
// NOTE(review): presumably iteration state for walking the left/right links
// of CharClass; confirm against the walker implementation.
struct charClassWalker {
    CharClassStackPtr stack;    // head of the CharClassStack chain
    charClassStackState turn;   // LEFT, SELF or RIGHT
    CharClassPtr next;          // a CharClass node; presumably the next one to visit
};
typedef struct charClassWalker CharClassWalker;
typedef struct charClassWalker *CharClassWalkerPtr;


// Parser-side record: a byte pointer, the current token (type and value), a
// state number, a maximum word length and a word-mode flag.
// This is the type passed to regex() and createNode().
struct regexInfo {
    unsigned char *ptr;          // byte pointer; presumably the read position in the pattern — confirm
    unsigned char tokenType;     // type of the current token
    unsigned char *tokenValue;   // value of the current token
    int stateNumber;             // state number counter
    long maxWordLen;             // maximum word length
    bool wordMode;               // word-mode flag
};
typedef struct regexInfo RegexInfo;
typedef struct regexInfo *RegexInfoPtr;

// A byte pointer paired with a size.
// NOTE(review): the names suggest an mmap()ed file and its length in bytes;
// confirm against the code that fills this in.
typedef struct {
    unsigned char *file_mmap;   // start of the bytes
    unsigned long size;         // size value that goes with file_mmap
} st_mmap_t;

// Pairs a transition generator with a file name.
struct search {
    TransitionGeneratorPtr tg;   // transition generator to use
    const char *filename;        // file name string; not modified through this pointer
};
typedef struct search Search;
typedef struct search *SearchPtr;
#endif

// Defined outside this header. Takes the parser record, a token type, a
// character class and two nodes, and returns a NodePtr.
// NOTE(review): presumably builds a Node whose fields are set from the
// arguments; ownership of the result is not visible here.
NodePtr createNode(RegexInfoPtr ri,
                   unsigned char type,
                   CharClassPtr cc,
                   NodePtr left,
                   NodePtr right);
// Defined outside this header. Takes a begin/end pair, a state value and two
// CharClass pointers, and returns a CharClassPtr.
// NOTE(review): presumably builds a CharClass covering [begin, end]; confirm
// against the definition.
CharClassPtr createCharClassRange(unsigned long begin,
                                  unsigned long end,
                                  unsigned long state,
                                  CharClassPtr left,
                                  CharClassPtr right);
// Defined outside this header. Takes the parser record and returns a NodePtr.
// NOTE(review): presumably the parser entry point returning the root of the
// parsed expression; confirm against the definition.
NodePtr regex(RegexInfoPtr ri);