# HG changeset patch # User Masataka Kohagura # Date 1448343506 -32400 # Node ID ca30f83347418db4e57638a060473c1d89f3c551 # Parent c82e7a7ef8d97e5b7f531c68a1225afcd482f132 rename createRegexParser.cc to regexParser.cc diff -r c82e7a7ef8d9 -r ca30f8334741 c/regexParser/bitVector.cc --- a/c/regexParser/bitVector.cc Mon Nov 23 19:19:43 2015 +0900 +++ b/c/regexParser/bitVector.cc Tue Nov 24 14:38:26 2015 +0900 @@ -2,19 +2,23 @@ #include #include #include "bitVector.h" +#include "regexParser.h" -void bitPrint(BitVectorPtr bi); +extern BitVectorListPtr allocateBitVectorList(); +BitVectorListPtr createBitVector(NodePtr,BitVectorListPtr); +const BitVectorPtr allocateBitVector(); +BitVectorPtr bitSet(int); +void bitPrint(BitVectorPtr); int bitBlock = sizeof(unsigned long) * 8; BitVectorListPtr createBitVector(NodePtr n,BitVectorListPtr bvl) { BitVectorListPtr nextBvl = allocateBitVectorList(); nextBvl->bi = bitSet(n->nodeNumber); - nextBvl->initBvl = initBvl; return nextBvl; } -BitVectorPtr createBitVector(int bitSetPosition) { +const BitVectorPtr allocateBitVector() { BitVectorPtr bi = (BitVectorPtr)malloc(sizeof(BitVector)); if (bi == NULL) { @@ -35,7 +39,9 @@ return bi; } -BitVectorPtr bitSet(BitVectorPtr bi, int bitSetPosition) { +BitVectorPtr bitSet(int bitSetPosition) { + + BitVectorPtr bi = allocateBitVector(); bi->arrayNum = (bitSetPosition + bitBlock - 1) / bitBlock; diff -r c82e7a7ef8d9 -r ca30f8334741 c/regexParser/bitVectorNode.cc --- a/c/regexParser/bitVectorNode.cc Mon Nov 23 19:19:43 2015 +0900 +++ b/c/regexParser/bitVectorNode.cc Tue Nov 24 14:38:26 2015 +0900 @@ -4,6 +4,10 @@ #include "bitVector.h" #include "regexParser.h" +extern BitVectorPtr bitSet(int); +BitVectorListPtr allocateBitVectorList(); +BitVectorListPtr initBitVector(); + BitVectorListPtr allocateBitVectorList() { BitVectorListPtr bvl = (BitVectorListPtr)malloc(sizeof(BitVectorList)); if (bvl == NULL) { @@ -26,7 +30,7 @@ BitVectorListPtr initBitVector() { BitVectorListPtr bvl = 
allocateBitVectorList(); - bvl->initBvl = initBvl = bvl; + bvl->initBvl = bvl; bvl->bi = bitSet(0); for (int i = 0; i < 256; i++) { diff -r c82e7a7ef8d9 -r ca30f8334741 c/regexParser/createRegexParser.cc --- a/c/regexParser/createRegexParser.cc Mon Nov 23 19:19:43 2015 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,158 +0,0 @@ -#include -#include -#include "regexParser.h" - -static NodePtr createNode(RegexInfoPtr,unsigned char,NodePtr,NodePtr); -static NodePtr charClass(RegexInfoPtr); -static NodePtr group(RegexInfoPtr); -static void token(RegexInfoPtr); -static NodePtr regexAtom(RegexInfoPtr); -NodePtr regex(RegexInfoPtr); - -/** - * Create a node of regex parse tree. - * tokenType - * regexPosition(state) - * stateTransitionTable - */ - -static -NodePtr createNode(RegexInfoPtr ri,unsigned char character, NodePtr left, NodePtr right) { - NodePtr n = (NodePtr)malloc(sizeof(Node)); - if (n == NULL) { - mallocFailedMessage(); - } - - n->tokenType = ri->tokenType; - n->self = n; - n->Value.character = character; - n->left = left; - n->right = right; - - if (ri->tokenType == '*') { - n->left->parent = n->self; - } else if (ri->tokenType != 'a') { - n->right = right; - n->left->parent = n->right->parent = n->self; - } - - if (ri->tokenType == 'a') { - n->nodeNumber = ri->nodeNumber; - ri->nodeNumber++; - ri->tokenType = 0; - } - return n; -} - -// ::= '[''-'']' -static -NodePtr charClass(RegexInfoPtr ri) { - NodePtr n = (NodePtr)malloc(sizeof(Node)); - if (n == NULL) { - mallocFailedMessage(); - } - while (ri->ptr[0] == '-') { - ri->ptr++; - } - return n; -} - -// ::= [a-z][A-Z][0-9] -static -NodePtr literal(RegexInfoPtr ri) { - NodePtr n = createNode(ri,ri->ptr[0],0,0); - ri->ptr++; - return n; -} - -// ::= '('')' -static -NodePtr group(RegexInfoPtr ri) { - return regex(ri); -} - -static -void token(RegexInfoPtr ri) { - while (ri->ptr[0] != '\0') { - if (ri->ptr[0] == '('){ - ri->ptr++; - ri->tokenType = '('; - ri->tokenValue = 0; - if (ri->ptr[1] == ')') { - 
ri->ptr++; - } - return; - } else if (ri->ptr[0] == ')') { - ri->ptr++; - ri->tokenType = ')'; - ri->tokenValue = ri->ptr[0]; - return; - } else if (ri->ptr[0] == '[') { - ri->ptr++; - ri->tokenType = '['; - ri->tokenValue = ri->ptr[0]; - if (ri->ptr[1] == ']') { - ri->ptr++; - } - return; - } else if (ri->ptr[0] == '|'){ - ri->ptr++; - ri->tokenType = '|'; - ri->tokenValue = 0; - return; - } else if (ri->ptr[0] == '*'){ - ri->ptr++; - ri->tokenType = '*'; - ri->tokenValue = 0; - return; - } else if (ri->ptr[0] == '\\'){ - // need more proccesing - /* - \277 - \0xa5 - \[ - \\ - \utf-8 etc... - */ - } else { - ri->tokenType = 'a'; - ri->tokenValue = ri->ptr[0]; - return; - } - } - ri->tokenType = 0; - ri->tokenValue = 0; - return; -} - -// ::= || -static -NodePtr regexAtom(RegexInfoPtr ri) { - - token(ri); - NodePtr n = NULL; - if (ri->tokenType == 'a') n = literal(ri); - else if (ri->tokenType == '[') n = charClass(ri); - else if (ri->tokenType == '(') n = group(ri); - - return n; -} - -// ::= |'*'|'|'| -NodePtr regex(RegexInfoPtr ri) { - NodePtr n = regexAtom(ri); - while (ri->ptr[0]) { - token(ri); - if (ri->tokenType == '*') { - n = createNode(ri,'*',n,0); - } else if (ri->tokenType == '|') { - NodePtr n1 = regex(ri); - n = createNode(ri,'|',n,n1); - } else if (ri->tokenType == ')') { - return n; - } else { - NodePtr n1 = regex(ri); - n = createNode(ri,'+',n,n1); - } - } return n; -} diff -r c82e7a7ef8d9 -r ca30f8334741 c/regexParser/error.cc --- a/c/regexParser/error.cc Mon Nov 23 19:19:43 2015 +0900 +++ b/c/regexParser/error.cc Tue Nov 24 14:38:26 2015 +0900 @@ -1,7 +1,5 @@ #include -void mallocFailedMessage(); - void mallocFailedMessage() { fprintf(stderr, "Failed to allocate memory.\n"); exit(-1); diff -r c82e7a7ef8d9 -r ca30f8334741 c/regexParser/error.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/c/regexParser/error.h Tue Nov 24 14:38:26 2015 +0900 @@ -0,0 +1,1 @@ +void mallocFailedMessage(); diff -r c82e7a7ef8d9 -r ca30f8334741 
c/regexParser/regexParser.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/c/regexParser/regexParser.cc Tue Nov 24 14:38:26 2015 +0900 @@ -0,0 +1,158 @@ +#include +#include +#include "regexParser.h" +#include "error.h" + +typedef struct regexInfo { + unsigned char *ptr; + unsigned char tokenType; + int tokenValue; + int nodeNumber; +} RegexInfo, *RegexInfoPtr; + +static NodePtr createNode(RegexInfoPtr,unsigned char,NodePtr,NodePtr); +static NodePtr charClass(RegexInfoPtr); +static NodePtr group(RegexInfoPtr); +static void token(RegexInfoPtr); +static NodePtr regexAtom(RegexInfoPtr); +NodePtr regex(RegexInfoPtr); + +/** + * Create a node of regex parse tree. + * tokenType + * regexPosition(state) + * stateTransitionTable + */ + +static +NodePtr createNode(RegexInfoPtr ri,unsigned char character, NodePtr left, NodePtr right) { + NodePtr n = (NodePtr)malloc(sizeof(Node)); + if (n == NULL) { + mallocFailedMessage(); + } + + n->tokenType = ri->tokenType; + n->cc->conditionList->character = character; + n->left = left; + n->right = right; + + if (ri->tokenType == 'a') { + n->nodeNumber = ri->nodeNumber; + ri->nodeNumber++; + ri->tokenType = 0; + } + return n; +} + +// ::= '[''-'']' +static +NodePtr charClass(RegexInfoPtr ri) { + NodePtr n = (NodePtr)malloc(sizeof(Node)); + if (n == NULL) { + mallocFailedMessage(); + } + while (ri->ptr[0] == '-') { + ri->ptr++; + } + return n; +} + +// ::= [a-z][A-Z][0-9] +static +NodePtr literal(RegexInfoPtr ri) { + NodePtr n = createNode(ri,ri->ptr[0],0,0); + ri->ptr++; + return n; +} + +// ::= '('')' +static +NodePtr group(RegexInfoPtr ri) { + return regex(ri); +} + +static +void token(RegexInfoPtr ri) { + while (ri->ptr[0] != '\0') { + if (ri->ptr[0] == '('){ + ri->ptr++; + ri->tokenType = '('; + ri->tokenValue = 0; + if (ri->ptr[1] == ')') { + ri->ptr++; + } + return; + } else if (ri->ptr[0] == ')') { + ri->ptr++; + ri->tokenType = ')'; + ri->tokenValue = ri->ptr[0]; + return; + } else if (ri->ptr[0] == '[') { + ri->ptr++; + 
ri->tokenType = '['; + ri->tokenValue = ri->ptr[0]; + if (ri->ptr[1] == ']') { + ri->ptr++; + } + return; + } else if (ri->ptr[0] == '|'){ + ri->ptr++; + ri->tokenType = '|'; + ri->tokenValue = 0; + return; + } else if (ri->ptr[0] == '*'){ + ri->ptr++; + ri->tokenType = '*'; + ri->tokenValue = 0; + return; + } else if (ri->ptr[0] == '\\'){ + // need more proccesing + /* + \277 + \0xa5 + \[ + \\ + \utf-8 etc... + */ + } else { + ri->tokenType = 'a'; + ri->tokenValue = ri->ptr[0]; + return; + } + } + ri->tokenType = 0; + ri->tokenValue = 0; + return; +} + +// ::= || +static +NodePtr regexAtom(RegexInfoPtr ri) { + + token(ri); + NodePtr n = NULL; + if (ri->tokenType == 'a') n = literal(ri); + else if (ri->tokenType == '[') n = charClass(ri); + else if (ri->tokenType == '(') n = group(ri); + + return n; +} + +// ::= |'*'|'|'| +NodePtr regex(RegexInfoPtr ri) { + NodePtr n = regexAtom(ri); + while (ri->ptr[0]) { + token(ri); + if (ri->tokenType == '*') { + n = createNode(ri,'*',n,0); + } else if (ri->tokenType == '|') { + NodePtr n1 = regex(ri); + n = createNode(ri,'|',n,n1); + } else if (ri->tokenType == ')') { + return n; + } else { + NodePtr n1 = regex(ri); + n = createNode(ri,'+',n,n1); + } + } return n; +} diff -r c82e7a7ef8d9 -r ca30f8334741 c/regexParser/regexParser.h --- a/c/regexParser/regexParser.h Mon Nov 23 19:19:43 2015 +0900 +++ b/c/regexParser/regexParser.h Tue Nov 24 14:38:26 2015 +0900 @@ -1,3 +1,8 @@ +typedef struct word { + unsigned char *word; + long length; +} Word, *WordPtr; + typedef struct charClass { unsigned char type; union condition { @@ -9,14 +14,12 @@ unsigned char character; WordPtr w; } *conditionList; - struct charClass *next; + struct charClass *left; + struct charClass *right; + unsigned long *begin; + unsigned long *end; } CharClass, *CharClassPtr; -typedef struct word { - unsigned char *word; - long length; -} Word, *WordPtr; - typedef struct node { unsigned char tokenType; unsigned long nodeNumber; diff -r c82e7a7ef8d9 -r 
/*
 * c/regexParser/word.c -- file added by changeset ca30f8334741.
 *
 * NOTE(review): the names of the four system headers were lost in extraction.
 * ctype.h, stdlib.h and string.h are the ones the code needs; stdio.h is
 * presumed to be the fourth.
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/**
 * Copy the leading alphanumeric run of `string` into a new buffer.
 *
 * Bytes are taken from the start of `string` for as long as isalnum() accepts
 * them; the copy stops at the first other byte, which includes the
 * terminating NUL.
 *
 * @param string NUL-terminated input, or NULL.
 * @return A newly allocated, NUL-terminated copy of the run (an empty string
 *         when the first byte is not alphanumeric).  The caller releases it
 *         with free().  Returns NULL when `string` is NULL or the allocation
 *         fails.
 */
unsigned char* getWord(unsigned char *string) {
    if (string == NULL) {
        return NULL;
    }

    /* size_t: a length is never negative and cannot overflow like int. */
    size_t wordSize = 0;
    while (isalnum(string[wordSize])) {
        wordSize++;
    }

    unsigned char *word = malloc(wordSize + 1);  /* +1 for the terminator */
    if (word == NULL) {
        return NULL;
    }

    /* The length is already known, so copy exactly that many bytes and
     * terminate explicitly instead of relying on strncpy. */
    memcpy(word, string, wordSize);
    word[wordSize] = '\0';
    return word;
}