# HG changeset patch # User masa # Date 1448974209 -32400 # Node ID b061cd8205ccc577037f29f80eaf85cc15b5fed8 # Parent 639b0b437ebf9df8605ad15835cbf3087e10e4e9# Parent c363a66dc1a7119bb459c6161db9f2876147a78e merge diff -r c363a66dc1a7 -r b061cd8205cc c/regexParser/regexParser.cc --- a/c/regexParser/regexParser.cc Tue Dec 01 17:06:26 2015 +0900 +++ b/c/regexParser/regexParser.cc Tue Dec 01 21:50:09 2015 +0900 @@ -6,7 +6,7 @@ #include "error.h" static NodePtr allocateNode(); -static NodePtr createNode(RegexInfoPtr,unsigned char*,NodePtr,NodePtr); +static NodePtr createNode(RegexInfoPtr,unsigned char,NodePtr,NodePtr); static NodePtr charClass(RegexInfoPtr); static NodePtr group(RegexInfoPtr); static void token(RegexInfoPtr); @@ -23,64 +23,76 @@ static NodePtr allocateNode() { NodePtr n = (NodePtr)malloc(sizeof(node)); - n->cc = (CharClassPtr)malloc(sizeof(CharClass)); - n->cc->cond = (ConditionList)malloc(sizeof(Condition)); + n->cc = NULL; + n->left = NULL; + n->right = NULL; return n; } static -NodePtr createNode(RegexInfoPtr ri,unsigned char *character, NodePtr left, NodePtr right) { +CharClassPtr createCharClassWord(RegexInfoPtr ri) { + CharClassPtr cc = NEW(CharClass); + cc->type = 'a'; + cc->cond = NEW(Condition); + cc->cond->w = NEW(Word); + cc->cond->w->word = ri->tokenValue; + cc->cond->w->length = ri->ptr - ri->tokenValue; + + return cc; +} + +static +NodePtr createNode(RegexInfoPtr ri,unsigned char type, NodePtr left, NodePtr right) { NodePtr n = allocateNode(); - if (n == NULL) { - mallocFailedMessage(); - } - n->tokenType = ri->tokenType; + n->tokenType = type; n->left = left; n->right = right; n->nodeNumber = ri->nodeNumber; ri->nodeNumber++; - if (ri->tokenType == 'a') { - ri->tokenType = 0; - n->cc->cond->w = getWord(ri->tokenValue); - } else { - WordPtr w = (WordPtr)malloc(sizeof(Word)); - w->word = character; - w->length = 1; - n->cc->cond->w = w; + if (type == 'a') { + n->cc = createCharClassWord(ri); } + return n; } + // ::= '[''-'']' static NodePtr charClass(RegexInfoPtr ri) { NodePtr n = allocateNode(); - if (n == NULL) { - mallocFailedMessage(); - } - n->tokenType = ri->tokenType; + n->tokenType = 'c'; n->nodeNumber = ri->nodeNumber; ri->nodeNumber++; - n->cc->cond->w = (WordPtr)malloc(sizeof(Word)); + + CharClassPtr cc = NEW(CharClass); + cc->type = 'r'; + cc->cond = NEW(Condition); + cc->cond->range = NEW(RangeList); + cc->cond->range->begin = ri->ptr; + cc->cond->range->end = ri->ptr + 1; + cc->cond->range->next = NULL; int i = 0; + RangeListPtr rangeList = cc->cond->range; + while (ri->ptr[i] != ']') { - if (ri->ptr[i] == '-') { - n->cc->begin = ri->ptr[i-1]; - n->cc->end = ri->ptr[i+1]; - } + if (ri->ptr[i] == '-') i++; + + rangeList->end = ri->ptr + i; + rangeList->next = NEW(RangeList); + rangeList = rangeList->next; + rangeList->begin = ri->ptr+i+1; + rangeList->next = NULL; i++; } // TODO literal support - n->cc->cond->w->word = (unsigned char*)malloc(sizeof(unsigned char)*(i+1)); - strncpy((char*)n->cc->cond->w->word, (char*)ri->ptr,i+1); - n->cc->cond->w->word[i] = '\0'; - ri->ptr += i+1; + rangeList->end = ri->ptr + i - 1; return n; } @@ -88,7 +100,7 @@ // ::= [a-z][A-Z][0-9] static NodePtr literal(RegexInfoPtr ri) { - NodePtr n = createNode(ri,ri->ptr,0,0); + NodePtr n = createNode(ri,'a',0,0); return n; } @@ -158,31 +170,16 @@ return n; } -// ::= | '*' | '*' | '|' | | '(' regex ')' +// ::= | '*' | '|' | | '(' regex ')' NodePtr regex(RegexInfoPtr ri) { NodePtr n = NULL; while (ri->ptr[0]) { token(ri); if (ri->tokenType == '*') { - // TODO literal support - unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); - syntax[0] = '*'; - NodePtr n1 = createNode(ri,syntax,n->right,0); - - unsigned char *syntax1 = (unsigned char*)malloc(sizeof(unsigned char)); - syntax1[0] = '+'; - - n = createNode(ri,syntax1,n->left,n1); + n = createNode(ri,'*',n,0); } else if (ri->tokenType == '|') { NodePtr n1 = regex(ri); - unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); - syntax[0] = '|'; - n = createNode(ri,syntax,n,n1); - } else if (ri->tokenType == '(') { - NodePtr n1 = regex(ri); - unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); - syntax[0] = '+'; - n = createNode(ri,syntax,n,n1); + n = createNode(ri,'|',n,n1); } else if (ri->tokenType == ')') { return n; } else if (ri->tokenType == 'a') { @@ -193,9 +190,7 @@ } else { // return NULL NodePtr n1 = regex(ri); - unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); - syntax[0] = '+'; - n = createNode(ri,syntax,n,n1); + n = createNode(ri,'a',n,n1); } } return n; } diff -r c363a66dc1a7 -r b061cd8205cc c/regexParser/regexParser.h --- a/c/regexParser/regexParser.h Tue Dec 01 17:06:26 2015 +0900 +++ b/c/regexParser/regexParser.h Tue Dec 01 21:50:09 2015 +0900 @@ -1,11 +1,16 @@ #include "word.h" #include "error.h" + +#define NEW(type) (type*)malloc(sizeof(type)) + +typedef struct utf8Range { + unsigned char *begin; + unsigned char *end; + struct utf8Range *next; +} RangeList , *RangeListPtr; + typedef union condition { - struct utf8Range { - unsigned char *begin; - unsigned char *end; - struct utf8Range *next; - } rangeList; + RangeListPtr range; unsigned char character; WordPtr w; } Condition, *ConditionList; diff -r c363a66dc1a7 -r b061cd8205cc c/regexParser/word.cc --- a/c/regexParser/word.cc Tue Dec 01 17:06:26 2015 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ -#include -#include -#include -#include -#include "word.h" - -int getWordLength(unsigned char* w){ - int i = 0; - - for (i=0;isalnum(w[i]);i++); - - return i; -} - -WordPtr getWord(unsigned char *string) { - - WordPtr w = (WordPtr)malloc(sizeof(Word)); - - int i = getWordLength(string); - int wordLength; - int allocateWordSize; - - wordLength = i; - allocateWordSize = i+1; - unsigned char *word = (unsigned char*)malloc(sizeof(unsigned char)*allocateWordSize); - strncpy((char*)word, (char*)string, allocateWordSize); - word[wordLength] = '\0'; - w->word = word; - w->length = wordLength; - return w; -} diff -r c363a66dc1a7 -r b061cd8205cc c/regexParser/word.h --- a/c/regexParser/word.h Tue Dec 01 17:06:26 2015 +0900 +++ b/c/regexParser/word.h Tue Dec 01 21:50:09 2015 +0900 @@ -2,6 +2,3 @@ unsigned char *word; int length; } Word, *WordPtr; - -WordPtr getWord(unsigned char*); -int getWordLength(unsigned char* w);