# HG changeset patch # User Masataka Kohagura # Date 1448540246 -32400 # Node ID aa266a4db47c011e7cbd0cbc5a744f8311e6cc50 # Parent 2f0653f8eabbe86e16b953f0df20d6729f92ad27# Parent 5d29b6a1b50fe002741558e6ea48626422df3896 merge diff -r 2f0653f8eabb -r aa266a4db47c c/regexParser/node.cc --- a/c/regexParser/node.cc Thu Nov 26 18:05:39 2015 +0900 +++ b/c/regexParser/node.cc Thu Nov 26 21:17:26 2015 +0900 @@ -7,11 +7,8 @@ descendTree(n->right, d); d--; } - if (n->tokenType == 'a') { - printf("%*c%c(%lu)\n",d*4, ' ',n->cc->cond->character,n->nodeNumber); - } else { - printf("%*c%c\n",d*4, ' ',n->cc->cond->character); - } + + printf("%*c%s(%lu)\n",d*4, ' ',n->cc->cond->w->word,n->nodeNumber); if (n->left != NULL) { d++; diff -r 2f0653f8eabb -r aa266a4db47c c/regexParser/regexParser.cc --- a/c/regexParser/regexParser.cc Thu Nov 26 18:05:39 2015 +0900 +++ b/c/regexParser/regexParser.cc Thu Nov 26 21:17:26 2015 +0900 @@ -3,7 +3,8 @@ #include "regexParser.h" #include "error.h" -static NodePtr createNode(RegexInfoPtr,NodePtr,NodePtr); +static NodePtr allocateNode(); +static NodePtr createNode(RegexInfoPtr,unsigned char*,NodePtr,NodePtr); static NodePtr charClass(RegexInfoPtr); static NodePtr group(RegexInfoPtr); static void token(RegexInfoPtr); @@ -17,6 +18,7 @@ * stateTransitionTable */ +static NodePtr allocateNode() { NodePtr n = (NodePtr)malloc(sizeof(node)); n->cc = (CharClassPtr)malloc(sizeof(CharClass)); @@ -25,21 +27,27 @@ } static -NodePtr createNode(RegexInfoPtr ri, NodePtr left, NodePtr right) { +NodePtr createNode(RegexInfoPtr ri,unsigned char *character, NodePtr left, NodePtr right) { NodePtr n = allocateNode(); if (n == NULL) { mallocFailedMessage(); } n->tokenType = ri->tokenType; - n->cc->cond->character = ri->tokenValue; n->left = left; n->right = right; + n->nodeNumber = ri->nodeNumber; + ri->nodeNumber++; if (ri->tokenType == 'a') { - n->nodeNumber = ri->nodeNumber; - ri->nodeNumber++; ri->tokenType = 0; + n->cc->cond->w = getWord(ri->tokenValue); + ri->ptr += n->cc->cond->w->length-1; + } else { + WordPtr w = (WordPtr)malloc(sizeof(Word)); + w->word = character; + w->length = 1; + n->cc->cond->w = w; } return n; } @@ -60,8 +68,7 @@ // ::= [a-z][A-Z][0-9] static NodePtr literal(RegexInfoPtr ri) { - NodePtr n = createNode(ri,0,0); - ri->ptr++; + NodePtr n = createNode(ri,ri->ptr,0,0); return n; } @@ -77,7 +84,7 @@ if (ri->ptr[0] == '('){ ri->ptr++; ri->tokenType = '('; - ri->tokenValue = 0; + ri->tokenValue = NULL; if (ri->ptr[1] == ')') { ri->ptr++; } @@ -85,12 +92,12 @@ } else if (ri->ptr[0] == ')') { ri->ptr++; ri->tokenType = ')'; - ri->tokenValue = ri->ptr[0]; + ri->tokenValue = ri->ptr; return; } else if (ri->ptr[0] == '[') { ri->ptr++; ri->tokenType = '['; - ri->tokenValue = ri->ptr[0]; + ri->tokenValue = ri->ptr; if (ri->ptr[1] == ']') { ri->ptr++; } @@ -98,12 +105,12 @@ } else if (ri->ptr[0] == '|'){ ri->ptr++; ri->tokenType = '|'; - ri->tokenValue = '|'; + ri->tokenValue = NULL; return; } else if (ri->ptr[0] == '*'){ ri->ptr++; ri->tokenType = '*'; - ri->tokenValue = '*'; + ri->tokenValue = NULL; return; } else if (ri->ptr[0] == '\\'){ // need more proccesing @@ -116,12 +123,11 @@ */ } else { ri->tokenType = 'a'; - ri->tokenValue = ri->ptr[0]; + ri->tokenValue = ri->ptr; + ri->ptr++; return; } } - ri->tokenType = 0; - ri->tokenValue = 0; return; } @@ -144,17 +150,21 @@ while (ri->ptr[0]) { token(ri); if (ri->tokenType == '*') { - n = createNode(ri,n,0); + unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); + syntax[0] = '*'; + n = createNode(ri,syntax,n,0); } else if (ri->tokenType == '|') { NodePtr n1 = regex(ri); - ri->tokenValue = '|'; - n = createNode(ri,n,n1); + unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); + syntax[0] = '|'; + n = createNode(ri,syntax,n,n1); } else if (ri->tokenType == ')') { return n; } else { NodePtr n1 = regex(ri); - ri->tokenValue = '+'; - n = createNode(ri,n,n1); + unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); + syntax[0] = '+'; + n = createNode(ri,syntax,n,n1); } } return n; } diff -r 2f0653f8eabb -r aa266a4db47c c/regexParser/regexParser.h --- a/c/regexParser/regexParser.h Thu Nov 26 18:05:39 2015 +0900 +++ b/c/regexParser/regexParser.h Thu Nov 26 21:17:26 2015 +0900 @@ -1,8 +1,5 @@ -typedef struct word { - unsigned char *word; - long length; -} Word, *WordPtr; - +#include "word.h" +#include "error.h" typedef union condition { struct utf8Range { unsigned char *begin; @@ -33,6 +30,6 @@ typedef struct regexInfo { unsigned char *ptr; unsigned char tokenType; - int tokenValue; + unsigned char *tokenValue; int nodeNumber; } RegexInfo, *RegexInfoPtr; diff -r 2f0653f8eabb -r aa266a4db47c c/regexParser/word.c --- a/c/regexParser/word.c Thu Nov 26 18:05:39 2015 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,18 +0,0 @@ -#include -#include -#include -#include -#include "word.h" - -unsigned char* getWord(unsigned char *string) { - int wordSize = 0; - while (isalnum(string[wordSize])) { - wordSize++; - } - - int allocateWordSize = wordSize + 1; - unsigned char *word = (unsigned char*)malloc(sizeof(unsigned char)*allocateWordSize); - strncpy((char*)word, (char*)string, allocateWordSize); - word[wordSize] = '\0'; - return word; -} diff -r 2f0653f8eabb -r aa266a4db47c c/regexParser/word.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/c/regexParser/word.cc Thu Nov 26 21:17:26 2015 +0900 @@ -0,0 +1,31 @@ +#include +#include +#include +#include +#include "word.h" + +int getWordLength(unsigned char* w){ + int i = 0; + + for (i=0;isalnum(w[i]);i++); + + return i; +} + +WordPtr getWord(unsigned char *string) { + + WordPtr w = (WordPtr)malloc(sizeof(Word)); + + int i = getWordLength(string); + int wordLength; + int allocateWordSize; + + wordLength = i; + allocateWordSize = i+1; + unsigned char *word = (unsigned char*)malloc(sizeof(unsigned char)*allocateWordSize); + strncpy((char*)word, (char*)string, allocateWordSize); + word[wordLength] = '\0'; + w->word = word; + w->length = wordLength; + return w; +} diff -r 2f0653f8eabb -r aa266a4db47c c/regexParser/word.h --- a/c/regexParser/word.h Thu Nov 26 18:05:39 2015 +0900 +++ b/c/regexParser/word.h Thu Nov 26 21:17:26 2015 +0900 @@ -1,1 +1,7 @@ -unsigned char* getWord(unsigned char*); +typedef struct word { + unsigned char *word; + int length; +} Word, *WordPtr; + +WordPtr getWord(unsigned char*); +int getWordLength(unsigned char* w);