# HG changeset patch # User Masataka Kohagura # Date 1448525940 -32400 # Node ID 31b0ba0050faf2e3cdcab4747d542b10c8733eff # Parent 166136236891eed2c7093ef1948020083902e865 text diff -r 166136236891 -r 31b0ba0050fa c/regexParser/node.cc --- a/c/regexParser/node.cc Wed Nov 25 14:58:03 2015 +0900 +++ b/c/regexParser/node.cc Thu Nov 26 17:19:00 2015 +0900 @@ -8,9 +8,9 @@ d--; } if (n->tokenType == 'a') { - printf("%*c%c(%lu)\n",d*4, ' ',n->cc->cond->character,n->nodeNumber); + printf("%*c%s(%lu)\n",d*4, ' ',n->cc->cond->w->word,n->nodeNumber); } else { - printf("%*c%c\n",d*4, ' ',n->cc->cond->character); + printf("%*c%s\n",d*4, ' ',n->cc->cond->w->word); } if (n->left != NULL) { diff -r 166136236891 -r 31b0ba0050fa c/regexParser/regexParser.cc --- a/c/regexParser/regexParser.cc Wed Nov 25 14:58:03 2015 +0900 +++ b/c/regexParser/regexParser.cc Thu Nov 26 17:19:00 2015 +0900 @@ -3,7 +3,8 @@ #include "regexParser.h" #include "error.h" -static NodePtr createNode(RegexInfoPtr,unsigned char,NodePtr,NodePtr); +static NodePtr allocateNode(); +static NodePtr createNode(RegexInfoPtr,unsigned char*,NodePtr,NodePtr); static NodePtr charClass(RegexInfoPtr); static NodePtr group(RegexInfoPtr); static void token(RegexInfoPtr); @@ -17,6 +18,7 @@ * stateTransitionTable */ +static NodePtr allocateNode() { NodePtr n = (NodePtr)malloc(sizeof(node)); n->cc = (CharClassPtr)malloc(sizeof(CharClass)); @@ -25,14 +27,13 @@ } static -NodePtr createNode(RegexInfoPtr ri,unsigned char character, NodePtr left, NodePtr right) { +NodePtr createNode(RegexInfoPtr ri,unsigned char *character, NodePtr left, NodePtr right) { NodePtr n = allocateNode(); if (n == NULL) { mallocFailedMessage(); } n->tokenType = ri->tokenType; - n->cc->cond->character = character; n->left = left; n->right = right; @@ -40,6 +41,13 @@ n->nodeNumber = ri->nodeNumber; ri->nodeNumber++; ri->tokenType = 0; + n->cc->cond->w = getWord(ri->tokenValue); + ri->ptr += n->cc->cond->w->length-1; + } else { + WordPtr w = (WordPtr)malloc(sizeof(Word)); + w->word = character; + w->length = 1; + n->cc->cond->w = w; } return n; } @@ -60,8 +68,7 @@ // ::= [a-z][A-Z][0-9] static NodePtr literal(RegexInfoPtr ri) { - NodePtr n = createNode(ri,ri->ptr[0],0,0); - ri->ptr++; + NodePtr n = createNode(ri,ri->ptr,0,0); return n; } @@ -77,7 +84,7 @@ if (ri->ptr[0] == '('){ ri->ptr++; ri->tokenType = '('; - ri->tokenValue = 0; + ri->tokenValue = NULL; if (ri->ptr[1] == ')') { ri->ptr++; } @@ -85,12 +92,12 @@ } else if (ri->ptr[0] == ')') { ri->ptr++; ri->tokenType = ')'; - ri->tokenValue = ri->ptr[0]; + ri->tokenValue = ri->ptr; return; } else if (ri->ptr[0] == '[') { ri->ptr++; ri->tokenType = '['; - ri->tokenValue = ri->ptr[0]; + ri->tokenValue = ri->ptr; if (ri->ptr[1] == ']') { ri->ptr++; } @@ -98,12 +105,12 @@ } else if (ri->ptr[0] == '|'){ ri->ptr++; ri->tokenType = '|'; - ri->tokenValue = 0; + ri->tokenValue = NULL; return; } else if (ri->ptr[0] == '*'){ ri->ptr++; ri->tokenType = '*'; - ri->tokenValue = 0; + ri->tokenValue = NULL; return; } else if (ri->ptr[0] == '\\'){ // need more proccesing @@ -116,7 +123,8 @@ */ } else { ri->tokenType = 'a'; - ri->tokenValue = ri->ptr[0]; + ri->tokenValue = ri->ptr; + ri->ptr++; return; } } @@ -144,15 +152,21 @@ while (ri->ptr[0]) { token(ri); if (ri->tokenType == '*') { - n = createNode(ri,'*',n,0); + unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); + syntax[0] = '*'; + n = createNode(ri,syntax,n,0); } else if (ri->tokenType == '|') { NodePtr n1 = regex(ri); - n = createNode(ri,'|',n,n1); + unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); + syntax[0] = '|'; + n = createNode(ri,syntax,n,n1); } else if (ri->tokenType == ')') { return n; } else { NodePtr n1 = regex(ri); - n = createNode(ri,'+',n,n1); + unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); + syntax[0] = '+'; + n = createNode(ri,syntax,n,n1); } } return n; } diff -r 166136236891 -r 31b0ba0050fa c/regexParser/regexParser.h --- a/c/regexParser/regexParser.h Wed Nov 25 14:58:03 2015 +0900 +++ b/c/regexParser/regexParser.h Thu Nov 26 17:19:00 2015 +0900 @@ -1,8 +1,5 @@ -typedef struct word { - unsigned char *word; - long length; -} Word, *WordPtr; - +#include "word.h" +#include "error.h" typedef union condition { struct utf8Range { unsigned char *begin; @@ -33,6 +30,6 @@ typedef struct regexInfo { unsigned char *ptr; unsigned char tokenType; - int tokenValue; + unsigned char *tokenValue; int nodeNumber; } RegexInfo, *RegexInfoPtr; diff -r 166136236891 -r 31b0ba0050fa c/regexParser/word.c --- a/c/regexParser/word.c Wed Nov 25 14:58:03 2015 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,18 +0,0 @@ -#include -#include -#include -#include -#include "word.h" - -unsigned char* getWord(unsigned char *string) { - int wordSize = 0; - while (isalnum(string[wordSize])) { - wordSize++; - } - - int allocateWordSize = wordSize + 1; - unsigned char *word = (unsigned char*)malloc(sizeof(unsigned char)*allocateWordSize); - strncpy((char*)word, (char*)string, allocateWordSize); - word[wordSize] = '\0'; - return word; -} diff -r 166136236891 -r 31b0ba0050fa c/regexParser/word.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/c/regexParser/word.cc Thu Nov 26 17:19:00 2015 +0900 @@ -0,0 +1,36 @@ +#include +#include +#include +#include +#include "word.h" + +WordPtr getWord(unsigned char *string) { + + WordPtr w = (WordPtr)malloc(sizeof(Word)); + + int i = 0; + while (isalnum(string[i])) { + i++; + } + + int allocateWordSize, wordLength; + + unsigned char *word = NULL; + if (string[i] == '*') { + wordLength = i-1; + allocateWordSize = i; + word = (unsigned char*)malloc(sizeof(unsigned char)*allocateWordSize); + strncpy((char*)word, (char*)string, allocateWordSize); + word[allocateWordSize-1] = '\0'; + } else { + wordLength = i; + allocateWordSize = i+1; + word = (unsigned char*)malloc(sizeof(unsigned char)*allocateWordSize); + strncpy((char*)word, (char*)string, allocateWordSize); + word[allocateWordSize] = '\0'; + } + + w->word = word; + w->length = wordLength; + return w; +} diff -r 166136236891 -r 31b0ba0050fa c/regexParser/word.h --- a/c/regexParser/word.h Wed Nov 25 14:58:03 2015 +0900 +++ b/c/regexParser/word.h Thu Nov 26 17:19:00 2015 +0900 @@ -1,1 +1,6 @@ -unsigned char* getWord(unsigned char*); +typedef struct word { + unsigned char *word; + int length; +} Word, *WordPtr; + +WordPtr getWord(unsigned char*);