# HG changeset patch # User Masataka Kohagura # Date 1447137349 -32400 # Node ID 1cdad0468484d526edd482f864c03cca42bb3603 # Parent 43b807f88961187c439eef281e2b2ab13a2be673 rename createRegexTree to createRegexParser diff -r 43b807f88961 -r 1cdad0468484 c/regexParser/createRegexParser.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/c/regexParser/createRegexParser.cc Tue Nov 10 15:35:49 2015 +0900 @@ -0,0 +1,144 @@ +#include +#include +#include "regexParser.h" + +NodePtr createNode(RegexInfoPtr,unsigned char,NodePtr,NodePtr); +NodePtr charClass(RegexInfoPtr); +NodePtr group(RegexInfoPtr); +void token(RegexInfoPtr); +NodePtr regexAtom(RegexInfoPtr); +NodePtr regex(RegexInfoPtr); + +/** + * Create a node of regex parse tree. + * tokenType + * regexPosition(state) + * stateTransitionTable + */ +NodePtr createNode(RegexInfoPtr ri,unsigned char character, NodePtr left, NodePtr right) { + NodePtr n = (NodePtr)malloc(sizeof(Node)); + n->tokenType = ri->tokenType; + n->self = n; + n->Value.character = character; + n->left = left; + n->right = right; + + if (ri->tokenType == 'a') { + n->nodeNumber = ri->nodeNumber; + ri->nodeNumber++; + ri->tokenType = 0; + } + return n; +} + +// ::= '[''-'']' +NodePtr charClass(RegexInfoPtr ri) { + NodePtr n = (NodePtr)malloc(sizeof(Node)); + unsigned char startChar = ri->ptr[0]; + while (ri->ptr[0] == '-') { + ri->ptr++; + } + unsigned char endChar = ri->ptr[0]; + unsigned char *charTable = (unsigned char*)malloc(sizeof(char)*256); + + return n; +} + +// ::= [a-z][A-Z][0-9] +NodePtr literal(RegexInfoPtr ri) { + unsigned char *top = ri->ptr; + NodePtr n = createNode(ri,ri->ptr[0],0,0); + ri->ptr++; + return n; +} + +// ::= '('')' +NodePtr group(RegexInfoPtr ri) { + return regex(ri); +} + + + +void token(RegexInfoPtr ri) { + while (ri->ptr[0] != '\0') { + if (ri->ptr[0] == '('){ + ri->ptr++; + ri->tokenType = '('; + ri->tokenValue = 0; + if (ri->ptr[1] == ')') { + ri->ptr++; + } + return; + } else if (ri->ptr[0] == ')') { + ri->ptr++; + ri->tokenType = ')'; + ri->tokenValue = ri->ptr[0]; + return; + } else if (ri->ptr[0] == '[') { + ri->ptr++; + ri->tokenType = '['; + ri->tokenValue = ri->ptr[0]; + if (ri->ptr[1] == ']') { + ri->ptr++; + } + return; + } else if (ri->ptr[0] == '|'){ + ri->ptr++; + ri->tokenType = '|'; + ri->tokenValue = 0; + return; + } else if (ri->ptr[0] == '*'){ + ri->ptr++; + ri->tokenType = '*'; + ri->tokenValue = 0; + return; + } else if (ri->ptr[0] == '\\'){ + // need more proccesing + /* + \277 + \0xa5 + \[ + \\ + \utf-8 etc... + */ + } else { + ri->tokenType = 'a'; + ri->tokenValue = ri->ptr[0]; + return; + } + } + ri->tokenType = 0; + ri->tokenValue = 0; + return; +} + +// ::= || +NodePtr regexAtom(RegexInfoPtr ri) { + + token(ri); + NodePtr n = NULL; + if (ri->tokenType == 'a') n = literal(ri); + else if (ri->tokenType == '[') n = charClass(ri); + else if (ri->tokenType == '(') n = group(ri); + + return n; +} + +// ::= |'*'|'|'| +NodePtr regex(RegexInfoPtr ri) { + NodePtr n = regexAtom(ri); + while (ri->ptr[0]) { + token(ri); + if (ri->tokenType == '*') { + n = createNode(ri,'*',n,0); + } else if (ri->tokenType == '|') { + NodePtr n1 = regex(ri); + n = createNode(ri,'|',n,n1); + } else if (ri->tokenType == ')') { + return n; + } else { + NodePtr n1 = regex(ri); + n = createNode(ri,'+',n,n1); + } + } return n; +} diff -r 43b807f88961 -r 1cdad0468484 c/regexParser/createRegexTree.cc --- a/c/regexParser/createRegexTree.cc Mon Nov 09 12:26:07 2015 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,144 +0,0 @@ -#include -#include -#include "regexParser.h" - -NodePtr createNode(RegexInfoPtr,unsigned char,NodePtr,NodePtr); -NodePtr charClass(RegexInfoPtr); -NodePtr group(RegexInfoPtr); -void token(RegexInfoPtr); -NodePtr regexAtom(RegexInfoPtr); -NodePtr regex(RegexInfoPtr); - -/** - * Create a node of regex parse tree. - * tokenType - * regexPosition(state) - * stateTransitionTable - */ -NodePtr createNode(RegexInfoPtr ri,unsigned char character, NodePtr left, NodePtr right) { - NodePtr n = (NodePtr)malloc(sizeof(Node)); - n->tokenType = ri->tokenType; - n->self = n; - n->Value.character = character; - n->left = left; - n->right = right; - - if (ri->tokenType == 'a') { - n->nodeNumber = ri->nodeNumber; - ri->nodeNumber++; - ri->tokenType = 0; - } - return n; -} - -// ::= '[''-'']' -NodePtr charClass(RegexInfoPtr ri) { - NodePtr n = (NodePtr)malloc(sizeof(Node)); - unsigned char startChar = ri->ptr[0]; - while (ri->ptr[0] == '-') { - ri->ptr++; - } - unsigned char endChar = ri->ptr[0]; - unsigned char *charTable = (unsigned char*)malloc(sizeof(char)*256); - - return n; -} - -// ::= [a-z][A-Z][0-9] -NodePtr literal(RegexInfoPtr ri) { - unsigned char *top = ri->ptr; - NodePtr n = createNode(ri,ri->ptr[0],0,0); - ri->ptr++; - return n; -} - -// ::= '('')' -NodePtr group(RegexInfoPtr ri) { - return regex(ri); -} - - - -void token(RegexInfoPtr ri) { - while (ri->ptr[0] != '\0') { - if (ri->ptr[0] == '('){ - ri->ptr++; - ri->tokenType = '('; - ri->tokenValue = 0; - if (ri->ptr[1] == ')') { - ri->ptr++; - } - return; - } else if (ri->ptr[0] == ')') { - ri->ptr++; - ri->tokenType = ')'; - ri->tokenValue = ri->ptr[0]; - return; - } else if (ri->ptr[0] == '[') { - ri->ptr++; - ri->tokenType = '['; - ri->tokenValue = ri->ptr[0]; - if (ri->ptr[1] == ']') { - ri->ptr++; - } - return; - } else if (ri->ptr[0] == '|'){ - ri->ptr++; - ri->tokenType = '|'; - ri->tokenValue = 0; - return; - } else if (ri->ptr[0] == '*'){ - ri->ptr++; - ri->tokenType = '*'; - ri->tokenValue = 0; - return; - } else if (ri->ptr[0] == '\\'){ - // need more proccesing - /* - \277 - \0xa5 - \[ - \\ - \utf-8 etc... - */ - } else { - ri->tokenType = 'a'; - ri->tokenValue = ri->ptr[0]; - return; - } - } - ri->tokenType = 0; - ri->tokenValue = 0; - return; -} - -// ::= || -NodePtr regexAtom(RegexInfoPtr ri) { - - token(ri); - NodePtr n = NULL; - if (ri->tokenType == 'a') n = literal(ri); - else if (ri->tokenType == '[') n = charClass(ri); - else if (ri->tokenType == '(') n = group(ri); - - return n; -} - -// ::= |'*'|'|'| -NodePtr regex(RegexInfoPtr ri) { - NodePtr n = regexAtom(ri); - while (ri->ptr[0]) { - token(ri); - if (ri->tokenType == '*') { - n = createNode(ri,'*',n,0); - } else if (ri->tokenType == '|') { - NodePtr n1 = regex(ri); - n = createNode(ri,'|',n,n1); - } else if (ri->tokenType == ')') { - return n; - } else { - NodePtr n1 = regex(ri); - n = createNode(ri,'+',n,n1); - } - } return n; -} diff -r 43b807f88961 -r 1cdad0468484 c/regexParser/main.cc --- a/c/regexParser/main.cc Mon Nov 09 12:26:07 2015 +0900 +++ b/c/regexParser/main.cc Tue Nov 10 15:35:49 2015 +0900 @@ -10,9 +10,11 @@ #include #include #include "regexParser.h" +#include "bitVector.h" extern NodePtr regex(RegexInfoPtr); extern void printTree(NodePtr); +BitVectorListPtr createBitVectorList(NodePtr); int main(int argc, char **argv) @@ -29,5 +31,6 @@ printf("regex : %s\n",ri->ptr); NodePtr n = regex(ri); printTree(n); + createBitVectorList(n); return 0; }