# HG changeset patch # User Masataka Kohagura # Date 1438245057 -32400 # Node ID e0ad6c145f89c3fb03db0b2dab6d7d3adb618b8f # Parent 8fd3d35e98614bc73def16a2960daf05a8ff6abd remove some errors diff -r 8fd3d35e9861 -r e0ad6c145f89 c/regexParser/main.cc --- a/c/regexParser/main.cc Thu Jul 23 18:01:02 2015 +0900 +++ b/c/regexParser/main.cc Thu Jul 30 17:30:57 2015 +0900 @@ -17,22 +17,24 @@ struct utf8Range { unsigned char *begin; unsigned char *end; - struct utf8Range next; + struct utf8Range *next; } *rangeList; -} +} CharClass, *CharClassPtr; typedef struct node { unsigned char type; union value { charClass *cc; unsigned char *string; - } + }; struct node *self; struct node *left; struct node *right; } Node, *NodePtr; -char *ptr; +unsigned char *ptr; +unsigned char tokenType; +int tokenValue; NodePtr regexHeadNode; NodePtr charClass(); @@ -42,6 +44,9 @@ NodePtr asterisk(); NodePtr regex(); NodePtr createNode(char,NodePtr,NodePtr); +extern void token(); +extern NodePtr regexAtom(); + bool isLiteral(char c) { if (*ptr > 0x7f) return true; @@ -54,7 +59,7 @@ void printNodeDate(NodePtr n) { puts("---------------------"); - printf("Self Node char : %c\n", n->character); + // printf("Self Node char : %c\n", n->character); printf("Self Node addr : %p\n", n->self); printf("left Node addr : %p\n", n->left); printf("right Node addr : %p\n", n->right); @@ -65,7 +70,7 @@ NodePtr createNode(char character, NodePtr left, NodePtr right) { NodePtr n = (NodePtr)malloc(sizeof(Node)); n->self = n; - n->character = character; + // n->character = character; n->left = left; n->right = right; @@ -113,34 +118,6 @@ return n; } -// ::= |'*'|'|'| -NodePtr regex() { - NodePtr n = regexAtom(); - while (*ptr) { - token(); - if (tokenType == '*') { - n = createNode('*',n,0); - } else if (tokenType == '|') { - NodePtr n1 = regex(); - n = createNode('|',n,n1); - } else { - NodePtr n1 = regex(); - n = createNode('+',n,n1); - } - } -} - -// ::= || -NodePtr regexAtom() { - - token(); - NodePter n; - if (tokenType == 'a') n = literal(); - else if (tokenType == '[') n = charClass(); - else if (tokenType == '(') n = group(); - - return n; -} void token() { while (*ptr != '\0') { @@ -150,7 +127,7 @@ return ; } else if (*ptr == '[') { tokenType = '['; - tokenValue = ptr; + tokenValue = *ptr; if (ptr[1] == ']') { ptr++; } @@ -168,7 +145,8 @@ } tokenType = 'a'; - tokenValue = ptr; + tokenValue = *ptr; + ptr++; if (*ptr == '\\') ptr++; // need more proccesing /* @@ -182,11 +160,42 @@ } } +// ::= || +NodePtr regexAtom() { + + token(); + NodePtr n = NULL; + if (tokenType == 'a') n = literal(); + else if (tokenType == '[') n = charClass(); + else if (tokenType == '(') n = group(); + + return n; +} + +// ::= |'*'|'|'| +NodePtr regex() { + NodePtr n = regexAtom(); + while (*ptr) { + token(); + if (tokenType == '*') { + n = createNode('*',n,0); + } else if (tokenType == '|') { + NodePtr n1 = regex(); + n = createNode('|',n,n1); + } else { + NodePtr n1 = regex(); + n = createNode('+',n,n1); + } + } + return n; +} + + int main(int argc, char **argv) { for (int i = 1; i < argc; i++) { if (strcmp(argv[i],"-regex") == 0) { - ptr = argv[i+1]; i++; + ptr = (unsigned char*)argv[i+1]; i++; } }