changeset 125:4d6ac69801ad pairPro

fix createNode
author Nozomi
date Tue, 01 Dec 2015 21:11:27 +0900
parents 8ce93ffaf1ad
children 639b0b437ebf
files c/regexParser/regexParser.cc c/regexParser/regexParser.h c/regexParser/word.cc c/regexParser/word.h
diffstat 4 files changed, 43 insertions(+), 73 deletions(-) [+]
line wrap: on
line diff
--- a/c/regexParser/regexParser.cc	Tue Dec 01 00:09:09 2015 +0900
+++ b/c/regexParser/regexParser.cc	Tue Dec 01 21:11:27 2015 +0900
@@ -23,63 +23,73 @@
 static
 NodePtr allocateNode() {
     NodePtr n = (NodePtr)malloc(sizeof(node));
-    n->cc = (CharClassPtr)malloc(sizeof(CharClass));
-    n->cc->cond = (ConditionList)malloc(sizeof(Condition));
+    n->cc = NULL;
+    n->left = NULL;
+    n->right = NULL;
     return n;
 }
 
 static
-NodePtr createNode(RegexInfoPtr ri,unsigned char *character, NodePtr left, NodePtr right) {
+CharClassPtr createChaClassWord(RegexInfoPtr ri) {
+    CharClassPtr cc = NEW(CharClass);
+    cc->type = 'a';
+    cc->cond = NEW(Condition);
+    cc->cond->w = NEW(Word);
+    cc->cond->w->word = ri->tokenValue;
+    cc->cond->w->length = ri->ptr - ri->tokenValue;
+}
+
+static
+NodePtr createNode(RegexInfoPtr ri,unsigned char type, NodePtr left, NodePtr right) {
     NodePtr n = allocateNode();
-    if (n == NULL) {
-        mallocFailedMessage();
-    }
 
-    n->tokenType = ri->tokenType;
+    n->tokenType = type;
     n->left = left;
     n->right = right;
     n->nodeNumber = ri->nodeNumber;
     ri->nodeNumber++;
 
-    if (ri->tokenType == 'a') {
-        ri->tokenType = 0;
-        n->cc->cond->w = getWord(ri->tokenValue);
-    } else {
-        WordPtr w = (WordPtr)malloc(sizeof(Word));
-        w->word = character;
-        w->length = 1;
-        n->cc->cond->w = w;
+    if (type == 'a') {
+        n->cc = createCharClassWord(ri);
     }
+
     return n;
 }
 
+
 // <charClass> ::= '['<literal>'-'<literal>']'
 static
 NodePtr charClass(RegexInfoPtr ri) {
     NodePtr n = allocateNode();
-    if (n == NULL) {
-        mallocFailedMessage();
-    }
 
-    n->tokenType = ri->tokenType;
+    n->tokenType = 'c';
     n->nodeNumber = ri->nodeNumber;
     ri->nodeNumber++;
-    n->cc->cond->w = (WordPtr)malloc(sizeof(Word));
+
+    CharClassPtr cc = NEW(CharClass);
+    cc->cond = NEW(Condition);
+    cc->cond->type = 'r';
+    cc->cond->rangeList = NEW(struct utf8Range);
+    cc->cond->rangeList->begin = ri->ptr;
+    cc->cond->rangeList->end = ri->ptr + 1;
+    cc->cond->rangeList->next = NULL;
 
     int i = 0;
 
+    struct utf8Range *rangeList= cc->cond->rangeList;
+  
     while (ri->ptr[i] != ']') {
-        if (ri->ptr[i] == '-') {
-            n->cc->begin = ri->ptr[i-1];
-            n->cc->end = ri->ptr[i+1];
-        }
+        if (ri->ptr[i] == '-') i++;
+
+        rangeList->end = ri->ptr + i;
+        rangeList->next = NEW(struct utf8Range);
+        rangeList = rangeList->next;
+        rangeList->begin = ri->ptr+i+1;
+        rangeList->next = NULL;
         i++;
     }
 
-    n->cc->cond->w->word = (unsigned char*)malloc(sizeof(unsigned char)*(i+1));
-    strncpy((char*)n->cc->cond->w->word, (char*)ri->ptr,i+1);
-    n->cc->cond->w->word[i] = '\0';
-    ri->ptr += i+1;
+    rangeList->end = ri->ptr[i-1];
 
     return n;
 }
@@ -165,27 +175,21 @@
     return n;
 }
 
-// <regex> ::= <regexAtom> | <regexAtom><regex>'*' | <regexAtom>'*' | <regexAtom>'|'<regex> | <regexAtom><regex> | '(' regex ')'
+// <regex> ::= <regexAtom> | <regexAtom>'*' | <regexAtom>'|'<regex> | <regexAtom><regex> | '(' <regex> ')'
 NodePtr regex(RegexInfoPtr ri) {
     NodePtr n = regexAtom(ri);
     while (ri->ptr[0]) {
         token(ri);
         if (ri->tokenType == '*') {
-            unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
-            syntax[0] = '*';
-            n = createNode(ri,syntax,n,0);
+            n = createNode(ri,'*',n,0);
         } else if (ri->tokenType == '|') {
             NodePtr n1 = regex(ri);
-            unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
-            syntax[0] = '|';
-            n = createNode(ri,syntax,n,n1);
+            n = createNode(ri,'|',n,n1);
         } else if (ri->tokenType == ')') {
             return n;
         } else {
             NodePtr n1 = regex(ri);
-            unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
-            syntax[0] = '+';
-            n = createNode(ri,syntax,n,n1);
+            n = createNode(ri,'a',n,n1);
         }
     } return n;
 }
--- a/c/regexParser/regexParser.h	Tue Dec 01 00:09:09 2015 +0900
+++ b/c/regexParser/regexParser.h	Tue Dec 01 21:11:27 2015 +0900
@@ -5,7 +5,7 @@
         unsigned char *begin;
         unsigned char *end;
         struct utf8Range *next;
-    } rangeList;
+    } *rangeList;
     unsigned char character;
     WordPtr w;
 } Condition, *ConditionList;
--- a/c/regexParser/word.cc	Tue Dec 01 00:09:09 2015 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,31 +0,0 @@
-#include <ctype.h>
-#include <string.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include "word.h"
-
-int getWordLength(unsigned char* w){
-    int i = 0;
-
-    for (i=0;isalnum(w[i]);i++);
-
-    return i;
-}
-
-WordPtr getWord(unsigned char *string) {
-
-    WordPtr w = (WordPtr)malloc(sizeof(Word));
-
-    int i = getWordLength(string);
-    int wordLength;
-    int allocateWordSize;
-
-    wordLength = i;
-    allocateWordSize = i+1;
-    unsigned char *word = (unsigned char*)malloc(sizeof(unsigned char)*allocateWordSize);
-    strncpy((char*)word, (char*)string, allocateWordSize);
-    word[wordLength] = '\0';
-    w->word = word;
-    w->length = wordLength;
-    return w;
-}
--- a/c/regexParser/word.h	Tue Dec 01 00:09:09 2015 +0900
+++ b/c/regexParser/word.h	Tue Dec 01 21:11:27 2015 +0900
@@ -2,6 +2,3 @@
     unsigned char *word;
     int length;
 } Word, *WordPtr;
-
-WordPtr getWord(unsigned char*);
-int getWordLength(unsigned char* w);