changeset 127:b061cd8205cc pairPro

merge
author masa
date Tue, 01 Dec 2015 21:50:09 +0900
parents 639b0b437ebf (diff) c363a66dc1a7 (current diff)
children f827682d4687
files c/regexParser/regexParser.cc
diffstat 4 files changed, 56 insertions(+), 90 deletions(-) [+]
line wrap: on
line diff
--- a/c/regexParser/regexParser.cc	Tue Dec 01 17:06:26 2015 +0900
+++ b/c/regexParser/regexParser.cc	Tue Dec 01 21:50:09 2015 +0900
@@ -6,7 +6,7 @@
 #include "error.h"
 
 static NodePtr allocateNode();
-static NodePtr createNode(RegexInfoPtr,unsigned char*,NodePtr,NodePtr);
+static NodePtr createNode(RegexInfoPtr,unsigned char,NodePtr,NodePtr);
 static NodePtr charClass(RegexInfoPtr);
 static NodePtr group(RegexInfoPtr);
 static void token(RegexInfoPtr);
@@ -23,64 +23,76 @@
 static
 NodePtr allocateNode() {
     NodePtr n = (NodePtr)malloc(sizeof(node));
-    n->cc = (CharClassPtr)malloc(sizeof(CharClass));
-    n->cc->cond = (ConditionList)malloc(sizeof(Condition));
+    n->cc = NULL;
+    n->left = NULL;
+    n->right = NULL;
     return n;
 }
 
 static
-NodePtr createNode(RegexInfoPtr ri,unsigned char *character, NodePtr left, NodePtr right) {
+CharClassPtr createCharClassWord(RegexInfoPtr ri) {
+    CharClassPtr cc = NEW(CharClass);
+    cc->type = 'a';
+    cc->cond = NEW(Condition);
+    cc->cond->w = NEW(Word);
+    cc->cond->w->word = ri->tokenValue;
+    cc->cond->w->length = ri->ptr - ri->tokenValue;
+
+    return cc;
+}
+
+static
+NodePtr createNode(RegexInfoPtr ri,unsigned char type, NodePtr left, NodePtr right) {
     NodePtr n = allocateNode();
-    if (n == NULL) {
-        mallocFailedMessage();
-    }
 
-    n->tokenType = ri->tokenType;
+    n->tokenType = type;
     n->left = left;
     n->right = right;
     n->nodeNumber = ri->nodeNumber;
     ri->nodeNumber++;
 
-    if (ri->tokenType == 'a') {
-        ri->tokenType = 0;
-        n->cc->cond->w = getWord(ri->tokenValue);
-    } else {
-        WordPtr w = (WordPtr)malloc(sizeof(Word));
-        w->word = character;
-        w->length = 1;
-        n->cc->cond->w = w;
+    if (type == 'a') {
+        n->cc = createCharClassWord(ri);
     }
+
     return n;
 }
 
+
 // <charClass> ::= '['<literal>'-'<literal>']'
 static
 NodePtr charClass(RegexInfoPtr ri) {
     NodePtr n = allocateNode();
-    if (n == NULL) {
-        mallocFailedMessage();
-    }
 
-    n->tokenType = ri->tokenType;
+    n->tokenType = 'c';
     n->nodeNumber = ri->nodeNumber;
     ri->nodeNumber++;
-    n->cc->cond->w = (WordPtr)malloc(sizeof(Word));
+
+    CharClassPtr cc = NEW(CharClass);
+    cc->type = 'r';
+    cc->cond = NEW(Condition);
+    cc->cond->range = NEW(RangeList);
+    cc->cond->range->begin = ri->ptr;
+    cc->cond->range->end = ri->ptr + 1;
+    cc->cond->range->next = NULL;
 
     int i = 0;
 
+    RangeListPtr rangeList = cc->cond->range;
+  
     while (ri->ptr[i] != ']') {
-        if (ri->ptr[i] == '-') {
-            n->cc->begin = ri->ptr[i-1];
-            n->cc->end = ri->ptr[i+1];
-        }
+        if (ri->ptr[i] == '-') i++;
+
+        rangeList->end = ri->ptr + i;
+        rangeList->next = NEW(RangeList);
+        rangeList = rangeList->next;
+        rangeList->begin = ri->ptr+i+1;
+        rangeList->next = NULL;
         i++;
     }
             // TODO literal support
 
-    n->cc->cond->w->word = (unsigned char*)malloc(sizeof(unsigned char)*(i+1));
-    strncpy((char*)n->cc->cond->w->word, (char*)ri->ptr,i+1);
-    n->cc->cond->w->word[i] = '\0';
-    ri->ptr += i+1;
+    rangeList->end = ri->ptr + i - 1;
 
     return n;
 }
@@ -88,7 +100,7 @@
 // <literal> ::= [a-z][A-Z][0-9]
 static
 NodePtr literal(RegexInfoPtr ri) {
-    NodePtr n = createNode(ri,ri->ptr,0,0);
+    NodePtr n = createNode(ri,'a',0,0);
     return n;
 }
 
@@ -158,31 +170,16 @@
     return n;
 }
 
-// <regex> ::= <regexAtom> | <regexAtom><regex>'*' | <regexAtom>'*' | <regexAtom>'|'<regex> | <regexAtom><regex> | '(' regex ')'
+// <regex> ::= <regexAtom> | <regexAtom>'*' | <regexAtom>'|'<regex> | <regexAtom><regex> | '(' <regex> ')'
 NodePtr regex(RegexInfoPtr ri) {
     NodePtr n = NULL;
     while (ri->ptr[0]) {
         token(ri);
         if (ri->tokenType == '*') {
-            // TODO literal support
-            unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
-            syntax[0] = '*';
-            NodePtr n1 = createNode(ri,syntax,n->right,0);
-
-            unsigned char *syntax1 = (unsigned char*)malloc(sizeof(unsigned char));
-            syntax1[0] = '+';
-
-            n = createNode(ri,syntax1,n->left,n1);
+            n = createNode(ri,'*',n,0);
         } else if (ri->tokenType == '|') {
             NodePtr n1 = regex(ri);
-            unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
-            syntax[0] = '|';
-            n = createNode(ri,syntax,n,n1);
-        } else if (ri->tokenType == '(') {
-            NodePtr n1 = regex(ri);
-            unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
-            syntax[0] = '+';
-            n = createNode(ri,syntax,n,n1);
+            n = createNode(ri,'|',n,n1);
         } else if (ri->tokenType == ')') {
             return n;
         } else if (ri->tokenType == 'a') {
@@ -193,9 +190,7 @@
         } else {
             // return NULL
             NodePtr n1 = regex(ri);
-            unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
-            syntax[0] = '+';
-            n = createNode(ri,syntax,n,n1);
+            n = createNode(ri,'a',n,n1);
         }
     } return n;
 }
--- a/c/regexParser/regexParser.h	Tue Dec 01 17:06:26 2015 +0900
+++ b/c/regexParser/regexParser.h	Tue Dec 01 21:50:09 2015 +0900
@@ -1,11 +1,16 @@
 #include "word.h"
 #include "error.h"
+
+#define NEW(type) (type*)malloc(sizeof(type))
+
+typedef struct utf8Range {
+    unsigned char *begin;
+    unsigned char *end;
+    struct utf8Range *next;
+} RangeList , *RangeListPtr;
+
 typedef union condition {
-    struct utf8Range {
-        unsigned char *begin;
-        unsigned char *end;
-        struct utf8Range *next;
-    } rangeList;
+    RangeListPtr range;
     unsigned char character;
     WordPtr w;
 } Condition, *ConditionList;
--- a/c/regexParser/word.cc	Tue Dec 01 17:06:26 2015 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,31 +0,0 @@
-#include <ctype.h>
-#include <string.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include "word.h"
-
-int getWordLength(unsigned char* w){
-    int i = 0;
-
-    for (i=0;isalnum(w[i]);i++);
-
-    return i;
-}
-
-WordPtr getWord(unsigned char *string) {
-
-    WordPtr w = (WordPtr)malloc(sizeof(Word));
-
-    int i = getWordLength(string);
-    int wordLength;
-    int allocateWordSize;
-
-    wordLength = i;
-    allocateWordSize = i+1;
-    unsigned char *word = (unsigned char*)malloc(sizeof(unsigned char)*allocateWordSize);
-    strncpy((char*)word, (char*)string, allocateWordSize);
-    word[wordLength] = '\0';
-    w->word = word;
-    w->length = wordLength;
-    return w;
-}
--- a/c/regexParser/word.h	Tue Dec 01 17:06:26 2015 +0900
+++ b/c/regexParser/word.h	Tue Dec 01 21:50:09 2015 +0900
@@ -2,6 +2,3 @@
     unsigned char *word;
     int length;
 } Word, *WordPtr;
-
-WordPtr getWord(unsigned char*);
-int getWordLength(unsigned char* w);