changeset 121:aa266a4db47c pairPro

merge
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Thu, 26 Nov 2015 21:17:26 +0900
parents 2f0653f8eabb (current diff) 5d29b6a1b50f (diff)
children 188d866227a4
files c/regexParser/regexParser.cc c/regexParser/word.c
diffstat 6 files changed, 73 insertions(+), 50 deletions(-) [+]
line wrap: on
line diff
--- a/c/regexParser/node.cc	Thu Nov 26 18:05:39 2015 +0900
+++ b/c/regexParser/node.cc	Thu Nov 26 21:17:26 2015 +0900
@@ -7,11 +7,8 @@
         descendTree(n->right, d);
         d--;
     }
-    if (n->tokenType == 'a') {
-        printf("%*c%c(%lu)\n",d*4, ' ',n->cc->cond->character,n->nodeNumber);
-    } else {
-        printf("%*c%c\n",d*4, ' ',n->cc->cond->character);
-    }
+
+    printf("%*c%s(%lu)\n",d*4, ' ',n->cc->cond->w->word,n->nodeNumber);
 
     if (n->left != NULL) {
         d++;
--- a/c/regexParser/regexParser.cc	Thu Nov 26 18:05:39 2015 +0900
+++ b/c/regexParser/regexParser.cc	Thu Nov 26 21:17:26 2015 +0900
@@ -3,7 +3,8 @@
 #include "regexParser.h"
 #include "error.h"
 
-static NodePtr createNode(RegexInfoPtr,NodePtr,NodePtr);
+static NodePtr allocateNode();
+static NodePtr createNode(RegexInfoPtr,unsigned char*,NodePtr,NodePtr);
 static NodePtr charClass(RegexInfoPtr);
 static NodePtr group(RegexInfoPtr);
 static void token(RegexInfoPtr);
@@ -17,6 +18,7 @@
  *     stateTransitionTable
  */
 
+static
 NodePtr allocateNode() {
     NodePtr n = (NodePtr)malloc(sizeof(node));
     n->cc = (CharClassPtr)malloc(sizeof(CharClass));
@@ -25,21 +27,27 @@
 }
 
 static
-NodePtr createNode(RegexInfoPtr ri, NodePtr left, NodePtr right) {
+NodePtr createNode(RegexInfoPtr ri,unsigned char *character, NodePtr left, NodePtr right) { // builds a node for the current token; character is the text stored when the token is not 'a'
     NodePtr n = allocateNode();
     if (n == NULL) {
         mallocFailedMessage();
     }
 
     n->tokenType = ri->tokenType;
-    n->cc->cond->character = ri->tokenValue;
     n->left = left;
     n->right = right;
+    n->nodeNumber = ri->nodeNumber; // every node now takes a number, not only 'a' nodes
+    ri->nodeNumber++;
 
     if (ri->tokenType == 'a') {
-        n->nodeNumber = ri->nodeNumber;
-        ri->nodeNumber++;
         ri->tokenType = 0;
+        n->cc->cond->w = getWord(ri->tokenValue); // copies the alphanumeric run that starts at tokenValue
+        ri->ptr += n->cc->cond->w->length-1; // the 'a' branch of the tokenizer already advanced ptr by one; NOTE(review): for a zero-length word this adds -1 and undoes that advance
+    } else {
+        WordPtr w = (WordPtr)malloc(sizeof(Word)); // NOTE(review): result is not checked for NULL
+        w->word = character; // pointer is stored as-is, not copied; NOTE(review): regex() passes a 1-byte buffer with no '\0' and descendTree prints it with %s
+        w->length = 1;
+        n->cc->cond->w = w;
     }
     return n;
 }
@@ -60,8 +68,7 @@
 // <literal> ::= [a-z][A-Z][0-9]
 static
 NodePtr literal(RegexInfoPtr ri) {
-    NodePtr n = createNode(ri,0,0);
-    ri->ptr++;
+    NodePtr n = createNode(ri,ri->ptr,0,0); // leaf node (no children); ptr is no longer advanced here, createNode adjusts it for 'a' tokens
     return n;
 }
 
@@ -77,7 +84,7 @@
         if (ri->ptr[0] == '('){
             ri->ptr++;
             ri->tokenType = '(';
-            ri->tokenValue = 0;
+            ri->tokenValue = NULL;
             if (ri->ptr[1] == ')') {
                 ri->ptr++;
             }
@@ -85,12 +92,12 @@
         } else if (ri->ptr[0] == ')') {
             ri->ptr++;
             ri->tokenType = ')';
-            ri->tokenValue = ri->ptr[0];
+            ri->tokenValue = ri->ptr;
             return;
         } else if (ri->ptr[0] == '[') {
             ri->ptr++;
             ri->tokenType = '[';
-            ri->tokenValue = ri->ptr[0];
+            ri->tokenValue = ri->ptr;
             if (ri->ptr[1] == ']') {
                 ri->ptr++;
             }
@@ -98,12 +105,12 @@
         } else if (ri->ptr[0] == '|'){
             ri->ptr++;
             ri->tokenType = '|';
-            ri->tokenValue = '|';
+            ri->tokenValue = NULL;
             return;
         } else if (ri->ptr[0] == '*'){
             ri->ptr++;
             ri->tokenType = '*';
-            ri->tokenValue = '*';
+            ri->tokenValue = NULL;
             return;
         } else if (ri->ptr[0] == '\\'){
             // need more proccesing 
@@ -116,12 +123,11 @@
             */
         } else {
             ri->tokenType = 'a';
-            ri->tokenValue = ri->ptr[0];
+            ri->tokenValue = ri->ptr;
+            ri->ptr++;
             return;
         }
     }
-    ri->tokenType = 0;
-    ri->tokenValue = 0;
     return;
 }
 
@@ -144,17 +150,21 @@
     while (ri->ptr[0]) {
         token(ri);
         if (ri->tokenType == '*') {
-            n = createNode(ri,n,0);
+            unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
+            syntax[0] = '*';
+            n = createNode(ri,syntax,n,0);
         } else if (ri->tokenType == '|') {
             NodePtr n1 = regex(ri);
-            ri->tokenValue = '|';
-            n = createNode(ri,n,n1);
+            unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
+            syntax[0] = '|';
+            n = createNode(ri,syntax,n,n1);
         } else if (ri->tokenType == ')') {
             return n;
         } else {
             NodePtr n1 = regex(ri);
-            ri->tokenValue = '+';
-            n = createNode(ri,n,n1);
+            unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
+            syntax[0] = '+';
+            n = createNode(ri,syntax,n,n1);
         }
     } return n;
 }
--- a/c/regexParser/regexParser.h	Thu Nov 26 18:05:39 2015 +0900
+++ b/c/regexParser/regexParser.h	Thu Nov 26 21:17:26 2015 +0900
@@ -1,8 +1,5 @@
-typedef struct word {
-    unsigned char *word;
-    long length;
-} Word, *WordPtr;
-
+#include "word.h"
+#include "error.h"
 typedef union condition {
     struct utf8Range {
         unsigned char *begin;
@@ -33,6 +30,6 @@
 typedef struct regexInfo {
     unsigned char *ptr;
     unsigned char tokenType;
-    int tokenValue;
+    unsigned char *tokenValue; // now a pointer into the input (or NULL) rather than a character code
     int nodeNumber;
 } RegexInfo, *RegexInfoPtr;
--- a/c/regexParser/word.c	Thu Nov 26 18:05:39 2015 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-#include <ctype.h>
-#include <string.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include "word.h"
-
-unsigned char* getWord(unsigned char *string) {
-    int wordSize = 0;
-    while (isalnum(string[wordSize])) {
-        wordSize++;
-    }
-
-    int allocateWordSize = wordSize + 1;
-    unsigned char *word = (unsigned char*)malloc(sizeof(unsigned char)*allocateWordSize);
-    strncpy((char*)word, (char*)string, allocateWordSize);
-    word[wordSize] = '\0';
-    return word;
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/c/regexParser/word.cc	Thu Nov 26 21:17:26 2015 +0900
@@ -0,0 +1,31 @@
+#include <ctype.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "word.h"
+
+int getWordLength(unsigned char* w){ // returns the number of leading bytes of w for which isalnum() is true
+    int i = 0;
+
+    for (i=0;isalnum(w[i]);i++); // empty body: the loop only counts
+
+    return i;
+}
+
+WordPtr getWord(unsigned char *string) { // returns a malloc'ed Word holding a '\0'-terminated copy of the leading alphanumeric run of string
+
+    WordPtr w = (WordPtr)malloc(sizeof(Word)); // NOTE(review): neither malloc result in this function is checked for NULL
+
+    int i = getWordLength(string);
+    int wordLength;
+    int allocateWordSize;
+
+    wordLength = i;
+    allocateWordSize = i+1; // one extra byte for the terminating '\0'
+    unsigned char *word = (unsigned char*)malloc(sizeof(unsigned char)*allocateWordSize);
+    strncpy((char*)word, (char*)string, allocateWordSize); // may copy one byte past the word; that byte is overwritten on the next line
+    word[wordLength] = '\0';
+    w->word = word;
+    w->length = wordLength; // length excludes the terminator
+    return w;
+}
--- a/c/regexParser/word.h	Thu Nov 26 18:05:39 2015 +0900
+++ b/c/regexParser/word.h	Thu Nov 26 21:17:26 2015 +0900
@@ -1,1 +1,7 @@
-unsigned char* getWord(unsigned char*);
+typedef struct word { // a byte string together with its length
+    unsigned char *word;
+    int length; // number of bytes in word, not counting any terminator
+} Word, *WordPtr;
+
+WordPtr getWord(unsigned char*); // returns a newly allocated Word for the leading alphanumeric run of the argument
+int getWordLength(unsigned char* w); // counts the leading isalnum() bytes of w