Mercurial > hg > Applications > Grep

--- a/c/regexParser/node.cc	Wed Nov 25 14:58:03 2015 +0900
+++ b/c/regexParser/node.cc	Thu Nov 26 17:19:00 2015 +0900
@@ -8,9 +8,9 @@
         d--;
     }
     if (n->tokenType == 'a') {
-        printf("%*c%c(%lu)\n",d*4, ' ',n->cc->cond->character,n->nodeNumber);
+        printf("%*c%s(%lu)\n",d*4, ' ',n->cc->cond->w->word,n->nodeNumber);
     } else {
-        printf("%*c%c\n",d*4, ' ',n->cc->cond->character);
+        printf("%*c%s\n",d*4, ' ',n->cc->cond->w->word);
     }

     if (n->left != NULL) {
--- a/c/regexParser/regexParser.cc	Wed Nov 25 14:58:03 2015 +0900
+++ b/c/regexParser/regexParser.cc	Thu Nov 26 17:19:00 2015 +0900
@@ -3,7 +3,8 @@
 #include "regexParser.h"
 #include "error.h"

-static NodePtr createNode(RegexInfoPtr,unsigned char,NodePtr,NodePtr);
+static NodePtr allocateNode();
+static NodePtr createNode(RegexInfoPtr,unsigned char*,NodePtr,NodePtr);
 static NodePtr charClass(RegexInfoPtr);
 static NodePtr group(RegexInfoPtr);
 static void token(RegexInfoPtr);
@@ -17,6 +18,7 @@
  *     stateTransitionTable
  */

+static
 NodePtr allocateNode() {
     NodePtr n = (NodePtr)malloc(sizeof(node));
     n->cc = (CharClassPtr)malloc(sizeof(CharClass));
@@ -25,14 +27,13 @@
 }

 static
-NodePtr createNode(RegexInfoPtr ri,unsigned char character, NodePtr left, NodePtr right) {
+NodePtr createNode(RegexInfoPtr ri,unsigned char *character, NodePtr left, NodePtr right) {
     NodePtr n = allocateNode();
     if (n == NULL) {
         mallocFailedMessage();
     }

     n->tokenType = ri->tokenType;
-    n->cc->cond->character = character;
     n->left = left;
     n->right = right;

@@ -40,6 +41,13 @@
         n->nodeNumber = ri->nodeNumber;
         ri->nodeNumber++;
         ri->tokenType = 0;
+        n->cc->cond->w = getWord(ri->tokenValue);
+        ri->ptr += n->cc->cond->w->length-1;
+    } else {
+        WordPtr w = (WordPtr)malloc(sizeof(Word));
+        w->word = character;
+        w->length = 1;
+        n->cc->cond->w = w;
     }
     return n;
 }
@@ -60,8 +68,7 @@
 // <literal> ::= [a-z][A-Z][0-9]
 static
 NodePtr literal(RegexInfoPtr ri) {
-    NodePtr n = createNode(ri,ri->ptr[0],0,0);
-    ri->ptr++;
+    NodePtr n = createNode(ri,ri->ptr,0,0);  // node with no left/right child; passes ri->ptr itself (a pointer), not the character it points at
     return n;
 }

@@ -77,7 +84,7 @@
         if (ri->ptr[0] == '('){
             ri->ptr++;
             ri->tokenType = '(';
-            ri->tokenValue = 0;
+            ri->tokenValue = NULL;
             if (ri->ptr[1] == ')') {
                 ri->ptr++;
             }
@@ -85,12 +92,12 @@
         } else if (ri->ptr[0] == ')') {
             ri->ptr++;
             ri->tokenType = ')';
-            ri->tokenValue = ri->ptr[0];
+            ri->tokenValue = ri->ptr;
             return;
         } else if (ri->ptr[0] == '[') {
             ri->ptr++;
             ri->tokenType = '[';
-            ri->tokenValue = ri->ptr[0];
+            ri->tokenValue = ri->ptr;
             if (ri->ptr[1] == ']') {
                 ri->ptr++;
             }
@@ -98,12 +105,12 @@
         } else if (ri->ptr[0] == '|'){
             ri->ptr++;
             ri->tokenType = '|';
-            ri->tokenValue = 0;
+            ri->tokenValue = NULL;
             return;
         } else if (ri->ptr[0] == '*'){
             ri->ptr++;
             ri->tokenType = '*';
-            ri->tokenValue = 0;
+            ri->tokenValue = NULL;
             return;
         } else if (ri->ptr[0] == '\\'){
             // need more proccesing
@@ -116,7 +123,8 @@
             */
         } else {
             ri->tokenType = 'a';
-            ri->tokenValue = ri->ptr[0];
+            ri->tokenValue = ri->ptr;
+            ri->ptr++;
             return;
         }
     }
@@ -144,15 +152,21 @@
     while (ri->ptr[0]) {
         token(ri);
         if (ri->tokenType == '*') {
-            n = createNode(ri,'*',n,0);
+            unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
+            syntax[0] = '*';
+            n = createNode(ri,syntax,n,0);
         } else if (ri->tokenType == '|') {
             NodePtr n1 = regex(ri);
-            n = createNode(ri,'|',n,n1);
+            unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
+            syntax[0] = '|';
+            n = createNode(ri,syntax,n,n1);
         } else if (ri->tokenType == ')') {
             return n;
         } else {
             NodePtr n1 = regex(ri);
-            n = createNode(ri,'+',n,n1);
+            unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
+            syntax[0] = '+';
+            n = createNode(ri,syntax,n,n1);
         }
     } return n;
 }
--- a/c/regexParser/regexParser.h	Wed Nov 25 14:58:03 2015 +0900
+++ b/c/regexParser/regexParser.h	Thu Nov 26 17:19:00 2015 +0900
@@ -1,8 +1,5 @@
-typedef struct word {
-    unsigned char *word;
-    long length;
-} Word, *WordPtr;
-
+#include "word.h"
+#include "error.h"
 typedef union condition {
     struct utf8Range {
         unsigned char *begin;
@@ -33,6 +30,6 @@
 typedef struct regexInfo {
     unsigned char *ptr;
     unsigned char tokenType;
-    int tokenValue;
+    unsigned char *tokenValue;
     int nodeNumber;
 } RegexInfo, *RegexInfoPtr;
--- a/c/regexParser/word.c	Wed Nov 25 14:58:03 2015 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-#include <ctype.h>
-#include <string.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include "word.h"
-
-unsigned char* getWord(unsigned char *string) {
-    int wordSize = 0;
-    while (isalnum(string[wordSize])) {
-        wordSize++;
-    }
-
-    int allocateWordSize = wordSize + 1;
-    unsigned char *word = (unsigned char*)malloc(sizeof(unsigned char)*allocateWordSize);
-    strncpy((char*)word, (char*)string, allocateWordSize);
-    word[wordSize] = '\0';
-    return word;
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/c/regexParser/word.cc	Thu Nov 26 17:19:00 2015 +0900
@@ -0,0 +1,36 @@
+#include <ctype.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "word.h"
+
+/*
+ * Copies the leading run of alphanumeric characters of `string` into a
+ * freshly allocated, NUL-terminated Word. If that run is directly followed
+ * by '*', its last character is left out of the word.
+ * Returns NULL when an allocation fails; the result is not freed here.
+ */
+WordPtr getWord(unsigned char *string) {
+    int i = 0;
+    while (isalnum(string[i])) {
+        i++;
+    }
+
+    // Leave out the character in front of '*', but never go below zero.
+    int wordLength = (string[i] == '*' && i > 0) ? i-1 : i;
+
+    WordPtr w = (WordPtr)malloc(sizeof(Word));
+    unsigned char *word = (unsigned char*)malloc(sizeof(unsigned char)*(wordLength+1));
+    if (w == NULL || word == NULL) {
+        free(w);
+        free(word);
+        return NULL;
+    }
+    memcpy(word, string, wordLength);
+    // Terminate at wordLength, the last index of the (wordLength+1)-byte buffer.
+    word[wordLength] = '\0';
+
+    w->word = word;
+    w->length = wordLength;
+    return w;
+}
--- a/c/regexParser/word.h	Wed Nov 25 14:58:03 2015 +0900
+++ b/c/regexParser/word.h	Thu Nov 26 17:19:00 2015 +0900
@@ -1,1 +1,6 @@
-unsigned char* getWord(unsigned char*);
+typedef struct word {  // a run of characters together with its length
+    unsigned char *word;  // points at the characters; getWord allocates a fresh buffer, createNode stores the caller's pointer as-is. NOTE(review): the latter is not shown to be NUL-terminated — confirm before treating it as a C string
+    int length;  // number of characters in word, terminator not counted (createNode sets 1 for a single character)
+} Word, *WordPtr;
+
+WordPtr getWord(unsigned char*);