changeset 110:a3adc5c24e19 pairPro

start branch
author masa
date Fri, 20 Nov 2015 21:02:00 +0900
parents 6401c708f5dd
children 1d30f70702df
files c/regexParser/bitVector.cc c/regexParser/bitVector.h c/regexParser/bitVectorNode.cc c/regexParser/createBitVectorList.cc c/regexParser/node.cc c/regexParser/printTree.cc c/regexParser/regexParser.h c/regexParser/subsetConstraction.cc c/regexParser/transition.h
diffstat 9 files changed, 204 insertions(+), 182 deletions(-) [+]
line wrap: on
line diff
--- a/c/regexParser/bitVector.cc	Fri Nov 20 15:19:09 2015 +0900
+++ b/c/regexParser/bitVector.cc	Fri Nov 20 21:02:00 2015 +0900
@@ -7,7 +7,14 @@
 
 int bitBlock = sizeof(unsigned long) * 8;
 
-BitVectorPtr bitSet(int bitSetPosition) {
+BitVectorListPtr createBitVector(NodePtr n,BitVectorListPtr bvl) {
+    BitVectorListPtr nextBvl = allocateBitVectorList();
+    nextBvl->bi = bitSet(n->nodeNumber);
+    nextBvl->initBvl = initBvl;
+    return nextBvl;
+}
+
+BitVectorPtr createBitVector(int bitSetPosition) {
 
     BitVectorPtr bi = (BitVectorPtr)malloc(sizeof(BitVector));
     if (bi == NULL) {
@@ -21,19 +28,20 @@
         exit(-1);
     }
 
-    bi->arrayNum = (bitSetPosition + 1) / bitBlock;
-    if (((bitSetPosition + 1) % bitBlock) != 0) bi->arrayNum++;
-
     for (int i = 0; i < bi->arrayNum; i++) {
         bi->bitContainer[i] = 0;
     }
-    unsigned long tmp = 1;
-    int arrayPosition = 0;
+
+    return bi;
+}
+
+BitVectorPtr bitSet(BitVectorPtr bi, int bitSetPosition) {
 
-    arrayPosition = bitSetPosition / bitBlock;
-    bitSetPosition = bitSetPosition % bitBlock;
+    bi->arrayNum = (bitSetPosition + bitBlock - 1) / bitBlock;
 
-    tmp = tmp << (bitBlock - 1 - bitSetPosition);
+    int arrayPosition = bitSetPosition / bitBlock;
+
+    unsigned long tmp = 1 << (bitSetPosition % bitBlock);
     bi->bitContainer[arrayPosition] = bi->bitContainer[arrayPosition] | tmp;
 
     return bi;
@@ -41,8 +49,10 @@
 
 void bitPrint(BitVectorPtr bi) {
     for (int i = 0; i < bi->arrayNum ; i++) {
-        for (int j = bitBlock - 1; j >= 0; j--) {
-            printf( "%lu", ( bi->bitContainer[i] >> j ) & 1 );
+        unsigned long vec = bi->bitContainer[i];
+        for (int j = 0; j < bitBlock; j++) {
+            printf( "%lu", vec & 1 );
+            vec >>= 1;
         }
         printf(" ");
     }
--- a/c/regexParser/bitVector.h	Fri Nov 20 15:19:09 2015 +0900
+++ b/c/regexParser/bitVector.h	Fri Nov 20 21:02:00 2015 +0900
@@ -1,3 +1,5 @@
+#define BITBLOCK (sizeof(unsigned long) * 8)
+
 typedef struct bitVector {
     int arrayNum;
     unsigned long *bitContainer;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/c/regexParser/bitVectorNode.cc	Fri Nov 20 21:02:00 2015 +0900
@@ -0,0 +1,38 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include "bitVector.h"
+#include "regexParser.h"
+
+BitVectorListPtr allocateBitVectorList() {
+    BitVectorListPtr bvl = (BitVectorListPtr)malloc(sizeof(BitVectorList));
+    if (bvl == NULL) {
+        fprintf(stderr, "Failed to allocate memory.\n");
+        exit(-1);
+    }
+
+    bvl->self = bvl;
+    bvl->bi = (BitVectorPtr)malloc(sizeof(BitVector));
+    if (bvl->bi == NULL) {
+        fprintf(stderr, "Failed to allocate memory.\n");
+        exit(-1);
+    }
+
+
+    return bvl;
+}
+
+
+BitVectorListPtr initBitVector() {
+
+    BitVectorListPtr bvl = allocateBitVectorList();
+    bvl->initBvl = initBvl = bvl;
+    bvl->bi = bitSet(0);
+
+    for (int i = 0; i < 256; i++) {
+        bvl->next[i] = NULL;
+    }
+
+    return bvl;
+}
+
--- a/c/regexParser/createBitVectorList.cc	Fri Nov 20 15:19:09 2015 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,134 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include "bitVector.h"
-#include "regexParser.h"
-
-extern BitVectorPtr bitSet(int);
-extern void bitPrint(BitVectorPtr);
-BitVectorListPtr createBitVector(NodePtr);
-BitVectorListPtr descendTreeNode(NodePtr, BitVectorListPtr, BitVectorListPtr, bool&);
-BitVectorListPtr setNextBitVectorList(unsigned char, BitVectorListPtr, BitVectorListPtr);
-
-BitVectorListPtr initBvl;
-
-BitVectorListPtr allocateBitVectorList() {
-    BitVectorListPtr bvl = (BitVectorListPtr)malloc(sizeof(BitVectorList));
-    if (bvl == NULL) {
-        fprintf(stderr, "Failed to allocate memory.\n");
-        exit(-1);
-    }
-
-    bvl->self = bvl;
-    bvl->bi = (BitVectorPtr)malloc(sizeof(BitVector));
-    if (bvl->bi == NULL) {
-        fprintf(stderr, "Failed to allocate memory.\n");
-        exit(-1);
-    }
-
-
-    return bvl;
-}
-
-BitVectorListPtr createBitVector(NodePtr n,BitVectorListPtr bvl) {
-    BitVectorListPtr nextBvl = allocateBitVectorList();
-    nextBvl->bi = bitSet(n->nodeNumber);
-    nextBvl->initBvl = initBvl;
-    return nextBvl;
-}
-
-
-BitVectorListPtr initBitVector() {
-
-    BitVectorListPtr bvl = allocateBitVectorList();
-    bvl->initBvl = initBvl = bvl;
-    bvl->bi = bitSet(0);
-
-    for (int i = 0; i < 256; i++) {
-        bvl->next[i] = NULL;
-    }
-
-    return bvl;
-}
-
-void printBitVectorList (BitVectorListPtr bvl) {
-    bool isFirstPrint = true;
-    for (int i = 0; i < 256; i++) {
-        if (bvl->next[i] != NULL) {
-            if (isFirstPrint){
-                puts("----");
-                printf("     state : "); bitPrint(bvl->bi);
-                isFirstPrint = false;
-            }
-            printf("input char : %c\n",i);
-            printf("next state : ");bitPrint(bvl->next[i]->bi);
-            bvl->isLoop = bvl->isLoopAnker;
-        }
-    }
-
-    for (int i = 0; i < 256; i++) {
-        if ((bvl->next[i] != NULL) && !(bvl->isLoop && bvl->isLoopAnker)) {
-        // if ((bvl->next[i] != NULL) ) {
-            printBitVectorList(bvl->next[i]);
-        }
-    }
-}
-
-BitVectorListPtr descendTreeNode(NodePtr n,BitVectorListPtr bvl, BitVectorListPtr prev, bool &fromOr, bool &fromAsterisk) {
-    bool leftIsOr, rightIsOr;
-    if (n->Value.character == '*') {
-        bvl = descendTreeNode(n->left, bvl, prev, leftIsOr, fromAsterisk);
-        unsigned char repertChar = 0;
-        for (int i = 0; i < 256; i++) {
-            if (prev->next[i] != NULL) repertChar = i;
-        }
-        setNextBitVectorList(repertChar, bvl, prev->next[repertChar]); // here
-        bvl->isLoopAnker = true;
-        fromAsterisk = true;
-
-        return prev;
-    } else if (n->Value.character == '|') {
-        bvl = descendTreeNode(n->left, bvl, prev, leftIsOr, fromAsterisk);
-        setNextBitVectorList(n->left->Value.character, prev, bvl);
-        bvl = descendTreeNode(n->right, bvl, prev, rightIsOr, fromAsterisk);
-        setNextBitVectorList(n->right->Value.character, prev, bvl);
-        fromOr = true;
-        return prev;
-    } else if (n->Value.character == '+') {
-        bvl = descendTreeNode(n->left, bvl, prev, leftIsOr, fromAsterisk);
-        setNextBitVectorList(n->left->Value.character, prev, bvl);
-        prev = bvl;
-        bvl = descendTreeNode(n->right, bvl, prev, rightIsOr, fromAsterisk);
-
-        if (leftIsOr){
-            for (int i = 0; i < 256; i++)
-                if (prev->next[i] != NULL)
-                    setNextBitVectorList(n->right->Value.character, prev->next[i], bvl);
-        }
-        else {
-            setNextBitVectorList(n->right->Value.character, prev, bvl);
-        }
-
-        fromOr = false;
-    } else if (n->tokenType == 'a') {
-        bvl = createBitVector(n,bvl);
-        fromOr = false;
-    }
-    return bvl;
-}
-
-BitVectorListPtr setNextBitVectorList(unsigned char inputChar, BitVectorListPtr bvl, BitVectorListPtr next){
-    if (isalnum((int)inputChar)){
-        bvl->next[(int)inputChar] = next;
-    }
-    return next;
-}
-
-BitVectorListPtr createBitVectorList(NodePtr n) {
-    BitVectorListPtr bvl = initBitVector();
-    bool fromOr = false;
-    bool fromAsterisk = false;
-    descendTreeNode(n, bvl, bvl, fromOr, fromAsterisk);
-    printBitVectorList(bvl);
-    return bvl;
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/c/regexParser/node.cc	Fri Nov 20 21:02:00 2015 +0900
@@ -0,0 +1,28 @@
+#include <stdio.h>
+#include "regexParser.h"
+
+void descendTree(NodePtr n, int d) {
+    if (n->right != NULL) {
+        d++;
+        descendTree(n->right, d);
+        d--;
+    }
+    if (n->tokenType == 'a') {
+        printf("%*c%c(%d)\n",d*4, ' ',n->Value.character,n->nodeNumber);
+    } else {
+        printf("%*c%c\n",d*4, ' ',n->Value.character);
+    }
+
+    if (n->left != NULL) {
+        d++;
+        descendTree(n->left, d);
+        d--;
+    }
+}
+
+void printTree(NodePtr n) {
+    puts("---Print Node----");
+    int d = 0;
+    descendTree(n,d);
+    puts("-----------------");
+}
--- a/c/regexParser/printTree.cc	Fri Nov 20 15:19:09 2015 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-#include <stdio.h>
-#include "regexParser.h"
-
-void descendTree(NodePtr n, int d) {
-    if (n->right != NULL) {
-        d++;
-        descendTree(n->right, d);
-        d--;
-    }
-    if (n->tokenType == 'a') {
-        printf("%*c%c(%d)\n",d*4, ' ',n->Value.character,n->nodeNumber);
-    } else {
-        printf("%*c%c\n",d*4, ' ',n->Value.character);
-    }
-
-    if (n->left != NULL) {
-        d++;
-        descendTree(n->left, d);
-        d--;
-    }
-}
-
-void printTree(NodePtr n) {
-    puts("---Print Node----");
-    int d = 0;
-    descendTree(n,d);
-    puts("-----------------");
-}
--- a/c/regexParser/regexParser.h	Fri Nov 20 15:19:09 2015 +0900
+++ b/c/regexParser/regexParser.h	Fri Nov 20 21:02:00 2015 +0900
@@ -20,15 +20,6 @@
         unsigned char character;
         WordPtr w;
     } Value, *ValuePtr;
-    struct node *self;
-    struct node *parent;
     struct node *left;
     struct node *right;
 } Node, *NodePtr;
-
-typedef struct regexInfo {
-    unsigned char *ptr;
-    unsigned char tokenType;
-    int tokenValue;
-    int nodeNumber;
-} RegexInfo, *RegexInfoPtr;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/c/regexParser/subsetConstraction.cc	Fri Nov 20 21:02:00 2015 +0900
@@ -0,0 +1,88 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include "bitVector.h"
+#include "regexParser.h"
+
+
+void printBitVectorList (BitVectorListPtr bvl) {
+    bool isFirstPrint = true;
+    for (int i = 0; i < 256; i++) {
+        if (bvl->next[i] != NULL) {
+            if (isFirstPrint){
+                puts("----");
+                printf("     state : "); bitPrint(bvl->bi);
+                isFirstPrint = false;
+            }
+            printf("input char : %c\n",i);
+            printf("next state : ");bitPrint(bvl->next[i]->bi);
+            bvl->isLoop = bvl->isLoopAnker;
+        }
+    }
+
+    for (int i = 0; i < 256; i++) {
+        if ((bvl->next[i] != NULL) && !(bvl->isLoop && bvl->isLoopAnker)) {
+        // if ((bvl->next[i] != NULL) ) {
+            printBitVectorList(bvl->next[i]);
+        }
+    }
+}
+
+BitVectorListPtr descendTreeNode(NodePtr n,BitVectorListPtr bvl, BitVectorListPtr prev, bool &fromOr, bool &fromAsterisk) {
+    bool leftIsOr, rightIsOr;
+    if (n->Value.character == '*') {
+        bvl = descendTreeNode(n->left, bvl, prev, leftIsOr, fromAsterisk);
+        unsigned char repertChar = 0;
+        for (int i = 0; i < 256; i++) {
+            if (prev->next[i] != NULL) repertChar = i;
+        }
+        setNextBitVectorList(repertChar, bvl, prev->next[repertChar]); // here
+        bvl->isLoopAnker = true;
+        fromAsterisk = true;
+
+        return prev;
+    } else if (n->Value.character == '|') {
+        bvl = descendTreeNode(n->left, bvl, prev, leftIsOr, fromAsterisk);
+        setNextBitVectorList(n->left->Value.character, prev, bvl);
+        bvl = descendTreeNode(n->right, bvl, prev, rightIsOr, fromAsterisk);
+        setNextBitVectorList(n->right->Value.character, prev, bvl);
+        fromOr = true;
+        return prev;
+    } else if (n->Value.character == '+') {
+        bvl = descendTreeNode(n->left, bvl, prev, leftIsOr, fromAsterisk);
+        setNextBitVectorList(n->left->Value.character, prev, bvl);
+        prev = bvl;
+        bvl = descendTreeNode(n->right, bvl, prev, rightIsOr, fromAsterisk);
+
+        if (leftIsOr){
+            for (int i = 0; i < 256; i++)
+                if (prev->next[i] != NULL)
+                    setNextBitVectorList(n->right->Value.character, prev->next[i], bvl);
+        }
+        else {
+            setNextBitVectorList(n->right->Value.character, prev, bvl);
+        }
+
+        fromOr = false;
+    } else if (n->tokenType == 'a') {
+        bvl = createBitVector(n,bvl);
+        fromOr = false;
+    }
+    return bvl;
+}
+
+BitVectorListPtr setNextBitVectorList(unsigned char inputChar, BitVectorListPtr bvl, BitVectorListPtr next){
+    if (isalnum((int)inputChar)){
+        bvl->next[(int)inputChar] = next;
+    }
+    return next;
+}
+
+BitVectorListPtr createBitVectorList(NodePtr n) {
+    BitVectorListPtr bvl = initBitVector();
+    bool fromOr = false;
+    bool fromAsterisk = false;
+    descendTreeNode(n, bvl, bvl, fromOr, fromAsterisk);
+    printBitVectorList(bvl);
+    return bvl;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/c/regexParser/transition.h	Fri Nov 20 21:02:00 2015 +0900
@@ -0,0 +1,27 @@
+typedef struct transition {
+    CharClassPtr condition;
+    BitVectorPtr nextState;
+    struct transition *next;
+} Transition, TransitionPtr;
+
+
+typedef struct state {
+    TransitionPtr transition;
+    struct state *next;
+} State; StatePtr;
+/*
+  正規表現木を辿って transition のList をつくる
+  HcarClass のかさなりを判定して重なりのない新しいCharClassをつくる
+  重なっている状態はbitvectorのorをとる
+  重なっている状態はそれぞれの状態について木をたどる
+  
+  nextState == 0 は正規表現の末端を表す
+  nextState == 1 は受理状態を表す
+  正規表現のノードの番号 n に対応する 2^n のbitをセットした状態
+  
+  
+  | の場合は両方のListを結合する
+  + の場合は左のノードに * がある場合は右のリストも結合する
+  左のノードにあすたがない場合は、右のほうだけみる
+  * は直下のリストを使って、次の状態を自分自身にする
+ */