# HG changeset patch # User masa # Date 1448020920 -32400 # Node ID a3adc5c24e1957bdd3f3429e02c83342778268c4 # Parent 6401c708f5ddb35159863d2017360619c0b97ce0 start branch diff -r 6401c708f5dd -r a3adc5c24e19 c/regexParser/bitVector.cc --- a/c/regexParser/bitVector.cc Fri Nov 20 15:19:09 2015 +0900 +++ b/c/regexParser/bitVector.cc Fri Nov 20 21:02:00 2015 +0900 @@ -7,7 +7,14 @@ int bitBlock = sizeof(unsigned long) * 8; -BitVectorPtr bitSet(int bitSetPosition) { +BitVectorListPtr createBitVector(NodePtr n,BitVectorListPtr bvl) { + BitVectorListPtr nextBvl = allocateBitVectorList(); + nextBvl->bi = bitSet(n->nodeNumber); + nextBvl->initBvl = initBvl; + return nextBvl; +} + +BitVectorPtr createBitVector(int bitSetPosition) { BitVectorPtr bi = (BitVectorPtr)malloc(sizeof(BitVector)); if (bi == NULL) { @@ -21,19 +28,20 @@ exit(-1); } - bi->arrayNum = (bitSetPosition + 1) / bitBlock; - if (((bitSetPosition + 1) % bitBlock) != 0) bi->arrayNum++; - for (int i = 0; i < bi->arrayNum; i++) { bi->bitContainer[i] = 0; } - unsigned long tmp = 1; - int arrayPosition = 0; + + return bi; +} + +BitVectorPtr bitSet(BitVectorPtr bi, int bitSetPosition) { - arrayPosition = bitSetPosition / bitBlock; - bitSetPosition = bitSetPosition % bitBlock; + bi->arrayNum = (bitSetPosition + bitBlock - 1) / bitBlock; - tmp = tmp << (bitBlock - 1 - bitSetPosition); + int arrayPosition = bitSetPosition / bitBlock; + + unsigned long tmp = 1 << (bitSetPosition % bitBlock); bi->bitContainer[arrayPosition] = bi->bitContainer[arrayPosition] | tmp; return bi; @@ -41,8 +49,10 @@ void bitPrint(BitVectorPtr bi) { for (int i = 0; i < bi->arrayNum ; i++) { - for (int j = bitBlock - 1; j >= 0; j--) { - printf( "%lu", ( bi->bitContainer[i] >> j ) & 1 ); + unsigned long vec = bi->bitContainer[i]; + for (int j = 0; j < bitBlock; j++) { + printf( "%lu", vec & 1 ); + vec >>= 1; } printf(" "); } diff -r 6401c708f5dd -r a3adc5c24e19 c/regexParser/bitVector.h --- a/c/regexParser/bitVector.h Fri Nov 20 15:19:09 2015 +0900 +++ b/c/regexParser/bitVector.h Fri Nov 20 21:02:00 2015 +0900 @@ -1,3 +1,5 @@ +#define BITBLOCK (sizeof(unsigned long) * 8) + typedef struct bitVector { int arrayNum; unsigned long *bitContainer; diff -r 6401c708f5dd -r a3adc5c24e19 c/regexParser/bitVectorNode.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/c/regexParser/bitVectorNode.cc Fri Nov 20 21:02:00 2015 +0900 @@ -0,0 +1,38 @@ +#include +#include +#include +#include "bitVector.h" +#include "regexParser.h" + +BitVectorListPtr allocateBitVectorList() { + BitVectorListPtr bvl = (BitVectorListPtr)malloc(sizeof(BitVectorList)); + if (bvl == NULL) { + fprintf(stderr, "Failed to allocate memory.\n"); + exit(-1); + } + + bvl->self = bvl; + bvl->bi = (BitVectorPtr)malloc(sizeof(BitVector)); + if (bvl->bi == NULL) { + fprintf(stderr, "Failed to allocate memory.\n"); + exit(-1); + } + + + return bvl; +} + + +BitVectorListPtr initBitVector() { + + BitVectorListPtr bvl = allocateBitVectorList(); + bvl->initBvl = initBvl = bvl; + bvl->bi = bitSet(0); + + for (int i = 0; i < 256; i++) { + bvl->next[i] = NULL; + } + + return bvl; +} + diff -r 6401c708f5dd -r a3adc5c24e19 c/regexParser/createBitVectorList.cc --- a/c/regexParser/createBitVectorList.cc Fri Nov 20 15:19:09 2015 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,134 +0,0 @@ -#include -#include -#include -#include "bitVector.h" -#include "regexParser.h" - -extern BitVectorPtr bitSet(int); -extern void bitPrint(BitVectorPtr); -BitVectorListPtr createBitVector(NodePtr); -BitVectorListPtr descendTreeNode(NodePtr, BitVectorListPtr, BitVectorListPtr, bool&); -BitVectorListPtr setNextBitVectorList(unsigned char, BitVectorListPtr, BitVectorListPtr); - -BitVectorListPtr initBvl; - -BitVectorListPtr allocateBitVectorList() { - BitVectorListPtr bvl = (BitVectorListPtr)malloc(sizeof(BitVectorList)); - if (bvl == NULL) { - fprintf(stderr, "Failed to allocate memory.\n"); - exit(-1); - } - - bvl->self = bvl; - bvl->bi = (BitVectorPtr)malloc(sizeof(BitVector)); - if (bvl->bi == NULL) { - fprintf(stderr, "Failed to allocate memory.\n"); - exit(-1); - } - - - return bvl; -} - -BitVectorListPtr createBitVector(NodePtr n,BitVectorListPtr bvl) { - BitVectorListPtr nextBvl = allocateBitVectorList(); - nextBvl->bi = bitSet(n->nodeNumber); - nextBvl->initBvl = initBvl; - return nextBvl; -} - - -BitVectorListPtr initBitVector() { - - BitVectorListPtr bvl = allocateBitVectorList(); - bvl->initBvl = initBvl = bvl; - bvl->bi = bitSet(0); - - for (int i = 0; i < 256; i++) { - bvl->next[i] = NULL; - } - - return bvl; -} - -void printBitVectorList (BitVectorListPtr bvl) { - bool isFirstPrint = true; - for (int i = 0; i < 256; i++) { - if (bvl->next[i] != NULL) { - if (isFirstPrint){ - puts("----"); - printf(" state : "); bitPrint(bvl->bi); - isFirstPrint = false; - } - printf("input char : %c\n",i); - printf("next state : ");bitPrint(bvl->next[i]->bi); - bvl->isLoop = bvl->isLoopAnker; - } - } - - for (int i = 0; i < 256; i++) { - if ((bvl->next[i] != NULL) && !(bvl->isLoop && bvl->isLoopAnker)) { - // if ((bvl->next[i] != NULL) ) { - printBitVectorList(bvl->next[i]); - } - } -} - -BitVectorListPtr descendTreeNode(NodePtr n,BitVectorListPtr bvl, BitVectorListPtr prev, bool &fromOr, bool &fromAsterisk) { - bool leftIsOr, rightIsOr; - if (n->Value.character == '*') { - bvl = descendTreeNode(n->left, bvl, prev, leftIsOr, fromAsterisk); - unsigned char repertChar = 0; - for (int i = 0; i < 256; i++) { - if (prev->next[i] != NULL) repertChar = i; - } - setNextBitVectorList(repertChar, bvl, prev->next[repertChar]); // here - bvl->isLoopAnker = true; - fromAsterisk = true; - - return prev; - } else if (n->Value.character == '|') { - bvl = descendTreeNode(n->left, bvl, prev, leftIsOr, fromAsterisk); - setNextBitVectorList(n->left->Value.character, prev, bvl); - bvl = descendTreeNode(n->right, bvl, prev, rightIsOr, fromAsterisk); - setNextBitVectorList(n->right->Value.character, prev, bvl); - fromOr = true; - return prev; - } else if (n->Value.character == '+') { - bvl = descendTreeNode(n->left, bvl, prev, leftIsOr, fromAsterisk); - setNextBitVectorList(n->left->Value.character, prev, bvl); - prev = bvl; - bvl = descendTreeNode(n->right, bvl, prev, rightIsOr, fromAsterisk); - - if (leftIsOr){ - for (int i = 0; i < 256; i++) - if (prev->next[i] != NULL) - setNextBitVectorList(n->right->Value.character, prev->next[i], bvl); - } - else { - setNextBitVectorList(n->right->Value.character, prev, bvl); - } - - fromOr = false; - } else if (n->tokenType == 'a') { - bvl = createBitVector(n,bvl); - fromOr = false; - } - return bvl; -} - -BitVectorListPtr setNextBitVectorList(unsigned char inputChar, BitVectorListPtr bvl, BitVectorListPtr next){ - if (isalnum((int)inputChar)){ - bvl->next[(int)inputChar] = next; - } - return next; -} - -BitVectorListPtr createBitVectorList(NodePtr n) { - BitVectorListPtr bvl = initBitVector(); - bool fromOr = false; - bool fromAsterisk = false; - descendTreeNode(n, bvl, bvl, fromOr, fromAsterisk); - printBitVectorList(bvl); - return bvl; -} diff -r 6401c708f5dd -r a3adc5c24e19 c/regexParser/node.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/c/regexParser/node.cc Fri Nov 20 21:02:00 2015 +0900 @@ -0,0 +1,28 @@ +#include +#include "regexParser.h" + +void descendTree(NodePtr n, int d) { + if (n->right != NULL) { + d++; + descendTree(n->right, d); + d--; + } + if (n->tokenType == 'a') { + printf("%*c%c(%d)\n",d*4, ' ',n->Value.character,n->nodeNumber); + } else { + printf("%*c%c\n",d*4, ' ',n->Value.character); + } + + if (n->left != NULL) { + d++; + descendTree(n->left, d); + d--; + } +} + +void printTree(NodePtr n) { + puts("---Print Node----"); + int d = 0; + descendTree(n,d); + puts("-----------------"); +} diff -r 6401c708f5dd -r a3adc5c24e19 c/regexParser/printTree.cc --- a/c/regexParser/printTree.cc Fri Nov 20 15:19:09 2015 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -#include -#include "regexParser.h" - -void descendTree(NodePtr n, int d) { - if (n->right != NULL) { - d++; - descendTree(n->right, d); - d--; - } - if (n->tokenType == 'a') { - printf("%*c%c(%d)\n",d*4, ' ',n->Value.character,n->nodeNumber); - } else { - printf("%*c%c\n",d*4, ' ',n->Value.character); - } - - if (n->left != NULL) { - d++; - descendTree(n->left, d); - d--; - } -} - -void printTree(NodePtr n) { - puts("---Print Node----"); - int d = 0; - descendTree(n,d); - puts("-----------------"); -} diff -r 6401c708f5dd -r a3adc5c24e19 c/regexParser/regexParser.h --- a/c/regexParser/regexParser.h Fri Nov 20 15:19:09 2015 +0900 +++ b/c/regexParser/regexParser.h Fri Nov 20 21:02:00 2015 +0900 @@ -20,15 +20,6 @@ unsigned char character; WordPtr w; } Value, *ValuePtr; - struct node *self; - struct node *parent; struct node *left; struct node *right; } Node, *NodePtr; - -typedef struct regexInfo { - unsigned char *ptr; - unsigned char tokenType; - int tokenValue; - int nodeNumber; -} RegexInfo, *RegexInfoPtr; diff -r 6401c708f5dd -r a3adc5c24e19 c/regexParser/subsetConstraction.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/c/regexParser/subsetConstraction.cc Fri Nov 20 21:02:00 2015 +0900 @@ -0,0 +1,88 @@ +#include +#include +#include +#include "bitVector.h" +#include "regexParser.h" + + +void printBitVectorList (BitVectorListPtr bvl) { + bool isFirstPrint = true; + for (int i = 0; i < 256; i++) { + if (bvl->next[i] != NULL) { + if (isFirstPrint){ + puts("----"); + printf(" state : "); bitPrint(bvl->bi); + isFirstPrint = false; + } + printf("input char : %c\n",i); + printf("next state : ");bitPrint(bvl->next[i]->bi); + bvl->isLoop = bvl->isLoopAnker; + } + } + + for (int i = 0; i < 256; i++) { + if ((bvl->next[i] != NULL) && !(bvl->isLoop && bvl->isLoopAnker)) { + // if ((bvl->next[i] != NULL) ) { + printBitVectorList(bvl->next[i]); + } + } +} + +BitVectorListPtr descendTreeNode(NodePtr n,BitVectorListPtr bvl, BitVectorListPtr prev, bool &fromOr, bool &fromAsterisk) { + bool leftIsOr, rightIsOr; + if (n->Value.character == '*') { + bvl = descendTreeNode(n->left, bvl, prev, leftIsOr, fromAsterisk); + unsigned char repertChar = 0; + for (int i = 0; i < 256; i++) { + if (prev->next[i] != NULL) repertChar = i; + } + setNextBitVectorList(repertChar, bvl, prev->next[repertChar]); // here + bvl->isLoopAnker = true; + fromAsterisk = true; + + return prev; + } else if (n->Value.character == '|') { + bvl = descendTreeNode(n->left, bvl, prev, leftIsOr, fromAsterisk); + setNextBitVectorList(n->left->Value.character, prev, bvl); + bvl = descendTreeNode(n->right, bvl, prev, rightIsOr, fromAsterisk); + setNextBitVectorList(n->right->Value.character, prev, bvl); + fromOr = true; + return prev; + } else if (n->Value.character == '+') { + bvl = descendTreeNode(n->left, bvl, prev, leftIsOr, fromAsterisk); + setNextBitVectorList(n->left->Value.character, prev, bvl); + prev = bvl; + bvl = descendTreeNode(n->right, bvl, prev, rightIsOr, fromAsterisk); + + if (leftIsOr){ + for (int i = 0; i < 256; i++) + if (prev->next[i] != NULL) + setNextBitVectorList(n->right->Value.character, prev->next[i], bvl); + } + else { + setNextBitVectorList(n->right->Value.character, prev, bvl); + } + + fromOr = false; + } else if (n->tokenType == 'a') { + bvl = createBitVector(n,bvl); + fromOr = false; + } + return bvl; +} + +BitVectorListPtr setNextBitVectorList(unsigned char inputChar, BitVectorListPtr bvl, BitVectorListPtr next){ + if (isalnum((int)inputChar)){ + bvl->next[(int)inputChar] = next; + } + return next; +} + +BitVectorListPtr createBitVectorList(NodePtr n) { + BitVectorListPtr bvl = initBitVector(); + bool fromOr = false; + bool fromAsterisk = false; + descendTreeNode(n, bvl, bvl, fromOr, fromAsterisk); + printBitVectorList(bvl); + return bvl; +} diff -r 6401c708f5dd -r a3adc5c24e19 c/regexParser/transition.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/c/regexParser/transition.h Fri Nov 20 21:02:00 2015 +0900 @@ -0,0 +1,27 @@ +typedef struct transition { + CharClassPtr condition; + BitVectorPtr nextState; + struct transition *next; +} Transition, TransitionPtr; + + +typedef struct state { + TransitionPtr transition; + struct state *next; +} State; StatePtr; +/* + 正規表現木を辿って transition のList をつくる + HcarClass のかさなりを判定して重なりのない新しいCharClassをつくる + 重なっている状態はbitvectorのorをとる + 重なっている状態はそれぞれの状態について木をたどる + + nextState == 0 は正規表現の末端を表す + nextState == 1 は受理状態を表す + 正規表現のノードの番号 n に対応する 2^n のbitをセットした状態 + + + | の場合は両方のListを結合する + + の場合は左のノードに * がある場合は右のリストも結合する + 左のノードにあすたがない場合は、右のほうだけみる + * は直下のリストを使って、次の状態を自分自身にする + */