Mercurial > hg > Applications > Grep
diff c/regexParser/regexParser.cc @ 142:de0f332d560c pairPro
insert charClassMerge function
author | masa |
---|---|
date | Fri, 11 Dec 2015 14:54:00 +0900 |
parents | 15815fcb6c2f |
children | 32977f5a2ed0 |
line wrap: on
line diff
```diff
--- a/c/regexParser/regexParser.cc Fri Dec 11 13:12:42 2015 +0900
+++ b/c/regexParser/regexParser.cc Fri Dec 11 14:54:00 2015 +0900
@@ -30,10 +30,9 @@
 CharClassPtr createCharClassWord(RegexInfoPtr ri) {
     CharClassPtr cc = NEW(CharClass);
     cc->type = 'a';
-    cc->cond = NEW(Condition);
-    cc->cond->w = NEW(Word);
-    cc->cond->w->word = ri->tokenValue;
-    cc->cond->w->length = ri->ptr - ri->tokenValue;
+    cc->cond.w.word = ri->tokenValue;
+    cc->cond.w.length = ri->ptr - ri->tokenValue;
+    cc->nextState.bitContainer = 0;
 
     token(ri);
     return cc;
@@ -53,6 +52,28 @@
     return n;
 }
 
+CharClassPtr charClassMerge(CharClassPtr src, CharClassPtr add) {
+    // 重なっているccの領域を分割する
+    // 必要ならばnextStateを重ねあわせる
+    // 変更があった場合は新しくリストを作って返す
+    if (src->type == 'a') {
+        if (add->type == 'a') {
+            if (src->cond.w.word[0] > add->cond.w.word[0]) {
+                // add のほうが小さいので小さい順のccをつくる
+                CharClassPtr left = charClassMerge(add->left.src);
+                return createCharClassWord(add->cond->w.word, left, add->right);
+            } else {
+
+            }
+        } else if (add->type == 'c') {
+            //
+            if (src->cond.w.word[0] < add->cond.range.begin) {
+
+            } else (src->cond->w.word[0] < add->end) {
+        } else if (src->type == 'c') {
+
+        }
+}
 
 // <charClass> ::= '['<literal>'-'<literal>']'
 static
@@ -60,14 +81,10 @@
     CharClassPtr cc = NEW(CharClass);
     NodePtr n = createNode(ri,'c',cc,0,0);
     cc->type = 'r';
-    cc->cond = NEW(Condition);
-    cc->cond->range = NEW(RangeList);
-    cc->cond->range->begin = ri->ptr;
-    cc->cond->range->end = ri->ptr;
-    cc->cond->range->next = NULL;
-
-
-    RangeListPtr rangeList = cc->cond->range;
+    cc->nextState.bitContainer = 0;
+    RangeListPtr rangeList = &cc->cond.range;
+    rangeList->begin = ri->ptr;
+    rangeList->end = ri->ptr;
 
     for (ri->ptr++; *ri->ptr && *ri->ptr != ']'; ri->ptr++) {
         if (*ri->ptr == '-') {
@@ -86,8 +103,8 @@
         rangeList->end = ri->ptr;
         rangeList->next = NULL;
     }
-    // TODO literal support
-
+    // TODO literal support
+    // merge rangeList here
     if (*ri->ptr) ri->ptr++;
     token(ri);
     return n;
@@ -114,11 +131,6 @@
         ri->tokenType = ')';
         ri->tokenValue = ri->ptr;
         return;
-    } else if (ri->ptr[0] == '[') {
-        ri->ptr++;
-        ri->tokenType = 'c';
-        ri->tokenValue = ri->ptr;
-        return;
     } else if (ri->ptr[0] == ']') {
         ri->ptr++;
         ri->tokenType = ']';
@@ -144,6 +156,11 @@
             \\
             \utf-8 etc...
         */
+    } else if (ri->ptr[0] == '[') {
+        ri->ptr++;
+        ri->tokenType = 'c';
+        ri->tokenValue = ri->ptr;
+        return;
     } else {
         ri->tokenType = 'a';
         ri->tokenValue = ri->ptr;
```