# HG changeset patch # User Ryoma SHINYA # Date 1292306545 -32400 # Node ID 8102bf4bbec6e7f8b9e3a8b72e4818c3582fa576 # Parent 14faa199c3bf46f6313e4648d9bbec01b11d1cb2 modify range stmt. diff -r 14faa199c3bf -r 8102bf4bbec6 pyrect/jitgrep.py --- a/pyrect/jitgrep.py Tue Dec 14 11:49:59 2010 +0900 +++ b/pyrect/jitgrep.py Tue Dec 14 15:02:25 2010 +0900 @@ -73,8 +73,10 @@ if opts.time : start_time = time.time() + chars = Regexp.get_chars(string) + chars = Regexp.get_chars(string) reg = Regexp(".*"+string) - reg.chars = Regexp.get_chars(string) + reg.chars = chars (reg.max_len, _, _) = Regexp.get_analyze(string) if opts.cbc: diff -r 14faa199c3bf -r 8102bf4bbec6 pyrect/regexp/__init__.py --- a/pyrect/regexp/__init__.py Tue Dec 14 11:49:59 2010 +0900 +++ b/pyrect/regexp/__init__.py Tue Dec 14 15:02:25 2010 +0900 @@ -47,7 +47,8 @@ @classmethod def parse(cls, regexp): psr = Parser() - return psr.parse(regexp) + ast = psr.parse(regexp) + return ast def matches(self, string): runtime = self.dfa.get_runtime() diff -r 14faa199c3bf -r 8102bf4bbec6 pyrect/regexp/analyzer.py --- a/pyrect/regexp/analyzer.py Tue Dec 14 11:49:59 2010 +0900 +++ b/pyrect/regexp/analyzer.py Tue Dec 14 15:02:25 2010 +0900 @@ -69,6 +69,9 @@ (m, _, _) = qmark.op.accept(self) return m, 0, ["", ""] + def visit_CharClass(self, cclass): + return 1, 1, ["", ""] + def test(): import doctest doctest.testmod() diff -r 14faa199c3bf -r 8102bf4bbec6 pyrect/regexp/ast.py --- a/pyrect/regexp/ast.py Tue Dec 14 11:49:59 2010 +0900 +++ b/pyrect/regexp/ast.py Tue Dec 14 15:02:25 2010 +0900 @@ -195,6 +195,18 @@ self.inverse = inverse self.factor = factor + def get_chars(self): + char = set() + for f in self.factor: + if type(f) == Range: + for ff in f: + char.add(ff) + else: + char.add(f.char) + if self.inverse: + char = set(range(256)) - char + return char + def __repr__(self): return self.__class__.__name__+"[%s]" \ % ",".join((s.__repr__() for s in self.factor)) @@ -210,6 +222,10 @@ self.lower = lower self.upper = upper + def __iter__(self): + for c in range(self.lower.char, self.upper.char+1): + yield c + def __contains__(self, input_node): if isinstance(input_node, Character): self.lower diff -r 14faa199c3bf -r 8102bf4bbec6 pyrect/regexp/char_collector.py --- a/pyrect/regexp/char_collector.py Tue Dec 14 11:49:59 2010 +0900 +++ b/pyrect/regexp/char_collector.py Tue Dec 14 15:02:25 2010 +0900 @@ -36,6 +36,10 @@ def visit_AnyChar(self, anychar): return [anychar] + def visit_CharClass(self, cclass): + chars = [chr(x) for x in cclass.get_chars()] + return chars + def visit_Character(self, char): return [chr(char.char)] diff -r 14faa199c3bf -r 8102bf4bbec6 pyrect/translator/cbc_grep_translator.py --- a/pyrect/translator/cbc_grep_translator.py Tue Dec 14 11:49:59 2010 +0900 +++ b/pyrect/translator/cbc_grep_translator.py Tue Dec 14 15:02:25 2010 +0900 @@ -88,7 +88,7 @@ if self.skip_boost and not self.filter_only and \ not AnyChar() in self.regexp.chars and \ - self.regexp.min_len > 2: + self.regexp.min_len >= 2: self.emit_booster(self.regexp.min_len, self.regexp.chars) else: self.skip_boost = False diff -r 14faa199c3bf -r 8102bf4bbec6 pyrect/translator/grep_translator.py --- a/pyrect/translator/grep_translator.py Tue Dec 14 11:49:59 2010 +0900 +++ b/pyrect/translator/grep_translator.py Tue Dec 14 15:02:25 2010 +0900 @@ -81,7 +81,7 @@ if self.skip_boost and not self.filter_only and \ not AnyChar() in self.regexp.chars and \ - self.regexp.min_len > 2: + self.regexp.min_len >= 2: self.emit_booster(self.regexp.min_len, self.regexp.chars) else: self.skip_boost = False