changeset 106:8102bf4bbec6

modify range stmt.
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Tue, 14 Dec 2010 15:02:25 +0900
parents 14faa199c3bf
children 492daa4d7fa5
files pyrect/jitgrep.py pyrect/regexp/__init__.py pyrect/regexp/analyzer.py pyrect/regexp/ast.py pyrect/regexp/char_collector.py pyrect/translator/cbc_grep_translator.py pyrect/translator/grep_translator.py
diffstat 7 files changed, 30 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/pyrect/jitgrep.py	Tue Dec 14 11:49:59 2010 +0900
+++ b/pyrect/jitgrep.py	Tue Dec 14 15:02:25 2010 +0900
@@ -73,8 +73,10 @@
 
     if opts.time : start_time = time.time()
 
+    chars = Regexp.get_chars(string)
+    chars = Regexp.get_chars(string)
     reg = Regexp(".*"+string)
-    reg.chars = Regexp.get_chars(string)
+    reg.chars = chars
     (reg.max_len, _, _) = Regexp.get_analyze(string)
 
     if opts.cbc:
--- a/pyrect/regexp/__init__.py	Tue Dec 14 11:49:59 2010 +0900
+++ b/pyrect/regexp/__init__.py	Tue Dec 14 15:02:25 2010 +0900
@@ -47,7 +47,8 @@
     @classmethod
     def parse(cls, regexp):
         psr = Parser()
-        return psr.parse(regexp)
+        ast = psr.parse(regexp)
+        return ast
 
     def matches(self, string):
         runtime = self.dfa.get_runtime()
--- a/pyrect/regexp/analyzer.py	Tue Dec 14 11:49:59 2010 +0900
+++ b/pyrect/regexp/analyzer.py	Tue Dec 14 15:02:25 2010 +0900
@@ -69,6 +69,9 @@
         (m, _, _) = qmark.op.accept(self)
         return m, 0, ["", ""]
 
+    def visit_CharClass(self, cclass):
+        return 1, 1, ["", ""]
+
 def test():
     import doctest
     doctest.testmod()
--- a/pyrect/regexp/ast.py	Tue Dec 14 11:49:59 2010 +0900
+++ b/pyrect/regexp/ast.py	Tue Dec 14 15:02:25 2010 +0900
@@ -195,6 +195,18 @@
         self.inverse = inverse
         self.factor = factor
 
+    def get_chars(self):
+        char = set()
+        for f in self.factor:
+            if type(f) == Range:
+                for ff in f:
+                    char.add(ff)
+            else:
+                char.add(f.char)
+        if self.inverse:
+            char = set(range(256)) - char
+        return char
+
     def __repr__(self):
         return self.__class__.__name__+"[%s]" \
                % ",".join((s.__repr__() for s in self.factor))
@@ -210,6 +222,10 @@
         self.lower = lower
         self.upper = upper
 
+    def __iter__(self):
+        for c in range(self.lower.char, self.upper.char+1):
+            yield c
+
     def __contains__(self, input_node):
         if isinstance(input_node, Character):
             self.lower
--- a/pyrect/regexp/char_collector.py	Tue Dec 14 11:49:59 2010 +0900
+++ b/pyrect/regexp/char_collector.py	Tue Dec 14 15:02:25 2010 +0900
@@ -36,6 +36,10 @@
     def visit_AnyChar(self, anychar):
         return [anychar]
 
+    def visit_CharClass(self, cclass):
+        chars = [chr(x) for x in cclass.get_chars()]
+        return chars
+
     def visit_Character(self, char):
         return [chr(char.char)]
 
--- a/pyrect/translator/cbc_grep_translator.py	Tue Dec 14 11:49:59 2010 +0900
+++ b/pyrect/translator/cbc_grep_translator.py	Tue Dec 14 15:02:25 2010 +0900
@@ -88,7 +88,7 @@
 
         if self.skip_boost and not self.filter_only and \
                not AnyChar() in self.regexp.chars and \
-               self.regexp.min_len > 2:
+               self.regexp.min_len >= 2:
             self.emit_booster(self.regexp.min_len, self.regexp.chars)
         else:
             self.skip_boost = False
--- a/pyrect/translator/grep_translator.py	Tue Dec 14 11:49:59 2010 +0900
+++ b/pyrect/translator/grep_translator.py	Tue Dec 14 15:02:25 2010 +0900
@@ -81,7 +81,7 @@
 
         if self.skip_boost and not self.filter_only and \
                not AnyChar() in self.regexp.chars and \
-               self.regexp.min_len > 2:
+               self.regexp.min_len >= 2:
             self.emit_booster(self.regexp.min_len, self.regexp.chars)
         else:
             self.skip_boost = False