diff pyrect/pyrect/regexp/analyzer.py @ 9:493c96d030c0

add pyrect
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Tue, 14 Jun 2011 17:24:03 +0900
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyrect/pyrect/regexp/analyzer.py	Tue Jun 14 17:24:03 2011 +0900
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+
+"""
+Extract Keywords from AST. Keywords,
+which are necessary words to be accepted with Regular-Expression.
+and which are used to Fixed-String-Filtering (ex: Boyer-Moore).
+kwset is also used in GNU-GREP.
+"""
+
+from pyrect.regexp.parser import Parser
+from pyrect.regexp.ast import ASTWalker, Plus
+
+class Analyzer(ASTWalker):
+    """ Extract with Visitor-Pattern.
+    AST (ast), is represented by Node-Tree.
+    >>> prs = Parser()
+    >>> an  = Analyzer()
+    >>> an.analyze(prs.parse('fixed-string'))
+    (12, 12, ['fixed-string'])
+    >>> an.analyze(prs.parse('(build|fndecl|gcc)'))
+    (6, 3, [])
+    >>> an.analyze(prs.parse('123(AB|CD)*456'))
+    (inf, 6, ['123', '456'])
+    >>> an.analyze(prs.parse('((12)+|3)|456'))
+    (inf, 1, [])
+    >>> an.analyze(prs.parse('^(plus)?(qmark)?'))
+    (9, 0, [])
+    >>> an.analyze(prs.parse('\*+ \[\['))
+    (inf, 4, ['* [['])
+    """
+
+    def __init__(self, ast=None):
+        if ast:
+            self.analyze(ast)
+        else:
+            self.max_len = 0
+            self.min_len = 0
+
+    def analyze(self, ast=None):
+        if ast:
+            self.max_len, self.min_len, self.must_words = ast.accept(self)
+            self.must_words = [x for x in self.must_words if x != ""]
+        return self.max_len, self.min_len, self.must_words
+
+    def visit(self, ast):
+        """Following Classes contain no-Keywords.
+        Union, Star
+        """
+        return 1, 1, [str(ast)]
+
+    def visit_Character(self, ast):
+        return 1, 1, [chr(ast.char)]
+
+    def concat(self, (max1, min1, key1), (max2, min2, key2)):
+        return max1 + max2, min1 + min2, key1[0:-1] \
+               + ([key1[-1] + key2[0]]) + key2[1:]
+
+    def union(self, (max1, min1, _), (max2, min2, __)):
+        return max(max1, max2), min(min1, min2), ["", ""]
+
+    def visit_Star(self, star):
+        return float("inf"), 0, ["", ""]
+
+    def visit_Plus(self, plus):
+        (_, m, k) = plus.op.accept(self)
+        return float("inf"), m, k + [""] + k
+
+    def visit_Qmark(self, qmark):
+        (m, _, _) = qmark.op.accept(self)
+        return m, 0, ["", ""]
+
+    def visit_CharClass(self, cclass):
+        return 1, 1, ["", ""]
+
+def test():
+    import doctest
+    doctest.testmod()
+
+if __name__ == "__main__": test()