changeset 55:4ae288b37591

add analyzer. analyzer can analyze the max-length of a regexp.
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Tue, 26 Oct 2010 16:37:43 +0900
parents 36cdfcaf5420
children ee9945561f80
files pyrect/regexp/analyzer.py pyrect/regexp/kwset.py pyrect/translator/c_translator.py pyrect/translator/dot_translator.py
diffstat 4 files changed, 69 insertions(+), 78 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyrect/regexp/analyzer.py	Tue Oct 26 16:37:43 2010 +0900
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+
+"""
+Analyze the AST of a Regular-Expression.
+Computes the max-length of the strings the Regular-Expression accepts.
+Concat adds the lengths of its operands, Union takes the larger one.
+Star and Plus make the max-length infinite (inf).
+"""
+
+from pyrect.regexp.parser import Parser
+from pyrect.regexp.ast import ASTWalker
+
+class Analyzer(ASTWalker):
+    """ Extract with Visitor-Pattern.
+    AST (ast), is represented by Node-Tree.
+    >>> prs = Parser()
+    >>> an  = Analyzer()
+    >>> an.analyze(prs.parse('fixed-string'))
+    12
+    >>> an.analyze(prs.parse('(build|fndecl|gcc)'))
+    6
+    >>> an.analyze(prs.parse('(AB|CD)*123'))
+    inf
+    >>> an.analyze(prs.parse('((12)*|3)|456'))
+    inf
+    >>> an.analyze(prs.parse('(plus)?(qmark)?'))
+    9
+    """
+    def __init__(self):
+        self.maxlen = 0
+
+    def analyze(self, ast=None):
+        if ast:
+            self.maxlen = ast.accept(self)
+        return self.maxlen
+
+    def visit(self, ast):
+        """Default for nodes without their own visit_* method
+        (presumably e.g. Character). Such a node counts as length 1.
+        """
+        return 1
+
+    def visit_Concat(self, concat):
+        a1 = concat.op1.accept(self)
+        a2 = concat.op2.accept(self)
+
+        return a1 + a2
+
+    def visit_Union(self, union):
+        a1 = union.op1.accept(self)
+        a2 = union.op2.accept(self)
+        return max(a1, a2)
+
+    def visit_Star(self, star):
+        return float("inf")
+
+    def visit_Plus(self, plus):
+        return float("inf")
+
+    def visit_Qmark(self, qmark):
+        return qmark.op.accept(self)
+
+def test():
+    import doctest
+    doctest.testmod()
+
+if __name__ == "__main__": test()
--- a/pyrect/regexp/kwset.py	Wed Aug 25 22:22:54 2010 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Extract Keywords from AST. Keywords,
-which are necessary words to be accepted with Regular-Expression.
-and which are used to Fixed-String-Filtering (ex: Boyer-Moore).
-kwset is also used in GNU-GREP.
-"""
-
-from pyrect.regexp.parser import Parser
-from pyrect.regexp.ast import ASTWalker
-
-class KeywordsExtractor(ASTWalker):
-    """ Extract with Visitor-Pattern.
-    AST (ast), is represented by Node-Tree.
-    >>> prs = Parser()
-    >>> kex = KeywordsExtractor()
-    >>> kex.extract_keywords(prs.parse('(AB|CD)*123'))
-    ['', '123']
-    >>> kex.extract_keywords(prs.parse('WOOO*PS!!'))
-    ['WOO', '', 'PS!!']
-    >>> kex.extract_keywords(prs.parse('(build|fndecl|gcc)'))
-    ['']
-    >>> kex.extract_keywords(prs.parse('(plus)+(qmark)?'))
-    ['plus', '']
-    """
-    def __init__(self):
-        self.keywords = []
-
-    def extract_keywords(self, ast=None):
-        if ast:
-            self.keywords = ast.accept(self)
-        return self.keywords
-
-    def visit(self, ast):
-        """Following Classes contain no-Keywords.
-        Union, Star
-        """
-        return ['']
-
-    def visit_Character(self, character):
-        return character.char
-
-    def visit_Concat(self, concat):
-        key1 = concat.op1.accept(self)
-        key2 = concat.op2.accept(self)
-
-        if isinstance(key1, str) and isinstance(key2, str):
-            return key1 + key2
-        elif isinstance(key1, str) and isinstance(key2, list):
-            if key2[0]:
-                key2[0] = key1 + key2[0]
-            else:
-                key2 = [key1] + key2
-            return key2
-        elif isinstance(key1, list) and isinstance(key2, str):
-            if key1[-1]:
-                key1[-1] = key1[-1] + key2
-            else:
-                key1 = key1 + [key2]
-            return key1
-        else:
-            return key1 + key2
-
-    def visit_Plus(self, plus):
-        return plus.op.accept(self)
-
-def extract_keywords(ast):
-    return KeywordsExtractor().extract_keywords(ast)
-
-def test():
-    import doctest
-    doctest.testmod()
-
-if __name__ == "__main__": test()
--- a/pyrect/translator/c_translator.py	Wed Aug 25 22:22:54 2010 +0900
+++ b/pyrect/translator/c_translator.py	Tue Oct 26 16:37:43 2010 +0900
@@ -50,7 +50,6 @@
         self.emit("3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,")
         self.emit("};")
         self.emitd("#define SKIP(s) ((s) + skip_tbl[*(unsigned char *)s])", 2)
-        #self.emitd("#define SKIP(s) s+1", 2)
 
     def emit_driver(self):
         self.emiti("int main(int argc, unsigned char* argv[]) {")
--- a/pyrect/translator/dot_translator.py	Wed Aug 25 22:22:54 2010 +0900
+++ b/pyrect/translator/dot_translator.py	Tue Oct 26 16:37:43 2010 +0900
@@ -22,8 +22,8 @@
             self.cg = regexp.nfacg
         else:
             self.cg = regexp.dfacg
-        self.fill_color = "white" #"lightsteelblue1"
-        self.frame_color = "black" #"navyblue"
+        self.fill_color = "lightsteelblue1"
+        self.frame_color = "navyblue"
 
     def state_name(self, name):
         return "q"+name