Mercurial > hg > Members > nobuyasu > test
view pyrect/pyrect/regexp/ast.py @ 9:493c96d030c0
add pyrect
author | nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 14 Jun 2011 17:24:03 +0900 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python
# -*- encoding: utf-8 -*-

"""
General node set.

The parser creates an AST (composed of Nodes) from a regular expression.
Nodes are printable, and keywords are countable (kwset_node).
"""

# Separates positional from keyword arguments inside a Singleton cache key,
# so that a keyword call can never collide with a purely positional one.
_KWARGS_MARK = object()


class ASTWalker(object):
    """Base visitor for the AST.

    ``Node.accept`` dispatches to ``visit_<ClassName>`` when the walker
    defines such a method and to ``visit`` otherwise.  The unary operators
    forward to their operand; the binary operators visit both operands and
    combine the two results with ``concat`` / ``union``.  Every hook defined
    here returns ``None``; subclasses override the ones they need.
    """

    def visit(self, ast):
        """Fallback for nodes without a dedicated ``visit_*`` method."""
        return None

    def visit_Star(self, star):
        """Visit the operand of a ``Star`` node and return its result."""
        return star.op.accept(self)

    def visit_Plus(self, plus):
        """Visit the operand of a ``Plus`` node and return its result."""
        return plus.op.accept(self)

    def visit_Qmark(self, qmark):
        """Visit the operand of a ``Qmark`` node and return its result."""
        return qmark.op.accept(self)

    def visit_Concat(self, concat):
        """Visit both operands (left first) and merge them via ``concat``."""
        left = concat.op1.accept(self)
        right = concat.op2.accept(self)
        return self.concat(left, right)

    def visit_Union(self, union):
        """Visit both operands (left first) and merge them via ``union``."""
        left = union.op1.accept(self)
        right = union.op2.accept(self)
        return self.union(left, right)

    def union(self, r1, r2):
        """Combine the results of the two branches of a ``Union``."""
        return None

    def concat(self, r1, r2):
        """Combine the results of the two halves of a ``Concat``."""
        return None


# AST nodes

class Node(object):
    """Base class of every AST node."""

    def __init__(self):
        pass

    def __str__(self):
        return str(self.__class__)

    def __repr__(self):
        return "(%s:%s)" % (self.__class__.__name__, str(self))

    def accept(self, visitor):
        """Call ``visitor.visit_<ClassName>(self)`` or ``visitor.visit(self)``.

        The fallback ``visitor.visit`` is looked up eagerly, so the visitor
        must always provide it, even when the specific method exists.
        """
        method_name = "visit_%s" % self.__class__.__name__
        handler = getattr(visitor, method_name, visitor.visit)
        return handler(self)


# NFA basic elements:
# Concat, Union, Star, Qmark, Plus

class Concat(Node):
    """Concatenation of two sub-expressions."""

    def __init__(self, op1, op2):
        self.op1 = op1
        self.op2 = op2

    def __repr__(self):
        return "%s(%r.%r)" % (self.__class__.__name__, self.op1, self.op2)

    def __str__(self):
        return "(%s.%s)" % (self.op1, self.op2)


class Union(Node):
    """Alternation of two sub-expressions."""

    def __init__(self, op1, op2):
        self.op1 = op1
        self.op2 = op2

    def __repr__(self):
        return "(Union:(%r|%r))" % (self.op1, self.op2)

    def __str__(self):
        return "(%s|%s)" % (self.op1, self.op2)


class Star(Node):
    """``*`` applied to one operand."""

    def __init__(self, op):
        self.op = op

    def __str__(self):
        return "(%s)*" % self.op


class Qmark(Node):
    """``?`` applied to one operand."""

    def __init__(self, op):
        self.op = op

    def __str__(self):
        return "(%s)?" % self.op


class Plus(Node):
    """``+`` applied to one operand."""

    def __init__(self, op):
        self.op = op

    def __str__(self):
        return "(%s)+" % self.op


# The following nodes are not converted into individual NFA/DFA states;
# an InputNode remains as an input, which is decided at matching time.

# Basic elements:
# Character, MBCharacter

class Singleton(type):
    """Metaclass that hands out one cached instance per constructor arguments.

    Every class created through this metaclass gets its own ``instances``
    dict.  Calling the class looks the arguments up in that dict and only
    constructs a new object on a miss, so all arguments must be hashable.
    """

    def __new__(mcs, name, bases, namespace):
        # A fresh cache per class: subclasses never share instances.
        namespace['instances'] = {}
        return type.__new__(mcs, name, bases, namespace)

    def __call__(cls, *args, **kwargs):
        # Positional-only calls keep the plain ``args`` tuple as their key.
        key = args
        if kwargs:
            key = args + (_KWARGS_MARK, tuple(sorted(kwargs.items())))
        if key not in cls.instances:
            cls.instances[key] = type.__call__(cls, *args, **kwargs)
        return cls.instances[key]


class InputNode(Node):
    """Node that stands for an input symbol.

    Hashing and ordering are both derived from the printed form of the node.
    """

    # NOTE: ``__metaclass__`` is only honoured by Python 2; on Python 3 it is
    # an ordinary class attribute and instances are not cached.
    __metaclass__ = Singleton

    def __hash__(self):
        # Hash the text itself.  Taking ``id()`` of the string returned by
        # ``__str__`` gives the address of a possibly temporary object, which
        # is not stable from one call to the next.
        return hash(self.__str__())

    def __cmp__(self, other):
        """Three-way comparison of the two hashes (Python 2 protocol)."""
        mine = self.__hash__()
        theirs = other.__hash__()
        if mine == theirs:
            return 0
        return 1 if mine > theirs else -1


class SpecialInputNode(InputNode):
    """Input node that is not a plain character."""

    __metaclass__ = Singleton


class Character(InputNode):
    """A single character, stored as its ordinal in ``char``."""

    # Imported inside the class body and removed again afterwards, so that
    # neither the module nor the class namespace keeps the helper name.
    from curses.ascii import controlnames as _control_names

    # Printable name for each ordinal 0..255: the control names from
    # curses.ascii, quoted characters for 33..126, 'DEL' for 127 and an
    # escaped hex form for 128..255.
    ASCII = (
        list(_control_names)
        + ["'" + chr(c) + "'" for c in range(33, 127)]
        + ['DEL']
        + [r"\x%x" % c for c in range(128, 256)]
    )

    del _control_names

    def __init__(self, char):
        self.char = ord(char)

    def __str__(self):
        return self.ASCII[self.char]

    def __hash__(self):
        return hash(self.char)

    @classmethod
    def ascii(cls, c):
        """Return the printable name of the character ``c``."""
        return cls.ASCII[ord(c)]


class MBCharacter(Character):
    """Character that additionally records the ordinals of ``str(mbchar)``."""

    def __init__(self, mbchar):
        Character.__init__(self, mbchar)
        # A real list (not a lazy ``map`` object) on every Python version.
        # NOTE(review): ``ord(mbchar)`` in Character.__init__ only accepts a
        # single character -- confirm how multi-byte input is meant to arrive.
        self.bytes = [ord(b) for b in str(mbchar)]


class EscapeCharacter(Character):
    """Character created from an escape; constructed exactly like Character."""


# Anchors are special input rules that match a specific text position:
# BegLine, EndLine

class Anchor(SpecialInputNode):
    """Base class of the anchors."""


class BegLine(Anchor):
    """The ``^`` anchor."""

    def __str__(self):
        return "^"


class EndLine(Anchor):
    """The ``$`` anchor."""

    def __str__(self):
        return "$"


# Other special inputs:
# AnyChar, CharClass

class AnyChar(InputNode):
    """The ``.`` input."""

    def __str__(self):
        return "."
class CharClass(InputNode):
    """Character class built from ``factor``, optionally inverted.

    ``factor`` is an iterable whose items are either ``Range`` nodes or
    objects exposing a ``char`` ordinal.  ``inverse`` selects the ``[^...]``
    form.
    """

    def __init__(self, factor, inverse=False):
        self.inverse = inverse
        self.factor = factor

    def get_chars(self):
        """Return the set of ordinals matched by this class.

        When ``inverse`` is set, the result is the complement within 0..255.
        """
        chars = set()
        for item in self.factor:
            if isinstance(item, Range):
                # A Range iterates over every ordinal it covers.
                chars.update(item)
            else:
                chars.add(item.char)
        if self.inverse:
            chars = set(range(256)) - chars
        return chars

    def __repr__(self):
        members = ",".join(repr(item) for item in self.factor)
        return self.__class__.__name__ + "[%s]" % members

    def __str__(self):
        body = "".join(map(str, self.factor))
        if self.inverse:
            return "[^%s]" % body
        return "[%s]" % body


class Range(InputNode):
    """Inclusive range between two nodes that carry a ``char`` ordinal."""

    def __init__(self, lower, upper):
        self.lower = lower
        self.upper = upper

    def __iter__(self):
        """Yield every ordinal from ``lower.char`` to ``upper.char``."""
        for code in range(self.lower.char, self.upper.char + 1):
            yield code

    def __contains__(self, input_node):
        """Return True when ``input_node`` is a Character inside the range.

        Anything that is not a ``Character`` is reported as not contained.
        """
        if not isinstance(input_node, Character):
            return False
        return self.lower.char <= input_node.char <= self.upper.char

    def __str__(self):
        return "%s-%s" % (self.lower, self.upper)


class RepMN(SpecialInputNode):
    """Bounded repetition ``op{min}``, ``op{min,}`` or ``op{min, max}``.

    ``max`` may be ``None`` for the form without an upper bound.
    """

    def __init__(self, min, max, op):
        self.op = op
        self.min = min
        self.max = max

    def __str__(self):
        if self.max == self.min:
            return "%s{%d}" % (self.op, self.min)
        if self.max is None:
            return "%s{%d,}" % (self.op, self.min)
        return "%s{%d, %d}" % (self.op, self.min, self.max)

    def __hash__(self):
        # Hash the fields as a tuple: adding the three hashes together makes
        # e.g. {2,3} and {3,2} on the same operand collide.
        return hash((self.op, self.min, self.max))