Mercurial > hg > Members > shinya > pyrect
view pyrect/regexp/ast.py @ 63:020ba001c58a
modify I/O routine. use mmap. it's really faster than fgets ;-)
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 05 Nov 2010 01:39:42 +0900 |
parents | fd3d0b8326fe |
children | 4de11d799dee |
line wrap: on
line source
#!/usr/bin/env python
# -*- encoding: utf-8 -*-

"""
General node set for the regular-expression AST.

The parser creates an AST (composed of Node objects) from a regexp.
Nodes are printable, and keywords are countable (kwset_node).
"""


class ASTWalker(object):
    """Base visitor for the AST.

    ``Node.accept`` dispatches to ``visit_<ClassName>`` when the visitor
    defines it and falls back to ``visit`` otherwise.
    """

    def visit(self, ast):
        """Fallback handler for a node; does nothing and returns None."""
        return None


# ---------------------------------------------------------------------------
# AST nodes
# ---------------------------------------------------------------------------

class Node(object):
    """Common base class of every AST node."""

    def __init__(self):
        pass

    def __str__(self):
        """Return the string form of the node's class (subclasses override)."""
        return str(self.__class__)

    def __repr__(self):
        """Return ``(<ClassName>:<str(self)>)``."""
        return "(%s:%s)" % (self.__class__.__name__, self)

    def accept(self, visitor):
        """Dispatch this node to *visitor* and return the handler's result.

        The handler is ``visitor.visit_<ClassName>`` when present, else
        ``visitor.visit``.
        """
        method_name = "visit_%s" % self.__class__.__name__
        # ``visitor.visit`` is evaluated eagerly as the getattr default, so
        # every visitor must provide it even when a specific handler exists.
        handler = getattr(visitor, method_name, visitor.visit)
        return handler(self)


# ---------------------------------------------------------------------------
# NFA basic elements: Concat, Union, Star, Qmark, Plus
# ---------------------------------------------------------------------------

class Concat(Node):
    """Concatenation of two sub-expressions (``op1`` followed by ``op2``)."""

    def __init__(self, op1, op2):
        self.op1 = op1
        self.op2 = op2

    def __repr__(self):
        return "%s(%r.%r)" % (self.__class__.__name__, self.op1, self.op2)

    def __str__(self):
        return "(%s.%s)" % (self.op1, self.op2)


class Union(Node):
    """Alternation of two sub-expressions (``op1`` or ``op2``)."""

    def __init__(self, op1, op2):
        self.op1 = op1
        self.op2 = op2

    def __repr__(self):
        return "(Union:(%r|%r))" % (self.op1, self.op2)

    def __str__(self):
        return "(%s|%s)" % (self.op1, self.op2)


class Star(Node):
    """Postfix ``*`` applied to ``op``."""

    def __init__(self, op):
        self.op = op

    def __str__(self):
        return "(%s)*" % self.op


class Qmark(Node):
    """Postfix ``?`` applied to ``op``."""

    def __init__(self, op):
        self.op = op

    def __str__(self):
        return "(%s)?" % self.op


class Plus(Node):
    """Postfix ``+`` applied to ``op``."""

    def __init__(self, op):
        self.op = op

    def __str__(self):
        return "(%s)+" % self.op


# ---------------------------------------------------------------------------
# The following nodes are not converted into individual NFA/DFA states;
# an InputNode remains as an input that is decided at matching time.
#
# Basic elements: Character, MBCharacter
# ---------------------------------------------------------------------------

class Singleton(type):
    """Metaclass that caches one instance per tuple of constructor arguments.

    Every class created by this metaclass gets its own ``instances`` dict.
    Constructor arguments are used as the dict key, so they must be
    hashable (e.g. pass a tuple rather than a list) and positional.
    """

    def __new__(mcs, name, bases, namespace):
        # A fresh cache per class, so subclasses never share instances.
        namespace['instances'] = {}
        return type.__new__(mcs, name, bases, namespace)

    def __call__(cls, *args):
        if args not in cls.instances:
            cls.instances[args] = type.__call__(cls, *args)
        return cls.instances[args]


# Created by calling the metaclass directly: the ``__metaclass__`` class
# attribute is honoured only by Python 2, whereas a base class built this way
# carries the Singleton behaviour to its subclasses on Python 2 and 3 alike.
_SingletonNode = Singleton("_SingletonNode", (Node,), {})


class InputNode(_SingletonNode):
    """Base class of nodes that stay as inputs at matching time.

    Instances are cached per constructor arguments (see ``Singleton``).
    Hashing is derived from ``str(self)``, and all comparisons are derived
    from the hash values of the two operands.
    """

    def __add__(self, other):
        """Return a ``FixedString`` holding ``self`` followed by ``other``."""
        return FixedString(self, other)

    def __hash__(self):
        # Hash the text itself. The id() of a freshly built string is not
        # stable between calls, which made hashes and comparisons flaky.
        return hash(str(self))

    def __cmp__(self, other):
        """Three-way comparison (-1, 0, 1) of the operands' hash values."""
        mine, theirs = hash(self), hash(other)
        return (mine > theirs) - (mine < theirs)

    # Python 3 ignores __cmp__, so the rich comparisons delegate to it; on
    # Python 2 they yield the same results __cmp__ alone would have given.
    def __eq__(self, other):
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        return self.__cmp__(other) != 0

    def __lt__(self, other):
        return self.__cmp__(other) < 0

    def __le__(self, other):
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        return self.__cmp__(other) >= 0


class Character(InputNode):
    """A single character, stored as its integer code in ``char``."""

    # Imported inside the class body so that the ``Character.ascii``
    # attribute keeps existing for any code that relies on it.
    import curses.ascii as ascii

    # Printable name for each code 0-255: control names (including "SP"),
    # quoted printable characters, "DEL", then ``\x..`` escapes.
    ASCII = (
        ascii.controlnames
        + ["'%s'" % chr(c) for c in range(33, 127)]
        + ['DEL']
        + [r"\x%x" % c for c in range(128, 256)]
    )

    def __init__(self, char):
        self.char = ord(char)

    def __str__(self):
        if 0 <= self.char < len(self.ASCII):
            return self.ASCII[self.char]
        # Codes beyond the table use the same escape format as its tail
        # instead of raising IndexError.
        return r"\x%x" % self.char

    def __hash__(self):
        return hash(self.char)


class MBCharacter(Character):
    """A character that additionally records ``bytes``.

    ``bytes`` is the list of ``ord`` values of each element of
    ``str(mbchar)``.
    NOTE(review): presumably meant for multi-byte characters; how the input
    is expected to be encoded is not visible here -- confirm with the parser.
    """

    def __init__(self, mbchar):
        Character.__init__(self, mbchar)
        # A real list, so the value can be indexed and re-read on Python 3.
        self.bytes = [ord(c) for c in str(mbchar)]


class EscapeCharacter(Character):
    """A character written with an escape; constructed like ``Character``."""


class FixedString(InputNode):
    """An ordered list of input nodes, kept in ``string``.

    The constructor stores every node it is given, which is what lets
    ``InputNode.__add__`` build ``FixedString(left, right)``.
    NOTE(review): instances are cached per constructor arguments, and
    ``appfront`` mutates the cached instance -- confirm that is intended.
    """

    def __init__(self, char, *rest):
        self.string = [char]
        self.string.extend(rest)

    def appfront(self, input_):
        """Insert *input_* at the front of ``string`` and return ``self``."""
        self.string.insert(0, input_)
        return self


# ---------------------------------------------------------------------------
# Anchors: special input rules that match a specific text position.
# BegLine, EndLine
# ---------------------------------------------------------------------------

class Anchor(InputNode):
    """Base class of the position anchors."""


class BegLine(Anchor):
    """The ``^`` anchor."""

    def __str__(self):
        return "^"


class EndLine(Anchor):
    """The ``$`` anchor."""

    def __str__(self):
        return "$"


# ---------------------------------------------------------------------------
# Other special inputs: AnyChar, CharClass, Range
# ---------------------------------------------------------------------------

class AnyChar(InputNode):
    """The ``.`` wildcard."""

    def __str__(self):
        return "."


class CharClass(InputNode):
    """A bracket expression made of ``factor`` nodes, optionally inverted.

    ``factor`` must be an iterable of nodes; because instances are cached
    per constructor arguments it also has to be hashable (e.g. a tuple).
    """

    def __init__(self, factor, inverse=False):
        self.inverse = inverse
        self.factor = factor

    def __repr__(self):
        members = ",".join(repr(node) for node in self.factor)
        return "%s[%s]" % (self.__class__.__name__, members)

    def __str__(self):
        template = "[^%s]" if self.inverse else "[%s]"
        return template % "".join(map(str, self.factor))


class Range(InputNode):
    """A ``lower-upper`` range inside a character class."""

    def __init__(self, lower, upper):
        self.lower = lower
        self.upper = upper

    def __contains__(self, input_node):
        """Return True when *input_node* is a Character within the bounds.

        The test is inclusive and compares integer character codes. It
        returns False when *input_node* or either bound is not a
        ``Character``.
        """
        operands = (self.lower, input_node, self.upper)
        if not all(isinstance(node, Character) for node in operands):
            return False
        return self.lower.char <= input_node.char <= self.upper.char

    def __str__(self):
        return "%s-%s" % (self.lower, self.upper)