view pyrect/pyrect/regexp/__init__.py @ 9:493c96d030c0

add pyrect
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Tue, 14 Jun 2011 17:24:03 +0900
parents
children
line wrap: on
line source

#!/usr/bin/env python

from pyrect.regexp.parser import Parser
from pyrect.regexp.dfa import DFA
from pyrect.regexp.nfa import NFA
from pyrect.regexp.nfa_translator import NFATranslator
from pyrect.regexp.dfa_translator import DFATranslator, SuffixDFATranslator, SuffixTrieTranslator
from pyrect.regexp.analyzer import Analyzer
from pyrect.regexp.char_collector import CharCollector

class Regexp(object):
    """Entry-point class of Pyrect wrapping one regular expression.

    Constructing an instance runs the pattern through ``Parser``,
    ``NFATranslator`` and ``DFATranslator`` and keeps each result as an
    attribute (``ast``, ``nfa``, ``dfa``) next to the original pattern
    text (``regexp``).  The three values returned by ``Analyzer.analyze``
    are stored as ``max_len``, ``min_len`` and ``must_words``, and the
    result of ``CharCollector.analyze`` is stored as ``chars``.

    >>> regexp = Regexp('(A|B)*C')
    >>> print(regexp.dfa.transition)
    Transition: 0 x 'A' -> 0, 0 x 'B' -> 0, 0 x 'C' -> 1,
    >>> regexp.matches('ABC')
    True
    >>> regexp = Regexp('(a|b)*cd*e')
    >>> regexp.matches('abababcdddde')
    True
    >>> regexp.matches('ababccdeee')
    False
    """

    def __init__(self, regexp):
        """Build the AST, NFA and DFA for *regexp* and analyze the AST."""
        self.regexp = regexp

        # Each stage consumes the product of the previous one.
        self.ast = Parser().parse(regexp)
        self.nfa = NFATranslator().translate(self.ast)
        self.dfa = DFATranslator().translate(self.nfa)

        # Analyzer.analyze yields three values; spread them over the instance.
        summary = Analyzer().analyze(self.ast)
        self.max_len, self.min_len, self.must_words = summary

        self.chars = CharCollector().analyze(self.ast)

    @classmethod
    def get_chars(cls, regexp):
        """Return what ``CharCollector`` reports for the parsed *regexp*."""
        return CharCollector().analyze(cls.parse(regexp))

    @classmethod
    def get_analyze(cls, regexp):
        """Return what ``Analyzer`` reports for the parsed *regexp*."""
        return Analyzer().analyze(cls.parse(regexp))

    @classmethod
    def parse(cls, regexp):
        """Parse *regexp* with a fresh ``Parser`` and return the AST."""
        return Parser().parse(regexp)

    def matches(self, string):
        """Return the result of a fresh DFA runtime accepting *string*."""
        return self.dfa.get_runtime().accept(string)


def test():
    """Run ``doctest.testmod()`` with its default arguments.

    With no module given, doctest checks the docstrings of the
    ``__main__`` module, i.e. this file when it is executed as a script.
    """
    from doctest import testmod
    testmod()


# Executed directly as a script: run the doctests.
if __name__ == "__main__":
    test()