view pyrect/regexp/__init__.py @ 87:d23f12ce0369

Add suffix-DFA; it is to be used for the parallel matching algorithm (not implemented yet).
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Sun, 14 Nov 2010 04:16:12 +0900
parents 74f4e50c4f11
children 4d498b002de5
line wrap: on
line source

#!/usr/bin/env python

from pyrect.regexp.parser import Parser
from pyrect.regexp.dfa import DFA
from pyrect.regexp.nfa import NFA
from pyrect.regexp.nfa_translator import NFATranslator
from pyrect.regexp.dfa_translator import DFATranslator, SuffixDFATranslator, SuffixTrieTranslator
from pyrect.regexp.callgraph import CallGraph
from pyrect.regexp.analyzer import Analyzer
from pyrect.regexp.char_collector import CharCollector

class Regexp(object):
    """Front-end object bundling every artifact built from one pattern.

    Constructing a Regexp parses the pattern and keeps, as attributes, the
    pattern string, its AST, the translated NFA and DFA, a CallGraph for
    each automaton, the Analyzer results and the collected characters.

    >>> regexp = Regexp('(A|B)*C')
    >>> regexp.dfacg.map
    {'1': {}, '0': {(Character:'A'): '0', (Character:'B'): '0', (Character:'C'): '1'}}
    >>> regexp.matches('ABC')
    True
    >>> regexp = Regexp('(a|b)*cd*e')
    >>> regexp.matches('abababcdddde')
    True
    >>> regexp.matches('ababccdeee')
    False
    """

    def __init__(self, regexp):
        """Build and store every derived representation of *regexp*.

        regexp -- the pattern, passed unchanged to Parser().parse().
        """
        self.regexp = regexp

        # Pattern -> AST -> NFA -> DFA; each stage consumes the previous one.
        syntax_tree = Parser().parse(regexp)
        self.ast = syntax_tree
        nfa = NFATranslator().translate(syntax_tree)
        self.nfa = nfa
        dfa = DFATranslator().translate(nfa)
        self.dfa = dfa

        # One call graph per automaton.
        self.nfacg = CallGraph(nfa)
        self.dfacg = CallGraph(dfa)

        # Analyzer.analyze() must yield exactly three items; they are stored
        # in the order max_len, min_len, must_words.
        max_len, min_len, must_words = Analyzer().analyze(syntax_tree)
        self.max_len = max_len
        self.min_len = min_len
        self.must_words = must_words

        self.chars = CharCollector().analyze(syntax_tree)

    @classmethod
    def get_chars(cls, regexp):
        """Return the CharCollector result for the AST parsed from *regexp*."""
        return CharCollector().analyze(cls.parse(regexp))

    @classmethod
    def get_analyze(cls, regexp):
        """Return the Analyzer result for the AST parsed from *regexp*."""
        return Analyzer().analyze(cls.parse(regexp))

    @classmethod
    def parse(cls, regexp):
        """Parse *regexp* with a fresh Parser and return what it produces."""
        return Parser().parse(regexp)

    def matches(self, string):
        """Return what a fresh runtime of the stored DFA reports for *string*.

        A new runtime is requested from self.dfa on every call and its
        accept() result is passed back unchanged.
        """
        return self.dfa.get_runtime().accept(string)


def test():
    """Run doctest.testmod() with its default arguments."""
    from doctest import testmod
    testmod()


# Run the doctests when this file is executed directly as a script.
if __name__ == "__main__":
    test()