changeset 98:4d498b002de5

refactor for data-structure (dict -> Transition).
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Sun, 12 Dec 2010 23:04:31 +0900
parents 5db856953793
children e327e93aeb3a
files pyrect/regexp/__init__.py pyrect/regexp/dfa.py pyrect/regexp/dfa_translator.py pyrect/regexp/fa.py pyrect/regexp/nfa.py pyrect/regexp/nfa_translator.py
diffstat 6 files changed, 87 insertions(+), 88 deletions(-) [+]
line wrap: on
line diff
--- a/pyrect/regexp/__init__.py	Sun Dec 12 19:02:37 2010 +0900
+++ b/pyrect/regexp/__init__.py	Sun Dec 12 23:04:31 2010 +0900
@@ -5,7 +5,6 @@
 from pyrect.regexp.nfa import NFA
 from pyrect.regexp.nfa_translator import NFATranslator
 from pyrect.regexp.dfa_translator import DFATranslator, SuffixDFATranslator, SuffixTrieTranslator
-from pyrect.regexp.callgraph import CallGraph
 from pyrect.regexp.analyzer import Analyzer
 from pyrect.regexp.char_collector import CharCollector
 
@@ -13,8 +12,8 @@
     """Regexp is basic class in Pyrect.
     this contains regexp, dfa, nfa,, actually it's include all.
     >>> regexp = Regexp('(A|B)*C')
-    >>> regexp.dfacg.map
-    {'1': {}, '0': {(Character:'A'): '0', (Character:'B'): '0', (Character:'C'): '1'}}
+    >>> print(regexp.dfa.transition)
+    Transition: 0 x 'A' -> 0, 0 x 'B' -> 0, 0 x 'C' -> 1,
     >>> regexp.matches('ABC')
     True
     >>> regexp = Regexp('(a|b)*cd*e')
@@ -29,8 +28,6 @@
         self.ast    = Parser().parse(regexp)
         self.nfa    = NFATranslator().translate(self.ast)
         self.dfa    = DFATranslator().translate(self.nfa)
-        self.nfacg  = CallGraph(self.nfa)
-        self.dfacg  = CallGraph(self.dfa)
         an = Analyzer()
         self.max_len, self.min_len, self.must_words =\
                       an.analyze(self.ast)
--- a/pyrect/regexp/dfa.py	Sun Dec 12 19:02:37 2010 +0900
+++ b/pyrect/regexp/dfa.py	Sun Dec 12 23:04:31 2010 +0900
@@ -5,15 +5,13 @@
 from fa import FA
 
 class DFA(FA):
-    def __init__(self, start, accepts, map_, states, assoc):
-        def transition(state, input):
-            return map_[state, input]
-        FA.__init__(self, transition, start, accepts, map_, states)
+    def __init__(self, transition, start, accepts, states, assoc):
+        FA.__init__(self, transition, start, accepts, states)
         self.assoc = assoc
 
     def copy_from(self, dfa):
         self.start = dfa.start
-        self.map = copy.deepcopy(dfa.map)
+        self.transition = copy.deepcopy(dfa.transition)
         self.accepts = copy.deepcopy(dfa.accepts)
         self.states = copy.deepcopy(dfa.states)
         self.assoc = copy.deepcopy(dfa.assoc)
@@ -41,11 +39,11 @@
         self.acceptable = collections.defaultdict(self.setdict)
 
         def search_acceptable(pcur, scur, searched, history, pstart):
-            for (pcur_, i), pnext in self.pdfa.map.iteritems():
-                if pcur_ != pcur or (pnext, i) in searched: continue
+            for i, pnext in self.pdfa.transition[pcur].iteritems():
+                if (pnext, i) in searched: continue
                 searched_ = searched + [(pnext, i)]
-                snext = self.map[(scur, i)]
-                if scur  not in history:
+                snext = self.transition[(scur, i)]
+                if scur not in history:
                     history_ = history + (scur,)
                 else:
                     history_ = history
@@ -62,8 +60,7 @@
         self.curState = self.DFA.start
 
     def do_transition(self, char):
-        self.curState = self.DFA.transition(
-            self.curState, char)
+        self.curState = self.DFA.transition[self.curState, char]
 
     def is_accept_state(self):
         return self.curState in self.DFA.accepts
--- a/pyrect/regexp/dfa_translator.py	Sun Dec 12 19:02:37 2010 +0900
+++ b/pyrect/regexp/dfa_translator.py	Sun Dec 12 23:04:31 2010 +0900
@@ -3,6 +3,7 @@
 
 from pyrect.regexp.parser import Parser
 from pyrect.regexp.ast import ASTWalker, AnyChar
+from pyrect.regexp.fa import Transition
 from pyrect.regexp.nfa_translator import NFATranslator
 from pyrect.regexp.nfa import NFA, SuffixNFA, SuffixTrieNFA
 from pyrect.regexp.dfa import DFA, SuffixDFA
@@ -12,12 +13,12 @@
     actually DFATranslator use NFATranslator and it convert NFA into DFA.
     >>> prs  = Parser()
     >>> dfat = DFATranslator()
-    >>> dfat.translate(prs.parse('(A|B)*C')).map
-    {(0, (Character:'C')): 1, (0, (Character:'A')): 0, (0, (Character:'B')): 0}
+    >>> print(dfat.translate(prs.parse('(A|B)*C')).transition)
+    Transition: 0 x 'A' -> 0, 0 x 'B' -> 0, 0 x 'C' -> 1,
     >>> dfat.translate(prs.parse('(A|B)*C')).accepts
     [1]
-    >>> dfat.translate(prs.parse('ほ?げ+')).map
-    {(2, (MBCharacter:'げ')): 2, (0, (MBCharacter:'げ')): 2, (0, (MBCharacter:'ほ')): 1, (1, (MBCharacter:'げ')): 2}
+    >>> print(dfat.translate(prs.parse('ほ?げ+')).transition)
+    Transition: 0 x \\x81 -> 1, 1 x \\x92 -> 5, 2 x \\x92 -> 5, 2 x \\xbb -> 3, 3 x \\xe3 -> 0, 4 x \\xe3 -> 6, 5 x \\xe3 -> 0, 6 x \\x81 -> 2,
     """
 
     def __init__(self):
@@ -34,61 +35,70 @@
         return self.dfa
 
     def convert(self, nfa):
-        map_ = dict()
-        que =  set(frozenset([nfa.epsilon_expand(set([nfa.start]))]))
+        transition = Transition()
+        que =  set()
         done = set()
+        que.add(nfa.epsilon_expand(nfa.start))
 
         while que:
             states = que.pop()
             anys = set()
             for state in states:
-                for k, v in nfa.map.iteritems():
-                    if state == k[0] and k[1] == AnyChar():
-                        anys.update(nfa.epsilon_expand(v))
+                trans = nfa.transition[state]
+                if trans == None: continue
+                for i, n in trans.iteritems():
+                    if i == AnyChar():
+                        anys.update(nfa.epsilon_expand(n))
 
             for state in states:
-                for k, v in nfa.map.iteritems():
-                    if state == k[0] and k[1] != '':
+                trans = nfa.transition[state]
+                if trans == None: continue
+                for i, n in trans.iteritems():
+                    if i != '':
                         default = set(anys)
-                        slot = map_.setdefault((states, k[1]), default)
-                        slot.update(nfa.epsilon_expand(v))
-
+                        if transition[states, i] == None:
+                            slot = default
+                            transition[states, i] = slot
+                        else:
+                            slot = transition[states, i]
+                        slot.update(nfa.epsilon_expand(n))
 
             done.add(states)
 
-            for v in map_.itervalues():
-                if not v in done:
-                    que.add(frozenset(v))
+            for (_, _), n in transition.itertrans():
+                if not n in done:
+                    que.add(frozenset(n))
 
         assoc = dict()
         states = set()
+        states.add(nfa.epsilon_expand(nfa.start))
 
-        states.add(nfa.epsilon_expand(frozenset([nfa.start])))
-        for state in map_.itervalues():
+        for _, state in transition.itertrans():
             states.add(frozenset(state))
         for index, state in enumerate(states):
-            assoc[frozenset(state)] = index
+            assoc[state] = index
         states = list()
         for state in assoc.itervalues():
             states.append(state)
 
-        start = assoc[nfa.epsilon_expand(frozenset([nfa.start]))]
+        start = assoc[nfa.epsilon_expand(nfa.start)]
 
         accepts = list()
         for state in assoc.iterkeys():
             if state & set(nfa.accepts):
                 accepts.append(assoc[state])
 
-        map__ = dict()
-        for (s, c), v in map_.iteritems():
-            map__[(assoc[frozenset(s)], c)] = assoc[frozenset(v)]
+        transition_ = Transition()
+
+        for (s, i), n in transition.itertrans():
+            transition_[(assoc[s], i)] = assoc[frozenset(n)]
         for k, v in assoc.items():
             assoc[v] = k
 
         return DFA(
+            transition_,
             start,
             accepts,
-            map__,
             states,
             assoc
             )
--- a/pyrect/regexp/fa.py	Sun Dec 12 19:02:37 2010 +0900
+++ b/pyrect/regexp/fa.py	Sun Dec 12 23:04:31 2010 +0900
@@ -1,11 +1,10 @@
 #!/usr/bin/env python
 
 class FA(object):
-    def __init__(self, transition, start, accepts, map_, states=None):
+    def __init__(self, transition, start, accepts, states=None):
         self.transition = transition
         self.start = start
         self.accepts = accepts
-        self.map = map_
         self.states = states
 
     def accept(self, laungage):
@@ -39,12 +38,12 @@
 
     def __str__(self):
         string = "Transition:"
-        for (s, i), ns in self.iterallitems():
+        for (s, i), ns in self.itertrans():
             string += " %s x %s -> %s," % (s, i, ns)
         return string
 
     def iterstates(self):
-        for s, t self.iteritems():
+        for s, t in self.iteritems():
             yield s, t
 
     def itertrans(self):
--- a/pyrect/regexp/nfa.py	Sun Dec 12 19:02:37 2010 +0900
+++ b/pyrect/regexp/nfa.py	Sun Dec 12 23:04:31 2010 +0900
@@ -3,16 +3,20 @@
 import curses.ascii as ascii
 import copy
 import collections
-from pyrect.regexp.fa import FA
+from pyrect.regexp.fa import FA, Transition
 
 class NFAFragment(object):
     def __init__(self):
         self.start   = None    # integer
         self.accepts = set()   # frozenset
-        self.map     = dict()  # transition, char or special -> frozenset([states])
+        self.transition     = Transition()  # transition, char or special -> frozenset([states])
 
     def connect(self, from_, char, to):
-        slot = self.map.setdefault((from_, char), set())
+        if self.transition[from_, char]:
+            slot = self.transition[from_, char]
+        else:
+            slot = set()
+            self.transition[from_, char] = slot
         slot.add(to)
 
     def accept_to(self, from_, char):
@@ -26,46 +30,44 @@
     def new_skelton(self):
         # copy fragment (self), and it return
         newFrag = NFAFragment();
-        newFrag.map = copy.deepcopy(self.map)
+        newFrag.transition = copy.deepcopy(self.transition)
         return newFrag
 
     def __or__(self, frag):
         newFrag = self.new_skelton()
-        for k, v in frag.map.iteritems():
-            newFrag.map[k] = v.copy()
+        for k, v in frag.transition.iterstates():
+            newFrag.transition[k] = copy.deepcopy(v)
         return newFrag
 
     def __str__(self):
         return ("NFA: delta -> %s, start -> %s, accepts -> %s" %
-                (self.map, self.start, self.accepts))
+                (self.transition, self.start, self.accepts))
 
     def build(self):
-        map_ = self.map
-        def transition(state, char):
-            return frozenset(map_.get((state, char), []))
-
         states = set()
-        for (s, _) in self.map.iterkeys():
+        for s in self.transition.iterkeys():
             states.add(s)
 
         return NFA(
-            transition,
+            self.transition,
             self.start,
             self.accepts,
-            self.map,
             sorted(states)
             )
 
 class NFA(FA):
-    def epsilon_expand(self, set_):
-        que = set(set_)
+    def epsilon_expand(self, states):
+        if not isinstance(states, set) and \
+        not isinstance(states, list):
+            states = set([states])
+        states = copy.deepcopy(states)
         done = set()
-        while que:
-            stat = que.pop()
-            nexts = self.transition(stat, "")
+        while states:
+            stat = states.pop()
             done.add(stat)
-            for nextStat in nexts:
-                if not nextStat in done: que.add(nextStat)
+            if self.transition[stat, ""] != None:
+                for n in self.transition[stat, ""]:
+                    if not n in done: states.add(n)
         return frozenset(done)
 
 class SuffixNFA(NFA):
@@ -73,40 +75,36 @@
         self.start = nfa.start
         self.accepts = copy.deepcopy(nfa.accepts)
         self.states = copy.deepcopy(nfa.states)
-        self.map = self.nfa_map(nfa)
+        self.transition = self.nfa_trans(nfa)
         start = set(self.states)-set([self.start])
         if self.map.has_key((self.start, '')):
             self.map[(self.start, '')].add(start)
         else:
             self.map[(self.start, '')] = start
 
-    def nfa_map(self, fa):
+    def nfa_trans(self, fa):
         if type(fa) == NFA:
-            return copy.deepcopy(fa.map)
-        nmap = dict()
-        for k, v in fa.map.items():
-            nmap[k] = set([v])
-        return nmap
-
-    def transition(self, state, char):
-        return frozenset(self.map.get((state, char), []))
+            return copy.deepcopy(fa.transition)
+        ntrans = dict()
+        for k, v in fa.map.itertrans():
+            ntrans[k] = set([v])
+        return ntrans
 
 class SuffixTrieNFA(NFA):
     def __init__(self, dfa):
         self.start = dfa.start
         self.pdfa = dfa
-        self.map = dict()
         self.states = copy.deepcopy(dfa.states)
         self.acceptable = collections.defaultdict(set)
         self.accepts = copy.deepcopy(set(dfa.accepts))
         states = [self.start] + list(set(self.states)-set((self.start,)))
-        self.make_trie_map(states)
+        self.make_trie_trans(states)
 
-    def make_trie_map(self, states):
+    def make_trie_trans(self, states):
         add_states = set()
         offset = [x * len(states) for x in range(len(states))]
         for s, ind in zip(states, offset):
-            for (s_, i), n in self.pdfa.map.iteritems():
+            for (s_, i), n in self.pdfa.trans.itertrans():
                 sind = s_ + ind
                 nind = n + ind
                 add_states.add((sind, nind))
@@ -120,7 +118,4 @@
             starts.add(self.start + i + offset[i])
 
         self.states = list(add_states)
-        self.map[(self.start, '')] = starts
-
-    def transition(self, state, char):
-        return frozenset(self.map.get((state, char), []))
+        self.trans[(self.start, '')] = starts
--- a/pyrect/regexp/nfa_translator.py	Sun Dec 12 19:02:37 2010 +0900
+++ b/pyrect/regexp/nfa_translator.py	Sun Dec 12 23:04:31 2010 +0900
@@ -11,10 +11,11 @@
     >>> prs  = Parser()
     >>> nfat = NFATranslator()
     >>> nfa = nfat.translate(prs.parse('(A|B)*C'))
-    >>> nfa.map
-    {(3, ''): set([2, 4]), (2, ''): set([0, 1]), (1, (Character:'B')): set([3]), (4, (Character:'C')): set([5]), (0, (Character:'A')): set([3])}
-    >>> nfa = nfat.translate(prs.parse('A{2}B{3,}C{4,5}'))
-    >>> nfa.map
+    >>> print nfa.transition
+    Transition: 0 x 'A' -> set([3]), 1 x 'B' -> set([3]), 2 x  -> set([0, 1]), 3 x  -> set([2, 4]), 4 x 'C' -> set([5]),
+    >>> nfa = nfat.translate(prs.parse('[A-C]{2}D{3,}E{4,5}'))
+    >>> print nfa.transition
+    Transition: 0 x 'A' -> set([5]), 1 x 'B' -> set([5]), 2 x  -> set([0, 1]), 3 x 'C' -> set([5]), 4 x  -> set([2, 3]), 5 x REPEAT -> set([4]), 5 x ['A'-'C']{2} -> set([7]), 7 x 'D' -> set([8]), 8 x REPEAT -> set([7]), 8 x 'D'{3,} -> set([10]), 10 x 'E' -> set([11]), 11 x REPEAT -> set([10]), 11 x 'E'{4, 5} -> set([13]),
     """
 
     def __init__(self):