changeset 2:bbb6e28dc488 0.13

Add documentation to `MercurialNode._find_dirctx`. As this is the heart of the browsing-speed optimizations, better make it easier to grasp (did I really write that code?). Also make the method private, as it's really an implementation detail and shouldn't be part of the public API.
author Christian Boos <christian.boos@free.fr>
date Sun, 10 Jun 2012 00:36:16 +0200
parents b30690538379
children 8af21bda2b3e
files tracext/hg/backend.py
diffstat 1 files changed, 33 insertions(+), 18 deletions(-)
--- a/tracext/hg/backend.py	Sat Jun 09 19:34:03 2012 +0200
+++ b/tracext/hg/backend.py	Sun Jun 10 00:36:16 2012 +0200
@@ -884,8 +884,8 @@
             if not dirctx:
                 # we need to find the most recent change for a file below dir
                 str_dir = str_path + '/'
-                dirctxs = self.find_dirctx(changectx.rev(), [str_dir,],
-                                           {str_dir: str_entries})
+                dirctxs = self._find_dirctx(changectx.rev(), [str_dir,],
+                                            {str_dir: str_entries})
                 dirctx = dirctxs.values()[0]
 
         if not kind:
@@ -906,13 +906,13 @@
         self.created_rev = created_rev
         self.data = None
 
-    def find_dirctx(self, max_rev, str_dirnames, str_entries):
+    def _find_dirctx(self, max_rev, str_dirnames, str_entries):
         """Find most recent modification for each given directory path.
         
         :param max_rev: find no revision more recent than this one
         :param str_dirnames: directory paths to consider 
-                             (as `str` ending with '/')
-        :param str_entries: optionally maps directories to their file content
+                             (list of `str` ending with '/')
+        :param str_entries: maps each directory to the files it contains
 
         :return: a `dict` with `str_dirnames` as keys, `changectx` as values
 
@@ -929,13 +929,14 @@
            each directory; this is much faster but can still be slow
            if some folders are only modified in the distant past
            
-        It is possible to combine both approach, and this can yield
-        excellent results in some cases (e.g. browsing the Linux repos
-        @ 118733 takes several minutes with the first approach, 11s
-        with the second, but only 1.2s with the hybrid approach)
+        It is possible to combine both approaches, which can produce
+        excellent results in some cases: for example, browsing the
+        root of the Hg mirror of the Linux repository at revision
+        118733 takes several minutes with the first approach, 11s
+        with the second, but only 1.2s with the hybrid approach.
 
         Note that the specialized scan of the changelog we do below is
-        more efficient than the general cmdutil.walkchangerevs here.
+        more efficient than the general cmdutil.walkchangerevs.
         """
         str_dirctxs = {}
         repo = self.repos.repo
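
The "filelog strategy" mentioned in the new docstring relies on the per-file revlogs instead of scanning the changelog. As an illustration only (this sketch is not part of the changeset; it assumes a plain repository object as returned by `mercurial.hg.repository`, and the function name is made up), the last change of a single file at or below a given revision can be found like this:

    from mercurial import hg, ui

    def last_change_of_file(repo, str_file, max_rev):
        """Return the newest changelog rev <= max_rev touching str_file,
        or None if there is no such revision."""
        # the filelog only records the revisions that touched str_file,
        # so it is usually much shorter than the changelog
        flog = repo.file(str_file)
        # walk the file revisions backwards and map each one to its
        # changelog revision ("linkrev")
        for frev in xrange(len(flog) - 1, -1, -1):
            crev = flog.linkrev(frev)
            if crev <= max_rev:
                return crev
        return None

    # hypothetical usage (path and revision are placeholders)
    repo = hg.repository(ui.ui(), '/path/to/repo')
    print last_change_of_file(repo, 'tracext/hg/backend.py', 118733)

The lookup itself is cheap, but it has to be repeated for every file below the directory of interest, which is why the method only probes a few files at a time.
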
@@ -946,16 +947,27 @@
             for str_file in ctx.files():
                 for str_dir in str_dirnames[:]:
                     if str_file.startswith(str_dir):
+                        # rev for str_dir was found using first strategy
                         str_dirctxs[str_dir] = ctx
                         str_dirnames.remove(str_dir)
-                        if not str_dirnames: # if nothing left to find
+                        if not str_dirnames: # nothing left to find
                             return str_dirctxs
-            # in parallel, try the filelog strategy (the 463, 2, 40
+
+            # In parallel, try the filelog strategy (the 463, 2, 40
             # values below look a bit like magic numbers; actually
             # they were selected by testing the plugin on the Linux
             # and NetBeans repositories)
-            if r % 463 == 0:
-                k = max(2, 40 / len(str_dirnames))
+
+            # only use the filelog strategy every `n` revs
+            n = 463
+
+            # k, the number of files to examine per directory,
+            # will lie between `min_files` and `max_files`
+            min_files = 2
+            max_files = 40 # (k reaches this value only when one dir is left)
+
+            if r % n == 0:
+                k = max(min_files, max_files / len(str_dirnames))
                 for str_dir in str_dirnames[:]:
                     str_files = str_entries[str_dir]
                     dr = str_dirctxs.get(str_dir, 0)
@@ -966,9 +978,13 @@
                             pass # that file was not on this revision `r`
                     str_files = str_files[k:]
                     if str_files:
+                        # not all files for str_dir seen yet,
+                        # store max rev found so far
                         str_entries[str_dir] = str_files
                         str_dirctxs[str_dir] = dr
                     else:
+                        # all files for str_dir were examined,
+                        # rev found using filelog strategy
                         str_dirctxs[str_dir] = repo[dr]
                         str_dirnames.remove(str_dir)
                         if not str_dirnames:
@@ -1032,14 +1048,13 @@
 
         # pre-computing the changectx for the last change in each sub-directory
         if str_dirnames:
-            dirctxs = self.find_dirctx(self.created_rev, str_dirnames, 
-                                       str_entries)
+            dirctxs = self._find_dirctx(self.created_rev, str_dirnames, 
+                                        str_entries)
         else:
             dirctxs = {}
 
         for str_entry in str_entries:
-            yield self.subnode(str_entry.rstrip('/'),
-                               dirctxs.get(str_entry, None))
+            yield self.subnode(str_entry.rstrip('/'), dirctxs.get(str_entry))
 
     def get_history(self, limit=None):
         repo = self.repos.repo
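
To make the hybrid scan described in `_find_dirctx`'s docstring easier to follow, here is a hedged, self-contained sketch reduced to a single directory (the real method tracks several directories at once and examines only a few files per probe; the function name and the `probe_interval` argument are illustrative, with 463 being the interval discussed in the comments above):

    def last_change_below_dir(repo, str_dir, str_files, max_rev,
                              probe_interval=463):
        """Return the newest changectx at or below max_rev modifying
        something below str_dir (a str ending with '/'); str_files
        lists the files currently below that directory."""
        best = -1
        for r in xrange(max_rev, -1, -1):
            ctx = repo[r]
            # strategy 1: inspect the files touched by changeset r
            if any(f.startswith(str_dir) for f in ctx.files()):
                return ctx
            # strategy 2: every probe_interval revisions, ask the
            # filelogs of the directory's own files directly
            if r % probe_interval == 0:
                for str_file in str_files:
                    try:
                        best = max(best, repo.filectx(str_file,
                                                      changeid=r).linkrev())
                    except LookupError:
                        pass  # that file does not exist in revision r
                if best >= 0:
                    # every file was probed, so best is the answer
                    return repo[best]
        return repo[best] if best >= 0 else None

The early returns are what makes the combination effective: a directory that changed recently is caught almost immediately by the first strategy, while a directory untouched for thousands of revisions is resolved by the first filelog probe instead of a full changelog scan.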