[Python-checkins] r56869 - in doctools/trunk/sphinx: __init__.py builder.py search.py style/searchtools.js templates/search.html util/json.py

georg.brandl python-checkins at python.org
Thu Aug 9 21:22:21 CEST 2007


Author: georg.brandl
Date: Thu Aug  9 21:22:20 2007
New Revision: 56869

Modified:
   doctools/trunk/sphinx/__init__.py
   doctools/trunk/sphinx/builder.py
   doctools/trunk/sphinx/search.py
   doctools/trunk/sphinx/style/searchtools.js
   doctools/trunk/sphinx/templates/search.html
   doctools/trunk/sphinx/util/json.py
Log:
Fix searching and search index creation for incremental builds.


Modified: doctools/trunk/sphinx/__init__.py
==============================================================================
--- doctools/trunk/sphinx/__init__.py	(original)
+++ doctools/trunk/sphinx/__init__.py	Thu Aug  9 21:22:20 2007
@@ -26,11 +26,13 @@
     print >>sys.stderr, """\
 usage: %s [options] sourcedir outdir [filenames...]"
 options: -b <builder> -- builder to use (one of %s)
-         -a -- write all files; default is to only write new and changed files
-         -d <path> -- path for the cached doctree files (default outdir/.doctrees)
+         -a        -- write all files; default is to only write new and changed files
+         -E        -- don't use a saved environment, always read all files
+         -d <path> -- path for the cached environment and doctree files
+                      (default outdir/.doctrees)
          -O <option[=value]> -- give option to to the builder (-O help for list)
          -D <setting=value> -- override a setting in sourcedir/conf.py
-         -N -- do not do colored output
+         -N        -- do not do colored output
 modi:
 * without -a and without filenames, write new and changed files.
 * with -a, write all files.
@@ -39,7 +41,7 @@
 
 def main(argv):
     try:
-        opts, args = getopt.getopt(argv[1:], 'ab:d:O:D:N')
+        opts, args = getopt.getopt(argv[1:], 'ab:d:O:D:NE')
         srcdirname = path.abspath(args[0])
         if not path.isdir(srcdirname):
             print >>sys.stderr, 'Error: Cannot find source directory.'
@@ -65,7 +67,7 @@
         return 1
 
     builder = all_files = None
-    opt_help = False
+    opt_help = freshenv = False
     options = {}
     confoverrides = {}
     doctreedir = path.join(outdirname, '.doctrees')
@@ -102,6 +104,8 @@
             confoverrides[key] = val
         elif opt == '-N':
             nocolor()
+        elif opt == '-E':
+            freshenv = True
 
     if not sys.stdout.isatty() or sys.platform == 'win32':
         # Windows' cmd box doesn't understand ANSI sequences
@@ -122,7 +126,8 @@
     builderobj = builderobj(srcdirname, outdirname, doctreedir, options,
                             status_stream=sys.stdout,
                             warning_stream=sys.stderr,
-                            confoverrides=confoverrides)
+                            confoverrides=confoverrides,
+                            freshenv=freshenv)
     if all_files:
         builderobj.build_all()
     elif filenames:

Modified: doctools/trunk/sphinx/builder.py
==============================================================================
--- doctools/trunk/sphinx/builder.py	(original)
+++ doctools/trunk/sphinx/builder.py	Thu Aug  9 21:22:20 2007
@@ -72,18 +72,18 @@
     Builds target formats from the reST sources.
     """
 
-    option_spec = {
-        'freshenv': 'Don\'t use a pickled environment',
-    }
+    option_spec = {}
 
     def __init__(self, srcdirname, outdirname, doctreedirname,
                  options, confoverrides=None, env=None,
-                 status_stream=None, warning_stream=None):
+                 status_stream=None, warning_stream=None,
+                 freshenv=False):
         self.srcdir = srcdirname
         self.outdir = outdirname
         self.doctreedir = doctreedirname
         if not path.isdir(doctreedirname):
             os.mkdir(doctreedirname)
+        self.freshenv = freshenv
 
         self.options = attrdict(options)
         self.validate_options()
@@ -161,7 +161,7 @@
            successfully loaded, False if a new environment had to be created."""
         if self.env:
             return
-        if not self.options.freshenv:
+        if not self.freshenv:
             try:
                 self.msg('trying to load pickled env...', nonl=True)
                 self.env = BuildEnvironment.frompickle(
@@ -223,8 +223,6 @@
         self.msg('creating index...')
         self.env.create_index(self)
 
-        self.prepare_writing()
-
         if filenames:
             # add all TOC files that may have changed
             filenames_set = set(filenames)
@@ -236,6 +234,8 @@
             # build all
             filenames_set = set(self.env.all_files)
 
+        self.prepare_writing(filenames)
+
         # write target files
         with collect_env_warnings(self):
             self.msg('writing output...')
@@ -249,7 +249,7 @@
         self.finish()
         self.msg('done!')
 
-    def prepare_writing(self):
+    def prepare_writing(self, filenames):
         raise NotImplementedError
 
     def write_file(self, filename, doctree):
@@ -265,12 +265,6 @@
     """
     name = 'html'
 
-    option_spec = Builder.option_spec
-    option_spec.update({
-        'nostyle': 'Don\'t copy style and script files',
-        'nosearchindex': 'Don\'t create a JSON search index for offline search',
-    })
-
     copysource = True
 
     def init(self):
@@ -301,12 +295,10 @@
             settings_overrides={'output_encoding': 'unicode'}
         )
 
-    def prepare_writing(self):
-        if not self.options.nosearchindex:
-            from .search import IndexBuilder
-            self.indexer = IndexBuilder()
-        else:
-            self.indexer = None
+    def prepare_writing(self, filenames):
+        from .search import IndexBuilder
+        self.indexer = IndexBuilder()
+        self.load_indexer(filenames)
         self.docwriter = HTMLWriter(self.config)
         self.docsettings = OptionParser(
             defaults=self.env.settings,
@@ -463,20 +455,19 @@
         )
         self.handle_file('search.rst', searchcontext, 'search')
 
-        if not self.options.nostyle:
-            self.msg('copying style files...')
-            # copy style files
-            styledirname = path.join(path.dirname(__file__), 'style')
-            ensuredir(path.join(self.outdir, 'style'))
-            for filename in os.listdir(styledirname):
-                if not filename.startswith('.'):
-                    shutil.copyfile(path.join(styledirname, filename),
-                                    path.join(self.outdir, 'style', filename))
-            # add pygments style file
-            f = open(path.join(self.outdir, 'style', 'pygments.css'), 'w')
-            if pygments:
-                f.write(get_stylesheet())
-            f.close()
+        # copy style files
+        self.msg('copying style files...')
+        styledirname = path.join(path.dirname(__file__), 'style')
+        ensuredir(path.join(self.outdir, 'style'))
+        for filename in os.listdir(styledirname):
+            if not filename.startswith('.'):
+                shutil.copyfile(path.join(styledirname, filename),
+                                path.join(self.outdir, 'style', filename))
+        # add pygments style file
+        f = open(path.join(self.outdir, 'style', 'pygments.css'), 'w')
+        if pygments:
+            f.write(get_stylesheet())
+        f.close()
 
         # dump the search index
         self.handle_finish()
@@ -497,6 +488,16 @@
             if path.getmtime(path.join(self.srcdir, filename)) > targetmtime:
                 yield filename
 
+
+    def load_indexer(self, filenames):
+        try:
+            with open(path.join(self.outdir, 'searchindex.json'), 'r') as f:
+                self.indexer.load(f, 'json')
+        except (IOError, OSError):
+            pass
+        # delete all entries for files that will be rebuilt
+        self.indexer.prune(set(self.env.all_files) - set(filenames))
+
     def index_file(self, filename, doctree, title):
         # only index pages with title
         if self.indexer is not None and title:
@@ -522,11 +523,10 @@
                             path.join(self.outdir, context['sourcename']))
 
     def handle_finish(self):
-        if self.indexer is not None:
-            self.msg('dumping search index...')
-            f = open(path.join(self.outdir, 'searchindex.json'), 'w')
+        self.msg('dumping search index...')
+        self.indexer.prune([self.get_target_uri(fn)[:-5] for fn in self.env.all_files])
+        with open(path.join(self.outdir, 'searchindex.json'), 'w') as f:
             self.indexer.dump(f, 'json')
-            f.close()
 
 
 class WebHTMLBuilder(StandaloneHTMLBuilder):
@@ -535,13 +535,6 @@
     """
     name = 'web'
 
-    # doesn't use the standalone specific options
-    option_spec = Builder.option_spec.copy()
-    option_spec.update({
-        'nostyle': 'Don\'t copy style and script files',
-        'nosearchindex': 'Don\'t create a search index for the online search',
-    })
-
     def init(self):
         # Nothing to do here.
         pass
@@ -564,6 +557,15 @@
             return source_filename[:-9] # up to /
         return source_filename[:-4] + '/'
 
+    def load_indexer(self, filenames):
+        try:
+            with open(path.join(self.outdir, 'searchindex.pickle'), 'r') as f:
+                self.indexer.load(f, 'pickle')
+        except (IOError, OSError):
+            pass
+        # delete all entries for files that will be rebuilt
+        self.indexer.prune(set(self.env.all_files) - set(filenames))
+
     def index_file(self, filename, doctree, title):
         # only index pages with title and category
         if self.indexer is not None and title:
@@ -590,11 +592,11 @@
         with file(outfilename, 'wb') as fp:
             pickle.dump(self.globalcontext, fp, 2)
 
-        if self.indexer is not None:
-            self.msg('dumping search index...')
-            f = open(path.join(self.outdir, 'searchindex.pickle'), 'w')
+        self.msg('dumping search index...')
+        self.indexer.prune(self.env.all_files)
+        with open(path.join(self.outdir, 'searchindex.pickle'), 'wb') as f:
             self.indexer.dump(f, 'pickle')
-            f.close()
+
         # touch 'last build' file, used by the web application to determine
         # when to reload its environment and clear the cache
         open(path.join(self.outdir, LAST_BUILD_FILENAME), 'w').close()
@@ -611,10 +613,9 @@
     """
     name = 'htmlhelp'
 
-    option_spec = Builder.option_spec.copy()
-    option_spec.update({
+    option_spec = {
         'outname': 'Output file base name (default "pydoc")'
-    })
+    }
 
     # don't copy the reST source
     copysource = False

Modified: doctools/trunk/sphinx/search.py
==============================================================================
--- doctools/trunk/sphinx/search.py	(original)
+++ doctools/trunk/sphinx/search.py	Thu Aug  9 21:22:20 2007
@@ -14,7 +14,7 @@
 from collections import defaultdict
 from docutils.nodes import Text, NodeVisitor
 from .util.stemmer import PorterStemmer
-from .util.json import dump_json
+from .util.json import dump_json, load_json
 
 
 word_re = re.compile(r'\w+(?u)')
@@ -50,47 +50,71 @@
     passed to the `feed` method.
     """
     formats = {
-        'json':     dump_json,
-        'pickle':   pickle.dumps
+        'json':     (dump_json, load_json),
+        'pickle':   (pickle.dumps, pickle.loads),
     }
 
     def __init__(self):
-        self._filenames = {}
-        self._mapping = {}
+        self._stemmer = Stemmer()
+        # filename -> title
         self._titles = {}
+        # stemmed word -> set(filenames)
+        self._mapping = {}
+        # category -> set(filenames)
         self._categories = {}
-        self._stemmer = Stemmer()
+
+    def load(self, stream, format):
+        """Reconstruct from frozen data."""
+        frozen = self.formats[format][1](stream.read())
+        index2fn = frozen[0]
+        self._titles = dict(zip(frozen[0], frozen[2]))
+        self._categories = dict((k, set(index2fn[i] for i in v))
+                                for (k, v) in frozen[1].iteritems())
+        self._mapping = dict((k, set(index2fn[i] for i in v))
+                             for (k, v) in frozen[3].iteritems())
 
     def dump(self, stream, format):
-        """Dump the freezed index to a stream."""
-        stream.write(self.formats[format](self.freeze()))
+        """Dump the frozen index to a stream."""
+        stream.write(self.formats[format][0](self.freeze()))
 
     def freeze(self):
         """
         Create a useable data structure. You can pass this output
         to the `SearchFrontend` to search the index.
         """
+        fns, titles = self._titles.keys(), self._titles.values()
+        fn2index = dict((f, i) for (i, f) in enumerate(fns))
         return [
-            [k for k, v in sorted(self._filenames.items(),
-                                  key=lambda x: x[1])],
-            dict(item for item in sorted(self._categories.items(),
-                                         key=lambda x: x[0])),
-            [v for k, v in sorted(self._titles.items(),
-                                  key=lambda x: x[0])],
-            dict(item for item in sorted(self._mapping.items(),
-                                         key=lambda x: x[0])),
+            fns,
+            dict((k, [fn2index[fn] for fn in v])
+                 for (k, v) in self._categories.iteritems()),
+            titles,
+            dict((k, [fn2index[fn] for fn in v])
+                 for (k, v) in self._mapping.iteritems()),
         ]
 
+    def prune(self, filenames):
+        """Remove data for all filenames not in the list."""
+        new_titles = {}
+        for filename in filenames:
+            if filename in self._titles:
+                new_titles[filename] = self._titles[filename]
+        self._titles = new_titles
+        for wordnames in self._mapping.itervalues():
+            wordnames.intersection_update(filenames)
+        for catnames in self._categories.itervalues():
+            catnames.intersection_update(filenames)
+
     def feed(self, filename, category, title, doctree):
         """Feed a doctree to the index."""
-        file_id = self._filenames.setdefault(filename, len(self._filenames))
-        self._titles[file_id] = title
+        self._titles[filename] = title
+        self._categories.setdefault(category, set()).add(filename)
+
         visitor = WordCollector(doctree)
         doctree.walk(visitor)
-        self._categories.setdefault(category, set()).add(file_id)
         for word in word_re.findall(title) + visitor.found_words:
             self._mapping.setdefault(self._stemmer.stem(word.lower()),
-                                     set()).add(file_id)
+                                     set()).add(filename)
 
 
 class SearchFrontend(object):

Modified: doctools/trunk/sphinx/style/searchtools.js
==============================================================================
--- doctools/trunk/sphinx/style/searchtools.js	(original)
+++ doctools/trunk/sphinx/style/searchtools.js	Thu Aug  9 21:22:20 2007
@@ -424,5 +424,5 @@
 }
 
 $(document).ready(function() {
-        Documentation.Search.init();
+        Search.init();
     });

Modified: doctools/trunk/sphinx/templates/search.html
==============================================================================
--- doctools/trunk/sphinx/templates/search.html	(original)
+++ doctools/trunk/sphinx/templates/search.html	Thu Aug  9 21:22:20 2007
@@ -1,6 +1,6 @@
 {% extends "layout.html" %}
 {% set title = 'Search Documentation' %}
-{% block header %}
+{% block head %}
     <script type="text/javascript" src="{{ pathto('style/searchtools.js', 1) }}"></script>
 {% endblock %}
 {% block body %}
@@ -26,13 +26,13 @@
       ('tutorial', 'Python Tutorial', true),
       ('library', 'Library Reference', true),
       ('maclib', 'Macintosh Library Modules', false),
+      ('reference', 'Language Reference', false),
       ('extending', 'Extending and Embedding', false),
       ('c-api', 'Python/C API', false),
       ('install', 'Installing Python Modules', true),
       ('distutils', 'Distributing Python Modules', true),
       ('documenting', 'Documenting Python', false),
       ('whatsnew', 'What\'s new in Python?', false),
-      ('reference', 'Language Reference', false)
     ] -%}
       <li><input type="checkbox" name="area" id="area-{{ id }}" value="{{ id
           }}"{% if checked %} checked{% endif %}>

Modified: doctools/trunk/sphinx/util/json.py
==============================================================================
--- doctools/trunk/sphinx/util/json.py	(original)
+++ doctools/trunk/sphinx/util/json.py	Thu Aug  9 21:22:20 2007
@@ -16,7 +16,7 @@
 
 import re
 
-ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]')
+# escape \, ", control characters and everything outside ASCII
 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
 ESCAPE_DICT = {
     '\\': '\\\\',
@@ -27,8 +27,6 @@
     '\r': '\\r',
     '\t': '\\t',
 }
-for i in range(0x20):
-    ESCAPE_DICT.setdefault(chr(i), '\\u%04x' % (i,))
 
 
 def encode_basestring_ascii(s):
@@ -70,3 +68,11 @@
     elif isinstance(obj, basestring):
         return encode_basestring_ascii(obj)
     raise TypeError(type(obj))
+
+
+STRING = re.compile(r'("(\\\\|\\"|[^"])*")')
+
+def load_json(s):
+    d = {'null': None, 'true': True, 'false': False}
+    s = STRING.sub(r'u\1', s)
+    return eval(s, d)


More information about the Python-checkins mailing list