[Python-checkins] r66582 - in doctools/trunk/sphinx: builder.py util/_json.py util/json.py

georg.brandl python-checkins at python.org
Wed Sep 24 13:30:23 CEST 2008


Author: georg.brandl
Date: Wed Sep 24 13:30:22 2008
New Revision: 66582

Log:
* Always use our own JS dumper instead of simplejson.
* Compress JS further by omitting quotes for dict keys where possible.


Removed:
   doctools/trunk/sphinx/util/_json.py
Modified:
   doctools/trunk/sphinx/builder.py
   doctools/trunk/sphinx/util/json.py

Modified: doctools/trunk/sphinx/builder.py
==============================================================================
--- doctools/trunk/sphinx/builder.py	(original)
+++ doctools/trunk/sphinx/builder.py	Wed Sep 24 13:30:22 2008
@@ -698,18 +698,19 @@
                 pass
 
     def load_indexer(self, docnames):
+        keep = set(self.env.all_docs) - set(docnames)
         try:
             f = open(path.join(self.outdir, self.searchindex_filename), 'rb')
             try:
                 self.indexer.load(f, self.indexer_format)
             finally:
                 f.close()
-        except (IOError, OSError, NotImplementedError, ValueError):
-            # we catch NotImplementedError here because if no simplejson
-            # is installed the searchindex can't be loaded
-            pass
+        except (IOError, OSError, ValueError):
+            if keep:
+                self.warn("search index couldn't be loaded, but not all documents "
+                          "will be built: the index will be incomplete.")
         # delete all entries for files that will be rebuilt
-        self.indexer.prune(set(self.env.all_docs) - set(docnames))
+        self.indexer.prune(keep)
 
     def index_page(self, pagename, doctree, title):
         # only index pages with title

Deleted: doctools/trunk/sphinx/util/_json.py
==============================================================================
--- doctools/trunk/sphinx/util/_json.py	Wed Sep 24 13:30:22 2008
+++ (empty file)
@@ -1,75 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-    sphinx.util._json
-    ~~~~~~~~~~~~~~~~~
-
-    This module implements a simple JSON serializer if simplejson is
-    unavailable.
-
-    This is not fully JSON compliant but enough for the searchindex.
-    And the generated files are smaller than the simplejson ones.
-
-    Uses the basestring encode function from simplejson.
-
-    :copyright: 2008 by Armin Ronacher, Bob Ippolito.
-    :license: BSD.
-"""
-import re
-
-
-# escape \, ", control characters and everything outside ASCII
-ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
-ESCAPE_DICT = {
-    '\\': '\\\\',
-    '"': '\\"',
-    '\b': '\\b',
-    '\f': '\\f',
-    '\n': '\\n',
-    '\r': '\\r',
-    '\t': '\\t',
-}
-
-
-def encode_basestring_ascii(s):
-    def replace(match):
-        s = match.group(0)
-        try:
-            return ESCAPE_DICT[s]
-        except KeyError:
-            n = ord(s)
-            if n < 0x10000:
-                return '\\u%04x' % (n,)
-            else:
-                # surrogate pair
-                n -= 0x10000
-                s1 = 0xd800 | ((n >> 10) & 0x3ff)
-                s2 = 0xdc00 | (n & 0x3ff)
-                return '\\u%04x\\u%04x' % (s1, s2)
-    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
-
-
-def dumps(obj, key=False):
-    if key:
-        if not isinstance(obj, basestring):
-            obj = str(obj)
-        return encode_basestring_ascii(obj)
-    if obj is None:
-        return 'null'
-    elif obj is True or obj is False:
-        return obj and 'true' or 'false'
-    elif isinstance(obj, (int, long, float)):
-        return str(obj)
-    elif isinstance(obj, dict):
-        return '{%s}' % ','.join('%s:%s' % (
-            dumps(key, True),
-            dumps(value)
-        ) for key, value in obj.iteritems())
-    elif isinstance(obj, (tuple, list, set)):
-        return '[%s]' % ','.join(dumps(x) for x in obj)
-    elif isinstance(obj, basestring):
-        return encode_basestring_ascii(obj)
-    raise TypeError(type(obj))
-
-
-def dump(obj, f):
-    f.write(dumps(obj))

Modified: doctools/trunk/sphinx/util/json.py
==============================================================================
--- doctools/trunk/sphinx/util/json.py	(original)
+++ doctools/trunk/sphinx/util/json.py	Wed Sep 24 13:30:22 2008
@@ -3,32 +3,192 @@
     sphinx.util.json
     ~~~~~~~~~~~~~~~~
 
-    This module imports JSON functions from various locations.
+    This module implements a simple JSON serializer if simplejson is
+    unavailable.
 
-    :copyright: 2008 by Armin Ronacher.
+    This is not fully JSON compliant but enough for the searchindex.
+    And the generated files are smaller than the simplejson ones.
+
+    Uses the basestring encode function from simplejson.
+
+    :copyright: 2008 by Armin Ronacher, Bob Ippolito, Georg Brandl.
     :license: BSD.
 """
 
-# if no simplejson is available this module can not load json files.
-can_load = True
+import re
 
-# unset __name__ for a moment so that the import goes straight into
-# the stdlib for python 2.4.
-_old_name = __name__
-del __name__
-
-try:
-    from simplejson import dumps, dump, loads, load
-except ImportError:
-    try:
-        from json import dumps, dump, loads, load
-    except ImportError:
-        from sphinx.util._json import dumps, dump
-        def _dummy(x):
-            raise NotImplementedError('simplejson unavailable, can\'t load')
-        load = loads = _dummy
-        can_load = False
-        del _dummy
+_str_re  = re.compile(r'"(\\\\|\\"|[^"])*"')
+_int_re  = re.compile(r'\d+')
+_name_re = re.compile(r'[a-zA-Z]\w*')
+
+# escape \, ", control characters and everything outside ASCII
+ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
+ESCAPE_DICT = {
+    '\\': '\\\\',
+    '"': '\\"',
+    '\b': '\\b',
+    '\f': '\\f',
+    '\n': '\\n',
+    '\r': '\\r',
+    '\t': '\\t',
+}
+
+ESCAPED = re.compile(r'\\u.{4}|\\.')
+
+
+def encode_string(s):
+    def replace(match):
+        s = match.group(0)
+        try:
+            return ESCAPE_DICT[s]
+        except KeyError:
+            n = ord(s)
+            if n < 0x10000:
+                return '\\u%04x' % (n,)
+            else:
+                # surrogate pair
+                n -= 0x10000
+                s1 = 0xd800 | ((n >> 10) & 0x3ff)
+                s2 = 0xdc00 | (n & 0x3ff)
+                return '\\u%04x\\u%04x' % (s1, s2)
+    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
+
+def decode_string(s):
+    return ESCAPED.sub(lambda m: eval('u"'+m.group()+'"'), s)
+
+
+reswords = set("""\
+abstract   else   instanceof   switch
+boolean   enum   int   synchronized
+break   export   interface   this
+byte   extends   long   throw
+case   false   native   throws
+catch   final   new   transient
+char   finally   null   true
+class   float   package   try
+const   for   private   typeof
+continue   function   protected   var
+debugger   goto   public   void
+default   if   return   volatile
+delete   implements   short   while
+do   import   static   with
+double   in   super""".split())
+
+def dumps(obj, key=False):
+    if key:
+        if not isinstance(obj, basestring):
+            obj = str(obj)
+        if _name_re.match(obj) and obj not in reswords:
+            return obj  # return it as a bare word
+        else:
+            return encode_string(obj)
+    if obj is None:
+        return 'null'
+    elif obj is True or obj is False:
+        return obj and 'true' or 'false'
+    elif isinstance(obj, (int, long, float)):
+        return str(obj)
+    elif isinstance(obj, dict):
+        return '{%s}' % ','.join('%s:%s' % (
+            dumps(key, True),
+            dumps(value)
+        ) for key, value in obj.iteritems())
+    elif isinstance(obj, (tuple, list, set)):
+        return '[%s]' % ','.join(dumps(x) for x in obj)
+    elif isinstance(obj, basestring):
+        return encode_string(obj)
+    raise TypeError(type(obj))
+
+def dump(obj, f):
+    f.write(dumps(obj))
+
+
+def loads(x):
+    """Loader that can read the JS subset the indexer produces."""
+    nothing = object()
+    i = 0
+    n = len(x)
+    stack = []
+    obj = nothing
+    key = False
+    keys = []
+    while i < n:
+        c = x[i]
+        if c == '{':
+            obj = {}
+            stack.append(obj)
+            key = True
+            keys.append(nothing)
+            i += 1
+        elif c == '[':
+            obj = []
+            stack.append(obj)
+            key = False
+            keys.append(nothing)
+            i += 1
+        elif c in '}]':
+            if key:
+                raise ValueError("unfinished dict")
+            oldobj = stack.pop()
+            keys.pop()
+            if stack:
+                obj = stack[-1]
+                if isinstance(obj, dict):
+                    if keys[-1] is nothing:
+                        raise ValueError("invalid key object", oldobj)
+                    obj[keys[-1]] = oldobj
+                else:
+                    obj.append(oldobj)
+            else:
+                break
+            i += 1
+        elif c == ',':
+            if key:
+                raise ValueError("multiple keys")
+            if isinstance(obj, dict):
+                key = True
+            i += 1
+        elif c == ':':
+            if not isinstance(obj, dict):
+                raise ValueError("colon in list")
+            i += 1
+            if not key:
+                raise ValueError("multiple values")
+            key = False
+        else:
+            m = _str_re.match(x, i)
+            if m:
+                y = decode_string(m.group()[1:-1])
+            else:
+                m = _int_re.match(x, i)
+                if m:
+                    y = int(m.group())
+                else:
+                    m = _name_re.match(x, i)
+                    if m:
+                        y = m.group()
+                        if y == 'true':
+                            y = True
+                        elif y == 'false':
+                            y = False
+                        elif y == 'null':
+                            y = None
+                        elif not key:
+                            raise ValueError("bareword as value")
+                    else:
+                        raise ValueError("read error at pos %d" % i)
+            i = m.end()
+            if isinstance(obj, dict):
+                if key:
+                    keys[-1] = y
+                else:
+                    obj[keys[-1]] = y
+                    key = False
+            else:
+                obj.append(y)
+    if obj is nothing:
+        raise ValueError("nothing loaded from string")
+    return obj
 
-__name__ = _old_name
-del _old_name
+def load(f):
+    return loads(f.read())


More information about the Python-checkins mailing list