[Python-checkins] r70471 - in python/trunk: Doc/library/json.rst Lib/json/__init__.py Lib/json/decoder.py Lib/json/tests/test_decode.py Lib/json/tests/test_unicode.py Misc/NEWS Modules/_json.c

raymond.hettinger python-checkins at python.org
Thu Mar 19 20:19:04 CET 2009


Author: raymond.hettinger
Date: Thu Mar 19 20:19:03 2009
New Revision: 70471

Log:
Issue 5381:  Add object_pairs_hook to the json module.



Modified:
   python/trunk/Doc/library/json.rst
   python/trunk/Lib/json/__init__.py
   python/trunk/Lib/json/decoder.py
   python/trunk/Lib/json/tests/test_decode.py
   python/trunk/Lib/json/tests/test_unicode.py
   python/trunk/Misc/NEWS
   python/trunk/Modules/_json.c

Modified: python/trunk/Doc/library/json.rst
==============================================================================
--- python/trunk/Doc/library/json.rst	(original)
+++ python/trunk/Doc/library/json.rst	Thu Mar 19 20:19:03 2009
@@ -166,7 +166,7 @@
    :func:`dump`.
 
 
-.. function:: load(fp[, encoding[, cls[, object_hook[, parse_float[, parse_int[, parse_constant[, **kw]]]]]]])
+.. function:: load(fp[, encoding[, cls[, object_hook[, parse_float[, parse_int[, parse_constant[, object_pairs_hook[, **kw]]]]]]]])
 
    Deserialize *fp* (a ``.read()``-supporting file-like object containing a JSON
    document) to a Python object.
@@ -182,6 +182,17 @@
    *object_hook* will be used instead of the :class:`dict`.  This feature can be used
    to implement custom decoders (e.g. JSON-RPC class hinting).
 
+   *object_pairs_hook* is an optional function that will be called with the
+   result of any object literal decoded with an ordered list of pairs.  The
+   return value of *object_pairs_hook* will be used instead of the
+   :class:`dict`.  This feature can be used to implement custom decoders that
+   rely on the order that the key and value pairs are decoded (for example,
+   :func:`collections.OrderedDict` will remember the order of insertion). If
+   *object_hook* is also defined, the *object_pairs_hook* takes priority.
+
+   .. versionchanged:: 2.7
+      Added support for *object_pairs_hook*.
+
    *parse_float*, if specified, will be called with the string of every JSON
    float to be decoded.  By default, this is equivalent to ``float(num_str)``.
    This can be used to use another datatype or parser for JSON floats
@@ -202,7 +213,7 @@
    class.
 
 
-.. function:: loads(s[, encoding[, cls[, object_hook[, parse_float[, parse_int[, parse_constant[, **kw]]]]]]])
+.. function:: loads(s[, encoding[, cls[, object_hook[, parse_float[, parse_int[, parse_constant[, object_pairs_hook[, **kw]]]]]]]])
 
    Deserialize *s* (a :class:`str` or :class:`unicode` instance containing a JSON
    document) to a Python object.
@@ -218,7 +229,7 @@
 Encoders and decoders
 ---------------------
 
-.. class:: JSONDecoder([encoding[, object_hook[, parse_float[, parse_int[, parse_constant[, strict]]]]]])
+.. class:: JSONDecoder([encoding[, object_hook[, parse_float[, parse_int[, parse_constant[, strict[, object_pairs_hook]]]]]]])
 
    Simple JSON decoder.
 
@@ -259,6 +270,17 @@
    :class:`dict`.  This can be used to provide custom deserializations (e.g. to
    support JSON-RPC class hinting).
 
+   *object_pairs_hook*, if specified, will be called with the result of every
+   JSON object decoded with an ordered list of pairs.  The return value of
+   *object_pairs_hook* will be used instead of the :class:`dict`.  This
+   feature can be used to implement custom decoders that rely on the order
+   that the key and value pairs are decoded (for example,
+   :func:`collections.OrderedDict` will remember the order of insertion). If
+   *object_hook* is also defined, the *object_pairs_hook* takes priority.
+
+   .. versionchanged:: 2.7
+      Added support for *object_pairs_hook*.
+
    *parse_float*, if specified, will be called with the string of every JSON
    float to be decoded.  By default, this is equivalent to ``float(num_str)``.
    This can be used to use another datatype or parser for JSON floats

Modified: python/trunk/Lib/json/__init__.py
==============================================================================
--- python/trunk/Lib/json/__init__.py	(original)
+++ python/trunk/Lib/json/__init__.py	Thu Mar 19 20:19:03 2009
@@ -238,11 +238,12 @@
         **kw).encode(obj)
 
 
-_default_decoder = JSONDecoder(encoding=None, object_hook=None)
+_default_decoder = JSONDecoder(encoding=None, object_hook=None,
+                               object_pairs_hook=None)
 
 
 def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
-        parse_int=None, parse_constant=None, **kw):
+        parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
     """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
     a JSON document) to a Python object.
 
@@ -265,11 +266,11 @@
     return loads(fp.read(),
         encoding=encoding, cls=cls, object_hook=object_hook,
         parse_float=parse_float, parse_int=parse_int,
-        parse_constant=parse_constant, **kw)
+        parse_constant=parse_constant, object_pairs_hook=None, **kw)
 
 
 def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
-        parse_int=None, parse_constant=None, **kw):
+        parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
     """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
     document) to a Python object.
 
@@ -304,12 +305,14 @@
     """
     if (cls is None and encoding is None and object_hook is None and
             parse_int is None and parse_float is None and
-            parse_constant is None and not kw):
+            parse_constant is None and object_pairs_hook is None and not kw):
         return _default_decoder.decode(s)
     if cls is None:
         cls = JSONDecoder
     if object_hook is not None:
         kw['object_hook'] = object_hook
+    if object_pairs_hook is not None:
+        kw['object_pairs_hook'] = object_pairs_hook
     if parse_float is not None:
         kw['parse_float'] = parse_float
     if parse_int is not None:

Modified: python/trunk/Lib/json/decoder.py
==============================================================================
--- python/trunk/Lib/json/decoder.py	(original)
+++ python/trunk/Lib/json/decoder.py	Thu Mar 19 20:19:03 2009
@@ -147,8 +147,9 @@
 WHITESPACE_STR = ' \t\n\r'
 
 def JSONObject((s, end), encoding, strict, scan_once, object_hook,
-        _w=WHITESPACE.match, _ws=WHITESPACE_STR):
-    pairs = {}
+               object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+    pairs = []
+    pairs_append = pairs.append
     # Use a slice to prevent IndexError from being raised, the following
     # check will raise a more specific ValueError if the string is empty
     nextchar = s[end:end + 1]
@@ -187,7 +188,7 @@
             value, end = scan_once(s, end)
         except StopIteration:
             raise ValueError(errmsg("Expecting object", s, end))
-        pairs[key] = value
+        pairs_append((key, value))
 
         try:
             nextchar = s[end]
@@ -218,6 +219,10 @@
         if nextchar != '"':
             raise ValueError(errmsg("Expecting property name", s, end - 1))
 
+    if object_pairs_hook is not None:
+        result = object_pairs_hook(pairs)
+        return result, end
+    pairs = dict(pairs)
     if object_hook is not None:
         pairs = object_hook(pairs)
     return pairs, end
@@ -289,7 +294,8 @@
     """
 
     def __init__(self, encoding=None, object_hook=None, parse_float=None,
-            parse_int=None, parse_constant=None, strict=True):
+            parse_int=None, parse_constant=None, strict=True,
+            object_pairs_hook=None):
         """``encoding`` determines the encoding used to interpret any ``str``
         objects decoded by this instance (utf-8 by default).  It has no
         effect when decoding ``unicode`` objects.
@@ -320,6 +326,7 @@
         """
         self.encoding = encoding
         self.object_hook = object_hook
+        self.object_pairs_hook = object_pairs_hook
         self.parse_float = parse_float or float
         self.parse_int = parse_int or int
         self.parse_constant = parse_constant or _CONSTANTS.__getitem__

Modified: python/trunk/Lib/json/tests/test_decode.py
==============================================================================
--- python/trunk/Lib/json/tests/test_decode.py	(original)
+++ python/trunk/Lib/json/tests/test_decode.py	Thu Mar 19 20:19:03 2009
@@ -2,6 +2,7 @@
 from unittest import TestCase
 
 import json
+from collections import OrderedDict
 
 class TestDecode(TestCase):
     def test_decimal(self):
@@ -20,3 +21,18 @@
         # exercise the uncommon cases. The array cases are already covered.
         rval = json.loads('{   "key"    :    "value"    ,  "k":"v"    }')
         self.assertEquals(rval, {"key":"value", "k":"v"})
+
+    def test_object_pairs_hook(self):
+        s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
+        p = [("xkd", 1), ("kcw", 2), ("art", 3), ("hxm", 4),
+             ("qrt", 5), ("pad", 6), ("hoy", 7)]
+        self.assertEqual(json.loads(s), eval(s))
+        self.assertEqual(json.loads(s, object_pairs_hook = lambda x: x), p)
+        od = json.loads(s, object_pairs_hook = OrderedDict)
+        self.assertEqual(od, OrderedDict(p))
+        self.assertEqual(type(od), OrderedDict)
+        # the object_pairs_hook takes priority over the object_hook
+        self.assertEqual(json.loads(s,
+                                    object_pairs_hook = OrderedDict,
+                                    object_hook = lambda x: None),
+                         OrderedDict(p))

Modified: python/trunk/Lib/json/tests/test_unicode.py
==============================================================================
--- python/trunk/Lib/json/tests/test_unicode.py	(original)
+++ python/trunk/Lib/json/tests/test_unicode.py	Thu Mar 19 20:19:03 2009
@@ -1,6 +1,7 @@
 from unittest import TestCase
 
 import json
+from collections import OrderedDict
 
 class TestUnicode(TestCase):
     def test_encoding1(self):
@@ -54,6 +55,21 @@
             s = '"\\u{0:04x}"'.format(i)
             self.assertEquals(json.loads(s), u)
 
+    def test_object_pairs_hook_with_unicode(self):
+        s = u'{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
+        p = [(u"xkd", 1), (u"kcw", 2), (u"art", 3), (u"hxm", 4),
+             (u"qrt", 5), (u"pad", 6), (u"hoy", 7)]
+        self.assertEqual(json.loads(s), eval(s))
+        self.assertEqual(json.loads(s, object_pairs_hook = lambda x: x), p)
+        od = json.loads(s, object_pairs_hook = OrderedDict)
+        self.assertEqual(od, OrderedDict(p))
+        self.assertEqual(type(od), OrderedDict)
+        # the object_pairs_hook takes priority over the object_hook
+        self.assertEqual(json.loads(s,
+                                    object_pairs_hook = OrderedDict,
+                                    object_hook = lambda x: None),
+                         OrderedDict(p))
+
     def test_default_encoding(self):
         self.assertEquals(json.loads(u'{"a": "\xe9"}'.encode('utf-8')),
             {'a': u'\xe9'})

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Thu Mar 19 20:19:03 2009
@@ -181,6 +181,9 @@
 Library
 -------
 
+- Issue #5381: Added object_pairs_hook to the json module.  This allows
+  OrderedDicts to be built by the decoder.
+
 - Issue #2110: Add support for thousands separator and 'n' type
   specifier to Decimal.__format__
 

Modified: python/trunk/Modules/_json.c
==============================================================================
--- python/trunk/Modules/_json.c	(original)
+++ python/trunk/Modules/_json.c	Thu Mar 19 20:19:03 2009
@@ -35,6 +35,7 @@
     PyObject *encoding;
     PyObject *strict;
     PyObject *object_hook;
+    PyObject *pairs_hook;
     PyObject *parse_float;
     PyObject *parse_int;
     PyObject *parse_constant;
@@ -44,6 +45,7 @@
     {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
     {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
     {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
+    {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
     {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
     {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
     {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
@@ -891,6 +893,7 @@
     Py_VISIT(s->encoding);
     Py_VISIT(s->strict);
     Py_VISIT(s->object_hook);
+    Py_VISIT(s->pairs_hook);
     Py_VISIT(s->parse_float);
     Py_VISIT(s->parse_int);
     Py_VISIT(s->parse_constant);
@@ -906,6 +909,7 @@
     Py_CLEAR(s->encoding);
     Py_CLEAR(s->strict);
     Py_CLEAR(s->object_hook);
+    Py_CLEAR(s->pairs_hook);
     Py_CLEAR(s->parse_float);
     Py_CLEAR(s->parse_int);
     Py_CLEAR(s->parse_constant);
@@ -923,13 +927,17 @@
     */
     char *str = PyString_AS_STRING(pystr);
     Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
-    PyObject *rval = PyDict_New();
+    PyObject *rval;
+    PyObject *pairs;
+    PyObject *item;
     PyObject *key = NULL;
     PyObject *val = NULL;
     char *encoding = PyString_AS_STRING(s->encoding);
     int strict = PyObject_IsTrue(s->strict);
     Py_ssize_t next_idx;
-    if (rval == NULL)
+
+    pairs = PyList_New(0);
+    if (pairs == NULL)
         return NULL;
 
     /* skip whitespace after { */
@@ -962,11 +970,16 @@
             if (val == NULL)
                 goto bail;
 
-            if (PyDict_SetItem(rval, key, val) == -1)
+            item = PyTuple_Pack(2, key, val);
+            if (item == NULL)
                 goto bail;
-
             Py_CLEAR(key);
             Py_CLEAR(val);
+            if (PyList_Append(pairs, item) == -1) {
+                Py_DECREF(item);
+                goto bail;
+            }
+            Py_DECREF(item);
             idx = next_idx;
 
             /* skip whitespace before } or , */
@@ -992,6 +1005,23 @@
         raise_errmsg("Expecting object", pystr, end_idx);
         goto bail;
     }
+
+    /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
+    if (s->pairs_hook != Py_None) {
+        val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
+        if (val == NULL)
+            goto bail;
+        Py_DECREF(pairs);
+        *next_idx_ptr = idx + 1;
+        return val;
+    }
+
+    rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type), 
+                                         pairs, NULL);
+    if (rval == NULL)
+        goto bail;
+    Py_CLEAR(pairs);
+
     /* if object_hook is not None: rval = object_hook(rval) */
     if (s->object_hook != Py_None) {
         val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
@@ -1006,7 +1036,7 @@
 bail:
     Py_XDECREF(key);
     Py_XDECREF(val);
-    Py_DECREF(rval);
+    Py_XDECREF(pairs);
     return NULL;
 }
 
@@ -1021,12 +1051,16 @@
     */
     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
     Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
-    PyObject *val = NULL;
-    PyObject *rval = PyDict_New();
+    PyObject *rval;
+    PyObject *pairs;
+    PyObject *item;
     PyObject *key = NULL;
+    PyObject *val = NULL;
     int strict = PyObject_IsTrue(s->strict);
     Py_ssize_t next_idx;
-    if (rval == NULL)
+
+    pairs = PyList_New(0);
+    if (pairs == NULL)
         return NULL;
 
     /* skip whitespace after { */
@@ -1059,11 +1093,16 @@
             if (val == NULL)
                 goto bail;
 
-            if (PyDict_SetItem(rval, key, val) == -1)
+            item = PyTuple_Pack(2, key, val);
+            if (item == NULL)
                 goto bail;
-
             Py_CLEAR(key);
             Py_CLEAR(val);
+            if (PyList_Append(pairs, item) == -1) {
+                Py_DECREF(item);
+                goto bail;
+            }
+            Py_DECREF(item);
             idx = next_idx;
 
             /* skip whitespace before } or , */
@@ -1091,6 +1130,22 @@
         goto bail;
     }
 
+    /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
+    if (s->pairs_hook != Py_None) {
+        val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
+        if (val == NULL)
+            goto bail;
+        Py_DECREF(pairs);
+        *next_idx_ptr = idx + 1;
+        return val;
+    }
+
+    rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type), 
+                                         pairs, NULL);
+    if (rval == NULL)
+        goto bail;
+    Py_CLEAR(pairs);
+
     /* if object_hook is not None: rval = object_hook(rval) */
     if (s->object_hook != Py_None) {
         val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
@@ -1105,7 +1160,7 @@
 bail:
     Py_XDECREF(key);
     Py_XDECREF(val);
-    Py_DECREF(rval);
+    Py_XDECREF(pairs);
     return NULL;
 }
 
@@ -1648,6 +1703,7 @@
         s->encoding = NULL;
         s->strict = NULL;
         s->object_hook = NULL;
+        s->pairs_hook = NULL;
         s->parse_float = NULL;
         s->parse_int = NULL;
         s->parse_constant = NULL;
@@ -1690,6 +1746,9 @@
     s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
     if (s->object_hook == NULL)
         goto bail;
+    s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
+    if (s->object_hook == NULL)
+        goto bail;
     s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
     if (s->parse_float == NULL)
         goto bail;
@@ -1706,6 +1765,7 @@
     Py_CLEAR(s->encoding);
     Py_CLEAR(s->strict);
     Py_CLEAR(s->object_hook);
+    Py_CLEAR(s->pairs_hook);
     Py_CLEAR(s->parse_float);
     Py_CLEAR(s->parse_int);
     Py_CLEAR(s->parse_constant);


More information about the Python-checkins mailing list