[Python-checkins] gh-95385 Fastpath for encoding dict to JSON (gh-95374)

corona10 webhook-mailer at python.org
Sat Aug 6 02:55:42 EDT 2022


https://github.com/python/cpython/commit/15f4a35487d5ed9eb1edfdb9169eae6f5ad0874a
commit: 15f4a35487d5ed9eb1edfdb9169eae6f5ad0874a
branch: main
author: Aivars Kalvāns <aivars.kalvans at gmail.com>
committer: corona10 <donghee.na92 at gmail.com>
date: 2022-08-06T15:55:24+09:00
summary:

gh-95385 Fastpath for encoding dict to JSON (gh-95374)

files:
A Misc/NEWS.d/next/Library/2022-07-28-17-14-38.gh-issue-95385.6YlsDI.rst
M Modules/_json.c

diff --git a/Misc/NEWS.d/next/Library/2022-07-28-17-14-38.gh-issue-95385.6YlsDI.rst b/Misc/NEWS.d/next/Library/2022-07-28-17-14-38.gh-issue-95385.6YlsDI.rst
new file mode 100644
index 00000000000..89fa9c2b276
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-07-28-17-14-38.gh-issue-95385.6YlsDI.rst
@@ -0,0 +1 @@
+Faster ``json.dumps()`` when sorting of keys is not requested (default).
diff --git a/Modules/_json.c b/Modules/_json.c
index 7ea84efdb91..1c39b46937d 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -12,6 +12,7 @@
 #include "Python.h"
 #include "pycore_ceval.h"         // _Py_EnterRecursiveCall()
 #include "structmember.h"         // PyMemberDef
+#include <stdbool.h>              // bool
 
 
 typedef struct _PyScannerObject {
@@ -1491,17 +1492,79 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
     }
 }
 
+static int
+encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
+                         PyObject *key, PyObject *value, Py_ssize_t indent_level)
+{
+    PyObject *keystr = NULL;
+    PyObject *encoded;
+
+    if (PyUnicode_Check(key)) {
+        Py_INCREF(key);
+        keystr = key;
+    }
+    else if (PyFloat_Check(key)) {
+        keystr = encoder_encode_float(s, key);
+    }
+    else if (key == Py_True || key == Py_False || key == Py_None) {
+                    /* This must come before the PyLong_Check because
+                       True and False are also 1 and 0.*/
+        keystr = _encoded_const(key);
+    }
+    else if (PyLong_Check(key)) {
+        keystr = PyLong_Type.tp_repr(key);
+    }
+    else if (s->skipkeys) {
+        return 0;
+    }
+    else {
+        PyErr_Format(PyExc_TypeError,
+                     "keys must be str, int, float, bool or None, "
+                     "not %.100s", Py_TYPE(key)->tp_name);
+        return -1;
+    }
+
+    if (keystr == NULL) {
+        return -1;
+    }
+
+    if (*first) {
+        *first = false;
+    } 
+    else {
+        if (_PyUnicodeWriter_WriteStr(writer, s->item_separator) < 0) {
+            Py_DECREF(keystr);
+            return -1;
+        }
+    }
+
+    encoded = encoder_encode_string(s, keystr);
+    Py_DECREF(keystr);
+    if (encoded == NULL) {
+        return -1;
+    }
+
+    if (_steal_accumulate(writer, encoded) < 0) {
+        return -1;
+    }
+    if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
+        return -1;
+    }
+    if (encoder_listencode_obj(s, writer, value, indent_level) < 0) {
+        return -1;
+    }
+    return 0;
+}
+
 static int
 encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
                         PyObject *dct, Py_ssize_t indent_level)
 {
     /* Encode Python dict dct a JSON term */
-    PyObject *kstr = NULL;
     PyObject *ident = NULL;
-    PyObject *it = NULL;
-    PyObject *items;
-    PyObject *item = NULL;
-    Py_ssize_t idx;
+    PyObject *items = NULL;
+    PyObject *key, *value;
+    bool first = true;
 
     if (PyDict_GET_SIZE(dct) == 0)  /* Fast path */
         return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2);
@@ -1535,84 +1598,34 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
         */
     }
 
-    items = PyMapping_Items(dct);
-    if (items == NULL)
-        goto bail;
-    if (s->sort_keys && PyList_Sort(items) < 0) {
-        Py_DECREF(items);
-        goto bail;
-    }
-    it = PyObject_GetIter(items);
-    Py_DECREF(items);
-    if (it == NULL)
-        goto bail;
-    idx = 0;
-    while ((item = PyIter_Next(it)) != NULL) {
-        PyObject *encoded, *key, *value;
-        if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
-            PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
+    if (s->sort_keys) {
+
+        items = PyDict_Items(dct);
+        if (items == NULL || PyList_Sort(items) < 0)
             goto bail;
-        }
-        key = PyTuple_GET_ITEM(item, 0);
-        if (PyUnicode_Check(key)) {
-            Py_INCREF(key);
-            kstr = key;
-        }
-        else if (PyFloat_Check(key)) {
-            kstr = encoder_encode_float(s, key);
-            if (kstr == NULL)
-                goto bail;
-        }
-        else if (key == Py_True || key == Py_False || key == Py_None) {
-                        /* This must come before the PyLong_Check because
-                           True and False are also 1 and 0.*/
-            kstr = _encoded_const(key);
-            if (kstr == NULL)
-                goto bail;
-        }
-        else if (PyLong_Check(key)) {
-            kstr = PyLong_Type.tp_repr(key);
-            if (kstr == NULL) {
+
+        for (Py_ssize_t  i = 0; i < PyList_GET_SIZE(items); i++) {
+            PyObject *item = PyList_GET_ITEM(items, i);
+
+            if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
+                PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
                 goto bail;
             }
-        }
-        else if (s->skipkeys) {
-            Py_DECREF(item);
-            continue;
-        }
-        else {
-            PyErr_Format(PyExc_TypeError,
-                         "keys must be str, int, float, bool or None, "
-                         "not %.100s", Py_TYPE(key)->tp_name);
-            goto bail;
-        }
 
-        if (idx) {
-            if (_PyUnicodeWriter_WriteStr(writer, s->item_separator))
+            key = PyTuple_GET_ITEM(item, 0);
+            value = PyTuple_GET_ITEM(item, 1);
+            if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
                 goto bail;
         }
+        Py_CLEAR(items);
 
-        encoded = encoder_encode_string(s, kstr);
-        Py_CLEAR(kstr);
-        if (encoded == NULL)
-            goto bail;
-        if (_PyUnicodeWriter_WriteStr(writer, encoded)) {
-            Py_DECREF(encoded);
-            goto bail;
+    } else {
+        Py_ssize_t pos = 0;
+        while (PyDict_Next(dct, &pos, &key, &value)) {
+            if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
+                goto bail;
         }
-        Py_DECREF(encoded);
-        if (_PyUnicodeWriter_WriteStr(writer, s->key_separator))
-            goto bail;
-
-        value = PyTuple_GET_ITEM(item, 1);
-        if (encoder_listencode_obj(s, writer, value, indent_level))
-            goto bail;
-        idx += 1;
-        Py_DECREF(item);
     }
-    if (PyErr_Occurred())
-        goto bail;
-    Py_CLEAR(it);
 
     if (ident != NULL) {
         if (PyDict_DelItem(s->markers, ident))
@@ -1630,14 +1643,11 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
     return 0;
 
 bail:
-    Py_XDECREF(it);
-    Py_XDECREF(item);
-    Py_XDECREF(kstr);
+    Py_XDECREF(items);
     Py_XDECREF(ident);
     return -1;
 }
 
-
 static int
 encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
                         PyObject *seq, Py_ssize_t indent_level)



More information about the Python-checkins mailing list