[Python-checkins] bpo-37587: optimize json.loads (GH-15134)

Inada Naoki webhook-mailer at python.org
Thu Aug 8 04:57:14 EDT 2019


https://github.com/python/cpython/commit/2a570af12ac5e4ac5575a68f8739b31c24d01367
commit: 2a570af12ac5e4ac5575a68f8739b31c24d01367
branch: master
author: Inada Naoki <songofacandy at gmail.com>
committer: GitHub <noreply at github.com>
date: 2019-08-08T17:57:10+09:00
summary:

bpo-37587: optimize json.loads (GH-15134)

Use a more tightly scoped temporary variable to help register allocation.
1% speedup for large strings.

Use PyDict_SetDefault() for memoizing keys.
At most 4% speedup when the cache hit ratio is low.

files:
M Modules/_json.c

diff --git a/Modules/_json.c b/Modules/_json.c
index 76da1d345e9d..112903ea577a 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -433,16 +433,21 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
     }
     while (1) {
         /* Find the end of the string or the next escape */
-        Py_UCS4 c = 0;
-        for (next = end; next < len; next++) {
-            c = PyUnicode_READ(kind, buf, next);
-            if (c == '"' || c == '\\') {
-                break;
-            }
-            else if (c <= 0x1f && strict) {
-                raise_errmsg("Invalid control character at", pystr, next);
-                goto bail;
+        Py_UCS4 c;
+        {
+            // Use tight scope variable to help register allocation.
+            Py_UCS4 d = 0;
+            for (next = end; next < len; next++) {
+                d = PyUnicode_READ(kind, buf, next);
+                if (d == '"' || d == '\\') {
+                    break;
+                }
+                if (d <= 0x1f && strict) {
+                    raise_errmsg("Invalid control character at", pystr, next);
+                    goto bail;
+                }
             }
+            c = d;
         }
         if (!(c == '"' || c == '\\')) {
             raise_errmsg("Unterminated string starting at", pystr, begin);
@@ -749,19 +754,13 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
             key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
             if (key == NULL)
                 goto bail;
-            memokey = PyDict_GetItemWithError(s->memo, key);
-            if (memokey != NULL) {
-                Py_INCREF(memokey);
-                Py_DECREF(key);
-                key = memokey;
-            }
-            else if (PyErr_Occurred()) {
+            memokey = PyDict_SetDefault(s->memo, key, key);
+            if (memokey == NULL) {
                 goto bail;
             }
-            else {
-                if (PyDict_SetItem(s->memo, key, key) < 0)
-                    goto bail;
-            }
+            Py_INCREF(memokey);
+            Py_DECREF(key);
+            key = memokey;
             idx = next_idx;
 
             /* skip whitespace between key and : delimiter, read :, skip whitespace */



More information about the Python-checkins mailing list