[Python-checkins] bpo-38876: Raise pickle.UnpicklingError when loading an item from memo for invalid input (GH-17335)

Miss Islington (bot) webhook-mailer at python.org
Sun Nov 24 14:15:12 EST 2019


https://github.com/python/cpython/commit/6f03b236c17c96bc9f8a004ffa7e7ae0542e9cac
commit: 6f03b236c17c96bc9f8a004ffa7e7ae0542e9cac
branch: master
author: Claudiu Popa <pcmanticore at gmail.com>
committer: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
date: 2019-11-24T11:15:08-08:00
summary:

bpo-38876: Raise pickle.UnpicklingError when loading an item from memo for invalid input (GH-17335)



The previous code raised a `KeyError` in both the Python and C implementations.
This happened when invalid input specified an index that did not exist
in the memo structure, where the unpickler stores the objects it has seen.
Such malformed input triggers a `GET`, `BINGET` or `LONG_BINGET` load
from the memo, which led to a `KeyError` because the index was bogus.

https://bugs.python.org/issue38876



https://bugs.python.org/issue38876

files:
A Misc/NEWS.d/next/Library/2019-11-22-10-58-58.bpo-38876.qqy1Vp.rst
M Lib/pickle.py
M Lib/test/pickletester.py
M Modules/_pickle.c

diff --git a/Lib/pickle.py b/Lib/pickle.py
index 71aa57d500ecc..01d41422aa4a6 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -1604,17 +1604,29 @@ def load_dup(self):
 
     def load_get(self):
         i = int(self.readline()[:-1])
-        self.append(self.memo[i])
+        try:
+            self.append(self.memo[i])
+        except KeyError:
+            msg = f'Memo value not found at index {i}'
+            raise UnpicklingError(msg) from None
     dispatch[GET[0]] = load_get
 
     def load_binget(self):
         i = self.read(1)[0]
-        self.append(self.memo[i])
+        try:
+            self.append(self.memo[i])
+        except KeyError as exc:
+            msg = f'Memo value not found at index {i}'
+            raise UnpicklingError(msg) from None
     dispatch[BINGET[0]] = load_binget
 
     def load_long_binget(self):
         i, = unpack('<I', self.read(4))
-        self.append(self.memo[i])
+        try:
+            self.append(self.memo[i])
+        except KeyError as exc:
+            msg = f'Memo value not found at index {i}'
+            raise UnpicklingError(msg) from None
     dispatch[LONG_BINGET[0]] = load_long_binget
 
     def load_put(self):
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index c9f374678ae35..953fd5c5a278b 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -1019,7 +1019,9 @@ def test_short_binunicode(self):
         self.assertEqual(self.loads(dumped), '\u20ac\x00')
 
     def test_misc_get(self):
-        self.check_unpickling_error(KeyError, b'g0\np0')
+        self.check_unpickling_error(pickle.UnpicklingError, b'g0\np0')
+        self.check_unpickling_error(pickle.UnpicklingError, b'jens:')
+        self.check_unpickling_error(pickle.UnpicklingError, b'hens:')
         self.assert_is_copy([(100,), (100,)],
                             self.loads(b'((Kdtp0\nh\x00l.))'))
 
diff --git a/Misc/NEWS.d/next/Library/2019-11-22-10-58-58.bpo-38876.qqy1Vp.rst b/Misc/NEWS.d/next/Library/2019-11-22-10-58-58.bpo-38876.qqy1Vp.rst
new file mode 100644
index 0000000000000..43b25acc39d2e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-11-22-10-58-58.bpo-38876.qqy1Vp.rst
@@ -0,0 +1,9 @@
+Raise pickle.UnpicklingError when loading an item from memo for invalid
+input
+
+The previous code raised a `KeyError` in both the Python and C
+implementations. This happened when invalid input specified an index that
+did not exist in the memo structure, where the unpickler stores the objects
+it has seen. Such malformed input triggers a `GET`, `BINGET` or
+`LONG_BINGET` load from the memo, which led to a `KeyError` because the
+index was bogus. Patch by Claudiu Popa.
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index 7370be5938831..baa0a27419693 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -6174,8 +6174,10 @@ load_get(UnpicklerObject *self)
 
     value = _Unpickler_MemoGet(self, idx);
     if (value == NULL) {
-        if (!PyErr_Occurred())
-            PyErr_SetObject(PyExc_KeyError, key);
+        if (!PyErr_Occurred()) {
+           PickleState *st = _Pickle_GetGlobalState();
+           PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
+        }
         Py_DECREF(key);
         return -1;
     }
@@ -6201,7 +6203,8 @@ load_binget(UnpicklerObject *self)
     if (value == NULL) {
         PyObject *key = PyLong_FromSsize_t(idx);
         if (key != NULL) {
-            PyErr_SetObject(PyExc_KeyError, key);
+            PickleState *st = _Pickle_GetGlobalState();
+            PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
             Py_DECREF(key);
         }
         return -1;
@@ -6227,7 +6230,8 @@ load_long_binget(UnpicklerObject *self)
     if (value == NULL) {
         PyObject *key = PyLong_FromSsize_t(idx);
         if (key != NULL) {
-            PyErr_SetObject(PyExc_KeyError, key);
+            PickleState *st = _Pickle_GetGlobalState();
+            PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
             Py_DECREF(key);
         }
         return -1;



More information about the Python-checkins mailing list