[issue25262] Issues with BINUNICODE8 and BINBYTES8 opcodes in pickle

Serhiy Storchaka report at bugs.python.org
Tue Sep 29 15:14:31 CEST 2015


Serhiy Storchaka added the comment:

> To access an array item, the type of the index variable must be size_t (or a
> type with the same size), otherwise the compiler produce less efficient
> machine code: http://www.viva64.com/en/a/0050/
> 
> => please keep Py_ssize_t type for i

i is small integer between 0 and 8. I don't think that this article is 
related. Py_ssize_t is signed, and i is compared with int in a loop. And isn't 
Viva64 work only with Visual C++?  At least I found statements that can be not 
correct on platforms not supported by Microsoft.

> Please add a comment here to explain that the first loop check for integer
> overflow, it's not obvious at the first read.

Done.

> Does the Python implementation of pickle produce BINBYTES8? If not: why not?

Yes, it produces BINBYTES8 on 64-bit platform.

> Note: the patch is probably based on a private Mercurial revision, so it
> didn't get the [Review] button.

Yes, I had made some refactoring for unpickling tests and forgot to push they. 
Here is rebased patch.

----------
Added file: http://bugs.python.org/file40621/pickle_binbytes8_2.patch

_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue25262>
_______________________________________
-------------- next part --------------
diff -r 377f200ad521 Lib/pickle.py
--- a/Lib/pickle.py	Tue Sep 29 15:51:02 2015 +0300
+++ b/Lib/pickle.py	Tue Sep 29 16:03:38 2015 +0300
@@ -1205,6 +1205,14 @@ class _Unpickler:
         self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
     dispatch[BINUNICODE8[0]] = load_binunicode8
 
+    def load_binbytes8(self):
+        len, = unpack('<Q', self.read(8))
+        if len > maxsize:
+            raise UnpicklingError("BINBYTES8 exceeds system's maximum size "
+                                  "of %d bytes" % maxsize)
+        self.append(self.read(len))
+    dispatch[BINBYTES8[0]] = load_binbytes8
+
     def load_short_binstring(self):
         len = self.read(1)[0]
         data = self.read(len)
diff -r 377f200ad521 Lib/test/pickletester.py
--- a/Lib/test/pickletester.py	Tue Sep 29 15:51:02 2015 +0300
+++ b/Lib/test/pickletester.py	Tue Sep 29 16:03:38 2015 +0300
@@ -857,6 +857,26 @@ class AbstractUnpickleTests(unittest.Tes
         self.assert_is_copy([(100,), (100,)],
                             self.loads(b'((Kdtp0\nh\x00l.))'))
 
+    def test_binbytes8(self):
+        dumped = b'\x80\x04\x8e\4\0\0\0\0\0\0\0\xe2\x82\xac\x00.'
+        self.assertEqual(self.loads(dumped), b'\xe2\x82\xac\x00')
+
+    def test_binunicode8(self):
+        dumped = b'\x80\x04\x8d\4\0\0\0\0\0\0\0\xe2\x82\xac\x00.'
+        self.assertEqual(self.loads(dumped), '\u20ac\x00')
+
+    @requires_32b
+    def test_large_32b_binbytes8(self):
+        dumped = b'\x80\x04\x8e\4\0\0\0\1\0\0\0\xe2\x82\xac\x00.'
+        with self.assertRaises((pickle.UnpicklingError, OverflowError)):
+            self.loads(dumped)
+
+    @requires_32b
+    def test_large_32b_binunicode8(self):
+        dumped = b'\x80\x04\x8d\4\0\0\0\1\0\0\0\xe2\x82\xac\x00.'
+        with self.assertRaises((pickle.UnpicklingError, OverflowError)):
+            self.loads(dumped)
+
     def test_get(self):
         pickled = b'((lp100000\ng100000\nt.'
         unpickled = self.loads(pickled)
diff -r 377f200ad521 Lib/test/test_pickle.py
--- a/Lib/test/test_pickle.py	Tue Sep 29 15:51:02 2015 +0300
+++ b/Lib/test/test_pickle.py	Tue Sep 29 16:03:38 2015 +0300
@@ -39,6 +39,16 @@ class PyUnpicklerTests(AbstractUnpickleT
         return u.load()
 
 
+class PyUnpicklerTests(AbstractUnpickleTests):
+
+    unpickler = pickle._Unpickler
+
+    def loads(self, buf, **kwds):
+        f = io.BytesIO(buf)
+        u = self.unpickler(f, **kwds)
+        return u.load()
+
+
 class PyPicklerTests(AbstractPickleTests):
 
     pickler = pickle._Pickler
diff -r 377f200ad521 Modules/_pickle.c
--- a/Modules/_pickle.c	Tue Sep 29 15:51:02 2015 +0300
+++ b/Modules/_pickle.c	Tue Sep 29 16:03:38 2015 +0300
@@ -4606,10 +4606,20 @@ static Py_ssize_t
 calc_binsize(char *bytes, int nbytes)
 {
     unsigned char *s = (unsigned char *)bytes;
-    Py_ssize_t i;
+    int i;
     size_t x = 0;
 
-    for (i = 0; i < nbytes && (size_t)i < sizeof(size_t); i++) {
+    if (nbytes > (int)sizeof(size_t)) {
+        /* Check for integer overflow.  BINBYTES8 and BINUNICODE8 opcodes
+         * have 64-bit size that can't be represented on 32-bit platform.
+         */
+        for (i = (int)sizeof(size_t); i < nbytes; i++) {
+            if (s[i])
+                return -1;
+        }
+        nbytes = (int)sizeof(size_t);
+    }
+    for (i = 0; i < nbytes; i++) {
         x |= (size_t) s[i] << (8 * i);
     }
 


More information about the Python-bugs-list mailing list