[Python-checkins] cpython: Issue #19881: Fix bad pickling of large bytes in cpickle.

alexandre.vassalotti python-checkins at python.org
Fri Dec 6 04:29:44 CET 2013


http://hg.python.org/cpython/rev/2612ea573ff7
changeset:   87788:2612ea573ff7
user:        Alexandre Vassalotti <alexandre at peadrop.com>
date:        Thu Dec 05 19:29:32 2013 -0800
summary:
  Issue #19881: Fix bad pickling of large bytes in cpickle.

files:
  Lib/test/pickletester.py |  75 ++++++++++++++++++++-------
  Misc/NEWS                |   4 +
  Modules/_pickle.c        |   2 +-
  3 files changed, 59 insertions(+), 22 deletions(-)


diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -3,6 +3,7 @@
 import pickle
 import pickletools
 import random
+import struct
 import sys
 import unittest
 import weakref
@@ -1611,9 +1612,9 @@
         data = 1 << (8 * size)
         try:
             for proto in protocols:
+                if proto < 2:
+                    continue
                 with self.subTest(proto=proto):
-                    if proto < 2:
-                        continue
                     with self.assertRaises((ValueError, OverflowError)):
                         self.dumps(data, protocol=proto)
         finally:
@@ -1628,13 +1629,17 @@
         data = b"abcd" * (size // 4)
         try:
             for proto in protocols:
+                if proto < 3:
+                    continue
                 with self.subTest(proto=proto):
-                    if proto < 3:
-                        continue
                     try:
                         pickled = self.dumps(data, protocol=proto)
-                        self.assertTrue(b"abcd" in pickled[:19])
-                        self.assertTrue(b"abcd" in pickled[-18:])
+                        header = (pickle.BINBYTES +
+                                  struct.pack("<I", len(data)))
+                        data_start = pickled.index(data)
+                        self.assertEqual(
+                            header,
+                            pickled[data_start-len(header):data_start])
                     finally:
                         pickled = None
         finally:
@@ -1642,14 +1647,28 @@
 
     @bigmemtest(size=_4G, memuse=1 + 1, dry_run=False)
     def test_huge_bytes_64b(self, size):
-        data = b"a" * size
+        data = b"acbd" * (size // 4)
         try:
             for proto in protocols:
+                if proto < 3:
+                    continue
                 with self.subTest(proto=proto):
-                    if proto < 3:
+                    if proto == 3:
+                        # Protocol 3 does not support large bytes objects.
+                        # Verify that we do not crash when processing one.
+                        with self.assertRaises((ValueError, OverflowError)):
+                            self.dumps(data, protocol=proto)
                         continue
-                    with self.assertRaises((ValueError, OverflowError)):
-                        self.dumps(data, protocol=proto)
+                    try:
+                        pickled = self.dumps(data, protocol=proto)
+                        header = (pickle.BINBYTES8 +
+                                  struct.pack("<Q", len(data)))
+                        data_start = pickled.index(data)
+                        self.assertEqual(
+                            header,
+                            pickled[data_start-len(header):data_start])
+                    finally:
+                        pickled = None
         finally:
             data = None
 
@@ -1661,11 +1680,19 @@
         data = "abcd" * (size // 4)
         try:
             for proto in protocols:
+                if proto == 0:
+                    continue
                 with self.subTest(proto=proto):
                     try:
                         pickled = self.dumps(data, protocol=proto)
-                        self.assertTrue(b"abcd" in pickled[:19])
-                        self.assertTrue(b"abcd" in pickled[-18:])
+                        header = (pickle.BINUNICODE +
+                                  struct.pack("<I", len(data)))
+                        data_start = pickled.index(b'abcd')
+                        self.assertEqual(
+                            header,
+                            pickled[data_start-len(header):data_start])
+                        self.assertEqual((pickled.rindex(b"abcd") + len(b"abcd") -
+                                          pickled.index(b"abcd")), len(data))
                     finally:
                         pickled = None
         finally:
@@ -1680,19 +1707,25 @@
         data = "abcd" * (size // 4)
         try:
             for proto in protocols:
+                if proto == 0:
+                    continue
                 with self.subTest(proto=proto):
-                    if proto == 0:
-                        continue
                     if proto < 4:
                         with self.assertRaises((ValueError, OverflowError)):
                             self.dumps(data, protocol=proto)
-                    else:
-                        try:
-                            pickled = self.dumps(data, protocol=proto)
-                            self.assertTrue(b"abcd" in pickled[:19])
-                            self.assertTrue(b"abcd" in pickled[-18:])
-                        finally:
-                            pickled = None
+                        continue
+                    try:
+                        pickled = self.dumps(data, protocol=proto)
+                        header = (pickle.BINUNICODE8 +
+                                  struct.pack("<Q", len(data)))
+                        data_start = pickled.index(b'abcd')
+                        self.assertEqual(
+                            header,
+                            pickled[data_start-len(header):data_start])
+                        self.assertEqual((pickled.rindex(b"abcd") + len(b"abcd") -
+                                          pickled.index(b"abcd")), len(data))
+                    finally:
+                        pickled = None
         finally:
             data = None
 
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -18,11 +18,15 @@
 Library
 -------
 
+
 - Issue #19296: Silence compiler warning in dbm_open
 
 - Issue #19839: Fix regression in bz2 module's handling of non-bzip2 data at
   EOF, and analogous bug in lzma module.
 
+- Issue #19881: Fix pickling bug where cpickle would emit bad pickle data for
+  large bytes string (i.e., with size greater than 2**32-1).
+
 - Issue #19138: doctest's IGNORE_EXCEPTION_DETAIL now allows a match when
   no exception detail exists (no colon following the exception's name, or
   a colon does follow but no text follows the colon).
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -2027,7 +2027,7 @@
         else if (self->proto >= 4) {
             header[0] = BINBYTES8;
             _write_size64(header + 1, size);
-            len = 8;
+            len = 9;
         }
         else {
             PyErr_SetString(PyExc_OverflowError,

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list