[Python-checkins] cpython: Issue #19881: Fix bad pickling of large bytes in cpickle.
alexandre.vassalotti
python-checkins at python.org
Fri Dec 6 04:29:44 CET 2013
http://hg.python.org/cpython/rev/2612ea573ff7
changeset: 87788:2612ea573ff7
user: Alexandre Vassalotti <alexandre at peadrop.com>
date: Thu Dec 05 19:29:32 2013 -0800
summary:
Issue #19881: Fix bad pickling of large bytes in cpickle.
files:
Lib/test/pickletester.py | 75 ++++++++++++++++++++-------
Misc/NEWS | 4 +
Modules/_pickle.c | 2 +-
3 files changed, 59 insertions(+), 22 deletions(-)
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -3,6 +3,7 @@
import pickle
import pickletools
import random
+import struct
import sys
import unittest
import weakref
@@ -1611,9 +1612,9 @@
data = 1 << (8 * size)
try:
for proto in protocols:
+ if proto < 2:
+ continue
with self.subTest(proto=proto):
- if proto < 2:
- continue
with self.assertRaises((ValueError, OverflowError)):
self.dumps(data, protocol=proto)
finally:
@@ -1628,13 +1629,17 @@
data = b"abcd" * (size // 4)
try:
for proto in protocols:
+ if proto < 3:
+ continue
with self.subTest(proto=proto):
- if proto < 3:
- continue
try:
pickled = self.dumps(data, protocol=proto)
- self.assertTrue(b"abcd" in pickled[:19])
- self.assertTrue(b"abcd" in pickled[-18:])
+ header = (pickle.BINBYTES +
+ struct.pack("<I", len(data)))
+ data_start = pickled.index(data)
+ self.assertEqual(
+ header,
+ pickled[data_start-len(header):data_start])
finally:
pickled = None
finally:
@@ -1642,14 +1647,28 @@
@bigmemtest(size=_4G, memuse=1 + 1, dry_run=False)
def test_huge_bytes_64b(self, size):
- data = b"a" * size
+ data = b"acbd" * (size // 4)
try:
for proto in protocols:
+ if proto < 3:
+ continue
with self.subTest(proto=proto):
- if proto < 3:
+ if proto == 3:
+ # Protocol 3 does not support large bytes objects.
+ # Verify that we do not crash when processing one.
+ with self.assertRaises((ValueError, OverflowError)):
+ self.dumps(data, protocol=proto)
continue
- with self.assertRaises((ValueError, OverflowError)):
- self.dumps(data, protocol=proto)
+ try:
+ pickled = self.dumps(data, protocol=proto)
+ header = (pickle.BINBYTES8 +
+ struct.pack("<Q", len(data)))
+ data_start = pickled.index(data)
+ self.assertEqual(
+ header,
+ pickled[data_start-len(header):data_start])
+ finally:
+ pickled = None
finally:
data = None
@@ -1661,11 +1680,19 @@
data = "abcd" * (size // 4)
try:
for proto in protocols:
+ if proto == 0:
+ continue
with self.subTest(proto=proto):
try:
pickled = self.dumps(data, protocol=proto)
- self.assertTrue(b"abcd" in pickled[:19])
- self.assertTrue(b"abcd" in pickled[-18:])
+ header = (pickle.BINUNICODE +
+ struct.pack("<I", len(data)))
+ data_start = pickled.index(b'abcd')
+ self.assertEqual(
+ header,
+ pickled[data_start-len(header):data_start])
+ self.assertEqual((pickled.rindex(b"abcd") + len(b"abcd") -
+ pickled.index(b"abcd")), len(data))
finally:
pickled = None
finally:
@@ -1680,19 +1707,25 @@
data = "abcd" * (size // 4)
try:
for proto in protocols:
+ if proto == 0:
+ continue
with self.subTest(proto=proto):
- if proto == 0:
- continue
if proto < 4:
with self.assertRaises((ValueError, OverflowError)):
self.dumps(data, protocol=proto)
- else:
- try:
- pickled = self.dumps(data, protocol=proto)
- self.assertTrue(b"abcd" in pickled[:19])
- self.assertTrue(b"abcd" in pickled[-18:])
- finally:
- pickled = None
+ continue
+ try:
+ pickled = self.dumps(data, protocol=proto)
+ header = (pickle.BINUNICODE8 +
+ struct.pack("<Q", len(data)))
+ data_start = pickled.index(b'abcd')
+ self.assertEqual(
+ header,
+ pickled[data_start-len(header):data_start])
+ self.assertEqual((pickled.rindex(b"abcd") + len(b"abcd") -
+ pickled.index(b"abcd")), len(data))
+ finally:
+ pickled = None
finally:
data = None
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -18,11 +18,15 @@
Library
-------
+
- Issue #19296: Silence compiler warning in dbm_open
- Issue #19839: Fix regression in bz2 module's handling of non-bzip2 data at
EOF, and analogous bug in lzma module.
+- Issue #19881: Fix pickling bug where cpickle would emit bad pickle data for
+ large bytes string (i.e., with size greater than 2**32-1).
+
- Issue #19138: doctest's IGNORE_EXCEPTION_DETAIL now allows a match when
no exception detail exists (no colon following the exception's name, or
a colon does follow but no text follows the colon).
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -2027,7 +2027,7 @@
else if (self->proto >= 4) {
header[0] = BINBYTES8;
_write_size64(header + 1, size);
- len = 8;
+ len = 9;
}
else {
PyErr_SetString(PyExc_OverflowError,
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list