[Python-checkins] bpo-31829: Make protocol 0 pickles be loadable in text mode in Python 2. (GH-11859)

Miss Islington (bot) webhook-mailer at python.org
Wed Jun 12 07:50:17 EDT 2019


https://github.com/python/cpython/commit/d561f848b235f2011a43b705d112055b92fa2366
commit: d561f848b235f2011a43b705d112055b92fa2366
branch: 3.7
author: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
committer: GitHub <noreply at github.com>
date: 2019-06-12T04:50:11-07:00
summary:

bpo-31829: Make protocol 0 pickles be loadable in text mode in Python 2. (GH-11859)


Escape ``\r``, ``\0`` and ``\x1a`` (end-of-file on Windows) in Unicode strings.
(cherry picked from commit 38ab7d4721b422547f7b46b9d68968863fa70573)

Co-authored-by: Serhiy Storchaka <storchaka at gmail.com>

files:
A Misc/NEWS.d/next/Library/2017-10-21-12-07-56.bpo-31829.6IhP-O.rst
M Lib/pickle.py
M Lib/test/pickletester.py
M Modules/_pickle.c

diff --git a/Lib/pickle.py b/Lib/pickle.py
index e6d003787bad..bfa3c0361b73 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -749,7 +749,10 @@ def save_str(self, obj):
                 self.write(BINUNICODE + pack("<I", n) + encoded)
         else:
             obj = obj.replace("\\", "\\u005c")
+            obj = obj.replace("\0", "\\u0000")
             obj = obj.replace("\n", "\\u000a")
+            obj = obj.replace("\r", "\\u000d")
+            obj = obj.replace("\x1a", "\\u001a")  # EOF on DOS
             self.write(UNICODE + obj.encode('raw-unicode-escape') +
                        b'\n')
         self.memoize(obj)
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 5540d0015ed9..1d88fcb859af 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -2709,22 +2709,20 @@ def __getattr__(self, key):
 class AbstractPickleModuleTests(unittest.TestCase):
 
     def test_dump_closed_file(self):
-        import os
         f = open(TESTFN, "wb")
         try:
             f.close()
             self.assertRaises(ValueError, self.dump, 123, f)
         finally:
-            os.remove(TESTFN)
+            support.unlink(TESTFN)
 
     def test_load_closed_file(self):
-        import os
         f = open(TESTFN, "wb")
         try:
             f.close()
             self.assertRaises(ValueError, self.dump, 123, f)
         finally:
-            os.remove(TESTFN)
+            support.unlink(TESTFN)
 
     def test_load_from_and_dump_to_file(self):
         stream = io.BytesIO()
@@ -2748,6 +2746,19 @@ def test_callapi(self):
         self.Pickler(f, -1)
         self.Pickler(f, protocol=-1)
 
+    def test_dump_text_file(self):
+        f = open(TESTFN, "w")
+        try:
+            for proto in protocols:
+                self.assertRaises(TypeError, self.dump, 123, f, proto)
+        finally:
+            f.close()
+            support.unlink(TESTFN)
+
+    def test_incomplete_input(self):
+        s = io.BytesIO(b"X''.")
+        self.assertRaises((EOFError, struct.error, pickle.UnpicklingError), self.load, s)
+
     def test_bad_init(self):
         # Test issue3664 (pickle can segfault from a badly initialized Pickler).
         # Override initialization without calling __init__() of the superclass.
diff --git a/Misc/NEWS.d/next/Library/2017-10-21-12-07-56.bpo-31829.6IhP-O.rst b/Misc/NEWS.d/next/Library/2017-10-21-12-07-56.bpo-31829.6IhP-O.rst
new file mode 100644
index 000000000000..aefb8aec16fd
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-10-21-12-07-56.bpo-31829.6IhP-O.rst
@@ -0,0 +1,3 @@
+``\r``, ``\0`` and ``\x1a`` (end-of-file on Windows) are now escaped in
+protocol 0 pickles of Unicode strings. This allows them to be loaded without
+loss from files opened in text mode in Python 2.
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index c8b3ef70f521..9a6207b519fa 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -2337,7 +2337,10 @@ raw_unicode_escape(PyObject *obj)
             *p++ = Py_hexdigits[ch & 15];
         }
         /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
-        else if (ch >= 256 || ch == '\\' || ch == '\n') {
+        else if (ch >= 256 ||
+                 ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
+                 ch == 0x1a)
+        {
             /* -1: subtract 1 preallocated byte */
             p = _PyBytesWriter_Prepare(&writer, p, 6-1);
             if (p == NULL)



More information about the Python-checkins mailing list