[Python-checkins] [3.11] gh-101144: Allow zipfile.Path .open & .read_text encoding to be positional (#101179)
gpshead
webhook-mailer at python.org
Fri Jan 20 03:06:25 EST 2023
https://github.com/python/cpython/commit/efe3a389cabd7295e6e0938767cdc4055c871e3c
commit: efe3a389cabd7295e6e0938767cdc4055c871e3c
branch: 3.11
author: Gregory P. Smith <greg at krypto.org>
committer: gpshead <greg at krypto.org>
date: 2023-01-20T00:06:18-08:00
summary:
[3.11] gh-101144: Allow zipfile.Path .open & .read_text encoding to be positional (#101179)
The zipfile.Path open() and read_text() encoding parameter can be supplied as a positional argument without causing a TypeError again. 3.10.0b1 included a regression that made it keyword only.
Documentation update included as users writing code to be compatible with a wide range of versions will need to consider this for some time.
(cherry picked from commit 5927013e47a8c63b70e104152351f3447baa819c)
Co-authored-by: Gregory P. Smith <greg at krypto.org> [Google]
files:
A Misc/NEWS.d/next/Library/2023-01-18-17-58-50.gh-issue-101144.FHd8Un.rst
M Doc/library/zipfile.rst
M Lib/test/test_zipfile.py
M Lib/zipfile.py
diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst
index d02e32d320bc..af1f26385553 100644
--- a/Doc/library/zipfile.rst
+++ b/Doc/library/zipfile.rst
@@ -551,6 +551,12 @@ Path objects are traversable using the ``/`` operator or ``joinpath``.
Added support for text and binary modes for open. Default
mode is now text.
+ .. versionchanged:: 3.11.2
+ The ``encoding`` parameter can be supplied as a positional argument
+ without causing a :exc:`TypeError`, as it could in 3.9. Code needing to
+ be compatible with unpatched 3.10 and 3.11 versions must pass all
+ :class:`io.TextIOWrapper` arguments, ``encoding`` included, as keywords.
+
.. method:: Path.iterdir()
Enumerate the children of the current directory.
@@ -596,6 +602,12 @@ Path objects are traversable using the ``/`` operator or ``joinpath``.
:class:`io.TextIOWrapper` (except ``buffer``, which is
implied by the context).
+ .. versionchanged:: 3.11.2
+ The ``encoding`` parameter can be supplied as a positional argument
+ without causing a :exc:`TypeError`, as it could in 3.9. Code needing to
+ be compatible with unpatched 3.10 and 3.11 versions must pass all
+ :class:`io.TextIOWrapper` arguments, ``encoding`` included, as keywords.
+
.. method:: Path.read_bytes()
Read the current file as bytes.
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index fd25e5a800d7..35d78d96a68e 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -10,6 +10,7 @@
import struct
import subprocess
import sys
+from test.support.script_helper import assert_python_ok
import time
import unittest
import unittest.mock as mock
@@ -3005,7 +3006,69 @@ def test_open(self, alpharep):
a, b, g = root.iterdir()
with a.open(encoding="utf-8") as strm:
data = strm.read()
- assert data == "content of a"
+ self.assertEqual(data, "content of a")
+ with a.open('r', "utf-8") as strm: # not a kw, no gh-101144 TypeError
+ data = strm.read()
+ self.assertEqual(data, "content of a")
+
+ def test_open_encoding_utf16(self):
+ in_memory_file = io.BytesIO()
+ zf = zipfile.ZipFile(in_memory_file, "w")
+ zf.writestr("path/16.txt", "This was utf-16".encode("utf-16"))
+ zf.filename = "test_open_utf16.zip"
+ root = zipfile.Path(zf)
+ (path,) = root.iterdir()
+ u16 = path.joinpath("16.txt")
+ with u16.open('r', "utf-16") as strm:
+ data = strm.read()
+ self.assertEqual(data, "This was utf-16")
+ with u16.open(encoding="utf-16") as strm:
+ data = strm.read()
+ self.assertEqual(data, "This was utf-16")
+
+ def test_open_encoding_errors(self):
+ in_memory_file = io.BytesIO()
+ zf = zipfile.ZipFile(in_memory_file, "w")
+ zf.writestr("path/bad-utf8.bin", b"invalid utf-8: \xff\xff.")
+ zf.filename = "test_read_text_encoding_errors.zip"
+ root = zipfile.Path(zf)
+ (path,) = root.iterdir()
+ u16 = path.joinpath("bad-utf8.bin")
+
+ # encoding= as a positional argument for gh-101144.
+ data = u16.read_text("utf-8", errors="ignore")
+ self.assertEqual(data, "invalid utf-8: .")
+ with u16.open("r", "utf-8", errors="surrogateescape") as f:
+ self.assertEqual(f.read(), "invalid utf-8: \udcff\udcff.")
+
+ # encoding= both positional and keyword is an error; gh-101144.
+ with self.assertRaisesRegex(TypeError, "encoding"):
+ data = u16.read_text("utf-8", encoding="utf-8")
+
+ # both keyword arguments work.
+ with u16.open("r", encoding="utf-8", errors="strict") as f:
+ # error during decoding with wrong codec.
+ with self.assertRaises(UnicodeDecodeError):
+ f.read()
+
+ def test_encoding_warnings(self):
+ """EncodingWarning must blame the read_text and open calls."""
+ code = '''\
+import io, zipfile
+with zipfile.ZipFile(io.BytesIO(), "w") as zf:
+ zf.filename = '<test_encoding_warnings in memory zip file>'
+ zf.writestr("path/file.txt", b"Spanish Inquisition")
+ root = zipfile.Path(zf)
+ (path,) = root.iterdir()
+ file_path = path.joinpath("file.txt")
+ unused = file_path.read_text() # should warn
+ file_path.open("r").close() # should warn
+'''
+ proc = assert_python_ok('-X', 'warn_default_encoding', '-c', code)
+ warnings = proc.err.splitlines()
+ self.assertEqual(len(warnings), 2, proc.err)
+ self.assertRegex(warnings[0], rb"^<string>:8: EncodingWarning:")
+ self.assertRegex(warnings[1], rb"^<string>:9: EncodingWarning:")
def test_open_write(self):
"""
@@ -3047,6 +3110,7 @@ def test_read(self, alpharep):
root = zipfile.Path(alpharep)
a, b, g = root.iterdir()
assert a.read_text(encoding="utf-8") == "content of a"
+ a.read_text("utf-8") # No positional arg TypeError per gh-101144.
assert a.read_bytes() == b"content of a"
@pass_alpharep
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index de667a7efeeb..f69a43b764ef 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -2287,6 +2287,11 @@ def _name_set(self):
return self.__lookup
+def _extract_text_encoding(encoding=None, *args, **kwargs):
+ # stacklevel=3 so that the caller of the caller sees any warning.
+ return io.text_encoding(encoding, 3), args, kwargs
+
+
class Path:
"""
A pathlib-compatible interface for zip files.
@@ -2396,9 +2401,9 @@ def open(self, mode='r', *args, pwd=None, **kwargs):
if args or kwargs:
raise ValueError("encoding args invalid for binary operation")
return stream
- else:
- kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
- return io.TextIOWrapper(stream, *args, **kwargs)
+ # Text mode:
+ encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
+ return io.TextIOWrapper(stream, encoding, *args, **kwargs)
@property
def name(self):
@@ -2421,8 +2426,8 @@ def filename(self):
return pathlib.Path(self.root.filename).joinpath(self.at)
def read_text(self, *args, **kwargs):
- kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
- with self.open('r', *args, **kwargs) as strm:
+ encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
+ with self.open('r', encoding, *args, **kwargs) as strm:
return strm.read()
def read_bytes(self):
diff --git a/Misc/NEWS.d/next/Library/2023-01-18-17-58-50.gh-issue-101144.FHd8Un.rst b/Misc/NEWS.d/next/Library/2023-01-18-17-58-50.gh-issue-101144.FHd8Un.rst
new file mode 100644
index 000000000000..297652259949
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-01-18-17-58-50.gh-issue-101144.FHd8Un.rst
@@ -0,0 +1,4 @@
+Make :func:`zipfile.Path.open` and :func:`zipfile.Path.read_text` also accept
+``encoding`` as a positional argument. This was the behavior in Python 3.9 and
+earlier. 3.10 introduced a regression where supplying it as a positional
+argument would lead to a :exc:`TypeError`.
More information about the Python-checkins
mailing list