[Python-checkins] [3.8] bpo-39667: Sync zipp 3.0 (GH-18540) (GH-18701)
Miss Islington (bot)
webhook-mailer at python.org
Wed Apr 15 14:45:32 EDT 2020
https://github.com/python/cpython/commit/3e72de9e08b03a15875f5b226c5f096e567dab42
commit: 3e72de9e08b03a15875f5b226c5f096e567dab42
branch: 3.8
author: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
committer: GitHub <noreply at github.com>
date: 2020-04-15T14:45:25-04:00
summary:
[3.8] bpo-39667: Sync zipp 3.0 (GH-18540) (GH-18701)
* bpo-39667: Sync zipp 3.0 (GH-18540)
* bpo-39667: Improve pathlib.Path compatibility on zipfile.Path and correct performance degradation as found in zipp 3.0
* 📜🤖 Added by blurb_it.
* Update docs for new zipfile.Path.open
* Rely on dict, faster than OrderedDict.
* Syntax edits on docs
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
(cherry picked from commit 0aeab5c4381f0cc11479362af2533b3a391312ac)
Co-authored-by: Jason R. Coombs <jaraco at jaraco.com>
* Clarify the change in behavior with a couple of workaround options.
* Restore API compatibility while backporting performance improvements.
Co-authored-by: Jason R. Coombs <jaraco at jaraco.com>
files:
A Misc/NEWS.d/next/Library/2020-02-17-22-38-15.bpo-39667.QuzEHH.rst
M Doc/library/zipfile.rst
M Lib/test/test_zipfile.py
M Lib/zipfile.py
diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst
index e8a2530fb8c17..97da6cab806e3 100644
--- a/Doc/library/zipfile.rst
+++ b/Doc/library/zipfile.rst
@@ -494,6 +494,12 @@ Path objects are traversable using the ``/`` operator.
Invoke :meth:`ZipFile.open` on the current path. Accepts
the same arguments as :meth:`ZipFile.open`.
+ .. caution::
+
+ The signature of this method changes in an incompatible way
+ in Python 3.9. For a future-compatible version, consider using
+ ``Path`` from the third-party ``zipp`` package (3.0 or later).
+
.. method:: Path.iterdir()
Enumerate the children of the current directory.
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index 61bca8651c02a..28e62dc5c61c5 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -5,6 +5,7 @@
import os
import pathlib
import posixpath
+import string
import struct
import subprocess
import sys
@@ -2933,6 +2934,11 @@ def test_joinpath_constant_time(self):
# Check the file iterated all items
assert entries.count == self.HUGE_ZIPFILE_NUM_ENTRIES
+ # @func_timeout.func_set_timeout(3)
+ def test_implied_dirs_performance(self):
+ data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)]
+ zipfile.CompleteDirs._implied_dirs(data)
+
if __name__ == "__main__":
unittest.main()
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 5dc6516cc47b7..07faaccac9226 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -17,7 +17,6 @@
import threading
import time
import contextlib
-from collections import OrderedDict
try:
import zlib # We may need its compression method
@@ -2125,24 +2124,6 @@ def _compile(file, optimize=-1):
return (fname, archivename)
-def _unique_everseen(iterable, key=None):
- "List unique elements, preserving order. Remember all elements ever seen."
- # unique_everseen('AAAABBBCCDAABBB') --> A B C D
- # unique_everseen('ABBCcAD', str.lower) --> A B C D
- seen = set()
- seen_add = seen.add
- if key is None:
- for element in itertools.filterfalse(seen.__contains__, iterable):
- seen_add(element)
- yield element
- else:
- for element in iterable:
- k = key(element)
- if k not in seen:
- seen_add(k)
- yield element
-
-
def _parents(path):
"""
Given a path with elements separated by
@@ -2184,6 +2165,18 @@ def _ancestry(path):
path, tail = posixpath.split(path)
+_dedupe = dict.fromkeys
+"""Deduplicate an iterable in original order"""
+
+
+def _difference(minuend, subtrahend):
+ """
+ Return items in minuend not in subtrahend, retaining order
+ with O(1) lookup.
+ """
+ return itertools.filterfalse(set(subtrahend).__contains__, minuend)
+
+
class CompleteDirs(ZipFile):
"""
A ZipFile subclass that ensures that implied directories
@@ -2193,13 +2186,8 @@ class CompleteDirs(ZipFile):
@staticmethod
def _implied_dirs(names):
parents = itertools.chain.from_iterable(map(_parents, names))
- # Deduplicate entries in original order
- implied_dirs = OrderedDict.fromkeys(
- p + posixpath.sep for p in parents
- # Cast names to a set for O(1) lookups
- if p + posixpath.sep not in set(names)
- )
- return implied_dirs
+ as_dirs = (p + posixpath.sep for p in parents)
+ return _dedupe(_difference(as_dirs, names))
def namelist(self):
names = super(CompleteDirs, self).namelist()
diff --git a/Misc/NEWS.d/next/Library/2020-02-17-22-38-15.bpo-39667.QuzEHH.rst b/Misc/NEWS.d/next/Library/2020-02-17-22-38-15.bpo-39667.QuzEHH.rst
new file mode 100644
index 0000000000000..ccc33e289846a
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-02-17-22-38-15.bpo-39667.QuzEHH.rst
@@ -0,0 +1 @@
+Correct performance degradation in ``zipfile.Path`` as found in zipp 3.0. While retaining compatibility, this change discourages the use of ``zipfile.Path.open`` due to the signature change in Python 3.9. For compatibility across Python 3.8 and later versions, consider using ``zipp.Path`` on Python 3.8.x and earlier.
More information about the Python-checkins mailing list