[Python-checkins] bpo-37772: fix zipfile.Path.iterdir() outputs (GH-15170)
Jason R. Coombs
webhook-mailer at python.org
Sat Aug 24 11:26:45 EDT 2019
https://github.com/python/cpython/commit/a4e2991bdc993b60b6457c8a38d6e4a1fc845781
commit: a4e2991bdc993b60b6457c8a38d6e4a1fc845781
branch: master
author: shireenrao <shireenrao at gmail.com>
committer: Jason R. Coombs <jaraco at jaraco.com>
date: 2019-08-24T11:26:41-04:00
summary:
bpo-37772: fix zipfile.Path.iterdir() outputs (GH-15170)
* fix Path._add_implied_dirs to include all implied directories
* fix Path._add_implied_dirs to include all implied directories
* Optimize code by using sets instead of lists
* 📜🤖 Added by blurb_it.
* fix Path._add_implied_dirs to include all implied directories
* Optimize code by using sets instead of lists
* 📜🤖 Added by blurb_it.
* Add tests to zipfile.Path.iterdir() fix
* Update test for zipfile.Path.iterdir()
* remove whitespace from test file
* Rewrite NEWS blurb to describe the user-facing impact and avoid implementation details.
* remove redundant [] within set comprehension
* Update to use unique_everseen to maintain order and other suggestions in review
* remove whitespace and add back add_dirs in tests
* Add new standalone function parents using posixpath to get parents of a directory
* removing whitespace (sorry)
* Remove import pathlib from zipfile.py
* Rewrite _parents as a slice on a generator of the ancestry of a path.
* Remove check for '.' and '/', now that parents no longer returns those.
* Separate calculation of implied dirs from adding those
* Re-use _implied_dirs in tests for generating zipfile with dir entries.
* Replace three fixtures (abcde, abcdef, abde) with one representative example alpharep.
* Simplify implementation of _implied_dirs by collapsing the generation of parent directories for each name.
files:
A Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst
M Lib/test/test_zipfile.py
M Lib/zipfile.py
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index 0c8ffcdbf14a..8e437e5cb2b9 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -2397,37 +2397,49 @@ def test_extract_command(self):
consume = tuple
-def add_dirs(zipfile):
+def add_dirs(zf):
"""
- Given a writable zipfile, inject directory entries for
+ Given a writable zip file zf, inject directory entries for
any directories implied by the presence of children.
"""
- names = zipfile.namelist()
- consume(
- zipfile.writestr(name + "/", b"")
- for name in map(posixpath.dirname, names)
- if name and name + "/" not in names
- )
- return zipfile
+ for name in zipfile.Path._implied_dirs(zf.namelist()):
+ zf.writestr(name, b"")
+ return zf
-def build_abcde_files():
+def build_alpharep_fixture():
"""
Create a zip file with this structure:
.
├── a.txt
- └── b
- ├── c.txt
- └── d
- └── e.txt
+ ├── b
+ │ ├── c.txt
+ │ ├── d
+ │ │ └── e.txt
+ │ └── f.txt
+ └── g
+ └── h
+ └── i.txt
+
+ This fixture has the following key characteristics:
+
+ - a file at the root (a)
+ - a file two levels deep (b/d/e)
+ - multiple files in a directory (b/c, b/f)
+ - a directory containing only a directory (g/h)
+
+ "alpha" because it uses alphabet
+ "rep" because it's a representative example
"""
data = io.BytesIO()
zf = zipfile.ZipFile(data, "w")
zf.writestr("a.txt", b"content of a")
zf.writestr("b/c.txt", b"content of c")
zf.writestr("b/d/e.txt", b"content of e")
- zf.filename = "abcde.zip"
+ zf.writestr("b/f.txt", b"content of f")
+ zf.writestr("g/h/i.txt", b"content of i")
+ zf.filename = "alpharep.zip"
return zf
@@ -2436,60 +2448,64 @@ def setUp(self):
self.fixtures = contextlib.ExitStack()
self.addCleanup(self.fixtures.close)
- def zipfile_abcde(self):
+ def zipfile_alpharep(self):
with self.subTest():
- yield build_abcde_files()
+ yield build_alpharep_fixture()
with self.subTest():
- yield add_dirs(build_abcde_files())
+ yield add_dirs(build_alpharep_fixture())
def zipfile_ondisk(self):
tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir()))
- for zipfile_abcde in self.zipfile_abcde():
- buffer = zipfile_abcde.fp
- zipfile_abcde.close()
- path = tmpdir / zipfile_abcde.filename
+ for alpharep in self.zipfile_alpharep():
+ buffer = alpharep.fp
+ alpharep.close()
+ path = tmpdir / alpharep.filename
with path.open("wb") as strm:
strm.write(buffer.getvalue())
yield path
- def test_iterdir_istype(self):
- for zipfile_abcde in self.zipfile_abcde():
- root = zipfile.Path(zipfile_abcde)
+ def test_iterdir_and_types(self):
+ for alpharep in self.zipfile_alpharep():
+ root = zipfile.Path(alpharep)
assert root.is_dir()
- a, b = root.iterdir()
+ a, b, g = root.iterdir()
assert a.is_file()
assert b.is_dir()
- c, d = b.iterdir()
- assert c.is_file()
+ assert g.is_dir()
+ c, f, d = b.iterdir()
+ assert c.is_file() and f.is_file()
e, = d.iterdir()
assert e.is_file()
+ h, = g.iterdir()
+ i, = h.iterdir()
+ assert i.is_file()
def test_open(self):
- for zipfile_abcde in self.zipfile_abcde():
- root = zipfile.Path(zipfile_abcde)
- a, b = root.iterdir()
+ for alpharep in self.zipfile_alpharep():
+ root = zipfile.Path(alpharep)
+ a, b, g = root.iterdir()
with a.open() as strm:
data = strm.read()
assert data == b"content of a"
def test_read(self):
- for zipfile_abcde in self.zipfile_abcde():
- root = zipfile.Path(zipfile_abcde)
- a, b = root.iterdir()
+ for alpharep in self.zipfile_alpharep():
+ root = zipfile.Path(alpharep)
+ a, b, g = root.iterdir()
assert a.read_text() == "content of a"
assert a.read_bytes() == b"content of a"
def test_joinpath(self):
- for zipfile_abcde in self.zipfile_abcde():
- root = zipfile.Path(zipfile_abcde)
+ for alpharep in self.zipfile_alpharep():
+ root = zipfile.Path(alpharep)
a = root.joinpath("a")
assert a.is_file()
e = root.joinpath("b").joinpath("d").joinpath("e.txt")
assert e.read_text() == "content of e"
def test_traverse_truediv(self):
- for zipfile_abcde in self.zipfile_abcde():
- root = zipfile.Path(zipfile_abcde)
+ for alpharep in self.zipfile_alpharep():
+ root = zipfile.Path(alpharep)
a = root / "a"
assert a.is_file()
e = root / "b" / "d" / "e.txt"
@@ -2504,26 +2520,27 @@ def test_pathlike_construction(self):
zipfile.Path(pathlike)
def test_traverse_pathlike(self):
- for zipfile_abcde in self.zipfile_abcde():
- root = zipfile.Path(zipfile_abcde)
+ for alpharep in self.zipfile_alpharep():
+ root = zipfile.Path(alpharep)
root / pathlib.Path("a")
def test_parent(self):
- for zipfile_abcde in self.zipfile_abcde():
- root = zipfile.Path(zipfile_abcde)
+ for alpharep in self.zipfile_alpharep():
+ root = zipfile.Path(alpharep)
assert (root / 'a').parent.at == ''
assert (root / 'a' / 'b').parent.at == 'a/'
def test_dir_parent(self):
- for zipfile_abcde in self.zipfile_abcde():
- root = zipfile.Path(zipfile_abcde)
+ for alpharep in self.zipfile_alpharep():
+ root = zipfile.Path(alpharep)
assert (root / 'b').parent.at == ''
assert (root / 'b/').parent.at == ''
def test_missing_dir_parent(self):
- for zipfile_abcde in self.zipfile_abcde():
- root = zipfile.Path(zipfile_abcde)
+ for alpharep in self.zipfile_alpharep():
+ root = zipfile.Path(alpharep)
assert (root / 'missing dir/').parent.at == ''
+
if __name__ == "__main__":
unittest.main()
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 3c1f1235034a..dfd090795019 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -7,6 +7,7 @@
import functools
import importlib.util
import io
+import itertools
import os
import posixpath
import shutil
@@ -2104,6 +2105,65 @@ def _compile(file, optimize=-1):
return (fname, archivename)
+def _unique_everseen(iterable, key=None):
+ "List unique elements, preserving order. Remember all elements ever seen."
+ # unique_everseen('AAAABBBCCDAABBB') --> A B C D
+ # unique_everseen('ABBCcAD', str.lower) --> A B C D
+ seen = set()
+ seen_add = seen.add
+ if key is None:
+ for element in itertools.filterfalse(seen.__contains__, iterable):
+ seen_add(element)
+ yield element
+ else:
+ for element in iterable:
+ k = key(element)
+ if k not in seen:
+ seen_add(k)
+ yield element
+
+
+def _parents(path):
+ """
+ Given a path with elements separated by
+ posixpath.sep, generate all parents of that path.
+
+ >>> list(_parents('b/d'))
+ ['b']
+ >>> list(_parents('/b/d/'))
+ ['/b']
+ >>> list(_parents('b/d/f/'))
+ ['b/d', 'b']
+ >>> list(_parents('b'))
+ []
+ >>> list(_parents(''))
+ []
+ """
+ return itertools.islice(_ancestry(path), 1, None)
+
+
+def _ancestry(path):
+ """
+ Given a path with elements separated by
+ posixpath.sep, generate all elements of that path
+
+ >>> list(_ancestry('b/d'))
+ ['b/d', 'b']
+ >>> list(_ancestry('/b/d/'))
+ ['/b/d', '/b']
+ >>> list(_ancestry('b/d/f/'))
+ ['b/d/f', 'b/d', 'b']
+ >>> list(_ancestry('b'))
+ ['b']
+ >>> list(_ancestry(''))
+ []
+ """
+ path = path.rstrip(posixpath.sep)
+ while path and path != posixpath.sep:
+ yield path
+ path, tail = posixpath.split(path)
+
+
class Path:
"""
A pathlib-compatible interface for zip files.
@@ -2227,12 +2287,17 @@ def joinpath(self, add):
__truediv__ = joinpath
@staticmethod
- def _add_implied_dirs(names):
- return names + [
- name + "/"
- for name in map(posixpath.dirname, names)
- if name and name + "/" not in names
- ]
+ def _implied_dirs(names):
+ return _unique_everseen(
+ parent + "/"
+ for name in names
+ for parent in _parents(name)
+ if parent + "/" not in names
+ )
+
+ @classmethod
+ def _add_implied_dirs(cls, names):
+ return names + list(cls._implied_dirs(names))
@property
def parent(self):
diff --git a/Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst b/Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst
new file mode 100644
index 000000000000..f9ec6a33b07b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst
@@ -0,0 +1 @@
+In ``zipfile.Path``, when adding implicit dirs, ensure that ancestral directories are added and that duplicates are excluded.
More information about the Python-checkins
mailing list