[Python-checkins] gh-106752: Sync with zipp 3.16.2 (#106757)

jaraco webhook-mailer at python.org
Sat Jul 15 09:21:20 EDT 2023


https://github.com/python/cpython/commit/22980dc7c9dcec4b74fea815542601ef582c230e
commit: 22980dc7c9dcec4b74fea815542601ef582c230e
branch: main
author: Jason R. Coombs <jaraco at jaraco.com>
committer: jaraco <jaraco at jaraco.com>
date: 2023-07-15T09:21:17-04:00
summary:

gh-106752: Sync with zipp 3.16.2 (#106757)

* gh-106752: Sync with zipp 3.16.2

* Add blurb

files:
A Lib/test/test_zipfile/_path/write-alpharep.py
A Lib/zipfile/_path/glob.py
A Misc/NEWS.d/next/Library/2023-07-14-16-54-13.gh-issue-106752.BT1Yxw.rst
M Lib/test/test_zipfile/_path/test_complexity.py
M Lib/test/test_zipfile/_path/test_path.py
M Lib/zipfile/_path/__init__.py

diff --git a/Lib/test/test_zipfile/_path/test_complexity.py b/Lib/test/test_zipfile/_path/test_complexity.py
index 3432dc39e56c4..7050937738af1 100644
--- a/Lib/test/test_zipfile/_path/test_complexity.py
+++ b/Lib/test/test_zipfile/_path/test_complexity.py
@@ -1,5 +1,9 @@
-import unittest
+import io
+import itertools
+import math
+import re
 import string
+import unittest
 import zipfile
 
 from ._functools import compose
@@ -9,9 +13,11 @@
 
 
 big_o = import_or_skip('big_o')
+pytest = import_or_skip('pytest')
 
 
 class TestComplexity(unittest.TestCase):
+    @pytest.mark.flaky
     def test_implied_dirs_performance(self):
         best, others = big_o.big_o(
             compose(consume, zipfile.CompleteDirs._implied_dirs),
@@ -22,3 +28,76 @@ def test_implied_dirs_performance(self):
             min_n=1,
         )
         assert best <= big_o.complexities.Linear
+
+    def make_zip_path(self, depth=1, width=1) -> zipfile.Path:
+        """
+        Construct a Path with width files at every level of depth.
+        """
+        zf = zipfile.ZipFile(io.BytesIO(), mode='w')
+        pairs = itertools.product(self.make_deep_paths(depth), self.make_names(width))
+        for path, name in pairs:
+            zf.writestr(f"{path}{name}.txt", b'')
+        zf.filename = "big un.zip"
+        return zipfile.Path(zf)
+
+    @classmethod
+    def make_names(cls, width, letters=string.ascii_lowercase):
+        """
+        >>> list(TestComplexity.make_names(2))
+        ['a', 'b']
+        >>> list(TestComplexity.make_names(30))
+        ['aa', 'ab', ..., 'bd']
+        """
+        # determine how many products are needed to produce width
+        n_products = math.ceil(math.log(width, len(letters)))
+        inputs = (letters,) * n_products
+        combinations = itertools.product(*inputs)
+        names = map(''.join, combinations)
+        return itertools.islice(names, width)
+
+    @classmethod
+    def make_deep_paths(cls, depth):
+        return map(cls.make_deep_path, range(depth))
+
+    @classmethod
+    def make_deep_path(cls, depth):
+        return ''.join(('d/',) * depth)
+
+    def test_baseline_regex_complexity(self):
+        best, others = big_o.big_o(
+            lambda path: re.fullmatch(r'[^/]*\\.txt', path),
+            self.make_deep_path,
+            max_n=100,
+            min_n=1,
+        )
+        assert best <= big_o.complexities.Constant
+
+    @pytest.mark.flaky
+    def test_glob_depth(self):
+        best, others = big_o.big_o(
+            lambda path: consume(path.glob('*.txt')),
+            self.make_zip_path,
+            max_n=100,
+            min_n=1,
+        )
+        assert best <= big_o.complexities.Quadratic
+
+    @pytest.mark.flaky
+    def test_glob_width(self):
+        best, others = big_o.big_o(
+            lambda path: consume(path.glob('*.txt')),
+            lambda size: self.make_zip_path(width=size),
+            max_n=100,
+            min_n=1,
+        )
+        assert best <= big_o.complexities.Linear
+
+    @pytest.mark.flaky
+    def test_glob_width_and_depth(self):
+        best, others = big_o.big_o(
+            lambda path: consume(path.glob('*.txt')),
+            lambda size: self.make_zip_path(depth=size, width=size),
+            max_n=10,
+            min_n=1,
+        )
+        assert best <= big_o.complexities.Linear
diff --git a/Lib/test/test_zipfile/_path/test_path.py b/Lib/test/test_zipfile/_path/test_path.py
index aff91e5399587..c66cb3cba69eb 100644
--- a/Lib/test/test_zipfile/_path/test_path.py
+++ b/Lib/test/test_zipfile/_path/test_path.py
@@ -41,9 +41,13 @@ def build_alpharep_fixture():
     │   ├── d
     │   │   └── e.txt
     │   └── f.txt
-    └── g
-        └── h
-            └── i.txt
+    ├── g
+    │   └── h
+    │       └── i.txt
+    └── j
+        ├── k.bin
+        ├── l.baz
+        └── m.bar
 
     This fixture has the following key characteristics:
 
@@ -51,6 +55,7 @@ def build_alpharep_fixture():
     - a file two levels deep (b/d/e)
     - multiple files in a directory (b/c, b/f)
     - a directory containing only a directory (g/h)
+    - a directory with files of different extensions (j/klm)
 
     "alpha" because it uses alphabet
     "rep" because it's a representative example
@@ -62,6 +67,9 @@ def build_alpharep_fixture():
     zf.writestr("b/d/e.txt", b"content of e")
     zf.writestr("b/f.txt", b"content of f")
     zf.writestr("g/h/i.txt", b"content of i")
+    zf.writestr("j/k.bin", b"content of k")
+    zf.writestr("j/l.baz", b"content of l")
+    zf.writestr("j/m.bar", b"content of m")
     zf.filename = "alpharep.zip"
     return zf
 
@@ -92,7 +100,7 @@ def zipfile_ondisk(self, alpharep):
     def test_iterdir_and_types(self, alpharep):
         root = zipfile.Path(alpharep)
         assert root.is_dir()
-        a, b, g = root.iterdir()
+        a, b, g, j = root.iterdir()
         assert a.is_file()
         assert b.is_dir()
         assert g.is_dir()
@@ -112,7 +120,7 @@ def test_is_file_missing(self, alpharep):
     @pass_alpharep
     def test_iterdir_on_file(self, alpharep):
         root = zipfile.Path(alpharep)
-        a, b, g = root.iterdir()
+        a, b, g, j = root.iterdir()
         with self.assertRaises(ValueError):
             a.iterdir()
 
@@ -127,7 +135,7 @@ def test_subdir_is_dir(self, alpharep):
     @pass_alpharep
     def test_open(self, alpharep):
         root = zipfile.Path(alpharep)
-        a, b, g = root.iterdir()
+        a, b, g, j = root.iterdir()
         with a.open(encoding="utf-8") as strm:
             data = strm.read()
         self.assertEqual(data, "content of a")
@@ -229,7 +237,7 @@ def test_open_missing_directory(self):
     @pass_alpharep
     def test_read(self, alpharep):
         root = zipfile.Path(alpharep)
-        a, b, g = root.iterdir()
+        a, b, g, j = root.iterdir()
         assert a.read_text(encoding="utf-8") == "content of a"
         # Also check positional encoding arg (gh-101144).
         assert a.read_text("utf-8") == "content of a"
@@ -295,7 +303,7 @@ def test_mutability(self, alpharep):
         reflect that change.
         """
         root = zipfile.Path(alpharep)
-        a, b, g = root.iterdir()
+        a, b, g, j = root.iterdir()
         alpharep.writestr('foo.txt', 'foo')
         alpharep.writestr('bar/baz.txt', 'baz')
         assert any(child.name == 'foo.txt' for child in root.iterdir())
@@ -394,6 +402,13 @@ def test_suffixes(self, alpharep):
         e = root / '.hgrc'
         assert e.suffixes == []
 
+    @pass_alpharep
+    def test_suffix_no_filename(self, alpharep):
+        alpharep.filename = None
+        root = zipfile.Path(alpharep)
+        assert root.joinpath('example').suffix == ""
+        assert root.joinpath('example').suffixes == []
+
     @pass_alpharep
     def test_stem(self, alpharep):
         """
@@ -411,6 +426,8 @@ def test_stem(self, alpharep):
         d = root / "d"
         assert d.stem == "d"
 
+        assert (root / ".gitignore").stem == ".gitignore"
+
     @pass_alpharep
     def test_root_parent(self, alpharep):
         root = zipfile.Path(alpharep)
@@ -442,12 +459,49 @@ def test_match_and_glob(self, alpharep):
         assert not root.match("*.txt")
 
         assert list(root.glob("b/c.*")) == [zipfile.Path(alpharep, "b/c.txt")]
+        assert list(root.glob("b/*.txt")) == [
+            zipfile.Path(alpharep, "b/c.txt"),
+            zipfile.Path(alpharep, "b/f.txt"),
+        ]
 
+    @pass_alpharep
+    def test_glob_recursive(self, alpharep):
+        root = zipfile.Path(alpharep)
         files = root.glob("**/*.txt")
         assert all(each.match("*.txt") for each in files)
 
         assert list(root.glob("**/*.txt")) == list(root.rglob("*.txt"))
 
+    @pass_alpharep
+    def test_glob_subdirs(self, alpharep):
+        root = zipfile.Path(alpharep)
+
+        assert list(root.glob("*/i.txt")) == []
+        assert list(root.rglob("*/i.txt")) == [zipfile.Path(alpharep, "g/h/i.txt")]
+
+    @pass_alpharep
+    def test_glob_does_not_overmatch_dot(self, alpharep):
+        root = zipfile.Path(alpharep)
+
+        assert list(root.glob("*.xt")) == []
+
+    @pass_alpharep
+    def test_glob_single_char(self, alpharep):
+        root = zipfile.Path(alpharep)
+
+        assert list(root.glob("a?txt")) == [zipfile.Path(alpharep, "a.txt")]
+        assert list(root.glob("a[.]txt")) == [zipfile.Path(alpharep, "a.txt")]
+        assert list(root.glob("a[?]txt")) == []
+
+    @pass_alpharep
+    def test_glob_chars(self, alpharep):
+        root = zipfile.Path(alpharep)
+
+        assert list(root.glob("j/?.b[ai][nz]")) == [
+            zipfile.Path(alpharep, "j/k.bin"),
+            zipfile.Path(alpharep, "j/l.baz"),
+        ]
+
     def test_glob_empty(self):
         root = zipfile.Path(zipfile.ZipFile(io.BytesIO(), 'w'))
         with self.assertRaises(ValueError):
diff --git a/Lib/test/test_zipfile/_path/write-alpharep.py b/Lib/test/test_zipfile/_path/write-alpharep.py
new file mode 100644
index 0000000000000..48c09b537179f
--- /dev/null
+++ b/Lib/test/test_zipfile/_path/write-alpharep.py
@@ -0,0 +1,4 @@
+from . import test_path
+
+
+__name__ == '__main__' and test_path.build_alpharep_fixture().extractall('alpharep')
diff --git a/Lib/zipfile/_path/__init__.py b/Lib/zipfile/_path/__init__.py
index fd49a3ea91db5..78c413563bb2b 100644
--- a/Lib/zipfile/_path/__init__.py
+++ b/Lib/zipfile/_path/__init__.py
@@ -5,7 +5,8 @@
 import contextlib
 import pathlib
 import re
-import fnmatch
+
+from .glob import translate
 
 
 __all__ = ['Path']
@@ -296,21 +297,24 @@ def open(self, mode='r', *args, pwd=None, **kwargs):
         encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
         return io.TextIOWrapper(stream, encoding, *args, **kwargs)
 
+    def _base(self):
+        return pathlib.PurePosixPath(self.at or self.root.filename)
+
     @property
     def name(self):
-        return pathlib.Path(self.at).name or self.filename.name
+        return self._base().name
 
     @property
     def suffix(self):
-        return pathlib.Path(self.at).suffix or self.filename.suffix
+        return self._base().suffix
 
     @property
     def suffixes(self):
-        return pathlib.Path(self.at).suffixes or self.filename.suffixes
+        return self._base().suffixes
 
     @property
     def stem(self):
-        return pathlib.Path(self.at).stem or self.filename.stem
+        return self._base().stem
 
     @property
     def filename(self):
@@ -347,7 +351,7 @@ def iterdir(self):
         return filter(self._is_child, subs)
 
     def match(self, path_pattern):
-        return pathlib.Path(self.at).match(path_pattern)
+        return pathlib.PurePosixPath(self.at).match(path_pattern)
 
     def is_symlink(self):
         """
@@ -355,22 +359,13 @@ def is_symlink(self):
         """
         return False
 
-    def _descendants(self):
-        for child in self.iterdir():
-            yield child
-            if child.is_dir():
-                yield from child._descendants()
-
     def glob(self, pattern):
         if not pattern:
             raise ValueError(f"Unacceptable pattern: {pattern!r}")
 
-        matches = re.compile(fnmatch.translate(pattern)).fullmatch
-        return (
-            child
-            for child in self._descendants()
-            if matches(str(child.relative_to(self)))
-        )
+        prefix = re.escape(self.at)
+        matches = re.compile(prefix + translate(pattern)).fullmatch
+        return map(self._next, filter(matches, self.root.namelist()))
 
     def rglob(self, pattern):
         return self.glob(f'**/{pattern}')
diff --git a/Lib/zipfile/_path/glob.py b/Lib/zipfile/_path/glob.py
new file mode 100644
index 0000000000000..4a2e665e27078
--- /dev/null
+++ b/Lib/zipfile/_path/glob.py
@@ -0,0 +1,40 @@
+import re
+
+
+def translate(pattern):
+    r"""
+    Given a glob pattern, produce a regex that matches it.
+
+    >>> translate('*.txt')
+    '[^/]*\\.txt'
+    >>> translate('a?txt')
+    'a.txt'
+    >>> translate('**/*')
+    '.*/[^/]*'
+    """
+    return ''.join(map(replace, separate(pattern)))
+
+
+def separate(pattern):
+    """
+    Separate out character sets to avoid translating their contents.
+
+    >>> [m.group(0) for m in separate('*.txt')]
+    ['*.txt']
+    >>> [m.group(0) for m in separate('a[?]txt')]
+    ['a', '[?]', 'txt']
+    """
+    return re.finditer(r'([^\[]+)|(?P<set>[\[].*?[\]])|([\[][^\]]*$)', pattern)
+
+
+def replace(match):
+    """
+    Perform the replacements for a match from :func:`separate`.
+    """
+
+    return match.group('set') or (
+        re.escape(match.group(0))
+        .replace('\\*\\*', r'.*')
+        .replace('\\*', r'[^/]*')
+        .replace('\\?', r'.')
+    )
diff --git a/Misc/NEWS.d/next/Library/2023-07-14-16-54-13.gh-issue-106752.BT1Yxw.rst b/Misc/NEWS.d/next/Library/2023-07-14-16-54-13.gh-issue-106752.BT1Yxw.rst
new file mode 100644
index 0000000000000..bbc53d76decbc
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-07-14-16-54-13.gh-issue-106752.BT1Yxw.rst
@@ -0,0 +1,5 @@
+Fixed several bugs in zipfile.Path, including: in ``Path.match``, Windows
+separators are no longer honored (and never were meant to be); Fixed
+``name``/``suffix``/``suffixes``/``stem`` operations when no filename is
+present and the Path is not at the root of the zipfile; Reworked glob for
+performance and more correct matching behavior.



More information about the Python-checkins mailing list