[Python-checkins] bpo-44246: Entry points performance improvements. (GH-26467)
jaraco
webhook-mailer at python.org
Mon May 31 11:52:38 EDT 2021
https://github.com/python/cpython/commit/410b70d39d9d77384f8b8597560f6731530149ca
commit: 410b70d39d9d77384f8b8597560f6731530149ca
branch: main
author: Jason R. Coombs <jaraco at jaraco.com>
committer: jaraco <jaraco at jaraco.com>
date: 2021-05-31T11:52:29-04:00
summary:
bpo-44246: Entry points performance improvements. (GH-26467)
From importlib_metadata 4.3.1.
files:
A Misc/NEWS.d/next/Library/2021-05-31-11-28-03.bpo-44246.nhmt-v.rst
M Lib/importlib/metadata/__init__.py
M Lib/test/test_importlib/test_zip.py
diff --git a/Lib/importlib/metadata/__init__.py b/Lib/importlib/metadata/__init__.py
index 94b83869a68550..2e3403e5a565cb 100644
--- a/Lib/importlib/metadata/__init__.py
+++ b/Lib/importlib/metadata/__init__.py
@@ -493,6 +493,11 @@ def name(self):
"""Return the 'Name' metadata for the distribution package."""
return self.metadata['Name']
+ @property
+ def _normalized_name(self):
+ """Return a normalized version of the name."""
+ return Prepared.normalize(self.name)
+
@property
def version(self):
"""Return the 'Version' metadata for the distribution package."""
@@ -795,6 +800,22 @@ def read_text(self, filename):
def locate_file(self, path):
return self._path.parent / path
+ @property
+ def _normalized_name(self):
+ """
+ Performance optimization: where possible, resolve the
+ normalized name from the file system path.
+ """
+ stem = os.path.basename(str(self._path))
+ return self._name_from_stem(stem) or super()._normalized_name
+
+ def _name_from_stem(self, stem):
+ name, ext = os.path.splitext(stem)
+ if ext not in ('.dist-info', '.egg-info'):
+ return
+ name, sep, rest = stem.partition('-')
+ return name
+
def distribution(distribution_name):
"""Get the ``Distribution`` instance for the named package.
@@ -849,7 +870,8 @@ def entry_points(**params) -> Union[EntryPoints, SelectableGroups]:
:return: EntryPoints or SelectableGroups for all installed packages.
"""
- unique = functools.partial(unique_everseen, key=operator.attrgetter('name'))
+ norm_name = operator.attrgetter('_normalized_name')
+ unique = functools.partial(unique_everseen, key=norm_name)
eps = itertools.chain.from_iterable(
dist.entry_points for dist in unique(distributions())
)
diff --git a/Lib/test/test_importlib/test_zip.py b/Lib/test/test_importlib/test_zip.py
index 83e041385e0e8c..bf16a3b95e18c5 100644
--- a/Lib/test/test_importlib/test_zip.py
+++ b/Lib/test/test_importlib/test_zip.py
@@ -76,3 +76,7 @@ def test_files(self):
for file in files('example'):
path = str(file.dist.locate_file(file))
assert '.egg/' in path, path
+
+ def test_normalized_name(self):
+ dist = distribution('example')
+ assert dist._normalized_name == 'example'
diff --git a/Misc/NEWS.d/next/Library/2021-05-31-11-28-03.bpo-44246.nhmt-v.rst b/Misc/NEWS.d/next/Library/2021-05-31-11-28-03.bpo-44246.nhmt-v.rst
new file mode 100644
index 00000000000000..727d9fd0a19d8a
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-05-31-11-28-03.bpo-44246.nhmt-v.rst
@@ -0,0 +1,3 @@
+In importlib.metadata.entry_points, de-duplication of distributions no
+longer requires loading the full metadata for PathDistribution objects,
+improving entry point loading performance by ~10x.
More information about the Python-checkins mailing list