[Python-checkins] gh-103977: compile re expressions in platform.py only if required (#103981)

JelleZijlstra webhook-mailer at python.org
Sun Apr 30 11:36:26 EDT 2023


https://github.com/python/cpython/commit/f186557dc3e77495ba65b9b7e492ab6ddb7cecc3
commit: f186557dc3e77495ba65b9b7e492ab6ddb7cecc3
branch: main
author: Pieter Eendebak <pieter.eendebak at gmail.com>
committer: JelleZijlstra <jelle.zijlstra at gmail.com>
date: 2023-04-30T08:36:19-07:00
summary:

gh-103977: compile re expressions in platform.py only if required (#103981)

Co-authored-by: Jelle Zijlstra <jelle.zijlstra at gmail.com>

files:
A Misc/NEWS.d/next/Library/2023-04-28-19-08-50.gh-issue-103977.msF70A.rst
M Lib/platform.py

diff --git a/Lib/platform.py b/Lib/platform.py
index 790ef860bf10..7bb222088d50 100755
--- a/Lib/platform.py
+++ b/Lib/platform.py
@@ -136,11 +136,11 @@
     'pl': 200, 'p': 200,
 }
 
-_component_re = re.compile(r'([0-9]+|[._+-])')
 
 def _comparable_version(version):
+    component_re = re.compile(r'([0-9]+|[._+-])')
     result = []
-    for v in _component_re.split(version):
+    for v in component_re.split(version):
         if v not in '._+-':
             try:
                 v = int(v, 10)
@@ -152,11 +152,6 @@ def _comparable_version(version):
 
 ### Platform specific APIs
 
-_libc_search = re.compile(b'(__libc_init)'
-                          b'|'
-                          b'(GLIBC_([0-9.]+))'
-                          b'|'
-                          br'(libc(_\w+)?\.so(?:\.(\d[0-9.]*))?)', re.ASCII)
 
 def libc_ver(executable=None, lib='', version='', chunksize=16384):
 
@@ -190,6 +185,12 @@ def libc_ver(executable=None, lib='', version='', chunksize=16384):
             # sys.executable is not set.
             return lib, version
 
+    libc_search = re.compile(b'(__libc_init)'
+                          b'|'
+                          b'(GLIBC_([0-9.]+))'
+                          b'|'
+                          br'(libc(_\w+)?\.so(?:\.(\d[0-9.]*))?)', re.ASCII)
+
     V = _comparable_version
     # We use os.path.realpath()
     # here to work around problems with Cygwin not being
@@ -200,7 +201,7 @@ def libc_ver(executable=None, lib='', version='', chunksize=16384):
         pos = 0
         while pos < len(binary):
             if b'libc' in binary or b'GLIBC' in binary:
-                m = _libc_search.search(binary, pos)
+                m = libc_search.search(binary, pos)
             else:
                 m = None
             if not m or m.end() == len(binary):
@@ -247,9 +248,6 @@ def _norm_version(version, build=''):
     version = '.'.join(strings[:3])
     return version
 
-_ver_output = re.compile(r'(?:([\w ]+) ([\w.]+) '
-                         r'.*'
-                         r'\[.* ([\d.]+)\])')
 
 # Examples of VER command output:
 #
@@ -295,9 +293,13 @@ def _syscmd_ver(system='', release='', version='',
     else:
         return system, release, version
 
+    ver_output = re.compile(r'(?:([\w ]+) ([\w.]+) '
+                         r'.*'
+                         r'\[.* ([\d.]+)\])')
+
     # Parse the output
     info = info.strip()
-    m = _ver_output.match(info)
+    m = ver_output.match(info)
     if m is not None:
         system, release, version = m.groups()
         # Strip trailing dots from version and release
@@ -1033,18 +1035,6 @@ def processor():
 
 ### Various APIs for extracting information from sys.version
 
-_sys_version_parser = re.compile(
-    r'([\w.+]+)\s*'  # "version<space>"
-    r'\(#?([^,]+)'  # "(#buildno"
-    r'(?:,\s*([\w ]*)'  # ", builddate"
-    r'(?:,\s*([\w :]*))?)?\)\s*'  # ", buildtime)<space>"
-    r'\[([^\]]+)\]?', re.ASCII)  # "[compiler]"
-
-_pypy_sys_version_parser = re.compile(
-    r'([\w.+]+)\s*'
-    r'\(#?([^,]+),\s*([\w ]+),\s*([\w :]+)\)\s*'
-    r'\[PyPy [^\]]+\]?')
-
 _sys_version_cache = {}
 
 def _sys_version(sys_version=None):
@@ -1076,10 +1066,17 @@ def _sys_version(sys_version=None):
     if result is not None:
         return result
 
+    sys_version_parser = re.compile(
+        r'([\w.+]+)\s*'  # "version<space>"
+        r'\(#?([^,]+)'  # "(#buildno"
+        r'(?:,\s*([\w ]*)'  # ", builddate"
+        r'(?:,\s*([\w :]*))?)?\)\s*'  # ", buildtime)<space>"
+        r'\[([^\]]+)\]?', re.ASCII)  # "[compiler]"
+
     if sys.platform.startswith('java'):
         # Jython
         name = 'Jython'
-        match = _sys_version_parser.match(sys_version)
+        match = sys_version_parser.match(sys_version)
         if match is None:
             raise ValueError(
                 'failed to parse Jython sys.version: %s' %
@@ -1091,8 +1088,13 @@ def _sys_version(sys_version=None):
 
     elif "PyPy" in sys_version:
         # PyPy
+        pypy_sys_version_parser = re.compile(
+            r'([\w.+]+)\s*'
+            r'\(#?([^,]+),\s*([\w ]+),\s*([\w :]+)\)\s*'
+            r'\[PyPy [^\]]+\]?')
+
         name = "PyPy"
-        match = _pypy_sys_version_parser.match(sys_version)
+        match = pypy_sys_version_parser.match(sys_version)
         if match is None:
             raise ValueError("failed to parse PyPy sys.version: %s" %
                              repr(sys_version))
@@ -1101,7 +1103,7 @@ def _sys_version(sys_version=None):
 
     else:
         # CPython
-        match = _sys_version_parser.match(sys_version)
+        match = sys_version_parser.match(sys_version)
         if match is None:
             raise ValueError(
                 'failed to parse CPython sys.version: %s' %
@@ -1290,13 +1292,6 @@ def platform(aliased=False, terse=False):
 ### freedesktop.org os-release standard
 # https://www.freedesktop.org/software/systemd/man/os-release.html
 
-# NAME=value with optional quotes (' or "). The regular expression is less
-# strict than shell lexer, but that's ok.
-_os_release_line = re.compile(
-    "^(?P<name>[a-zA-Z0-9_]+)=(?P<quote>[\"\']?)(?P<value>.*)(?P=quote)$"
-)
-# unescape five special characters mentioned in the standard
-_os_release_unescape = re.compile(r"\\([\\\$\"\'`])")
 # /etc takes precedence over /usr/lib
 _os_release_candidates = ("/etc/os-release", "/usr/lib/os-release")
 _os_release_cache = None
@@ -1311,10 +1306,18 @@ def _parse_os_release(lines):
         "PRETTY_NAME": "Linux",
     }
 
+    # NAME=value with optional quotes (' or "). The regular expression is less
+    # strict than shell lexer, but that's ok.
+    os_release_line = re.compile(
+        "^(?P<name>[a-zA-Z0-9_]+)=(?P<quote>[\"\']?)(?P<value>.*)(?P=quote)$"
+    )
+    # unescape five special characters mentioned in the standard
+    os_release_unescape = re.compile(r"\\([\\\$\"\'`])")
+
     for line in lines:
-        mo = _os_release_line.match(line)
+        mo = os_release_line.match(line)
         if mo is not None:
-            info[mo.group('name')] = _os_release_unescape.sub(
+            info[mo.group('name')] = os_release_unescape.sub(
                 r"\1", mo.group('value')
             )
 
diff --git a/Misc/NEWS.d/next/Library/2023-04-28-19-08-50.gh-issue-103977.msF70A.rst b/Misc/NEWS.d/next/Library/2023-04-28-19-08-50.gh-issue-103977.msF70A.rst
new file mode 100644
index 000000000000..ff4005774a95
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-04-28-19-08-50.gh-issue-103977.msF70A.rst
@@ -0,0 +1 @@
+Improve import time of :mod:`platform` module.



More information about the Python-checkins mailing list