[Python-checkins] bpo-45211: Move helpers from getpath.c to internal API. (gh-28550)

ericsnowcurrently webhook-mailer at python.org
Mon Sep 27 12:00:42 EDT 2021


https://github.com/python/cpython/commit/ae7839bbe817329dd015f9195da308a0f3fbd3e2
commit: ae7839bbe817329dd015f9195da308a0f3fbd3e2
branch: main
author: Eric Snow <ericsnowcurrently at gmail.com>
committer: ericsnowcurrently <ericsnowcurrently at gmail.com>
date: 2021-09-27T10:00:32-06:00
summary:

bpo-45211: Move helpers from getpath.c to internal API. (gh-28550)

This accomplishes 2 things:

* consolidates some common code between getpath.c and getpathp.c
* makes the helpers available to code in other files

FWIW, the signature of the join_relfile() function (in fileutils.c) intentionally mirrors that of Windows' PathCchCombineEx().

Note that this change is mostly moving code around. No behavior is meant to change.

https://bugs.python.org/issue45211

files:
M Include/internal/pycore_fileutils.h
M Include/internal/pycore_pystate.h
M Lib/test/test_embed.py
M Modules/getpath.c
M PC/getpathp.c
M Python/fileutils.c
M Python/initconfig.c
M Python/preconfig.c

diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h
index c1c9244a1bc7c..8491ed9b5ffe2 100644
--- a/Include/internal/pycore_fileutils.h
+++ b/Include/internal/pycore_fileutils.h
@@ -10,6 +10,12 @@ extern "C" {
 
 #include <locale.h>   /* struct lconv */
 
+// This is used after getting NULL back from Py_DecodeLocale().
+#define DECODE_LOCALE_ERR(NAME, LEN) \
+    ((LEN) == (size_t)-2) \
+     ? _PyStatus_ERR("cannot decode " NAME) \
+     : _PyStatus_NO_MEMORY()
+
 PyAPI_DATA(int) _Py_HasFileSystemDefaultEncodeErrors;
 
 PyAPI_FUNC(int) _Py_DecodeUTF8Ex(
@@ -33,6 +39,9 @@ PyAPI_FUNC(wchar_t*) _Py_DecodeUTF8_surrogateescape(
     Py_ssize_t arglen,
     size_t *wlen);
 
+extern int
+_Py_wstat(const wchar_t *, struct stat *);
+
 PyAPI_FUNC(int) _Py_GetForceASCII(void);
 
 /* Reset "force ASCII" mode (if it was initialized).
@@ -65,6 +74,12 @@ extern int _Py_EncodeNonUnicodeWchar_InPlace(
     Py_ssize_t size);
 #endif
 
+extern wchar_t * _Py_join_relfile(const wchar_t *dirname,
+                                  const wchar_t *relfile);
+extern int _Py_add_relfile(wchar_t *dirname,
+                           const wchar_t *relfile,
+                           size_t bufsize);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h
index 4b894f3eff496..aef318989aa6e 100644
--- a/Include/internal/pycore_pystate.h
+++ b/Include/internal/pycore_pystate.h
@@ -30,6 +30,17 @@ _Py_IsMainInterpreter(PyInterpreterState *interp)
 }
 
 
+static inline const PyConfig *
+_Py_GetMainConfig(void)
+{
+    PyInterpreterState *interp = _PyRuntime.interpreters.main;
+    if (interp == NULL) {
+        return NULL;
+    }
+    return _PyInterpreterState_GetConfig(interp);
+}
+
+
 /* Only handle signals on the main thread of the main interpreter. */
 static inline int
 _Py_ThreadCanHandleSignals(PyInterpreterState *interp)
diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py
index e1b466a7b56b1..cda814c3ed34e 100644
--- a/Lib/test/test_embed.py
+++ b/Lib/test/test_embed.py
@@ -434,7 +434,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
         'pathconfig_warnings': 1,
         '_init_main': 1,
         '_isolated_interpreter': 0,
-        'use_frozen_modules': False,
+        'use_frozen_modules': 0,
     }
     if MS_WINDOWS:
         CONFIG_COMPAT.update({
diff --git a/Modules/getpath.c b/Modules/getpath.c
index 363d62a0657eb..de1c6e3fbb657 100644
--- a/Modules/getpath.c
+++ b/Modules/getpath.c
@@ -115,11 +115,6 @@ extern "C" {
 
 #define BUILD_LANDMARK L"Modules/Setup.local"
 
-#define DECODE_LOCALE_ERR(NAME, LEN) \
-    ((LEN) == (size_t)-2) \
-     ? _PyStatus_ERR("cannot decode " NAME) \
-     : _PyStatus_NO_MEMORY()
-
 #define PATHLEN_ERR() _PyStatus_ERR("path configuration: path too long")
 
 typedef struct {
@@ -149,23 +144,6 @@ static const wchar_t delimiter[2] = {DELIM, '\0'};
 static const wchar_t separator[2] = {SEP, '\0'};
 
 
-/* Get file status. Encode the path to the locale encoding. */
-static int
-_Py_wstat(const wchar_t* path, struct stat *buf)
-{
-    int err;
-    char *fname;
-    fname = _Py_EncodeLocaleRaw(path, NULL);
-    if (fname == NULL) {
-        errno = EINVAL;
-        return -1;
-    }
-    err = stat(fname, buf);
-    PyMem_RawFree(fname);
-    return err;
-}
-
-
 static void
 reduce(wchar_t *dir)
 {
@@ -235,28 +213,18 @@ isdir(const wchar_t *filename)
 static PyStatus
 joinpath(wchar_t *path, const wchar_t *path2, size_t path_len)
 {
-    size_t n;
-    if (!_Py_isabs(path2)) {
-        n = wcslen(path);
-        if (n >= path_len) {
+    if (_Py_isabs(path2)) {
+        if (wcslen(path2) >= path_len) {
             return PATHLEN_ERR();
         }
-
-        if (n > 0 && path[n-1] != SEP) {
-            path[n++] = SEP;
-        }
+        wcscpy(path, path2);
     }
     else {
-        n = 0;
-    }
-
-    size_t k = wcslen(path2);
-    if (n + k >= path_len) {
-        return PATHLEN_ERR();
+        if (_Py_add_relfile(path, path2, path_len) < 0) {
+            return PATHLEN_ERR();
+        }
+        return _PyStatus_OK();
     }
-    wcsncpy(path + n, path2, k);
-    path[n + k] = '\0';
-
     return _PyStatus_OK();
 }
 
@@ -283,23 +251,7 @@ joinpath2(const wchar_t *path, const wchar_t *path2)
     if (_Py_isabs(path2)) {
         return _PyMem_RawWcsdup(path2);
     }
-
-    size_t len = wcslen(path);
-    int add_sep = (len > 0 && path[len - 1] != SEP);
-    len += add_sep;
-    len += wcslen(path2);
-
-    wchar_t *new_path = PyMem_RawMalloc((len + 1) * sizeof(wchar_t));
-    if (new_path == NULL) {
-        return NULL;
-    }
-
-    wcscpy(new_path, path);
-    if (add_sep) {
-        wcscat(new_path, separator);
-    }
-    wcscat(new_path, path2);
-    return new_path;
+    return _Py_join_relfile(path, path2);
 }
 
 
diff --git a/PC/getpathp.c b/PC/getpathp.c
index 603a1eb13c4ff..38009465ae649 100644
--- a/PC/getpathp.c
+++ b/PC/getpathp.c
@@ -82,6 +82,7 @@
 #include "Python.h"
 #include "pycore_initconfig.h"    // PyStatus
 #include "pycore_pathconfig.h"    // _PyPathConfig
+#include "pycore_fileutils.h"     // _Py_add_relfile()
 #include "osdefs.h"               // SEP, ALTSEP
 #include <wchar.h>
 
@@ -115,10 +116,6 @@
  * with a semicolon separated path prior to calling Py_Initialize.
  */
 
-#ifndef LANDMARK
-#  define LANDMARK L"lib\\os.py"
-#endif
-
 #define INIT_ERR_BUFFER_OVERFLOW() _PyStatus_ERR("buffer overflow")
 
 
@@ -216,7 +213,7 @@ exists(const wchar_t *filename)
    Assumes 'filename' MAXPATHLEN+1 bytes long -
    may extend 'filename' by one character. */
 static int
-ismodule(wchar_t *filename, int update_filename)
+ismodule(wchar_t *filename)
 {
     size_t n;
 
@@ -231,9 +228,8 @@ ismodule(wchar_t *filename, int update_filename)
         filename[n] = L'c';
         filename[n + 1] = L'\0';
         exist = exists(filename);
-        if (!update_filename) {
-            filename[n] = L'\0';
-        }
+        // Drop the 'c' we just added.
+        filename[n] = L'\0';
         return exist;
     }
     return 0;
@@ -253,7 +249,7 @@ ismodule(wchar_t *filename, int update_filename)
 static void
 join(wchar_t *buffer, const wchar_t *stuff)
 {
-    if (FAILED(PathCchCombineEx(buffer, MAXPATHLEN+1, buffer, stuff, 0))) {
+    if (_Py_add_relfile(buffer, stuff, MAXPATHLEN+1) < 0) {
         Py_FatalError("buffer overflow in getpathp.c's join()");
     }
 }
@@ -273,30 +269,37 @@ canonicalize(wchar_t *buffer, const wchar_t *path)
     return _PyStatus_OK();
 }
 
-
-/* gotlandmark only called by search_for_prefix, which ensures
-   'prefix' is null terminated in bounds.  join() ensures
-   'landmark' can not overflow prefix if too long. */
 static int
-gotlandmark(const wchar_t *prefix, const wchar_t *landmark)
+is_stdlibdir(wchar_t *stdlibdir)
 {
-    wchar_t filename[MAXPATHLEN+1];
-    memset(filename, 0, sizeof(filename));
-    wcscpy_s(filename, Py_ARRAY_LENGTH(filename), prefix);
-    join(filename, landmark);
-    return ismodule(filename, FALSE);
+    wchar_t *filename = stdlibdir;
+#ifndef LANDMARK
+#  define LANDMARK L"os.py"
+#endif
+    /* join() ensures 'landmark' can not overflow prefix if too long. */
+    join(filename, LANDMARK);
+    return ismodule(filename);
 }
 
-
 /* assumes argv0_path is MAXPATHLEN+1 bytes long, already \0 term'd.
    assumption provided by only caller, calculate_path() */
 static int
-search_for_prefix(wchar_t *prefix, const wchar_t *argv0_path, const wchar_t *landmark)
+search_for_prefix(wchar_t *prefix, const wchar_t *argv0_path)
 {
-    /* Search from argv0_path, until landmark is found */
-    wcscpy_s(prefix, MAXPATHLEN + 1, argv0_path);
+    /* Search from argv0_path, until LANDMARK is found.
+       We guarantee 'prefix' is null terminated in bounds. */
+    wcscpy_s(prefix, MAXPATHLEN+1, argv0_path);
+    wchar_t stdlibdir[MAXPATHLEN+1];
+    wcscpy_s(stdlibdir, Py_ARRAY_LENGTH(stdlibdir), prefix);
+    /* We initialize with the longest possible path, in case it doesn't fit.
+       This also gives us an initial SEP at stdlibdir[wcslen(prefix)]. */
+    join(stdlibdir, L"lib");
     do {
-        if (gotlandmark(prefix, landmark)) {
+        assert(stdlibdir[wcslen(prefix)] == SEP);
+        /* Due to reduce() and our initial value, this result
+           is guaranteed to fit. */
+        wcscpy(&stdlibdir[wcslen(prefix) + 1], L"lib");
+        if (is_stdlibdir(stdlibdir)) {
             return 1;
         }
         reduce(prefix);
@@ -758,7 +761,7 @@ calculate_home_prefix(PyCalculatePath *calculate,
             reduce(prefix);
             calculate->home = prefix;
         }
-        else if (search_for_prefix(prefix, argv0_path, LANDMARK)) {
+        else if (search_for_prefix(prefix, argv0_path)) {
             calculate->home = prefix;
         }
         else {
@@ -936,7 +939,7 @@ calculate_module_search_path(PyCalculatePath *calculate,
             lookBuf[nchars] = L'\0';
             /* Up one level to the parent */
             reduce(lookBuf);
-            if (search_for_prefix(prefix, lookBuf, LANDMARK)) {
+            if (search_for_prefix(prefix, lookBuf)) {
                 break;
             }
             /* If we are out of paths to search - give up */
diff --git a/Python/fileutils.c b/Python/fileutils.c
index 9e732ddca55ce..2492d0567d84a 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -7,6 +7,7 @@
 #ifdef MS_WINDOWS
 #  include <malloc.h>
 #  include <windows.h>
+#  include <pathcch.h>            // PathCchCombineEx
 extern int winerror_to_errno(int);
 #endif
 
@@ -1205,6 +1206,31 @@ _Py_fstat(int fd, struct _Py_stat_struct *status)
     return 0;
 }
 
+/* Like _Py_stat() but with a raw filename. */
+int
+_Py_wstat(const wchar_t* path, struct stat *buf)
+{
+    int err;
+#ifdef MS_WINDOWS
+    struct _stat wstatbuf;
+    err = _wstat(path, &wstatbuf);
+    if (!err) {
+        buf->st_mode = wstatbuf.st_mode;
+    }
+#else
+    char *fname;
+    fname = _Py_EncodeLocaleRaw(path, NULL);
+    if (fname == NULL) {
+        errno = EINVAL;
+        return -1;
+    }
+    err = stat(fname, buf);
+    PyMem_RawFree(fname);
+#endif
+    return err;
+}
+
+
 /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
    call stat() otherwise. Only fill st_mode attribute on Windows.
 
@@ -1216,7 +1242,6 @@ _Py_stat(PyObject *path, struct stat *statbuf)
 {
 #ifdef MS_WINDOWS
     int err;
-    struct _stat wstatbuf;
 
 #if USE_UNICODE_WCHAR_CACHE
     const wchar_t *wpath = _PyUnicode_AsUnicode(path);
@@ -1226,9 +1251,7 @@ _Py_stat(PyObject *path, struct stat *statbuf)
     if (wpath == NULL)
         return -2;
 
-    err = _wstat(wpath, &wstatbuf);
-    if (!err)
-        statbuf->st_mode = wstatbuf.st_mode;
+    err = _Py_wstat(wpath, statbuf);
 #if !USE_UNICODE_WCHAR_CACHE
     PyMem_Free(wpath);
 #endif /* USE_UNICODE_WCHAR_CACHE */
@@ -2072,6 +2095,77 @@ _Py_abspath(const wchar_t *path, wchar_t **abspath_p)
 }
 
 
+// The caller must ensure "buffer" is big enough.
+static int
+join_relfile(wchar_t *buffer, size_t bufsize,
+             const wchar_t *dirname, const wchar_t *relfile)
+{
+#ifdef MS_WINDOWS
+    if (FAILED(PathCchCombineEx(buffer, bufsize, dirname, relfile, 0))) {
+        return -1;
+    }
+#else
+    assert(!_Py_isabs(relfile));
+    size_t dirlen = wcslen(dirname);
+    size_t rellen = wcslen(relfile);
+    size_t maxlen = bufsize - 1;
+    if (maxlen > MAXPATHLEN || dirlen >= maxlen || rellen >= maxlen - dirlen) {
+        return -1;
+    }
+    if (dirlen == 0) {
+        // We do not add a leading separator.
+        wcscpy(buffer, relfile);
+    }
+    else {
+        if (dirname != buffer) {
+            wcscpy(buffer, dirname);
+        }
+        size_t relstart = dirlen;
+        if (dirlen > 1 && dirname[dirlen - 1] != SEP) {
+            buffer[dirlen] = SEP;
+            relstart += 1;
+        }
+        wcscpy(&buffer[relstart], relfile);
+    }
+#endif
+    return 0;
+}
+
+/* Join the two paths together, like os.path.join().  Return NULL
+   if memory could not be allocated.  The caller is responsible
+   for calling PyMem_RawFree() on the result. */
+wchar_t *
+_Py_join_relfile(const wchar_t *dirname, const wchar_t *relfile)
+{
+    assert(dirname != NULL && relfile != NULL);
+    assert(!_Py_isabs(relfile));
+    size_t maxlen = wcslen(dirname) + 1 + wcslen(relfile);
+    size_t bufsize = maxlen + 1;
+    wchar_t *filename = PyMem_RawMalloc(bufsize * sizeof(wchar_t));
+    if (filename == NULL) {
+        return NULL;
+    }
+    assert(wcslen(dirname) < MAXPATHLEN);
+    assert(wcslen(relfile) < MAXPATHLEN - wcslen(dirname));
+    join_relfile(filename, bufsize, dirname, relfile);
+    return filename;
+}
+
+/* Join the two paths together, like os.path.join().
+     dirname: the target buffer with the dirname already in place,
+              including trailing NUL
+     relfile: this must be a relative path
+     bufsize: total allocated size of the buffer
+   Return -1 if anything is wrong with the path lengths. */
+int
+_Py_add_relfile(wchar_t *dirname, const wchar_t *relfile, size_t bufsize)
+{
+    assert(dirname != NULL && relfile != NULL);
+    assert(bufsize > 0);
+    return join_relfile(dirname, bufsize, dirname, relfile);
+}
+
+
 /* Get the current directory. buflen is the buffer size in wide characters
    including the null character. Decode the path from the locale encoding.
 
diff --git a/Python/initconfig.c b/Python/initconfig.c
index 8740cc1cf7a2b..40a5846f43b73 100644
--- a/Python/initconfig.c
+++ b/Python/initconfig.c
@@ -587,11 +587,6 @@ Py_GetArgcArgv(int *argc, wchar_t ***argv)
 
 /* --- PyConfig ---------------------------------------------- */
 
-#define DECODE_LOCALE_ERR(NAME, LEN) \
-    (((LEN) == -2) \
-     ? _PyStatus_ERR("cannot decode " NAME) \
-     : _PyStatus_NO_MEMORY())
-
 #define MAX_HASH_SEED 4294967295UL
 
 
diff --git a/Python/preconfig.c b/Python/preconfig.c
index ae1cc3f90fca7..d59273159a671 100644
--- a/Python/preconfig.c
+++ b/Python/preconfig.c
@@ -1,4 +1,5 @@
 #include "Python.h"
+#include "pycore_fileutils.h"     // DECODE_LOCALE_ERR
 #include "pycore_getopt.h"        // _PyOS_GetOpt()
 #include "pycore_initconfig.h"    // _PyArgv
 #include "pycore_pymem.h"         // _PyMem_GetAllocatorName()
@@ -6,12 +7,6 @@
 #include <locale.h>               // setlocale()
 
 
-#define DECODE_LOCALE_ERR(NAME, LEN) \
-    (((LEN) == -2) \
-     ? _PyStatus_ERR("cannot decode " NAME) \
-     : _PyStatus_NO_MEMORY())
-
-
 /* Forward declarations */
 static void
 preconfig_copy(PyPreConfig *config, const PyPreConfig *config2);
@@ -87,8 +82,7 @@ _PyArgv_AsWstrList(const _PyArgv *args, PyWideStringList *list)
             wchar_t *arg = Py_DecodeLocale(args->bytes_argv[i], &len);
             if (arg == NULL) {
                 _PyWideStringList_Clear(&wargv);
-                return DECODE_LOCALE_ERR("command line arguments",
-                                         (Py_ssize_t)len);
+                return DECODE_LOCALE_ERR("command line arguments", len);
             }
             wargv.items[i] = arg;
             wargv.length++;



More information about the Python-checkins mailing list