[Python-checkins] gh-88745: Add _winapi.CopyFile2 and update shutil.copy2 to use it (GH-105055)

zooba webhook-mailer at python.org
Tue May 30 06:00:37 EDT 2023


https://github.com/python/cpython/commit/cda1bd3c9d3b2cecdeeba0c498cd2df83fbdb535
commit: cda1bd3c9d3b2cecdeeba0c498cd2df83fbdb535
branch: main
author: Steve Dower <steve.dower at python.org>
committer: zooba <steve.dower at microsoft.com>
date: 2023-05-30T11:00:29+01:00
summary:

gh-88745: Add _winapi.CopyFile2 and update shutil.copy2 to use it (GH-105055)

files:
A Misc/NEWS.d/next/Windows/2023-05-29-11-38-53.gh-issue-88745.cldf9G.rst
M Include/internal/pycore_global_objects_fini_generated.h
M Include/internal/pycore_global_strings.h
M Include/internal/pycore_runtime_init_generated.h
M Include/internal/pycore_unicodeobject_generated.h
M Lib/shutil.py
M Modules/_winapi.c
M Modules/clinic/_winapi.c.h

diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h
index a83f8fc49fc5..546ba6d4c552 100644
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -919,6 +919,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exc_value));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(excepthook));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exception));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(existing_file_name));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exp));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(extend));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(extra_tokens));
@@ -1071,6 +1072,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespaces));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(narg));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ndigits));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(new_file_name));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(new_limit));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(newline));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(newlines));
@@ -1125,6 +1127,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(priority));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(progress));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(progress_handler));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(progress_routine));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(proto));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(protocol));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ps1));
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index dd6a62f53a99..088bd96c756b 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -407,6 +407,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(exc_value)
         STRUCT_FOR_ID(excepthook)
         STRUCT_FOR_ID(exception)
+        STRUCT_FOR_ID(existing_file_name)
         STRUCT_FOR_ID(exp)
         STRUCT_FOR_ID(extend)
         STRUCT_FOR_ID(extra_tokens)
@@ -559,6 +560,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(namespaces)
         STRUCT_FOR_ID(narg)
         STRUCT_FOR_ID(ndigits)
+        STRUCT_FOR_ID(new_file_name)
         STRUCT_FOR_ID(new_limit)
         STRUCT_FOR_ID(newline)
         STRUCT_FOR_ID(newlines)
@@ -613,6 +615,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(priority)
         STRUCT_FOR_ID(progress)
         STRUCT_FOR_ID(progress_handler)
+        STRUCT_FOR_ID(progress_routine)
         STRUCT_FOR_ID(proto)
         STRUCT_FOR_ID(protocol)
         STRUCT_FOR_ID(ps1)
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
index d689f717eaf9..2963423f6071 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -913,6 +913,7 @@ extern "C" {
     INIT_ID(exc_value), \
     INIT_ID(excepthook), \
     INIT_ID(exception), \
+    INIT_ID(existing_file_name), \
     INIT_ID(exp), \
     INIT_ID(extend), \
     INIT_ID(extra_tokens), \
@@ -1065,6 +1066,7 @@ extern "C" {
     INIT_ID(namespaces), \
     INIT_ID(narg), \
     INIT_ID(ndigits), \
+    INIT_ID(new_file_name), \
     INIT_ID(new_limit), \
     INIT_ID(newline), \
     INIT_ID(newlines), \
@@ -1119,6 +1121,7 @@ extern "C" {
     INIT_ID(priority), \
     INIT_ID(progress), \
     INIT_ID(progress_handler), \
+    INIT_ID(progress_routine), \
     INIT_ID(proto), \
     INIT_ID(protocol), \
     INIT_ID(ps1), \
diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h
index db6a157ee7af..9e13a9491b7d 100644
--- a/Include/internal/pycore_unicodeobject_generated.h
+++ b/Include/internal/pycore_unicodeobject_generated.h
@@ -1062,6 +1062,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     string = &_Py_ID(exception);
     assert(_PyUnicode_CheckConsistency(string, 1));
     _PyUnicode_InternInPlace(interp, &string);
+    string = &_Py_ID(existing_file_name);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    _PyUnicode_InternInPlace(interp, &string);
     string = &_Py_ID(exp);
     assert(_PyUnicode_CheckConsistency(string, 1));
     _PyUnicode_InternInPlace(interp, &string);
@@ -1518,6 +1521,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     string = &_Py_ID(ndigits);
     assert(_PyUnicode_CheckConsistency(string, 1));
     _PyUnicode_InternInPlace(interp, &string);
+    string = &_Py_ID(new_file_name);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    _PyUnicode_InternInPlace(interp, &string);
     string = &_Py_ID(new_limit);
     assert(_PyUnicode_CheckConsistency(string, 1));
     _PyUnicode_InternInPlace(interp, &string);
@@ -1680,6 +1686,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     string = &_Py_ID(progress_handler);
     assert(_PyUnicode_CheckConsistency(string, 1));
     _PyUnicode_InternInPlace(interp, &string);
+    string = &_Py_ID(progress_routine);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    _PyUnicode_InternInPlace(interp, &string);
     string = &_Py_ID(proto);
     assert(_PyUnicode_CheckConsistency(string, 1));
     _PyUnicode_InternInPlace(interp, &string);
diff --git a/Lib/shutil.py b/Lib/shutil.py
index 7d1a3d00011f..3f2864af517e 100644
--- a/Lib/shutil.py
+++ b/Lib/shutil.py
@@ -42,6 +42,8 @@
 
 if sys.platform == 'win32':
     import _winapi
+else:
+    _winapi = None
 
 COPY_BUFSIZE = 1024 * 1024 if _WINDOWS else 64 * 1024
 # This should never be removed, see rationale in:
@@ -435,6 +437,29 @@ def copy2(src, dst, *, follow_symlinks=True):
     """
     if os.path.isdir(dst):
         dst = os.path.join(dst, os.path.basename(src))
+
+    if hasattr(_winapi, "CopyFile2"):
+        src_ = os.fsdecode(src)
+        dst_ = os.fsdecode(dst)
+        flags = _winapi.COPY_FILE_ALLOW_DECRYPTED_DESTINATION # for compat
+        if not follow_symlinks:
+            flags |= _winapi.COPY_FILE_COPY_SYMLINK
+        try:
+            _winapi.CopyFile2(src_, dst_, flags)
+            return dst
+        except OSError as exc:
+            if (exc.winerror == _winapi.ERROR_PRIVILEGE_NOT_HELD
+                and not follow_symlinks):
+                # Likely encountered a symlink we aren't allowed to create.
+                # Fall back on the old code
+                pass
+            elif exc.winerror == _winapi.ERROR_ACCESS_DENIED:
+                # Possibly encountered a hidden or readonly file we can't
+                # overwrite. Fall back on old code
+                pass
+            else:
+                raise
+
     copyfile(src, dst, follow_symlinks=follow_symlinks)
     copystat(src, dst, follow_symlinks=follow_symlinks)
     return dst
diff --git a/Misc/NEWS.d/next/Windows/2023-05-29-11-38-53.gh-issue-88745.cldf9G.rst b/Misc/NEWS.d/next/Windows/2023-05-29-11-38-53.gh-issue-88745.cldf9G.rst
new file mode 100644
index 000000000000..258eb89d50d9
--- /dev/null
+++ b/Misc/NEWS.d/next/Windows/2023-05-29-11-38-53.gh-issue-88745.cldf9G.rst
@@ -0,0 +1,3 @@
+Improve performance of :func:`shutil.copy2` by using the operating system's
+``CopyFile2`` function. This may result in subtle changes to metadata copied
+along with some files, bringing them in line with normal OS behavior.
diff --git a/Modules/_winapi.c b/Modules/_winapi.c
index 1e02dbc1a4bf..bbc9facd227c 100644
--- a/Modules/_winapi.c
+++ b/Modules/_winapi.c
@@ -1947,6 +1947,7 @@ _winapi_GetFileType_impl(PyObject *module, HANDLE handle)
     return result;
 }
 
+
 /*[clinic input]
 _winapi._mimetypes_read_windows_registry
 
@@ -2075,6 +2076,67 @@ _winapi_NeedCurrentDirectoryForExePath_impl(PyObject *module,
     return result;
 }
 
+
+/*[clinic input]
+_winapi.CopyFile2
+
+    existing_file_name: LPCWSTR
+    new_file_name: LPCWSTR
+    flags: DWORD
+    progress_routine: object = None
+
+Copies a file from one name to a new name.
+
+This is implemented using the CopyFile2 API, which preserves all stat
+and metadata information apart from security attributes.
+
+progress_routine is reserved for future use, but is currently not
+implemented. Its value is ignored.
+[clinic start generated code]*/
+
+static PyObject *
+_winapi_CopyFile2_impl(PyObject *module, LPCWSTR existing_file_name,
+                       LPCWSTR new_file_name, DWORD flags,
+                       PyObject *progress_routine)
+/*[clinic end generated code: output=43d960d9df73d984 input=fb976b8d1492d130]*/
+{
+    HRESULT hr;
+    COPYFILE2_EXTENDED_PARAMETERS params = { sizeof(COPYFILE2_EXTENDED_PARAMETERS) };
+
+    if (PySys_Audit("_winapi.CopyFile2", "uuI",
+                    existing_file_name, new_file_name, flags) < 0) {
+        return NULL;
+    }
+
+    params.dwCopyFlags = flags;
+    /* For future implementation. We ignore the value for now so that
+       users only have to test for 'CopyFile2' existing and not whether
+       the additional parameter exists.
+    if (progress_routine != Py_None) {
+        params.pProgressRoutine = _winapi_CopyFile2ProgressRoutine;
+        params.pvCallbackContext = Py_NewRef(progress_routine);
+    }
+    */
+    Py_BEGIN_ALLOW_THREADS;
+    hr = CopyFile2(existing_file_name, new_file_name, &params);
+    Py_END_ALLOW_THREADS;
+    /* For future implementation.
+    if (progress_routine != Py_None) {
+        Py_DECREF(progress_routine);
+    }
+    */
+    if (FAILED(hr)) {
+        if ((hr & 0xFFFF0000) == 0x80070000) {
+            PyErr_SetFromWindowsErr(hr & 0xFFFF);
+        } else {
+            PyErr_SetFromWindowsErr(hr);
+        }
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+
 static PyMethodDef winapi_functions[] = {
     _WINAPI_CLOSEHANDLE_METHODDEF
     _WINAPI_CONNECTNAMEDPIPE_METHODDEF
@@ -2110,6 +2172,7 @@ static PyMethodDef winapi_functions[] = {
     _WINAPI_GETFILETYPE_METHODDEF
     _WINAPI__MIMETYPES_READ_WINDOWS_REGISTRY_METHODDEF
     _WINAPI_NEEDCURRENTDIRECTORYFOREXEPATH_METHODDEF
+    _WINAPI_COPYFILE2_METHODDEF
     {NULL, NULL}
 };
 
@@ -2146,6 +2209,7 @@ static int winapi_exec(PyObject *m)
     WINAPI_CONSTANT(F_DWORD, CREATE_NEW_PROCESS_GROUP);
     WINAPI_CONSTANT(F_DWORD, DUPLICATE_SAME_ACCESS);
     WINAPI_CONSTANT(F_DWORD, DUPLICATE_CLOSE_SOURCE);
+    WINAPI_CONSTANT(F_DWORD, ERROR_ACCESS_DENIED);
     WINAPI_CONSTANT(F_DWORD, ERROR_ALREADY_EXISTS);
     WINAPI_CONSTANT(F_DWORD, ERROR_BROKEN_PIPE);
     WINAPI_CONSTANT(F_DWORD, ERROR_IO_PENDING);
@@ -2159,6 +2223,7 @@ static int winapi_exec(PyObject *m)
     WINAPI_CONSTANT(F_DWORD, ERROR_OPERATION_ABORTED);
     WINAPI_CONSTANT(F_DWORD, ERROR_PIPE_BUSY);
     WINAPI_CONSTANT(F_DWORD, ERROR_PIPE_CONNECTED);
+    WINAPI_CONSTANT(F_DWORD, ERROR_PRIVILEGE_NOT_HELD);
     WINAPI_CONSTANT(F_DWORD, ERROR_SEM_TIMEOUT);
     WINAPI_CONSTANT(F_DWORD, FILE_FLAG_FIRST_PIPE_INSTANCE);
     WINAPI_CONSTANT(F_DWORD, FILE_FLAG_OVERLAPPED);
@@ -2252,6 +2317,34 @@ static int winapi_exec(PyObject *m)
     WINAPI_CONSTANT(F_DWORD, LCMAP_TRADITIONAL_CHINESE);
     WINAPI_CONSTANT(F_DWORD, LCMAP_UPPERCASE);
 
+    WINAPI_CONSTANT(F_DWORD, COPY_FILE_ALLOW_DECRYPTED_DESTINATION);
+    WINAPI_CONSTANT(F_DWORD, COPY_FILE_COPY_SYMLINK);
+    WINAPI_CONSTANT(F_DWORD, COPY_FILE_FAIL_IF_EXISTS);
+    WINAPI_CONSTANT(F_DWORD, COPY_FILE_NO_BUFFERING);
+    WINAPI_CONSTANT(F_DWORD, COPY_FILE_NO_OFFLOAD);
+    WINAPI_CONSTANT(F_DWORD, COPY_FILE_OPEN_SOURCE_FOR_WRITE);
+    WINAPI_CONSTANT(F_DWORD, COPY_FILE_RESTARTABLE);
+    WINAPI_CONSTANT(F_DWORD, COPY_FILE_REQUEST_SECURITY_PRIVILEGES);
+    WINAPI_CONSTANT(F_DWORD, COPY_FILE_RESUME_FROM_PAUSE);
+#ifndef COPY_FILE_REQUEST_COMPRESSED_TRAFFIC
+    // Only defined in newer WinSDKs
+    #define COPY_FILE_REQUEST_COMPRESSED_TRAFFIC 0x10000000
+#endif
+    WINAPI_CONSTANT(F_DWORD, COPY_FILE_REQUEST_COMPRESSED_TRAFFIC);
+
+    WINAPI_CONSTANT(F_DWORD, COPYFILE2_CALLBACK_CHUNK_STARTED);
+    WINAPI_CONSTANT(F_DWORD, COPYFILE2_CALLBACK_CHUNK_FINISHED);
+    WINAPI_CONSTANT(F_DWORD, COPYFILE2_CALLBACK_STREAM_STARTED);
+    WINAPI_CONSTANT(F_DWORD, COPYFILE2_CALLBACK_STREAM_FINISHED);
+    WINAPI_CONSTANT(F_DWORD, COPYFILE2_CALLBACK_POLL_CONTINUE);
+    WINAPI_CONSTANT(F_DWORD, COPYFILE2_CALLBACK_ERROR);
+
+    WINAPI_CONSTANT(F_DWORD, COPYFILE2_PROGRESS_CONTINUE);
+    WINAPI_CONSTANT(F_DWORD, COPYFILE2_PROGRESS_CANCEL);
+    WINAPI_CONSTANT(F_DWORD, COPYFILE2_PROGRESS_STOP);
+    WINAPI_CONSTANT(F_DWORD, COPYFILE2_PROGRESS_QUIET);
+    WINAPI_CONSTANT(F_DWORD, COPYFILE2_PROGRESS_PAUSE);
+
     WINAPI_CONSTANT("i", NULL);
 
     return 0;
diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h
index 7bc63e612be3..3767b19d76db 100644
--- a/Modules/clinic/_winapi.c.h
+++ b/Modules/clinic/_winapi.c.h
@@ -1411,4 +1411,74 @@ _winapi_NeedCurrentDirectoryForExePath(PyObject *module, PyObject *arg)
 
     return return_value;
 }
-/*[clinic end generated code: output=96ea65ece7912d0a input=a9049054013a1b77]*/
+
+PyDoc_STRVAR(_winapi_CopyFile2__doc__,
+"CopyFile2($module, /, existing_file_name, new_file_name, flags,\n"
+"          progress_routine=None)\n"
+"--\n"
+"\n"
+"Copies a file from one name to a new name.\n"
+"\n"
+"This is implemented using the CopyFile2 API, which preserves all stat\n"
+"and metadata information apart from security attributes.\n"
+"\n"
+"progress_routine is reserved for future use, but is currently not\n"
+"implemented. Its value is ignored.");
+
+#define _WINAPI_COPYFILE2_METHODDEF    \
+    {"CopyFile2", _PyCFunction_CAST(_winapi_CopyFile2), METH_FASTCALL|METH_KEYWORDS, _winapi_CopyFile2__doc__},
+
+static PyObject *
+_winapi_CopyFile2_impl(PyObject *module, LPCWSTR existing_file_name,
+                       LPCWSTR new_file_name, DWORD flags,
+                       PyObject *progress_routine);
+
+static PyObject *
+_winapi_CopyFile2(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 4
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_item = { &_Py_ID(existing_file_name), &_Py_ID(new_file_name), &_Py_ID(flags), &_Py_ID(progress_routine), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else  // !Py_BUILD_CORE
+    #  define KWTUPLE NULL
+    #endif  // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"existing_file_name", "new_file_name", "flags", "progress_routine", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .format = "O&O&k|O:CopyFile2",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    LPCWSTR existing_file_name = NULL;
+    LPCWSTR new_file_name = NULL;
+    DWORD flags;
+    PyObject *progress_routine = Py_None;
+
+    if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser,
+        _PyUnicode_WideCharString_Converter, &existing_file_name, _PyUnicode_WideCharString_Converter, &new_file_name, &flags, &progress_routine)) {
+        goto exit;
+    }
+    return_value = _winapi_CopyFile2_impl(module, existing_file_name, new_file_name, flags, progress_routine);
+
+exit:
+    /* Cleanup for existing_file_name */
+    PyMem_Free((void *)existing_file_name);
+    /* Cleanup for new_file_name */
+    PyMem_Free((void *)new_file_name);
+
+    return return_value;
+}
+/*[clinic end generated code: output=be1343b3759e0c96 input=a9049054013a1b77]*/



More information about the Python-checkins mailing list