[Python-checkins] bpo-44582: Accelerate mimetypes.init on Windows with a native accelerator (GH-27059)

miss-islington webhook-mailer at python.org
Thu Jul 8 12:13:15 EDT 2021


https://github.com/python/cpython/commit/08697ac5d1543fca3629c719ab43e50d73021631
commit: 08697ac5d1543fca3629c719ab43e50d73021631
branch: 3.10
author: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
committer: miss-islington <31488909+miss-islington at users.noreply.github.com>
date: 2021-07-08T09:13:06-07:00
summary:

bpo-44582: Accelerate mimetypes.init on Windows with a native accelerator (GH-27059)

(cherry picked from commit bbf2fb6c7ae78f40483606f467739a58cd747270)

Co-authored-by: Steve Dower <steve.dower at python.org>

files:
A Misc/NEWS.d/next/Windows/2021-07-07-21-07-18.bpo-44582.4Mm6Hh.rst
M Lib/mimetypes.py
M Lib/test/test_mimetypes.py
M Modules/_winapi.c
M Modules/clinic/_winapi.c.h

diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py
index 018793c4f01e46..1e83131d05b1c7 100644
--- a/Lib/mimetypes.py
+++ b/Lib/mimetypes.py
@@ -27,6 +27,12 @@
 import sys
 import posixpath
 import urllib.parse
+
+try:
+    from _winapi import _mimetypes_read_windows_registry
+except ImportError:
+    _mimetypes_read_windows_registry = None
+
 try:
     import winreg as _winreg
 except ImportError:
@@ -237,10 +243,21 @@ def read_windows_registry(self, strict=True):
         types.
         """
 
-        # Windows only
-        if not _winreg:
+        if not _mimetypes_read_windows_registry and not _winreg:
             return
 
+        add_type = self.add_type
+        if strict:
+            add_type = lambda type, ext: self.add_type(type, ext, True)
+
+        # Accelerated function if it is available
+        if _mimetypes_read_windows_registry:
+            _mimetypes_read_windows_registry(add_type)
+        elif _winreg:
+            self._read_windows_registry(add_type)
+
+    @classmethod
+    def _read_windows_registry(cls, add_type):
         def enum_types(mimedb):
             i = 0
             while True:
@@ -265,7 +282,7 @@ def enum_types(mimedb):
                             subkey, 'Content Type')
                         if datatype != _winreg.REG_SZ:
                             continue
-                        self.add_type(mimetype, subkeyname, strict)
+                        add_type(mimetype, subkeyname)
                 except OSError:
                     continue
 
@@ -349,8 +366,8 @@ def init(files=None):
 
     if files is None or _db is None:
         db = MimeTypes()
-        if _winreg:
-            db.read_windows_registry()
+        # Quick return if not supported
+        db.read_windows_registry()
 
         if files is None:
             files = knownfiles
diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py
index f5c040a97addf2..fb9cb04452c300 100644
--- a/Lib/test/test_mimetypes.py
+++ b/Lib/test/test_mimetypes.py
@@ -9,6 +9,11 @@
 from test.support import os_helper
 from platform import win32_edition
 
+try:
+    import _winapi
+except ImportError:
+    _winapi = None
+
 
 def setUpModule():
     global knownfiles
@@ -235,6 +240,21 @@ def test_registry_parsing(self):
         eq(self.db.guess_type("image.jpg"), ("image/jpeg", None))
         eq(self.db.guess_type("image.png"), ("image/png", None))
 
+    @unittest.skipIf(not hasattr(_winapi, "_mimetypes_read_windows_registry"),
+                     "read_windows_registry accelerator unavailable")
+    def test_registry_accelerator(self):
+        from_accel = {}
+        from_reg = {}
+        _winapi._mimetypes_read_windows_registry(
+            lambda v, k: from_accel.setdefault(k, set()).add(v)
+        )
+        mimetypes.MimeTypes._read_windows_registry(
+            lambda v, k: from_reg.setdefault(k, set()).add(v)
+        )
+        self.assertEqual(list(from_reg), list(from_accel))
+        for k in from_reg:
+            self.assertEqual(from_reg[k], from_accel[k])
+
 
 class MiscTestCase(unittest.TestCase):
     def test__all__(self):
@@ -288,6 +308,5 @@ def test_guess_type(self):
         type_info = self.mimetypes_cmd("foo.pic")
         eq(type_info, "I don't know anything about type foo.pic")
 
-
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Windows/2021-07-07-21-07-18.bpo-44582.4Mm6Hh.rst b/Misc/NEWS.d/next/Windows/2021-07-07-21-07-18.bpo-44582.4Mm6Hh.rst
new file mode 100644
index 00000000000000..f79c88931c5310
--- /dev/null
+++ b/Misc/NEWS.d/next/Windows/2021-07-07-21-07-18.bpo-44582.4Mm6Hh.rst
@@ -0,0 +1,2 @@
+Speed up :mod:`mimetypes` initialization on Windows by using a native
+implementation of the registry scan.
diff --git a/Modules/_winapi.c b/Modules/_winapi.c
index 1b85d7dd7ee97f..f341493503c925 100644
--- a/Modules/_winapi.c
+++ b/Modules/_winapi.c
@@ -1894,6 +1894,113 @@ _winapi_GetFileType_impl(PyObject *module, HANDLE handle)
     return result;
 }
 
+/*[clinic input]
+_winapi._mimetypes_read_windows_registry
+
+    on_type_read: object
+
+Optimized function for reading all known MIME types from the registry.
+
+*on_type_read* is a callable taking *type* and *ext* arguments, as for
+MimeTypes.add_type.
+[clinic start generated code]*/
+
+static PyObject *
+_winapi__mimetypes_read_windows_registry_impl(PyObject *module,
+                                              PyObject *on_type_read)
+/*[clinic end generated code: output=20829f00bebce55b input=cd357896d6501f68]*/
+{
+#define CCH_EXT 128  /* extension buffer size, in wchar_t */
+#define CB_TYPE 510  /* "Content Type" read limit, in bytes */
+    struct {
+        wchar_t ext[CCH_EXT];
+        wchar_t type[CB_TYPE / sizeof(wchar_t) + 1];  /* +1: forced NUL */
+    } entries[64];  /* filled without the GIL, flushed in batches */
+    int entry = 0;
+    HKEY hkcr = NULL;
+    LRESULT err;
+
+    Py_BEGIN_ALLOW_THREADS
+    err = RegOpenKeyExW(HKEY_CLASSES_ROOT, NULL, 0, KEY_READ, &hkcr);
+    /* ERROR_MORE_DATA (name or value too large) only skips that entry */
+    for (DWORD i = 0; err == ERROR_SUCCESS || err == ERROR_MORE_DATA; ++i) {
+        LPWSTR ext = entries[entry].ext;
+        LPWSTR type = entries[entry].type;
+        DWORD cchExt = CCH_EXT;
+        DWORD cbType = CB_TYPE;
+        HKEY subkey;
+        DWORD regType;
+
+        err = RegEnumKeyExW(hkcr, i, ext, &cchExt, NULL, NULL, NULL, NULL);
+        if (err != ERROR_SUCCESS || (cchExt && ext[0] != L'.')) {
+            continue;  /* enumeration failed, or not a ".ext" key */
+        }
+
+        err = RegOpenKeyExW(hkcr, ext, 0, KEY_READ, &subkey);
+        if (err == ERROR_FILE_NOT_FOUND || err == ERROR_ACCESS_DENIED) {
+            /* Missing or unreadable key: skip it, do not abort the scan */
+            err = ERROR_SUCCESS;
+            continue;
+        } else if (err != ERROR_SUCCESS) {
+            continue;
+        }
+
+        err = RegQueryValueExW(subkey, L"Content Type", NULL,
+                              &regType, (LPBYTE)type, &cbType);
+        RegCloseKey(subkey);
+        if (err == ERROR_FILE_NOT_FOUND) {
+            err = ERROR_SUCCESS;
+            continue;
+        } else if (err != ERROR_SUCCESS || regType != REG_SZ || !cbType) {
+            continue;
+        }
+        type[cbType / sizeof(wchar_t)] = L'\0';  /* data may lack a NUL */
+
+        entry += 1;  /* keep this slot */
+
+        /* Batch full: retake the GIL and hand each entry to the callback */
+        if (entry == sizeof(entries) / sizeof(entries[0])) {
+            Py_BLOCK_THREADS
+            for (int j = 0; j < entry; ++j) {
+                PyObject *r = PyObject_CallFunction(
+                    on_type_read, "uu", entries[j].type, entries[j].ext
+                );
+                if (!r) {
+                    /* The GIL is held here, so returning directly is safe */
+                    RegCloseKey(hkcr);
+                    return NULL;
+                }
+                Py_DECREF(r);
+            }
+            Py_UNBLOCK_THREADS
+            entry = 0;
+        }
+    }
+    if (hkcr) {
+        RegCloseKey(hkcr);
+    }
+    Py_END_ALLOW_THREADS
+
+    if (err != ERROR_SUCCESS && err != ERROR_NO_MORE_ITEMS) {
+        PyErr_SetFromWindowsErr((int)err);
+        return NULL;
+    }
+
+    for (int j = 0; j < entry; ++j) {  /* flush the last partial batch */
+        PyObject *r = PyObject_CallFunction(
+            on_type_read, "uu", entries[j].type, entries[j].ext
+        );
+        if (!r) {
+            return NULL;
+        }
+        Py_DECREF(r);
+    }
+
+    Py_RETURN_NONE;
+#undef CCH_EXT
+#undef CB_TYPE
+}
+
 
 static PyMethodDef winapi_functions[] = {
     _WINAPI_CLOSEHANDLE_METHODDEF
@@ -1926,6 +2033,7 @@ static PyMethodDef winapi_functions[] = {
     _WINAPI_WRITEFILE_METHODDEF
     _WINAPI_GETACP_METHODDEF
     _WINAPI_GETFILETYPE_METHODDEF
+    _WINAPI__MIMETYPES_READ_WINDOWS_REGISTRY_METHODDEF
     {NULL, NULL}
 };
 
diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h
index a9630d55998d3d..5bda156d7aa80c 100644
--- a/Modules/clinic/_winapi.c.h
+++ b/Modules/clinic/_winapi.c.h
@@ -1148,4 +1148,40 @@ _winapi_GetFileType(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=1f10e03f64ff9777 input=a9049054013a1b77]*/
+
+PyDoc_STRVAR(_winapi__mimetypes_read_windows_registry__doc__,
+"_mimetypes_read_windows_registry($module, /, on_type_read)\n"
+"--\n"
+"\n"
+"Optimized function for reading all known MIME types from the registry.\n"
+"\n"
+"*on_type_read* is a callable taking *type* and *ext* arguments, as for\n"
+"MimeTypes.add_type.");
+
+#define _WINAPI__MIMETYPES_READ_WINDOWS_REGISTRY_METHODDEF    \
+    {"_mimetypes_read_windows_registry", (PyCFunction)(void(*)(void))_winapi__mimetypes_read_windows_registry, METH_FASTCALL|METH_KEYWORDS, _winapi__mimetypes_read_windows_registry__doc__},
+
+static PyObject *
+_winapi__mimetypes_read_windows_registry_impl(PyObject *module,
+                                              PyObject *on_type_read);
+
+static PyObject *
+_winapi__mimetypes_read_windows_registry(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    static const char * const _keywords[] = {"on_type_read", NULL};
+    static _PyArg_Parser _parser = {NULL, _keywords, "_mimetypes_read_windows_registry", 0};
+    PyObject *argsbuf[1];
+    PyObject *on_type_read;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    on_type_read = args[0];
+    return_value = _winapi__mimetypes_read_windows_registry_impl(module, on_type_read);
+
+exit:
+    return return_value;
+}
+/*[clinic end generated code: output=ac3623be6e42017c input=a9049054013a1b77]*/



More information about the Python-checkins mailing list