[Python-checkins] bpo-44582: Accelerate mimetypes.init on Windows with a native accelerator (GH-27059)
miss-islington
webhook-mailer at python.org
Thu Jul 8 12:13:15 EDT 2021
https://github.com/python/cpython/commit/08697ac5d1543fca3629c719ab43e50d73021631
commit: 08697ac5d1543fca3629c719ab43e50d73021631
branch: 3.10
author: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
committer: miss-islington <31488909+miss-islington at users.noreply.github.com>
date: 2021-07-08T09:13:06-07:00
summary:
bpo-44582: Accelerate mimetypes.init on Windows with a native accelerator (GH-27059)
(cherry picked from commit bbf2fb6c7ae78f40483606f467739a58cd747270)
Co-authored-by: Steve Dower <steve.dower at python.org>
files:
A Misc/NEWS.d/next/Windows/2021-07-07-21-07-18.bpo-44582.4Mm6Hh.rst
M Lib/mimetypes.py
M Lib/test/test_mimetypes.py
M Modules/_winapi.c
M Modules/clinic/_winapi.c.h
diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py
index 018793c4f01e46..1e83131d05b1c7 100644
--- a/Lib/mimetypes.py
+++ b/Lib/mimetypes.py
@@ -27,6 +27,12 @@
import sys
import posixpath
import urllib.parse
+
+try:
+ from _winapi import _mimetypes_read_windows_registry
+except ImportError:
+ _mimetypes_read_windows_registry = None
+
try:
import winreg as _winreg
except ImportError:
@@ -237,10 +243,21 @@ def read_windows_registry(self, strict=True):
types.
"""
- # Windows only
- if not _winreg:
+ if not _mimetypes_read_windows_registry and not _winreg:
return
+ add_type = self.add_type
+ if strict:
+ add_type = lambda type, ext: self.add_type(type, ext, True)
+
+ # Accelerated function if it is available
+ if _mimetypes_read_windows_registry:
+ _mimetypes_read_windows_registry(add_type)
+ elif _winreg:
+ self._read_windows_registry(add_type)
+
+ @classmethod
+ def _read_windows_registry(cls, add_type):
def enum_types(mimedb):
i = 0
while True:
@@ -265,7 +282,7 @@ def enum_types(mimedb):
subkey, 'Content Type')
if datatype != _winreg.REG_SZ:
continue
- self.add_type(mimetype, subkeyname, strict)
+ add_type(mimetype, subkeyname)
except OSError:
continue
@@ -349,8 +366,8 @@ def init(files=None):
if files is None or _db is None:
db = MimeTypes()
- if _winreg:
- db.read_windows_registry()
+ # Quick return if not supported
+ db.read_windows_registry()
if files is None:
files = knownfiles
diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py
index f5c040a97addf2..fb9cb04452c300 100644
--- a/Lib/test/test_mimetypes.py
+++ b/Lib/test/test_mimetypes.py
@@ -9,6 +9,11 @@
from test.support import os_helper
from platform import win32_edition
+try:
+ import _winapi
+except ImportError:
+ _winapi = None
+
def setUpModule():
global knownfiles
@@ -235,6 +240,21 @@ def test_registry_parsing(self):
eq(self.db.guess_type("image.jpg"), ("image/jpeg", None))
eq(self.db.guess_type("image.png"), ("image/png", None))
+ @unittest.skipIf(not hasattr(_winapi, "_mimetypes_read_windows_registry"),
+ "read_windows_registry accelerator unavailable")
+ def test_registry_accelerator(self):
+ from_accel = {}
+ from_reg = {}
+ _winapi._mimetypes_read_windows_registry(
+ lambda v, k: from_accel.setdefault(k, set()).add(v)
+ )
+ mimetypes.MimeTypes._read_windows_registry(
+ lambda v, k: from_reg.setdefault(k, set()).add(v)
+ )
+ self.assertEqual(list(from_reg), list(from_accel))
+ for k in from_reg:
+ self.assertEqual(from_reg[k], from_accel[k])
+
class MiscTestCase(unittest.TestCase):
def test__all__(self):
@@ -288,6 +308,5 @@ def test_guess_type(self):
type_info = self.mimetypes_cmd("foo.pic")
eq(type_info, "I don't know anything about type foo.pic")
-
if __name__ == "__main__":
unittest.main()
diff --git a/Misc/NEWS.d/next/Windows/2021-07-07-21-07-18.bpo-44582.4Mm6Hh.rst b/Misc/NEWS.d/next/Windows/2021-07-07-21-07-18.bpo-44582.4Mm6Hh.rst
new file mode 100644
index 00000000000000..f79c88931c5310
--- /dev/null
+++ b/Misc/NEWS.d/next/Windows/2021-07-07-21-07-18.bpo-44582.4Mm6Hh.rst
@@ -0,0 +1,2 @@
+Accelerate speed of :mod:`mimetypes` initialization using a native
+implementation of the registry scan.
diff --git a/Modules/_winapi.c b/Modules/_winapi.c
index 1b85d7dd7ee97f..f341493503c925 100644
--- a/Modules/_winapi.c
+++ b/Modules/_winapi.c
@@ -1894,6 +1894,113 @@ _winapi_GetFileType_impl(PyObject *module, HANDLE handle)
return result;
}
+/*[clinic input]
+_winapi._mimetypes_read_windows_registry
+
+ on_type_read: object
+
+Optimized function for reading all known MIME types from the registry.
+
+*on_type_read* is a callable taking *type* and *ext* arguments, as for
+MimeTypes.add_type.
+[clinic start generated code]*/
+
+static PyObject *
+_winapi__mimetypes_read_windows_registry_impl(PyObject *module,
+ PyObject *on_type_read)
+/*[clinic end generated code: output=20829f00bebce55b input=cd357896d6501f68]*/
+{
+#define CCH_EXT 128
+#define CB_TYPE 510
+ struct {
+ wchar_t ext[CCH_EXT];
+ wchar_t type[CB_TYPE / sizeof(wchar_t) + 1];
+ } entries[64];
+ int entry = 0;
+ HKEY hkcr = NULL;
+ LRESULT err;
+
+ Py_BEGIN_ALLOW_THREADS
+ err = RegOpenKeyExW(HKEY_CLASSES_ROOT, NULL, 0, KEY_READ, &hkcr);
+ for (DWORD i = 0; err == ERROR_SUCCESS || err == ERROR_MORE_DATA; ++i) {
+ LPWSTR ext = entries[entry].ext;
+ LPWSTR type = entries[entry].type;
+ DWORD cchExt = CCH_EXT;
+ DWORD cbType = CB_TYPE;
+ HKEY subkey;
+ DWORD regType;
+
+ err = RegEnumKeyExW(hkcr, i, ext, &cchExt, NULL, NULL, NULL, NULL);
+ if (err != ERROR_SUCCESS || (cchExt && ext[0] != L'.')) {
+ continue;
+ }
+
+ err = RegOpenKeyExW(hkcr, ext, 0, KEY_READ, &subkey);
+ if (err == ERROR_FILE_NOT_FOUND) {
+ err = ERROR_SUCCESS;
+ continue;
+ } else if (err != ERROR_SUCCESS) {
+ continue;
+ }
+
+ err = RegQueryValueExW(subkey, L"Content Type", NULL,
+ &regType, (LPBYTE)type, &cbType);
+ RegCloseKey(subkey);
+ if (err == ERROR_FILE_NOT_FOUND) {
+ err = ERROR_SUCCESS;
+ continue;
+ } else if (err != ERROR_SUCCESS) {
+ continue;
+ } else if (regType != REG_SZ || !cbType) {
+ continue;
+ }
+ type[cbType / sizeof(wchar_t)] = L'\0';
+
+ entry += 1;
+
+ /* Flush our cached entries if we are full */
+ if (entry == sizeof(entries) / sizeof(entries[0])) {
+ Py_BLOCK_THREADS
+ for (int j = 0; j < entry; ++j) {
+ PyObject *r = PyObject_CallFunction(
+ on_type_read, "uu", entries[j].type, entries[j].ext
+ );
+ if (!r) {
+ /* We blocked threads, so safe to return from here */
+ RegCloseKey(hkcr);
+ return NULL;
+ }
+ Py_DECREF(r);
+ }
+ Py_UNBLOCK_THREADS
+ entry = 0;
+ }
+ }
+ if (hkcr) {
+ RegCloseKey(hkcr);
+ }
+ Py_END_ALLOW_THREADS
+
+ if (err != ERROR_SUCCESS && err != ERROR_NO_MORE_ITEMS) {
+ PyErr_SetFromWindowsErr((int)err);
+ return NULL;
+ }
+
+ for (int j = 0; j < entry; ++j) {
+ PyObject *r = PyObject_CallFunction(
+ on_type_read, "uu", entries[j].type, entries[j].ext
+ );
+ if (!r) {
+ return NULL;
+ }
+ Py_DECREF(r);
+ }
+
+ Py_RETURN_NONE;
+#undef CCH_EXT
+#undef CB_TYPE
+}
+
static PyMethodDef winapi_functions[] = {
_WINAPI_CLOSEHANDLE_METHODDEF
@@ -1926,6 +2033,7 @@ static PyMethodDef winapi_functions[] = {
_WINAPI_WRITEFILE_METHODDEF
_WINAPI_GETACP_METHODDEF
_WINAPI_GETFILETYPE_METHODDEF
+ _WINAPI__MIMETYPES_READ_WINDOWS_REGISTRY_METHODDEF
{NULL, NULL}
};
diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h
index a9630d55998d3d..5bda156d7aa80c 100644
--- a/Modules/clinic/_winapi.c.h
+++ b/Modules/clinic/_winapi.c.h
@@ -1148,4 +1148,40 @@ _winapi_GetFileType(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P
exit:
return return_value;
}
-/*[clinic end generated code: output=1f10e03f64ff9777 input=a9049054013a1b77]*/
+
+PyDoc_STRVAR(_winapi__mimetypes_read_windows_registry__doc__,
+"_mimetypes_read_windows_registry($module, /, on_type_read)\n"
+"--\n"
+"\n"
+"Optimized function for reading all known MIME types from the registry.\n"
+"\n"
+"*on_type_read* is a callable taking *type* and *ext* arguments, as for\n"
+"MimeTypes.add_type.");
+
+#define _WINAPI__MIMETYPES_READ_WINDOWS_REGISTRY_METHODDEF \
+ {"_mimetypes_read_windows_registry", (PyCFunction)(void(*)(void))_winapi__mimetypes_read_windows_registry, METH_FASTCALL|METH_KEYWORDS, _winapi__mimetypes_read_windows_registry__doc__},
+
+static PyObject *
+_winapi__mimetypes_read_windows_registry_impl(PyObject *module,
+ PyObject *on_type_read);
+
+static PyObject *
+_winapi__mimetypes_read_windows_registry(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ static const char * const _keywords[] = {"on_type_read", NULL};
+ static _PyArg_Parser _parser = {NULL, _keywords, "_mimetypes_read_windows_registry", 0};
+ PyObject *argsbuf[1];
+ PyObject *on_type_read;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ on_type_read = args[0];
+ return_value = _winapi__mimetypes_read_windows_registry_impl(module, on_type_read);
+
+exit:
+ return return_value;
+}
+/*[clinic end generated code: output=ac3623be6e42017c input=a9049054013a1b77]*/
More information about the Python-checkins
mailing list