[Python-checkins] [3.7] bpo-36389: _PyObject_IsFreed() now also detects uninitialized memory (GH-12770) (GH-12788)

Victor Stinner webhook-mailer at python.org
Thu Apr 11 16:30:36 EDT 2019


https://github.com/python/cpython/commit/9e23f0a27cb8bf6e4ea1d2aef36a91502282bbc9
commit: 9e23f0a27cb8bf6e4ea1d2aef36a91502282bbc9
branch: 3.7
author: Victor Stinner <vstinner at redhat.com>
committer: GitHub <noreply at github.com>
date: 2019-04-11T22:30:31+02:00
summary:

[3.7] bpo-36389: _PyObject_IsFreed() now also detects uninitialized memory (GH-12770) (GH-12788)

* bpo-36389: _PyObject_IsFreed() now also detects uninitialized memory (GH-12770)

Replace _PyMem_IsFreed() function with _PyMem_IsPtrFreed() inline
function. The function is now way more efficient, it became a simple
comparison on integers, rather than a short loop. It detects also
uninitialized bytes and "forbidden bytes" filled by debug hooks
on memory allocators.

Add unit tests on _PyObject_IsFreed().

(cherry picked from commit 2b00db68554422ec37faba2a80179a0172df6349)

* bpo-36389: Change PyMem_SetupDebugHooks() constants (GH-12782)

Modify CLEANBYTE, DEADDYTE and FORBIDDENBYTE constants: use 0xCD,
0xDD and 0xFD, rather than 0xCB, 0xBB and 0xFB, to use the same byte
patterns than Windows CRT debug malloc() and free().

(cherry picked from commit 4c409beb4c360a73d054f37807d3daad58d1b567)

files:
A Misc/NEWS.d/next/C API/2019-04-11-12-20-35.bpo-36389.P9QFoP.rst
M Doc/c-api/memory.rst
M Include/internal/mem.h
M Include/pymem.h
M Lib/test/test_capi.py
M Modules/_testcapimodule.c
M Objects/object.c
M Objects/obmalloc.c

diff --git a/Doc/c-api/memory.rst b/Doc/c-api/memory.rst
index b79b7e49b67e..9b42900b4350 100644
--- a/Doc/c-api/memory.rst
+++ b/Doc/c-api/memory.rst
@@ -440,8 +440,9 @@ Customize Memory Allocators
 
    Setup hooks to detect bugs in the Python memory allocator functions.
 
-   Newly allocated memory is filled with the byte ``0xCB``, freed memory is
-   filled with the byte ``0xDB``.
+   Newly allocated memory is filled with the byte ``0xCD`` (``CLEANBYTE``),
+   freed memory is filled with the byte ``0xDD`` (``DEADBYTE``). Memory blocks
+   are surrounded by "forbidden bytes" (``FORBIDDENBYTE``: byte ``0xFD``).
 
    Runtime checks:
 
@@ -471,6 +472,12 @@ Customize Memory Allocators
       if the GIL is held when functions of :c:data:`PYMEM_DOMAIN_OBJ` and
       :c:data:`PYMEM_DOMAIN_MEM` domains are called.
 
+   .. versionchanged:: 3.7.3
+      Byte patterns ``0xCB`` (``CLEANBYTE``), ``0xDB`` (``DEADBYTE``) and
+      ``0xFB`` (``FORBIDDENBYTE``) have been replaced with ``0xCD``, ``0xDD``
+      and ``0xFD`` to use the same values than Windows CRT debug ``malloc()``
+      and ``free()``.
+
 
 .. _pymalloc:
 
diff --git a/Include/internal/mem.h b/Include/internal/mem.h
index a731e30e6af7..5896e4a05055 100644
--- a/Include/internal/mem.h
+++ b/Include/internal/mem.h
@@ -145,6 +145,30 @@ PyAPI_FUNC(void) _PyGC_Initialize(struct _gc_runtime_state *);
 
 #define _PyGC_generation0 _PyRuntime.gc.generation0
 
+/* Heuristic checking if a pointer value is newly allocated
+   (uninitialized) or newly freed. The pointer is not dereferenced, only the
+   pointer value is checked.
+
+   The heuristic relies on the debug hooks on Python memory allocators which
+   fills newly allocated memory with CLEANBYTE (0xCD) and newly freed memory
+   with DEADBYTE (0xDD). Detect also "untouchable bytes" marked
+   with FORBIDDENBYTE (0xFD). */
+static inline int _PyMem_IsPtrFreed(void *ptr)
+{
+    uintptr_t value = (uintptr_t)ptr;
+#if SIZEOF_VOID_P == 8
+    return (value == (uintptr_t)0xCDCDCDCDCDCDCDCD
+            || value == (uintptr_t)0xDDDDDDDDDDDDDDDD
+            || value == (uintptr_t)0xFDFDFDFDFDFDFDFD);
+#elif SIZEOF_VOID_P == 4
+    return (value == (uintptr_t)0xCDCDCDCD
+            || value == (uintptr_t)0xDDDDDDDD
+            || value == (uintptr_t)0xFDFDFDFD);
+#else
+#  error "unknown pointer size"
+#endif
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/Include/pymem.h b/Include/pymem.h
index ef6e0bb5e25f..458a6489c75d 100644
--- a/Include/pymem.h
+++ b/Include/pymem.h
@@ -55,8 +55,6 @@ PyAPI_FUNC(int) PyTraceMalloc_Untrack(
 PyAPI_FUNC(PyObject*) _PyTraceMalloc_GetTraceback(
     unsigned int domain,
     uintptr_t ptr);
-
-PyAPI_FUNC(int) _PyMem_IsFreed(void *ptr, size_t size);
 #endif   /* !defined(Py_LIMITED_API) */
 
 
diff --git a/Lib/test/test_capi.py b/Lib/test/test_capi.py
index 65e0795aba84..d94ee0227c87 100644
--- a/Lib/test/test_capi.py
+++ b/Lib/test/test_capi.py
@@ -462,11 +462,11 @@ def test_buffer_overflow(self):
                  r"    The [0-9] pad bytes at p-[0-9] are FORBIDDENBYTE, as expected.\n"
                  r"    The [0-9] pad bytes at tail={ptr} are not all FORBIDDENBYTE \(0x[0-9a-f]{{2}}\):\n"
                  r"        at tail\+0: 0x78 \*\*\* OUCH\n"
-                 r"        at tail\+1: 0xfb\n"
-                 r"        at tail\+2: 0xfb\n"
+                 r"        at tail\+1: 0xfd\n"
+                 r"        at tail\+2: 0xfd\n"
                  r"        .*\n"
                  r"    The block was made by call #[0-9]+ to debug malloc/realloc.\n"
-                 r"    Data at p: cb cb cb .*\n"
+                 r"    Data at p: cd cd cd .*\n"
                  r"\n"
                  r"Enable tracemalloc to get the memory block allocation traceback\n"
                  r"\n"
@@ -482,7 +482,7 @@ def test_api_misuse(self):
                  r"    The [0-9] pad bytes at p-[0-9] are FORBIDDENBYTE, as expected.\n"
                  r"    The [0-9] pad bytes at tail={ptr} are FORBIDDENBYTE, as expected.\n"
                  r"    The block was made by call #[0-9]+ to debug malloc/realloc.\n"
-                 r"    Data at p: cb cb cb .*\n"
+                 r"    Data at p: cd cd cd .*\n"
                  r"\n"
                  r"Enable tracemalloc to get the memory block allocation traceback\n"
                  r"\n"
@@ -508,6 +508,29 @@ def test_pyobject_malloc_without_gil(self):
         code = 'import _testcapi; _testcapi.pyobject_malloc_without_gil()'
         self.check_malloc_without_gil(code)
 
+    def check_pyobject_is_freed(self, func):
+        code = textwrap.dedent('''
+            import gc, os, sys, _testcapi
+            # Disable the GC to avoid crash on GC collection
+            gc.disable()
+            obj = _testcapi.{func}()
+            error = (_testcapi.pyobject_is_freed(obj) == False)
+            # Exit immediately to avoid a crash while deallocating
+            # the invalid object
+            os._exit(int(error))
+        ''')
+        code = code.format(func=func)
+        assert_python_ok('-c', code, PYTHONMALLOC=self.PYTHONMALLOC)
+
+    def test_pyobject_is_freed_uninitialized(self):
+        self.check_pyobject_is_freed('pyobject_uninitialized')
+
+    def test_pyobject_is_freed_forbidden_bytes(self):
+        self.check_pyobject_is_freed('pyobject_forbidden_bytes')
+
+    def test_pyobject_is_freed_free(self):
+        self.check_pyobject_is_freed('pyobject_freed')
+
 
 class PyMemMallocDebugTests(PyMemDebugTests):
     PYTHONMALLOC = 'malloc_debug'
diff --git a/Misc/NEWS.d/next/C API/2019-04-11-12-20-35.bpo-36389.P9QFoP.rst b/Misc/NEWS.d/next/C API/2019-04-11-12-20-35.bpo-36389.P9QFoP.rst
new file mode 100644
index 000000000000..f2b507a9c230
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2019-04-11-12-20-35.bpo-36389.P9QFoP.rst	
@@ -0,0 +1,5 @@
+Change the value of ``CLEANBYTE``, ``DEADDYTE`` and ``FORBIDDENBYTE`` internal
+constants used by debug hooks on Python memory allocators
+(:c:func:`PyMem_SetupDebugHooks` function). Byte patterns ``0xCB``, ``0xDB``
+and ``0xFB`` have been replaced with ``0xCD``, ``0xDD`` and ``0xFD`` to use the
+same values than Windows CRT debug ``malloc()`` and ``free()``.
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index 1e33ca872d45..b864f9270e9d 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -4227,6 +4227,59 @@ test_pymem_getallocatorsname(PyObject *self, PyObject *args)
 }
 
 
+static PyObject*
+pyobject_is_freed(PyObject *self, PyObject *op)
+{
+    int res = _PyObject_IsFreed(op);
+    return PyBool_FromLong(res);
+}
+
+
+static PyObject*
+pyobject_uninitialized(PyObject *self, PyObject *args)
+{
+    PyObject *op = (PyObject *)PyObject_Malloc(sizeof(PyObject));
+    if (op == NULL) {
+        return NULL;
+    }
+    /* Initialize reference count to avoid early crash in ceval or GC */
+    Py_REFCNT(op) = 1;
+    /* object fields like ob_type are uninitialized! */
+    return op;
+}
+
+
+static PyObject*
+pyobject_forbidden_bytes(PyObject *self, PyObject *args)
+{
+    /* Allocate an incomplete PyObject structure: truncate 'ob_type' field */
+    PyObject *op = (PyObject *)PyObject_Malloc(offsetof(PyObject, ob_type));
+    if (op == NULL) {
+        return NULL;
+    }
+    /* Initialize reference count to avoid early crash in ceval or GC */
+    Py_REFCNT(op) = 1;
+    /* ob_type field is after the memory block: part of "forbidden bytes"
+       when using debug hooks on memory allocatrs! */
+    return op;
+}
+
+
+static PyObject*
+pyobject_freed(PyObject *self, PyObject *args)
+{
+    PyObject *op = _PyObject_CallNoArg((PyObject *)&PyBaseObject_Type);
+    if (op == NULL) {
+        return NULL;
+    }
+    Py_TYPE(op)->tp_dealloc(op);
+    /* Reset reference count to avoid early crash in ceval or GC */
+    Py_REFCNT(op) = 1;
+    /* object memory is freed! */
+    return op;
+}
+
+
 static PyObject*
 pyobject_malloc_without_gil(PyObject *self, PyObject *args)
 {
@@ -4788,6 +4841,10 @@ static PyMethodDef TestMethods[] = {
     {"pymem_api_misuse", pymem_api_misuse, METH_NOARGS},
     {"pymem_malloc_without_gil", pymem_malloc_without_gil, METH_NOARGS},
     {"pymem_getallocatorsname", test_pymem_getallocatorsname, METH_NOARGS},
+    {"pyobject_is_freed", (PyCFunction)(void(*)(void))pyobject_is_freed, METH_O},
+    {"pyobject_uninitialized", pyobject_uninitialized, METH_NOARGS},
+    {"pyobject_forbidden_bytes", pyobject_forbidden_bytes, METH_NOARGS},
+    {"pyobject_freed", pyobject_freed, METH_NOARGS},
     {"pyobject_malloc_without_gil", pyobject_malloc_without_gil, METH_NOARGS},
     {"tracemalloc_track", tracemalloc_track, METH_VARARGS},
     {"tracemalloc_untrack", tracemalloc_untrack, METH_VARARGS},
diff --git a/Objects/object.c b/Objects/object.c
index 138df4488027..420af9465b5c 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -411,28 +411,26 @@ _Py_BreakPoint(void)
 }
 
 
-/* Heuristic checking if the object memory has been deallocated.
-   Rely on the debug hooks on Python memory allocators which fills the memory
-   with DEADBYTE (0xDB) when memory is deallocated.
+/* Heuristic checking if the object memory is uninitialized or deallocated.
+   Rely on the debug hooks on Python memory allocators:
+   see _PyMem_IsPtrFreed().
 
    The function can be used to prevent segmentation fault on dereferencing
-   pointers like 0xdbdbdbdbdbdbdbdb. Such pointer is very unlikely to be mapped
-   in memory. */
+   pointers like 0xDDDDDDDDDDDDDDDD. */
 int
 _PyObject_IsFreed(PyObject *op)
 {
-    uintptr_t ptr = (uintptr_t)op;
-    if (_PyMem_IsFreed(&ptr, sizeof(ptr))) {
+    if (_PyMem_IsPtrFreed(op) || _PyMem_IsPtrFreed(op->ob_type)) {
         return 1;
     }
-    int freed = _PyMem_IsFreed(&op->ob_type, sizeof(op->ob_type));
-    /* ignore op->ob_ref: the value can have be modified
+    /* ignore op->ob_ref: its value can have be modified
        by Py_INCREF() and Py_DECREF(). */
 #ifdef Py_TRACE_REFS
-    freed &= _PyMem_IsFreed(&op->_ob_next, sizeof(op->_ob_next));
-    freed &= _PyMem_IsFreed(&op->_ob_prev, sizeof(op->_ob_prev));
+    if (_PyMem_IsPtrFreed(op->_ob_next) || _PyMem_IsPtrFreed(op->_ob_prev)) {
+        return 1;
+    }
 #endif
-    return freed;
+    return 0;
 }
 
 
@@ -449,7 +447,7 @@ _PyObject_Dump(PyObject* op)
     if (_PyObject_IsFreed(op)) {
         /* It seems like the object memory has been freed:
            don't access it to prevent a segmentation fault. */
-        fprintf(stderr, "<freed object>\n");
+        fprintf(stderr, "<Freed object>\n");
         return;
     }
 
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 3b0c35bcc941..46e84270b26e 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -1946,14 +1946,17 @@ _Py_GetAllocatedBlocks(void)
 
 /* Special bytes broadcast into debug memory blocks at appropriate times.
  * Strings of these are unlikely to be valid addresses, floats, ints or
- * 7-bit ASCII.
+ * 7-bit ASCII. If modified, _PyMem_IsPtrFreed() should be updated as well.
+ *
+ * Byte patterns 0xCB, 0xBB and 0xFB have been replaced with 0xCD, 0xDD and
+ * 0xFD to use the same values than Windows CRT debug malloc() and free().
  */
 #undef CLEANBYTE
 #undef DEADBYTE
 #undef FORBIDDENBYTE
-#define CLEANBYTE      0xCB    /* clean (newly allocated) memory */
-#define DEADBYTE       0xDB    /* dead (newly freed) memory */
-#define FORBIDDENBYTE  0xFB    /* untouchable bytes at each end of a block */
+#define CLEANBYTE      0xCD    /* clean (newly allocated) memory */
+#define DEADBYTE       0xDD    /* dead (newly freed) memory */
+#define FORBIDDENBYTE  0xFD    /* untouchable bytes at each end of a block */
 
 static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
 
@@ -2091,22 +2094,6 @@ _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
 }
 
 
-/* Heuristic checking if the memory has been freed. Rely on the debug hooks on
-   Python memory allocators which fills the memory with DEADBYTE (0xDB) when
-   memory is deallocated. */
-int
-_PyMem_IsFreed(void *ptr, size_t size)
-{
-    unsigned char *bytes = ptr;
-    for (size_t i=0; i < size; i++) {
-        if (bytes[i] != DEADBYTE) {
-            return 0;
-        }
-    }
-    return 1;
-}
-
-
 /* The debug free first checks the 2*SST bytes on each end for sanity (in
    particular, that the FORBIDDENBYTEs with the api ID are still intact).
    Then fills the original bytes with DEADBYTE.



More information about the Python-checkins mailing list