[Python-checkins] bpo-36389: Add _PyObject_CheckConsistency() function (GH-12803)

Victor Stinner webhook-mailer at python.org
Fri Apr 12 15:51:39 EDT 2019


https://github.com/python/cpython/commit/0fc91eef34a1d9194904fa093c9fbd711af0f26c
commit: 0fc91eef34a1d9194904fa093c9fbd711af0f26c
branch: master
author: Victor Stinner <vstinner at redhat.com>
committer: GitHub <noreply at github.com>
date: 2019-04-12T21:51:34+02:00
summary:

bpo-36389: Add _PyObject_CheckConsistency() function (GH-12803)

Add a new _PyObject_CheckConsistency() function which can be used to
help debugging. The function is available in release mode.

Add a 'check_content' parameter to _PyDict_CheckConsistency().

files:
M Include/cpython/object.h
M Include/internal/pycore_object.h
M Objects/dictobject.c
M Objects/object.c
M Objects/typeobject.c
M Objects/unicodeobject.c

diff --git a/Include/cpython/object.h b/Include/cpython/object.h
index 64d196a722ee..ba52a4835823 100644
--- a/Include/cpython/object.h
+++ b/Include/cpython/object.h
@@ -446,6 +446,21 @@ PyAPI_FUNC(void) _PyObject_AssertFailed(
     int line,
     const char *function);
 
+/* Check if an object is consistent. For example, ensure that the reference
+   counter is greater than or equal to 1, and ensure that ob_type is not NULL.
+
+   Call _PyObject_AssertFailed() if the object is inconsistent.
+
+   If check_content is zero, only check header fields: reduce the overhead.
+
+   The function always return 1. The return value is just here to be able to
+   write:
+
+   assert(_PyObject_CheckConsistency(obj, 1)); */
+PyAPI_FUNC(int) _PyObject_CheckConsistency(
+    PyObject *op,
+    int check_content);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h
index a88b626332f1..c95595358a9e 100644
--- a/Include/internal/pycore_object.h
+++ b/Include/internal/pycore_object.h
@@ -10,6 +10,10 @@ extern "C" {
 
 #include "pycore_pystate.h"   /* _PyRuntime */
 
+PyAPI_FUNC(int) _PyType_CheckConsistency(PyTypeObject *type);
+PyAPI_FUNC(int) _PyUnicode_CheckConsistency(PyObject *op, int check_content);
+PyAPI_FUNC(int) _PyDict_CheckConsistency(PyObject *mp, int check_content);
+
 /* Tell the GC to track this object.
  *
  * NB: While the object is tracked by the collector, it must be safe to call the
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 9ff009f6aa4e..9b5c0a3be9ab 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -449,77 +449,77 @@ static PyObject *empty_values[1] = { NULL };
 /* Uncomment to check the dict content in _PyDict_CheckConsistency() */
 /* #define DEBUG_PYDICT */
 
+#ifdef DEBUG_PYDICT
+#  define ASSERT_CONSISTENT(op) assert(_PyDict_CheckConsistency((PyObject *)(op), 1))
+#else
+#  define ASSERT_CONSISTENT(op) assert(_PyDict_CheckConsistency((PyObject *)(op), 0))
+#endif
 
-#ifndef NDEBUG
-static int
-_PyDict_CheckConsistency(PyDictObject *mp)
+
+int
+_PyDict_CheckConsistency(PyObject *op, int check_content)
 {
-#define ASSERT(expr) _PyObject_ASSERT((PyObject *)mp, (expr))
+    _PyObject_ASSERT(op, PyDict_Check(op));
+    PyDictObject *mp = (PyDictObject *)op;
 
     PyDictKeysObject *keys = mp->ma_keys;
     int splitted = _PyDict_HasSplitTable(mp);
     Py_ssize_t usable = USABLE_FRACTION(keys->dk_size);
-#ifdef DEBUG_PYDICT
-    PyDictKeyEntry *entries = DK_ENTRIES(keys);
-    Py_ssize_t i;
-#endif
 
-    ASSERT(0 <= mp->ma_used && mp->ma_used <= usable);
-    ASSERT(IS_POWER_OF_2(keys->dk_size));
-    ASSERT(0 <= keys->dk_usable
-           && keys->dk_usable <= usable);
-    ASSERT(0 <= keys->dk_nentries
-           && keys->dk_nentries <= usable);
-    ASSERT(keys->dk_usable + keys->dk_nentries <= usable);
+    _PyObject_ASSERT(op, 0 <= mp->ma_used && mp->ma_used <= usable);
+    _PyObject_ASSERT(op, IS_POWER_OF_2(keys->dk_size));
+    _PyObject_ASSERT(op, 0 <= keys->dk_usable && keys->dk_usable <= usable);
+    _PyObject_ASSERT(op, 0 <= keys->dk_nentries && keys->dk_nentries <= usable);
+    _PyObject_ASSERT(op, keys->dk_usable + keys->dk_nentries <= usable);
 
     if (!splitted) {
         /* combined table */
-        ASSERT(keys->dk_refcnt == 1);
+        _PyObject_ASSERT(op, keys->dk_refcnt == 1);
     }
 
-#ifdef DEBUG_PYDICT
-    for (i=0; i < keys->dk_size; i++) {
-        Py_ssize_t ix = dictkeys_get_index(keys, i);
-        ASSERT(DKIX_DUMMY <= ix && ix <= usable);
-    }
+    if (check_content) {
+        PyDictKeyEntry *entries = DK_ENTRIES(keys);
+        Py_ssize_t i;
+
+        for (i=0; i < keys->dk_size; i++) {
+            Py_ssize_t ix = dictkeys_get_index(keys, i);
+            _PyObject_ASSERT(op, DKIX_DUMMY <= ix && ix <= usable);
+        }
 
-    for (i=0; i < usable; i++) {
-        PyDictKeyEntry *entry = &entries[i];
-        PyObject *key = entry->me_key;
+        for (i=0; i < usable; i++) {
+            PyDictKeyEntry *entry = &entries[i];
+            PyObject *key = entry->me_key;
 
-        if (key != NULL) {
-            if (PyUnicode_CheckExact(key)) {
-                Py_hash_t hash = ((PyASCIIObject *)key)->hash;
-                ASSERT(hash != -1);
-                ASSERT(entry->me_hash == hash);
-            }
-            else {
-                /* test_dict fails if PyObject_Hash() is called again */
-                ASSERT(entry->me_hash != -1);
+            if (key != NULL) {
+                if (PyUnicode_CheckExact(key)) {
+                    Py_hash_t hash = ((PyASCIIObject *)key)->hash;
+                    _PyObject_ASSERT(op, hash != -1);
+                    _PyObject_ASSERT(op, entry->me_hash == hash);
+                }
+                else {
+                    /* test_dict fails if PyObject_Hash() is called again */
+                    _PyObject_ASSERT(op, entry->me_hash != -1);
+                }
+                if (!splitted) {
+                    _PyObject_ASSERT(op, entry->me_value != NULL);
+                }
             }
-            if (!splitted) {
-                ASSERT(entry->me_value != NULL);
+
+            if (splitted) {
+                _PyObject_ASSERT(op, entry->me_value == NULL);
             }
         }
 
         if (splitted) {
-            ASSERT(entry->me_value == NULL);
+            /* splitted table */
+            for (i=0; i < mp->ma_used; i++) {
+                _PyObject_ASSERT(op, mp->ma_values[i] != NULL);
+            }
         }
     }
 
-    if (splitted) {
-        /* splitted table */
-        for (i=0; i < mp->ma_used; i++) {
-            ASSERT(mp->ma_values[i] != NULL);
-        }
-    }
-#endif
-
     return 1;
-
-#undef ASSERT
 }
-#endif
 
 
 static PyDictKeysObject *new_keys_object(Py_ssize_t size)
@@ -614,7 +614,7 @@ new_dict(PyDictKeysObject *keys, PyObject **values)
     mp->ma_values = values;
     mp->ma_used = 0;
     mp->ma_version_tag = DICT_NEXT_VERSION();
-    assert(_PyDict_CheckConsistency(mp));
+    ASSERT_CONSISTENT(mp);
     return (PyObject *)mp;
 }
 
@@ -675,7 +675,7 @@ clone_combined_dict(PyDictObject *orig)
         return NULL;
     }
     new->ma_used = orig->ma_used;
-    assert(_PyDict_CheckConsistency(new));
+    ASSERT_CONSISTENT(new);
     if (_PyObject_GC_IS_TRACKED(orig)) {
         /* Maintain tracking. */
         _PyObject_GC_TRACK(new);
@@ -1075,7 +1075,7 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
         mp->ma_keys->dk_usable--;
         mp->ma_keys->dk_nentries++;
         assert(mp->ma_keys->dk_usable >= 0);
-        assert(_PyDict_CheckConsistency(mp));
+        ASSERT_CONSISTENT(mp);
         return 0;
     }
 
@@ -1094,7 +1094,7 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
 
     mp->ma_version_tag = DICT_NEXT_VERSION();
     Py_XDECREF(old_value); /* which **CAN** re-enter (see issue #22653) */
-    assert(_PyDict_CheckConsistency(mp));
+    ASSERT_CONSISTENT(mp);
     Py_DECREF(key);
     return 0;
 
@@ -1582,7 +1582,7 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix,
     Py_DECREF(old_key);
     Py_DECREF(old_value);
 
-    assert(_PyDict_CheckConsistency(mp));
+    ASSERT_CONSISTENT(mp);
     return 0;
 }
 
@@ -1722,7 +1722,7 @@ PyDict_Clear(PyObject *op)
        assert(oldkeys->dk_refcnt == 1);
        dictkeys_decref(oldkeys);
     }
-    assert(_PyDict_CheckConsistency(mp));
+    ASSERT_CONSISTENT(mp);
 }
 
 /* Internal version of PyDict_Next that returns a hash value in addition
@@ -1852,7 +1852,7 @@ _PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *d
     ep->me_value = NULL;
     Py_DECREF(old_key);
 
-    assert(_PyDict_CheckConsistency(mp));
+    ASSERT_CONSISTENT(mp);
     return old_value;
 }
 
@@ -2434,7 +2434,7 @@ PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
     }
 
     i = 0;
-    assert(_PyDict_CheckConsistency((PyDictObject *)d));
+    ASSERT_CONSISTENT(d);
     goto Return;
 Fail:
     Py_XDECREF(item);
@@ -2586,7 +2586,7 @@ dict_merge(PyObject *a, PyObject *b, int override)
             /* Iterator completed, via error */
             return -1;
     }
-    assert(_PyDict_CheckConsistency((PyDictObject *)a));
+    ASSERT_CONSISTENT(a);
     return 0;
 }
 
@@ -2950,7 +2950,7 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
         mp->ma_version_tag = DICT_NEXT_VERSION();
     }
 
-    assert(_PyDict_CheckConsistency(mp));
+    ASSERT_CONSISTENT(mp);
     return value;
 }
 
@@ -3069,7 +3069,7 @@ dict_popitem_impl(PyDictObject *self)
     self->ma_keys->dk_nentries = i;
     self->ma_used--;
     self->ma_version_tag = DICT_NEXT_VERSION();
-    assert(_PyDict_CheckConsistency(self));
+    ASSERT_CONSISTENT(self);
     return res;
 }
 
@@ -3275,7 +3275,7 @@ dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
         Py_DECREF(self);
         return NULL;
     }
-    assert(_PyDict_CheckConsistency(d));
+    ASSERT_CONSISTENT(d);
     return self;
 }
 
diff --git a/Objects/object.c b/Objects/object.c
index 3fad73c493db..e7ec7aec490f 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -2,6 +2,7 @@
 /* Generic object operations; and implementation of None */
 
 #include "Python.h"
+#include "pycore_object.h"
 #include "pycore_pystate.h"
 #include "pycore_context.h"
 #include "frameobject.h"
@@ -19,6 +20,28 @@ _Py_IDENTIFIER(__bytes__);
 _Py_IDENTIFIER(__dir__);
 _Py_IDENTIFIER(__isabstractmethod__);
 
+
+int
+_PyObject_CheckConsistency(PyObject *op, int check_content)
+{
+    _PyObject_ASSERT(op, op != NULL);
+    _PyObject_ASSERT(op, !_PyObject_IsFreed(op));
+    _PyObject_ASSERT(op, Py_REFCNT(op) >= 1);
+
+    PyTypeObject *type = op->ob_type;
+    _PyObject_ASSERT(op, type != NULL);
+    _PyType_CheckConsistency(type);
+
+    if (PyUnicode_Check(op)) {
+        _PyUnicode_CheckConsistency(op, check_content);
+    }
+    else if (PyDict_Check(op)) {
+        _PyDict_CheckConsistency(op, check_content);
+    }
+    return 1;
+}
+
+
 #ifdef Py_REF_DEBUG
 Py_ssize_t _Py_RefTotal;
 
@@ -2136,7 +2159,13 @@ _PyObject_AssertFailed(PyObject *obj, const char *expr, const char *msg,
     else if (_PyObject_IsFreed(obj)) {
         /* It seems like the object memory has been freed:
            don't access it to prevent a segmentation fault. */
-        fprintf(stderr, "<Freed object>\n");
+        fprintf(stderr, "<object: freed>\n");
+    }
+    else if (Py_TYPE(obj) == NULL) {
+        fprintf(stderr, "<object: ob_type=NULL>\n");
+    }
+    else if (_PyObject_IsFreed((PyObject *)Py_TYPE(obj))) {
+        fprintf(stderr, "<object: freed type %p>\n", Py_TYPE(obj));
     }
     else {
         /* Diplay the traceback where the object has been allocated.
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 4c3909c098c4..37df4d23e4c1 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -131,8 +131,7 @@ skip_signature(const char *doc)
     return NULL;
 }
 
-#ifndef NDEBUG
-static int
+int
 _PyType_CheckConsistency(PyTypeObject *type)
 {
 #define ASSERT(expr) _PyObject_ASSERT((PyObject *)type, (expr))
@@ -142,14 +141,16 @@ _PyType_CheckConsistency(PyTypeObject *type)
         return 1;
     }
 
+    ASSERT(!_PyObject_IsFreed((PyObject *)type));
+    ASSERT(Py_REFCNT(type) >= 1);
+    ASSERT(PyType_Check(type));
+
     ASSERT(!(type->tp_flags & Py_TPFLAGS_READYING));
-    ASSERT(type->tp_mro != NULL && PyTuple_Check(type->tp_mro));
     ASSERT(type->tp_dict != NULL);
-    return 1;
 
+    return 1;
 #undef ASSERT
 }
-#endif
 
 static const char *
 _PyType_DocWithoutSignature(const char *name, const char *internal_doc)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e00dc37974f8..f6e68c94df55 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -401,23 +401,20 @@ PyUnicode_GetMax(void)
 #endif
 }
 
-#ifdef Py_DEBUG
 int
 _PyUnicode_CheckConsistency(PyObject *op, int check_content)
 {
-#define ASSERT(expr) _PyObject_ASSERT(op, (expr))
-
     PyASCIIObject *ascii;
     unsigned int kind;
 
-    ASSERT(PyUnicode_Check(op));
+    _PyObject_ASSERT(op, PyUnicode_Check(op));
 
     ascii = (PyASCIIObject *)op;
     kind = ascii->state.kind;
 
     if (ascii->state.ascii == 1 && ascii->state.compact == 1) {
-        ASSERT(kind == PyUnicode_1BYTE_KIND);
-        ASSERT(ascii->state.ready == 1);
+        _PyObject_ASSERT(op, kind == PyUnicode_1BYTE_KIND);
+        _PyObject_ASSERT(op, ascii->state.ready == 1);
     }
     else {
         PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
@@ -425,41 +422,41 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
 
         if (ascii->state.compact == 1) {
             data = compact + 1;
-            ASSERT(kind == PyUnicode_1BYTE_KIND
-                   || kind == PyUnicode_2BYTE_KIND
-                   || kind == PyUnicode_4BYTE_KIND);
-            ASSERT(ascii->state.ascii == 0);
-            ASSERT(ascii->state.ready == 1);
-            ASSERT (compact->utf8 != data);
+            _PyObject_ASSERT(op, kind == PyUnicode_1BYTE_KIND
+                                 || kind == PyUnicode_2BYTE_KIND
+                                 || kind == PyUnicode_4BYTE_KIND);
+            _PyObject_ASSERT(op, ascii->state.ascii == 0);
+            _PyObject_ASSERT(op, ascii->state.ready == 1);
+            _PyObject_ASSERT(op, compact->utf8 != data);
         }
         else {
             PyUnicodeObject *unicode = (PyUnicodeObject *)op;
 
             data = unicode->data.any;
             if (kind == PyUnicode_WCHAR_KIND) {
-                ASSERT(ascii->length == 0);
-                ASSERT(ascii->hash == -1);
-                ASSERT(ascii->state.compact == 0);
-                ASSERT(ascii->state.ascii == 0);
-                ASSERT(ascii->state.ready == 0);
-                ASSERT(ascii->state.interned == SSTATE_NOT_INTERNED);
-                ASSERT(ascii->wstr != NULL);
-                ASSERT(data == NULL);
-                ASSERT(compact->utf8 == NULL);
+                _PyObject_ASSERT(op, ascii->length == 0);
+                _PyObject_ASSERT(op, ascii->hash == -1);
+                _PyObject_ASSERT(op, ascii->state.compact == 0);
+                _PyObject_ASSERT(op, ascii->state.ascii == 0);
+                _PyObject_ASSERT(op, ascii->state.ready == 0);
+                _PyObject_ASSERT(op, ascii->state.interned == SSTATE_NOT_INTERNED);
+                _PyObject_ASSERT(op, ascii->wstr != NULL);
+                _PyObject_ASSERT(op, data == NULL);
+                _PyObject_ASSERT(op, compact->utf8 == NULL);
             }
             else {
-                ASSERT(kind == PyUnicode_1BYTE_KIND
-                       || kind == PyUnicode_2BYTE_KIND
-                       || kind == PyUnicode_4BYTE_KIND);
-                ASSERT(ascii->state.compact == 0);
-                ASSERT(ascii->state.ready == 1);
-                ASSERT(data != NULL);
+                _PyObject_ASSERT(op, kind == PyUnicode_1BYTE_KIND
+                                     || kind == PyUnicode_2BYTE_KIND
+                                     || kind == PyUnicode_4BYTE_KIND);
+                _PyObject_ASSERT(op, ascii->state.compact == 0);
+                _PyObject_ASSERT(op, ascii->state.ready == 1);
+                _PyObject_ASSERT(op, data != NULL);
                 if (ascii->state.ascii) {
-                    ASSERT (compact->utf8 == data);
-                    ASSERT (compact->utf8_length == ascii->length);
+                    _PyObject_ASSERT(op, compact->utf8 == data);
+                    _PyObject_ASSERT(op, compact->utf8_length == ascii->length);
                 }
                 else
-                    ASSERT (compact->utf8 != data);
+                    _PyObject_ASSERT(op, compact->utf8 != data);
             }
         }
         if (kind != PyUnicode_WCHAR_KIND) {
@@ -471,20 +468,20 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
 #endif
                )
             {
-                ASSERT(ascii->wstr == data);
-                ASSERT(compact->wstr_length == ascii->length);
+                _PyObject_ASSERT(op, ascii->wstr == data);
+                _PyObject_ASSERT(op, compact->wstr_length == ascii->length);
             } else
-                ASSERT(ascii->wstr != data);
+                _PyObject_ASSERT(op, ascii->wstr != data);
         }
 
         if (compact->utf8 == NULL)
-            ASSERT(compact->utf8_length == 0);
+            _PyObject_ASSERT(op, compact->utf8_length == 0);
         if (ascii->wstr == NULL)
-            ASSERT(compact->wstr_length == 0);
+            _PyObject_ASSERT(op, compact->wstr_length == 0);
     }
-    /* check that the best kind is used */
-    if (check_content && kind != PyUnicode_WCHAR_KIND)
-    {
+
+    /* check that the best kind is used: O(n) operation */
+    if (check_content && kind != PyUnicode_WCHAR_KIND) {
         Py_ssize_t i;
         Py_UCS4 maxchar = 0;
         void *data;
@@ -499,27 +496,25 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
         }
         if (kind == PyUnicode_1BYTE_KIND) {
             if (ascii->state.ascii == 0) {
-                ASSERT(maxchar >= 128);
-                ASSERT(maxchar <= 255);
+                _PyObject_ASSERT(op, maxchar >= 128);
+                _PyObject_ASSERT(op, maxchar <= 255);
             }
             else
-                ASSERT(maxchar < 128);
+                _PyObject_ASSERT(op, maxchar < 128);
         }
         else if (kind == PyUnicode_2BYTE_KIND) {
-            ASSERT(maxchar >= 0x100);
-            ASSERT(maxchar <= 0xFFFF);
+            _PyObject_ASSERT(op, maxchar >= 0x100);
+            _PyObject_ASSERT(op, maxchar <= 0xFFFF);
         }
         else {
-            ASSERT(maxchar >= 0x10000);
-            ASSERT(maxchar <= MAX_UNICODE);
+            _PyObject_ASSERT(op, maxchar >= 0x10000);
+            _PyObject_ASSERT(op, maxchar <= MAX_UNICODE);
         }
-        ASSERT(PyUnicode_READ(kind, data, ascii->length) == 0);
+        _PyObject_ASSERT(op, PyUnicode_READ(kind, data, ascii->length) == 0);
     }
     return 1;
-
-#undef ASSERT
 }
-#endif
+
 
 static PyObject*
 unicode_result_wchar(PyObject *unicode)



More information about the Python-checkins mailing list