[Python-checkins] bpo-40602: Write unit tests for _Py_hashtable_t (GH-20091)

Victor Stinner webhook-mailer at python.org
Thu May 14 15:55:55 EDT 2020


https://github.com/python/cpython/commit/a482dc500b6ec4889f6a126ba08cbad6c11e37bc
commit: a482dc500b6ec4889f6a126ba08cbad6c11e37bc
branch: master
author: Victor Stinner <vstinner at python.org>
committer: GitHub <noreply at github.com>
date: 2020-05-14T21:55:47+02:00
summary:

bpo-40602: Write unit tests for _Py_hashtable_t (GH-20091)

Cleanup also hashtable.c.
Rename _Py_hashtable_t members:

* Rename entries to nentries
* Rename num_buckets to nbuckets

files:
M Include/internal/pycore_hashtable.h
M Modules/_testinternalcapi.c
M Python/hashtable.c
M Python/marshal.c

diff --git a/Include/internal/pycore_hashtable.h b/Include/internal/pycore_hashtable.h
index 2990f9e0c1cc6..18757abc28c19 100644
--- a/Include/internal/pycore_hashtable.h
+++ b/Include/internal/pycore_hashtable.h
@@ -48,18 +48,18 @@ typedef _Py_hashtable_entry_t* (*_Py_hashtable_get_entry_func)(_Py_hashtable_t *
                                                                const void *key);
 
 typedef struct {
-    /* allocate a memory block */
+    // Allocate a memory block
     void* (*malloc) (size_t size);
 
-    /* release a memory block */
+    // Release a memory block
     void (*free) (void *ptr);
 } _Py_hashtable_allocator_t;
 
 
 /* _Py_hashtable: table */
 struct _Py_hashtable_t {
-    size_t num_buckets;
-    size_t entries; /* Total number of entries in the table. */
+    size_t nentries; // Total number of entries in the table
+    size_t nbuckets;
     _Py_slist_t *buckets;
 
     _Py_hashtable_get_entry_func get_entry_func;
@@ -70,10 +70,10 @@ struct _Py_hashtable_t {
     _Py_hashtable_allocator_t alloc;
 };
 
-/* hash a pointer (void*) */
+/* Hash a pointer (void*) */
 PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(const void *key);
 
-/* comparison using memcmp() */
+/* Comparison using memcmp() */
 PyAPI_FUNC(int) _Py_hashtable_compare_direct(
     const void *key1,
     const void *key2);
@@ -129,13 +129,14 @@ _Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key)
 
    Use _Py_hashtable_get_entry() to distinguish entry value equal to NULL
    and entry not found. */
-extern void *_Py_hashtable_get(_Py_hashtable_t *ht, const void *key);
+PyAPI_FUNC(void*) _Py_hashtable_get(_Py_hashtable_t *ht, const void *key);
 
 
-// Remove a key and its associated value without calling key and value destroy
-// functions.
-// Return the removed value if the key was found.
-// Return NULL if the key was not found.
+/* Remove a key and its associated value without calling key and value destroy
+   functions.
+
+   Return the removed value if the key was found.
+   Return NULL if the key was not found. */
 PyAPI_FUNC(void*) _Py_hashtable_steal(
     _Py_hashtable_t *ht,
     const void *key);
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 1b7563cb20fc5..3ae387d945d76 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -14,6 +14,7 @@
 #include "Python.h"
 #include "pycore_byteswap.h"     // _Py_bswap32()
 #include "pycore_initconfig.h"   // _Py_GetConfigsAsDict()
+#include "pycore_hashtable.h"    // _Py_hashtable_new()
 #include "pycore_gc.h"           // PyGC_Head
 
 
@@ -62,10 +63,97 @@ test_bswap(PyObject *self, PyObject *Py_UNUSED(args))
 }
 
 
+#define TO_PTR(ch) ((void*)(uintptr_t)ch)
+#define FROM_PTR(ptr) ((uintptr_t)ptr)
+#define VALUE(key) (1 + ((int)(key) - 'a'))
+
+static Py_uhash_t
+hash_char(const void *key)
+{
+    char ch = (char)FROM_PTR(key);
+    return ch;
+}
+
+
+static int
+hashtable_cb(_Py_hashtable_t *table,
+             const void *key_ptr, const void *value_ptr,
+             void *user_data)
+{
+    int *count = (int *)user_data;
+    char key = (char)FROM_PTR(key_ptr);
+    int value = (int)FROM_PTR(value_ptr);
+    assert(value == VALUE(key));
+    *count += 1;
+    return 0;
+}
+
+
+static PyObject*
+test_hashtable(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    _Py_hashtable_t *table = _Py_hashtable_new(hash_char,
+                                               _Py_hashtable_compare_direct);
+    if (table == NULL) {
+        return PyErr_NoMemory();
+    }
+
+    // Test _Py_hashtable_set()
+    char key;
+    for (key='a'; key <= 'z'; key++) {
+        int value = VALUE(key);
+        if (_Py_hashtable_set(table, TO_PTR(key), TO_PTR(value)) < 0) {
+            _Py_hashtable_destroy(table);
+            return PyErr_NoMemory();
+        }
+    }
+    assert(table->nentries == 26);
+    assert(table->nbuckets > table->nentries);
+
+    // Test _Py_hashtable_get_entry()
+    for (key='a'; key <= 'z'; key++) {
+        _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(table, TO_PTR(key));
+        assert(entry != NULL);
+        assert(entry->key = TO_PTR(key));
+        assert(entry->value = TO_PTR(VALUE(key)));
+    }
+
+    // Test _Py_hashtable_get()
+    for (key='a'; key <= 'z'; key++) {
+        void *value_ptr = _Py_hashtable_get(table, TO_PTR(key));
+        int value = (int)FROM_PTR(value_ptr);
+        assert(value == VALUE(key));
+    }
+
+    // Test _Py_hashtable_steal()
+    key = 'p';
+    void *value_ptr = _Py_hashtable_steal(table, TO_PTR(key));
+    int value = (int)FROM_PTR(value_ptr);
+    assert(value == VALUE(key));
+
+    assert(table->nentries == 25);
+
+    // Test _Py_hashtable_foreach()
+    int count = 0;
+    int res = _Py_hashtable_foreach(table, hashtable_cb, &count);
+    assert(res == 0);
+    assert(count == 25);
+
+    // Test _Py_hashtable_clear()
+    _Py_hashtable_clear(table);
+    assert(table->nentries == 0);
+    assert(_Py_hashtable_get(table, TO_PTR('x')) == NULL);
+
+    _Py_hashtable_destroy(table);
+    Py_RETURN_NONE;
+}
+
+
 static PyMethodDef TestMethods[] = {
     {"get_configs", get_configs, METH_NOARGS},
     {"get_recursion_depth", get_recursion_depth, METH_NOARGS},
     {"test_bswap", test_bswap, METH_NOARGS},
+    {"test_hashtable", test_hashtable, METH_NOARGS},
     {NULL, NULL} /* sentinel */
 };
 
diff --git a/Python/hashtable.c b/Python/hashtable.c
index d1467ad94ed55..45c52859ac2d6 100644
--- a/Python/hashtable.c
+++ b/Python/hashtable.c
@@ -119,66 +119,20 @@ round_size(size_t s)
 size_t
 _Py_hashtable_size(const _Py_hashtable_t *ht)
 {
-    size_t size;
-
-    size = sizeof(_Py_hashtable_t);
-
+    size_t size = sizeof(_Py_hashtable_t);
     /* buckets */
-    size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *);
-
+    size += ht->nbuckets * sizeof(_Py_hashtable_entry_t *);
     /* entries */
-    size += ht->entries * sizeof(_Py_hashtable_entry_t);
-
+    size += ht->nentries * sizeof(_Py_hashtable_entry_t);
     return size;
 }
 
 
-#ifdef Py_DEBUG
-void
-_Py_hashtable_print_stats(_Py_hashtable_t *ht)
-{
-    size_t size;
-    size_t chain_len, max_chain_len, total_chain_len, nchains;
-    _Py_hashtable_entry_t *entry;
-    size_t hv;
-    double load;
-
-    size = _Py_hashtable_size(ht);
-
-    load = (double)ht->entries / ht->num_buckets;
-
-    max_chain_len = 0;
-    total_chain_len = 0;
-    nchains = 0;
-    for (hv = 0; hv < ht->num_buckets; hv++) {
-        entry = TABLE_HEAD(ht, hv);
-        if (entry != NULL) {
-            chain_len = 0;
-            for (; entry; entry = ENTRY_NEXT(entry)) {
-                chain_len++;
-            }
-            if (chain_len > max_chain_len)
-                max_chain_len = chain_len;
-            total_chain_len += chain_len;
-            nchains++;
-        }
-    }
-    printf("hash table %p: entries=%"
-           PY_FORMAT_SIZE_T "u/%" PY_FORMAT_SIZE_T "u (%.0f%%), ",
-           (void *)ht, ht->entries, ht->num_buckets, load * 100.0);
-    if (nchains)
-        printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains);
-    printf("max_chain_len=%" PY_FORMAT_SIZE_T "u, %" PY_FORMAT_SIZE_T "u KiB\n",
-           max_chain_len, size / 1024);
-}
-#endif
-
-
 _Py_hashtable_entry_t *
 _Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *key)
 {
     Py_uhash_t key_hash = ht->hash_func(key);
-    size_t index = key_hash & (ht->num_buckets - 1);
+    size_t index = key_hash & (ht->nbuckets - 1);
     _Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index);
     while (1) {
         if (entry == NULL) {
@@ -200,7 +154,7 @@ static _Py_hashtable_entry_t *
 _Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *key)
 {
     Py_uhash_t key_hash = _Py_hashtable_hash_ptr(key);
-    size_t index = key_hash & (ht->num_buckets - 1);
+    size_t index = key_hash & (ht->nbuckets - 1);
     _Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index);
     while (1) {
         if (entry == NULL) {
@@ -220,7 +174,7 @@ void*
 _Py_hashtable_steal(_Py_hashtable_t *ht, const void *key)
 {
     Py_uhash_t key_hash = ht->hash_func(key);
-    size_t index = key_hash & (ht->num_buckets - 1);
+    size_t index = key_hash & (ht->nbuckets - 1);
 
     _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
     _Py_hashtable_entry_t *previous = NULL;
@@ -238,12 +192,12 @@ _Py_hashtable_steal(_Py_hashtable_t *ht, const void *key)
 
     _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
                      (_Py_slist_item_t *)entry);
-    ht->entries--;
+    ht->nentries--;
 
     void *value = entry->value;
     ht->alloc.free(entry);
 
-    if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW) {
+    if ((float)ht->nentries / (float)ht->nbuckets < HASHTABLE_LOW) {
         hashtable_rehash(ht);
     }
     return value;
@@ -263,8 +217,6 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, void *value)
     assert(entry == NULL);
 #endif
 
-    Py_uhash_t key_hash = ht->hash_func(key);
-    size_t index = key_hash & (ht->num_buckets - 1);
 
     entry = ht->alloc.malloc(sizeof(_Py_hashtable_entry_t));
     if (entry == NULL) {
@@ -272,15 +224,17 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, void *value)
         return -1;
     }
 
-    entry->key_hash = key_hash;
+    entry->key_hash = ht->hash_func(key);
     entry->key = (void *)key;
     entry->value = value;
 
+    size_t index = entry->key_hash & (ht->nbuckets - 1);
     _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
-    ht->entries++;
+    ht->nentries++;
 
-    if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH)
+    if ((float)ht->nentries / (float)ht->nbuckets > HASHTABLE_HIGH) {
         hashtable_rehash(ht);
+    }
     return 0;
 }
 
@@ -303,14 +257,14 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht,
                       _Py_hashtable_foreach_func func,
                       void *user_data)
 {
-    _Py_hashtable_entry_t *entry;
-    size_t hv;
-
-    for (hv = 0; hv < ht->num_buckets; hv++) {
-        for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
+    for (size_t hv = 0; hv < ht->nbuckets; hv++) {
+        _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, hv);
+        while (entry != NULL) {
             int res = func(ht, entry->key, entry->value, user_data);
-            if (res)
+            if (res) {
                 return res;
+            }
+            entry = ENTRY_NEXT(entry);
         }
     }
     return 0;
@@ -320,44 +274,35 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht,
 static void
 hashtable_rehash(_Py_hashtable_t *ht)
 {
-    size_t buckets_size, new_size, bucket;
-    _Py_slist_t *old_buckets = NULL;
-    size_t old_num_buckets;
-
-    new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR));
-    if (new_size == ht->num_buckets)
+    size_t new_size = round_size((size_t)(ht->nentries * HASHTABLE_REHASH_FACTOR));
+    if (new_size == ht->nbuckets) {
         return;
+    }
 
-    old_num_buckets = ht->num_buckets;
-
-    buckets_size = new_size * sizeof(ht->buckets[0]);
-    old_buckets = ht->buckets;
-    ht->buckets = ht->alloc.malloc(buckets_size);
-    if (ht->buckets == NULL) {
-        /* cancel rehash on memory allocation failure */
-        ht->buckets = old_buckets ;
+    size_t buckets_size = new_size * sizeof(ht->buckets[0]);
+    _Py_slist_t *new_buckets = ht->alloc.malloc(buckets_size);
+    if (new_buckets == NULL) {
         /* memory allocation failed */
         return;
     }
-    memset(ht->buckets, 0, buckets_size);
-
-    ht->num_buckets = new_size;
-
-    for (bucket = 0; bucket < old_num_buckets; bucket++) {
-        _Py_hashtable_entry_t *entry, *next;
-        for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
-            size_t entry_index;
-
+    memset(new_buckets, 0, buckets_size);
 
+    for (size_t bucket = 0; bucket < ht->nbuckets; bucket++) {
+        _Py_hashtable_entry_t *entry = BUCKETS_HEAD(ht->buckets[bucket]);
+        while (entry != NULL) {
             assert(ht->hash_func(entry->key) == entry->key_hash);
-            next = ENTRY_NEXT(entry);
-            entry_index = entry->key_hash & (new_size - 1);
+            _Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
+            size_t entry_index = entry->key_hash & (new_size - 1);
+
+            _Py_slist_prepend(&new_buckets[entry_index], (_Py_slist_item_t*)entry);
 
-            _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry);
+            entry = next;
         }
     }
 
-    ht->alloc.free(old_buckets);
+    ht->alloc.free(ht->buckets);
+    ht->nbuckets = new_size;
+    ht->buckets = new_buckets;
 }
 
 
@@ -368,10 +313,7 @@ _Py_hashtable_new_full(_Py_hashtable_hash_func hash_func,
                        _Py_hashtable_destroy_func value_destroy_func,
                        _Py_hashtable_allocator_t *allocator)
 {
-    _Py_hashtable_t *ht;
-    size_t buckets_size;
     _Py_hashtable_allocator_t alloc;
-
     if (allocator == NULL) {
         alloc.malloc = PyMem_Malloc;
         alloc.free = PyMem_Free;
@@ -380,14 +322,15 @@ _Py_hashtable_new_full(_Py_hashtable_hash_func hash_func,
         alloc = *allocator;
     }
 
-    ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
-    if (ht == NULL)
+    _Py_hashtable_t *ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
+    if (ht == NULL) {
         return ht;
+    }
 
-    ht->num_buckets = HASHTABLE_MIN_SIZE;
-    ht->entries = 0;
+    ht->nbuckets = HASHTABLE_MIN_SIZE;
+    ht->nentries = 0;
 
-    buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
+    size_t buckets_size = ht->nbuckets * sizeof(ht->buckets[0]);
     ht->buckets = alloc.malloc(buckets_size);
     if (ht->buckets == NULL) {
         alloc.free(ht);
@@ -435,17 +378,16 @@ _Py_hashtable_destroy_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry)
 void
 _Py_hashtable_clear(_Py_hashtable_t *ht)
 {
-    _Py_hashtable_entry_t *entry, *next;
-    size_t i;
-
-    for (i=0; i < ht->num_buckets; i++) {
-        for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
-            next = ENTRY_NEXT(entry);
+    for (size_t i=0; i < ht->nbuckets; i++) {
+        _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
+        while (entry != NULL) {
+            _Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
             _Py_hashtable_destroy_entry(ht, entry);
+            entry = next;
         }
         _Py_slist_init(&ht->buckets[i]);
     }
-    ht->entries = 0;
+    ht->nentries = 0;
     hashtable_rehash(ht);
 }
 
@@ -453,7 +395,7 @@ _Py_hashtable_clear(_Py_hashtable_t *ht)
 void
 _Py_hashtable_destroy(_Py_hashtable_t *ht)
 {
-    for (size_t i = 0; i < ht->num_buckets; i++) {
+    for (size_t i = 0; i < ht->nbuckets; i++) {
         _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
         while (entry) {
             _Py_hashtable_entry_t *entry_next = ENTRY_NEXT(entry);
diff --git a/Python/marshal.c b/Python/marshal.c
index b096ff8932220..a0f6b9812601b 100644
--- a/Python/marshal.c
+++ b/Python/marshal.c
@@ -312,7 +312,7 @@ w_ref(PyObject *v, char *flag, WFILE *p)
         w_long(w, p);
         return 1;
     } else {
-        size_t s = p->hashtable->entries;
+        size_t s = p->hashtable->nentries;
         /* we don't support long indices */
         if (s >= 0x7fffffff) {
             PyErr_SetString(PyExc_ValueError, "too many objects");



More information about the Python-checkins mailing list