[Python-checkins] cpython: Issue #23359: Specialize set_lookkey intoa lookup function and an insert

raymond.hettinger python-checkins at python.org
Wed May 27 19:37:29 CEST 2015


https://hg.python.org/cpython/rev/cd1e1715becd
changeset:   96312:cd1e1715becd
user:        Raymond Hettinger <python at rcn.com>
date:        Wed May 27 10:37:20 2015 -0700
summary:
  Issue #23359: Specialize set_lookkey intoa lookup function and an insert function.

files:
  Misc/NEWS           |    3 +
  Objects/setobject.c |  145 ++++++++++++++++++++++---------
  2 files changed, 107 insertions(+), 41 deletions(-)


diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -42,6 +42,9 @@
 - Issue #24268: PEP 489: Multi-phase extension module initialization.
   Patch by Petr Viktorin.
 
+- Issue #23359: Optimize set object internals by specializing the
+  hash table search into a lookup function and an insert function.
+
 - Issue #23955: Add pyvenv.cfg option to suppress registry/environment
   lookup for generating sys.path on Windows.
 
diff --git a/Objects/setobject.c b/Objects/setobject.c
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -52,7 +52,6 @@
 set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
 {
     setentry *table = so->table;
-    setentry *freeslot = NULL;
     setentry *entry;
     size_t perturb = hash;
     size_t mask = so->mask;
@@ -86,14 +85,12 @@
                 return entry;
             mask = so->mask;                 /* help avoid a register spill */
         }
-        if (entry->hash == -1 && freeslot == NULL)
-            freeslot = entry;
 
         if (i + LINEAR_PROBES <= mask) {
             for (j = 0 ; j < LINEAR_PROBES ; j++) {
                 entry++;
-                if (entry->key == NULL)
-                    goto found_null;
+                if (entry->hash == 0 && entry->key == NULL)
+                    return entry;
                 if (entry->hash == hash) {
                     PyObject *startkey = entry->key;
                     assert(startkey != dummy);
@@ -114,6 +111,89 @@
                         return entry;
                     mask = so->mask;
                 }
+            }
+        }
+
+        perturb >>= PERTURB_SHIFT;
+        i = (i * 5 + 1 + perturb) & mask;
+
+        entry = &table[i];
+        if (entry->hash == 0 && entry->key == NULL)
+            return entry;
+    }
+}
+
+/*
+Internal routine to insert a new key into the table.
+Used by the public insert routine.
+Eats a reference to key.
+*/
+static int
+set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
+{
+    setentry *table = so->table;
+    setentry *freeslot = NULL;
+    setentry *entry;
+    size_t perturb = hash;
+    size_t mask = so->mask;
+    size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
+    size_t j;
+    int cmp;
+
+    entry = &table[i];
+    if (entry->key == NULL)
+        goto found_null;
+
+    while (1) {
+        if (entry->hash == hash) {
+            PyObject *startkey = entry->key;
+            /* startkey cannot be a dummy because the dummy hash field is -1 */
+            assert(startkey != dummy);
+            if (startkey == key)
+                goto found_active;
+            if (PyUnicode_CheckExact(startkey)
+                && PyUnicode_CheckExact(key)
+                && unicode_eq(startkey, key))
+                goto found_active;
+            Py_INCREF(startkey);
+            cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
+            Py_DECREF(startkey);
+            if (cmp < 0)                                          /* unlikely */
+                return -1;
+            if (table != so->table || entry->key != startkey)     /* unlikely */
+                return set_insert_key(so, key, hash);
+            if (cmp > 0)                                          /* likely */
+                goto found_active;
+            mask = so->mask;                 /* help avoid a register spill */
+        }
+        if (entry->hash == -1 && freeslot == NULL)
+            freeslot = entry;
+
+        if (i + LINEAR_PROBES <= mask) {
+            for (j = 0 ; j < LINEAR_PROBES ; j++) {
+                entry++;
+                if (entry->hash == 0 && entry->key == NULL)
+                    goto found_null;
+                if (entry->hash == hash) {
+                    PyObject *startkey = entry->key;
+                    assert(startkey != dummy);
+                    if (startkey == key)
+                        goto found_active;
+                    if (PyUnicode_CheckExact(startkey)
+                        && PyUnicode_CheckExact(key)
+                        && unicode_eq(startkey, key))
+                        goto found_active;
+                    Py_INCREF(startkey);
+                    cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
+                    Py_DECREF(startkey);
+                    if (cmp < 0)
+                        return -1;
+                    if (table != so->table || entry->key != startkey)
+                        return set_insert_key(so, key, hash);
+                    if (cmp > 0)
+                        goto found_active;
+                    mask = so->mask;
+                }
                 if (entry->hash == -1 && freeslot == NULL)
                     freeslot = entry;
             }
@@ -123,11 +203,26 @@
         i = (i * 5 + 1 + perturb) & mask;
 
         entry = &table[i];
-        if (entry->key == NULL)
+        if (entry->hash == 0 && entry->key == NULL)
             goto found_null;
     }
+
   found_null:
-    return freeslot == NULL ? entry : freeslot;
+    if (freeslot == NULL) {
+        /* UNUSED */
+        so->fill++;
+    } else {
+        /* DUMMY */
+        entry = freeslot;
+    }
+    so->used++;
+    entry->key = key;
+    entry->hash = hash;
+    return 0;
+
+  found_active:
+    Py_DECREF(key);
+    return 0;
 }
 
 /*
@@ -172,38 +267,6 @@
 /* ======== End logic for probing the hash table ========================== */
 /* ======================================================================== */
 
-
-/*
-Internal routine to insert a new key into the table.
-Used by the public insert routine.
-Eats a reference to key.
-*/
-static int
-set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
-{
-    setentry *entry;
-
-    entry = set_lookkey(so, key, hash);
-    if (entry == NULL)
-        return -1;
-    if (entry->key == NULL) {
-        /* UNUSED */
-        entry->key = key;
-        entry->hash = hash;
-        so->fill++;
-        so->used++;
-    } else if (entry->key == dummy) {
-        /* DUMMY */
-        entry->key = key;
-        entry->hash = hash;
-        so->used++;
-    } else {
-        /* ACTIVE */
-        Py_DECREF(key);
-    }
-    return 0;
-}
-
 /*
 Restructure the table by allocating a new table and reinserting all
 keys again.  When entries have been deleted, the new table may
@@ -345,7 +408,7 @@
     entry = set_lookkey(so, oldentry->key, oldentry->hash);
     if (entry == NULL)
         return -1;
-    if (entry->key == NULL  ||  entry->key == dummy)
+    if (entry->key == NULL)
         return DISCARD_NOTFOUND;
     old_key = entry->key;
     entry->key = dummy;
@@ -623,7 +686,7 @@
     if (lu_entry == NULL)
         return -1;
     key = lu_entry->key;
-    return key != NULL && key != dummy;
+    return key != NULL;
 }
 
 static int

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list