[Python-checkins] cpython: Issue #1621: Fix undefined behaviour in bytes.__hash__, str.__hash__, tuple.__hash__, frozenset.__hash__ and set indexing operations.

mark.dickinson python-checkins at python.org
Sat Sep 24 19:19:42 CEST 2011


http://hg.python.org/cpython/rev/698fa089ce70
changeset:   72464:698fa089ce70
user:        Mark Dickinson <mdickinson at enthought.com>
date:        Sat Sep 24 18:18:40 2011 +0100
summary:
  Issue #1621: Fix undefined behaviour in bytes.__hash__, str.__hash__, tuple.__hash__, frozenset.__hash__ and set indexing operations.

files:
  Objects/bytesobject.c   |   8 ++++----
  Objects/dictobject.c    |   4 ++--
  Objects/setobject.c     |  20 ++++++++++----------
  Objects/tupleobject.c   |   9 +++++----
  Objects/unicodeobject.c |  10 +++++-----
  5 files changed, 26 insertions(+), 25 deletions(-)


diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -869,16 +869,16 @@
 {
     register Py_ssize_t len;
     register unsigned char *p;
-    register Py_hash_t x;
+    register Py_uhash_t x;
 
     if (a->ob_shash != -1)
         return a->ob_shash;
     len = Py_SIZE(a);
     p = (unsigned char *) a->ob_sval;
-    x = *p << 7;
+    x = (Py_uhash_t)*p << 7;
     while (--len >= 0)
-        x = (1000003*x) ^ *p++;
-    x ^= Py_SIZE(a);
+        x = (1000003U*x) ^ (Py_uhash_t)*p++;
+    x ^= (Py_uhash_t)Py_SIZE(a);
     if (x == -1)
         x = -2;
     a->ob_shash = x;
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -418,7 +418,7 @@
         mp->ma_lookup = lookdict;
         return lookdict(mp, key, hash);
     }
-    i = hash & mask;
+    i = (size_t)hash & mask;
     ep = &ep0[i];
     if (ep->me_key == NULL || ep->me_key == key)
         return ep;
@@ -572,7 +572,7 @@
     register PyDictEntry *ep;
 
     MAINTAIN_TRACKING(mp, key, value);
-    i = hash & mask;
+    i = (size_t)hash & mask;
     ep = &ep0[i];
     for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
         i = (i << 2) + i + perturb + 1;
diff --git a/Objects/setobject.c b/Objects/setobject.c
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -77,7 +77,7 @@
 static setentry *
 set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash)
 {
-    register Py_ssize_t i;
+    register size_t i;
     register size_t perturb;
     register setentry *freeslot;
     register size_t mask = so->mask;
@@ -86,7 +86,7 @@
     register int cmp;
     PyObject *startkey;
 
-    i = hash & mask;
+    i = (size_t)hash & mask;
     entry = &table[i];
     if (entry->key == NULL || entry->key == key)
         return entry;
@@ -159,7 +159,7 @@
 static setentry *
 set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash)
 {
-    register Py_ssize_t i;
+    register size_t i;
     register size_t perturb;
     register setentry *freeslot;
     register size_t mask = so->mask;
@@ -174,7 +174,7 @@
         so->lookup = set_lookkey;
         return set_lookkey(so, key, hash);
     }
-    i = hash & mask;
+    i = (size_t)hash & mask;
     entry = &table[i];
     if (entry->key == NULL || entry->key == key)
         return entry;
@@ -256,7 +256,7 @@
     setentry *table = so->table;
     register setentry *entry;
 
-    i = hash & mask;
+    i = (size_t)hash & mask;
     entry = &table[i];
     for (perturb = hash; entry->key != NULL; perturb >>= PERTURB_SHIFT) {
         i = (i << 2) + i + perturb + 1;
@@ -770,14 +770,14 @@
 frozenset_hash(PyObject *self)
 {
     PySetObject *so = (PySetObject *)self;
-    Py_hash_t h, hash = 1927868237L;
+    Py_uhash_t h, hash = 1927868237U;
     setentry *entry;
     Py_ssize_t pos = 0;
 
     if (so->hash != -1)
         return so->hash;
 
-    hash *= PySet_GET_SIZE(self) + 1;
+    hash *= (Py_uhash_t)PySet_GET_SIZE(self) + 1;
     while (set_next(so, &pos, &entry)) {
         /* Work to increase the bit dispersion for closely spaced hash
            values.  The is important because some use cases have many
@@ -785,11 +785,11 @@
            hashes so that many distinct combinations collapse to only
            a handful of distinct hash values. */
         h = entry->hash;
-        hash ^= (h ^ (h << 16) ^ 89869747L)  * 3644798167u;
+        hash ^= (h ^ (h << 16) ^ 89869747U)  * 3644798167U;
     }
-    hash = hash * 69069L + 907133923L;
+    hash = hash * 69069U + 907133923U;
     if (hash == -1)
-        hash = 590923713L;
+        hash = 590923713U;
     so->hash = hash;
     return hash;
 }
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -315,11 +315,12 @@
 static Py_hash_t
 tuplehash(PyTupleObject *v)
 {
-    register Py_hash_t x, y;
+    register Py_uhash_t x;
+    register Py_hash_t y;
     register Py_ssize_t len = Py_SIZE(v);
     register PyObject **p;
-    Py_hash_t mult = 1000003L;
-    x = 0x345678L;
+    Py_uhash_t mult = 1000003;
+    x = 0x345678;
     p = v->ob_item;
     while (--len >= 0) {
         y = PyObject_Hash(*p++);
@@ -330,7 +331,7 @@
         mult += (Py_hash_t)(82520L + len + len);
     }
     x += 97531L;
-    if (x == -1)
+    if (x == (Py_uhash_t)-1)
         x = -2;
     return x;
 }
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -7721,22 +7721,22 @@
 }
 
 /* Believe it or not, this produces the same value for ASCII strings
-   as string_hash(). */
+   as bytes_hash(). */
 static Py_hash_t
 unicode_hash(PyUnicodeObject *self)
 {
     Py_ssize_t len;
     Py_UNICODE *p;
-    Py_hash_t x;
+    Py_uhash_t x;
 
     if (self->hash != -1)
         return self->hash;
     len = Py_SIZE(self);
     p = self->str;
-    x = *p << 7;
+    x = (Py_uhash_t)*p << 7;
     while (--len >= 0)
-        x = (1000003*x) ^ *p++;
-    x ^= Py_SIZE(self);
+        x = (1000003U*x) ^ (Py_uhash_t)*p++;
+    x ^= (Py_uhash_t)Py_SIZE(self);
     if (x == -1)
         x = -2;
     self->hash = x;

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list