[pypy-commit] pypy py3k-newhash: modernize float's hash

Fri Feb 22 01:59:07 CET 2013

Author: Philip Jenvey <pjenvey at underboss.org>
Branch: py3k-newhash
Changeset: r61581:32410af09b82
Date: 2013-02-21 16:42 -0800
http://bitbucket.org/pypy/pypy/changeset/32410af09b82/

Log:	modernize float's hash

diff --git a/pypy/module/sys/system.py b/pypy/module/sys/system.py
--- a/pypy/module/sys/system.py
+++ b/pypy/module/sys/system.py
@@ -2,7 +2,7 @@
 from pypy.interpreter import gateway
 from rpython.rlib import rfloat, rbigint
 from rpython.rtyper.lltypesystem import rffi, lltype
-from pypy.objspace.std.floatobject import HASH_INF, HASH_NAN
+from pypy.objspace.std.floatobject import HASH_INF, HASH_MODULUS, HASH_NAN
 from pypy.objspace.std.complexobject import HASH_IMAG
 
 
@@ -65,11 +65,9 @@
     return space.call_function(w_int_info, space.newtuple(info_w))
 
 def get_hash_info(space):
-    # XXX our _hash_float() always give values that fit in 32bit
-    modulus = (1 << 31) - 1  # Must be a prime number
     info_w = [
         space.wrap(8 * rffi.sizeof(lltype.Signed)),
-        space.wrap(modulus),
+        space.wrap(HASH_MODULUS),
         space.wrap(HASH_INF),
         space.wrap(HASH_NAN),
         space.wrap(HASH_IMAG),
diff --git a/pypy/objspace/std/complexobject.py b/pypy/objspace/std/complexobject.py
--- a/pypy/objspace/std/complexobject.py
+++ b/pypy/objspace/std/complexobject.py
@@ -129,7 +129,7 @@
     hashreal = _hash_float(space, w_value.realval)
     hashimg = _hash_float(space, w_value.imagval)
     combined = intmask(hashreal + HASH_IMAG * hashimg)
-    return space.newint(combined)
+    return space.newint(-2 if combined == -1 else combined)
 
 def add__Complex_Complex(space, w_complex1, w_complex2):
     return W_ComplexObject(w_complex1.realval + w_complex2.realval,
diff --git a/pypy/objspace/std/floatobject.py b/pypy/objspace/std/floatobject.py
--- a/pypy/objspace/std/floatobject.py
+++ b/pypy/objspace/std/floatobject.py
@@ -1,4 +1,5 @@
 import operator
+import sys
 from pypy.interpreter import gateway
 from pypy.interpreter.error import OperationError
 from pypy.objspace.std import model, newformat
@@ -7,10 +8,11 @@
 from pypy.objspace.std.register_all import register_all
 from pypy.objspace.std.noneobject import W_NoneObject
 from pypy.objspace.std.longobject import W_LongObject
-from rpython.rlib.rarithmetic import ovfcheck_float_to_int, intmask, LONG_BIT
+from rpython.rlib.rarithmetic import (
+    LONG_BIT, intmask, ovfcheck_float_to_int, r_uint)
 from rpython.rlib.rfloat import (
     isinf, isnan, isfinite, INFINITY, NAN, copysign, formatd,
-    DTSF_ADD_DOT_0, DTSF_STR_PRECISION, float_as_rbigint_ratio)
+    DTSF_ADD_DOT_0, float_as_rbigint_ratio)
 from rpython.rlib.rbigint import rbigint
 from rpython.rlib import rfloat
 from rpython.tool.sourcetools import func_with_new_name
@@ -21,6 +23,8 @@
 
 HASH_INF  = 314159
 HASH_NAN  = 0
+HASH_BITS = 61 if sys.maxsize > 2 ** 31 - 1 else 31
+HASH_MODULUS = (1 << HASH_BITS) - 1
 
 class W_AbstractFloatObject(W_Object):
     __slots__ = ()
@@ -284,51 +288,37 @@
     return space.wrap(_hash_float(space, w_value.floatval))
 
 def _hash_float(space, v):
-    if isnan(v):
+    if not isfinite(v):
+        if isinf(v):
+            return HASH_INF if v > 0 else -HASH_INF
         return HASH_NAN
 
-    # This is designed so that Python numbers of different types
-    # that compare equal hash to the same value; otherwise comparisons
-    # of mapping keys will turn out weird.
-    fractpart, intpart = math.modf(v)
+    m, e = math.frexp(v)
 
-    if fractpart == 0.0:
-        # This must return the same hash as an equal int or long.
-        try:
-            x = ovfcheck_float_to_int(intpart)
-            # Fits in a C long == a Python int, so is its own hash.
-            return x
-        except OverflowError:
-            # Convert to long and use its hash.
-            try:
-                w_lval = W_LongObject.fromfloat(space, v)
-            except (OverflowError, ValueError):
-                # can't convert to long int -- arbitrary
-                if v < 0:
-                    return -HASH_INF
-                else:
-                    return HASH_INF
-            return space.int_w(space.hash(w_lval))
+    sign = 1
+    if m < 0:
+        sign = -1
+        m = -m
 
-    # The fractional part is non-zero, so we don't have to worry about
-    # making this match the hash of some other type.
-    # Use frexp to get at the bits in the double.
-    # Since the VAX D double format has 56 mantissa bits, which is the
-    # most of any double format in use, each of these parts may have as
-    # many as (but no more than) 56 significant bits.
-    # So, assuming sizeof(long) >= 4, each part can be broken into two
-    # longs; frexp and multiplication are used to do that.
-    # Also, since the Cray double format has 15 exponent bits, which is
-    # the most of any double format in use, shifting the exponent field
-    # left by 15 won't overflow a long (again assuming sizeof(long) >= 4).
+    # process 28 bits at a time;  this should work well both for binary
+    # and hexadecimal floating point.
+    x = r_uint(0)
+    while m:
+        x = ((x << 28) & HASH_MODULUS) | x >> (HASH_BITS - 28)
+        m *= 268435456.0  # 2**28
+        e -= 28
+        y = int(m)  # pull out integer part
+        m -= y
+        x += y
+        if x >= HASH_MODULUS:
+            x -= HASH_MODULUS
 
-    v, expo = math.frexp(v)
-    v *= 2147483648.0  # 2**31
-    hipart = int(v)    # take the top 32 bits
-    v = (v - hipart) * 2147483648.0 # get the next 32 bits
-    x = intmask(hipart + int(v) + (expo << 15))
-    return x
+    # adjust for the exponent;  first reduce it modulo HASH_BITS
+    e = e % HASH_BITS if e >= 0 else HASH_BITS - 1 - ((-1 - e) % HASH_BITS)
+    x = ((x << e) & HASH_MODULUS) | x >> (HASH_BITS - e)
 
+    x = intmask(intmask(x) * sign)
+    return -2 if x == -1 else x
 
 
 def add__Float_Float(space, w_float1, w_float2):
diff --git a/pypy/objspace/std/test/test_floatobject.py b/pypy/objspace/std/test/test_floatobject.py
--- a/pypy/objspace/std/test/test_floatobject.py
+++ b/pypy/objspace/std/test/test_floatobject.py
@@ -100,16 +100,27 @@
         # these are taken from standard Python, which produces
         # the same but for -1.
         import math
+        import sys
+
+        assert hash(-1.0) == -2
+        assert hash(-2.0) == -2
+        assert hash(-3.0) == -3
         assert hash(42.0) == 42
-        assert hash(42.125) == 1413677056
-        assert hash(math.ldexp(0.125, 1000)) in (
-            32,              # answer on 32-bit machines
-            137438953472)    # answer on 64-bit machines
-        # testing special overflow values
-        inf = 1e200 * 1e200
+        if sys.maxsize > 2 ** 31 - 1:
+            assert hash(42.125) == 288230376151711786
+            assert hash(math.ldexp(0.125, 1000)) == 2097152
+            assert hash(3.141593) == 326491229203594243
+            assert hash(2.5) == 1152921504606846978
+        else:
+            assert hash(42.125) == 268435498
+            assert hash(math.ldexp(0.125, 1000)) == 32
+            assert hash(3.141593) == 671854639
+            assert hash(2.5) == 1073741826
+        inf = float('inf')
+        nan = float('nan')
         assert hash(inf) == 314159
         assert hash(-inf) == -314159
-        assert hash(inf/inf) == 0
+        assert hash(nan) == 0
 
     def test_int_float(self):
         assert int(42.1234) == 42