[pypy-commit] pypy py3k-newhash: modernize float's hash
pjenvey
noreply at buildbot.pypy.org
Fri Feb 22 01:59:07 CET 2013
Author: Philip Jenvey <pjenvey at underboss.org>
Branch: py3k-newhash
Changeset: r61581:32410af09b82
Date: 2013-02-21 16:42 -0800
http://bitbucket.org/pypy/pypy/changeset/32410af09b82/
Log: modernize float's hash
diff --git a/pypy/module/sys/system.py b/pypy/module/sys/system.py
--- a/pypy/module/sys/system.py
+++ b/pypy/module/sys/system.py
@@ -2,7 +2,7 @@
from pypy.interpreter import gateway
from rpython.rlib import rfloat, rbigint
from rpython.rtyper.lltypesystem import rffi, lltype
-from pypy.objspace.std.floatobject import HASH_INF, HASH_NAN
+from pypy.objspace.std.floatobject import HASH_INF, HASH_MODULUS, HASH_NAN
from pypy.objspace.std.complexobject import HASH_IMAG
@@ -65,11 +65,9 @@
return space.call_function(w_int_info, space.newtuple(info_w))
def get_hash_info(space):
- # XXX our _hash_float() always give values that fit in 32bit
- modulus = (1 << 31) - 1 # Must be a prime number
info_w = [
space.wrap(8 * rffi.sizeof(lltype.Signed)),
- space.wrap(modulus),
+ space.wrap(HASH_MODULUS),
space.wrap(HASH_INF),
space.wrap(HASH_NAN),
space.wrap(HASH_IMAG),
diff --git a/pypy/objspace/std/complexobject.py b/pypy/objspace/std/complexobject.py
--- a/pypy/objspace/std/complexobject.py
+++ b/pypy/objspace/std/complexobject.py
@@ -129,7 +129,7 @@
hashreal = _hash_float(space, w_value.realval)
hashimg = _hash_float(space, w_value.imagval)
combined = intmask(hashreal + HASH_IMAG * hashimg)
- return space.newint(combined)
+ return space.newint(-2 if combined == -1 else combined)
def add__Complex_Complex(space, w_complex1, w_complex2):
return W_ComplexObject(w_complex1.realval + w_complex2.realval,
diff --git a/pypy/objspace/std/floatobject.py b/pypy/objspace/std/floatobject.py
--- a/pypy/objspace/std/floatobject.py
+++ b/pypy/objspace/std/floatobject.py
@@ -1,4 +1,5 @@
import operator
+import sys
from pypy.interpreter import gateway
from pypy.interpreter.error import OperationError
from pypy.objspace.std import model, newformat
@@ -7,10 +8,11 @@
from pypy.objspace.std.register_all import register_all
from pypy.objspace.std.noneobject import W_NoneObject
from pypy.objspace.std.longobject import W_LongObject
-from rpython.rlib.rarithmetic import ovfcheck_float_to_int, intmask, LONG_BIT
+from rpython.rlib.rarithmetic import (
+ LONG_BIT, intmask, ovfcheck_float_to_int, r_uint)
from rpython.rlib.rfloat import (
isinf, isnan, isfinite, INFINITY, NAN, copysign, formatd,
- DTSF_ADD_DOT_0, DTSF_STR_PRECISION, float_as_rbigint_ratio)
+ DTSF_ADD_DOT_0, float_as_rbigint_ratio)
from rpython.rlib.rbigint import rbigint
from rpython.rlib import rfloat
from rpython.tool.sourcetools import func_with_new_name
@@ -21,6 +23,8 @@
HASH_INF = 314159
HASH_NAN = 0
+HASH_BITS = 61 if sys.maxsize > 2 ** 31 - 1 else 31
+HASH_MODULUS = (1 << HASH_BITS) - 1
class W_AbstractFloatObject(W_Object):
__slots__ = ()
@@ -284,51 +288,37 @@
return space.wrap(_hash_float(space, w_value.floatval))
def _hash_float(space, v):
- if isnan(v):
+ if not isfinite(v):
+ if isinf(v):
+ return HASH_INF if v > 0 else -HASH_INF
return HASH_NAN
- # This is designed so that Python numbers of different types
- # that compare equal hash to the same value; otherwise comparisons
- # of mapping keys will turn out weird.
- fractpart, intpart = math.modf(v)
+ m, e = math.frexp(v)
- if fractpart == 0.0:
- # This must return the same hash as an equal int or long.
- try:
- x = ovfcheck_float_to_int(intpart)
- # Fits in a C long == a Python int, so is its own hash.
- return x
- except OverflowError:
- # Convert to long and use its hash.
- try:
- w_lval = W_LongObject.fromfloat(space, v)
- except (OverflowError, ValueError):
- # can't convert to long int -- arbitrary
- if v < 0:
- return -HASH_INF
- else:
- return HASH_INF
- return space.int_w(space.hash(w_lval))
+ sign = 1
+ if m < 0:
+ sign = -1
+ m = -m
- # The fractional part is non-zero, so we don't have to worry about
- # making this match the hash of some other type.
- # Use frexp to get at the bits in the double.
- # Since the VAX D double format has 56 mantissa bits, which is the
- # most of any double format in use, each of these parts may have as
- # many as (but no more than) 56 significant bits.
- # So, assuming sizeof(long) >= 4, each part can be broken into two
- # longs; frexp and multiplication are used to do that.
- # Also, since the Cray double format has 15 exponent bits, which is
- # the most of any double format in use, shifting the exponent field
- # left by 15 won't overflow a long (again assuming sizeof(long) >= 4).
+ # process 28 bits at a time; this should work well both for binary
+ # and hexadecimal floating point.
+ x = r_uint(0)
+ while m:
+ x = ((x << 28) & HASH_MODULUS) | x >> (HASH_BITS - 28)
+ m *= 268435456.0 # 2**28
+ e -= 28
+ y = int(m) # pull out integer part
+ m -= y
+ x += y
+ if x >= HASH_MODULUS:
+ x -= HASH_MODULUS
- v, expo = math.frexp(v)
- v *= 2147483648.0 # 2**31
- hipart = int(v) # take the top 32 bits
- v = (v - hipart) * 2147483648.0 # get the next 32 bits
- x = intmask(hipart + int(v) + (expo << 15))
- return x
+ # adjust for the exponent; first reduce it modulo HASH_BITS
+ e = e % HASH_BITS if e >= 0 else HASH_BITS - 1 - ((-1 - e) % HASH_BITS)
+ x = ((x << e) & HASH_MODULUS) | x >> (HASH_BITS - e)
+ x = intmask(intmask(x) * sign)
+ return -2 if x == -1 else x
def add__Float_Float(space, w_float1, w_float2):
diff --git a/pypy/objspace/std/test/test_floatobject.py b/pypy/objspace/std/test/test_floatobject.py
--- a/pypy/objspace/std/test/test_floatobject.py
+++ b/pypy/objspace/std/test/test_floatobject.py
@@ -100,16 +100,27 @@
# these are taken from standard Python, which produces
# the same but for -1.
import math
+ import sys
+
+ assert hash(-1.0) == -2
+ assert hash(-2.0) == -2
+ assert hash(-3.0) == -3
assert hash(42.0) == 42
- assert hash(42.125) == 1413677056
- assert hash(math.ldexp(0.125, 1000)) in (
- 32, # answer on 32-bit machines
- 137438953472) # answer on 64-bit machines
- # testing special overflow values
- inf = 1e200 * 1e200
+ if sys.maxsize > 2 ** 31 - 1:
+ assert hash(42.125) == 288230376151711786
+ assert hash(math.ldexp(0.125, 1000)) == 2097152
+ assert hash(3.141593) == 326491229203594243
+ assert hash(2.5) == 1152921504606846978
+ else:
+ assert hash(42.125) == 268435498
+ assert hash(math.ldexp(0.125, 1000)) == 32
+ assert hash(3.141593) == 671854639
+ assert hash(2.5) == 1073741826
+ inf = float('inf')
+ nan = float('nan')
assert hash(inf) == 314159
assert hash(-inf) == -314159
- assert hash(inf/inf) == 0
+ assert hash(nan) == 0
def test_int_float(self):
assert int(42.1234) == 42
More information about the pypy-commit
mailing list