[Patches] fix hashing take#2 (was: fix float_hash and complex_hash for 64-bit *nix)

Trent Mick trentm@activestate.com
Thu, 11 May 2000 21:54:33 -0700


--huq684BweRXVnRxX
Content-Type: text/plain; charset=us-ascii


Sorry, I goofed in _Py_HashPointer in my first patch. Caused an infinite loop
on Win64. Here is the correct ed patch ....


Discussion of patch:

Correct (presumably) and cleanup certain hash functions (float, complex,
object, class, func, method, winreg). Two new hash helper function
(_Py_HashDouble() and _Py_HashPointer()) were added to group common hash
function code (this helps maintain the hash function invariant: (a==b) =>
(hash(a)==hash(b))).

Tim, a question: Before, the float_hash algorithm that we were discussing did
the frexp-and-32-bits-at-a-time reduction magic to the fractpart of the float
and then you identified that the same needed to be done to the intpart.
Instead of doing this twice, I just applied the reduction to whole double
(fractpart+intpart). I don't see any error in doing that. Do you? As well, is
there some hashing benefit that is lost by my doing the reduction only once?

Note to a reviewer: Please check my refcount usage in _Py_HashPointer. I am a
newbie there.

_Py_HashPointer just casts to a long (as before) if it fits and goes through
a long if it does not (i.e. on Win64 where sizeof(long) < sizeof(void*)). The
function is applied where previously a pointer was cast to a long.

test_hash.py is added (see attached) to test some cases for the mentioned
invariant.


Legal:

I confirm that, to the best of my knowledge and belief, this
contribution is free of any claims of third parties under
copyright, patent or other rights or interests ("claims").  To
the extent that I have any such claims, I hereby grant to CNRI a
nonexclusive, irrevocable, royalty-free, worldwide license to
reproduce, distribute, perform and/or display publicly, prepare
derivative versions, and otherwise use this contribution as part
of the Python software and its related documentation, or any
derivative versions thereof, at no cost to CNRI or its licensed
users, and to authorize others to do so.

I acknowledge that CNRI may, at its sole discretion, decide
whether or not to incorporate this contribution in the Python
software and its related documentation.  I further grant CNRI
permission to use my name and other identifying information
provided to CNRI by me for use in connection with the Python
software and its related documentation.


Patch:


diff -c3  /home/trentm/main/contrib/python/dist/src/Include/object.h ./Include/object.h
*** /home/trentm/main/contrib/python/dist/src/Include/object.h	Mon Apr 24 08:40:45 2000
--- ./Include/object.h	Thu May 11 20:19:53 2000
***************
*** 287,292 ****
--- 287,296 ----
  /* tstate dict key for PyObject_Compare helper */
  extern PyObject *_PyCompareState_Key;
  
+ /* Helpers for hash functions */
+ extern DL_IMPORT(long) _Py_HashDouble Py_PROTO((double));
+ extern DL_IMPORT(long) _Py_HashPointer Py_PROTO((void*));
+ 
  /* Flag bits for printing: */
  #define Py_PRINT_RAW	1	/* No string quotes etc. */
  
diff -c3  /home/trentm/main/contrib/python/dist/src/Objects/object.c ./Objects/object.c
*** /home/trentm/main/contrib/python/dist/src/Objects/object.c	Wed May  3 16:44:35 2000
--- ./Objects/object.c	Thu May 11 20:30:57 2000
***************
*** 33,38 ****
--- 33,40 ----
  
  #include "Python.h"
  
+ #include "mymath.h"
+ 
  /* just for trashcan: */
  #include "compile.h"
  #include "frameobject.h"
***************
*** 506,511 ****
--- 508,569 ----
  	return result;
  }
  
+ 
+ /* Set of hash utility functions to help maintaining the invariant that
+ 	iff a==b then hash(a)==hash(b)
+ 
+    All the utility functions (_Py_Hash*()) return "-1" to signify an error.
+ */
+ 
+ long
+ _Py_HashDouble(v)
+     double v;
+ {
+ 	/* Use frexp to get at the bits in the double.
+ 	 * Since the VAX D double format has 56 mantissa bits, which is the
+ 	 * most of any double format in use, each of these parts may have as
+ 	 * many as (but no more than) 56 significant bits.
+ 	 * So, assuming sizeof(long) >= 4, each part can be broken into two longs;
+ 	 * frexp and multiplication are used to do that.
+ 	 * Also, since the Cray double format has 15 exponent bits, which is the
+ 	 * most of any double format in use, shifting the exponent field left by
+ 	 * 15 won't overflow a long (again assuming sizeof(long) >= 4).
+ 	 */
+     int expo;
+     long hipart;
+ 
+     v = frexp(v, &expo);
+     v = v * 2147483648.0; /* 2**31 */
+     hipart = (long)v; /* Take the top 32 bits */
+ 	v = (v - (double)hipart) * 2147483648.0; /* Get the next 32 bits */
+ 
+     return hipart + (long)v + (expo << 15); /* Combine everything */
+ }
+ 
+ long
+ _Py_HashPointer(p)
+ 	void *p;
+ {
+ #if SIZEOF_LONG >= SIZEOF_VOID_P
+ 	return (long)p;
+ #else
+ 	/* convert to a Python long and hash that */
+ 	PyObject* longobj;
+ 	long x;
+ 	
+ 	if ((longobj = PyLong_FromVoidPtr(p)) == NULL) {
+ 		x = -1;
+ 		goto finally;
+ 	}
+ 	x = PyObject_Hash(longobj);
+ 	
+ finally:
+ 	Py_XDECREF(longobj);
+ 	return x;
+ #endif
+ }
+ 
+ 
  long
  PyObject_Hash(v)
  	PyObject *v;
***************
*** 513,520 ****
  	PyTypeObject *tp = v->ob_type;
  	if (tp->tp_hash != NULL)
  		return (*tp->tp_hash)(v);
! 	if (tp->tp_compare == NULL)
! 		return (long) v; /* Use address as hash value */
  	/* If there's a cmp but no hash defined, the object can't be hashed */
  	PyErr_SetString(PyExc_TypeError, "unhashable type");
  	return -1;
--- 571,579 ----
  	PyTypeObject *tp = v->ob_type;
  	if (tp->tp_hash != NULL)
  		return (*tp->tp_hash)(v);
! 	if (tp->tp_compare == NULL) {
! 		return _Py_HashPointer(v); /* Use address as hash value */
! 	}
  	/* If there's a cmp but no hash defined, the object can't be hashed */
  	PyErr_SetString(PyExc_TypeError, "unhashable type");
  	return -1;
diff -c3  /home/trentm/main/contrib/python/dist/src/Objects/floatobject.c ./Objects/floatobject.c
*** /home/trentm/main/contrib/python/dist/src/Objects/floatobject.c	Wed May  3 16:44:34 2000
--- ./Objects/floatobject.c	Thu May 11 20:27:05 2000
***************
*** 59,65 ****
--- 59,71 ----
  #endif
  
  #ifndef LONG_MAX
+ #if SIZEOF_LONG == 4
  #define LONG_MAX 0X7FFFFFFFL
+ #elif SIZEOF_LONG == 8
+ #define LONG_MAX 0X7FFFFFFFFFFFFFFFL
+ #else
+ #error "could not set LONG_MAX"
+ #endif
  #endif
  
  #ifndef LONG_MIN
***************
*** 357,368 ****
  	return (i < j) ? -1 : (i > j) ? 1 : 0;
  }
  
  static long
  float_hash(v)
  	PyFloatObject *v;
  {
  	double intpart, fractpart;
- 	int expo;
  	long x;
  	/* This is designed so that Python numbers with the same
  	   value hash to the same value, otherwise comparisons
--- 363,374 ----
  	return (i < j) ? -1 : (i > j) ? 1 : 0;
  }
  
+ 
  static long
  float_hash(v)
  	PyFloatObject *v;
  {
  	double intpart, fractpart;
  	long x;
  	/* This is designed so that Python numbers with the same
  	   value hash to the same value, otherwise comparisons
***************
*** 379,385 ****
  #endif
  
  	if (fractpart == 0.0) {
! 		if (intpart > 0x7fffffffL || -intpart > 0x7fffffffL) {
  			/* Convert to long int and use its hash... */
  			PyObject *w = PyLong_FromDouble(v->ob_fval);
  			if (w == NULL)
--- 385,391 ----
  #endif
  
  	if (fractpart == 0.0) {
! 		if (intpart > LONG_MAX || -intpart > LONG_MAX) {
  			/* Convert to long int and use its hash... */
  			PyObject *w = PyLong_FromDouble(v->ob_fval);
  			if (w == NULL)
***************
*** 393,406 ****
  	else {
  		/* Note -- if you change this code, also change the copy
  		   in complexobject.c */
! 		long hipart;
! 		fractpart = frexp(fractpart, &expo);
! 		fractpart = fractpart * 2147483648.0; /* 2**31 */
! 		hipart = (long)fractpart; /* Take the top 32 bits */
! 		fractpart = (fractpart - (double)hipart) * 2147483648.0;
! 						/* Get the next 32 bits */
! 		x = hipart + (long)fractpart + (long)intpart + (expo << 15);
! 						/* Combine everything */
  	}
  	if (x == -1)
  		x = -2;
--- 399,407 ----
  	else {
  		/* Note -- if you change this code, also change the copy
  		   in complexobject.c */
! 		x = _Py_HashDouble(v->ob_fval);
! 		if (x == -1)
! 			return -1;
  	}
  	if (x == -1)
  		x = -2;
diff -c3  /home/trentm/main/contrib/python/dist/src/Objects/complexobject.c ./Objects/complexobject.c
*** /home/trentm/main/contrib/python/dist/src/Objects/complexobject.c	Wed May  3 16:44:34 2000
--- ./Objects/complexobject.c	Thu May 11 20:48:26 2000
***************
*** 285,292 ****
  	PyComplexObject *v;
  {
  	double intpart, fractpart;
! 	int expo;
! 	long hipart, x;
  	/* This is designed so that Python numbers with the same
  	   value hash to the same value, otherwise comparisons
  	   of mapping keys will turn out weird */
--- 285,291 ----
  	PyComplexObject *v;
  {
  	double intpart, fractpart;
! 	long x;
  	/* This is designed so that Python numbers with the same
  	   value hash to the same value, otherwise comparisons
  	   of mapping keys will turn out weird */
***************
*** 302,308 ****
  #endif
  
  	if (fractpart == 0.0 && v->cval.imag == 0.0) {
! 		if (intpart > 0x7fffffffL || -intpart > 0x7fffffffL) {
  			/* Convert to long int and use its hash... */
  			PyObject *w = PyLong_FromDouble(v->cval.real);
  			if (w == NULL)
--- 301,307 ----
  #endif
  
  	if (fractpart == 0.0 && v->cval.imag == 0.0) {
! 		if (intpart > LONG_MAX || -intpart > LONG_MAX) {
  			/* Convert to long int and use its hash... */
  			PyObject *w = PyLong_FromDouble(v->cval.real);
  			if (w == NULL)
***************
*** 314,349 ****
  		x = (long)intpart;
  	}
  	else {
! 		fractpart = frexp(fractpart, &expo);
! 		fractpart = fractpart * 2147483648.0; /* 2**31 */
! 		hipart = (long)fractpart; /* Take the top 32 bits */
! 		fractpart = (fractpart - (double)hipart) * 2147483648.0;
! 						/* Get the next 32 bits */
! 		x = hipart + (long)fractpart + (long)intpart + (expo << 15);
! 						/* Combine everything */
  
  		if (v->cval.imag != 0.0) { /* Hash the imaginary part */
  			/* XXX Note that this hashes complex(x, y)
  			   to the same value as complex(y, x).
  			   Still better than it used to be :-) */
! #ifdef MPW
! 			{
! 				extended e;
! 				fractpart = modf(v->cval.imag, &e);
! 				intpart = e;
! 			}
! #else
! 			fractpart = modf(v->cval.imag, &intpart);
! #endif
! 			fractpart = frexp(fractpart, &expo);
! 			fractpart = fractpart * 2147483648.0; /* 2**31 */
! 			hipart = (long)fractpart; /* Take the top 32 bits */
! 			fractpart =
! 				(fractpart - (double)hipart) * 2147483648.0;
! 						/* Get the next 32 bits */
! 			x ^= hipart + (long)fractpart +
! 				(long)intpart + (expo << 15);
! 						/* Combine everything */
  		}
  	}
  	if (x == -1)
--- 313,330 ----
  		x = (long)intpart;
  	}
  	else {
! 		x = _Py_HashDouble(v->cval.real);
! 		if (x == -1)
! 			return -1;
  
  		if (v->cval.imag != 0.0) { /* Hash the imaginary part */
  			/* XXX Note that this hashes complex(x, y)
  			   to the same value as complex(y, x).
  			   Still better than it used to be :-) */
! 			long y = _Py_HashDouble(v->cval.imag);
! 			if (y == -1)
! 				return -1;
! 			x += y;
  		}
  	}
  	if (x == -1)
diff -c3  /home/trentm/main/contrib/python/dist/src/Objects/classobject.c ./Objects/classobject.c
*** /home/trentm/main/contrib/python/dist/src/Objects/classobject.c	Wed May  3 16:44:34 2000
--- ./Objects/classobject.c	Thu May 11 20:31:51 2000
***************
*** 823,832 ****
  		func = instance_getattr(inst, cmpstr);
  		if (func == NULL) {
  			PyErr_Clear();
! 			outcome = (long)inst;
! 			if (outcome == -1)
! 				outcome = -2;
! 			return outcome;
  		}
  		PyErr_SetString(PyExc_TypeError, "unhashable instance");
  		return -1;
--- 823,829 ----
  		func = instance_getattr(inst, cmpstr);
  		if (func == NULL) {
  			PyErr_Clear();
! 			return _Py_HashPointer(inst);
  		}
  		PyErr_SetString(PyExc_TypeError, "unhashable instance");
  		return -1;
***************
*** 172,178 ****
  meth_hash(a)
  	PyCFunctionObject *a;
  {
! 	long x;
  	if (a->m_self == NULL)
  		x = 0;
  	else {
--- 172,178 ----
  meth_hash(a)
  	PyCFunctionObject *a;
  {
! 	long x,y;
  	if (a->m_self == NULL)
  		x = 0;
  	else {
***************
*** 180,186 ****
  		if (x == -1)
  			return -1;
  	}
! 	return x ^ (long) a->m_ml->ml_meth;
  }
  
  PyTypeObject PyCFunction_Type = {
--- 180,192 ----
  		if (x == -1)
  			return -1;
  	}
! 	y = _Py_HashPointer(a->m_ml->ml_meth);
! 	if (y == -1)
! 		return -1;
! 	x ^= y;
! 	if (x == -1)
! 		x = -2;
! 	return x;
  }
  
  PyTypeObject PyCFunction_Type = {
diff -c3  /home/trentm/main/contrib/python/dist/src/Objects/funcobject.c ./Objects/funcobject.c
*** /home/trentm/main/contrib/python/dist/src/Objects/funcobject.c	Wed May  3 16:44:35 2000
--- ./Objects/funcobject.c	Thu May 11 20:28:45 2000
***************
*** 231,240 ****
  func_hash(f)
  	PyFunctionObject *f;
  {
! 	long h;
  	h = PyObject_Hash(f->func_code);
  	if (h == -1) return h;
! 	h = h ^ (long)f->func_globals;
  	if (h == -1) h = -2;
  	return h;
  }
--- 231,242 ----
  func_hash(f)
  	PyFunctionObject *f;
  {
! 	long h,x;
  	h = PyObject_Hash(f->func_code);
  	if (h == -1) return h;
! 	x = _Py_HashPointer(f->func_globals);
! 	if (x == -1) return x;
! 	h ^= x;
  	if (h == -1) h = -2;
  	return h;
  }
diff -c3  /home/trentm/main/contrib/python/dist/src/PC/winreg.c ./PC/winreg.c
*** /home/trentm/main/contrib/python/dist/src/PC/winreg.c	Wed May  3 16:44:37 2000
--- ./PC/winreg.c	Thu May 11 20:31:22 2000
***************
*** 421,427 ****
  	/* Just use the address.
  	   XXX - should we use the handle value?
  	*/
! 	return (long)ob;
  }
  
  
--- 421,427 ----
  	/* Just use the address.
  	   XXX - should we use the handle value?
  	*/
! 	return _Py_HashPointer(ob);
  }
  
  


And test_hash.py and test_hash are attached.

-- 
Trent Mick
trentm@activestate.com

--huq684BweRXVnRxX
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="test_hash.py"

# test the invariant that
#   iff a==b then hash(a)==hash(b)
#

import test_support


def same_hash(*objlist):
	# hash each object given an raise TestFailed if
	# the hash values are not all the same
	hashed = map(hash, objlist)
	for h in hashed[1:]:
		if h != hashed[0]:
			raise TestFailed, "hashed values differ: %s" % `objlist`



same_hash(1, 1L, 1.0, 1.0+0.0j)
same_hash(int(1), long(1), float(1), complex(1))

same_hash(long(1.23e300), float(1.23e300))

same_hash(float(0.5), complex(0.5, 0.0))




--huq684BweRXVnRxX
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=test_hash

test_hash

--huq684BweRXVnRxX--