[Python-checkins] CVS: python/dist/src/Objects unicodeobject.c,2.41,2.42

Fredrik Lundh python-dev@python.org
Mon, 10 Jul 2000 11:27:50 -0700


Update of /cvsroot/python/python/dist/src/Objects
In directory slayer.i.sourceforge.net:/tmp/cvs-serv20359/objects

Modified Files:
	unicodeobject.c 
Log Message:


- Changed the hash calculation for Unicode strings.  The new
  value is calculated from the character values, in a way
  that makes sure an 8-bit ASCII string and a Unicode string
  with the same contents get the same hash value.

  (As a side effect, this also works for ISO Latin-1 strings.)

  For more details, see the python-dev discussion.

Index: unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.41
retrieving revision 2.42
diff -C2 -r2.41 -r2.42
*** unicodeobject.c	2000/07/07 17:51:08	2.41
--- unicodeobject.c	2000/07/10 18:27:47	2.42
***************
*** 3472,3495 ****
  unicode_hash(PyUnicodeObject *self)
  {
!     long hash;
!     PyObject *utf8;
  
!     /* Since Unicode objects compare equal to their UTF-8 string
!        counterparts, they should also use the UTF-8 strings as basis
!        for their hash value. This is needed to assure that strings and
!        Unicode objects behave in the same way as dictionary
!        keys. Unfortunately, this costs some performance and also some
!        memory if the cached UTF-8 representation is not used later
!        on. */
      if (self->hash != -1)
  	return self->hash;
!     utf8 = _PyUnicode_AsUTF8String((PyObject *)self, NULL);
!     if (utf8 == NULL)
! 	return -1;
!     hash = PyObject_Hash(utf8);
!     if (hash == -1)
! 	return -1;
!     self->hash = hash;
!     return hash;
  }
  
--- 3472,3497 ----
  unicode_hash(PyUnicodeObject *self)
  {
!     /* Since Unicode objects compare equal to their ASCII string
!        counterparts, they should use the individual character values
!        as basis for their hash value.  This is needed to assure that
!        strings and Unicode objects behave in the same way as
!        dictionary keys. */
  
!     register int len;
!     register Py_UNICODE *p;
!     register long x;
! 
      if (self->hash != -1)
  	return self->hash;
!     len = PyUnicode_GET_SIZE(self);
!     p = PyUnicode_AS_UNICODE(self);
!     x = *p << 7;
!     while (--len >= 0)
! 	x = (1000003*x) ^ *p++;
!     x ^= PyUnicode_GET_SIZE(self);
!     if (x == -1)
! 	x = -2;
!     self->hash = x;
!     return x;
  }