[Python-checkins] CVS: python/dist/src/Objects unicodeobject.c,2.41,2.42
Fredrik Lundh
python-dev@python.org
Mon, 10 Jul 2000 11:27:50 -0700
Update of /cvsroot/python/python/dist/src/Objects
In directory slayer.i.sourceforge.net:/tmp/cvs-serv20359/objects
Modified Files:
unicodeobject.c
Log Message:
- Changed the hash calculation for Unicode strings. The new
value is calculated from the character values, in a way
that makes sure an 8-bit ASCII string and a Unicode string
with the same contents get the same hash value.
(As a side effect, this also works for ISO Latin-1 strings.)
For more details, see the python-dev discussion.
Index: unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.41
retrieving revision 2.42
diff -C2 -r2.41 -r2.42
*** unicodeobject.c 2000/07/07 17:51:08 2.41
--- unicodeobject.c 2000/07/10 18:27:47 2.42
***************
*** 3472,3495 ****
unicode_hash(PyUnicodeObject *self)
{
! long hash;
! PyObject *utf8;
! /* Since Unicode objects compare equal to their UTF-8 string
! counterparts, they should also use the UTF-8 strings as basis
! for their hash value. This is needed to assure that strings and
! Unicode objects behave in the same way as dictionary
! keys. Unfortunately, this costs some performance and also some
! memory if the cached UTF-8 representation is not used later
! on. */
if (self->hash != -1)
return self->hash;
! utf8 = _PyUnicode_AsUTF8String((PyObject *)self, NULL);
! if (utf8 == NULL)
! return -1;
! hash = PyObject_Hash(utf8);
! if (hash == -1)
! return -1;
! self->hash = hash;
! return hash;
}
--- 3472,3497 ----
unicode_hash(PyUnicodeObject *self)
{
! /* Since Unicode objects compare equal to their ASCII string
! counterparts, they should use the individual character values
! as basis for their hash value. This is needed to assure that
! strings and Unicode objects behave in the same way as
! dictionary keys. */
! register int len;
! register Py_UNICODE *p;
! register long x;
!
if (self->hash != -1)
return self->hash;
! len = PyUnicode_GET_SIZE(self);
! p = PyUnicode_AS_UNICODE(self);
! x = *p << 7;
! while (--len >= 0)
! x = (1000003*x) ^ *p++;
! x ^= PyUnicode_GET_SIZE(self);
! if (x == -1)
! x = -2;
! self->hash = x;
! return x;
}