[Python-Dev] new unicode hash calculation

Mon, 10 Jul 2000 13:51:05 -0500

> mal wrote:
> 
> > * change hash value calculation to work on the Py_UNICODE data
> >   instead of creating a default encoded cached object (what
> >   now is .utf8str)

[effbot]
> is this what you had in mind?
> 
> static long
> unicode_hash(PyUnicodeObject *self)
> {
>     register int len;
>     register Py_UNICODE *p;
>     register long x;
> 
>     if (self->hash != -1)
>         return self->hash;
>     len = PyUnicode_GET_SIZE(self);
>     p = PyUnicode_AS_UNICODE(self);
>     x = *p << 7;
>     while (--len >= 0)
>         x = (1000003*x) ^ *p++;
>     x ^= a->ob_size;
>     if (x == -1)
>         x = -2;
>     self->hash = x;
>     return x;
> }

You mean this (fixed a->ob_size, restored comment):

static long
unicode_hash(PyUnicodeObject *self)
{
    /* Compute (and cache in self->hash) the hash value of a Unicode
       object, working directly on its Py_UNICODE buffer.

       Since Unicode objects compare equal to their ASCII string
       counterparts, both must hash to the same value so that strings
       and Unicode objects behave in the same way as dictionary keys.
       NOTE(review): this presumably mirrors the recurrence used by the
       8-bit string hash -- confirm against string_hash() before
       changing the multiplier, the shift or the final length mix.

       A cached value of -1 means "not computed yet"; a computed hash
       of -1 is therefore remapped to -2 before it is stored.

       The accumulator is unsigned on purpose: 1000003*x overflows for
       almost every non-trivial input, and signed overflow is undefined
       behaviour in C.  Unsigned arithmetic wraps modulo 2**N, which
       yields the same bit pattern the signed version produced on
       two's-complement machines. */

    int len;
    Py_UNICODE *p;
    unsigned long x;
    long hash;

    if (self->hash != -1) {
        return self->hash;
    }

    len = PyUnicode_GET_SIZE(self);
    p = PyUnicode_AS_UNICODE(self);

    /* Seed from the first code unit.  NOTE(review): for len == 0 this
       still reads *p, i.e. it assumes the buffer always holds at least
       one (terminating) element -- confirm against the allocator. */
    x = (unsigned long)*p << 7;
    while (--len >= 0) {
        x = (1000003UL * x) ^ (unsigned long)*p++;
    }
    /* len has been consumed by the loop, so re-read the size. */
    x ^= (unsigned long)PyUnicode_GET_SIZE(self);

    hash = (long)x;
    if (hash == -1) {
        hash = -2;
    }
    self->hash = hash;
    return hash;
}

--Guido van Rossum (home page: http://dinsdale.python.org/~guido/)