[Python-checkins] CVS: python/dist/src/Objects unicodeobject.c,2.33,2.34
M.-A. Lemburg
python-dev@python.org
Tue, 4 Jul 2000 02:51:10 -0700
Update of /cvsroot/python/python/dist/src/Objects
In directory slayer.i.sourceforge.net:/tmp/cvs-serv16660/Objects
Modified Files:
unicodeobject.c
Log Message:
Bill Tutt:
Make unicode_compare a true UTF-16 compare function (includes
support for surrogates).
Index: unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.33
retrieving revision 2.34
diff -C2 -r2.33 -r2.34
*** unicodeobject.c 2000/06/30 14:58:20 2.33
--- unicodeobject.c 2000/07/04 09:51:07 2.34
***************
*** 3046,3053 ****
--- 3046,3066 ----
}
+ /* speedy UTF-16 code point order comparison */
+ /* gleaned from: */
+ /* http://www-4.ibm.com/software/developer/library/utf16.html?dwzone=unicode */
+
+ static unsigned short utf16Fixup[32] =
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0x2000, 0xf800, 0xf800, 0xf800, 0xf800
+ };
+
static int
unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
{
int len1, len2;
+
Py_UNICODE *s1 = str1->str;
Py_UNICODE *s2 = str2->str;
***************
*** 3055,3065 ****
len1 = str1->length;
len2 = str2->length;
!
while (len1 > 0 && len2 > 0) {
! int cmp = (*s1++) - (*s2++);
! if (cmp)
! /* This should make Christian happy! */
! return (cmp < 0) ? -1 : (cmp != 0);
! len1--, len2--;
}
--- 3068,3088 ----
len1 = str1->length;
len2 = str2->length;
!
while (len1 > 0 && len2 > 0) {
! unsigned short c1, c2; /* 16 bits */
! int diff; /* 32 bits */
!
! c1 = *s1++;
! c2 = *s2++;
! if (c1 > (1<<11) * 26)
! c1 += utf16Fixup[c1>>11];
! if (c2 > (1<<11) * 26)
! c2 += utf16Fixup[c2>>11];
!
! /* now c1 and c2 are in UTF-32-compatible order */
! diff = (int)c1 - (int)c2;
! if (diff)
! return (diff < 0) ? -1 : (diff != 0);
! len1--; len2--;
}