[Python-checkins] CVS: python/dist/src/Objects unicodeobject.c,2.33,2.34

M.-A. Lemburg python-dev@python.org
Tue, 4 Jul 2000 02:51:10 -0700


Update of /cvsroot/python/python/dist/src/Objects
In directory slayer.i.sourceforge.net:/tmp/cvs-serv16660/Objects

Modified Files:
	unicodeobject.c 
Log Message:
Bill Tutt:
Make unicode_compare a true UTF-16 compare function (includes
support for surrogates).

Index: unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.33
retrieving revision 2.34
diff -C2 -r2.33 -r2.34
*** unicodeobject.c	2000/06/30 14:58:20	2.33
--- unicodeobject.c	2000/07/04 09:51:07	2.34
***************
*** 3046,3053 ****
--- 3046,3066 ----
  }
  
+ /* speedy UTF-16 code point order comparison */
+ /* gleaned from: */
+ /* http://www-4.ibm.com/software/developer/library/utf16.html?dwzone=unicode */
+ 
+ static unsigned short utf16Fixup[32] = /* per-range offsets, indexed by (code unit >> 11): one entry per 0x800-wide block */
+ { /* unicode_compare adds the entry to a code unit above 0xD000 before comparing; relies on wraparound in a 16-bit unsigned short (assumed, per the "16 bits" note there) */
+     0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000-0x3FFF: unchanged */
+     0, 0, 0, 0, 0, 0, 0, 0, /* 0x4000-0x7FFF: unchanged */
+     0, 0, 0, 0, 0, 0, 0, 0, /* 0x8000-0xBFFF: unchanged */
+     0, 0, 0, 0x2000, 0xf800, 0xf800, 0xf800, 0xf800 /* 0xC000-0xD7FF: unchanged; 0xD800-0xDFFF (surrogates): +0x2000 -> 0xF800-0xFFFF; 0xE000-0xFFFF: +0xf800, i.e. -0x800 mod 2^16 -> 0xD800-0xF7FF */
+ };
+ 
  static int
  unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
  {
      int len1, len2;
+ 
      Py_UNICODE *s1 = str1->str;
      Py_UNICODE *s2 = str2->str;
***************
*** 3055,3065 ****
      len1 = str1->length;
      len2 = str2->length;
! 
      while (len1 > 0 && len2 > 0) {
!         int cmp = (*s1++) - (*s2++);
!         if (cmp)
!             /* This should make Christian happy! */
!             return (cmp < 0) ? -1 : (cmp != 0);
!         len1--, len2--;
      }
  
--- 3068,3088 ----
      len1 = str1->length;
      len2 = str2->length;
!     
      while (len1 > 0 && len2 > 0) {
! 	unsigned short c1, c2; /* 16 bits */
! 	int diff; /* 32 bits */
! 
!         c1 = *s1++;
!         c2 = *s2++;
! 	if (c1 > (1<<11) * 26)
! 	    c1 += utf16Fixup[c1>>11];
! 	if (c2 > (1<<11) * 26)
!             c2 += utf16Fixup[c2>>11];
!         
!         /* now c1 and c2 are in UTF-32-compatible order */
!         diff = (int)c1 - (int)c2;
!         if (diff)
!             return (diff < 0) ? -1 : (diff != 0);
!         len1--; len2--;
      }