[Python-checkins] r84177 - in python/branches/py3k: Include/unicodeobject.h Lib/test/test_unicode.py Lib/test/test_unicodedata.py Misc/NEWS Objects/unicodectype.c Objects/unicodetype_db.h Tools/unicode/makeunicodedata.py

amaury.forgeotdarc python-checkins at python.org
Wed Aug 18 22:44:58 CEST 2010


Author: amaury.forgeotdarc
Date: Wed Aug 18 22:44:58 2010
New Revision: 84177

Log:
#5127: Even on narrow unicode builds, the C functions that access the Unicode
Database (Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others) now accept
and return characters from the full Unicode range (Py_UCS4).

The differences visible from Python code are few:
- unicodedata.numeric(), unicodedata.decimal() and unicodedata.digit()
  now return the correct value for large code points
- repr() may consider more characters as printable.


Modified:
   python/branches/py3k/Include/unicodeobject.h
   python/branches/py3k/Lib/test/test_unicode.py
   python/branches/py3k/Lib/test/test_unicodedata.py
   python/branches/py3k/Misc/NEWS
   python/branches/py3k/Objects/unicodectype.c
   python/branches/py3k/Objects/unicodetype_db.h
   python/branches/py3k/Tools/unicode/makeunicodedata.py

Modified: python/branches/py3k/Include/unicodeobject.h
==============================================================================
--- python/branches/py3k/Include/unicodeobject.h	(original)
+++ python/branches/py3k/Include/unicodeobject.h	Wed Aug 18 22:44:58 2010
@@ -221,24 +221,6 @@
 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
 # define _PyUnicode_Fini _PyUnicodeUCS2_Fini
 # define _PyUnicode_Init _PyUnicodeUCS2_Init
-# define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha
-# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit
-# define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit
-# define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
-# define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
-# define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
-# define _PyUnicode_IsPrintable _PyUnicodeUCS2_IsPrintable
-# define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
-# define _PyUnicode_IsXidStart _PyUnicodeUCS2_IsXidStart
-# define _PyUnicode_IsXidContinue _PyUnicodeUCS2_IsXidContinue
-# define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
-# define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
-# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit
-# define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit
-# define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase
-# define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric
-# define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase
-# define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase
 
 #else
 
@@ -322,24 +304,6 @@
 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
 # define _PyUnicode_Fini _PyUnicodeUCS4_Fini
 # define _PyUnicode_Init _PyUnicodeUCS4_Init
-# define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha
-# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit
-# define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit
-# define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
-# define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
-# define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
-# define _PyUnicode_IsPrintable _PyUnicodeUCS4_IsPrintable
-# define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
-# define _PyUnicode_IsXidStart _PyUnicodeUCS4_IsXidStart
-# define _PyUnicode_IsXidContinue _PyUnicodeUCS4_IsXidContinue
-# define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
-# define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
-# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit
-# define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit
-# define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase
-# define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric
-# define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase
-# define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase
 
 
 #endif
@@ -351,7 +315,7 @@
    configure Python using --with-wctype-functions.  This reduces the
    interpreter's code size. */
 
-#if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
+#if defined(Py_UNICODE_WIDE) && defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
 
 #include <wctype.h>
 
@@ -1542,75 +1506,75 @@
 */
 
 PyAPI_FUNC(int) _PyUnicode_IsLowercase(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsUppercase(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsXidStart(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
-    const Py_UNICODE ch         /* Unicode character */
+    const Py_UCS4 ch         /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
-    const Py_UNICODE ch         /* Unicode character */
+    const Py_UCS4 ch         /* Unicode character */
     );
 
-PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase(
-    Py_UNICODE ch       /* Unicode character */
+PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
+    Py_UCS4 ch       /* Unicode character */
     );
 
-PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase(
-    Py_UNICODE ch       /* Unicode character */
+PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
+    Py_UCS4 ch       /* Unicode character */
     );
 
-PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase(
-    Py_UNICODE ch       /* Unicode character */
+PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_ToDigit(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(double) _PyUnicode_ToNumeric(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsDigit(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsNumeric(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsPrintable(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsAlpha(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(size_t) Py_UNICODE_strlen(

Modified: python/branches/py3k/Lib/test/test_unicode.py
==============================================================================
--- python/branches/py3k/Lib/test/test_unicode.py	(original)
+++ python/branches/py3k/Lib/test/test_unicode.py	Wed Aug 18 22:44:58 2010
@@ -1353,6 +1353,10 @@
         self.assertEqual(repr(s1()), '\\n')
         self.assertEqual(repr(s2()), '\\n')
 
+    def test_printable_repr(self):
+        self.assertEqual(repr('\U00010000'), "'%c'" % (0x10000,)) # printable
+        self.assertEqual(repr('\U00011000'), "'\\U00011000'")     # nonprintable
+
     def test_expandtabs_overflows_gracefully(self):
         # This test only affects 32-bit platforms because expandtabs can only take
         # an int as the max value, not a 64-bit C long.  If expandtabs is changed

Modified: python/branches/py3k/Lib/test/test_unicodedata.py
==============================================================================
--- python/branches/py3k/Lib/test/test_unicodedata.py	(original)
+++ python/branches/py3k/Lib/test/test_unicodedata.py	Wed Aug 18 22:44:58 2010
@@ -294,6 +294,12 @@
                 self.assertEqual(len(lines), 1,
                                  r"\u%.4x should not be a linebreak" % i)
 
+    def test_UCS4(self):
+        # unicodedata should work with code points outside the BMP
+        # even on a narrow Unicode build
+        self.assertEqual(self.db.category(u"\U0001012A"), "No")
+        self.assertEqual(self.db.numeric(u"\U0001012A"), 9000)
+
 def test_main():
     test.support.run_unittest(
         UnicodeMiscTest,

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Wed Aug 18 22:44:58 2010
@@ -12,6 +12,12 @@
 Core and Builtins
 -----------------
 
+- Issue #5127: The C functions that access the Unicode Database now accept and
+  return characters from the full Unicode range, even on narrow unicode builds
+  (Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others).  A visible difference
+  in Python is that unicodedata.numeric() now returns the correct value for
+  large code points, and repr() may consider more characters as printable.
+
 - Issue #9425: Create PyModule_GetFilenameObject() function to get the filename
   as a unicode object, instead of a byte string. Function needed to support
   unencodable filenames. Deprecate PyModule_GetFilename() in favor on the new

Modified: python/branches/py3k/Objects/unicodectype.c
==============================================================================
--- python/branches/py3k/Objects/unicodectype.c	(original)
+++ python/branches/py3k/Objects/unicodectype.c	Wed Aug 18 22:44:58 2010
@@ -26,9 +26,9 @@
 #define NUMERIC_MASK 0x1000
 
 typedef struct {
-    const Py_UNICODE upper;
-    const Py_UNICODE lower;
-    const Py_UNICODE title;
+    const Py_UCS4 upper;
+    const Py_UCS4 lower;
+    const Py_UCS4 title;
     const unsigned char decimal;
     const unsigned char digit;
     const unsigned short flags;
@@ -37,15 +37,13 @@
 #include "unicodetype_db.h"
 
 static const _PyUnicode_TypeRecord *
-gettyperecord(Py_UNICODE code)
+gettyperecord(Py_UCS4 code)
 {
     int index;
 
-#ifdef Py_UNICODE_WIDE
     if (code >= 0x110000)
         index = 0;
     else
-#endif
     {
         index = index1[(code>>SHIFT)];
         index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];
@@ -57,7 +55,7 @@
 /* Returns the titlecase Unicode characters corresponding to ch or just
    ch if no titlecase mapping is known. */
 
-Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
+Py_UCS4 _PyUnicode_ToTitlecase(register Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     int delta = ctype->title;
@@ -74,7 +72,7 @@
 /* Returns 1 for Unicode characters having the category 'Lt', 0
    otherwise. */
 
-int _PyUnicode_IsTitlecase(Py_UNICODE ch)
+int _PyUnicode_IsTitlecase(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
@@ -84,7 +82,7 @@
 /* Returns 1 for Unicode characters having the XID_Start property, 0
    otherwise. */
 
-int _PyUnicode_IsXidStart(Py_UNICODE ch)
+int _PyUnicode_IsXidStart(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
@@ -94,7 +92,7 @@
 /* Returns 1 for Unicode characters having the XID_Continue property,
    0 otherwise. */
 
-int _PyUnicode_IsXidContinue(Py_UNICODE ch)
+int _PyUnicode_IsXidContinue(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
@@ -104,14 +102,14 @@
 /* Returns the integer decimal (0-9) for Unicode characters having
    this property, -1 otherwise. */
 
-int _PyUnicode_ToDecimalDigit(Py_UNICODE ch)
+int _PyUnicode_ToDecimalDigit(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
     return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1;
 }
 
-int _PyUnicode_IsDecimalDigit(Py_UNICODE ch)
+int _PyUnicode_IsDecimalDigit(Py_UCS4 ch)
 {
     if (_PyUnicode_ToDecimalDigit(ch) < 0)
         return 0;
@@ -121,14 +119,14 @@
 /* Returns the integer digit (0-9) for Unicode characters having
    this property, -1 otherwise. */
 
-int _PyUnicode_ToDigit(Py_UNICODE ch)
+int _PyUnicode_ToDigit(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
     return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1;
 }
 
-int _PyUnicode_IsDigit(Py_UNICODE ch)
+int _PyUnicode_IsDigit(Py_UCS4 ch)
 {
     if (_PyUnicode_ToDigit(ch) < 0)
         return 0;
@@ -138,7 +136,7 @@
 /* Returns the numeric value as double for Unicode characters having
    this property, -1.0 otherwise. */
 
-int _PyUnicode_IsNumeric(Py_UNICODE ch)
+int _PyUnicode_IsNumeric(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
@@ -158,7 +156,7 @@
       * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR)
       * Zs (Separator, Space) other than ASCII space('\x20').
 */
-int _PyUnicode_IsPrintable(Py_UNICODE ch)
+int _PyUnicode_IsPrintable(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
@@ -170,7 +168,7 @@
 /* Returns 1 for Unicode characters having the category 'Ll', 0
    otherwise. */
 
-int _PyUnicode_IsLowercase(Py_UNICODE ch)
+int _PyUnicode_IsLowercase(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
@@ -180,7 +178,7 @@
 /* Returns 1 for Unicode characters having the category 'Lu', 0
    otherwise. */
 
-int _PyUnicode_IsUppercase(Py_UNICODE ch)
+int _PyUnicode_IsUppercase(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
@@ -190,7 +188,7 @@
 /* Returns the uppercase Unicode characters corresponding to ch or just
    ch if no uppercase mapping is known. */
 
-Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
+Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     int delta = ctype->upper;
@@ -204,7 +202,7 @@
 /* Returns the lowercase Unicode characters corresponding to ch or just
    ch if no lowercase mapping is known. */
 
-Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
+Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     int delta = ctype->lower;
@@ -218,7 +216,7 @@
 /* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
    'Lo' or 'Lm',  0 otherwise. */
 
-int _PyUnicode_IsAlpha(Py_UNICODE ch)
+int _PyUnicode_IsAlpha(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
@@ -230,27 +228,27 @@
 /* Export the interfaces using the wchar_t type for portability
    reasons:  */
 
-int _PyUnicode_IsLowercase(Py_UNICODE ch)
+int _PyUnicode_IsLowercase(Py_UCS4 ch)
 {
     return iswlower(ch);
 }
 
-int _PyUnicode_IsUppercase(Py_UNICODE ch)
+int _PyUnicode_IsUppercase(Py_UCS4 ch)
 {
     return iswupper(ch);
 }
 
-Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
+Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch)
 {
     return towlower(ch);
 }
 
-Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
+Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch)
 {
     return towupper(ch);
 }
 
-int _PyUnicode_IsAlpha(Py_UNICODE ch)
+int _PyUnicode_IsAlpha(Py_UCS4 ch)
 {
     return iswalpha(ch);
 }

Modified: python/branches/py3k/Objects/unicodetype_db.h
==============================================================================
--- python/branches/py3k/Objects/unicodetype_db.h	(original)
+++ python/branches/py3k/Objects/unicodetype_db.h	Wed Aug 18 22:44:58 2010
@@ -1980,7 +1980,7 @@
 /* Returns the numeric value as double for Unicode characters
  * having this property, -1.0 otherwise.
  */
-double _PyUnicode_ToNumeric(Py_UNICODE ch)
+double _PyUnicode_ToNumeric(Py_UCS4 ch)
 {
     switch (ch) {
     case 0x0F33:
@@ -2031,7 +2031,6 @@
     case 0xABF0:
     case 0xF9B2:
     case 0xFF10:
-#ifdef Py_UNICODE_WIDE
     case 0x1018A:
     case 0x104A0:
     case 0x1D7CE:
@@ -2041,7 +2040,6 @@
     case 0x1D7F6:
     case 0x1F100:
     case 0x1F101:
-#endif
         return (double) 0.0;
     case 0x0031:
     case 0x00B9:
@@ -2105,7 +2103,6 @@
     case 0xAA51:
     case 0xABF1:
     case 0xFF11:
-#ifdef Py_UNICODE_WIDE
     case 0x10107:
     case 0x10142:
     case 0x10158:
@@ -2135,7 +2132,6 @@
     case 0x1D7F7:
     case 0x1F102:
     case 0x2092A:
-#endif
         return (double) 1.0;
     case 0x2152:
         return (double) 1.0/10.0;
@@ -2147,46 +2143,36 @@
     case 0x0F2A:
     case 0x2CFD:
     case 0xA831:
-#ifdef Py_UNICODE_WIDE
     case 0x10141:
     case 0x10175:
     case 0x10176:
     case 0x10E7B:
-#endif
         return (double) 1.0/2.0;
     case 0x2153:
-#ifdef Py_UNICODE_WIDE
     case 0x10E7D:
     case 0x1245A:
     case 0x1245D:
-#endif
         return (double) 1.0/3.0;
     case 0x00BC:
     case 0x09F7:
     case 0x0D73:
     case 0xA830:
-#ifdef Py_UNICODE_WIDE
     case 0x10140:
     case 0x10E7C:
     case 0x12460:
     case 0x12462:
-#endif
         return (double) 1.0/4.0;
     case 0x2155:
         return (double) 1.0/5.0;
     case 0x2159:
-#ifdef Py_UNICODE_WIDE
     case 0x12461:
-#endif
         return (double) 1.0/6.0;
     case 0x2150:
         return (double) 1.0/7.0;
     case 0x09F5:
     case 0x215B:
     case 0xA834:
-#ifdef Py_UNICODE_WIDE
     case 0x1245F:
-#endif
         return (double) 1.0/8.0;
     case 0x2151:
         return (double) 1.0/9.0;
@@ -2210,7 +2196,6 @@
     case 0x62FE:
     case 0xF973:
     case 0xF9FD:
-#ifdef Py_UNICODE_WIDE
     case 0x10110:
     case 0x10149:
     case 0x10150:
@@ -2229,7 +2214,6 @@
     case 0x10B7C:
     case 0x10E69:
     case 0x1D369:
-#endif
         return (double) 10.0;
     case 0x0BF1:
     case 0x0D71:
@@ -2239,7 +2223,6 @@
     case 0x4F70:
     case 0x767E:
     case 0x964C:
-#ifdef Py_UNICODE_WIDE
     case 0x10119:
     case 0x1014B:
     case 0x10152:
@@ -2251,7 +2234,6 @@
     case 0x10B5E:
     case 0x10B7E:
     case 0x10E72:
-#endif
         return (double) 100.0;
     case 0x0BF2:
     case 0x0D72:
@@ -2261,7 +2243,6 @@
     case 0x4EDF:
     case 0x5343:
     case 0x9621:
-#ifdef Py_UNICODE_WIDE
     case 0x10122:
     case 0x1014D:
     case 0x10154:
@@ -2270,17 +2251,14 @@
     case 0x10A47:
     case 0x10B5F:
     case 0x10B7F:
-#endif
         return (double) 1000.0;
     case 0x137C:
     case 0x2182:
     case 0x4E07:
     case 0x842C:
-#ifdef Py_UNICODE_WIDE
     case 0x1012B:
     case 0x10155:
     case 0x1085F:
-#endif
         return (double) 10000.0;
     case 0x2188:
         return (double) 100000.0;
@@ -2414,7 +2392,6 @@
     case 0xABF2:
     case 0xF978:
     case 0xFF12:
-#ifdef Py_UNICODE_WIDE
     case 0x10108:
     case 0x1015B:
     case 0x1015C:
@@ -2445,15 +2422,12 @@
     case 0x1D7F8:
     case 0x1F103:
     case 0x22390:
-#endif
         return (double) 2.0;
     case 0x2154:
-#ifdef Py_UNICODE_WIDE
     case 0x10177:
     case 0x10E7E:
     case 0x1245B:
     case 0x1245E:
-#endif
         return (double) 2.0/3.0;
     case 0x2156:
         return (double) 2.0/5.0;
@@ -2465,7 +2439,6 @@
     case 0x3039:
     case 0x5344:
     case 0x5EFF:
-#ifdef Py_UNICODE_WIDE
     case 0x10111:
     case 0x103D4:
     case 0x1085C:
@@ -2475,21 +2448,14 @@
     case 0x10B7D:
     case 0x10E6A:
     case 0x1D36A:
-#endif
         return (double) 20.0;
-#ifdef Py_UNICODE_WIDE
     case 0x1011A:
     case 0x10E73:
         return (double) 200.0;
-#endif
-#ifdef Py_UNICODE_WIDE
     case 0x10123:
         return (double) 2000.0;
-#endif
-#ifdef Py_UNICODE_WIDE
     case 0x1012C:
         return (double) 20000.0;
-#endif
     case 0x3251:
         return (double) 21.0;
     case 0x3252:
@@ -2571,7 +2537,6 @@
     case 0xABF3:
     case 0xF96B:
     case 0xFF13:
-#ifdef Py_UNICODE_WIDE
     case 0x10109:
     case 0x104A3:
     case 0x1085A:
@@ -2605,7 +2570,6 @@
     case 0x20B19:
     case 0x22998:
     case 0x23B1B:
-#endif
         return (double) 3.0;
     case 0x09F6:
     case 0xA835:
@@ -2616,9 +2580,7 @@
     case 0x09F8:
     case 0x0D75:
     case 0xA832:
-#ifdef Py_UNICODE_WIDE
     case 0x10178:
-#endif
         return (double) 3.0/4.0;
     case 0x2157:
         return (double) 3.0/5.0;
@@ -2628,28 +2590,20 @@
     case 0x303A:
     case 0x325A:
     case 0x5345:
-#ifdef Py_UNICODE_WIDE
     case 0x10112:
     case 0x10165:
     case 0x10E6B:
     case 0x1D36B:
     case 0x20983:
-#endif
         return (double) 30.0;
-#ifdef Py_UNICODE_WIDE
     case 0x1011B:
     case 0x1016B:
     case 0x10E74:
         return (double) 300.0;
-#endif
-#ifdef Py_UNICODE_WIDE
     case 0x10124:
         return (double) 3000.0;
-#endif
-#ifdef Py_UNICODE_WIDE
     case 0x1012D:
         return (double) 30000.0;
-#endif
     case 0x325B:
         return (double) 31.0;
     case 0x325C:
@@ -2724,7 +2678,6 @@
     case 0xAA54:
     case 0xABF4:
     case 0xFF14:
-#ifdef Py_UNICODE_WIDE
     case 0x1010A:
     case 0x104A4:
     case 0x10A43:
@@ -2756,34 +2709,25 @@
     case 0x20064:
     case 0x200E2:
     case 0x2626D:
-#endif
         return (double) 4.0;
     case 0x2158:
         return (double) 4.0/5.0;
     case 0x1375:
     case 0x32B5:
     case 0x534C:
-#ifdef Py_UNICODE_WIDE
     case 0x10113:
     case 0x10E6C:
     case 0x1D36C:
     case 0x2098C:
     case 0x2099C:
-#endif
         return (double) 40.0;
-#ifdef Py_UNICODE_WIDE
     case 0x1011C:
     case 0x10E75:
         return (double) 400.0;
-#endif
-#ifdef Py_UNICODE_WIDE
     case 0x10125:
         return (double) 4000.0;
-#endif
-#ifdef Py_UNICODE_WIDE
     case 0x1012E:
         return (double) 40000.0;
-#endif
     case 0x32B6:
         return (double) 41.0;
     case 0x32B7:
@@ -2858,7 +2802,6 @@
     case 0xAA55:
     case 0xABF5:
     case 0xFF15:
-#ifdef Py_UNICODE_WIDE
     case 0x1010B:
     case 0x10143:
     case 0x10148:
@@ -2887,14 +2830,11 @@
     case 0x1D7FB:
     case 0x1F106:
     case 0x20121:
-#endif
         return (double) 5.0;
     case 0x0F2C:
         return (double) 5.0/2.0;
     case 0x215A:
-#ifdef Py_UNICODE_WIDE
     case 0x1245C:
-#endif
         return (double) 5.0/6.0;
     case 0x215D:
         return (double) 5.0/8.0;
@@ -2903,7 +2843,6 @@
     case 0x217C:
     case 0x2186:
     case 0x32BF:
-#ifdef Py_UNICODE_WIDE
     case 0x10114:
     case 0x10144:
     case 0x1014A:
@@ -2917,11 +2856,9 @@
     case 0x10A7E:
     case 0x10E6D:
     case 0x1D36D:
-#endif
         return (double) 50.0;
     case 0x216E:
     case 0x217E:
-#ifdef Py_UNICODE_WIDE
     case 0x1011D:
     case 0x10145:
     case 0x1014C:
@@ -2932,22 +2869,17 @@
     case 0x1016F:
     case 0x10170:
     case 0x10E76:
-#endif
         return (double) 500.0;
     case 0x2181:
-#ifdef Py_UNICODE_WIDE
     case 0x10126:
     case 0x10146:
     case 0x1014E:
     case 0x10172:
-#endif
         return (double) 5000.0;
     case 0x2187:
-#ifdef Py_UNICODE_WIDE
     case 0x1012F:
     case 0x10147:
     case 0x10156:
-#endif
         return (double) 50000.0;
     case 0x0036:
     case 0x0666:
@@ -3007,7 +2939,6 @@
     case 0xF9D1:
     case 0xF9D3:
     case 0xFF16:
-#ifdef Py_UNICODE_WIDE
     case 0x1010C:
     case 0x104A6:
     case 0x10E65:
@@ -3026,28 +2957,19 @@
     case 0x1D7FC:
     case 0x1F107:
     case 0x20AEA:
-#endif
         return (double) 6.0;
     case 0x1377:
-#ifdef Py_UNICODE_WIDE
     case 0x10115:
     case 0x10E6E:
     case 0x1D36E:
-#endif
         return (double) 60.0;
-#ifdef Py_UNICODE_WIDE
     case 0x1011E:
     case 0x10E77:
         return (double) 600.0;
-#endif
-#ifdef Py_UNICODE_WIDE
     case 0x10127:
         return (double) 6000.0;
-#endif
-#ifdef Py_UNICODE_WIDE
     case 0x10130:
         return (double) 60000.0;
-#endif
     case 0x0037:
     case 0x0667:
     case 0x06F7:
@@ -3104,7 +3026,6 @@
     case 0xAA57:
     case 0xABF7:
     case 0xFF17:
-#ifdef Py_UNICODE_WIDE
     case 0x1010D:
     case 0x104A7:
     case 0x10E66:
@@ -3124,32 +3045,23 @@
     case 0x1D7FD:
     case 0x1F108:
     case 0x20001:
-#endif
         return (double) 7.0;
     case 0x0F2D:
         return (double) 7.0/2.0;
     case 0x215E:
         return (double) 7.0/8.0;
     case 0x1378:
-#ifdef Py_UNICODE_WIDE
     case 0x10116:
     case 0x10E6F:
     case 0x1D36F:
-#endif
         return (double) 70.0;
-#ifdef Py_UNICODE_WIDE
     case 0x1011F:
     case 0x10E78:
         return (double) 700.0;
-#endif
-#ifdef Py_UNICODE_WIDE
     case 0x10128:
         return (double) 7000.0;
-#endif
-#ifdef Py_UNICODE_WIDE
     case 0x10131:
         return (double) 70000.0;
-#endif
     case 0x0038:
     case 0x0668:
     case 0x06F8:
@@ -3204,7 +3116,6 @@
     case 0xAA58:
     case 0xABF8:
     case 0xFF18:
-#ifdef Py_UNICODE_WIDE
     case 0x1010E:
     case 0x104A8:
     case 0x10E67:
@@ -3222,28 +3133,19 @@
     case 0x1D7F4:
     case 0x1D7FE:
     case 0x1F109:
-#endif
         return (double) 8.0;
     case 0x1379:
-#ifdef Py_UNICODE_WIDE
     case 0x10117:
     case 0x10E70:
     case 0x1D370:
-#endif
         return (double) 80.0;
-#ifdef Py_UNICODE_WIDE
     case 0x10120:
     case 0x10E79:
         return (double) 800.0;
-#endif
-#ifdef Py_UNICODE_WIDE
     case 0x10129:
         return (double) 8000.0;
-#endif
-#ifdef Py_UNICODE_WIDE
     case 0x10132:
         return (double) 80000.0;
-#endif
     case 0x0039:
     case 0x0669:
     case 0x06F9:
@@ -3299,7 +3201,6 @@
     case 0xAA59:
     case 0xABF9:
     case 0xFF19:
-#ifdef Py_UNICODE_WIDE
     case 0x1010F:
     case 0x104A9:
     case 0x10E68:
@@ -3320,32 +3221,23 @@
     case 0x1D7FF:
     case 0x1F10A:
     case 0x2F890:
-#endif
         return (double) 9.0;
     case 0x0F2E:
         return (double) 9.0/2.0;
     case 0x137A:
-#ifdef Py_UNICODE_WIDE
     case 0x10118:
     case 0x10341:
     case 0x10E71:
     case 0x1D371:
-#endif
         return (double) 90.0;
-#ifdef Py_UNICODE_WIDE
     case 0x10121:
     case 0x1034A:
     case 0x10E7A:
         return (double) 900.0;
-#endif
-#ifdef Py_UNICODE_WIDE
     case 0x1012A:
         return (double) 9000.0;
-#endif
-#ifdef Py_UNICODE_WIDE
     case 0x10133:
         return (double) 90000.0;
-#endif
     }
     return -1.0;
 }
@@ -3353,7 +3245,7 @@
 /* Returns 1 for Unicode characters having the bidirectional
  * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise.
  */
-int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)
+int _PyUnicode_IsWhitespace(register const Py_UCS4 ch)
 {
 #ifdef WANT_WCTYPE_FUNCTIONS
     return iswspace(ch);
@@ -3399,7 +3291,7 @@
  * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional
  * type 'B', 0 otherwise.
  */
-int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)
+int _PyUnicode_IsLinebreak(register const Py_UCS4 ch)
 {
     switch (ch) {
     case 0x000A:

Modified: python/branches/py3k/Tools/unicode/makeunicodedata.py
==============================================================================
--- python/branches/py3k/Tools/unicode/makeunicodedata.py	(original)
+++ python/branches/py3k/Tools/unicode/makeunicodedata.py	Wed Aug 18 22:44:58 2010
@@ -28,7 +28,7 @@
 import sys
 
 SCRIPT = sys.argv[0]
-VERSION = "2.6"
+VERSION = "3.2"
 
 # The Unicode Database
 UNIDATA_VERSION = "5.2.0"
@@ -479,7 +479,7 @@
     print('/* Returns the numeric value as double for Unicode characters', file=fp)
     print(' * having this property, -1.0 otherwise.', file=fp)
     print(' */', file=fp)
-    print('double _PyUnicode_ToNumeric(Py_UNICODE ch)', file=fp)
+    print('double _PyUnicode_ToNumeric(Py_UCS4 ch)', file=fp)
     print('{', file=fp)
     print('    switch (ch) {', file=fp)
     for value, codepoints in numeric_items:
@@ -488,21 +488,10 @@
         parts = [repr(float(part)) for part in parts]
         value = '/'.join(parts)
 
-        haswide = False
-        hasnonewide = False
         codepoints.sort()
         for codepoint in codepoints:
-            if codepoint < 0x10000:
-                hasnonewide = True
-            if codepoint >= 0x10000 and not haswide:
-                print('#ifdef Py_UNICODE_WIDE', file=fp)
-                haswide = True
             print('    case 0x%04X:' % (codepoint,), file=fp)
-        if haswide and hasnonewide:
-            print('#endif', file=fp)
         print('        return (double) %s;' % (value,), file=fp)
-        if haswide and not hasnonewide:
-            print('#endif', file=fp)
     print('    }', file=fp)
     print('    return -1.0;', file=fp)
     print('}', file=fp)
@@ -512,27 +501,16 @@
     print("/* Returns 1 for Unicode characters having the bidirectional", file=fp)
     print(" * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise.", file=fp)
     print(" */", file=fp)
-    print('int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)', file=fp)
+    print('int _PyUnicode_IsWhitespace(register const Py_UCS4 ch)', file=fp)
     print('{', file=fp)
     print('#ifdef WANT_WCTYPE_FUNCTIONS', file=fp)
     print('    return iswspace(ch);', file=fp)
     print('#else', file=fp)
     print('    switch (ch) {', file=fp)
 
-    haswide = False
-    hasnonewide = False
     for codepoint in sorted(spaces):
-        if codepoint < 0x10000:
-            hasnonewide = True
-        if codepoint >= 0x10000 and not haswide:
-            print('#ifdef Py_UNICODE_WIDE', file=fp)
-            haswide = True
         print('    case 0x%04X:' % (codepoint,), file=fp)
-    if haswide and hasnonewide:
-        print('#endif', file=fp)
     print('        return 1;', file=fp)
-    if haswide and not hasnonewide:
-        print('#endif', file=fp)
 
     print('    }', file=fp)
     print('    return 0;', file=fp)
@@ -545,23 +523,12 @@
     print(" * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional", file=fp)
     print(" * type 'B', 0 otherwise.", file=fp)
     print(" */", file=fp)
-    print('int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)', file=fp)
+    print('int _PyUnicode_IsLinebreak(register const Py_UCS4 ch)', file=fp)
     print('{', file=fp)
     print('    switch (ch) {', file=fp)
-    haswide = False
-    hasnonewide = False
     for codepoint in sorted(linebreaks):
-        if codepoint < 0x10000:
-            hasnonewide = True
-        if codepoint >= 0x10000 and not haswide:
-            print('#ifdef Py_UNICODE_WIDE', file=fp)
-            haswide = True
         print('    case 0x%04X:' % (codepoint,), file=fp)
-    if haswide and hasnonewide:
-        print('#endif', file=fp)
     print('        return 1;', file=fp)
-    if haswide and not hasnonewide:
-        print('#endif', file=fp)
 
     print('    }', file=fp)
     print('    return 0;', file=fp)


More information about the Python-checkins mailing list