[Python-3000-checkins] r56395 - in python/branches/py3k-struni: Lib/test/test_builtin.py Objects/unicodeobject.c Python/bltinmodule.c

guido.van.rossum python-3000-checkins at python.org
Sun Jul 15 15:00:05 CEST 2007


Author: guido.van.rossum
Date: Sun Jul 15 15:00:05 2007
New Revision: 56395

Modified:
   python/branches/py3k-struni/Lib/test/test_builtin.py
   python/branches/py3k-struni/Objects/unicodeobject.c
   python/branches/py3k-struni/Python/bltinmodule.c
Log:
Make chr() return, and ord() accept, surrogate pairs in narrow builds.
The domain of chr() and the range of ord() are now always [0 ... 0x10FFFF].


Modified: python/branches/py3k-struni/Lib/test/test_builtin.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_builtin.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_builtin.py	Sun Jul 15 15:00:05 2007
@@ -169,15 +169,23 @@
         self.assertEqual(chr(97), 'a')
         self.assertEqual(chr(0xff), '\xff')
         self.assertRaises(ValueError, chr, 1<<24)
-        self.assertEqual(
-            chr(sys.maxunicode),
-            str(('\\U%08x' % (sys.maxunicode)).encode("ascii"), 'unicode-escape')
-        )
-        self.assertRaises(ValueError, chr, sys.maxunicode+1)
+        self.assertEqual(chr(sys.maxunicode),
+                         str(('\\U%08x' % (sys.maxunicode)).encode("ascii"),
+                             'unicode-escape'))
         self.assertRaises(TypeError, chr)
+        self.assertEqual(chr(0x0000FFFF), "\U0000FFFF")
+        self.assertEqual(chr(0x00010000), "\U00010000")
+        self.assertEqual(chr(0x00010001), "\U00010001")
+        self.assertEqual(chr(0x000FFFFE), "\U000FFFFE")
+        self.assertEqual(chr(0x000FFFFF), "\U000FFFFF")
+        self.assertEqual(chr(0x00100000), "\U00100000")
+        self.assertEqual(chr(0x00100001), "\U00100001")
+        self.assertEqual(chr(0x0010FFFE), "\U0010FFFE")
+        self.assertEqual(chr(0x0010FFFF), "\U0010FFFF")
+        self.assertRaises(ValueError, chr, -1)
+        self.assertRaises(ValueError, chr, 0x00110000)
 
-    def XXX_test_cmp(self):
-        # cmp() is no longer supported
+    def test_cmp(self):
         self.assertEqual(cmp(-1, 1), -1)
         self.assertEqual(cmp(1, -1), 1)
         self.assertEqual(cmp(1, 1), 0)
@@ -1288,6 +1296,17 @@
         self.assertEqual(ord(chr(sys.maxunicode)), sys.maxunicode)
         self.assertRaises(TypeError, ord, 42)
 
+        self.assertEqual(ord(chr(0x10FFFF)), 0x10FFFF)
+        self.assertEqual(ord("\U0000FFFF"), 0x0000FFFF)
+        self.assertEqual(ord("\U00010000"), 0x00010000)
+        self.assertEqual(ord("\U00010001"), 0x00010001)
+        self.assertEqual(ord("\U000FFFFE"), 0x000FFFFE)
+        self.assertEqual(ord("\U000FFFFF"), 0x000FFFFF)
+        self.assertEqual(ord("\U00100000"), 0x00100000)
+        self.assertEqual(ord("\U00100001"), 0x00100001)
+        self.assertEqual(ord("\U0010FFFE"), 0x0010FFFE)
+        self.assertEqual(ord("\U0010FFFF"), 0x0010FFFF)
+
     def test_pow(self):
         self.assertEqual(pow(0,0), 1)
         self.assertEqual(pow(0,1), 0)

Modified: python/branches/py3k-struni/Objects/unicodeobject.c
==============================================================================
--- python/branches/py3k-struni/Objects/unicodeobject.c	(original)
+++ python/branches/py3k-struni/Objects/unicodeobject.c	Sun Jul 15 15:00:05 2007
@@ -915,21 +915,20 @@
 
 PyObject *PyUnicode_FromOrdinal(int ordinal)
 {
-    Py_UNICODE s[1];
+    Py_UNICODE s[2];
 
-#ifdef Py_UNICODE_WIDE
     if (ordinal < 0 || ordinal > 0x10ffff) {
 	PyErr_SetString(PyExc_ValueError,
-			"chr() arg not in range(0x110000) "
-			"(wide Python build)");
+			"chr() arg not in range(0x110000)");
 	return NULL;
     }
-#else
-    if (ordinal < 0 || ordinal > 0xffff) {
-	PyErr_SetString(PyExc_ValueError,
-			"chr() arg not in range(0x10000) "
-			"(narrow Python build)");
-	return NULL;
+
+#ifndef Py_UNICODE_WIDE
+    if (ordinal > 0xffff) {
+        ordinal -= 0x10000;
+        s[0] = 0xD800 | (ordinal >> 10);
+        s[1] = 0xDC00 | (ordinal & 0x3FF);
+        return PyUnicode_FromUnicode(s, 2);
     }
 #endif
 

Modified: python/branches/py3k-struni/Python/bltinmodule.c
==============================================================================
--- python/branches/py3k-struni/Python/bltinmodule.c	(original)
+++ python/branches/py3k-struni/Python/bltinmodule.c	Sun Jul 15 15:00:05 2007
@@ -317,7 +317,11 @@
 PyDoc_STRVAR(chr_doc,
 "chr(i) -> Unicode character\n\
 \n\
-Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff.");
+Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff."
+#ifndef Py_UNICODE_WIDE
+"\nIf 0x10000 <= i, a surrogate pair is returned."
+#endif
+);
 
 
 static PyObject *
@@ -1179,6 +1183,19 @@
 			ord = (long)*PyUnicode_AS_UNICODE(obj);
 			return PyInt_FromLong(ord);
 		}
+#ifndef Py_UNICODE_WIDE
+		if (size == 2) {
+			/* Decode a valid surrogate pair */
+			int c0 = PyUnicode_AS_UNICODE(obj)[0];
+			int c1 = PyUnicode_AS_UNICODE(obj)[1];
+			if (0xD800 <= c0 && c0 <= 0xDBFF &&
+			    0xDC00 <= c1 && c1 <= 0xDFFF) {
+				ord = ((((c0 & 0x03FF) << 10) | (c1 & 0x03FF)) +
+				       0x00010000);
+				return PyInt_FromLong(ord);
+			}
+		}
+#endif
 	}
 	else if (PyBytes_Check(obj)) {
 		/* XXX Hopefully this is temporary */
@@ -1205,7 +1222,11 @@
 PyDoc_STRVAR(ord_doc,
 "ord(c) -> integer\n\
 \n\
-Return the integer ordinal of a one-character string.");
+Return the integer ordinal of a one-character string."
+#ifndef Py_UNICODE_WIDE
+"\nA valid surrogate pair is also accepted."
+#endif
+);
 
 
 static PyObject *


More information about the Python-3000-checkins mailing list