[Python-3000-checkins] r64717 - in python/branches/py3k: Lib/test/test_unicode.py Misc/NEWS Objects/unicodeobject.c Python/modsupport.c
amaury.forgeotdarc
python-3000-checkins at python.org
Fri Jul 4 23:26:43 CEST 2008
Author: amaury.forgeotdarc
Date: Fri Jul 4 23:26:43 2008
New Revision: 64717
Log:
Issue #3280: like chr() already does, the "%c" format now accepts the full unicode range
even on "narrow Unicode" builds; the result is a pair of UTF-16 surrogates.
Modified:
python/branches/py3k/Lib/test/test_unicode.py
python/branches/py3k/Misc/NEWS
python/branches/py3k/Objects/unicodeobject.c
python/branches/py3k/Python/modsupport.c
Modified: python/branches/py3k/Lib/test/test_unicode.py
==============================================================================
--- python/branches/py3k/Lib/test/test_unicode.py (original)
+++ python/branches/py3k/Lib/test/test_unicode.py Fri Jul 4 23:26:43 2008
@@ -717,7 +717,10 @@
self.assertEqual("%(x)s, %(\xfc)s" % {'x':"abc", '\xfc':"def"}, 'abc, def')
self.assertEqual('%c' % 0x1234, '\u1234')
- self.assertRaises(OverflowError, "%c".__mod__, (sys.maxunicode+1,))
+ self.assertEqual('%c' % 0x21483, '\U00021483')
+ self.assertRaises(OverflowError, "%c".__mod__, (0x110000,))
+ self.assertEqual('%c' % '\U00021483', '\U00021483')
+ self.assertRaises(TypeError, "%c".__mod__, "aa")
# formatting jobs delegated from the string implementation:
self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS (original)
+++ python/branches/py3k/Misc/NEWS Fri Jul 4 23:26:43 2008
@@ -12,6 +12,11 @@
Core and Builtins
-----------------
+- Issue #3280: like chr(), the "%c" format now accepts unicode code points
+ beyond the Basic Multilingual Plane (above 0xffff) on all configurations. On
+ "narrow Unicode" builds, the result is a string of 2 code units, forming a
+ UTF-16 surrogate pair.
+
- Issue #3282: str.isprintable() should return False for undefined
Unicode characters.
Modified: python/branches/py3k/Objects/unicodeobject.c
==============================================================================
--- python/branches/py3k/Objects/unicodeobject.c (original)
+++ python/branches/py3k/Objects/unicodeobject.c Fri Jul 4 23:26:43 2008
@@ -8730,11 +8730,28 @@
size_t buflen,
PyObject *v)
{
- /* presume that the buffer is at least 2 characters long */
+ /* presume that the buffer is at least 3 characters long */
if (PyUnicode_Check(v)) {
- if (PyUnicode_GET_SIZE(v) != 1)
- goto onError;
- buf[0] = PyUnicode_AS_UNICODE(v)[0];
+ if (PyUnicode_GET_SIZE(v) == 1) {
+ buf[0] = PyUnicode_AS_UNICODE(v)[0];
+ buf[1] = '\0';
+ return 1;
+ }
+#ifndef Py_UNICODE_WIDE
+ if (PyUnicode_GET_SIZE(v) == 2) {
+ /* Decode a valid surrogate pair */
+ int c0 = PyUnicode_AS_UNICODE(v)[0];
+ int c1 = PyUnicode_AS_UNICODE(v)[1];
+ if (0xD800 <= c0 && c0 <= 0xDBFF &&
+ 0xDC00 <= c1 && c1 <= 0xDFFF) {
+ buf[0] = c0;
+ buf[1] = c1;
+ buf[2] = '\0';
+ return 2;
+ }
+ }
+#endif
+ goto onError;
}
else {
/* Integer input truncated to a character */
@@ -8742,25 +8759,25 @@
x = PyLong_AsLong(v);
if (x == -1 && PyErr_Occurred())
goto onError;
-#ifdef Py_UNICODE_WIDE
+
if (x < 0 || x > 0x10ffff) {
PyErr_SetString(PyExc_OverflowError,
- "%c arg not in range(0x110000) "
- "(wide Python build)");
+ "%c arg not in range(0x110000)");
return -1;
}
-#else
- if (x < 0 || x > 0xffff) {
- PyErr_SetString(PyExc_OverflowError,
- "%c arg not in range(0x10000) "
- "(narrow Python build)");
- return -1;
+
+#ifndef Py_UNICODE_WIDE
+ if (x > 0xffff) {
+ x -= 0x10000;
+ buf[0] = (Py_UNICODE)(0xD800 | (x >> 10));
+ buf[1] = (Py_UNICODE)(0xDC00 | (x & 0x3FF));
+ return 2;
}
#endif
buf[0] = (Py_UNICODE) x;
+ buf[1] = '\0';
+ return 1;
}
- buf[1] = '\0';
- return 1;
onError:
PyErr_SetString(PyExc_TypeError,
Modified: python/branches/py3k/Python/modsupport.c
==============================================================================
--- python/branches/py3k/Python/modsupport.c (original)
+++ python/branches/py3k/Python/modsupport.c Fri Jul 4 23:26:43 2008
@@ -294,21 +294,12 @@
case 'C':
{
int i = va_arg(*p_va, int);
- Py_UNICODE c;
if (i < 0 || i > PyUnicode_GetMax()) {
-#ifdef Py_UNICODE_WIDE
PyErr_SetString(PyExc_OverflowError,
- "%c arg not in range(0x110000) "
- "(wide Python build)");
-#else
- PyErr_SetString(PyExc_OverflowError,
- "%c arg not in range(0x10000) "
- "(narrow Python build)");
-#endif
+ "%c arg not in range(0x110000)");
return NULL;
}
- c = i;
- return PyUnicode_FromUnicode(&c, 1);
+ return PyUnicode_FromOrdinal(i);
}
case 's':
More information about the Python-3000-checkins
mailing list