[Python-checkins] r78394 - in python/branches/release26-maint: Lib/test/test_unicode.py Misc/NEWS Objects/unicodeobject.c

Wed Feb 24 00:20:14 CET 2010

Author: victor.stinner
Date: Wed Feb 24 00:20:14 2010
New Revision: 78394

Log:
Merged revisions 78392 via svnmerge from 
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r78392 | victor.stinner | 2010-02-24 00:16:07 +0100 (mer., 24 févr. 2010) | 4 lines
  
  Issue #7649: Fix u'%c' % char for character in range 0x80..0xFF
  
  => raise a UnicodeDecodeError. Patch written by Ezio Melotti.
........


Modified:
   python/branches/release26-maint/   (props changed)
   python/branches/release26-maint/Lib/test/test_unicode.py
   python/branches/release26-maint/Misc/NEWS
   python/branches/release26-maint/Objects/unicodeobject.c

Modified: python/branches/release26-maint/Lib/test/test_unicode.py
==============================================================================

--- python/branches/release26-maint/Lib/test/test_unicode.py	(original)
+++ python/branches/release26-maint/Lib/test/test_unicode.py	Wed Feb 24 00:20:14 2010
@@ -393,6 +393,19 @@
         self.assertEqual(u'%c' % 0x1234, u'\u1234')
         self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
 
+        for num in range(0x00,0x80):
+            char = chr(num)
+            self.assertEqual(u"%c" % char, char)
+            self.assertEqual(u"%c" % num, char)
+        # Issue 7649
+        for num in range(0x80,0x100):
+            uchar = unichr(num)
+            self.assertEqual(uchar, u"%c" % num)   # works only with ints
+            self.assertEqual(uchar, u"%c" % uchar) # and unicode chars
+            # the implicit decoding should fail for non-ascii chars
+            self.assertRaises(UnicodeDecodeError, u"%c".__mod__, chr(num))
+            self.assertRaises(UnicodeDecodeError, u"%s".__mod__, chr(num))
+
         # formatting jobs delegated from the string implementation:
         self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
         self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')

Modified: python/branches/release26-maint/Misc/NEWS
==============================================================================
--- python/branches/release26-maint/Misc/NEWS	(original)
+++ python/branches/release26-maint/Misc/NEWS	Wed Feb 24 00:20:14 2010
@@ -12,6 +12,9 @@
 Core and Builtins
 -----------------
 
+- Issue #7649: Fix u'%c' % char for character in range 0x80..0xFF, raise an
+  UnicodeDecodeError
+
 - Issue #5677: Explicitly forbid write operations on read-only file objects,
   and read operations on write-only file objects.  On Windows, the system C
   library would return a bogus result; on Solaris, it was possible to crash

Modified: python/branches/release26-maint/Objects/unicodeobject.c
==============================================================================
--- python/branches/release26-maint/Objects/unicodeobject.c	(original)
+++ python/branches/release26-maint/Objects/unicodeobject.c	Wed Feb 24 00:20:14 2010
@@ -8357,6 +8357,7 @@
            size_t buflen,
            PyObject *v)
 {
+    PyObject *s;
     /* presume that the buffer is at least 2 characters long */
     if (PyUnicode_Check(v)) {
         if (PyUnicode_GET_SIZE(v) != 1)
@@ -8367,7 +8368,14 @@
     else if (PyString_Check(v)) {
         if (PyString_GET_SIZE(v) != 1)
             goto onError;
-        buf[0] = (Py_UNICODE)PyString_AS_STRING(v)[0];
+        /* #7649: if the char is a non-ascii (i.e. in range(0x80,0x100)) byte
+           string, "u'%c' % char" should fail with a UnicodeDecodeError */
+        s = PyUnicode_FromStringAndSize(PyString_AS_STRING(v), 1);
+        /* if the char is not decodable return -1 */
+        if (s == NULL)
+            return -1;
+        buf[0] = PyUnicode_AS_UNICODE(s)[0];
+        Py_DECREF(s);
     }
 
     else {