[pypy-commit] pypy unicode-utf8: test, fix formatting '%c'

Tue Jan 1 13:29:08 EST 2019

Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8
Changeset: r95560:9a0c90346239
Date: 2019-01-01 19:19 +0200
http://bitbucket.org/pypy/pypy/changeset/9a0c90346239/

Log:	test, fix formatting '%c'

diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py
--- a/pypy/objspace/std/formatting.py
+++ b/pypy/objspace/std/formatting.py
@@ -455,6 +455,8 @@
             self.prec = -1     # just because
             space = self.space
             if space.isinstance_w(w_value, space.w_bytes):
+                if do_unicode:
+                    w_value = w_value.descr_decode(space, space.newtext('ascii'))
                 s = space.bytes_w(w_value)
                 if len(s) != 1:
                     raise oefmt(space.w_TypeError, "%c requires int or char")
@@ -463,7 +465,7 @@
                 if not do_unicode:
                     raise NeedUnicodeFormattingError
                 ustr = space.utf8_w(w_value)
-                if len(ustr) != 1:
+                if space.len_w(w_value) != 1:
                     raise oefmt(space.w_TypeError, "%c requires int or unichar")
                 self.std_wp(ustr, False)
             else:
@@ -516,7 +518,7 @@
     formatter = UnicodeFormatter(space, fmt, values_w, w_valuedict)
     result = formatter.format()
     # this can force strings, not sure if it's a problem or not
-    lgt = rutf8.check_utf8(result, True)
+    lgt = rutf8.codepoints_in_utf8(result)
     return space.newutf8(result, lgt)
 
 def mod_format(space, w_format, w_values, do_unicode=False):
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -1125,6 +1125,16 @@
                 return u'\u1234'
         '%s' % X()
 
+    def test_formatting_char(self):
+        for num in range(0x80,0x100):
+            uchar = unichr(num)
+            print num
+            assert uchar == u"%c" % num   # works only with ints
+            assert uchar == u"%c" % uchar # and unicode chars
+            # the implicit decoding should fail for non-ascii chars
+            raises(UnicodeDecodeError, u"%c".__mod__, chr(num))
+            raises(UnicodeDecodeError, u"%s".__mod__, chr(num))
+
     def test_str_subclass(self):
         class Foo9(str):
             def __unicode__(self):