[pypy-commit] pypy unicode-utf8-py3: calculate padding from unicode length, not utf8 length

Tue Jan 22 05:37:42 EST 2019

Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r95690:0e658f7c342f
Date: 2019-01-22 08:05 +0200
http://bitbucket.org/pypy/pypy/changeset/0e658f7c342f/

Log:	calculate padding from unicode length, not utf8 length

diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py
--- a/pypy/objspace/std/formatting.py
+++ b/pypy/objspace/std/formatting.py
@@ -348,12 +348,12 @@
                 return
             if prec >= 0 and prec < length:
                 length = prec   # ignore the end of the string if too long
+            padding = self.width - length
             if do_unicode:
                 # XXX could use W_UnicodeObject.descr_getslice, but that would
                 # require a refactor to use the w_val, not r
                 length = rutf8._pos_at_index(r, length)
             result = self.result
-            padding = self.width - length
             if padding < 0:
                 padding = 0
             assert padding >= 0
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -1129,6 +1129,14 @@
                 return u'\u1234'
         '%s' % X()
 
+    def test_formatting_unicode__str__4(self):
+        # from lib-python/3/test/test_tokenize
+        fmt = "%(token)-13.13r %(start)s"
+        vals = {"token" : u"Örter", "start": "(1, 0)"}
+        expected = u"'Örter'       (1, 0)"
+        s = fmt % vals
+        assert s == expected, "\ns       = '%s'\nexpected= '%s'" %(s, expected)
+
     def test_format_repeat(self):
         assert format(u"abc", u"z<5") == u"abczz"
         assert format(u"abc", u"\u2007<5") == u"abc\u2007\u2007"