[pypy-commit] pypy unicode-utf8-py3: merge unicode-utf8 into branch

mattip pypy.commits at gmail.com
Tue Jan 15 18:20:12 EST 2019


Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r95643:5dbc4374c8c0
Date: 2019-01-16 01:19 +0200
http://bitbucket.org/pypy/pypy/changeset/5dbc4374c8c0/

Log:	merge unicode-utf8 into branch

diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py
--- a/pypy/objspace/std/newformat.py
+++ b/pypy/objspace/std/newformat.py
@@ -463,11 +463,16 @@
             i = 0
             got_align = True
             got_fill_char = False
-            if length - i >= 2 and self._is_alignment(spec[i + 1]):
-                self._align = spec[i + 1]
-                self._fill_char = spec[i]
+            # The single character could be utf8-encoded unicode
+            if self.is_unicode:
+                after_i = rutf8.next_codepoint_pos(spec, i)
+            else:
+                after_i = i + 1
+            if length - i >= 2 and self._is_alignment(spec[after_i]):
+                self._align = spec[after_i]
+                self._fill_char = spec[i:after_i]
                 got_fill_char = True
-                i += 2
+                i = after_i + 1
             elif length - i >= 1 and self._is_alignment(spec[i]):
                 self._align = spec[i]
                 i += 1
@@ -560,7 +565,10 @@
             return builder.build()
 
         def _builder(self):
-            return rstring.StringBuilder()
+            if self.is_unicode:
+                return rutf8.Utf8StringBuilder()
+            else:
+                return rstring.StringBuilder()
 
         def _unknown_presentation(self, tp):
             raise oefmt(self.space.w_ValueError,
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -1127,6 +1127,10 @@
                 return u'\u1234'
         '%s' % X()
 
+    def test_format_repeat(self):
+        assert format(u"abc", u"z<5") == u"abczz"
+        assert format(u"abc", u"\u2007<5") == u"abc\u2007\u2007"
+
     def test_formatting_unicode__repr__(self):
         # Printable character
         assert '%r' % chr(0xe9) == "'\xe9'"
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -706,7 +706,7 @@
         return s_None
 
     def method_append_multiple_char(self, s_char, s_times):
-        assert isinstance(s_char, SomeChar)
+        assert isinstance(s_char, (SomeString, SomeChar))
         assert isinstance(s_times, SomeInteger)
         return s_None
 
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -723,6 +723,11 @@
         self._lgt += length
 
     @always_inline
+    def append_multiple_char(self, utf8, times):
+        self._s.append(utf8 * times)
+        self._lgt += times
+
+    @always_inline
     def build(self):
         return self._s.build()
 


More information about the pypy-commit mailing list