[pypy-commit] pypy unicode-utf8: test, fix for format specification alignment repeats with a unicode codepoint
mattip
pypy.commits at gmail.com
Tue Jan 15 18:18:28 EST 2019
Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8
Changeset: r95642:762bda764d4f
Date: 2019-01-16 01:17 +0200
http://bitbucket.org/pypy/pypy/changeset/762bda764d4f/
Log: test, fix for format specification alignment repeats with a unicode codepoint
diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py
--- a/pypy/objspace/std/newformat.py
+++ b/pypy/objspace/std/newformat.py
@@ -455,11 +455,16 @@
i = 0
got_align = True
got_fill_char = False
- if length - i >= 2 and self._is_alignment(spec[i + 1]):
- self._align = spec[i + 1]
- self._fill_char = spec[i]
+ # The single character could be utf8-encoded unicode
+ if self.is_unicode:
+ after_i = rutf8.next_codepoint_pos(spec, i)
+ else:
+ after_i = i + 1
+ if length - i >= 2 and self._is_alignment(spec[after_i]):
+ self._align = spec[after_i]
+ self._fill_char = spec[i:after_i]
got_fill_char = True
- i += 2
+ i = after_i + 1
elif length - i >= 1 and self._is_alignment(spec[i]):
self._align = spec[i]
i += 1
@@ -552,7 +557,10 @@
return builder.build()
def _builder(self):
- return rstring.StringBuilder()
+ if self.is_unicode:
+ return rutf8.Utf8StringBuilder()
+ else:
+ return rstring.StringBuilder()
def _unknown_presentation(self, tp):
raise oefmt(self.space.w_ValueError,
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -1130,6 +1130,10 @@
return u'\u1234'
'%s' % X()
+ def test_format_repeat(self):
+ assert format(u"abc", u"z<5") == u"abczz"
+ assert format(u"abc", u"\u2007<5") == u"abc\u2007\u2007"
+
def test_formatting_char(self):
for num in range(0x80,0x100):
uchar = unichr(num)
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -706,7 +706,7 @@
return s_None
def method_append_multiple_char(self, s_char, s_times):
- assert isinstance(s_char, SomeChar)
+ assert isinstance(s_char, (SomeString, SomeChar))
assert isinstance(s_times, SomeInteger)
return s_None
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -721,6 +721,11 @@
self._lgt += length
@always_inline
+ def append_multiple_char(self, utf8, times):
+ self._s.append(utf8 * times)
+ self._lgt += times
+
+ @always_inline
def build(self):
return self._s.build()
More information about the pypy-commit mailing list