[pypy-commit] pypy unicode-utf8: support for append_utf8
cfbolz
pypy.commits at gmail.com
Fri Nov 24 10:13:23 EST 2017
Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch: unicode-utf8
Changeset: r93164:f5be33826726
Date: 2017-11-24 16:10 +0100
http://bitbucket.org/pypy/pypy/changeset/f5be33826726/
Log: support for append_utf8
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -687,6 +687,11 @@
self._lgt += 1
unichr_as_utf8_append(self._s, code, True)
+ def append_utf8(self, utf8, length, flag):
+ self._flag = combine_flags(self._flag, flag)
+ self._lgt += length
+ self._s.append(utf8)
+
def build(self):
return self._s.build()
diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py
--- a/rpython/rlib/test/test_rutf8.py
+++ b/rpython/rlib/test/test_rutf8.py
@@ -175,6 +175,7 @@
assert s.get_flag() == rutf8.FLAG_REGULAR
assert s.get_length() == 9
assert s.build().decode("utf8") == u"foox\u1234foox"
+
s = rutf8.Utf8StringBuilder()
s.append_code(0x1234)
assert s.build().decode("utf8") == u"\u1234"
@@ -184,6 +185,21 @@
assert s.get_flag() == rutf8.FLAG_HAS_SURROGATES
assert s.get_length() == 2
+ s = rutf8.Utf8StringBuilder()
+ s.append_utf8("abc", 3, rutf8.FLAG_ASCII)
+ assert s.get_flag() == rutf8.FLAG_ASCII
+ assert s.get_length() == 3
+ assert s.build().decode("utf8") == u"abc"
+
+ s.append_utf8(u"\u1234".encode("utf8"), 1, rutf8.FLAG_REGULAR)
+ assert s.build().decode("utf8") == u"abc\u1234"
+ assert s.get_flag() == rutf8.FLAG_REGULAR
+ assert s.get_length() == 4
+
+ s.append_code(0xD800)
+ assert s.get_flag() == rutf8.FLAG_HAS_SURROGATES
+ assert s.get_length() == 5
+
@given(strategies.text())
def test_utf8_iterator(arg):
u = rutf8.Utf8StringIterator(arg.encode('utf8'))
More information about the pypy-commit
mailing list