[pypy-commit] pypy unicode-utf8: support for append_utf8

cfbolz pypy.commits at gmail.com
Fri Nov 24 10:13:23 EST 2017


Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch: unicode-utf8
Changeset: r93164:f5be33826726
Date: 2017-11-24 16:10 +0100
http://bitbucket.org/pypy/pypy/changeset/f5be33826726/

Log:	support for append_utf8

diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -687,6 +687,11 @@
         self._lgt += 1
         unichr_as_utf8_append(self._s, code, True)
 
+    def append_utf8(self, utf8, length, flag):
+        self._flag = combine_flags(self._flag, flag)
+        self._lgt += length
+        self._s.append(utf8)
+
     def build(self):
         return self._s.build()
 
diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py
--- a/rpython/rlib/test/test_rutf8.py
+++ b/rpython/rlib/test/test_rutf8.py
@@ -175,6 +175,7 @@
     assert s.get_flag() == rutf8.FLAG_REGULAR
     assert s.get_length() == 9
     assert s.build().decode("utf8") == u"foox\u1234foox"
+
     s = rutf8.Utf8StringBuilder()
     s.append_code(0x1234)
     assert s.build().decode("utf8") == u"\u1234"
@@ -184,6 +185,21 @@
     assert s.get_flag() == rutf8.FLAG_HAS_SURROGATES
     assert s.get_length() == 2
 
+    s = rutf8.Utf8StringBuilder()
+    s.append_utf8("abc", 3, rutf8.FLAG_ASCII)
+    assert s.get_flag() == rutf8.FLAG_ASCII
+    assert s.get_length() == 3
+    assert s.build().decode("utf8") == u"abc"
+
+    s.append_utf8(u"\u1234".encode("utf8"), 1, rutf8.FLAG_REGULAR)
+    assert s.build().decode("utf8") == u"abc\u1234"
+    assert s.get_flag() == rutf8.FLAG_REGULAR
+    assert s.get_length() == 4
+
+    s.append_code(0xD800)
+    assert s.get_flag() == rutf8.FLAG_HAS_SURROGATES
+    assert s.get_length() == 5
+
 @given(strategies.text())
 def test_utf8_iterator(arg):
     u = rutf8.Utf8StringIterator(arg.encode('utf8'))


More information about the pypy-commit mailing list