[pypy-commit] pypy py3k: escape surrogates in marshal

pjenvey noreply at buildbot.pypy.org
Mon Oct 29 20:34:58 CET 2012


Author: Philip Jenvey <pjenvey at underboss.org>
Branch: py3k
Changeset: r58599:6404ccd57b8c
Date: 2012-10-29 12:32 -0700
http://bitbucket.org/pypy/pypy/changeset/6404ccd57b8c/

Log:	escape surrogates in marshal

diff --git a/lib-python/3.2/test/test_marshal.py b/lib-python/3.2/test/test_marshal.py
--- a/lib-python/3.2/test/test_marshal.py
+++ b/lib-python/3.2/test/test_marshal.py
@@ -165,7 +165,7 @@
         s = b'c' + (b'X' * 4*4) + b'{' * 2**20
         self.assertRaises(ValueError, marshal.loads, s)
 
-    @test_support.impl_detail('specific recursion check')
+    @support.impl_detail('specific recursion check')
     def test_recursion_limit(self):
         # Create a deeply nested structure.
         head = last = []
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -48,13 +48,15 @@
         final=True, errorhandler=decode_error_handler(space))
     return result
 
-def PyUnicode_DecodeUTF8(space, string):
+def PyUnicode_DecodeUTF8(space, string, allow_surrogates=False):
     result, consumed = runicode.str_decode_utf_8(
         string, len(string), "strict",
-        final=True, errorhandler=decode_error_handler(space))
+        final=True, errorhandler=decode_error_handler(space),
+        allow_surrogates=allow_surrogates)
     return result
 
-def PyUnicode_EncodeUTF8(space, uni):
+def PyUnicode_EncodeUTF8(space, uni, allow_surrogates=False):
     return runicode.unicode_encode_utf_8(
         uni, len(uni), "strict",
-        errorhandler=encode_error_handler(space))
+        errorhandler=encode_error_handler(space),
+        allow_surrogates=allow_surrogates)
diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py
--- a/pypy/objspace/std/marshal_impl.py
+++ b/pypy/objspace/std/marshal_impl.py
@@ -380,11 +380,14 @@
 register(TYPE_CODE, unmarshal_pycode)
 
 def marshal_w__Unicode(space, w_unicode, m):
-    s = unicodehelper.PyUnicode_EncodeUTF8(space, space.unicode_w(w_unicode))
+    s = unicodehelper.PyUnicode_EncodeUTF8(space, space.unicode_w(w_unicode),
+                                           allow_surrogates=True)
     m.atom_str(TYPE_UNICODE, s)
 
 def unmarshal_Unicode(space, u, tc):
-    return space.wrap(unicodehelper.PyUnicode_DecodeUTF8(space, u.get_str()))
+    return space.wrap(
+        unicodehelper.PyUnicode_DecodeUTF8(space, u.get_str(),
+                                           allow_surrogates=True))
 register(TYPE_UNICODE, unmarshal_Unicode)
 
 app = gateway.applevel(r'''


More information about the pypy-commit mailing list