[pypy-commit] pypy default: Fix tests around utf8 encoding

amauryfa noreply at buildbot.pypy.org
Wed Sep 26 00:00:04 CEST 2012


Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: 
Changeset: r57589:10e2fbb7bcec
Date: 2012-09-25 23:51 +0200
http://bitbucket.org/pypy/pypy/changeset/10e2fbb7bcec/

Log:	Fix tests around utf8 encoding

diff --git a/pypy/objspace/std/ropeunicodeobject.py b/pypy/objspace/std/ropeunicodeobject.py
--- a/pypy/objspace/std/ropeunicodeobject.py
+++ b/pypy/objspace/std/ropeunicodeobject.py
@@ -71,7 +71,7 @@
             if result is not None:
                 return W_RopeObject(result)
         elif encoding == "utf-8":
-            result = rope.unicode_encode_utf8(node)
+            result = rope.unicode_encode_utf8(node, allow_surrogates=True)
             if result is not None:
                 return W_RopeObject(result)
     return encode_object(space, w_unistr, encoding, errors)
diff --git a/pypy/rlib/rope.py b/pypy/rlib/rope.py
--- a/pypy/rlib/rope.py
+++ b/pypy/rlib/rope.py
@@ -1485,7 +1485,7 @@
     if rope.is_bytestring():
         return rope
 
-def unicode_encode_utf8(rope):
+def unicode_encode_utf8(rope, allow_surrogates=False):
     from pypy.rlib.runicode import unicode_encode_utf_8
     if rope.is_ascii():
         return rope
@@ -1494,7 +1494,8 @@
                                 unicode_encode_utf8(rope.right))
     elif isinstance(rope, LiteralUnicodeNode):
         return LiteralStringNode(
-            unicode_encode_utf_8(rope.u, len(rope.u), "strict"))
+            unicode_encode_utf_8(rope.u, len(rope.u), "strict",
+                                 allow_surrogates=allow_surrogates))
     elif isinstance(rope, LiteralStringNode):
         return LiteralStringNode(_str_encode_utf_8(rope.s))
 
diff --git a/pypy/rpython/rstr.py b/pypy/rpython/rstr.py
--- a/pypy/rpython/rstr.py
+++ b/pypy/rpython/rstr.py
@@ -28,8 +28,10 @@
         from pypy.rpython.annlowlevel import hlstr
         value = hlstr(llvalue)
         assert value is not None
-        univalue, _ = self.rstr_decode_utf_8(value, len(value), 'strict',
-                                             False, self.ll_raise_unicode_exception_decode)
+        univalue, _ = self.rstr_decode_utf_8(
+            value, len(value), 'strict', final=False,
+            errorhandler=self.ll_raise_unicode_exception_decode,
+            allow_surrogates=False)
         return self.ll.llunicode(univalue)
 
     def ll_raise_unicode_exception_decode(self, errors, encoding, msg, s,
@@ -50,9 +52,9 @@
         self.runicode_encode_utf_8 = None
 
     def ensure_ll_encode_utf8(self):
-        from pypy.rlib.runicode import unicode_encode_utf_8
-        self.runicode_encode_utf_8 = func_with_new_name(unicode_encode_utf_8,
-                                                        'runicode_encode_utf_8')
+        from pypy.rlib.runicode import unicode_encode_utf_8_impl
+        self.runicode_encode_utf_8 = func_with_new_name(
+            unicode_encode_utf_8_impl, 'runicode_encode_utf_8')
 
     def rtype_method_upper(self, hop):
         raise TypeError("Cannot do toupper on unicode string")
@@ -65,9 +67,16 @@
         from pypy.rpython.annlowlevel import hlunicode
         s = hlunicode(ll_s)
         assert s is not None
-        bytes = self.runicode_encode_utf_8(s, len(s), 'strict')
+        bytes = self.runicode_encode_utf_8(
+            s, len(s), 'strict',
+            errorhandler=self.ll_raise_unicode_exception_decode,
+            allow_surrogates=False)
         return self.ll.llstr(bytes)
 
+    def ll_raise_unicode_exception_encode(self, errors, encoding, msg, u,
+                                          startingpos, endingpos):
+        raise UnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
+    
 class __extend__(annmodel.SomeString):
     def rtyper_makerepr(self, rtyper):
         return rtyper.type_system.rstr.string_repr


More information about the pypy-commit mailing list