[pypy-commit] pypy default: Fix tests around utf8 encoding
amauryfa
noreply at buildbot.pypy.org
Wed Sep 26 00:00:04 CEST 2012
Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch:
Changeset: r57589:10e2fbb7bcec
Date: 2012-09-25 23:51 +0200
http://bitbucket.org/pypy/pypy/changeset/10e2fbb7bcec/
Log: Fix tests around utf8 encoding
diff --git a/pypy/objspace/std/ropeunicodeobject.py b/pypy/objspace/std/ropeunicodeobject.py
--- a/pypy/objspace/std/ropeunicodeobject.py
+++ b/pypy/objspace/std/ropeunicodeobject.py
@@ -71,7 +71,7 @@
if result is not None:
return W_RopeObject(result)
elif encoding == "utf-8":
- result = rope.unicode_encode_utf8(node)
+ result = rope.unicode_encode_utf8(node, allow_surrogates=True)
if result is not None:
return W_RopeObject(result)
return encode_object(space, w_unistr, encoding, errors)
diff --git a/pypy/rlib/rope.py b/pypy/rlib/rope.py
--- a/pypy/rlib/rope.py
+++ b/pypy/rlib/rope.py
@@ -1485,7 +1485,7 @@
if rope.is_bytestring():
return rope
-def unicode_encode_utf8(rope):
+def unicode_encode_utf8(rope, allow_surrogates=False):
from pypy.rlib.runicode import unicode_encode_utf_8
if rope.is_ascii():
return rope
@@ -1494,7 +1494,8 @@
unicode_encode_utf8(rope.right))
elif isinstance(rope, LiteralUnicodeNode):
return LiteralStringNode(
- unicode_encode_utf_8(rope.u, len(rope.u), "strict"))
+ unicode_encode_utf_8(rope.u, len(rope.u), "strict",
+ allow_surrogates=allow_surrogates))
elif isinstance(rope, LiteralStringNode):
return LiteralStringNode(_str_encode_utf_8(rope.s))
diff --git a/pypy/rpython/rstr.py b/pypy/rpython/rstr.py
--- a/pypy/rpython/rstr.py
+++ b/pypy/rpython/rstr.py
@@ -28,8 +28,10 @@
from pypy.rpython.annlowlevel import hlstr
value = hlstr(llvalue)
assert value is not None
- univalue, _ = self.rstr_decode_utf_8(value, len(value), 'strict',
- False, self.ll_raise_unicode_exception_decode)
+ univalue, _ = self.rstr_decode_utf_8(
+ value, len(value), 'strict', final=False,
+ errorhandler=self.ll_raise_unicode_exception_decode,
+ allow_surrogates=False)
return self.ll.llunicode(univalue)
def ll_raise_unicode_exception_decode(self, errors, encoding, msg, s,
@@ -50,9 +52,9 @@
self.runicode_encode_utf_8 = None
def ensure_ll_encode_utf8(self):
- from pypy.rlib.runicode import unicode_encode_utf_8
- self.runicode_encode_utf_8 = func_with_new_name(unicode_encode_utf_8,
- 'runicode_encode_utf_8')
+ from pypy.rlib.runicode import unicode_encode_utf_8_impl
+ self.runicode_encode_utf_8 = func_with_new_name(
+ unicode_encode_utf_8_impl, 'runicode_encode_utf_8')
def rtype_method_upper(self, hop):
raise TypeError("Cannot do toupper on unicode string")
@@ -65,9 +67,16 @@
from pypy.rpython.annlowlevel import hlunicode
s = hlunicode(ll_s)
assert s is not None
- bytes = self.runicode_encode_utf_8(s, len(s), 'strict')
+ bytes = self.runicode_encode_utf_8(
+ s, len(s), 'strict',
+ errorhandler=self.ll_raise_unicode_exception_decode,
+ allow_surrogates=False)
return self.ll.llstr(bytes)
+ def ll_raise_unicode_exception_encode(self, errors, encoding, msg, u,
+ startingpos, endingpos):
+ raise UnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
+
class __extend__(annmodel.SomeString):
def rtyper_makerepr(self, rtyper):
return rtyper.type_system.rstr.string_repr
More information about the pypy-commit mailing list