[pypy-commit] pypy py3k: Now that unicode.encode('utf8') can fail, it's important to build the cached utf8 copy only when requested.
amauryfa
noreply at buildbot.pypy.org
Mon Sep 24 23:47:01 CEST 2012
Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: py3k
Changeset: r57532:5ad25f0c04b3
Date: 2012-09-23 22:27 +0200
http://bitbucket.org/pypy/pypy/changeset/5ad25f0c04b3/
Log: Now that unicode.encode('utf8') can fail, it's important to build
the cached utf8 copy only when requested. Otherwise chr(0xd800)
crashes the interpreter...
This is difficult to test unfortunately, because the untranslated
version does not fail.
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -45,7 +45,7 @@
def __init__(w_self, unistr):
assert isinstance(unistr, unicode)
w_self._value = unistr
- w_self._utf8 = unistr.encode('utf-8')
+ w_self._utf8 = None
def __repr__(w_self):
""" representation for debugging purposes """
@@ -64,6 +64,13 @@
return self._value
def identifier_w(self, space):
+ if self._utf8 is None:
+ from pypy.objspace.std.unicodetype import encode_error_handler
+ from pypy.rlib.runicode import unicode_encode_utf_8
+ u = self._value
+ eh = encode_error_handler(space)
+ self._utf8 = unicode_encode_utf_8(u, len(u), None,
+ errorhandler=eh)
return self._utf8
W_UnicodeObject.EMPTY = W_UnicodeObject(u'')
More information about the pypy-commit mailing list