[pypy-commit] pypy default: Use the default encoding in stringobject.unicode_w
dripton
noreply at buildbot.pypy.org
Mon Mar 12 23:35:59 CET 2012
Author: David Ripton <dripton at ripton.net>
Branch:
Changeset: r53366:e41fae0d7da3
Date: 2012-03-12 13:50 -0700
http://bitbucket.org/pypy/pypy/changeset/e41fae0d7da3/
Log: Use the default encoding in stringobject.unicode_w
Fixes issue1079, a problem in str.join with unicode arguments when
the default encoding is set to utf8.
diff --git a/pypy/objspace/std/stringobject.py b/pypy/objspace/std/stringobject.py
--- a/pypy/objspace/std/stringobject.py
+++ b/pypy/objspace/std/stringobject.py
@@ -56,9 +56,18 @@
return w_self._value
def unicode_w(w_self, space):
- # XXX should this use the default encoding?
- from pypy.objspace.std.unicodetype import plain_str2unicode
- return plain_str2unicode(space, w_self._value)
+ # Use the default encoding.
+ from pypy.objspace.std.unicodetype import unicode_from_string, \
+ decode_object
+ w_defaultencoding = space.call_function(space.sys.get(
+ 'getdefaultencoding'))
+ from pypy.objspace.std.unicodetype import _get_encoding_and_errors, \
+ unicode_from_string, decode_object
+ encoding, errors = _get_encoding_and_errors(space, w_defaultencoding,
+ space.w_None)
+ if encoding is None and errors is None:
+ return space.unicode_w(unicode_from_string(space, w_self))
+ return space.unicode_w(decode_object(space, w_self, encoding, errors))
registerimplementation(W_StringObject)
diff --git a/pypy/objspace/std/test/test_stringobject.py b/pypy/objspace/std/test/test_stringobject.py
--- a/pypy/objspace/std/test/test_stringobject.py
+++ b/pypy/objspace/std/test/test_stringobject.py
@@ -501,6 +501,35 @@
raises(TypeError, ''.join, [1])
raises(TypeError, ''.join, [[1]])
+ def test_unicode_join_str_arg_ascii(self):
+ raises(UnicodeDecodeError, u''.join, ['\xc3\xa1'])
+
+ def test_unicode_join_str_arg_utf8(self):
+ # Need default encoding utf-8, but sys.setdefaultencoding
+ # is removed after startup.
+ import sys
+ old_encoding = sys.getdefaultencoding()
+
+ # Duplicate unittest.test_support.CleanImport logic because it won't
+ # import.
+ self.original_modules = sys.modules.copy()
+ for module_name in ['sys']:
+ if module_name in sys.modules:
+ module = sys.modules[module_name]
+ # It is possible that module_name is just an alias for
+ # another module (e.g. stub for modules renamed in 3.x).
+ # In that case, we also need delete the real module to clear
+ # the import cache.
+ if module.__name__ != module_name:
+ del sys.modules[module.__name__]
+ del sys.modules[module_name]
+
+ import sys as temp_sys
+ temp_sys.setdefaultencoding('utf-8')
+ assert u''.join(['\xc3\xa1']) == u'\xe1'
+ temp_sys.setdefaultencoding(old_encoding)
+ sys.modules.update(self.original_modules)
+
def test_unicode_join_endcase(self):
# This class inserts a Unicode object into its argument's natural
# iteration, in the 3rd position.
More information about the pypy-commit mailing list