[pypy-commit] pypy default: Use the default encoding in stringobject.unicode_w

dripton noreply at buildbot.pypy.org
Mon Mar 12 23:35:59 CET 2012


Author: David Ripton <dripton at ripton.net>
Branch: 
Changeset: r53366:e41fae0d7da3
Date: 2012-03-12 13:50 -0700
http://bitbucket.org/pypy/pypy/changeset/e41fae0d7da3/

Log:	Use the default encoding in stringobject.unicode_w

	Fixes issue1079, a problem in str.join with unicode arguments when
	the default encoding is set to utf8.

diff --git a/pypy/objspace/std/stringobject.py b/pypy/objspace/std/stringobject.py
--- a/pypy/objspace/std/stringobject.py
+++ b/pypy/objspace/std/stringobject.py
@@ -56,9 +56,18 @@
         return w_self._value
 
     def unicode_w(w_self, space):
-        # XXX should this use the default encoding?
-        from pypy.objspace.std.unicodetype import plain_str2unicode
-        return plain_str2unicode(space, w_self._value)
+        # Use the default encoding.
+        from pypy.objspace.std.unicodetype import unicode_from_string, \
+                decode_object
+        w_defaultencoding = space.call_function(space.sys.get(
+                                                'getdefaultencoding'))
+        from pypy.objspace.std.unicodetype import _get_encoding_and_errors, \
+            unicode_from_string, decode_object
+        encoding, errors = _get_encoding_and_errors(space, w_defaultencoding,
+                                                    space.w_None)
+        if encoding is None and errors is None:
+            return space.unicode_w(unicode_from_string(space, w_self))
+        return space.unicode_w(decode_object(space, w_self, encoding, errors))
 
 registerimplementation(W_StringObject)
 
diff --git a/pypy/objspace/std/test/test_stringobject.py b/pypy/objspace/std/test/test_stringobject.py
--- a/pypy/objspace/std/test/test_stringobject.py
+++ b/pypy/objspace/std/test/test_stringobject.py
@@ -501,6 +501,35 @@
         raises(TypeError, ''.join, [1])
         raises(TypeError, ''.join, [[1]])
 
+    def test_unicode_join_str_arg_ascii(self):
+        raises(UnicodeDecodeError, u''.join, ['\xc3\xa1'])
+
+    def test_unicode_join_str_arg_utf8(self):
+        # Need default encoding utf-8, but sys.setdefaultencoding
+        # is removed after startup.
+        import sys
+        old_encoding = sys.getdefaultencoding()
+
+        # Duplicate unittest.test_support.CleanImport logic because it won't
+        # import.
+        self.original_modules = sys.modules.copy()
+        for module_name in ['sys']:
+            if module_name in sys.modules:
+                module = sys.modules[module_name]
+                # It is possible that module_name is just an alias for
+                # another module (e.g. stub for modules renamed in 3.x).
+                # In that case, we also need delete the real module to clear
+                # the import cache.
+                if module.__name__ != module_name:
+                    del sys.modules[module.__name__]
+                del sys.modules[module_name]
+
+        import sys as temp_sys
+        temp_sys.setdefaultencoding('utf-8')
+        assert u''.join(['\xc3\xa1']) == u'\xe1'
+        temp_sys.setdefaultencoding(old_encoding)
+        sys.modules.update(self.original_modules)
+
     def test_unicode_join_endcase(self):
         # This class inserts a Unicode object into its argument's natural
         # iteration, in the 3rd position.


More information about the pypy-commit mailing list