[pypy-commit] pypy default: issue #1508: fix these last two usages of a fixed-ascii string-to-unicode

Sun Jun 2 20:48:24 CEST 2013

Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r64713:64c70e584a54
Date: 2013-06-02 20:47 +0200
http://bitbucket.org/pypy/pypy/changeset/64c70e584a54/

Log:	issue #1508: fix these last two usages of a fixed-ascii string-to-
	unicode conversion, which CPython doesn't do anyway.

diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py
--- a/pypy/objspace/std/formatting.py
+++ b/pypy/objspace/std/formatting.py
@@ -352,9 +352,8 @@
         def std_wp(self, r):
             length = len(r)
             if do_unicode and isinstance(r, str):
-                # convert string to unicode explicitely here
-                from pypy.objspace.std.unicodetype import plain_str2unicode
-                r = plain_str2unicode(self.space, r)
+                # convert string to unicode using the default encoding
+                r = self.space.unicode_w(self.space.wrap(r))
             prec = self.prec
             if prec == -1 and self.width == 0:
                 # fast path
@@ -509,12 +508,10 @@
             result = formatter.format()
         except NeedUnicodeFormattingError:
             # fall through to the unicode case
-            from pypy.objspace.std.unicodetype import plain_str2unicode
-            fmt = plain_str2unicode(space, fmt)
+            pass
         else:
             return space.wrap(result)
-    else:
-        fmt = space.unicode_w(w_fmt)
+    fmt = space.unicode_w(w_fmt)
     formatter = UnicodeFormatter(space, fmt, values_w, w_valuedict)
     result = formatter.format()
     return space.wrap(result)
diff --git a/pypy/objspace/std/test/test_stringobject.py b/pypy/objspace/std/test/test_stringobject.py
--- a/pypy/objspace/std/test/test_stringobject.py
+++ b/pypy/objspace/std/test/test_stringobject.py
@@ -530,6 +530,12 @@
                 del sys.modules[module_name]
             temp_sys.setdefaultencoding('utf-8')
             assert u''.join(['\xc3\xa1']) == u'\xe1'
+            #
+            assert ('\xc3\xa1:%s' % u'\xe2') == u'\xe1:\xe2'
+            class Foo(object):
+                def __repr__(self):
+                    return '\xc3\xa2'
+            assert u'\xe1:%r' % Foo() == u'\xe1:\xe2'
         finally:
             temp_sys.setdefaultencoding(old_encoding)
             sys.modules.update(self.original_modules)
diff --git a/pypy/objspace/std/unicodetype.py b/pypy/objspace/std/unicodetype.py
--- a/pypy/objspace/std/unicodetype.py
+++ b/pypy/objspace/std/unicodetype.py
@@ -13,22 +13,6 @@
     from pypy.objspace.std.unicodeobject import W_UnicodeObject
     return W_UnicodeObject(uni)
 
-def plain_str2unicode(space, s):
-    try:
-        return unicode(s)
-    except UnicodeDecodeError:
-        for i in range(len(s)):
-            if ord(s[i]) > 127:
-                raise OperationError(
-                    space.w_UnicodeDecodeError,
-                    space.newtuple([
-                    space.wrap('ascii'),
-                    space.wrap(s),
-                    space.wrap(i),
-                    space.wrap(i+1),
-                    space.wrap("ordinal not in range(128)")]))
-        assert False, "unreachable"
-
 
 unicode_capitalize = SMM('capitalize', 1,
                          doc='S.capitalize() -> unicode\n\nReturn a'