[pypy-commit] pypy py3.6: hg merge default

Mon Apr 29 05:39:29 EDT 2019

Author: Armin Rigo <arigo at tunes.org>
Branch: py3.6
Changeset: r96560:86dc760c19db
Date: 2019-04-29 11:38 +0200
http://bitbucket.org/pypy/pypy/changeset/86dc760c19db/

Log:	hg merge default

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -1400,7 +1400,7 @@
                                   s, pos, pos + 4)
             result.append(r)
             continue
-        elif ch >= 0x110000:
+        elif r_uint(ch) >= 0x110000:
             r, pos, rettype = errorhandler(errors, public_encoding_name,
                                   "codepoint not in range(0x110000)",
                                   s, pos, len(s))
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -828,7 +828,7 @@
             if not 0 <= x <= 0x10FFFF:
                 raise oefmt(space.w_TypeError,
                     "character mapping must be in range(0x110000)")
-            return rutf8.unichr_as_utf8(x)
+            return rutf8.unichr_as_utf8(x, allow_surrogates=True)
         elif space.is_w(w_ch, space.w_None):
             # Charmap may return None
             return errorchar
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -125,6 +125,7 @@
         assert (charmap_decode(b"\x00\x01\x02", "strict",
                                {0: u'\U0010FFFF', 1: u'b', 2: u'c'}) ==
                 (u"\U0010FFFFbc", 3))
+        assert charmap_decode(b'\xff', "strict", {0xff: 0xd800}) == (u'\ud800', 1)
 
     def test_escape_decode(self):
         from _codecs import unicode_escape_decode as decode
diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -366,10 +366,14 @@
         hexdigits = self.getslice(start, i)
         try:
             val = int(hexdigits, 16)
-            if sys.maxunicode > 65535 and 0xd800 <= val <= 0xdfff:
-                # surrogate pair
-                if self.ll_chars[i] == '\\' and self.ll_chars[i+1] == 'u':
-                    val = self.decode_surrogate_pair(i, val)
+            if (0xd800 <= val <= 0xdbff and
+                    self.ll_chars[i] == '\\' and self.ll_chars[i+1] == 'u'):
+                hexdigits = self.getslice(i+2, i+6)
+                lowsurr = int(hexdigits, 16)
+                if 0xdc00 <= lowsurr <= 0xdfff:
+                    # decode surrogate pair
+                    val = 0x10000 + (((val - 0xd800) << 10) |
+                                     (lowsurr - 0xdc00))
                     i += 6
         except ValueError:
             raise DecoderError("Invalid \uXXXX escape (char %d)", i-1)
@@ -380,15 +384,6 @@
         builder.append(utf8_ch)
         return i
 
-    def decode_surrogate_pair(self, i, highsurr):
-        """ uppon enter the following must hold:
-              chars[i] == "\\" and chars[i+1] == "u"
-        """
-        i += 2
-        hexdigits = self.getslice(i, i+4)
-        lowsurr = int(hexdigits, 16) # the possible ValueError is caugth by the caller
-        return 0x10000 + (((highsurr - 0xd800) << 10) | (lowsurr - 0xdc00))
-
     def decode_key(self, i):
         """ returns a wrapped unicode """
         from rpython.rlib.rarithmetic import intmask
diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py
--- a/pypy/module/_pypyjson/test/test__pypyjson.py
+++ b/pypy/module/_pypyjson/test/test__pypyjson.py
@@ -198,6 +198,17 @@
         res = _pypyjson.loads('"z\\ud834\\udd20x"')
         assert res == expected
 
+    def test_unicode_not_a_surrogate_pair(self):
+        import _pypyjson
+        res = _pypyjson.loads('"z\\ud800\\ud800x"')
+        assert list(res) == [u'z', u'\ud800', u'\ud800', u'x']
+        res = _pypyjson.loads('"z\\udbff\\uffffx"')
+        assert list(res) == [u'z', u'\udbff', u'\uffff', u'x']
+        res = _pypyjson.loads('"z\\ud800\\ud834\\udd20x"')
+        assert res == u'z\ud800\U0001d120x'
+        res = _pypyjson.loads('"z\\udc00\\udc00x"')
+        assert list(res) == [u'z', u'\udc00', u'\udc00', u'x']
+
     def test_lone_surrogate(self):
         import _pypyjson
         json = '{"a":"\\uD83D"}'
diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py
--- a/pypy/module/_rawffi/interp_rawffi.py
+++ b/pypy/module/_rawffi/interp_rawffi.py
@@ -452,8 +452,13 @@
             elif c == 'c':
                 return space.newbytes(func(add_arg, argdesc, ll_type))
             elif c == 'u':
-                return space.newutf8(rutf8.unichr_as_utf8(
-                    r_uint(ord(func(add_arg, argdesc, ll_type)))), 1)
+                code = r_uint(ord(func(add_arg, argdesc, ll_type)))
+                try:
+                    return space.newutf8(rutf8.unichr_as_utf8(
+                        code, allow_surrogates=True), 1)
+                except rutf8.OutOfRange:
+                    raise oefmt(space.w_ValueError,
+                        "unicode character %d out of range", code)
             elif c == 'f' or c == 'd' or c == 'g':
                 return space.newfloat(float(func(add_arg, argdesc, ll_type)))
             else:
diff --git a/pypy/module/_rawffi/test/test__rawffi.py b/pypy/module/_rawffi/test/test__rawffi.py
--- a/pypy/module/_rawffi/test/test__rawffi.py
+++ b/pypy/module/_rawffi/test/test__rawffi.py
@@ -348,6 +348,21 @@
         arg2.free()
         a.free()
 
+    def test_unicode_array(self):
+        import _rawffi
+        A = _rawffi.Array('u')
+        a = A(6, u'\u1234')
+        assert a[0] == u'\u1234'
+        a[0] = u'\U00012345'
+        assert a[0] == u'\U00012345'
+        a[0] = u'\ud800'
+        assert a[0] == u'\ud800'
+        B = _rawffi.Array('i')
+        b = B.fromaddress(a.itemaddress(0), 1)
+        b[0] = 0xffffffff
+        raises(ValueError, "a[0]")
+        a.free()
+
     def test_returning_unicode(self):
         import _rawffi
         A = _rawffi.Array('u')
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -1159,7 +1159,7 @@
             elif mytype.typecode == 'u':
                 code = r_uint(ord(item))
                 try:
-                    item = rutf8.unichr_as_utf8(code)
+                    item = rutf8.unichr_as_utf8(code, allow_surrogates=True)
                 except rutf8.OutOfRange:
                     raise oefmt(space.w_ValueError,
                         "cannot operate on this array('u') because it contains"
diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py
--- a/pypy/module/array/test/test_array.py
+++ b/pypy/module/array/test/test_array.py
@@ -917,6 +917,10 @@
         assert a.tounicode() == input_unicode
         raises(ValueError, b.tounicode)   # doesn't work
 
+    def test_unicode_surrogate(self):
+        a = self.array('u', u'\ud800')
+        assert a[0] == u'\ud800'
+
     def test_weakref(self):
         import weakref
         a = self.array('u', 'Hi!')
diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py
--- a/pypy/objspace/std/formatting.py
+++ b/pypy/objspace/std/formatting.py
@@ -326,7 +326,8 @@
             space = self.space
             if do_unicode:
                 cp = rutf8.codepoint_at_pos(self.fmt, self.fmtpos - 1)
-                w_s = space.newutf8(rutf8.unichr_as_utf8(r_uint(cp)), 1)
+                w_s = space.newutf8(rutf8.unichr_as_utf8(r_uint(cp),
+                                                  allow_surrogates=True), 1)
             else:
                 cp = ord(self.fmt[self.fmtpos - 1])
                 w_s = space.newbytes(chr(cp))
@@ -478,7 +479,8 @@
                 n = space.int_w(w_value)
                 if do_unicode:
                     try:
-                        c = rutf8.unichr_as_utf8(r_uint(n))
+                        c = rutf8.unichr_as_utf8(r_uint(n),
+                                                 allow_surrogates=True)
                     except rutf8.OutOfRange:
                         raise oefmt(space.w_OverflowError,
                                     "unicode character code out of range")
diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py
--- a/pypy/objspace/std/newformat.py
+++ b/pypy/objspace/std/newformat.py
@@ -357,9 +357,11 @@
             if recursive:
                 spec = self._build_string(spec_start, end, level)
             w_rendered = self.space.format(w_obj, self.wrap(spec))
-            unwrapper = "utf8_w" if self.is_unicode else "bytes_w"
-            to_interp = getattr(self.space, unwrapper)
-            return to_interp(w_rendered)
+            if self.is_unicode:
+                w_rendered = self.space.unicode_from_object(w_rendered)
+                return self.space.utf8_w(w_rendered)
+            else:
+                return self.space.bytes_w(w_rendered)
 
         def formatter_parser(self):
             self.parser_list_w = []
diff --git a/pypy/objspace/std/test/test_newformat.py b/pypy/objspace/std/test/test_newformat.py
--- a/pypy/objspace/std/test/test_newformat.py
+++ b/pypy/objspace/std/test/test_newformat.py
@@ -245,6 +245,7 @@
     def test_simple(self):
         assert format(self.i(2)) == "2"
         assert isinstance(format(self.i(2), ""), str)
+        assert isinstance(self.i(2).__format__(""), str)
 
     def test_invalid(self):
         raises(ValueError, format, self.i(8), "s")
@@ -491,3 +492,9 @@
         excinfo = raises(ValueError, "{:j}".format, x(1))
         print(excinfo.value)
         assert str(excinfo.value) == "Unknown format code j for object of type 'x'"
+
+    def test_format_char(self):
+        import sys
+        assert '{0:c}'.format(42) == '*'
+        assert '{0:c}'.format(1234) == '\u04d2'
+        raises(OverflowError, '{0:c}'.format, -1)
diff --git a/pypy/objspace/std/test/test_stringformat.py b/pypy/objspace/std/test/test_stringformat.py
--- a/pypy/objspace/std/test/test_stringformat.py
+++ b/pypy/objspace/std/test/test_stringformat.py
@@ -215,6 +215,7 @@
 
     def test_format_wrong_char(self):
         raises(ValueError, 'a%Zb'.__mod__, ((23,),))
+        raises(ValueError, u'a%\ud800b'.__mod__, ((23,),))
 
     def test_incomplete_format(self):
         raises(ValueError, '%'.__mod__, ((23,),))
@@ -234,6 +235,8 @@
         raises(TypeError, '%c'.__mod__, ("",))
         raises(TypeError, '%c'.__mod__, (['c'],))
         raises(TypeError, '%c'.__mod__, b'A')
+        surrogate = 0xd800
+        assert '%c' % surrogate == '\ud800'
 
     def test___int__index__(self):
         class MyInt(object):
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -1180,8 +1180,7 @@
     def test_format_repeat(self):
         assert format(u"abc", u"z<5") == u"abczz"
         assert format(u"abc", u"\u2007<5") == u"abc\u2007\u2007"
-        #CPython2 raises UnicodeEncodeError
-        assert format(123, u"\u2007<5") == u"123\u2007\u2007"
+        assert format(123, "\u2007<5") == "123\u2007\u2007"
 
     def test_formatting_unicode__repr__(self):
         # Printable character
diff --git a/pypy/tool/release/force-builds.py b/pypy/tool/release/force-builds.py
--- a/pypy/tool/release/force-builds.py
+++ b/pypy/tool/release/force-builds.py
@@ -8,8 +8,13 @@
 
 modified by PyPy team
 """
+from __future__ import absolute_import, division, print_function
 
-import os, sys, urllib, subprocess
+import os, sys, subprocess
+try:
+    from urllib2 import quote
+except ImportError:
+    from urllib.request import quote
 
 from twisted.internet import reactor, defer
 from twisted.python import log
@@ -29,10 +34,10 @@
     'pypy-c-jit-macosx-x86-64',
     'pypy-c-jit-win-x86-32',
     'pypy-c-jit-linux-s390x',
-    'build-pypy-c-jit-linux-armhf-raspbian',
-    'build-pypy-c-jit-linux-armel',
+#    'build-pypy-c-jit-linux-armhf-raspbian',
+#    'build-pypy-c-jit-linux-armel',
     'rpython-linux-x86-32',
-    'rpython-linux-x86-64'
+    'rpython-linux-x86-64',
     'rpython-win-x86-32'
 ]
 
@@ -54,7 +59,7 @@
         log.err(err, "Build force failure")
 
     for builder in BUILDERS:
-        print 'Forcing', builder, '...'
+        print('Forcing', builder, '...')
         url = "http://" + server + "/builders/" + builder + "/force"
         args = [
             ('username', user),
@@ -63,15 +68,15 @@
             ('submit', 'Force Build'),
             ('branch', branch),
             ('comments', "Forced by command line script")]
-        url = url + '?' + '&'.join([k + '=' + urllib.quote(v) for (k, v) in args])
+        url = url + '?' + '&'.join([k + '=' + quote(v) for (k, v) in args])
         requests.append(
-            lock.run(client.getPage, url, followRedirect=False).addErrback(ebList))
+            lock.run(client.getPage, url.encode('utf-8'), followRedirect=False).addErrback(ebList))
 
     d = defer.gatherResults(requests)
     d.addErrback(log.err)
     d.addCallback(lambda ign: reactor.stop())
     reactor.run()
-    print 'See http://buildbot.pypy.org/summary after a while'
+    print('See http://buildbot.pypy.org/summary after a while')
 
 if __name__ == '__main__':
     log.startLogging(sys.stdout)
@@ -86,6 +91,6 @@
     try:
         subprocess.check_call(['hg','id','-r', options.branch])
     except subprocess.CalledProcessError:
-        print 'branch',  options.branch, 'could not be found in local repository'
+        print('branch',  options.branch, 'could not be found in local repository')
         sys.exit(-1) 
     main(options.branch, options.server, user=options.user)