[pypy-commit] pypy py3.6: hg merge default
arigo
pypy.commits at gmail.com
Mon Apr 29 05:39:29 EDT 2019
Author: Armin Rigo <arigo at tunes.org>
Branch: py3.6
Changeset: r96560:86dc760c19db
Date: 2019-04-29 11:38 +0200
http://bitbucket.org/pypy/pypy/changeset/86dc760c19db/
Log: hg merge default
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -1400,7 +1400,7 @@
s, pos, pos + 4)
result.append(r)
continue
- elif ch >= 0x110000:
+ elif r_uint(ch) >= 0x110000:
r, pos, rettype = errorhandler(errors, public_encoding_name,
"codepoint not in range(0x110000)",
s, pos, len(s))
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -828,7 +828,7 @@
if not 0 <= x <= 0x10FFFF:
raise oefmt(space.w_TypeError,
"character mapping must be in range(0x110000)")
- return rutf8.unichr_as_utf8(x)
+ return rutf8.unichr_as_utf8(x, allow_surrogates=True)
elif space.is_w(w_ch, space.w_None):
# Charmap may return None
return errorchar
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -125,6 +125,7 @@
assert (charmap_decode(b"\x00\x01\x02", "strict",
{0: u'\U0010FFFF', 1: u'b', 2: u'c'}) ==
(u"\U0010FFFFbc", 3))
+ assert charmap_decode(b'\xff', "strict", {0xff: 0xd800}) == (u'\ud800', 1)
def test_escape_decode(self):
from _codecs import unicode_escape_decode as decode
diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -366,10 +366,14 @@
hexdigits = self.getslice(start, i)
try:
val = int(hexdigits, 16)
- if sys.maxunicode > 65535 and 0xd800 <= val <= 0xdfff:
- # surrogate pair
- if self.ll_chars[i] == '\\' and self.ll_chars[i+1] == 'u':
- val = self.decode_surrogate_pair(i, val)
+ if (0xd800 <= val <= 0xdbff and
+ self.ll_chars[i] == '\\' and self.ll_chars[i+1] == 'u'):
+ hexdigits = self.getslice(i+2, i+6)
+ lowsurr = int(hexdigits, 16)
+ if 0xdc00 <= lowsurr <= 0xdfff:
+ # decode surrogate pair
+ val = 0x10000 + (((val - 0xd800) << 10) |
+ (lowsurr - 0xdc00))
i += 6
except ValueError:
raise DecoderError("Invalid \uXXXX escape (char %d)", i-1)
@@ -380,15 +384,6 @@
builder.append(utf8_ch)
return i
- def decode_surrogate_pair(self, i, highsurr):
- """ uppon enter the following must hold:
- chars[i] == "\\" and chars[i+1] == "u"
- """
- i += 2
- hexdigits = self.getslice(i, i+4)
- lowsurr = int(hexdigits, 16) # the possible ValueError is caugth by the caller
- return 0x10000 + (((highsurr - 0xd800) << 10) | (lowsurr - 0xdc00))
-
def decode_key(self, i):
""" returns a wrapped unicode """
from rpython.rlib.rarithmetic import intmask
diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py
--- a/pypy/module/_pypyjson/test/test__pypyjson.py
+++ b/pypy/module/_pypyjson/test/test__pypyjson.py
@@ -198,6 +198,17 @@
res = _pypyjson.loads('"z\\ud834\\udd20x"')
assert res == expected
+ def test_unicode_not_a_surrogate_pair(self):
+ import _pypyjson
+ res = _pypyjson.loads('"z\\ud800\\ud800x"')
+ assert list(res) == [u'z', u'\ud800', u'\ud800', u'x']
+ res = _pypyjson.loads('"z\\udbff\\uffffx"')
+ assert list(res) == [u'z', u'\udbff', u'\uffff', u'x']
+ res = _pypyjson.loads('"z\\ud800\\ud834\\udd20x"')
+ assert res == u'z\ud800\U0001d120x'
+ res = _pypyjson.loads('"z\\udc00\\udc00x"')
+ assert list(res) == [u'z', u'\udc00', u'\udc00', u'x']
+
def test_lone_surrogate(self):
import _pypyjson
json = '{"a":"\\uD83D"}'
diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py
--- a/pypy/module/_rawffi/interp_rawffi.py
+++ b/pypy/module/_rawffi/interp_rawffi.py
@@ -452,8 +452,13 @@
elif c == 'c':
return space.newbytes(func(add_arg, argdesc, ll_type))
elif c == 'u':
- return space.newutf8(rutf8.unichr_as_utf8(
- r_uint(ord(func(add_arg, argdesc, ll_type)))), 1)
+ code = r_uint(ord(func(add_arg, argdesc, ll_type)))
+ try:
+ return space.newutf8(rutf8.unichr_as_utf8(
+ code, allow_surrogates=True), 1)
+ except rutf8.OutOfRange:
+ raise oefmt(space.w_ValueError,
+ "unicode character %d out of range", code)
elif c == 'f' or c == 'd' or c == 'g':
return space.newfloat(float(func(add_arg, argdesc, ll_type)))
else:
diff --git a/pypy/module/_rawffi/test/test__rawffi.py b/pypy/module/_rawffi/test/test__rawffi.py
--- a/pypy/module/_rawffi/test/test__rawffi.py
+++ b/pypy/module/_rawffi/test/test__rawffi.py
@@ -348,6 +348,21 @@
arg2.free()
a.free()
+ def test_unicode_array(self):
+ import _rawffi
+ A = _rawffi.Array('u')
+ a = A(6, u'\u1234')
+ assert a[0] == u'\u1234'
+ a[0] = u'\U00012345'
+ assert a[0] == u'\U00012345'
+ a[0] = u'\ud800'
+ assert a[0] == u'\ud800'
+ B = _rawffi.Array('i')
+ b = B.fromaddress(a.itemaddress(0), 1)
+ b[0] = 0xffffffff
+ raises(ValueError, "a[0]")
+ a.free()
+
def test_returning_unicode(self):
import _rawffi
A = _rawffi.Array('u')
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -1159,7 +1159,7 @@
elif mytype.typecode == 'u':
code = r_uint(ord(item))
try:
- item = rutf8.unichr_as_utf8(code)
+ item = rutf8.unichr_as_utf8(code, allow_surrogates=True)
except rutf8.OutOfRange:
raise oefmt(space.w_ValueError,
"cannot operate on this array('u') because it contains"
diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py
--- a/pypy/module/array/test/test_array.py
+++ b/pypy/module/array/test/test_array.py
@@ -917,6 +917,10 @@
assert a.tounicode() == input_unicode
raises(ValueError, b.tounicode) # doesn't work
+ def test_unicode_surrogate(self):
+ a = self.array('u', u'\ud800')
+ assert a[0] == u'\ud800'
+
def test_weakref(self):
import weakref
a = self.array('u', 'Hi!')
diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py
--- a/pypy/objspace/std/formatting.py
+++ b/pypy/objspace/std/formatting.py
@@ -326,7 +326,8 @@
space = self.space
if do_unicode:
cp = rutf8.codepoint_at_pos(self.fmt, self.fmtpos - 1)
- w_s = space.newutf8(rutf8.unichr_as_utf8(r_uint(cp)), 1)
+ w_s = space.newutf8(rutf8.unichr_as_utf8(r_uint(cp),
+ allow_surrogates=True), 1)
else:
cp = ord(self.fmt[self.fmtpos - 1])
w_s = space.newbytes(chr(cp))
@@ -478,7 +479,8 @@
n = space.int_w(w_value)
if do_unicode:
try:
- c = rutf8.unichr_as_utf8(r_uint(n))
+ c = rutf8.unichr_as_utf8(r_uint(n),
+ allow_surrogates=True)
except rutf8.OutOfRange:
raise oefmt(space.w_OverflowError,
"unicode character code out of range")
diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py
--- a/pypy/objspace/std/newformat.py
+++ b/pypy/objspace/std/newformat.py
@@ -357,9 +357,11 @@
if recursive:
spec = self._build_string(spec_start, end, level)
w_rendered = self.space.format(w_obj, self.wrap(spec))
- unwrapper = "utf8_w" if self.is_unicode else "bytes_w"
- to_interp = getattr(self.space, unwrapper)
- return to_interp(w_rendered)
+ if self.is_unicode:
+ w_rendered = self.space.unicode_from_object(w_rendered)
+ return self.space.utf8_w(w_rendered)
+ else:
+ return self.space.bytes_w(w_rendered)
def formatter_parser(self):
self.parser_list_w = []
diff --git a/pypy/objspace/std/test/test_newformat.py b/pypy/objspace/std/test/test_newformat.py
--- a/pypy/objspace/std/test/test_newformat.py
+++ b/pypy/objspace/std/test/test_newformat.py
@@ -245,6 +245,7 @@
def test_simple(self):
assert format(self.i(2)) == "2"
assert isinstance(format(self.i(2), ""), str)
+ assert isinstance(self.i(2).__format__(""), str)
def test_invalid(self):
raises(ValueError, format, self.i(8), "s")
@@ -491,3 +492,9 @@
excinfo = raises(ValueError, "{:j}".format, x(1))
print(excinfo.value)
assert str(excinfo.value) == "Unknown format code j for object of type 'x'"
+
+ def test_format_char(self):
+ import sys
+ assert '{0:c}'.format(42) == '*'
+ assert '{0:c}'.format(1234) == '\u04d2'
+ raises(OverflowError, '{0:c}'.format, -1)
diff --git a/pypy/objspace/std/test/test_stringformat.py b/pypy/objspace/std/test/test_stringformat.py
--- a/pypy/objspace/std/test/test_stringformat.py
+++ b/pypy/objspace/std/test/test_stringformat.py
@@ -215,6 +215,7 @@
def test_format_wrong_char(self):
raises(ValueError, 'a%Zb'.__mod__, ((23,),))
+ raises(ValueError, u'a%\ud800b'.__mod__, ((23,),))
def test_incomplete_format(self):
raises(ValueError, '%'.__mod__, ((23,),))
@@ -234,6 +235,8 @@
raises(TypeError, '%c'.__mod__, ("",))
raises(TypeError, '%c'.__mod__, (['c'],))
raises(TypeError, '%c'.__mod__, b'A')
+ surrogate = 0xd800
+ assert '%c' % surrogate == '\ud800'
def test___int__index__(self):
class MyInt(object):
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -1180,8 +1180,7 @@
def test_format_repeat(self):
assert format(u"abc", u"z<5") == u"abczz"
assert format(u"abc", u"\u2007<5") == u"abc\u2007\u2007"
- #CPython2 raises UnicodeEncodeError
- assert format(123, u"\u2007<5") == u"123\u2007\u2007"
+ assert format(123, "\u2007<5") == "123\u2007\u2007"
def test_formatting_unicode__repr__(self):
# Printable character
diff --git a/pypy/tool/release/force-builds.py b/pypy/tool/release/force-builds.py
--- a/pypy/tool/release/force-builds.py
+++ b/pypy/tool/release/force-builds.py
@@ -8,8 +8,13 @@
modified by PyPy team
"""
+from __future__ import absolute_import, division, print_function
-import os, sys, urllib, subprocess
+import os, sys, subprocess
+try:
+ from urllib2 import quote
+except ImportError:
+ from urllib.request import quote
from twisted.internet import reactor, defer
from twisted.python import log
@@ -29,10 +34,10 @@
'pypy-c-jit-macosx-x86-64',
'pypy-c-jit-win-x86-32',
'pypy-c-jit-linux-s390x',
- 'build-pypy-c-jit-linux-armhf-raspbian',
- 'build-pypy-c-jit-linux-armel',
+# 'build-pypy-c-jit-linux-armhf-raspbian',
+# 'build-pypy-c-jit-linux-armel',
'rpython-linux-x86-32',
- 'rpython-linux-x86-64'
+ 'rpython-linux-x86-64',
'rpython-win-x86-32'
]
@@ -54,7 +59,7 @@
log.err(err, "Build force failure")
for builder in BUILDERS:
- print 'Forcing', builder, '...'
+ print('Forcing', builder, '...')
url = "http://" + server + "/builders/" + builder + "/force"
args = [
('username', user),
@@ -63,15 +68,15 @@
('submit', 'Force Build'),
('branch', branch),
('comments', "Forced by command line script")]
- url = url + '?' + '&'.join([k + '=' + urllib.quote(v) for (k, v) in args])
+ url = url + '?' + '&'.join([k + '=' + quote(v) for (k, v) in args])
requests.append(
- lock.run(client.getPage, url, followRedirect=False).addErrback(ebList))
+ lock.run(client.getPage, url.encode('utf-8'), followRedirect=False).addErrback(ebList))
d = defer.gatherResults(requests)
d.addErrback(log.err)
d.addCallback(lambda ign: reactor.stop())
reactor.run()
- print 'See http://buildbot.pypy.org/summary after a while'
+ print('See http://buildbot.pypy.org/summary after a while')
if __name__ == '__main__':
log.startLogging(sys.stdout)
@@ -86,6 +91,6 @@
try:
subprocess.check_call(['hg','id','-r', options.branch])
except subprocess.CalledProcessError:
- print 'branch', options.branch, 'could not be found in local repository'
+ print('branch', options.branch, 'could not be found in local repository')
sys.exit(-1)
main(options.branch, options.server, user=options.user)
More information about the pypy-commit
mailing list