[pypy-commit] pypy fix-vmprof-stacklet-switch-2: merge default
antocuni
pypy.commits at gmail.com
Sat Dec 16 06:02:41 EST 2017
Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: fix-vmprof-stacklet-switch-2
Changeset: r93446:92e4ca3c2daa
Date: 2017-12-16 12:02 +0100
http://bitbucket.org/pypy/pypy/changeset/92e4ca3c2daa/
Log: merge default
diff too long, truncating to 2000 out of 2875 lines
diff --git a/pypy/module/test_lib_pypy/test_json_extra.py b/extra_tests/test_json.py
rename from pypy/module/test_lib_pypy/test_json_extra.py
rename to extra_tests/test_json.py
--- a/pypy/module/test_lib_pypy/test_json_extra.py
+++ b/extra_tests/test_json.py
@@ -1,4 +1,6 @@
-import py, json
+import pytest
+import json
+from hypothesis import given, strategies
def is_(x, y):
return type(x) is type(y) and x == y
@@ -6,12 +8,26 @@
def test_no_ensure_ascii():
assert is_(json.dumps(u"\u1234", ensure_ascii=False), u'"\u1234"')
assert is_(json.dumps("\xc0", ensure_ascii=False), '"\xc0"')
- e = py.test.raises(UnicodeDecodeError, json.dumps,
- (u"\u1234", "\xc0"), ensure_ascii=False)
- assert str(e.value).startswith("'ascii' codec can't decode byte 0xc0 ")
- e = py.test.raises(UnicodeDecodeError, json.dumps,
- ("\xc0", u"\u1234"), ensure_ascii=False)
- assert str(e.value).startswith("'ascii' codec can't decode byte 0xc0 ")
+ with pytest.raises(UnicodeDecodeError) as excinfo:
+ json.dumps((u"\u1234", "\xc0"), ensure_ascii=False)
+ assert str(excinfo.value).startswith(
+ "'ascii' codec can't decode byte 0xc0 ")
+ with pytest.raises(UnicodeDecodeError) as excinfo:
+ json.dumps(("\xc0", u"\u1234"), ensure_ascii=False)
+ assert str(excinfo.value).startswith(
+ "'ascii' codec can't decode byte 0xc0 ")
def test_issue2191():
assert is_(json.dumps(u"xxx", ensure_ascii=False), u'"xxx"')
+
+jsondata = strategies.recursive(
+ strategies.none() |
+ strategies.booleans() |
+ strategies.floats(allow_nan=False) |
+ strategies.text(),
+ lambda children: strategies.lists(children) |
+ strategies.dictionaries(strategies.text(), children))
+
+@given(jsondata)
+def test_roundtrip(d):
+ assert json.loads(json.dumps(d)) == d
diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py
--- a/extra_tests/test_textio.py
+++ b/extra_tests/test_textio.py
@@ -1,28 +1,48 @@
from hypothesis import given, strategies as st
from io import BytesIO, TextIOWrapper
+import os
-LINESEP = ['', '\r', '\n', '\r\n']
+def translate_newlines(text):
+ text = text.replace('\r\n', '\n')
+ text = text.replace('\r', '\n')
+ return text.replace('\n', os.linesep)
@st.composite
-def text_with_newlines(draw):
- sep = draw(st.sampled_from(LINESEP))
- lines = draw(st.lists(st.text(max_size=10), max_size=10))
- return sep.join(lines)
+def st_readline_universal(
+ draw, st_nlines=st.integers(min_value=0, max_value=10)):
+ n_lines = draw(st_nlines)
+ lines = draw(st.lists(
+ st.text(st.characters(blacklist_characters='\r\n')),
+ min_size=n_lines, max_size=n_lines))
+ limits = []
+ for line in lines:
+ limit = draw(st.integers(min_value=0, max_value=len(line) + 5))
+ limits.append(limit)
+ limits.append(-1)
+ endings = draw(st.lists(
+ st.sampled_from(['\n', '\r', '\r\n']),
+ min_size=n_lines, max_size=n_lines))
+ return (
+ ''.join(line + ending for line, ending in zip(lines, endings)),
+ limits)
-@given(txt=text_with_newlines(),
- mode=st.sampled_from(['\r', '\n', '\r\n', '']),
- limit=st.integers(min_value=-1))
-def test_readline(txt, mode, limit):
+@given(data=st_readline_universal(),
+ mode=st.sampled_from(['\r', '\n', '\r\n', '', None]))
+def test_readline(data, mode):
+ txt, limits = data
textio = TextIOWrapper(
- BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode)
+ BytesIO(txt.encode('utf-8', 'surrogatepass')),
+ encoding='utf-8', errors='surrogatepass', newline=mode)
lines = []
- while True:
+ for limit in limits:
line = textio.readline(limit)
- if limit > 0:
- assert len(line) < limit
+ if limit >= 0:
+ assert len(line) <= limit
if line:
lines.append(line)
- else:
+ elif limit:
break
- assert u''.join(lines) == txt
+ if mode is None:
+ txt = translate_newlines(txt)
+ assert txt.startswith(u''.join(lines))
diff --git a/lib_pypy/resource.py b/lib_pypy/resource.py
--- a/lib_pypy/resource.py
+++ b/lib_pypy/resource.py
@@ -20,6 +20,7 @@
or via the attributes ru_utime, ru_stime, ru_maxrss, and so on."""
__metaclass__ = _structseq.structseqtype
+ name = "resource.struct_rusage"
ru_utime = _structseq.structseqfield(0, "user time used")
ru_stime = _structseq.structseqfield(1, "system time used")
diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst
--- a/pypy/doc/build.rst
+++ b/pypy/doc/build.rst
@@ -149,7 +149,7 @@
xz-devel # For lzma on PyPy3.
(XXX plus the SLES11 version of libgdbm-dev and tk-dev)
-On Mac OS X::
+On Mac OS X:
Most of these build-time dependencies are installed alongside
the Developer Tools. However, note that in order for the installation to
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -355,7 +355,11 @@
containers (as list items or in sets for example), the exact rule of
equality used is "``if x is y or x == y``" (on both CPython and PyPy);
as a consequence, because all ``nans`` are identical in PyPy, you
-cannot have several of them in a set, unlike in CPython. (Issue `#1974`__)
+cannot have several of them in a set, unlike in CPython. (Issue `#1974`__).
+Another consequence is that ``cmp(float('nan'), float('nan')) == 0``, because
+``cmp`` checks with ``is`` first whether the arguments are identical (there is
+no good value to return from this call to ``cmp``, because ``cmp`` pretends
+that there is a total order on floats, but that is wrong for NaNs).
.. __: https://bitbucket.org/pypy/pypy/issue/1974/different-behaviour-for-collections-of
diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst
--- a/pypy/doc/how-to-release.rst
+++ b/pypy/doc/how-to-release.rst
@@ -62,7 +62,7 @@
* go to pypy/tool/release and run
``force-builds.py <release branch>``
The following JIT binaries should be built, however, we need more buildbots
- windows, linux-32, linux-64, osx64, armhf-raring, armhf-raspberrian, armel,
+ windows, linux-32, linux-64, osx64, armhf-raspberrian, armel,
freebsd64
* wait for builds to complete, make sure there are no failures
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -1,30 +1,42 @@
-===========================
-What's new in PyPy2.7 5.10+
-===========================
-
-.. this is a revision shortly after release-pypy2.7-v5.9.0
-.. startrev:d56dadcef996
-
-.. branch: cppyy-packaging
-Cleanup and improve cppyy packaging
-
-.. branch: docs-osx-brew-openssl
-
-.. branch: keep-debug-symbols
-Add a smartstrip tool, which can optionally keep the debug symbols in a
-separate file, instead of just stripping them away. Use it in packaging
-
-.. branch: bsd-patches
-Fix failures on FreeBSD, contributed by David Naylor as patches on the issue
-tracker (issues 2694, 2695, 2696, 2697)
-
-.. branch: run-extra-tests
-Run extra_tests/ in buildbot
-
-.. branch: vmprof-0.4.10
-Upgrade the _vmprof backend to vmprof 0.4.10
-
-.. branch: fix-vmprof-stacklet-switch
-.. branch: fix-vmprof-stacklet-switch-2
-Fix vmprof+ continulet (i.e. greenelts, eventlet, gevent, ...)
-
+===========================
+What's new in PyPy2.7 5.10+
+===========================
+
+.. this is a revision shortly after release-pypy2.7-v5.9.0
+.. startrev:d56dadcef996
+
+
+.. branch: cppyy-packaging
+
+Cleanup and improve cppyy packaging
+
+.. branch: docs-osx-brew-openssl
+
+.. branch: keep-debug-symbols
+
+Add a smartstrip tool, which can optionally keep the debug symbols in a
+separate file, instead of just stripping them away. Use it in packaging
+
+.. branch: bsd-patches
+
+Fix failures on FreeBSD, contributed by David Naylor as patches on the issue
+tracker (issues 2694, 2695, 2696, 2697)
+
+.. branch: run-extra-tests
+
+Run extra_tests/ in buildbot
+
+.. branch: vmprof-0.4.10
+
+Upgrade the _vmprof backend to vmprof 0.4.10
+
+.. branch: fix-vmprof-stacklet-switch
+.. branch: fix-vmprof-stacklet-switch-2
+Fix vmprof+continulets (i.e. greenlets, eventlet, gevent, ...)
+
+.. branch: win32-vcvars
+
+.. branch: rdict-fast-hash
+
+Make it possible to declare that the hash function of an r_dict is fast in RPython.
+
diff --git a/pypy/doc/whatsnew-pypy2-5.6.0.rst b/pypy/doc/whatsnew-pypy2-5.6.0.rst
--- a/pypy/doc/whatsnew-pypy2-5.6.0.rst
+++ b/pypy/doc/whatsnew-pypy2-5.6.0.rst
@@ -101,7 +101,7 @@
.. branch: newinitwarn
-Match CPython's stricter handling of __new/init__ arguments
+Match CPython's stricter handling of ``__new__``/``__init__`` arguments
.. branch: openssl-1.1
diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst
--- a/pypy/doc/windows.rst
+++ b/pypy/doc/windows.rst
@@ -11,7 +11,7 @@
To build pypy-c you need a working python environment, and a C compiler.
It is possible to translate with a CPython 2.6 or later, but this is not
-the preferred way, because it will take a lot longer to run � depending
+the preferred way, because it will take a lot longer to run – depending
on your architecture, between two and three times as long. So head to
`our downloads`_ and get the latest stable version.
@@ -103,6 +103,7 @@
must also copy the ``vcvarsall.bat`` file fron the ``...\9.0`` directory to the
``...\9.0\VC`` directory, and edit it, changing the lines that set
``VCINSTALLDIR`` and ``WindowsSdkDir``::
+
set VCINSTALLDIR=%~dp0\
set WindowsSdkDir=%~dp0\..\WinSDK\
diff --git a/pypy/interpreter/astcompiler/test/test_astbuilder.py b/pypy/interpreter/astcompiler/test/test_astbuilder.py
--- a/pypy/interpreter/astcompiler/test/test_astbuilder.py
+++ b/pypy/interpreter/astcompiler/test/test_astbuilder.py
@@ -1246,3 +1246,7 @@
exc = py.test.raises(SyntaxError, self.get_ast, input).value
assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode"
" bytes in position 0-1: truncated \\xXX escape")
+ input = "u'\\x1'"
+ exc = py.test.raises(SyntaxError, self.get_ast, input).value
+ assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode"
+ " bytes in position 0-2: truncated \\xXX escape")
diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py
--- a/pypy/interpreter/test/test_unicodehelper.py
+++ b/pypy/interpreter/test/test_unicodehelper.py
@@ -1,4 +1,7 @@
-from pypy.interpreter.unicodehelper import encode_utf8, decode_utf8
+import pytest
+import struct
+from pypy.interpreter.unicodehelper import (
+ encode_utf8, decode_utf8, unicode_encode_utf_32_be)
class FakeSpace:
pass
@@ -24,3 +27,23 @@
assert map(ord, got) == [0xd800, 0xdc00]
got = decode_utf8(space, "\xf0\x90\x80\x80")
assert map(ord, got) == [0x10000]
+
+@pytest.mark.parametrize('unich', [u"\ud800", u"\udc80"])
+def test_utf32_surrogates(unich):
+ assert (unicode_encode_utf_32_be(unich, 1, None) ==
+ struct.pack('>i', ord(unich)))
+ with pytest.raises(UnicodeEncodeError):
+ unicode_encode_utf_32_be(unich, 1, None, allow_surrogates=False)
+
+ def replace_with(ru, rs):
+ def errorhandler(errors, enc, msg, u, startingpos, endingpos):
+ if errors == 'strict':
+ raise UnicodeEncodeError(enc, u, startingpos, endingpos, msg)
+ return ru, rs, endingpos
+ return unicode_encode_utf_32_be(
+ u"<%s>" % unich, 3, None,
+ errorhandler, allow_surrogates=False)
+
+ assert replace_with(u'rep', None) == u'<rep>'.encode('utf-32-be')
+ assert (replace_with(None, '\xca\xfe\xca\xfe') ==
+ '\x00\x00\x00<\xca\xfe\xca\xfe\x00\x00\x00>')
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -1,7 +1,11 @@
+from rpython.rlib.objectmodel import specialize
+from rpython.rlib.rarithmetic import intmask
+from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
+from rpython.rlib import runicode
+from rpython.rlib.runicode import (
+ default_unicode_error_encode, default_unicode_error_decode,
+ MAXUNICODE, BYTEORDER, BYTEORDER2, UNICHR)
from pypy.interpreter.error import OperationError
-from rpython.rlib.objectmodel import specialize
-from rpython.rlib import runicode
-from pypy.module._codecs import interp_codecs
@specialize.memo()
def decode_error_handler(space):
@@ -37,6 +41,7 @@
# These functions take and return unwrapped rpython strings and unicodes
def decode_unicode_escape(space, string):
+ from pypy.module._codecs import interp_codecs
state = space.fromcache(interp_codecs.CodecState)
unicodedata_handler = state.get_unicodedata_handler(space)
result, consumed = runicode.str_decode_unicode_escape(
@@ -71,3 +76,229 @@
uni, len(uni), "strict",
errorhandler=None,
allow_surrogates=True)
+
+# ____________________________________________________________
+# utf-32
+
+def str_decode_utf_32(s, size, errors, final=True,
+ errorhandler=None):
+ result, length, byteorder = str_decode_utf_32_helper(
+ s, size, errors, final, errorhandler, "native")
+ return result, length
+
+def str_decode_utf_32_be(s, size, errors, final=True,
+ errorhandler=None):
+ result, length, byteorder = str_decode_utf_32_helper(
+ s, size, errors, final, errorhandler, "big")
+ return result, length
+
+def str_decode_utf_32_le(s, size, errors, final=True,
+ errorhandler=None):
+ result, length, byteorder = str_decode_utf_32_helper(
+ s, size, errors, final, errorhandler, "little")
+ return result, length
+
+def py3k_str_decode_utf_32(s, size, errors, final=True,
+ errorhandler=None):
+ result, length, byteorder = str_decode_utf_32_helper(
+ s, size, errors, final, errorhandler, "native", 'utf-32-' + BYTEORDER2)
+ return result, length
+
+def py3k_str_decode_utf_32_be(s, size, errors, final=True,
+ errorhandler=None):
+ result, length, byteorder = str_decode_utf_32_helper(
+ s, size, errors, final, errorhandler, "big", 'utf-32-be')
+ return result, length
+
+def py3k_str_decode_utf_32_le(s, size, errors, final=True,
+ errorhandler=None):
+ result, length, byteorder = str_decode_utf_32_helper(
+ s, size, errors, final, errorhandler, "little", 'utf-32-le')
+ return result, length
+
+BOM32_DIRECT = intmask(0x0000FEFF)
+BOM32_REVERSE = intmask(0xFFFE0000)
+
+def str_decode_utf_32_helper(s, size, errors, final=True,
+ errorhandler=None,
+ byteorder="native",
+ public_encoding_name='utf32'):
+ if errorhandler is None:
+ errorhandler = default_unicode_error_decode
+ bo = 0
+
+ if BYTEORDER == 'little':
+ iorder = [0, 1, 2, 3]
+ else:
+ iorder = [3, 2, 1, 0]
+
+ # Check for BOM marks (U+FEFF) in the input and adjust current
+ # byte order setting accordingly. In native mode, the leading BOM
+ # mark is skipped, in all other modes, it is copied to the output
+ # stream as-is (giving a ZWNBSP character).
+ pos = 0
+ if byteorder == 'native':
+ if size >= 4:
+ bom = intmask(
+ (ord(s[iorder[3]]) << 24) | (ord(s[iorder[2]]) << 16) |
+ (ord(s[iorder[1]]) << 8) | ord(s[iorder[0]]))
+ if BYTEORDER == 'little':
+ if bom == BOM32_DIRECT:
+ pos += 4
+ bo = -1
+ elif bom == BOM32_REVERSE:
+ pos += 4
+ bo = 1
+ else:
+ if bom == BOM32_DIRECT:
+ pos += 4
+ bo = 1
+ elif bom == BOM32_REVERSE:
+ pos += 4
+ bo = -1
+ elif byteorder == 'little':
+ bo = -1
+ else:
+ bo = 1
+ if size == 0:
+ return u'', 0, bo
+ if bo == -1:
+ # force little endian
+ iorder = [0, 1, 2, 3]
+ elif bo == 1:
+ # force big endian
+ iorder = [3, 2, 1, 0]
+
+ result = UnicodeBuilder(size // 4)
+
+ while pos < size:
+ # remaining bytes at the end? (size should be divisible by 4)
+ if len(s) - pos < 4:
+ if not final:
+ break
+ r, pos = errorhandler(errors, public_encoding_name,
+ "truncated data",
+ s, pos, len(s))
+ result.append(r)
+ if len(s) - pos < 4:
+ break
+ continue
+ ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) |
+ (ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]]))
+ if ch >= 0x110000:
+ r, pos = errorhandler(errors, public_encoding_name,
+ "codepoint not in range(0x110000)",
+ s, pos, len(s))
+ result.append(r)
+ continue
+
+ if MAXUNICODE < 65536 and ch >= 0x10000:
+ ch -= 0x10000L
+ result.append(unichr(0xD800 + (ch >> 10)))
+ result.append(unichr(0xDC00 + (ch & 0x03FF)))
+ else:
+ result.append(UNICHR(ch))
+ pos += 4
+ return result.build(), pos, bo
+
+def _STORECHAR32(result, CH, byteorder):
+ c0 = chr(((CH) >> 24) & 0xff)
+ c1 = chr(((CH) >> 16) & 0xff)
+ c2 = chr(((CH) >> 8) & 0xff)
+ c3 = chr((CH) & 0xff)
+ if byteorder == 'little':
+ result.append(c3)
+ result.append(c2)
+ result.append(c1)
+ result.append(c0)
+ else:
+ result.append(c0)
+ result.append(c1)
+ result.append(c2)
+ result.append(c3)
+
+def unicode_encode_utf_32_helper(s, size, errors,
+ errorhandler=None,
+ allow_surrogates=True,
+ byteorder='little',
+ public_encoding_name='utf32'):
+ if errorhandler is None:
+ errorhandler = default_unicode_error_encode
+ if size == 0:
+ if byteorder == 'native':
+ result = StringBuilder(4)
+ _STORECHAR32(result, 0xFEFF, BYTEORDER)
+ return result.build()
+ return ""
+
+ result = StringBuilder(size * 4 + 4)
+ if byteorder == 'native':
+ _STORECHAR32(result, 0xFEFF, BYTEORDER)
+ byteorder = BYTEORDER
+
+ pos = 0
+ while pos < size:
+ ch = ord(s[pos])
+ pos += 1
+ ch2 = 0
+ if not allow_surrogates and 0xD800 <= ch < 0xE000:
+ ru, rs, pos = errorhandler(
+ errors, public_encoding_name, 'surrogates not allowed',
+ s, pos - 1, pos)
+ if rs is not None:
+ # py3k only
+ if len(rs) % 4 != 0:
+ errorhandler(
+ 'strict', public_encoding_name, 'surrogates not allowed',
+ s, pos - 1, pos)
+ result.append(rs)
+ continue
+ for ch in ru:
+ if ord(ch) < 0xD800:
+ _STORECHAR32(result, ord(ch), byteorder)
+ else:
+ errorhandler(
+ 'strict', public_encoding_name,
+ 'surrogates not allowed', s, pos - 1, pos)
+ continue
+ if 0xD800 <= ch < 0xDC00 and MAXUNICODE < 65536 and pos < size:
+ ch2 = ord(s[pos])
+ if 0xDC00 <= ch2 < 0xE000:
+ ch = (((ch & 0x3FF) << 10) | (ch2 & 0x3FF)) + 0x10000
+ pos += 1
+ _STORECHAR32(result, ch, byteorder)
+
+ return result.build()
+
+def unicode_encode_utf_32(s, size, errors,
+ errorhandler=None, allow_surrogates=True):
+ return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
+ allow_surrogates, "native")
+
+def unicode_encode_utf_32_be(s, size, errors,
+ errorhandler=None, allow_surrogates=True):
+ return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
+ allow_surrogates, "big")
+
+def unicode_encode_utf_32_le(s, size, errors,
+ errorhandler=None, allow_surrogates=True):
+ return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
+ allow_surrogates, "little")
+
+def py3k_unicode_encode_utf_32(s, size, errors,
+ errorhandler=None, allow_surrogates=True):
+ return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
+ allow_surrogates, "native",
+ 'utf-32-' + BYTEORDER2)
+
+def py3k_unicode_encode_utf_32_be(s, size, errors,
+ errorhandler=None, allow_surrogates=True):
+ return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
+ allow_surrogates, "big",
+ 'utf-32-be')
+
+def py3k_unicode_encode_utf_32_le(s, size, errors,
+ errorhandler=None, allow_surrogates=True):
+ return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
+ allow_surrogates, "little",
+ 'utf-32-le')
diff --git a/pypy/module/__builtin__/test/test_builtin.py b/pypy/module/__builtin__/test/test_builtin.py
--- a/pypy/module/__builtin__/test/test_builtin.py
+++ b/pypy/module/__builtin__/test/test_builtin.py
@@ -404,6 +404,7 @@
def test_cmp(self):
+ assert cmp(float('nan'), float('nan')) == 0
assert cmp(9,9) == 0
assert cmp(0,9) < 0
assert cmp(9,0) > 0
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -1,10 +1,12 @@
from rpython.rlib import jit
from rpython.rlib.objectmodel import we_are_translated, not_rpython
from rpython.rlib.rstring import UnicodeBuilder
+from rpython.rlib import runicode
from rpython.rlib.runicode import code_to_unichr, MAXUNICODE
from pypy.interpreter.error import OperationError, oefmt
from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
+from pypy.interpreter import unicodehelper
class VersionTag(object):
@@ -210,7 +212,8 @@
def xmlcharrefreplace_errors(space, w_exc):
check_exception(space, w_exc)
if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
- obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object')))
+ w_obj = space.getattr(w_exc, space.newtext('object'))
+ obj = space.realunicode_w(w_obj)
start = space.int_w(space.getattr(w_exc, space.newtext('start')))
w_end = space.getattr(w_exc, space.newtext('end'))
end = space.int_w(w_end)
@@ -236,7 +239,8 @@
def backslashreplace_errors(space, w_exc):
check_exception(space, w_exc)
if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
- obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object')))
+ w_obj = space.getattr(w_exc, space.newtext('object'))
+ obj = space.realunicode_w(w_obj)
start = space.int_w(space.getattr(w_exc, space.newtext('start')))
w_end = space.getattr(w_exc, space.newtext('end'))
end = space.int_w(w_end)
@@ -363,19 +367,23 @@
raise oefmt(space.w_TypeError, "handler must be callable")
# ____________________________________________________________
-# delegation to runicode
+# delegation to runicode/unicodehelper
-from rpython.rlib import runicode
+def _find_implementation(impl_name):
+ try:
+ func = getattr(unicodehelper, impl_name)
+ except AttributeError:
+ func = getattr(runicode, impl_name)
+ return func
def make_encoder_wrapper(name):
rname = "unicode_encode_%s" % (name.replace("_encode", ""), )
- assert hasattr(runicode, rname)
+ func = _find_implementation(rname)
@unwrap_spec(uni=unicode, errors='text_or_none')
def wrap_encoder(space, uni, errors="strict"):
if errors is None:
errors = 'strict'
state = space.fromcache(CodecState)
- func = getattr(runicode, rname)
result = func(uni, len(uni), errors, state.encode_error_handler)
return space.newtuple([space.newbytes(result), space.newint(len(uni))])
wrap_encoder.func_name = rname
@@ -383,7 +391,7 @@
def make_decoder_wrapper(name):
rname = "str_decode_%s" % (name.replace("_decode", ""), )
- assert hasattr(runicode, rname)
+ func = _find_implementation(rname)
@unwrap_spec(string='bufferstr', errors='text_or_none',
w_final=WrappedDefault(False))
def wrap_decoder(space, string, errors="strict", w_final=None):
@@ -391,7 +399,6 @@
errors = 'strict'
final = space.is_true(w_final)
state = space.fromcache(CodecState)
- func = getattr(runicode, rname)
result, consumed = func(string, len(string), errors,
final, state.decode_error_handler)
return space.newtuple([space.newunicode(result), space.newint(consumed)])
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -115,10 +115,10 @@
raises(TypeError, charmap_decode, '\xff', "strict", {0xff: 0x110000})
assert (charmap_decode("\x00\x01\x02", "strict",
{0: 0x10FFFF, 1: ord('b'), 2: ord('c')}) ==
- u"\U0010FFFFbc", 3)
+ (u"\U0010FFFFbc", 3))
assert (charmap_decode("\x00\x01\x02", "strict",
{0: u'\U0010FFFF', 1: u'b', 2: u'c'}) ==
- u"\U0010FFFFbc", 3)
+ (u"\U0010FFFFbc", 3))
def test_escape_decode_errors(self):
from _codecs import escape_decode as decode
@@ -537,8 +537,12 @@
assert '\xff'.decode('utf-7', 'ignore') == ''
assert '\x00'.decode('unicode-internal', 'ignore') == ''
- def test_backslahreplace(self):
- assert u'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace') == 'a\\xac\u1234\u20ac\u8000'
+ def test_backslashreplace(self):
+ sin = u"a\xac\u1234\u20ac\u8000\U0010ffff"
+ expected = "a\\xac\\u1234\\u20ac\\u8000\\U0010ffff"
+ assert sin.encode('ascii', 'backslashreplace') == expected
+ expected = "a\xac\\u1234\xa4\\u8000\\U0010ffff"
+ assert sin.encode("iso-8859-15", "backslashreplace") == expected
def test_badhandler(self):
import codecs
@@ -592,11 +596,11 @@
def handler_unicodeinternal(exc):
if not isinstance(exc, UnicodeDecodeError):
raise TypeError("don't know how to handle %r" % exc)
- return (u"\x01", 1)
+ return (u"\x01", 4)
codecs.register_error("test.hui", handler_unicodeinternal)
res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui")
if sys.maxunicode > 65535:
- assert res == u"\u0000\u0001\u0000" # UCS4 build
+ assert res == u"\u0000\u0001" # UCS4 build
else:
assert res == u"\x00\x00\x01\x00\x00" # UCS2 build
@@ -750,3 +754,31 @@
assert _codecs.unicode_escape_decode(b) == (u'', 0)
assert _codecs.raw_unicode_escape_decode(b) == (u'', 0)
assert _codecs.unicode_internal_decode(b) == (u'', 0)
+
+ def test_xmlcharrefreplace(self):
+ r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('latin1', 'xmlcharrefreplace')
+ assert r == 'ሴ\x80⍅y\xab'
+ r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('ascii', 'xmlcharrefreplace')
+ assert r == 'ሴ⍅y«'
+
+ def test_errorhandler_collection(self):
+ import _codecs
+ errors = []
+ def record_error(exc):
+ if not isinstance(exc, UnicodeEncodeError):
+ raise TypeError("don't know how to handle %r" % exc)
+ errors.append(exc.object[exc.start:exc.end])
+ return (u'', exc.end)
+ _codecs.register_error("test.record", record_error)
+
+ sin = u"\xac\u1234\u1234\u20ac\u8000"
+ assert sin.encode("ascii", "test.record") == ""
+ assert errors == [sin]
+
+ errors = []
+ assert sin.encode("latin-1", "test.record") == "\xac"
+ assert errors == [u'\u1234\u1234\u20ac\u8000']
+
+ errors = []
+ assert sin.encode("iso-8859-15", "test.record") == "\xac\xa4"
+ assert errors == [u'\u1234\u1234', u'\u8000']
diff --git a/pypy/module/test_lib_pypy/test_greenlet.py b/pypy/module/_continuation/test/test_greenlet.py
rename from pypy/module/test_lib_pypy/test_greenlet.py
rename to pypy/module/_continuation/test/test_greenlet.py
diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -353,6 +353,7 @@
while scanned < limit:
try:
ch = self.next_char()
+ scanned += 1
except StopIteration:
return False
if ch == u'\n':
@@ -413,6 +414,7 @@
if not space.isinstance_w(w_decoded, space.w_unicode):
msg = "decoder should return a string result, not '%T'"
raise oefmt(space.w_TypeError, msg, w_decoded)
+ return w_decoded
class W_TextIOWrapper(W_TextIOBase):
@@ -737,7 +739,7 @@
remnant = None
continue
- if limit > 0:
+ if limit >= 0:
remaining = limit - builder.getlength()
assert remaining >= 0
else:
@@ -939,12 +941,13 @@
w_decoded = space.call_method(self.w_decoder, "decode",
w_chunk, space.newbool(bool(cookie.need_eof)))
- self.decoded.set(space, w_decoded)
+ w_decoded = check_decoded(space, w_decoded)
# Skip chars_to_skip of the decoded characters
- if len(self.decoded.text) < cookie.chars_to_skip:
+ if space.len_w(w_decoded) < cookie.chars_to_skip:
raise oefmt(space.w_IOError,
"can't restore logical file position")
+ self.decoded.set(space, w_decoded)
self.decoded.pos = cookie.chars_to_skip
else:
self.snapshot = PositionSnapshot(cookie.dec_flags, "")
@@ -957,10 +960,8 @@
def tell_w(self, space):
self._check_closed(space)
-
if not self.seekable:
raise oefmt(space.w_IOError, "underlying stream is not seekable")
-
if not self.telling:
raise oefmt(space.w_IOError,
"telling position disabled by next() call")
@@ -1030,14 +1031,14 @@
# We didn't get enough decoded data; signal EOF to get more.
w_decoded = space.call_method(self.w_decoder, "decode",
space.newbytes(""),
- space.newint(1)) # final=1
+ space.newint(1)) # final=1
check_decoded(space, w_decoded)
- chars_decoded += len(space.unicode_w(w_decoded))
+ chars_decoded += space.len_w(w_decoded)
cookie.need_eof = 1
if chars_decoded < chars_to_skip:
raise oefmt(space.w_IOError,
- "can't reconstruct logical file position")
+ "can't reconstruct logical file position")
finally:
space.call_method(self.w_decoder, "setstate", w_saved_state)
diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py
--- a/pypy/module/_io/test/test_interp_textio.py
+++ b/pypy/module/_io/test/test_interp_textio.py
@@ -1,40 +1,54 @@
import pytest
try:
- from hypothesis import given, strategies as st, assume
+ from hypothesis import given, strategies as st
except ImportError:
pytest.skip("hypothesis required")
+import os
from pypy.module._io.interp_bytesio import W_BytesIO
from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer
-LINESEP = ['', '\r', '\n', '\r\n']
+def translate_newlines(text):
+ text = text.replace(u'\r\n', u'\n')
+ text = text.replace(u'\r', u'\n')
+ return text.replace(u'\n', os.linesep)
@st.composite
-def text_with_newlines(draw):
- sep = draw(st.sampled_from(LINESEP))
- lines = draw(st.lists(st.text(max_size=10), max_size=10))
- return sep.join(lines)
+def st_readline(draw, st_nlines=st.integers(min_value=0, max_value=10)):
+ n_lines = draw(st_nlines)
+ fragments = []
+ limits = []
+ for _ in range(n_lines):
+ line = draw(st.text(st.characters(blacklist_characters=u'\r\n')))
+ fragments.append(line)
+ ending = draw(st.sampled_from([u'\n', u'\r', u'\r\n']))
+ fragments.append(ending)
+ limit = draw(st.integers(min_value=0, max_value=len(line) + 5))
+ limits.append(limit)
+ limits.append(-1)
+ return (u''.join(fragments), limits)
-@given(txt=text_with_newlines(),
- mode=st.sampled_from(['\r', '\n', '\r\n', '']),
- limit=st.integers(min_value=-1))
-def test_readline(space, txt, mode, limit):
- assume(limit != 0)
+@given(data=st_readline(),
+ mode=st.sampled_from(['\r', '\n', '\r\n', '']))
+def test_readline(space, data, mode):
+ txt, limits = data
w_stream = W_BytesIO(space)
w_stream.descr_init(space, space.newbytes(txt.encode('utf-8')))
w_textio = W_TextIOWrapper(space)
w_textio.descr_init(
- space, w_stream, encoding='utf-8',
+ space, w_stream,
+ encoding='utf-8', w_errors=space.newtext('surrogatepass'),
w_newline=space.newtext(mode))
lines = []
- while True:
- line = space.unicode_w(w_textio.readline_w(space, space.newint(limit)))
- if limit > 0:
+ for limit in limits:
+ w_line = w_textio.readline_w(space, space.newint(limit))
+ line = space.unicode_w(w_line)
+ if limit >= 0:
assert len(line) <= limit
if line:
lines.append(line)
- else:
+ elif limit:
break
- assert u''.join(lines) == txt
+ assert txt.startswith(u''.join(lines))
@given(st.text())
def test_read_buffer(text):
diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -71,7 +71,7 @@
self.ll_chars = rffi.str2charp(s)
self.end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw')
self.pos = 0
- self.cache = r_dict(slice_eq, slice_hash)
+ self.cache = r_dict(slice_eq, slice_hash, simple_hash_eq=True)
def close(self):
rffi.free_charp(self.ll_chars)
diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py
--- a/pypy/module/_pypyjson/interp_encoder.py
+++ b/pypy/module/_pypyjson/interp_encoder.py
@@ -49,24 +49,24 @@
first = 0
for i in range(first, len(u)):
- c = u[i]
- if c <= u'~':
- if c == u'"' or c == u'\\':
+ c = ord(u[i])
+ if c <= ord('~'):
+ if c == ord('"') or c == ord('\\'):
sb.append('\\')
- elif c < u' ':
- sb.append(ESCAPE_BEFORE_SPACE[ord(c)])
+ elif c < ord(' '):
+ sb.append(ESCAPE_BEFORE_SPACE[c])
continue
- sb.append(chr(ord(c)))
+ sb.append(chr(c))
else:
- if c <= u'\uffff':
+ if c <= ord(u'\uffff'):
sb.append('\\u')
- sb.append(HEX[ord(c) >> 12])
- sb.append(HEX[(ord(c) >> 8) & 0x0f])
- sb.append(HEX[(ord(c) >> 4) & 0x0f])
- sb.append(HEX[ord(c) & 0x0f])
+ sb.append(HEX[c >> 12])
+ sb.append(HEX[(c >> 8) & 0x0f])
+ sb.append(HEX[(c >> 4) & 0x0f])
+ sb.append(HEX[c & 0x0f])
else:
# surrogate pair
- n = ord(c) - 0x10000
+ n = c - 0x10000
s1 = 0xd800 | ((n >> 10) & 0x3ff)
sb.append('\\ud')
sb.append(HEX[(s1 >> 8) & 0x0f])
diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py
--- a/pypy/module/_rawffi/alt/type_converter.py
+++ b/pypy/module/_rawffi/alt/type_converter.py
@@ -128,7 +128,7 @@
intval: lltype.Signed
"""
self.error(w_ffitype, w_obj)
-
+
def handle_unichar(self, w_ffitype, w_obj, intval):
"""
intval: lltype.Signed
@@ -174,7 +174,7 @@
def handle_struct_rawffi(self, w_ffitype, w_structinstance):
"""
This method should be killed as soon as we remove support for _rawffi structures
-
+
w_structinstance: W_StructureInstance
"""
self.error(w_ffitype, w_structinstance)
@@ -349,7 +349,7 @@
def get_struct_rawffi(self, w_ffitype, w_structdescr):
"""
This should be killed as soon as we kill support for _rawffi structures
-
+
Return type: lltype.Unsigned
(the address of the structure)
"""
diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py
--- a/pypy/module/_sre/interp_sre.py
+++ b/pypy/module/_sre/interp_sre.py
@@ -481,11 +481,13 @@
@unwrap_spec(w_groupnum=WrappedDefault(0))
def start_w(self, w_groupnum):
- return self.space.newint(self.do_span(w_groupnum)[0])
+ start, end = self.do_span(w_groupnum)
+ return self.space.newint(start)
@unwrap_spec(w_groupnum=WrappedDefault(0))
def end_w(self, w_groupnum):
- return self.space.newint(self.do_span(w_groupnum)[1])
+ start, end = self.do_span(w_groupnum)
+ return self.space.newint(end)
@unwrap_spec(w_groupnum=WrappedDefault(0))
def span_w(self, w_groupnum):
diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py
--- a/pypy/module/_sre/test/test_app_sre.py
+++ b/pypy/module/_sre/test/test_app_sre.py
@@ -87,6 +87,14 @@
assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus")
assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs")
+ def test_findall_unicode(self):
+ import re
+ assert [u"\u1234"] == re.findall(u"\u1234", u"\u1000\u1234\u2000")
+ assert ["a", "u"] == re.findall("b(.)", "abalbus")
+ assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus")
+ assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs")
+ assert [u"xyz"] == re.findall(u".*yz", u"xyz")
+
def test_finditer(self):
import re
it = re.finditer("b(.)", "brabbel")
@@ -999,3 +1007,15 @@
import re
assert re.search(".+ab", "wowowowawoabwowo")
assert None == re.search(".+ab", "wowowaowowo")
+
+
+class AppTestUnicodeExtra:
+ def test_string_attribute(self):
+ import re
+ match = re.search(u"\u1234", u"\u1233\u1234\u1235")
+ assert match.string == u"\u1233\u1234\u1235"
+
+ def test_match_start(self):
+ import re
+ match = re.search(u"\u1234", u"\u1233\u1234\u1235")
+ assert match.start() == 1
diff --git a/pypy/module/cpyext/pyerrors.py b/pypy/module/cpyext/pyerrors.py
--- a/pypy/module/cpyext/pyerrors.py
+++ b/pypy/module/cpyext/pyerrors.py
@@ -122,7 +122,7 @@
error indicator."""
raise oefmt(space.w_TypeError, "bad argument type for built-in operation")
- at cpython_api([], lltype.Void)
+ at cpython_api([], lltype.Void, error=None)
def PyErr_BadInternalCall(space):
raise oefmt(space.w_SystemError, "Bad internal call!")
diff --git a/pypy/module/cpyext/test/test_codecs.py b/pypy/module/cpyext/test/test_codecs.py
--- a/pypy/module/cpyext/test/test_codecs.py
+++ b/pypy/module/cpyext/test/test_codecs.py
@@ -11,5 +11,5 @@
w_encoded = space.call_method(w_encoder, 'encode', space.wrap(u'späm'))
w_decoder = PyCodec_IncrementalDecoder(space, utf8, None)
w_decoded = space.call_method(w_decoder, 'decode', w_encoded)
- assert space.unwrap(w_decoded) == u'späm'
+ assert space.unicode_w(w_decoded) == u'späm'
rffi.free_charp(utf8)
diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py
--- a/pypy/module/cpyext/test/test_eval.py
+++ b/pypy/module/cpyext/test/test_eval.py
@@ -131,7 +131,7 @@
finally:
rffi.free_charp(buf)
w_a = space.getitem(w_globals, space.wrap("a"))
- assert space.unwrap(w_a) == u'caf\xe9'
+ assert space.unicode_w(w_a) == u'caf\xe9'
lltype.free(flags, flavor='raw')
def test_run_file(self, space):
diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py
--- a/pypy/module/cpyext/test/test_object.py
+++ b/pypy/module/cpyext/test/test_object.py
@@ -8,7 +8,7 @@
from pypy.module.cpyext.object import (
PyObject_IsTrue, PyObject_Not, PyObject_GetAttrString,
PyObject_DelAttrString, PyObject_GetAttr, PyObject_DelAttr,
- PyObject_GetItem,
+ PyObject_GetItem,
PyObject_IsInstance, PyObject_IsSubclass, PyObject_AsFileDescriptor,
PyObject_Hash, PyObject_Cmp, PyObject_Unicode
)
@@ -209,9 +209,9 @@
PyObject_Cmp(space, w(u"\xe9"), w("\xe9"), ptr)
def test_unicode(self, space, api):
- assert space.unwrap(api.PyObject_Unicode(None)) == u"<NULL>"
- assert space.unwrap(api.PyObject_Unicode(space.wrap([]))) == u"[]"
- assert space.unwrap(api.PyObject_Unicode(space.wrap("e"))) == u"e"
+ assert space.unicode_w(api.PyObject_Unicode(None)) == u"<NULL>"
+ assert space.unicode_w(api.PyObject_Unicode(space.wrap([]))) == u"[]"
+ assert space.unicode_w(api.PyObject_Unicode(space.wrap("e"))) == u"e"
with raises_w(space, UnicodeDecodeError):
PyObject_Unicode(space, space.wrap("\xe9"))
@@ -562,7 +562,7 @@
PyObject *a = PyTuple_GetItem(args, 0);
PyObject *b = PyTuple_GetItem(args, 1);
int res = PyObject_RichCompareBool(a, b, Py_EQ);
- return PyLong_FromLong(res);
+ return PyLong_FromLong(res);
"""),])
a = float('nan')
b = float('nan')
diff --git a/pypy/module/cpyext/test/test_pyerrors.py b/pypy/module/cpyext/test/test_pyerrors.py
--- a/pypy/module/cpyext/test/test_pyerrors.py
+++ b/pypy/module/cpyext/test/test_pyerrors.py
@@ -425,3 +425,15 @@
assert orig_exc_info == reset_sys_exc_info
assert new_exc_info == (new_exc.__class__, new_exc, None)
assert new_exc_info == new_sys_exc_info
+
+ def test_PyErr_BadInternalCall(self):
+ # NB. it only seemed to fail when run with '-s'... but I think
+ # that it always printed stuff to stderr
+ module = self.import_extension('foo', [
+ ("oops", "METH_NOARGS",
+ r'''
+ PyErr_BadInternalCall();
+ return NULL;
+ '''),
+ ])
+ raises(SystemError, module.oops)
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -178,7 +178,7 @@
array = rffi.cast(rffi.CWCHARP, PyUnicode_AS_DATA(space, word))
array2 = PyUnicode_AS_UNICODE(space, word)
array3 = PyUnicode_AsUnicode(space, word)
- for (i, char) in enumerate(space.unwrap(word)):
+ for (i, char) in enumerate(space.unicode_w(word)):
assert array[i] == char
assert array2[i] == char
assert array3[i] == char
@@ -216,12 +216,12 @@
def test_fromstring(self, space):
s = rffi.str2charp(u'sp\x09m'.encode("utf-8"))
w_res = PyUnicode_FromString(space, s)
- assert space.unwrap(w_res) == u'sp\x09m'
+ assert space.unicode_w(w_res) == u'sp\x09m'
res = PyUnicode_FromStringAndSize(space, s, 4)
w_res = from_ref(space, res)
Py_DecRef(space, res)
- assert space.unwrap(w_res) == u'sp\x09m'
+ assert space.unicode_w(w_res) == u'sp\x09m'
rffi.free_charp(s)
def test_unicode_resize(self, space):
@@ -256,17 +256,17 @@
u = rffi.str2charp(u'sp\x134m'.encode("utf-8"))
w_u = PyUnicode_DecodeUTF8(space, u, 5, None)
assert space.type(w_u) is space.w_unicode
- assert space.unwrap(w_u) == u'sp\x134m'
+ assert space.unicode_w(w_u) == u'sp\x134m'
w_u = PyUnicode_DecodeUTF8(space, u, 2, None)
assert space.type(w_u) is space.w_unicode
- assert space.unwrap(w_u) == 'sp'
+ assert space.unicode_w(w_u) == 'sp'
rffi.free_charp(u)
def test_encode_utf8(self, space):
u = rffi.unicode2wcharp(u'sp\x09m')
w_s = PyUnicode_EncodeUTF8(space, u, 4, None)
- assert space.unwrap(w_s) == u'sp\x09m'.encode('utf-8')
+ assert space.unicode_w(w_s) == u'sp\x09m'.encode('utf-8')
rffi.free_wcharp(u)
def test_encode_decimal(self, space):
@@ -364,18 +364,18 @@
def test_fromobject(self, space):
w_u = space.wrap(u'a')
assert PyUnicode_FromObject(space, w_u) is w_u
- assert space.unwrap(
+ assert space.unicode_w(
PyUnicode_FromObject(space, space.wrap('test'))) == 'test'
def test_decode(self, space):
b_text = rffi.str2charp('caf\x82xx')
b_encoding = rffi.str2charp('cp437')
- assert space.unwrap(
+ assert space.unicode_w(
PyUnicode_Decode(space, b_text, 4, b_encoding, None)) == u'caf\xe9'
w_text = PyUnicode_FromEncodedObject(space, space.wrap("test"), b_encoding, None)
assert space.isinstance_w(w_text, space.w_unicode)
- assert space.unwrap(w_text) == "test"
+ assert space.unicode_w(w_text) == "test"
with raises_w(space, TypeError):
PyUnicode_FromEncodedObject(space, space.wrap(u"test"),
@@ -391,7 +391,8 @@
u_text = u'abcdefg'
s_text = space.str_w(PyUnicode_AsEncodedString(space, space.wrap(u_text), null_charp, null_charp))
b_text = rffi.str2charp(s_text)
- assert space.unwrap(PyUnicode_Decode(space, b_text, len(s_text), null_charp, null_charp)) == u_text
+ assert space.unicode_w(PyUnicode_Decode(
+ space, b_text, len(s_text), null_charp, null_charp)) == u_text
with raises_w(space, TypeError):
PyUnicode_FromEncodedObject(
space, space.wrap(u_text), null_charp, None)
@@ -508,7 +509,7 @@
def test_concat(self, space):
w_res = PyUnicode_Concat(space, space.wrap(u'a'), space.wrap(u'b'))
- assert space.unwrap(w_res) == u'ab'
+ assert space.unicode_w(w_res) == u'ab'
def test_copy(self, space):
w_x = space.wrap(u"abcd\u0660")
@@ -579,29 +580,30 @@
w_format = space.wrap(u'hi %s')
w_args = space.wrap((u'test',))
w_formated = PyUnicode_Format(space, w_format, w_args)
- assert space.unwrap(w_formated) == space.unwrap(space.mod(w_format, w_args))
+ assert (space.unicode_w(w_formated) ==
+ space.unicode_w(space.mod(w_format, w_args)))
def test_join(self, space):
w_sep = space.wrap(u'<sep>')
w_seq = space.wrap([u'a', u'b'])
w_joined = PyUnicode_Join(space, w_sep, w_seq)
- assert space.unwrap(w_joined) == u'a<sep>b'
+ assert space.unicode_w(w_joined) == u'a<sep>b'
def test_fromordinal(self, space):
w_char = PyUnicode_FromOrdinal(space, 65)
- assert space.unwrap(w_char) == u'A'
+ assert space.unicode_w(w_char) == u'A'
w_char = PyUnicode_FromOrdinal(space, 0)
- assert space.unwrap(w_char) == u'\0'
+ assert space.unicode_w(w_char) == u'\0'
w_char = PyUnicode_FromOrdinal(space, 0xFFFF)
- assert space.unwrap(w_char) == u'\uFFFF'
+ assert space.unicode_w(w_char) == u'\uFFFF'
def test_replace(self, space):
w_str = space.wrap(u"abababab")
w_substr = space.wrap(u"a")
w_replstr = space.wrap(u"z")
- assert u"zbzbabab" == space.unwrap(
+ assert u"zbzbabab" == space.unicode_w(
PyUnicode_Replace(space, w_str, w_substr, w_replstr, 2))
- assert u"zbzbzbzb" == space.unwrap(
+ assert u"zbzbzbzb" == space.unicode_w(
PyUnicode_Replace(space, w_str, w_substr, w_replstr, -1))
def test_tailmatch(self, space):
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -12,6 +12,7 @@
from pypy.module.cpyext.bytesobject import PyString_Check
from pypy.module.sys.interp_encoding import setdefaultencoding
from pypy.module._codecs.interp_codecs import CodecState
+from pypy.interpreter import unicodehelper
from pypy.objspace.std import unicodeobject
from rpython.rlib import rstring, runicode
from rpython.tool.sourcetools import func_renamer
@@ -620,7 +621,7 @@
else:
errors = None
- result, length, byteorder = runicode.str_decode_utf_32_helper(
+ result, length, byteorder = unicodehelper.str_decode_utf_32_helper(
string, size, errors,
True, # final ? false for multiple passes?
None, # errorhandler
diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py
--- a/pypy/module/posix/test/test_posix2.py
+++ b/pypy/module/posix/test/test_posix2.py
@@ -31,9 +31,15 @@
pdir.join('file2').write("test2")
pdir.join('another_longer_file_name').write("test3")
mod.pdir = pdir
- unicode_dir = udir.ensure('fi\xc5\x9fier.txt', dir=True)
+ if sys.platform == 'darwin':
+ # see issue https://bugs.python.org/issue31380
+ unicode_dir = udir.ensure('fixc5x9fier.txt', dir=True)
+ file_name = 'cafxe9'
+ else:
+ unicode_dir = udir.ensure('fi\xc5\x9fier.txt', dir=True)
+ file_name = 'caf\xe9'
unicode_dir.join('somefile').write('who cares?')
- unicode_dir.join('caf\xe9').write('who knows?')
+ unicode_dir.join(file_name).write('who knows?')
mod.unicode_dir = unicode_dir
# in applevel tests, os.stat uses the CPython os.stat.
diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py
--- a/pypy/module/time/interp_time.py
+++ b/pypy/module/time/interp_time.py
@@ -310,12 +310,19 @@
errno = rposix.get_saved_errno()
return os.strerror(errno)
+def _check_sleep_arg(space, secs):
+ from rpython.rlib.rfloat import isinf, isnan
+ if secs < 0:
+ raise oefmt(space.w_IOError,
+ "Invalid argument: negative time in sleep")
+ if isinf(secs) or isnan(secs):
+ raise oefmt(space.w_IOError,
+ "Invalid argument: inf or nan")
+
if sys.platform != 'win32':
@unwrap_spec(secs=float)
def sleep(space, secs):
- if secs < 0:
- raise oefmt(space.w_IOError,
- "Invalid argument: negative time in sleep")
+ _check_sleep_arg(space, secs)
rtime.sleep(secs)
else:
from rpython.rlib import rwin32
@@ -336,9 +343,7 @@
OSError(EINTR, "sleep() interrupted"))
@unwrap_spec(secs=float)
def sleep(space, secs):
- if secs < 0:
- raise oefmt(space.w_IOError,
- "Invalid argument: negative time in sleep")
+ _check_sleep_arg(space, secs)
# as decreed by Guido, only the main thread can be
# interrupted.
main_thread = space.fromcache(State).main_thread
diff --git a/pypy/module/time/test/test_time.py b/pypy/module/time/test/test_time.py
--- a/pypy/module/time/test/test_time.py
+++ b/pypy/module/time/test/test_time.py
@@ -19,6 +19,8 @@
raises(TypeError, time.sleep, "foo")
time.sleep(0.12345)
raises(IOError, time.sleep, -1.0)
+ raises(IOError, time.sleep, float('nan'))
+ raises(IOError, time.sleep, float('inf'))
def test_clock(self):
import time
diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py
--- a/pypy/module/unicodedata/interp_ucd.py
+++ b/pypy/module/unicodedata/interp_ucd.py
@@ -259,10 +259,10 @@
result[0] = ch
if not composed: # If decomposed normalization we are done
- return space.newunicode(u''.join([unichr(i) for i in result[:j]]))
+ return self.build(space, result, stop=j)
if j <= 1:
- return space.newunicode(u''.join([unichr(i) for i in result[:j]]))
+ return self.build(space, result, stop=j)
current = result[0]
starter_pos = 0
@@ -310,7 +310,10 @@
result[starter_pos] = current
- return space.newunicode(u''.join([unichr(i) for i in result[:next_insert]]))
+ return self.build(space, result, stop=next_insert)
+
+ def build(self, space, r, stop):
+ return space.newunicode(u''.join([unichr(i) for i in r[:stop]]))
methods = {}
diff --git a/pypy/module/unicodedata/test/test_hyp.py b/pypy/module/unicodedata/test/test_hyp.py
--- a/pypy/module/unicodedata/test/test_hyp.py
+++ b/pypy/module/unicodedata/test/test_hyp.py
@@ -10,7 +10,7 @@
def normalize(s):
w_s = space.newunicode(s)
w_res = ucd.normalize(space, NF_code, w_s)
- return space.unwrap(w_res)
+ return space.unicode_w(w_res)
return normalize
all_forms = ['NFC', 'NFD', 'NFKC', 'NFKD']
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -135,6 +135,11 @@
check(u'a' + 'b', u'ab')
check('a' + u'b', u'ab')
+ def test_getitem(self):
+ assert u'abc'[2] == 'c'
+ raises(IndexError, u'abc'.__getitem__, 15)
+ assert u'g\u0105\u015b\u0107'[2] == u'\u015b'
+
def test_join(self):
def check(a, b):
assert a == b
@@ -171,6 +176,8 @@
assert u'\n\n'.splitlines() == [u'', u'']
assert u'a\nb\nc'.splitlines(1) == [u'a\n', u'b\n', u'c']
assert u'\na\nb\n'.splitlines(1) == [u'\n', u'a\n', u'b\n']
+ assert ((u'a' + '\xc2\x85'.decode('utf8') + u'b\n').splitlines() ==
+ ['a', 'b'])
def test_zfill(self):
assert u'123'.zfill(2) == u'123'
@@ -217,6 +224,7 @@
raises(ValueError, u'abc'.split, u'')
raises(ValueError, 'abc'.split, u'')
assert u' a b c d'.split(None, 0) == [u'a b c d']
+ assert u'a\nb\u1680c'.split() == [u'a', u'b', u'c']
def test_rsplit(self):
assert u"".rsplit() == []
@@ -246,6 +254,7 @@
raises(ValueError, 'abc'.rsplit, u'')
assert u' a b c '.rsplit(None, 0) == [u' a b c']
assert u''.rsplit('aaa') == [u'']
+ assert u'a\nb\u1680c'.rsplit() == [u'a', u'b', u'c']
def test_split_rsplit_str_unicode(self):
x = 'abc'.split(u'b')
@@ -291,6 +300,8 @@
assert u"bROWN fOX".title() == u"Brown Fox"
assert u"Brown Fox".title() == u"Brown Fox"
assert u"bro!wn fox".title() == u"Bro!Wn Fox"
+ assert u"brow\u4321n fox".title() == u"Brow\u4321N Fox"
+ assert u'\ud800'.title() == u'\ud800'
def test_istitle(self):
assert u"".istitle() == False
@@ -315,6 +326,18 @@
assert not u'\u01c5abc'.islower()
assert not u'\u01c5ABC'.isupper()
+ def test_lower_upper(self):
+ assert u'a'.lower() == u'a'
+ assert u'A'.lower() == u'a'
+ assert u'\u0105'.lower() == u'\u0105'
+ assert u'\u0104'.lower() == u'\u0105'
+ assert u'\ud800'.lower() == u'\ud800'
+ assert u'a'.upper() == u'A'
+ assert u'A'.upper() == u'A'
+ assert u'\u0105'.upper() == u'\u0104'
+ assert u'\u0104'.upper() == u'\u0104'
+ assert u'\ud800'.upper() == u'\ud800'
+
def test_capitalize(self):
assert u"brown fox".capitalize() == u"Brown fox"
assert u' hello '.capitalize() == u' hello '
@@ -336,6 +359,8 @@
# check with Ll chars with no upper - nothing changes here
assert (u'\u019b\u1d00\u1d86\u0221\u1fb7'.capitalize() ==
u'\u019b\u1d00\u1d86\u0221\u1fb7')
+ assert u'\ud800'.capitalize() == u'\ud800'
+ assert u'xx\ud800'.capitalize() == u'Xx\ud800'
def test_rjust(self):
s = u"abc"
@@ -376,6 +401,16 @@
assert u'one!two!three!'.replace('x', '@') == u'one!two!three!'
assert u'one!two!three!'.replace(u'x', '@', 2) == u'one!two!three!'
assert u'abc'.replace('', u'-') == u'-a-b-c-'
+ assert u'\u1234'.replace(u'', '-') == u'-\u1234-'
+ assert u'\u0234\u5678'.replace('', u'-') == u'-\u0234-\u5678-'
+ assert u'\u0234\u5678'.replace('', u'-', 0) == u'\u0234\u5678'
+ assert u'\u0234\u5678'.replace('', u'-', 1) == u'-\u0234\u5678'
+ assert u'\u0234\u5678'.replace('', u'-', 2) == u'-\u0234-\u5678'
+ assert u'\u0234\u5678'.replace('', u'-', 3) == u'-\u0234-\u5678-'
+ assert u'\u0234\u5678'.replace('', u'-', 4) == u'-\u0234-\u5678-'
+ assert u'\u0234\u5678'.replace('', u'-', 700) == u'-\u0234-\u5678-'
+ assert u'\u0234\u5678'.replace('', u'-', -1) == u'-\u0234-\u5678-'
+ assert u'\u0234\u5678'.replace('', u'-', -42) == u'-\u0234-\u5678-'
assert u'abc'.replace(u'', u'-', 3) == u'-a-b-c'
assert u'abc'.replace('', '-', 0) == u'abc'
assert u''.replace(u'', '') == u''
@@ -479,6 +514,9 @@
assert u''.startswith(u'a') is False
assert u'x'.startswith(u'xx') is False
assert u'y'.startswith(u'xx') is False
+ assert u'\u1234\u5678\u4321'.startswith(u'\u1234') is True
+ assert u'\u1234\u5678\u4321'.startswith(u'\u1234\u4321') is False
+ assert u'\u1234'.startswith(u'', 1, 0) is True
def test_startswith_more(self):
assert u'ab'.startswith(u'a', 0) is True
@@ -589,7 +627,7 @@
raises(TypeError, u'hello'.translate)
raises(TypeError, u'abababc'.translate, {ord('a'):''})
- def test_unicode_form_encoded_object(self):
+ def test_unicode_from_encoded_object(self):
assert unicode('x', 'utf-8') == u'x'
assert unicode('x', 'utf-8', 'strict') == u'x'
@@ -634,6 +672,8 @@
assert u'\u20ac'.encode('utf-8') == '\xe2\x82\xac'
assert u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82'
assert u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96'
+ assert u'\ud800\udc02'.encode('uTf-8') == '\xf0\x90\x80\x82'
+ assert u'\ud84d\udc56'.encode('Utf8') == '\xf0\xa3\x91\x96'
assert u'\ud800'.encode('utf-8') == '\xed\xa0\x80'
assert u'\udc00'.encode('utf-8') == '\xed\xb0\x80'
assert (u'\ud800\udc02'*1000).encode('utf-8') == '\xf0\x90\x80\x82'*1000
@@ -745,6 +785,7 @@
def test_index(self):
assert u"rrarrrrrrrrra".index(u'a', 4, None) == 12
assert u"rrarrrrrrrrra".index(u'a', None, 6) == 2
+ assert u"\u1234\u4321\u5678".index(u'\u5678', 1) == 2
def test_rindex(self):
from sys import maxint
@@ -754,6 +795,7 @@
assert u'abcdefghiabc'.rindex(u'abc', 0, -1) == 0
assert u'abcdefghiabc'.rindex(u'abc', -4*maxint, 4*maxint) == 9
assert u'rrarrrrrrrrra'.rindex(u'a', 4, None) == 12
+ assert u"\u1234\u5678".rindex(u'\u5678') == 1
raises(ValueError, u'abcdefghiabc'.rindex, u'hib')
raises(ValueError, u'defghiabc'.rindex, u'def', 1)
@@ -768,12 +810,15 @@
assert u'abcdefghiabc'.rfind(u'') == 12
assert u'abcdefghiabc'.rfind(u'abcd') == 0
assert u'abcdefghiabc'.rfind(u'abcz') == -1
+ assert u"\u1234\u5678".rfind(u'\u5678') == 1
def test_rfind_corner_case(self):
assert u'abc'.rfind('', 4) == -1
def test_find_index_str_unicode(self):
- assert 'abcdefghiabc'.find(u'bc') == 1
+ assert u'abcdefghiabc'.find(u'bc') == 1
+ assert u'ab\u0105b\u0107'.find('b', 2) == 3
+ assert u'ab\u0105b\u0107'.find('b', 0, 1) == -1
assert 'abcdefghiabc'.rfind(u'abc') == 9
raises(UnicodeDecodeError, '\x80'.find, u'')
raises(UnicodeDecodeError, '\x80'.rfind, u'')
@@ -781,6 +826,7 @@
assert 'abcdefghiabc'.rindex(u'abc') == 9
raises(UnicodeDecodeError, '\x80'.index, u'')
raises(UnicodeDecodeError, '\x80'.rindex, u'')
+ assert u"\u1234\u5678".find(u'\u5678') == 1
def test_count(self):
assert u"".count(u"x") ==0
@@ -807,6 +853,7 @@
def test_swapcase(self):
assert u'\xe4\xc4\xdf'.swapcase() == u'\xc4\xe4\xdf'
+ assert u'\ud800'.swapcase() == u'\ud800'
def test_buffer(self):
buf = buffer(u'XY')
@@ -878,16 +925,31 @@
def test_getslice(self):
assert u'123456'.__getslice__(1, 5) == u'2345'
- s = u"abc"
- assert s[:] == "abc"
- assert s[1:] == "bc"
- assert s[:2] == "ab"
- assert s[1:2] == "b"
- assert s[-2:] == "bc"
- assert s[:-1] == "ab"
- assert s[-2:2] == "b"
- assert s[1:-1] == "b"
- assert s[-2:-1] == "b"
+ s = u"\u0105b\u0107"
+ assert s[:] == u"\u0105b\u0107"
+ assert s[1:] == u"b\u0107"
+ assert s[:2] == u"\u0105b"
+ assert s[1:2] == u"b"
+ assert s[-2:] == u"b\u0107"
+ assert s[:-1] == u"\u0105b"
+ assert s[-2:2] == u"b"
+ assert s[1:-1] == u"b"
+ assert s[-2:-1] == u"b"
+
+ def test_getitem_slice(self):
+ assert u'123456'.__getitem__(slice(1, 5)) == u'2345'
+ s = u"\u0105b\u0107"
+ assert s[slice(3)] == u"\u0105b\u0107"
+ assert s[slice(1, 3)] == u"b\u0107"
+ assert s[slice(2)] == u"\u0105b"
+ assert s[slice(1,2)] == u"b"
+ assert s[slice(-2,3)] == u"b\u0107"
+ assert s[slice(-1)] == u"\u0105b"
+ assert s[slice(-2,2)] == u"b"
+ assert s[slice(1,-1)] == u"b"
+ assert s[slice(-2,-1)] == u"b"
+ assert u"abcde"[::2] == u"ace"
+ assert u"\u0105\u0106\u0107abcd"[::2] == u"\u0105\u0107bd"
def test_no_len_on_str_iter(self):
iterable = u"hello"
diff --git a/pypy/tool/release/force-builds.py b/pypy/tool/release/force-builds.py
--- a/pypy/tool/release/force-builds.py
+++ b/pypy/tool/release/force-builds.py
@@ -29,7 +29,6 @@
'pypy-c-jit-macosx-x86-64',
'pypy-c-jit-win-x86-32',
'pypy-c-jit-linux-s390x',
- 'build-pypy-c-jit-linux-armhf-raring',
'build-pypy-c-jit-linux-armhf-raspbian',
'build-pypy-c-jit-linux-armel',
]
diff --git a/pypy/tool/release/repackage.sh b/pypy/tool/release/repackage.sh
--- a/pypy/tool/release/repackage.sh
+++ b/pypy/tool/release/repackage.sh
@@ -23,7 +23,7 @@
# Download latest builds from the buildmaster, rename the top
# level directory, and repackage ready to be uploaded to bitbucket
-for plat in linux linux64 linux-armhf-raspbian linux-armhf-raring linux-armel osx64 s390x
+for plat in linux linux64 linux-armhf-raspbian linux-armel osx64 s390x
do
echo downloading package for $plat
if wget -q --show-progress http://buildbot.pypy.org/nightly/$branchname/pypy-c-jit-latest-$plat.tar.bz2
diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py
--- a/rpython/annotator/bookkeeper.py
+++ b/rpython/annotator/bookkeeper.py
@@ -194,13 +194,14 @@
listdef.generalize_range_step(flags['range_step'])
return SomeList(listdef)
- def getdictdef(self, is_r_dict=False, force_non_null=False):
+ def getdictdef(self, is_r_dict=False, force_non_null=False, simple_hash_eq=False):
"""Get the DictDef associated with the current position."""
try:
dictdef = self.dictdefs[self.position_key]
except KeyError:
dictdef = DictDef(self, is_r_dict=is_r_dict,
- force_non_null=force_non_null)
+ force_non_null=force_non_null,
+ simple_hash_eq=simple_hash_eq)
self.dictdefs[self.position_key] = dictdef
return dictdef
diff --git a/rpython/annotator/builtin.py b/rpython/annotator/builtin.py
--- a/rpython/annotator/builtin.py
+++ b/rpython/annotator/builtin.py
@@ -237,22 +237,30 @@
return SomeInstance(clsdef)
@analyzer_for(rpython.rlib.objectmodel.r_dict)
-def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None):
+def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None):
+ return _r_dict_helper(SomeDict, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq)
+
+ at analyzer_for(rpython.rlib.objectmodel.r_ordereddict)
+def robjmodel_r_ordereddict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None):
+ return _r_dict_helper(SomeOrderedDict, s_eqfn, s_hashfn,
+ s_force_non_null, s_simple_hash_eq)
+
+def _r_dict_helper(cls, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq):
if s_force_non_null is None:
force_non_null = False
else:
assert s_force_non_null.is_constant()
force_non_null = s_force_non_null.const
+ if s_simple_hash_eq is None:
+ simple_hash_eq = False
+ else:
+ assert s_simple_hash_eq.is_constant()
+ simple_hash_eq = s_simple_hash_eq.const
dictdef = getbookkeeper().getdictdef(is_r_dict=True,
- force_non_null=force_non_null)
+ force_non_null=force_non_null,
+ simple_hash_eq=simple_hash_eq)
dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn)
- return SomeDict(dictdef)
-
- at analyzer_for(rpython.rlib.objectmodel.r_ordereddict)
-def robjmodel_r_ordereddict(s_eqfn, s_hashfn):
- dictdef = getbookkeeper().getdictdef(is_r_dict=True)
- dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn)
- return SomeOrderedDict(dictdef)
+ return cls(dictdef)
@analyzer_for(rpython.rlib.objectmodel.hlinvoke)
def robjmodel_hlinvoke(s_repr, s_llcallable, *args_s):
diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py
--- a/rpython/annotator/dictdef.py
+++ b/rpython/annotator/dictdef.py
@@ -81,12 +81,14 @@
def __init__(self, bookkeeper, s_key = s_ImpossibleValue,
s_value = s_ImpossibleValue,
is_r_dict = False,
- force_non_null = False):
+ force_non_null = False,
+ simple_hash_eq = False):
self.dictkey = DictKey(bookkeeper, s_key, is_r_dict)
self.dictkey.itemof[self] = True
self.dictvalue = DictValue(bookkeeper, s_value)
self.dictvalue.itemof[self] = True
self.force_non_null = force_non_null
+ self.simple_hash_eq = simple_hash_eq
def read_key(self, position_key):
self.dictkey.read_locations.add(position_key)
diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py
--- a/rpython/jit/metainterp/optimizeopt/intbounds.py
+++ b/rpython/jit/metainterp/optimizeopt/intbounds.py
@@ -25,19 +25,6 @@
return (1 << ((byte_size << 3) - 1)) - 1
-IS_64_BIT = sys.maxint > 2**32
-
-def next_pow2_m1(n):
- """Calculate next power of 2 greater than n minus one."""
- n |= n >> 1
- n |= n >> 2
- n |= n >> 4
- n |= n >> 8
- n |= n >> 16
- if IS_64_BIT:
- n |= n >> 32
- return n
-
class OptIntBounds(Optimization):
"""Keeps track of the bounds placed on integers by guards and remove
@@ -50,7 +37,7 @@
return dispatch_postprocess(self, op)
def propagate_bounds_backward(self, box):
- # FIXME: This takes care of the instruction where box is the reuslt
+ # FIXME: This takes care of the instruction where box is the result
# but the bounds produced by all instructions where box is
# an argument might also be tighten
b = self.getintbound(box)
@@ -91,14 +78,8 @@
b1 = self.getintbound(v1)
v2 = self.get_box_replacement(op.getarg(1))
b2 = self.getintbound(v2)
- if b1.known_ge(IntBound(0, 0)) and \
- b2.known_ge(IntBound(0, 0)):
- r = self.getintbound(op)
- if b1.has_upper and b2.has_upper:
- mostsignificant = b1.upper | b2.upper
- r.intersect(IntBound(0, next_pow2_m1(mostsignificant)))
- else:
- r.make_ge(IntBound(0, 0))
+ b = b1.or_bound(b2)
+ self.getintbound(op).intersect(b)
optimize_INT_OR = optimize_INT_OR_or_XOR
optimize_INT_XOR = optimize_INT_OR_or_XOR
@@ -112,15 +93,8 @@
def postprocess_INT_AND(self, op):
b1 = self.getintbound(op.getarg(0))
b2 = self.getintbound(op.getarg(1))
- r = self.getintbound(op)
- pos1 = b1.known_ge(IntBound(0, 0))
- pos2 = b2.known_ge(IntBound(0, 0))
- if pos1 or pos2:
- r.make_ge(IntBound(0, 0))
- if pos1:
- r.make_le(b1)
- if pos2:
- r.make_le(b2)
+ b = b1.and_bound(b2)
+ self.getintbound(op).intersect(b)
def optimize_INT_SUB(self, op):
return self.emit(op)
@@ -211,16 +185,10 @@
r.intersect(b1.py_div_bound(b2))
def post_call_INT_PY_MOD(self, op):
+ b1 = self.getintbound(op.getarg(1))
b2 = self.getintbound(op.getarg(2))
- if b2.is_constant():
- val = b2.getint()
- r = self.getintbound(op)
- if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos
- r.make_ge(IntBound(0, 0))
- r.make_lt(IntBound(val, val))
- else: # with Python's modulo: neg < (x % neg) <= 0
- r.make_gt(IntBound(val, val))
- r.make_le(IntBound(0, 0))
+ r = self.getintbound(op)
+ r.intersect(b1.mod_bound(b2))
def optimize_INT_LSHIFT(self, op):
return self.emit(op)
@@ -436,7 +404,7 @@
def optimize_INT_FORCE_GE_ZERO(self, op):
b = self.getintbound(op.getarg(0))
- if b.known_ge(IntBound(0, 0)):
+ if b.known_nonnegative():
self.make_equal_to(op, op.getarg(0))
else:
return self.emit(op)
@@ -647,7 +615,7 @@
if r.is_constant():
if r.getint() == valnonzero:
b1 = self.getintbound(op.getarg(0))
- if b1.known_ge(IntBound(0, 0)):
+ if b1.known_nonnegative():
b1.make_gt(IntBound(0, 0))
self.propagate_bounds_backward(op.getarg(0))
elif r.getint() == valzero:
diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py
--- a/rpython/jit/metainterp/optimizeopt/intutils.py
+++ b/rpython/jit/metainterp/optimizeopt/intutils.py
@@ -12,6 +12,19 @@
MAXINT = maxint
MININT = -maxint - 1
+IS_64_BIT = sys.maxint > 2**32
+
+def next_pow2_m1(n):
+ """Calculate next power of 2 greater than n minus one."""
+ n |= n >> 1
+ n |= n >> 2
+ n |= n >> 4
+ n |= n >> 8
+ n |= n >> 16
+ if IS_64_BIT:
+ n |= n >> 32
+ return n
+
class IntBound(AbstractInfo):
_attrs_ = ('has_upper', 'has_lower', 'upper', 'lower')
@@ -92,6 +105,9 @@
def known_ge(self, other):
return other.known_le(self)
+ def known_nonnegative(self):
+ return self.has_lower and 0 <= self.lower
+
def intersect(self, other):
r = False
@@ -192,10 +208,22 @@
else:
return IntUnbounded()
+ def mod_bound(self, other):
+ r = IntUnbounded()
+ if other.is_constant():
+ val = other.getint()
+ if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos
+ r.make_ge(IntBound(0, 0))
+ r.make_lt(IntBound(val, val))
+ else: # with Python's modulo: neg < (x % neg) <= 0
+ r.make_gt(IntBound(val, val))
+ r.make_le(IntBound(0, 0))
+ return r
+
def lshift_bound(self, other):
if self.has_upper and self.has_lower and \
other.has_upper and other.has_lower and \
- other.known_ge(IntBound(0, 0)) and \
+ other.known_nonnegative() and \
other.known_lt(IntBound(LONG_BIT, LONG_BIT)):
try:
vals = (ovfcheck(self.upper << other.upper),
@@ -211,7 +239,7 @@
def rshift_bound(self, other):
if self.has_upper and self.has_lower and \
other.has_upper and other.has_lower and \
- other.known_ge(IntBound(0, 0)) and \
+ other.known_nonnegative() and \
other.known_lt(IntBound(LONG_BIT, LONG_BIT)):
vals = (self.upper >> other.upper,
self.upper >> other.lower,
@@ -221,7 +249,32 @@
else:
return IntUnbounded()
+ def and_bound(self, other):
+ pos1 = self.known_nonnegative()
+ pos2 = other.known_nonnegative()
+ r = IntUnbounded()
+ if pos1 or pos2:
+ r.make_ge(IntBound(0, 0))
+ if pos1:
+ r.make_le(self)
+ if pos2:
+ r.make_le(other)
+ return r
+
+ def or_bound(self, other):
+ r = IntUnbounded()
+ if self.known_nonnegative() and \
+ other.known_nonnegative():
+ if self.has_upper and other.has_upper:
+ mostsignificant = self.upper | other.upper
+ r.intersect(IntBound(0, next_pow2_m1(mostsignificant)))
+ else:
+ r.make_ge(IntBound(0, 0))
+ return r
+
def contains(self, val):
+ if not we_are_translated():
+ assert not isinstance(val, long)
if not isinstance(val, int):
if ((not self.has_lower or self.lower == MININT) and
not self.has_upper or self.upper == MAXINT):
@@ -282,7 +335,7 @@
guards.append(op)
def is_bool(self):
- return (self.bounded() and self.known_ge(ConstIntBound(0)) and
+ return (self.bounded() and self.known_nonnegative() and
self.known_le(ConstIntBound(1)))
def make_bool(self):
@@ -297,7 +350,7 @@
if self.known_gt(IntBound(0, 0)) or \
self.known_lt(IntBound(0, 0)):
return INFO_NONNULL
- if self.known_ge(IntBound(0, 0)) and \
+ if self.known_nonnegative() and \
self.known_le(IntBound(0, 0)):
return INFO_NULL
return INFO_UNKNOWN
diff --git a/rpython/jit/metainterp/optimizeopt/optimizer.py b/rpython/jit/metainterp/optimizeopt/optimizer.py
--- a/rpython/jit/metainterp/optimizeopt/optimizer.py
+++ b/rpython/jit/metainterp/optimizeopt/optimizer.py
@@ -273,7 +273,6 @@
self.jitdriver_sd = jitdriver_sd
self.cpu = metainterp_sd.cpu
self.interned_refs = self.cpu.ts.new_ref_dict()
- self.interned_ints = {}
self.resumedata_memo = resume.ResumeDataLoopMemo(metainterp_sd)
self.pendingfields = None # set temporarily to a list, normally by
# heap.py, as we're about to generate a guard
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py
@@ -1,12 +1,34 @@
from rpython.jit.metainterp.optimizeopt.intutils import IntBound, IntUpperBound, \
- IntLowerBound, IntUnbounded
-from rpython.jit.metainterp.optimizeopt.intbounds import next_pow2_m1
+ IntLowerBound, IntUnbounded, next_pow2_m1
from copy import copy
import sys
-from rpython.rlib.rarithmetic import LONG_BIT
+from rpython.rlib.rarithmetic import LONG_BIT, ovfcheck
-def bound(a,b):
+from hypothesis import given, strategies
+
+special_values = (
+ range(-100, 100) +
+ [2 ** i for i in range(1, LONG_BIT)] +
+ [-2 ** i for i in range(1, LONG_BIT)] +
+ [2 ** i - 1 for i in range(1, LONG_BIT)] +
+ [-2 ** i - 1 for i in range(1, LONG_BIT)] +
+ [2 ** i + 1 for i in range(1, LONG_BIT)] +
+ [-2 ** i + 1 for i in range(1, LONG_BIT)] +
+ [sys.maxint, -sys.maxint-1])
+
+special_values = strategies.sampled_from(
+ [int(v) for v in special_values if type(int(v)) is int])
+
+ints = strategies.builds(
+ int, # strategies.integers sometimes returns a long?
+ special_values | strategies.integers(
+ min_value=int(-sys.maxint-1), max_value=sys.maxint))
+
+ints_or_none = strategies.none() | ints
+
+
+def bound(a, b):
if a is None and b is None:
return IntUnbounded()
elif a is None:
@@ -14,11 +36,55 @@
elif b is None:
return IntLowerBound(a)
else:
- return IntBound(a,b)
+ return IntBound(a, b)
def const(a):
return bound(a,a)
+
+def build_bound_with_contained_number(a, b, c):
+ a, b, c = sorted([a, b, c])
+ r = bound(a, c)
+ assert r.contains(b)
+ return r, b
+
+bound_with_contained_number = strategies.builds(
+ build_bound_with_contained_number,
+ ints_or_none,
+ ints_or_none,
+ ints
+)
+
+unbounded = strategies.builds(
+ lambda x: (bound(None, None), int(x)),
+ ints
+)
+
+lower_bounded = strategies.builds(
+ lambda x, y: (bound(min(x, y), None), max(x, y)),
+ ints,
+ ints
More information about the pypy-commit
mailing list