From pypy.commits at gmail.com Tue Jan 1 01:46:26 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 31 Dec 2018 22:46:26 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge default into branch Message-ID: <5c2b0cc2.1c69fb81.388ac.b07c@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95541:c09e504c21c6 Date: 2018-12-25 21:01 +0200 http://bitbucket.org/pypy/pypy/changeset/c09e504c21c6/ Log: merge default into branch diff --git a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py --- a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py +++ b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py @@ -327,6 +327,16 @@ assert ffi.typeof(c) is ffi.typeof("char[]") ffi.cast("unsigned short *", c)[1] += 500 assert list(a) == [10000, 20500, 30000] + assert c == ffi.from_buffer(a, True) + assert c == ffi.from_buffer(a, require_writable=True) + # + p = ffi.from_buffer(b"abcd") + assert p[2] == b"c" + # + assert p == ffi.from_buffer(b"abcd", False) + py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) + py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", + require_writable=True) def test_memmove(self): ffi = FFI() diff --git a/extra_tests/cffi_tests/cffi1/test_ffi_obj.py b/extra_tests/cffi_tests/cffi1/test_ffi_obj.py --- a/extra_tests/cffi_tests/cffi1/test_ffi_obj.py +++ b/extra_tests/cffi_tests/cffi1/test_ffi_obj.py @@ -244,6 +244,16 @@ assert ffi.typeof(c) is ffi.typeof("char[]") ffi.cast("unsigned short *", c)[1] += 500 assert list(a) == [10000, 20500, 30000] + assert c == ffi.from_buffer(a, True) + assert c == ffi.from_buffer(a, require_writable=True) + # + p = ffi.from_buffer(b"abcd") + assert p[2] == b"c" + # + assert p == ffi.from_buffer(b"abcd", False) + py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) + py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", + require_writable=True) def test_memmove(): ffi = 
_cffi1_backend.FFI() diff --git a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py --- a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py +++ b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py @@ -1654,6 +1654,16 @@ assert ffi.typeof(c) is ffi.typeof("char[]") ffi.cast("unsigned short *", c)[1] += 500 assert list(a) == [10000, 20500, 30000] + assert c == ffi.from_buffer(a, True) + assert c == ffi.from_buffer(a, require_writable=True) + # + p = ffi.from_buffer(b"abcd") + assert p[2] == b"c" + # + assert p == ffi.from_buffer(b"abcd", False) + py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) + py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", + require_writable=True) def test_all_primitives(self): assert set(PRIMITIVE_TO_INDEX) == set([ diff --git a/extra_tests/test_pyrepl/conftest.py b/extra_tests/test_pyrepl/conftest.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_pyrepl/conftest.py @@ -0,0 +1,8 @@ +import sys + +def pytest_ignore_collect(path): + if '__pypy__' not in sys.builtin_module_names: + try: + import pyrepl + except ImportError: + return True diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -341,7 +341,7 @@ # """ # note that 'buffer' is a type, set on this instance by __init__ - def from_buffer(self, python_buffer): + def from_buffer(self, python_buffer, require_writable=False): """Return a that points to the data of the given Python object, which must support the buffer interface. Note that this is not meant to be used on the built-in types @@ -349,7 +349,8 @@ but only on objects containing large quantities of raw data in some other format, like 'array.array' or numpy arrays. 
""" - return self._backend.from_buffer(self.BCharA, python_buffer) + return self._backend.from_buffer(self.BCharA, python_buffer, + require_writable) def memmove(self, dest, src, n): """ffi.memmove(dest, src, n) copies n bytes of memory from src to dest. diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -394,8 +394,10 @@ * some functions and attributes of the ``gc`` module behave in a slightly different way: for example, ``gc.enable`` and - ``gc.disable`` are supported, but instead of enabling and disabling - the GC, they just enable and disable the execution of finalizers. + ``gc.disable`` are supported, but "enabling and disabling the GC" has + a different meaning in PyPy than in CPython. These functions + actually enable and disable the major collections and the + execution of finalizers. * PyPy prints a random line from past #pypy IRC topics at startup in interactive mode. In a released version, this behaviour is suppressed, but diff --git a/pypy/doc/gc_info.rst b/pypy/doc/gc_info.rst --- a/pypy/doc/gc_info.rst +++ b/pypy/doc/gc_info.rst @@ -22,8 +22,44 @@ larger. (A third category, the very large objects, are initially allocated outside the nursery and never move.) -Since Incminimark is an incremental GC, the major collection is incremental, -meaning there should not be any pauses longer than 1ms. +Since Incminimark is an incremental GC, the major collection is incremental: +the goal is not to have any pause longer than 1ms, but in practice it depends +on the size and characteristics of the heap: occasionally, there can be pauses +between 10-100ms. + + +Semi-manual GC management +-------------------------- + +If there are parts of the program where it is important to have a low latency, +you might want to control precisely when the GC runs, to avoid unexpected +pauses. 
Note that this has effect only on major collections, while minor +collections continue to work as usual. + +As explained above, a full major collection consists of ``N`` steps, where +``N`` depends on the size of the heap; generally speaking, it is not possible +to predict how many steps will be needed to complete a collection. + +``gc.enable()`` and ``gc.disable()`` control whether the GC runs collection +steps automatically. When the GC is disabled the memory usage will grow +indefinitely, unless you manually call ``gc.collect()`` and +``gc.collect_step()``. + +``gc.collect()`` runs a full major collection. + +``gc.collect_step()`` runs a single collection step. It returns an object of +type GcCollectStepStats_, the same which is passed to the corresponding `GC +Hooks`_. The following code is roughly equivalent to a ``gc.collect()``:: + + while True: + if gc.collect_step().major_is_done: + break + +For a real-world example of usage of this API, you can look at the 3rd-party +module `pypytools.gc.custom`_, which also provides a ``with customgc.nogc()`` +context manager to mark sections where the GC is forbidden. + +.. _`pypytools.gc.custom`: https://bitbucket.org/antocuni/pypytools/src/0273afc3e8bedf0eb1ef630c3bc69e8d9dd661fe/pypytools/gc/custom.py?at=default&fileviewer=file-view-default Fragmentation @@ -184,6 +220,8 @@ the number of pinned objects. +.. _GcCollectStepStats: + The attributes for ``GcCollectStepStats`` are: ``count``, ``duration``, ``duration_min``, ``duration_max`` @@ -192,10 +230,14 @@ ``oldstate``, ``newstate`` Integers which indicate the state of the GC before and after the step. +``major_is_done`` + Boolean which indicate whether this was the last step of the major + collection + The value of ``oldstate`` and ``newstate`` is one of these constants, defined inside ``gc.GcCollectStepStats``: ``STATE_SCANNING``, ``STATE_MARKING``, -``STATE_SWEEPING``, ``STATE_FINALIZING``. 
It is possible to get a string -representation of it by indexing the ``GC_STATS`` tuple. +``STATE_SWEEPING``, ``STATE_FINALIZING``, ``STATE_USERDEL``. It is possible +to get a string representation of it by indexing the ``GC_STATES`` tuple. The attributes for ``GcCollectStats`` are: diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -65,6 +65,14 @@ Update most test_lib_pypy/ tests and move them to extra_tests/. +.. branch: gc-disable + +Make it possible to manually manage the GC by using a combination of +gc.disable() and gc.collect_step(). Make sure to write a proper release +announcement in which we explain that existing programs could leak memory if +they run for too much time between a gc.disable()/gc.enable() + .. branch: unicode-utf8 Use utf8 internally to represent unicode + diff --git a/pypy/module/_cffi_backend/ffi_obj.py b/pypy/module/_cffi_backend/ffi_obj.py --- a/pypy/module/_cffi_backend/ffi_obj.py +++ b/pypy/module/_cffi_backend/ffi_obj.py @@ -327,7 +327,8 @@ return w_ctype.cast(w_ob) - def descr_from_buffer(self, w_python_buffer): + @unwrap_spec(require_writable=int) + def descr_from_buffer(self, w_python_buffer, require_writable=0): """\ Return a that points to the data of the given Python object, which must support the buffer interface. 
Note that this is @@ -337,7 +338,8 @@ 'array.array' or numpy arrays.""" # w_ctchara = newtype._new_chara_type(self.space) - return func._from_buffer(self.space, w_ctchara, w_python_buffer) + return func._from_buffer(self.space, w_ctchara, w_python_buffer, + require_writable) @unwrap_spec(w_arg=W_CData) diff --git a/pypy/module/_cffi_backend/func.py b/pypy/module/_cffi_backend/func.py --- a/pypy/module/_cffi_backend/func.py +++ b/pypy/module/_cffi_backend/func.py @@ -110,8 +110,8 @@ def _fetch_as_write_buffer(space, w_x): return space.writebuf_w(w_x) - at unwrap_spec(w_ctype=ctypeobj.W_CType) -def from_buffer(space, w_ctype, w_x): + at unwrap_spec(w_ctype=ctypeobj.W_CType, require_writable=int) +def from_buffer(space, w_ctype, w_x, require_writable=0): from pypy.module._cffi_backend import ctypearray, ctypeprim # if (not isinstance(w_ctype, ctypearray.W_CTypeArray) or @@ -119,13 +119,16 @@ raise oefmt(space.w_TypeError, "needs 'char[]', got '%s'", w_ctype.name) # - return _from_buffer(space, w_ctype, w_x) + return _from_buffer(space, w_ctype, w_x, require_writable) -def _from_buffer(space, w_ctype, w_x): +def _from_buffer(space, w_ctype, w_x, require_writable): if space.isinstance_w(w_x, space.w_unicode): raise oefmt(space.w_TypeError, - "from_buffer() cannot return the address a unicode") - buf = _fetch_as_read_buffer(space, w_x) + "from_buffer() cannot return the address of a unicode object") + if require_writable: + buf = _fetch_as_write_buffer(space, w_x) + else: + buf = _fetch_as_read_buffer(space, w_x) if space.isinstance_w(w_x, space.w_bytes): _cdata = get_raw_address_of_string(space, w_x) else: diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -3730,6 +3730,18 @@ check(4 | 8, "CHB", "GTB") check(4 | 16, "CHB", "ROB") +def test_from_buffer_require_writable(): + BChar = 
new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + p1 = from_buffer(BCharA, b"foo", False) + assert p1 == from_buffer(BCharA, b"foo", False) + py.test.raises((TypeError, BufferError), from_buffer, BCharA, b"foo", True) + ba = bytearray(b"foo") + p1 = from_buffer(BCharA, ba, True) + p1[0] = b"g" + assert ba == b"goo" + def test_memmove(): Short = new_primitive_type("short") ShortA = new_array_type(new_pointer_type(Short), None) diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py --- a/pypy/module/_cffi_backend/test/test_ffi_obj.py +++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py @@ -287,6 +287,16 @@ assert ffi.typeof(c) is ffi.typeof("char[]") ffi.cast("unsigned short *", c)[1] += 500 assert list(a) == [10000, 20500, 30000] + assert c == ffi.from_buffer(a, True) + assert c == ffi.from_buffer(a, require_writable=True) + # + p = ffi.from_buffer(b"abcd") + assert p[2] == b"c" + # + assert p == ffi.from_buffer(b"abcd", False) + raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) + raises((TypeError, BufferError), ffi.from_buffer, b"abcd", + require_writable=True) def test_memmove(self): import sys diff --git a/pypy/module/_cppyy/test/Makefile b/pypy/module/_cppyy/test/Makefile --- a/pypy/module/_cppyy/test/Makefile +++ b/pypy/module/_cppyy/test/Makefile @@ -15,7 +15,7 @@ HASGENREFLEX:=$(shell command -v genreflex 2> /dev/null) -cppflags=-std=c++14 -O3 -m64 -fPIC -rdynamic +cppflags=-std=c++14 -O3 -fPIC -rdynamic ifdef HASGENREFLEX genreflex_flags:=$(shell genreflex --cppflags) cppflags+=$(genreflex_flags) @@ -25,7 +25,7 @@ PLATFORM := $(shell uname -s) ifeq ($(PLATFORM),Darwin) - cppflags+=-dynamiclib -single_module -arch x86_64 -undefined dynamic_lookup + cppflags+=-dynamiclib -single_module -undefined dynamic_lookup endif diff --git a/pypy/module/cpyext/src/stringobject.c b/pypy/module/cpyext/src/stringobject.c --- 
a/pypy/module/cpyext/src/stringobject.c +++ b/pypy/module/cpyext/src/stringobject.c @@ -164,7 +164,7 @@ va_arg(vargs, PY_LONG_LONG)); #endif else if (size_tflag) - sprintf(s, "%" PY_FORMAT_SIZE_T "d", + sprintf(s, "%ld", va_arg(vargs, Py_ssize_t)); else sprintf(s, "%d", va_arg(vargs, int)); diff --git a/pypy/module/gc/__init__.py b/pypy/module/gc/__init__.py --- a/pypy/module/gc/__init__.py +++ b/pypy/module/gc/__init__.py @@ -4,6 +4,7 @@ class Module(MixedModule): interpleveldefs = { 'collect': 'interp_gc.collect', + 'collect_step': 'interp_gc.collect_step', 'enable': 'interp_gc.enable', 'disable': 'interp_gc.disable', 'isenabled': 'interp_gc.isenabled', diff --git a/pypy/module/gc/hook.py b/pypy/module/gc/hook.py --- a/pypy/module/gc/hook.py +++ b/pypy/module/gc/hook.py @@ -1,5 +1,6 @@ from rpython.memory.gc.hook import GcHooks -from rpython.memory.gc import incminimark +from rpython.memory.gc import incminimark +from rpython.rlib import rgc from rpython.rlib.nonconst import NonConstant from rpython.rlib.rarithmetic import r_uint, r_longlong, longlongmax from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault @@ -117,12 +118,24 @@ self.descr_set_on_gc_collect(space, space.w_None) -class GcMinorHookAction(AsyncAction): +class NoRecursiveAction(AsyncAction): + depth = 0 + + def perform(self, ec, frame): + if self.depth == 0: + try: + self.depth += 1 + return self._do_perform(ec, frame) + finally: + self.depth -= 1 + + +class GcMinorHookAction(NoRecursiveAction): total_memory_used = 0 pinned_objects = 0 def __init__(self, space): - AsyncAction.__init__(self, space) + NoRecursiveAction.__init__(self, space) self.w_callable = space.w_None self.reset() @@ -145,7 +158,7 @@ self.pinned_objects = NonConstant(-42) self.fire() - def perform(self, ec, frame): + def _do_perform(self, ec, frame): w_stats = W_GcMinorStats( self.count, self.duration, @@ -157,12 +170,12 @@ self.space.call_function(self.w_callable, w_stats) -class 
GcCollectStepHookAction(AsyncAction): +class GcCollectStepHookAction(NoRecursiveAction): oldstate = 0 newstate = 0 def __init__(self, space): - AsyncAction.__init__(self, space) + NoRecursiveAction.__init__(self, space) self.w_callable = space.w_None self.reset() @@ -185,19 +198,20 @@ self.newstate = NonConstant(-42) self.fire() - def perform(self, ec, frame): + def _do_perform(self, ec, frame): w_stats = W_GcCollectStepStats( self.count, self.duration, self.duration_min, self.duration_max, self.oldstate, - self.newstate) + self.newstate, + rgc.is_done__states(self.oldstate, self.newstate)) self.reset() self.space.call_function(self.w_callable, w_stats) -class GcCollectHookAction(AsyncAction): +class GcCollectHookAction(NoRecursiveAction): num_major_collects = 0 arenas_count_before = 0 arenas_count_after = 0 @@ -206,7 +220,7 @@ rawmalloc_bytes_after = 0 def __init__(self, space): - AsyncAction.__init__(self, space) + NoRecursiveAction.__init__(self, space) self.w_callable = space.w_None self.reset() @@ -227,7 +241,7 @@ self.rawmalloc_bytes_after = NonConstant(r_uint(42)) self.fire() - def perform(self, ec, frame): + def _do_perform(self, ec, frame): w_stats = W_GcCollectStats(self.count, self.num_major_collects, self.arenas_count_before, @@ -252,15 +266,32 @@ class W_GcCollectStepStats(W_Root): + # NOTE: this is specific to incminimark: if we want to integrate the + # applevel gc module with another gc, we probably need a more general + # approach to this. + # + # incminimark has 4 GC states: scanning, marking, sweeping and + # finalizing. However, from the user point of view, we have an additional + # "virtual" state: USERDEL, which represent when we run applevel + # finalizers after having completed a GC major collection. 
This state is + # never explicitly visible when using hooks, but it is used for the return + # value of gc.collect_step (see interp_gc.py) + STATE_SCANNING = incminimark.STATE_SCANNING + STATE_MARKING = incminimark.STATE_MARKING + STATE_SWEEPING = incminimark.STATE_SWEEPING + STATE_FINALIZING = incminimark.STATE_FINALIZING + STATE_USERDEL = incminimark.STATE_FINALIZING + 1 # used by StepCollector + GC_STATES = tuple(incminimark.GC_STATES + ['USERDEL']) def __init__(self, count, duration, duration_min, duration_max, - oldstate, newstate): + oldstate, newstate, major_is_done): self.count = count self.duration = duration self.duration_min = duration_min self.duration_max = duration_max self.oldstate = oldstate self.newstate = newstate + self.major_is_done = major_is_done class W_GcCollectStats(W_Root): @@ -320,11 +351,16 @@ W_GcCollectStepStats.typedef = TypeDef( "GcCollectStepStats", - STATE_SCANNING = incminimark.STATE_SCANNING, - STATE_MARKING = incminimark.STATE_MARKING, - STATE_SWEEPING = incminimark.STATE_SWEEPING, - STATE_FINALIZING = incminimark.STATE_FINALIZING, - GC_STATES = tuple(incminimark.GC_STATES), + STATE_SCANNING = W_GcCollectStepStats.STATE_SCANNING, + STATE_MARKING = W_GcCollectStepStats.STATE_MARKING, + STATE_SWEEPING = W_GcCollectStepStats.STATE_SWEEPING, + STATE_FINALIZING = W_GcCollectStepStats.STATE_FINALIZING, + STATE_USERDEL = W_GcCollectStepStats.STATE_USERDEL, + GC_STATES = tuple(W_GcCollectStepStats.GC_STATES), + major_is_done = interp_attrproperty( + "major_is_done", + cls=W_GcCollectStepStats, + wrapfn="newbool"), **wrap_many(W_GcCollectStepStats, ( "count", "duration", diff --git a/pypy/module/gc/interp_gc.py b/pypy/module/gc/interp_gc.py --- a/pypy/module/gc/interp_gc.py +++ b/pypy/module/gc/interp_gc.py @@ -1,6 +1,7 @@ from pypy.interpreter.gateway import unwrap_spec from pypy.interpreter.error import oefmt from rpython.rlib import rgc +from pypy.module.gc.hook import W_GcCollectStepStats @unwrap_spec(generation=int) @@ -16,7 +17,9 
@@ cache.clear() rgc.collect() + _run_finalizers(space) +def _run_finalizers(space): # if we are running in gc.disable() mode but gc.collect() is called, # we should still call the finalizers now. We do this as an attempt # to get closer to CPython's behavior: in Py3.5 some tests @@ -39,18 +42,20 @@ return space.newint(0) def enable(space): - """Non-recursive version. Enable finalizers now. + """Non-recursive version. Enable major collections and finalizers. If they were already enabled, no-op. If they were disabled even several times, enable them anyway. """ + rgc.enable() if not space.user_del_action.enabled_at_app_level: space.user_del_action.enabled_at_app_level = True enable_finalizers(space) def disable(space): - """Non-recursive version. Disable finalizers now. Several calls - to this function are ignored. + """Non-recursive version. Disable major collections and finalizers. + Multiple calls to this function are ignored. """ + rgc.disable() if space.user_del_action.enabled_at_app_level: space.user_del_action.enabled_at_app_level = False disable_finalizers(space) @@ -77,6 +82,59 @@ if uda.pending_with_disabled_del is None: uda.pending_with_disabled_del = [] + +class StepCollector(object): + """ + Invoke rgc.collect_step() until we are done, then run the app-level + finalizers as a separate step + """ + + def __init__(self, space): + self.space = space + self.finalizing = False + + def do(self): + if self.finalizing: + self._run_finalizers() + self.finalizing = False + oldstate = W_GcCollectStepStats.STATE_USERDEL + newstate = W_GcCollectStepStats.STATE_SCANNING + major_is_done = True # now we are finally done + else: + states = self._collect_step() + oldstate = rgc.old_state(states) + newstate = rgc.new_state(states) + major_is_done = False # USERDEL still to do + if rgc.is_done(states): + newstate = W_GcCollectStepStats.STATE_USERDEL + self.finalizing = True + # + duration = -1 + return W_GcCollectStepStats( + count = 1, + duration = duration, + duration_min 
= duration, + duration_max = duration, + oldstate = oldstate, + newstate = newstate, + major_is_done = major_is_done) + + def _collect_step(self): + return rgc.collect_step() + + def _run_finalizers(self): + _run_finalizers(self.space) + +def collect_step(space): + """ + If the GC is incremental, run a single gc-collect-step. Return True when + the major collection is completed. + If the GC is not incremental, do a full collection and return True. + """ + sc = space.fromcache(StepCollector) + w_stats = sc.do() + return w_stats + # ____________________________________________________________ @unwrap_spec(filename='fsencode') diff --git a/pypy/module/gc/test/test_gc.py b/pypy/module/gc/test/test_gc.py --- a/pypy/module/gc/test/test_gc.py +++ b/pypy/module/gc/test/test_gc.py @@ -1,7 +1,20 @@ import py - +import pytest +from rpython.rlib import rgc +from pypy.interpreter.baseobjspace import ObjSpace +from pypy.interpreter.gateway import interp2app, unwrap_spec +from pypy.module.gc.interp_gc import StepCollector, W_GcCollectStepStats class AppTestGC(object): + + def setup_class(cls): + if cls.runappdirect: + pytest.skip("these tests cannot work with -A") + space = cls.space + def rgc_isenabled(space): + return space.newbool(rgc.isenabled()) + cls.w_rgc_isenabled = space.wrap(interp2app(rgc_isenabled)) + def test_collect(self): import gc gc.collect() # mostly a "does not crash" kind of test @@ -63,12 +76,16 @@ def test_enable(self): import gc assert gc.isenabled() + assert self.rgc_isenabled() gc.disable() assert not gc.isenabled() + assert not self.rgc_isenabled() gc.enable() assert gc.isenabled() + assert self.rgc_isenabled() gc.enable() assert gc.isenabled() + assert self.rgc_isenabled() def test_gc_collect_overrides_gc_disable(self): import gc @@ -83,6 +100,24 @@ assert deleted == [1] gc.enable() + def test_gc_collect_step(self): + import gc + + class X(object): + deleted = 0 + def __del__(self): + X.deleted += 1 + + gc.disable() + X(); X(); X(); + n = 0 + while 
True: + n += 1 + if gc.collect_step().major_is_done: + break + + assert n >= 2 # at least one step + 1 finalizing + assert X.deleted == 3 class AppTestGcDumpHeap(object): pytestmark = py.test.mark.xfail(run=False) @@ -156,3 +191,55 @@ gc.collect() # the classes C should all go away here for r in rlist: assert r() is None + + +def test_StepCollector(): + W = W_GcCollectStepStats + SCANNING = W.STATE_SCANNING + MARKING = W.STATE_MARKING + SWEEPING = W.STATE_SWEEPING + FINALIZING = W.STATE_FINALIZING + USERDEL = W.STATE_USERDEL + + class MyStepCollector(StepCollector): + my_steps = 0 + my_done = False + my_finalized = 0 + + def __init__(self): + StepCollector.__init__(self, space=None) + self._state_transitions = iter([ + (SCANNING, MARKING), + (MARKING, SWEEPING), + (SWEEPING, FINALIZING), + (FINALIZING, SCANNING)]) + + def _collect_step(self): + self.my_steps += 1 + try: + oldstate, newstate = next(self._state_transitions) + except StopIteration: + assert False, 'should not happen, did you call _collect_step too much?' 
+ return rgc._encode_states(oldstate, newstate) + + def _run_finalizers(self): + self.my_finalized += 1 + + sc = MyStepCollector() + transitions = [] + while True: + result = sc.do() + transitions.append((result.oldstate, result.newstate, sc.my_finalized)) + if result.major_is_done: + break + + assert transitions == [ + (SCANNING, MARKING, False), + (MARKING, SWEEPING, False), + (SWEEPING, FINALIZING, False), + (FINALIZING, USERDEL, False), + (USERDEL, SCANNING, True) + ] + # there is one more transition than actual step, because + # FINALIZING->USERDEL is "virtual" + assert sc.my_steps == len(transitions) - 1 diff --git a/pypy/module/gc/test/test_hook.py b/pypy/module/gc/test/test_hook.py --- a/pypy/module/gc/test/test_hook.py +++ b/pypy/module/gc/test/test_hook.py @@ -69,26 +69,29 @@ def test_on_gc_collect_step(self): import gc + SCANNING = 0 + MARKING = 1 + SWEEPING = 2 + FINALIZING = 3 lst = [] def on_gc_collect_step(stats): lst.append((stats.count, stats.duration, stats.oldstate, - stats.newstate)) + stats.newstate, + stats.major_is_done)) gc.hooks.on_gc_collect_step = on_gc_collect_step - self.fire_gc_collect_step(10, 20, 30) - self.fire_gc_collect_step(40, 50, 60) + self.fire_gc_collect_step(10, SCANNING, MARKING) + self.fire_gc_collect_step(40, FINALIZING, SCANNING) assert lst == [ - (1, 10, 20, 30), - (1, 40, 50, 60), + (1, 10, SCANNING, MARKING, False), + (1, 40, FINALIZING, SCANNING, True), ] # gc.hooks.on_gc_collect_step = None - self.fire_gc_collect_step(70, 80, 90) # won't fire - assert lst == [ - (1, 10, 20, 30), - (1, 40, 50, 60), - ] + oldlst = lst[:] + self.fire_gc_collect_step(70, SCANNING, MARKING) # won't fire + assert lst == oldlst def test_on_gc_collect(self): import gc @@ -123,7 +126,8 @@ assert S.STATE_MARKING == 1 assert S.STATE_SWEEPING == 2 assert S.STATE_FINALIZING == 3 - assert S.GC_STATES == ('SCANNING', 'MARKING', 'SWEEPING', 'FINALIZING') + assert S.GC_STATES == ('SCANNING', 'MARKING', 'SWEEPING', + 'FINALIZING', 'USERDEL') def 
test_cumulative(self): import gc @@ -176,3 +180,22 @@ assert gc.hooks.on_gc_minor is None assert gc.hooks.on_gc_collect_step is None assert gc.hooks.on_gc_collect is None + + def test_no_recursive(self): + import gc + lst = [] + def on_gc_minor(stats): + lst.append((stats.count, + stats.duration, + stats.total_memory_used, + stats.pinned_objects)) + self.fire_gc_minor(1, 2, 3) # won't fire NOW + gc.hooks.on_gc_minor = on_gc_minor + self.fire_gc_minor(10, 20, 30) + self.fire_gc_minor(40, 50, 60) + # the duration for the 2nd call is 41, because it also counts the 1 + # which was fired recursively + assert lst == [ + (1, 10, 20, 30), + (2, 41, 50, 60), + ] diff --git a/pypy/module/math/test/test_direct.py b/pypy/module/math/test/test_direct.py --- a/pypy/module/math/test/test_direct.py +++ b/pypy/module/math/test/test_direct.py @@ -6,11 +6,6 @@ from rpython.rtyper.lltypesystem.module.test.math_cases import (MathTests, get_tester) -consistent_host = True -if '__pypy__' not in sys.builtin_module_names: - if sys.version_info < (2, 6): - consistent_host = False - class TestDirect(MathTests): pass @@ -30,8 +25,6 @@ def make_test_case((fnname, args, expected), dict): # def test_func(self): - if not consistent_host: - py.test.skip("inconsistent behavior before 2.6") try: fn = getattr(math, fnname) except AttributeError: diff --git a/pypy/module/math/test/test_math.py b/pypy/module/math/test/test_math.py --- a/pypy/module/math/test/test_math.py +++ b/pypy/module/math/test/test_math.py @@ -24,7 +24,6 @@ expected = space.wrap(expected) cases.append(space.newtuple([space.wrap(a), space.wrap(b), expected])) cls.w_cases = space.newlist(cases) - cls.w_consistent_host = space.wrap(test_direct.consistent_host) @classmethod def make_callable_wrapper(cls, func): @@ -36,8 +35,6 @@ assert abs(actual - expected) < 10E-5 def test_all_cases(self): - if not self.consistent_host: - skip("please test this on top of PyPy or CPython >= 2.6") import math for fnname, args, expected in self.cases: 
fn = getattr(math, fnname) diff --git a/rpython/memory/gc/base.py b/rpython/memory/gc/base.py --- a/rpython/memory/gc/base.py +++ b/rpython/memory/gc/base.py @@ -149,6 +149,20 @@ def get_size_incl_hash(self, obj): return self.get_size(obj) + # these can be overriden by subclasses, called by the GCTransformer + def enable(self): + pass + + def disable(self): + pass + + def isenabled(self): + return True + + def collect_step(self): + self.collect() + return True + def malloc(self, typeid, length=0, zero=False): """NOT_RPYTHON For testing. The interface used by the gctransformer is diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -379,6 +379,11 @@ self.total_gc_time = 0.0 self.gc_state = STATE_SCANNING + + # if the GC is disabled, it runs only minor collections; major + # collections need to be manually triggered by explicitly calling + # collect() + self.enabled = True # # Two lists of all objects with finalizers. Actually they are lists # of pairs (finalization_queue_nr, object). "probably young objects" @@ -514,6 +519,15 @@ bigobj = self.nonlarge_max + 1 self.max_number_of_pinned_objects = self.nursery_size / (bigobj * 2) + def enable(self): + self.enabled = True + + def disable(self): + self.enabled = False + + def isenabled(self): + return self.enabled + def _nursery_memory_size(self): extra = self.nonlarge_max + 1 return self.nursery_size + extra @@ -750,22 +764,53 @@ """Do a minor (gen=0), start a major (gen=1), or do a full major (gen>=2) collection.""" if gen < 0: - self._minor_collection() # dangerous! no major GC cycle progress - elif gen <= 1: - self.minor_collection_with_major_progress() - if gen == 1 and self.gc_state == STATE_SCANNING: + # Dangerous! this makes no progress on the major GC cycle. 
+ # If called too often, the memory usage will keep increasing, + # because we'll never completely fill the nursery (and so + # never run anything about the major collection). + self._minor_collection() + elif gen == 0: + # This runs a minor collection. This is basically what occurs + # when the nursery is full. If a major collection is in + # progress, it also runs one more step of it. It might also + # decide to start a major collection just now, depending on + # current memory pressure. + self.minor_collection_with_major_progress(force_enabled=True) + elif gen == 1: + # This is like gen == 0, but if no major collection is running, + # then it forces one to start now. + self.minor_collection_with_major_progress(force_enabled=True) + if self.gc_state == STATE_SCANNING: self.major_collection_step() else: + # This does a complete minor and major collection. self.minor_and_major_collection() self.rrc_invoke_callback() + def collect_step(self): + """ + Do a single major collection step. Return True when the major collection + is completed. - def minor_collection_with_major_progress(self, extrasize=0): - """Do a minor collection. Then, if there is already a major GC - in progress, run at least one major collection step. If there is - no major GC but the threshold is reached, start a major GC. + This is meant to be used together with gc.disable(), to have a + fine-grained control on when the GC runs. + """ + old_state = self.gc_state + self._minor_collection() + self.major_collection_step() + self.rrc_invoke_callback() + return rgc._encode_states(old_state, self.gc_state) + + def minor_collection_with_major_progress(self, extrasize=0, + force_enabled=False): + """Do a minor collection. Then, if the GC is enabled and there + is already a major GC in progress, run at least one major collection + step. If there is no major GC but the threshold is reached, start a + major GC. 
""" self._minor_collection() + if not self.enabled and not force_enabled: + return # If the gc_state is STATE_SCANNING, we're not in the middle # of an incremental major collection. In that case, wait @@ -2428,25 +2473,6 @@ # We also need to reset the GCFLAG_VISITED on prebuilt GC objects. self.prebuilt_root_objects.foreach(self._reset_gcflag_visited, None) # - # Print statistics - debug_start("gc-collect-done") - debug_print("arenas: ", - self.stat_ac_arenas_count, " => ", - self.ac.arenas_count) - debug_print("bytes used in arenas: ", - self.ac.total_memory_used) - debug_print("bytes raw-malloced: ", - self.stat_rawmalloced_total_size, " => ", - self.rawmalloced_total_size) - debug_stop("gc-collect-done") - self.hooks.fire_gc_collect( - num_major_collects=self.num_major_collects, - arenas_count_before=self.stat_ac_arenas_count, - arenas_count_after=self.ac.arenas_count, - arenas_bytes=self.ac.total_memory_used, - rawmalloc_bytes_before=self.stat_rawmalloced_total_size, - rawmalloc_bytes_after=self.rawmalloced_total_size) - # # Set the threshold for the next major collection to be when we # have allocated 'major_collection_threshold' times more than # we currently have -- but no more than 'max_delta' more than @@ -2460,6 +2486,27 @@ total_memory_used + self.max_delta), reserving_size) # + # Print statistics + debug_start("gc-collect-done") + debug_print("arenas: ", + self.stat_ac_arenas_count, " => ", + self.ac.arenas_count) + debug_print("bytes used in arenas: ", + self.ac.total_memory_used) + debug_print("bytes raw-malloced: ", + self.stat_rawmalloced_total_size, " => ", + self.rawmalloced_total_size) + debug_print("next major collection threshold: ", + self.next_major_collection_threshold) + debug_stop("gc-collect-done") + self.hooks.fire_gc_collect( + num_major_collects=self.num_major_collects, + arenas_count_before=self.stat_ac_arenas_count, + arenas_count_after=self.ac.arenas_count, + arenas_bytes=self.ac.total_memory_used, + 
rawmalloc_bytes_before=self.stat_rawmalloced_total_size, + rawmalloc_bytes_after=self.rawmalloced_total_size) + # # Max heap size: gives an upper bound on the threshold. If we # already have at least this much allocated, raise MemoryError. if bounded and self.threshold_reached(reserving_size): diff --git a/rpython/memory/gc/test/test_direct.py b/rpython/memory/gc/test/test_direct.py --- a/rpython/memory/gc/test/test_direct.py +++ b/rpython/memory/gc/test/test_direct.py @@ -13,6 +13,7 @@ from rpython.memory.gc import minimark, incminimark from rpython.memory.gctypelayout import zero_gc_pointers_inside, zero_gc_pointers from rpython.rlib.debug import debug_print +from rpython.rlib.test.test_debug import debuglog import pdb WORD = LONG_BIT // 8 @@ -770,4 +771,76 @@ assert elem.prev == lltype.nullptr(S) assert elem.next == lltype.nullptr(S) - + def test_collect_0(self, debuglog): + self.gc.collect(1) # start a major + debuglog.reset() + self.gc.collect(0) # do ONLY a minor + assert debuglog.summary() == {'gc-minor': 1} + + def test_enable_disable(self, debuglog): + def large_malloc(): + # malloc an object which is large enough to trigger a major collection + threshold = self.gc.next_major_collection_threshold + self.malloc(VAR, int(threshold/8)) + summary = debuglog.summary() + debuglog.reset() + return summary + # + summary = large_malloc() + assert sorted(summary.keys()) == ['gc-collect-step', 'gc-minor'] + # + self.gc.disable() + summary = large_malloc() + assert sorted(summary.keys()) == ['gc-minor'] + # + self.gc.enable() + summary = large_malloc() + assert sorted(summary.keys()) == ['gc-collect-step', 'gc-minor'] + + def test_call_collect_when_disabled(self, debuglog): + # malloc an object and put it the old generation + s = self.malloc(S) + s.x = 42 + self.stackroots.append(s) + self.gc.collect() + s = self.stackroots.pop() + # + self.gc.disable() + self.gc.collect(1) # start a major collect + assert sorted(debuglog.summary()) == ['gc-collect-step', 'gc-minor'] 
+ assert s.x == 42 # s is not freed yet + # + debuglog.reset() + self.gc.collect(1) # run one more step + assert sorted(debuglog.summary()) == ['gc-collect-step', 'gc-minor'] + assert s.x == 42 # s is not freed yet + # + debuglog.reset() + self.gc.collect() # finish the major collection + summary = debuglog.summary() + assert sorted(debuglog.summary()) == ['gc-collect-step', 'gc-minor'] + # s is freed + py.test.raises(RuntimeError, 's.x') + + def test_collect_step(self, debuglog): + from rpython.rlib import rgc + n = 0 + states = [] + while True: + debuglog.reset() + val = self.gc.collect_step() + states.append((rgc.old_state(val), rgc.new_state(val))) + summary = debuglog.summary() + assert summary == {'gc-minor': 1, 'gc-collect-step': 1} + if rgc.is_done(val): + break + n += 1 + if n == 100: + assert False, 'this looks like an endless loop' + # + assert states == [ + (incminimark.STATE_SCANNING, incminimark.STATE_MARKING), + (incminimark.STATE_MARKING, incminimark.STATE_SWEEPING), + (incminimark.STATE_SWEEPING, incminimark.STATE_FINALIZING), + (incminimark.STATE_FINALIZING, incminimark.STATE_SCANNING) + ] diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -309,6 +309,12 @@ self.collect_ptr = getfn(GCClass.collect.im_func, [s_gc, annmodel.SomeInteger()], annmodel.s_None) + self.collect_step_ptr = getfn(GCClass.collect_step.im_func, [s_gc], + annmodel.SomeInteger()) + self.enable_ptr = getfn(GCClass.enable.im_func, [s_gc], annmodel.s_None) + self.disable_ptr = getfn(GCClass.disable.im_func, [s_gc], annmodel.s_None) + self.isenabled_ptr = getfn(GCClass.isenabled.im_func, [s_gc], + annmodel.s_Bool) self.can_move_ptr = getfn(GCClass.can_move.im_func, [s_gc, SomeAddress()], annmodel.SomeBool()) @@ -884,6 +890,28 @@ resultvar=op.result) self.pop_roots(hop, livevars) + def gct_gc__collect_step(self, hop): + op = hop.spaceop + livevars 
= self.push_roots(hop) + hop.genop("direct_call", [self.collect_step_ptr, self.c_const_gc], + resultvar=op.result) + self.pop_roots(hop, livevars) + + def gct_gc__enable(self, hop): + op = hop.spaceop + hop.genop("direct_call", [self.enable_ptr, self.c_const_gc], + resultvar=op.result) + + def gct_gc__disable(self, hop): + op = hop.spaceop + hop.genop("direct_call", [self.disable_ptr, self.c_const_gc], + resultvar=op.result) + + def gct_gc__isenabled(self, hop): + op = hop.spaceop + hop.genop("direct_call", [self.isenabled_ptr, self.c_const_gc], + resultvar=op.result) + def gct_gc_can_move(self, hop): op = hop.spaceop v_addr = hop.genop('cast_ptr_to_adr', diff --git a/rpython/rlib/debug.py b/rpython/rlib/debug.py --- a/rpython/rlib/debug.py +++ b/rpython/rlib/debug.py @@ -1,5 +1,6 @@ import sys import time +from collections import Counter from rpython.rlib.objectmodel import enforceargs from rpython.rtyper.extregistry import ExtRegistryEntry @@ -38,6 +39,23 @@ assert False, ("nesting error: no start corresponding to stop %r" % (category,)) + def reset(self): + # only for tests: empty the log + self[:] = [] + + def summary(self, flatten=False): + res = Counter() + def visit(lst): + for section, sublist in lst: + if section == 'debug_print': + continue + res[section] += 1 + if flatten: + visit(sublist) + # + visit(self) + return res + def __repr__(self): import pprint return pprint.pformat(list(self)) diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py --- a/rpython/rlib/rgc.py +++ b/rpython/rlib/rgc.py @@ -13,6 +13,50 @@ # General GC features collect = gc.collect +enable = gc.enable +disable = gc.disable +isenabled = gc.isenabled + +def collect_step(): + """ + If the GC is incremental, run a single gc-collect-step. + + Return an integer which encodes the starting and ending GC state. Use + rgc.{old_state,new_state,is_done} to decode it. + + If the GC is not incremental, do a full collection and return a value on + which rgc.is_done() return True. 
+ """ + gc.collect() + return _encode_states(1, 0) + +def _encode_states(oldstate, newstate): + return oldstate << 8 | newstate + +def old_state(states): + return (states & 0xFF00) >> 8 + +def new_state(states): + return states & 0xFF + +def is_done(states): + """ + Return True if the return value of collect_step signals the end of a major + collection + """ + old = old_state(states) + new = new_state(states) + return is_done__states(old, new) + +def is_done__states(oldstate, newstate): + "Like is_done, but takes oldstate and newstate explicitly" + # a collection is considered done when it ends up in the starting state + # (which is usually represented as 0). This logic works for incminimark, + # which is currently the only gc actually used and for which collect_step + # is implemented. In case we add more GC in the future, we might want to + # delegate this logic to the GC itself, but for now it is MUCH simpler to + # just write it in plain RPython. + return oldstate != 0 and newstate == 0 def set_max_heap_size(nbytes): """Limit the heap size to n bytes. 
@@ -131,6 +175,44 @@ args_v = hop.inputargs(lltype.Signed) return hop.genop('gc__collect', args_v, resulttype=hop.r_result) + +class EnableDisableEntry(ExtRegistryEntry): + _about_ = (gc.enable, gc.disable) + + def compute_result_annotation(self): + from rpython.annotator import model as annmodel + return annmodel.s_None + + def specialize_call(self, hop): + hop.exception_cannot_occur() + opname = self.instance.__name__ + return hop.genop('gc__%s' % opname, hop.args_v, resulttype=hop.r_result) + + +class IsEnabledEntry(ExtRegistryEntry): + _about_ = gc.isenabled + + def compute_result_annotation(self): + from rpython.annotator import model as annmodel + return annmodel.s_Bool + + def specialize_call(self, hop): + hop.exception_cannot_occur() + return hop.genop('gc__isenabled', hop.args_v, resulttype=hop.r_result) + + +class CollectStepEntry(ExtRegistryEntry): + _about_ = collect_step + + def compute_result_annotation(self): + from rpython.annotator import model as annmodel + return annmodel.SomeInteger() + + def specialize_call(self, hop): + hop.exception_cannot_occur() + return hop.genop('gc__collect_step', hop.args_v, resulttype=hop.r_result) + + class SetMaxHeapSizeEntry(ExtRegistryEntry): _about_ = set_max_heap_size diff --git a/rpython/rlib/test/test_debug.py b/rpython/rlib/test/test_debug.py --- a/rpython/rlib/test/test_debug.py +++ b/rpython/rlib/test/test_debug.py @@ -1,5 +1,5 @@ - import py +import pytest from rpython.rlib.debug import (check_annotation, make_sure_not_resized, debug_print, debug_start, debug_stop, have_debug_prints, debug_offset, debug_flush, @@ -10,6 +10,12 @@ from rpython.rlib import debug from rpython.rtyper.test.test_llinterp import interpret, gengraph + at pytest.fixture +def debuglog(monkeypatch): + dlog = debug.DebugLog() + monkeypatch.setattr(debug, '_log', dlog) + return dlog + def test_check_annotation(): class Error(Exception): pass @@ -94,7 +100,7 @@ py.test.raises(NotAListOfChars, "interpret(g, [3])") -def 
test_debug_print_start_stop(): +def test_debug_print_start_stop(debuglog): def f(x): debug_start("mycat") debug_print("foo", 2, "bar", x) @@ -103,22 +109,27 @@ debug_offset() # should not explode at least return have_debug_prints() - try: - debug._log = dlog = debug.DebugLog() - res = f(3) - assert res is True - finally: - debug._log = None - assert dlog == [("mycat", [('debug_print', 'foo', 2, 'bar', 3)])] + res = f(3) + assert res is True + assert debuglog == [("mycat", [('debug_print', 'foo', 2, 'bar', 3)])] + debuglog.reset() - try: - debug._log = dlog = debug.DebugLog() - res = interpret(f, [3]) - assert res is True - finally: - debug._log = None - assert dlog == [("mycat", [('debug_print', 'foo', 2, 'bar', 3)])] + res = interpret(f, [3]) + assert res is True + assert debuglog == [("mycat", [('debug_print', 'foo', 2, 'bar', 3)])] +def test_debuglog_summary(debuglog): + debug_start('foo') + debug_start('bar') # this is nested, so not counted in the summary by default + debug_stop('bar') + debug_stop('foo') + debug_start('foo') + debug_stop('foo') + debug_start('bar') + debug_stop('bar') + # + assert debuglog.summary() == {'foo': 2, 'bar': 1} + assert debuglog.summary(flatten=True) == {'foo': 2, 'bar': 2} def test_debug_start_stop_timestamp(): import time diff --git a/rpython/rlib/test/test_rgc.py b/rpython/rlib/test/test_rgc.py --- a/rpython/rlib/test/test_rgc.py +++ b/rpython/rlib/test/test_rgc.py @@ -39,6 +39,45 @@ assert res is None +def test_enable_disable(): + def f(): + gc.enable() + a = gc.isenabled() + gc.disable() + b = gc.isenabled() + return a and not b + + t, typer, graph = gengraph(f, []) + blockops = list(graph.iterblockops()) + opnames = [op.opname for block, op in blockops + if op.opname.startswith('gc__')] + assert opnames == ['gc__enable', 'gc__isenabled', + 'gc__disable', 'gc__isenabled'] + res = interpret(f, []) + assert res + +def test_collect_step(): + def f(): + return rgc.collect_step() + + assert f() + t, typer, graph = gengraph(f, []) 
+ blockops = list(graph.iterblockops()) + opnames = [op.opname for block, op in blockops + if op.opname.startswith('gc__')] + assert opnames == ['gc__collect_step'] + res = interpret(f, []) + assert res + +def test__encode_states(): + val = rgc._encode_states(42, 43) + assert rgc.old_state(val) == 42 + assert rgc.new_state(val) == 43 + assert not rgc.is_done(val) + # + val = rgc.collect_step() + assert rgc.is_done(val) + def test_can_move(): T0 = lltype.GcStruct('T') T1 = lltype.GcArray(lltype.Float) diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py --- a/rpython/rtyper/llinterp.py +++ b/rpython/rtyper/llinterp.py @@ -819,6 +819,18 @@ def op_gc__collect(self, *gen): self.heap.collect(*gen) + def op_gc__collect_step(self): + return self.heap.collect_step() + + def op_gc__enable(self): + self.heap.enable() + + def op_gc__disable(self): + self.heap.disable() + + def op_gc__isenabled(self): + return self.heap.isenabled() + def op_gc_heap_stats(self): raise NotImplementedError diff --git a/rpython/rtyper/lltypesystem/llheap.py b/rpython/rtyper/lltypesystem/llheap.py --- a/rpython/rtyper/lltypesystem/llheap.py +++ b/rpython/rtyper/lltypesystem/llheap.py @@ -5,7 +5,7 @@ setfield = setattr from operator import setitem as setarrayitem -from rpython.rlib.rgc import can_move, collect, add_memory_pressure +from rpython.rlib.rgc import can_move, collect, enable, disable, isenabled, add_memory_pressure, collect_step def setinterior(toplevelcontainer, inneraddr, INNERTYPE, newvalue, offsets=None): diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py --- a/rpython/rtyper/lltypesystem/lloperation.py +++ b/rpython/rtyper/lltypesystem/lloperation.py @@ -456,6 +456,10 @@ # __________ GC operations __________ 'gc__collect': LLOp(canmallocgc=True), + 'gc__collect_step': LLOp(canmallocgc=True), + 'gc__enable': LLOp(), + 'gc__disable': LLOp(), + 'gc__isenabled': LLOp(), 'gc_free': LLOp(), 'gc_fetch_exception': LLOp(), 
'gc_restore_exception': LLOp(), diff --git a/rpython/translator/c/test/test_newgc.py b/rpython/translator/c/test/test_newgc.py --- a/rpython/translator/c/test/test_newgc.py +++ b/rpython/translator/c/test/test_newgc.py @@ -1812,6 +1812,92 @@ res = self.run("ignore_finalizer") assert res == 1 # translated: x1 is removed from the list + def define_enable_disable(self): + class Counter(object): + val = 0 + counter = Counter() + class X(object): + def __del__(self): + counter.val += 1 + def f(should_disable): + x1 = X() + rgc.collect() # make x1 old + assert not rgc.can_move(x1) + x1 = None + # + if should_disable: + gc.disable() + assert not gc.isenabled() + # try to trigger a major collection + N = 100 # this should be enough, increase if not + lst = [] + for i in range(N): + lst.append(chr(i%256) * (1024*1024)) + #print i, counter.val + # + gc.enable() + assert gc.isenabled() + return counter.val + return f + + def test_enable_disable(self): + # first, run with normal gc. If the assert fails it means that in the + # loop we don't allocate enough mem to trigger a major collection. Try + # to increase N + deleted = self.run("enable_disable", 0) + assert deleted == 1, 'This should not fail, try to increment N' + # + # now, run with gc.disable: this should NOT free x1 + deleted = self.run("enable_disable", 1) + assert deleted == 0 + + def define_collect_step(self): + class Counter(object): + val = 0 + counter = Counter() + class X(object): + def __del__(self): + counter.val += 1 + def f(): + x1 = X() + rgc.collect() # make x1 old + assert not rgc.can_move(x1) + x1 = None + # + gc.disable() + n = 0 + states = [] + while True: + n += 1 + val = rgc.collect_step() + states.append((rgc.old_state(val), rgc.new_state(val))) + if rgc.is_done(val): + break + if n == 100: + print 'Endless loop!' + assert False, 'this looks like an endless loop' + + if n < 4: # we expect at least 4 steps + print 'Too few steps! 
n =', n + assert False + + # check that the state transitions are reasonable + first_state, _ = states[0] + for i, (old_state, new_state) in enumerate(states): + is_last = (i == len(states) - 1) + is_valid = False + if is_last: + assert old_state != new_state == first_state + else: + assert new_state == old_state or new_state == old_state+1 + + return counter.val + return f + + def test_collect_step(self): + deleted = self.run("collect_step") + assert deleted == 1 + def define_total_gc_time(cls): def f(): l = [] diff --git a/rpython/translator/goal/gcbench.py b/rpython/translator/goal/gcbench.py --- a/rpython/translator/goal/gcbench.py +++ b/rpython/translator/goal/gcbench.py @@ -44,8 +44,9 @@ # - Results are sensitive to locking cost, but we dont # check for proper locking import time +import gc -USAGE = """gcbench [num_repetitions] [--depths=N,N,N..] [--threads=N]""" +USAGE = """gcbench [num_repetitions] [--depths=N,N,N..] [--threads=N] [--gc=off|--gc=manual]""" ENABLE_THREADS = True @@ -173,6 +174,7 @@ depths = DEFAULT_DEPTHS threads = 0 repeatcount = 1 + gc_policy = 'on' for arg in argv[1:]: if arg.startswith('--threads='): arg = arg[len('--threads='):] @@ -189,13 +191,22 @@ depths = [int(s) for s in arg] except ValueError: return argerror() + elif arg.startswith('--gc=off'): + gc_policy = 'off' + elif arg.startswith('--gc=manual'): + gc_policy = 'manual' else: try: repeatcount = int(arg) except ValueError: return argerror() + # + if gc_policy == 'off' or gc_policy == 'manual': + gc.disable() for i in range(repeatcount): main(depths, threads) + if gc_policy == 'manual': + gc.collect(1) return 0 From pypy.commits at gmail.com Tue Jan 1 01:46:29 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 31 Dec 2018 22:46:29 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: raise for non-ascii.__mod__(unicode) Message-ID: <5c2b0cc5.1c69fb81.dff1b.414b@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95542:d9ad50294bd6 Date: 2018-12-25 22:32 
+0200 http://bitbucket.org/pypy/pypy/changeset/d9ad50294bd6/ Log: raise for non-ascii.__mod__(unicode) diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -434,6 +434,16 @@ got_unicode = space.isinstance_w(w_value, space.w_unicode) if not do_unicode: if got_unicode: + # Make sure the format string is ascii encodable + try: + self.fmt.decode('ascii') + except UnicodeDecodeError as e: + raise OperationError(space.w_UnicodeDecodeError, + space.newtuple([space.newtext('ascii'), + space.newbytes(self.fmt), + space.newint(e.start), + space.newint(e.end), + space.newtext(e.message)])) raise NeedUnicodeFormattingError s = self.string_formatting(w_value) else: From pypy.commits at gmail.com Tue Jan 1 01:46:30 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 31 Dec 2018 22:46:30 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: str_w uses ascii encoding Message-ID: <5c2b0cc6.1c69fb81.474ed.33f4@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95543:b8815fb0c04d Date: 2018-12-25 22:32 +0200 http://bitbucket.org/pypy/pypy/changeset/b8815fb0c04d/ Log: str_w uses ascii encoding diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -87,7 +87,7 @@ return space.newint(uid) def str_w(self, space): - return space.text_w(encode_object(space, self, 'utf8', 'strict')) + return space.text_w(encode_object(space, self, 'ascii', 'strict')) def utf8_w(self, space): return self._utf8 From pypy.commits at gmail.com Tue Jan 1 01:46:32 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 31 Dec 2018 22:46:32 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: bytes.__mod__(unicode) must decode bytes as ascii Message-ID: <5c2b0cc8.1c69fb81.5f4f.b4c6@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95544:cc42e48c8a51 Date: 
2018-12-26 08:20 +0200 http://bitbucket.org/pypy/pypy/changeset/cc42e48c8a51/ Log: bytes.__mod__(unicode) must decode bytes as ascii diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -11,6 +11,7 @@ from rpython.tool.sourcetools import func_with_new_name from pypy.interpreter.error import OperationError, oefmt +from pypy.interpreter.unicodehelper import check_ascii_or_raise class BaseStringFormatter(object): @@ -435,15 +436,7 @@ if not do_unicode: if got_unicode: # Make sure the format string is ascii encodable - try: - self.fmt.decode('ascii') - except UnicodeDecodeError as e: - raise OperationError(space.w_UnicodeDecodeError, - space.newtuple([space.newtext('ascii'), - space.newbytes(self.fmt), - space.newint(e.start), - space.newint(e.end), - space.newtext(e.message)])) + check_ascii_or_raise(space, self.fmt) raise NeedUnicodeFormattingError s = self.string_formatting(w_value) else: From pypy.commits at gmail.com Tue Jan 1 01:46:33 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 31 Dec 2018 22:46:33 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: disable seemingly invalid test, confirmation needed Message-ID: <5c2b0cc9.1c69fb81.c21b0.181d@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95545:ad4d65746c50 Date: 2018-12-31 13:40 +0200 http://bitbucket.org/pypy/pypy/changeset/ad4d65746c50/ Log: disable seemingly invalid test, confirmation needed diff --git a/pypy/interpreter/test/test_gateway.py b/pypy/interpreter/test/test_gateway.py --- a/pypy/interpreter/test/test_gateway.py +++ b/pypy/interpreter/test/test_gateway.py @@ -555,12 +555,13 @@ w(None)) raises(gateway.OperationError, space.call_function, w_app_g3_u, w(42)) - w_ascii = space.appexec([], """(): - import sys - return sys.getdefaultencoding() == 'ascii'""") - if space.is_true(w_ascii): - raises(gateway.OperationError, space.call_function, w_app_g3_u, - w("\x80")) + 
# XXX this part of the test seems wrong, why would "\x80" fail? + # w_ascii = space.appexec([], """(): + # import sys + # return sys.getdefaultencoding() == 'ascii'""") + # if space.is_true(w_ascii): + # raises(gateway.OperationError, space.call_function, w_app_g3_u, + # w("\x80")) def test_interp2app_unwrap_spec_unwrapper(self): space = self.space From pypy.commits at gmail.com Tue Jan 1 01:46:35 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 31 Dec 2018 22:46:35 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: 'abc'.encode(...) in cpython calls 'abc'.decode('ascii', 'strict').encode(...) Message-ID: <5c2b0ccb.1c69fb81.b636d.82e0@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95546:8704e00eb624 Date: 2019-01-01 08:44 +0200 http://bitbucket.org/pypy/pypy/changeset/8704e00eb624/ Log: 'abc'.encode(...) in cpython calls 'abc'.decode('ascii', 'strict').encode(...) diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -465,6 +465,10 @@ raise oefmt(space.w_TypeError, "Cannot use string as modifiable buffer") + def descr_encode(self, space, w_encoding=None, w_errors=None): + w_uni = self.descr_decode(space, space.newtext('ascii'), space.newtext('strict')) + return space.call_method(w_uni, 'encode', w_encoding, w_errors) + def descr_getbuffer(self, space, w_flags): #from pypy.objspace.std.bufferobject import W_Buffer #return W_Buffer(StringBuffer(self._value)) @@ -869,7 +873,7 @@ center = interpindirect2app(W_AbstractBytesObject.descr_center), count = interpindirect2app(W_AbstractBytesObject.descr_count), decode = interpindirect2app(W_AbstractBytesObject.descr_decode), - encode = interpindirect2app(W_AbstractBytesObject.descr_encode), + encode = interpindirect2app(W_BytesObject.descr_encode), expandtabs = interpindirect2app(W_AbstractBytesObject.descr_expandtabs), find = interpindirect2app(W_AbstractBytesObject.descr_find), rfind = 
interpindirect2app(W_AbstractBytesObject.descr_rfind), diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py --- a/pypy/objspace/std/test/test_bytesobject.py +++ b/pypy/objspace/std/test/test_bytesobject.py @@ -812,6 +812,11 @@ def test_encode(self): assert 'hello'.encode() == 'hello' assert type('hello'.encode()) is str + s = 'hello \xf8 world' + # CPython first decodes the bytes, then encodes + exc = raises(UnicodeDecodeError, s.encode, 'ascii') + assert str(exc.value) == ("'ascii' codec can't decode byte 0xf8" + " in position 6: ordinal not in range(128)") def test_hash(self): # check that we have the same hash as CPython for at least 31 bits From pypy.commits at gmail.com Tue Jan 1 01:46:36 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 31 Dec 2018 22:46:36 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: mrege default into branch Message-ID: <5c2b0ccc.1c69fb81.1d48a.65d1@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95547:aa0b6372c139 Date: 2019-01-01 08:45 +0200 http://bitbucket.org/pypy/pypy/changeset/aa0b6372c139/ Log: mrege default into branch diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1231,9 +1231,8 @@ assert type(unicode(z)) is unicode assert unicode(z) == u'foobaz' # - # two completely corner cases where we differ from CPython: - #assert unicode(encoding='supposedly_the_encoding') == u'' - #assert unicode(errors='supposedly_the_error') == u'' + assert unicode(encoding='supposedly_the_encoding') == u'' + assert unicode(errors='supposedly_the_error') == u'' e = raises(TypeError, unicode, u'', 'supposedly_the_encoding') assert str(e.value) == 'decoding Unicode is not supported' e = raises(TypeError, unicode, u'', errors='supposedly_the_error') diff --git a/pypy/objspace/std/unicodeobject.py 
b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -197,23 +197,20 @@ return unicodedb.islinebreak(ch) @staticmethod - @unwrap_spec(w_string=WrappedDefault("")) - def descr_new(space, w_unicodetype, w_string, w_encoding=None, + def descr_new(space, w_unicodetype, w_string=None, w_encoding=None, w_errors=None): - # NB. the default value of w_obj is really a *wrapped* empty string: - # there is gateway magic at work - w_obj = w_string - encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors) - if encoding is None and errors is None: - # this is very quick if w_obj is already a w_unicode - w_value = unicode_from_object(space, w_obj) + if w_string is None: + w_value = W_UnicodeObject.EMPTY + elif encoding is None and errors is None: + # this is very quick if w_string is already a w_unicode + w_value = unicode_from_object(space, w_string) else: - if space.isinstance_w(w_obj, space.w_unicode): + if space.isinstance_w(w_string, space.w_unicode): raise oefmt(space.w_TypeError, "decoding Unicode is not supported") - w_value = unicode_from_encoded_object(space, w_obj, + w_value = unicode_from_encoded_object(space, w_string, encoding, errors) if space.is_w(w_unicodetype, space.w_unicode): return w_value From pypy.commits at gmail.com Tue Jan 1 08:31:18 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Jan 2019 05:31:18 -0800 (PST) Subject: [pypy-commit] pypy default: happy new year - update copyright info Message-ID: <5c2b6ba6.1c69fb81.c7d93.c89e@mx.google.com> Author: Matti Picus Branch: Changeset: r95548:0679f7b21b79 Date: 2019-01-01 10:34 +0200 http://bitbucket.org/pypy/pypy/changeset/0679f7b21b79/ Log: happy new year - update copyright info diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -30,7 +30,7 @@ DEALINGS IN THE SOFTWARE. 
-PyPy Copyright holders 2003-2018 +PyPy Copyright holders 2003-2019 -------------------------------- Except when otherwise stated (look for LICENSE files or information at diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py --- a/pypy/doc/conf.py +++ b/pypy/doc/conf.py @@ -59,7 +59,7 @@ # General information about the project. project = u'PyPy' -copyright = u'2018, The PyPy Project' +copyright = u'2019, The PyPy Project' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the From pypy.commits at gmail.com Tue Jan 1 08:31:20 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Jan 2019 05:31:20 -0800 (PST) Subject: [pypy-commit] pypy py3.5: merge default into branch Message-ID: <5c2b6ba8.1c69fb81.fdee4.cd08@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r95549:6b4ce9eb5f31 Date: 2019-01-01 10:34 +0200 http://bitbucket.org/pypy/pypy/changeset/6b4ce9eb5f31/ Log: merge default into branch diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -30,7 +30,7 @@ DEALINGS IN THE SOFTWARE. -PyPy Copyright holders 2003-2018 +PyPy Copyright holders 2003-2019 -------------------------------- Except when otherwise stated (look for LICENSE files or information at diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py --- a/pypy/doc/conf.py +++ b/pypy/doc/conf.py @@ -59,7 +59,7 @@ # General information about the project. 
project = u'PyPy' -copyright = u'2018, The PyPy Project' +copyright = u'2019, The PyPy Project' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the From pypy.commits at gmail.com Tue Jan 1 08:31:21 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Jan 2019 05:31:21 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge default into branch Message-ID: <5c2b6ba9.1c69fb81.33026.7dc0@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95550:057b4ea99850 Date: 2019-01-01 10:35 +0200 http://bitbucket.org/pypy/pypy/changeset/057b4ea99850/ Log: merge default into branch diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -30,7 +30,7 @@ DEALINGS IN THE SOFTWARE. -PyPy Copyright holders 2003-2018 +PyPy Copyright holders 2003-2019 -------------------------------- Except when otherwise stated (look for LICENSE files or information at diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py --- a/pypy/doc/conf.py +++ b/pypy/doc/conf.py @@ -59,7 +59,7 @@ # General information about the project. 
project = u'PyPy' -copyright = u'2018, The PyPy Project' +copyright = u'2019, The PyPy Project' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the From pypy.commits at gmail.com Tue Jan 1 08:31:23 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Jan 2019 05:31:23 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: skip hypothesis (on -A testing) Message-ID: <5c2b6bab.1c69fb81.7ebde.5c9c@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95551:381c99955a5c Date: 2019-01-01 12:26 +0200 http://bitbucket.org/pypy/pypy/changeset/381c99955a5c/ Log: skip hypothesis (on -A testing) diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -1,5 +1,9 @@ import pytest -from hypothesis import given, strategies +try: + from hypothesis import given, strategies + HAS_HYPOTHESIS = True +except ImportError: + HAS_HYPOTHESIS = False import struct import sys @@ -37,12 +41,14 @@ assert lst == [("??", "ascii", input, 0, 2), ("??", "ascii", input, 5, 7)] - at given(strategies.text()) -def test_utf8_encode_ascii_2(u): - def eh(errors, encoding, reason, p, start, end): - return "?" * (end - start), end +if HAS_HYPOTHESIS: + @given(strategies.text()) + def test_utf8_encode_ascii_2(u): + def eh(errors, encoding, reason, p, start, end): + return "?" 
* (end - start), end - assert utf8_encode_ascii(u.encode("utf8"), "replace", eh) == u.encode("ascii", "replace") + assert utf8_encode_ascii(u.encode("utf8"), + "replace", eh) == u.encode("ascii", "replace") def test_str_decode_ascii(): assert str_decode_ascii("abc", "??", True, "??") == ("abc", 3, 3) diff --git a/pypy/module/_cffi_backend/test/test_wchar_helper.py b/pypy/module/_cffi_backend/test/test_wchar_helper.py --- a/pypy/module/_cffi_backend/test/test_wchar_helper.py +++ b/pypy/module/_cffi_backend/test/test_wchar_helper.py @@ -1,10 +1,15 @@ -from hypothesis import given, strategies +try: + from hypothesis import given, strategies + HAS_HYPOTHESIS = True +except ImportError: + HAS_HYPOTHESIS = False + from pypy.module._cffi_backend.wchar_helper import utf8_size_as_char16 - - at given(strategies.text()) -def test_utf8_size_as_char16(u): - assert type(u) is unicode - length = utf8_size_as_char16(''.join(uc.encode('utf8') for uc in u)) - assert length == sum((1 if uc <= u'\uFFFF' else 2) for uc in u) +if HAS_HYPOTHESIS: + @given(strategies.text()) + def test_utf8_size_as_char16(u): + assert type(u) is unicode + length = utf8_size_as_char16(''.join(uc.encode('utf8') for uc in u)) + assert length == sum((1 if uc <= u'\uFFFF' else 2) for uc in u) diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1,7 +1,12 @@ # -*- encoding: utf-8 -*- import py import sys -from hypothesis import given, strategies, settings, example +try: + from hypothesis import given, strategies, settings, example + HAS_HYPOTHESIS = True +except ImportError: + HAS_HYPOTHESIS = False + from rpython.rlib import rutf8 from pypy.interpreter.error import OperationError @@ -36,141 +41,137 @@ space.w_unicode, "__new__", space.w_unicode, w_uni) assert w_new is w_uni - @given(strategies.text(), strategies.integers(min_value=0, max_value=10), - 
strategies.integers(min_value=-1, max_value=10)) - def test_hypo_index_find(self, u, start, len1): - if start + len1 < 0: - return # skip this case - v = u[start : start + len1] - space = self.space - w_u = space.newutf8(u.encode('utf8'), len(u)) - w_v = space.newutf8(v.encode('utf8'), len(v)) - expected = u.find(v, start, start + len1) - try: - w_index = space.call_method(w_u, 'index', w_v, + if HAS_HYPOTHESIS: + @given(strategies.text(), strategies.integers(min_value=0, max_value=10), + strategies.integers(min_value=-1, max_value=10)) + def test_hypo_index_find(self, u, start, len1): + if start + len1 < 0: + return # skip this case + v = u[start : start + len1] + space = self.space + w_u = space.newutf8(u.encode('utf8'), len(u)) + w_v = space.newutf8(v.encode('utf8'), len(v)) + expected = u.find(v, start, start + len1) + try: + w_index = space.call_method(w_u, 'index', w_v, + space.newint(start), + space.newint(start + len1)) + except OperationError as e: + if not e.match(space, space.w_ValueError): + raise + assert expected == -1 + else: + assert space.int_w(w_index) == expected >= 0 + + w_index = space.call_method(w_u, 'find', w_v, space.newint(start), space.newint(start + len1)) - except OperationError as e: - if not e.match(space, space.w_ValueError): - raise - assert expected == -1 - else: - assert space.int_w(w_index) == expected >= 0 + assert space.int_w(w_index) == expected - w_index = space.call_method(w_u, 'find', w_v, - space.newint(start), - space.newint(start + len1)) - assert space.int_w(w_index) == expected + rexpected = u.rfind(v, start, start + len1) + try: + w_index = space.call_method(w_u, 'rindex', w_v, + space.newint(start), + space.newint(start + len1)) + except OperationError as e: + if not e.match(space, space.w_ValueError): + raise + assert rexpected == -1 + else: + assert space.int_w(w_index) == rexpected >= 0 - rexpected = u.rfind(v, start, start + len1) - try: - w_index = space.call_method(w_u, 'rindex', w_v, + w_index = 
space.call_method(w_u, 'rfind', w_v, space.newint(start), space.newint(start + len1)) - except OperationError as e: - if not e.match(space, space.w_ValueError): - raise - assert rexpected == -1 - else: - assert space.int_w(w_index) == rexpected >= 0 + assert space.int_w(w_index) == rexpected - w_index = space.call_method(w_u, 'rfind', w_v, - space.newint(start), - space.newint(start + len1)) - assert space.int_w(w_index) == rexpected + expected = u.startswith(v, start) + w_res = space.call_method(w_u, 'startswith', w_v, + space.newint(start)) + assert w_res is space.newbool(expected) - expected = u.startswith(v, start) - w_res = space.call_method(w_u, 'startswith', w_v, - space.newint(start)) - assert w_res is space.newbool(expected) + expected = u.startswith(v, start, start + len1) + w_res = space.call_method(w_u, 'startswith', w_v, + space.newint(start), + space.newint(start + len1)) + assert w_res is space.newbool(expected) - expected = u.startswith(v, start, start + len1) - w_res = space.call_method(w_u, 'startswith', w_v, - space.newint(start), - space.newint(start + len1)) - assert w_res is space.newbool(expected) + expected = u.endswith(v, start) + w_res = space.call_method(w_u, 'endswith', w_v, + space.newint(start)) + assert w_res is space.newbool(expected) - expected = u.endswith(v, start) - w_res = space.call_method(w_u, 'endswith', w_v, - space.newint(start)) - assert w_res is space.newbool(expected) + expected = u.endswith(v, start, start + len1) + w_res = space.call_method(w_u, 'endswith', w_v, + space.newint(start), + space.newint(start + len1)) + assert w_res is space.newbool(expected) - expected = u.endswith(v, start, start + len1) - w_res = space.call_method(w_u, 'endswith', w_v, - space.newint(start), - space.newint(start + len1)) - assert w_res is space.newbool(expected) + @given(u=strategies.text(), + start=strategies.integers(min_value=0, max_value=10), + len1=strategies.integers(min_value=-1, max_value=10)) + def test_hypo_index_find(u, 
start, len1, space): + if start + len1 < 0: + return # skip this case + v = u[start : start + len1] + w_u = space.wrap(u) + w_v = space.wrap(v) + expected = u.find(v, start, start + len1) + try: + w_index = space.call_method(w_u, 'index', w_v, + space.newint(start), + space.newint(start + len1)) + except OperationError as e: + if not e.match(space, space.w_ValueError): + raise + assert expected == -1 + else: + assert space.int_w(w_index) == expected >= 0 -try: - from hypothesis import given, strategies -except ImportError: - pass -else: - @given(u=strategies.text(), - start=strategies.integers(min_value=0, max_value=10), - len1=strategies.integers(min_value=-1, max_value=10)) - def test_hypo_index_find(u, start, len1, space): - if start + len1 < 0: - return # skip this case - v = u[start : start + len1] - w_u = space.wrap(u) - w_v = space.wrap(v) - expected = u.find(v, start, start + len1) - try: - w_index = space.call_method(w_u, 'index', w_v, + w_index = space.call_method(w_u, 'find', w_v, space.newint(start), space.newint(start + len1)) - except OperationError as e: - if not e.match(space, space.w_ValueError): - raise - assert expected == -1 - else: - assert space.int_w(w_index) == expected >= 0 + assert space.int_w(w_index) == expected - w_index = space.call_method(w_u, 'find', w_v, - space.newint(start), - space.newint(start + len1)) - assert space.int_w(w_index) == expected + rexpected = u.rfind(v, start, start + len1) + try: + w_index = space.call_method(w_u, 'rindex', w_v, + space.newint(start), + space.newint(start + len1)) + except OperationError as e: + if not e.match(space, space.w_ValueError): + raise + assert rexpected == -1 + else: + assert space.int_w(w_index) == rexpected >= 0 - rexpected = u.rfind(v, start, start + len1) - try: - w_index = space.call_method(w_u, 'rindex', w_v, + w_index = space.call_method(w_u, 'rfind', w_v, space.newint(start), space.newint(start + len1)) - except OperationError as e: - if not e.match(space, space.w_ValueError): 
- raise - assert rexpected == -1 - else: - assert space.int_w(w_index) == rexpected >= 0 + assert space.int_w(w_index) == rexpected - w_index = space.call_method(w_u, 'rfind', w_v, - space.newint(start), - space.newint(start + len1)) - assert space.int_w(w_index) == rexpected + expected = u.startswith(v, start) + w_res = space.call_method(w_u, 'startswith', w_v, + space.newint(start)) + assert w_res is space.newbool(expected) - expected = u.startswith(v, start) - w_res = space.call_method(w_u, 'startswith', w_v, - space.newint(start)) - assert w_res is space.newbool(expected) + expected = u.startswith(v, start, start + len1) + w_res = space.call_method(w_u, 'startswith', w_v, + space.newint(start), + space.newint(start + len1)) + assert w_res is space.newbool(expected) - expected = u.startswith(v, start, start + len1) - w_res = space.call_method(w_u, 'startswith', w_v, - space.newint(start), - space.newint(start + len1)) - assert w_res is space.newbool(expected) + expected = u.endswith(v, start) + w_res = space.call_method(w_u, 'endswith', w_v, + space.newint(start)) + assert w_res is space.newbool(expected) - expected = u.endswith(v, start) - w_res = space.call_method(w_u, 'endswith', w_v, - space.newint(start)) - assert w_res is space.newbool(expected) - - expected = u.endswith(v, start, start + len1) - w_res = space.call_method(w_u, 'endswith', w_v, - space.newint(start), - space.newint(start + len1)) - assert w_res is space.newbool(expected) + expected = u.endswith(v, start, start + len1) + w_res = space.call_method(w_u, 'endswith', w_v, + space.newint(start), + space.newint(start + len1)) + assert w_res is space.newbool(expected) class AppTestUnicodeStringStdOnly: From pypy.commits at gmail.com Tue Jan 1 08:31:25 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Jan 2019 05:31:25 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: rework this to be more compliant - UnicodeError trumps TypeError Message-ID: 
<5c2b6bad.1c69fb81.7fc30.70ce@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95552:e79b16b3943d Date: 2019-01-01 12:31 +0200 http://bitbucket.org/pypy/pypy/changeset/e79b16b3943d/ Log: rework this to be more compliant - UnicodeError trumps TypeError diff --git a/pypy/module/__builtin__/operation.py b/pypy/module/__builtin__/operation.py --- a/pypy/module/__builtin__/operation.py +++ b/pypy/module/__builtin__/operation.py @@ -45,11 +45,12 @@ if not space.isinstance_w(w_name, space.w_text): try: name = space.text_w(w_name) # typecheck - except Exception as e: + except OperationError as e: + if e.match(space, space.w_UnicodeError): + raise e raise oefmt(space.w_TypeError, "%s(): attribute name must be string", msg) - if space.isinstance_w(w_name, space.w_unicode): - w_name = space.call_method(w_name, 'encode', space.newtext('ascii')) + w_name = space.newtext(w_name) return w_name def delattr(space, w_object, w_name): From pypy.commits at gmail.com Tue Jan 1 08:31:26 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Jan 2019 05:31:26 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: allow infinite loop in order to pass test Message-ID: <5c2b6bae.1c69fb81.dcbaa.ef6a@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95553:a767a1a730fb Date: 2019-01-01 12:50 +0200 http://bitbucket.org/pypy/pypy/changeset/a767a1a730fb/ Log: allow infinite loop in order to pass test diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1317,8 +1317,6 @@ "truncated input", s, pos, size) result.append(res) - if pos > size - unicode_bytes: - break continue t = r_uint(0) h = 0 From pypy.commits at gmail.com Tue Jan 1 08:31:29 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Jan 2019 05:31:29 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: allow a = array.array('u', u'\xff'); a.byteswap(); ord(a[0]) > sys.maxunicode 
Message-ID: <5c2b6bb1.1c69fb81.996d9.04e8@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95554:962719fced4a Date: 2019-01-01 15:06 +0200 http://bitbucket.org/pypy/pypy/changeset/962719fced4a/ Log: allow a = array.array('u', u'\xff'); a.byteswap(); ord(a[0]) > sys.maxunicode diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -1040,13 +1040,24 @@ return space.newbytes(item) elif mytype.typecode == 'u': code = r_uint(ord(item)) - try: - return space.newutf8(rutf8.unichr_as_utf8(code), 1) - except ValueError: - raise oefmt(space.w_ValueError, - "array contains a 32-bit integer that is outside " - "the range [U+0000; U+10ffff] of valid unicode " - "characters") + # cpython will allow values > sys.maxunicode + # while silently truncating the top bits + if code <= r_uint(0x7F): + # Encode ASCII + item = chr(code) + elif code <= r_uint(0x07FF): + item = (chr((0xc0 | (code >> 6))) + + chr((0x80 | (code & 0x3f)))) + elif code <= r_uint(0xFFFF): + item = (chr((0xe0 | (code >> 12))) + + chr((0x80 | ((code >> 6) & 0x3f))) + + chr((0x80 | (code & 0x3f)))) + else: + item = (chr((0xf0 | (code >> 18)) & 0xff) + + chr((0x80 | ((code >> 12) & 0x3f))) + + chr((0x80 | ((code >> 6) & 0x3f))) + + chr((0x80 | (code & 0x3f)))) + return space.newutf8(item, 1) assert 0, "unreachable" # interface diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -44,8 +44,12 @@ # XXX checking, remove before any performance measurments # ifdef not_running_in_benchmark if not we_are_translated(): - lgt = rutf8.check_utf8(utf8str, True) - assert lgt == length + try: + lgt = rutf8.check_utf8(utf8str, True) + assert lgt == length + except: + # array.array can return invalid unicode + pass @staticmethod def from_utf8builder(builder): From pypy.commits at gmail.com 
Tue Jan 1 08:31:30 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Jan 2019 05:31:30 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: test, fix for '\x00' in wcharp2utf8n Message-ID: <5c2b6bb2.1c69fb81.b636d.d377@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95555:36b134188782 Date: 2019-01-01 15:27 +0200 http://bitbucket.org/pypy/pypy/changeset/36b134188782/ Log: test, fix for '\x00' in wcharp2utf8n diff --git a/pypy/module/_rawffi/test/test__rawffi.py b/pypy/module/_rawffi/test/test__rawffi.py --- a/pypy/module/_rawffi/test/test__rawffi.py +++ b/pypy/module/_rawffi/test/test__rawffi.py @@ -351,9 +351,10 @@ import _rawffi A = _rawffi.Array('u') a = A(6, u'xx\x00\x00xx') - res = _rawffi.wcharp2unicode(a.buffer) - assert isinstance(res, unicode) - assert res == u'xx' + for i in (-1, 6): + res = _rawffi.wcharp2unicode(a.buffer, i) + assert isinstance(res, unicode) + assert res == u'xx' a.free() def test_rawstring2charp(self): diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -1046,7 +1046,7 @@ s = rutf8.Utf8StringBuilder(maxlen) i = 0 - while i < maxlen and w[i]: + while i < maxlen and ord(w[i]): s.append_code(ord(w[i])) i += 1 return s.build(), i From pypy.commits at gmail.com Tue Jan 1 09:52:16 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Jan 2019 06:52:16 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: typo Message-ID: <5c2b7ea0.1c69fb81.9b894.4863@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95556:96dc7712122d Date: 2019-01-01 16:50 +0200 http://bitbucket.org/pypy/pypy/changeset/96dc7712122d/ Log: typo diff --git a/pypy/module/__builtin__/operation.py b/pypy/module/__builtin__/operation.py --- a/pypy/module/__builtin__/operation.py +++ b/pypy/module/__builtin__/operation.py @@ -50,7 +50,7 @@ raise e raise oefmt(space.w_TypeError, "%s(): attribute name must be 
string", msg) - w_name = space.newtext(w_name) + w_name = space.newtext(name) return w_name def delattr(space, w_object, w_name): From pypy.commits at gmail.com Tue Jan 1 10:31:00 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Jan 2019 07:31:00 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix MAXUNICODE to 0x10ffff on this branch Message-ID: <5c2b87b4.1c69fb81.9c516.6b0a@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95557:aed51041ef67 Date: 2019-01-01 17:30 +0200 http://bitbucket.org/pypy/pypy/changeset/aed51041ef67/ Log: fix MAXUNICODE to 0x10ffff on this branch diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -4,3 +4,4 @@ * improve performance of splitlines * fix _pypyjson to not use a wrapped dict when decoding an object * make sure we review all the places that call ord(unichr) to check for ValueErrors +* Find a more elegant way to define MAXUNICODE in rpython/rlib/runicode.py diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py --- a/rpython/rlib/runicode.py +++ b/rpython/rlib/runicode.py @@ -8,7 +8,8 @@ from rpython.rlib import jit, nonconst -if rffi.sizeof(lltype.UniChar) == 4: +# We always use MAXUNICODE = 0x10ffff when unicode objects use utf8 +if 1 or rffi.sizeof(lltype.UniChar) == 4: MAXUNICODE = 0x10ffff allow_surrogate_by_default = False else: From pypy.commits at gmail.com Tue Jan 1 12:51:36 2019 From: pypy.commits at gmail.com (amauryfa) Date: Tue, 01 Jan 2019 09:51:36 -0800 (PST) Subject: [pypy-commit] pypy py3.6: Add support for FsPath to os.unlink() Message-ID: <5c2ba8a8.1c69fb81.e19e2.7b0f@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r95558:77d2a71504df Date: 2019-01-01 18:44 +0100 http://bitbucket.org/pypy/pypy/changeset/77d2a71504df/ Log: Add support for FsPath to os.unlink() diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -727,8 +727,9 @@ else: 
return space.newint(rc) - at unwrap_spec(dir_fd=DirFD(rposix.HAVE_UNLINKAT)) -def unlink(space, w_path, __kwonly__, dir_fd=DEFAULT_DIR_FD): + at unwrap_spec(path=path_or_fd(allow_fd=False), + dir_fd=DirFD(rposix.HAVE_UNLINKAT)) +def unlink(space, path, __kwonly__, dir_fd=DEFAULT_DIR_FD): """unlink(path, *, dir_fd=None) Remove a file (same as remove()). @@ -739,15 +740,16 @@ If it is unavailable, using it will raise a NotImplementedError.""" try: if rposix.HAVE_UNLINKAT and dir_fd != DEFAULT_DIR_FD: - path = space.fsencode_w(w_path) - rposix.unlinkat(path, dir_fd, removedir=False) + rposix.unlinkat(space.fsencode_w(path.w_path), + dir_fd, removedir=False) else: - dispatch_filename(rposix.unlink)(space, w_path) + call_rposix(rposix.unlink, path) except OSError as e: - raise wrap_oserror2(space, e, w_path, eintr_retry=False) + raise wrap_oserror2(space, e, path.w_path, eintr_retry=False) - at unwrap_spec(dir_fd=DirFD(rposix.HAVE_UNLINKAT)) -def remove(space, w_path, __kwonly__, dir_fd=DEFAULT_DIR_FD): + at unwrap_spec(path=path_or_fd(allow_fd=False), + dir_fd=DirFD(rposix.HAVE_UNLINKAT)) +def remove(space, path, __kwonly__, dir_fd=DEFAULT_DIR_FD): """remove(path, *, dir_fd=None) Remove a file (same as unlink()). 
@@ -758,12 +760,12 @@ If it is unavailable, using it will raise a NotImplementedError.""" try: if rposix.HAVE_UNLINKAT and dir_fd != DEFAULT_DIR_FD: - path = space.fsencode_w(w_path) - rposix.unlinkat(path, dir_fd, removedir=False) + rposix.unlinkat(space.fsencode_w(path.w_path), + dir_fd, removedir=False) else: - dispatch_filename(rposix.unlink)(space, w_path) + call_rposix(rposix.unlink, path) except OSError as e: - raise wrap_oserror2(space, e, w_path, eintr_retry=False) + raise wrap_oserror2(space, e, path.w_path, eintr_retry=False) def _getfullpathname(space, w_path): """helper for ntpath.abspath """ @@ -1109,9 +1111,9 @@ wrap_oserror(space, e, eintr_retry=True) @unwrap_spec(src_dir_fd=DirFD(rposix.HAVE_RENAMEAT), - dst_dir_fd=DirFD(rposix.HAVE_RENAMEAT)) + dst_dir_fd=DirFD(rposix.HAVE_RENAMEAT)) def rename(space, w_src, w_dst, __kwonly__, - src_dir_fd=DEFAULT_DIR_FD, dst_dir_fd=DEFAULT_DIR_FD): + src_dir_fd=DEFAULT_DIR_FD, dst_dir_fd=DEFAULT_DIR_FD): """rename(src, dst, *, src_dir_fd=None, dst_dir_fd=None) Rename a file or directory. 
diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -390,6 +390,16 @@ if sys.platform != "win32": assert posix.access(pdir, posix.X_OK) is False + def test_unlink(self): + os = self.posix + path = self.path + with open(path, 'wb'): + pass + class Path: + def __fspath__(self): + return path + os.unlink(Path()) + def test_times(self): """ posix.times() should return a posix.times_result object giving From pypy.commits at gmail.com Tue Jan 1 12:51:40 2019 From: pypy.commits at gmail.com (amauryfa) Date: Tue, 01 Jan 2019 09:51:40 -0800 (PST) Subject: [pypy-commit] pypy py3.6: hg merge py3.5 Message-ID: <5c2ba8ac.1c69fb81.4fb13.313e@mx.google.com> Author: Amaury Forgeot d'Arc Branch: py3.6 Changeset: r95559:df0beeb7b5ec Date: 2019-01-01 18:50 +0100 http://bitbucket.org/pypy/pypy/changeset/df0beeb7b5ec/ Log: hg merge py3.5 diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -30,7 +30,7 @@ DEALINGS IN THE SOFTWARE. 
-PyPy Copyright holders 2003-2018 +PyPy Copyright holders 2003-2019 -------------------------------- Except when otherwise stated (look for LICENSE files or information at diff --git a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py --- a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py +++ b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py @@ -327,6 +327,16 @@ assert ffi.typeof(c) is ffi.typeof("char[]") ffi.cast("unsigned short *", c)[1] += 500 assert list(a) == [10000, 20500, 30000] + assert c == ffi.from_buffer(a, True) + assert c == ffi.from_buffer(a, require_writable=True) + # + p = ffi.from_buffer(b"abcd") + assert p[2] == b"c" + # + assert p == ffi.from_buffer(b"abcd", False) + py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) + py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", + require_writable=True) def test_memmove(self): ffi = FFI() diff --git a/extra_tests/cffi_tests/cffi1/test_ffi_obj.py b/extra_tests/cffi_tests/cffi1/test_ffi_obj.py --- a/extra_tests/cffi_tests/cffi1/test_ffi_obj.py +++ b/extra_tests/cffi_tests/cffi1/test_ffi_obj.py @@ -244,6 +244,16 @@ assert ffi.typeof(c) is ffi.typeof("char[]") ffi.cast("unsigned short *", c)[1] += 500 assert list(a) == [10000, 20500, 30000] + assert c == ffi.from_buffer(a, True) + assert c == ffi.from_buffer(a, require_writable=True) + # + p = ffi.from_buffer(b"abcd") + assert p[2] == b"c" + # + assert p == ffi.from_buffer(b"abcd", False) + py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) + py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", + require_writable=True) def test_memmove(): ffi = _cffi1_backend.FFI() diff --git a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py --- a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py +++ b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py @@ -1654,6 +1654,16 @@ assert ffi.typeof(c) is 
ffi.typeof("char[]") ffi.cast("unsigned short *", c)[1] += 500 assert list(a) == [10000, 20500, 30000] + assert c == ffi.from_buffer(a, True) + assert c == ffi.from_buffer(a, require_writable=True) + # + p = ffi.from_buffer(b"abcd") + assert p[2] == b"c" + # + assert p == ffi.from_buffer(b"abcd", False) + py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) + py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", + require_writable=True) def test_all_primitives(self): assert set(PRIMITIVE_TO_INDEX) == set([ diff --git a/extra_tests/test_pyrepl/conftest.py b/extra_tests/test_pyrepl/conftest.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_pyrepl/conftest.py @@ -0,0 +1,8 @@ +import sys + +def pytest_ignore_collect(path): + if '__pypy__' not in sys.builtin_module_names: + try: + import pyrepl + except ImportError: + return True diff --git a/lib-python/3/datetime.py b/lib-python/3/datetime.py --- a/lib-python/3/datetime.py +++ b/lib-python/3/datetime.py @@ -537,7 +537,11 @@ -self._microseconds) def __pos__(self): - return self + # for CPython compatibility, we cannot use + # our __class__ here, but need a real timedelta + return timedelta(self._days, + self._seconds, + self._microseconds) def __abs__(self): if self._days < 0: @@ -829,8 +833,7 @@ month = self._month if day is None: day = self._day - # PyPy fix: returns type(self)() instead of date() - return type(self)(year, month, day) + return date.__new__(type(self), year, month, day) # Comparisons of date objects with other. @@ -1323,8 +1326,8 @@ tzinfo = self.tzinfo if fold is None: fold = self._fold - # PyPy fix: returns type(self)() instead of time() - return type(self)(hour, minute, second, microsecond, tzinfo, fold=fold) + return time.__new__(type(self), + hour, minute, second, microsecond, tzinfo) # Pickle support. 
@@ -1387,13 +1390,13 @@ hour, minute, second, microsecond, fold) _check_tzinfo_arg(tzinfo) self = dateinterop.__new__(cls) - self._year = year - self._month = month - self._day = day - self._hour = hour - self._minute = minute - self._second = second - self._microsecond = microsecond + self._year = int(year) + self._month = int(month) + self._day = int(day) + self._hour = int(hour) + self._minute = int(minute) + self._second = int(second) + self._microsecond = int(microsecond) self._tzinfo = tzinfo self._hashcode = -1 self._fold = fold @@ -1606,8 +1609,8 @@ if fold is None: fold = self.fold # PyPy fix: returns type(self)() instead of datetime() - return type(self)(year, month, day, hour, minute, second, - microsecond, tzinfo, fold=fold) + return datetime.__new__(type(self), year, month, day, hour, minute, + second, microsecond, tzinfo) def _local_timezone(self): if self.tzinfo is None: @@ -1883,7 +1886,10 @@ if myoff == otoff: return base if myoff is None or otoff is None: - raise TypeError("cannot mix naive and timezone-aware time") + # The CPython _datetimemodule.c error message and the + # datetime.py one are different + raise TypeError("can't subtract offset-naive and " + "offset-aware datetimes") return base + otoff - myoff def __hash__(self): diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -341,7 +341,7 @@ # """ # note that 'buffer' is a type, set on this instance by __init__ - def from_buffer(self, python_buffer): + def from_buffer(self, python_buffer, require_writable=False): """Return a that points to the data of the given Python object, which must support the buffer interface. Note that this is not meant to be used on the built-in types @@ -349,7 +349,8 @@ but only on objects containing large quantities of raw data in some other format, like 'array.array' or numpy arrays. 
""" - return self._backend.from_buffer(self.BCharA, python_buffer) + return self._backend.from_buffer(self.BCharA, python_buffer, + require_writable) def memmove(self, dest, src, n): """ffi.memmove(dest, src, n) copies n bytes of memory from src to dest. diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py --- a/pypy/doc/conf.py +++ b/pypy/doc/conf.py @@ -59,7 +59,7 @@ # General information about the project. project = u'PyPy' -copyright = u'2018, The PyPy Project' +copyright = u'2019, The PyPy Project' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -401,8 +401,10 @@ * some functions and attributes of the ``gc`` module behave in a slightly different way: for example, ``gc.enable`` and - ``gc.disable`` are supported, but instead of enabling and disabling - the GC, they just enable and disable the execution of finalizers. + ``gc.disable`` are supported, but "enabling and disabling the GC" has + a different meaning in PyPy than in CPython. These functions + actually enable and disable the major collections and the + execution of finalizers. * PyPy prints a random line from past #pypy IRC topics at startup in interactive mode. In a released version, this behaviour is suppressed, but diff --git a/pypy/doc/gc_info.rst b/pypy/doc/gc_info.rst --- a/pypy/doc/gc_info.rst +++ b/pypy/doc/gc_info.rst @@ -22,8 +22,44 @@ larger. (A third category, the very large objects, are initially allocated outside the nursery and never move.) -Since Incminimark is an incremental GC, the major collection is incremental, -meaning there should not be any pauses longer than 1ms. 
+Since Incminimark is an incremental GC, the major collection is incremental: +the goal is not to have any pause longer than 1ms, but in practice it depends +on the size and characteristics of the heap: occasionally, there can be pauses +between 10-100ms. + + +Semi-manual GC management +-------------------------- + +If there are parts of the program where it is important to have a low latency, +you might want to control precisely when the GC runs, to avoid unexpected +pauses. Note that this has effect only on major collections, while minor +collections continue to work as usual. + +As explained above, a full major collection consists of ``N`` steps, where +``N`` depends on the size of the heap; generally speaking, it is not possible +to predict how many steps will be needed to complete a collection. + +``gc.enable()`` and ``gc.disable()`` control whether the GC runs collection +steps automatically. When the GC is disabled the memory usage will grow +indefinitely, unless you manually call ``gc.collect()`` and +``gc.collect_step()``. + +``gc.collect()`` runs a full major collection. + +``gc.collect_step()`` runs a single collection step. It returns an object of +type GcCollectStepStats_, the same which is passed to the corresponding `GC +Hooks`_. The following code is roughly equivalent to a ``gc.collect()``:: + + while True: + if gc.collect_step().major_is_done: + break + +For a real-world example of usage of this API, you can look at the 3rd-party +module `pypytools.gc.custom`_, which also provides a ``with customgc.nogc()`` +context manager to mark sections where the GC is forbidden. + +.. _`pypytools.gc.custom`: https://bitbucket.org/antocuni/pypytools/src/0273afc3e8bedf0eb1ef630c3bc69e8d9dd661fe/pypytools/gc/custom.py?at=default&fileviewer=file-view-default Fragmentation @@ -184,6 +220,8 @@ the number of pinned objects. +.. 
_GcCollectStepStats: + The attributes for ``GcCollectStepStats`` are: ``count``, ``duration``, ``duration_min``, ``duration_max`` @@ -192,10 +230,14 @@ ``oldstate``, ``newstate`` Integers which indicate the state of the GC before and after the step. +``major_is_done`` + Boolean which indicate whether this was the last step of the major + collection + The value of ``oldstate`` and ``newstate`` is one of these constants, defined inside ``gc.GcCollectStepStats``: ``STATE_SCANNING``, ``STATE_MARKING``, -``STATE_SWEEPING``, ``STATE_FINALIZING``. It is possible to get a string -representation of it by indexing the ``GC_STATS`` tuple. +``STATE_SWEEPING``, ``STATE_FINALIZING``, ``STATE_USERDEL``. It is possible +to get a string representation of it by indexing the ``GC_STATES`` tuple. The attributes for ``GcCollectStats`` are: diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -64,3 +64,10 @@ .. branch: cleanup-test_lib_pypy Update most test_lib_pypy/ tests and move them to extra_tests/. + +.. branch: gc-disable + +Make it possible to manually manage the GC by using a combination of +gc.disable() and gc.collect_step(). Make sure to write a proper release +announcement in which we explain that existing programs could leak memory if +they run for too much time between a gc.disable()/gc.enable() diff --git a/pypy/module/_cffi_backend/ffi_obj.py b/pypy/module/_cffi_backend/ffi_obj.py --- a/pypy/module/_cffi_backend/ffi_obj.py +++ b/pypy/module/_cffi_backend/ffi_obj.py @@ -327,7 +327,8 @@ return w_ctype.cast(w_ob) - def descr_from_buffer(self, w_python_buffer): + @unwrap_spec(require_writable=int) + def descr_from_buffer(self, w_python_buffer, require_writable=0): """\ Return a that points to the data of the given Python object, which must support the buffer interface. 
Note that this is @@ -337,7 +338,8 @@ 'array.array' or numpy arrays.""" # w_ctchara = newtype._new_chara_type(self.space) - return func._from_buffer(self.space, w_ctchara, w_python_buffer) + return func._from_buffer(self.space, w_ctchara, w_python_buffer, + require_writable) @unwrap_spec(w_arg=W_CData) diff --git a/pypy/module/_cffi_backend/func.py b/pypy/module/_cffi_backend/func.py --- a/pypy/module/_cffi_backend/func.py +++ b/pypy/module/_cffi_backend/func.py @@ -110,8 +110,8 @@ def _fetch_as_write_buffer(space, w_x): return space.writebuf_w(w_x) - at unwrap_spec(w_ctype=ctypeobj.W_CType) -def from_buffer(space, w_ctype, w_x): + at unwrap_spec(w_ctype=ctypeobj.W_CType, require_writable=int) +def from_buffer(space, w_ctype, w_x, require_writable=0): from pypy.module._cffi_backend import ctypearray, ctypeprim # if (not isinstance(w_ctype, ctypearray.W_CTypeArray) or @@ -119,13 +119,16 @@ raise oefmt(space.w_TypeError, "needs 'char[]', got '%s'", w_ctype.name) # - return _from_buffer(space, w_ctype, w_x) + return _from_buffer(space, w_ctype, w_x, require_writable) -def _from_buffer(space, w_ctype, w_x): +def _from_buffer(space, w_ctype, w_x, require_writable): if space.isinstance_w(w_x, space.w_unicode): raise oefmt(space.w_TypeError, - "from_buffer() cannot return the address a unicode") - buf = _fetch_as_read_buffer(space, w_x) + "from_buffer() cannot return the address of a unicode object") + if require_writable: + buf = _fetch_as_write_buffer(space, w_x) + else: + buf = _fetch_as_read_buffer(space, w_x) if space.isinstance_w(w_x, space.w_bytes): _cdata = get_raw_address_of_string(space, w_x) else: diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -3730,6 +3730,18 @@ check(4 | 8, "CHB", "GTB") check(4 | 16, "CHB", "ROB") +def test_from_buffer_require_writable(): + BChar = 
new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + p1 = from_buffer(BCharA, b"foo", False) + assert p1 == from_buffer(BCharA, b"foo", False) + py.test.raises((TypeError, BufferError), from_buffer, BCharA, b"foo", True) + ba = bytearray(b"foo") + p1 = from_buffer(BCharA, ba, True) + p1[0] = b"g" + assert ba == b"goo" + def test_memmove(): Short = new_primitive_type("short") ShortA = new_array_type(new_pointer_type(Short), None) diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py --- a/pypy/module/_cffi_backend/test/test_ffi_obj.py +++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py @@ -287,6 +287,16 @@ assert ffi.typeof(c) is ffi.typeof("char[]") ffi.cast("unsigned short *", c)[1] += 500 assert list(a) == [10000, 20500, 30000] + assert c == ffi.from_buffer(a, True) + assert c == ffi.from_buffer(a, require_writable=True) + # + p = ffi.from_buffer(b"abcd") + assert p[2] == b"c" + # + assert p == ffi.from_buffer(b"abcd", False) + raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) + raises((TypeError, BufferError), ffi.from_buffer, b"abcd", + require_writable=True) def test_from_buffer_BytesIO(self): from _cffi_backend import FFI diff --git a/pypy/module/_cppyy/test/Makefile b/pypy/module/_cppyy/test/Makefile --- a/pypy/module/_cppyy/test/Makefile +++ b/pypy/module/_cppyy/test/Makefile @@ -15,7 +15,7 @@ HASGENREFLEX:=$(shell command -v genreflex 2> /dev/null) -cppflags=-std=c++14 -O3 -m64 -fPIC -rdynamic +cppflags=-std=c++14 -O3 -fPIC -rdynamic ifdef HASGENREFLEX genreflex_flags:=$(shell genreflex --cppflags) cppflags+=$(genreflex_flags) @@ -25,7 +25,7 @@ PLATFORM := $(shell uname -s) ifeq ($(PLATFORM),Darwin) - cppflags+=-dynamiclib -single_module -arch x86_64 -undefined dynamic_lookup + cppflags+=-dynamiclib -single_module -undefined dynamic_lookup endif diff --git a/pypy/module/gc/__init__.py b/pypy/module/gc/__init__.py --- 
a/pypy/module/gc/__init__.py +++ b/pypy/module/gc/__init__.py @@ -4,6 +4,7 @@ class Module(MixedModule): interpleveldefs = { 'collect': 'interp_gc.collect', + 'collect_step': 'interp_gc.collect_step', 'enable': 'interp_gc.enable', 'disable': 'interp_gc.disable', 'isenabled': 'interp_gc.isenabled', diff --git a/pypy/module/gc/hook.py b/pypy/module/gc/hook.py --- a/pypy/module/gc/hook.py +++ b/pypy/module/gc/hook.py @@ -1,5 +1,6 @@ from rpython.memory.gc.hook import GcHooks -from rpython.memory.gc import incminimark +from rpython.memory.gc import incminimark +from rpython.rlib import rgc from rpython.rlib.nonconst import NonConstant from rpython.rlib.rarithmetic import r_uint, r_longlong, longlongmax from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault @@ -117,12 +118,24 @@ self.descr_set_on_gc_collect(space, space.w_None) -class GcMinorHookAction(AsyncAction): +class NoRecursiveAction(AsyncAction): + depth = 0 + + def perform(self, ec, frame): + if self.depth == 0: + try: + self.depth += 1 + return self._do_perform(ec, frame) + finally: + self.depth -= 1 + + +class GcMinorHookAction(NoRecursiveAction): total_memory_used = 0 pinned_objects = 0 def __init__(self, space): - AsyncAction.__init__(self, space) + NoRecursiveAction.__init__(self, space) self.w_callable = space.w_None self.reset() @@ -145,7 +158,7 @@ self.pinned_objects = NonConstant(-42) self.fire() - def perform(self, ec, frame): + def _do_perform(self, ec, frame): w_stats = W_GcMinorStats( self.count, self.duration, @@ -157,12 +170,12 @@ self.space.call_function(self.w_callable, w_stats) -class GcCollectStepHookAction(AsyncAction): +class GcCollectStepHookAction(NoRecursiveAction): oldstate = 0 newstate = 0 def __init__(self, space): - AsyncAction.__init__(self, space) + NoRecursiveAction.__init__(self, space) self.w_callable = space.w_None self.reset() @@ -185,19 +198,20 @@ self.newstate = NonConstant(-42) self.fire() - def perform(self, ec, frame): + def _do_perform(self, ec, 
frame): w_stats = W_GcCollectStepStats( self.count, self.duration, self.duration_min, self.duration_max, self.oldstate, - self.newstate) + self.newstate, + rgc.is_done__states(self.oldstate, self.newstate)) self.reset() self.space.call_function(self.w_callable, w_stats) -class GcCollectHookAction(AsyncAction): +class GcCollectHookAction(NoRecursiveAction): num_major_collects = 0 arenas_count_before = 0 arenas_count_after = 0 @@ -206,7 +220,7 @@ rawmalloc_bytes_after = 0 def __init__(self, space): - AsyncAction.__init__(self, space) + NoRecursiveAction.__init__(self, space) self.w_callable = space.w_None self.reset() @@ -227,7 +241,7 @@ self.rawmalloc_bytes_after = NonConstant(r_uint(42)) self.fire() - def perform(self, ec, frame): + def _do_perform(self, ec, frame): w_stats = W_GcCollectStats(self.count, self.num_major_collects, self.arenas_count_before, @@ -252,15 +266,32 @@ class W_GcCollectStepStats(W_Root): + # NOTE: this is specific to incminimark: if we want to integrate the + # applevel gc module with another gc, we probably need a more general + # approach to this. + # + # incminimark has 4 GC states: scanning, marking, sweeping and + # finalizing. However, from the user point of view, we have an additional + # "virtual" state: USERDEL, which represent when we run applevel + # finalizers after having completed a GC major collection. 
This state is + # never explicitly visible when using hooks, but it is used for the return + # value of gc.collect_step (see interp_gc.py) + STATE_SCANNING = incminimark.STATE_SCANNING + STATE_MARKING = incminimark.STATE_MARKING + STATE_SWEEPING = incminimark.STATE_SWEEPING + STATE_FINALIZING = incminimark.STATE_FINALIZING + STATE_USERDEL = incminimark.STATE_FINALIZING + 1 # used by StepCollector + GC_STATES = tuple(incminimark.GC_STATES + ['USERDEL']) def __init__(self, count, duration, duration_min, duration_max, - oldstate, newstate): + oldstate, newstate, major_is_done): self.count = count self.duration = duration self.duration_min = duration_min self.duration_max = duration_max self.oldstate = oldstate self.newstate = newstate + self.major_is_done = major_is_done class W_GcCollectStats(W_Root): @@ -320,11 +351,16 @@ W_GcCollectStepStats.typedef = TypeDef( "GcCollectStepStats", - STATE_SCANNING = incminimark.STATE_SCANNING, - STATE_MARKING = incminimark.STATE_MARKING, - STATE_SWEEPING = incminimark.STATE_SWEEPING, - STATE_FINALIZING = incminimark.STATE_FINALIZING, - GC_STATES = tuple(incminimark.GC_STATES), + STATE_SCANNING = W_GcCollectStepStats.STATE_SCANNING, + STATE_MARKING = W_GcCollectStepStats.STATE_MARKING, + STATE_SWEEPING = W_GcCollectStepStats.STATE_SWEEPING, + STATE_FINALIZING = W_GcCollectStepStats.STATE_FINALIZING, + STATE_USERDEL = W_GcCollectStepStats.STATE_USERDEL, + GC_STATES = tuple(W_GcCollectStepStats.GC_STATES), + major_is_done = interp_attrproperty( + "major_is_done", + cls=W_GcCollectStepStats, + wrapfn="newbool"), **wrap_many(W_GcCollectStepStats, ( "count", "duration", diff --git a/pypy/module/gc/interp_gc.py b/pypy/module/gc/interp_gc.py --- a/pypy/module/gc/interp_gc.py +++ b/pypy/module/gc/interp_gc.py @@ -1,6 +1,7 @@ from pypy.interpreter.gateway import unwrap_spec from pypy.interpreter.error import oefmt from rpython.rlib import rgc +from pypy.module.gc.hook import W_GcCollectStepStats @unwrap_spec(generation=int) @@ -16,7 +17,9 
@@ cache.clear() rgc.collect() + _run_finalizers(space) +def _run_finalizers(space): # if we are running in gc.disable() mode but gc.collect() is called, # we should still call the finalizers now. We do this as an attempt # to get closer to CPython's behavior: in Py3.5 some tests @@ -39,18 +42,20 @@ return space.newint(0) def enable(space): - """Non-recursive version. Enable finalizers now. + """Non-recursive version. Enable major collections and finalizers. If they were already enabled, no-op. If they were disabled even several times, enable them anyway. """ + rgc.enable() if not space.user_del_action.enabled_at_app_level: space.user_del_action.enabled_at_app_level = True enable_finalizers(space) def disable(space): - """Non-recursive version. Disable finalizers now. Several calls - to this function are ignored. + """Non-recursive version. Disable major collections and finalizers. + Multiple calls to this function are ignored. """ + rgc.disable() if space.user_del_action.enabled_at_app_level: space.user_del_action.enabled_at_app_level = False disable_finalizers(space) @@ -77,6 +82,59 @@ if uda.pending_with_disabled_del is None: uda.pending_with_disabled_del = [] + +class StepCollector(object): + """ + Invoke rgc.collect_step() until we are done, then run the app-level + finalizers as a separate step + """ + + def __init__(self, space): + self.space = space + self.finalizing = False + + def do(self): + if self.finalizing: + self._run_finalizers() + self.finalizing = False + oldstate = W_GcCollectStepStats.STATE_USERDEL + newstate = W_GcCollectStepStats.STATE_SCANNING + major_is_done = True # now we are finally done + else: + states = self._collect_step() + oldstate = rgc.old_state(states) + newstate = rgc.new_state(states) + major_is_done = False # USERDEL still to do + if rgc.is_done(states): + newstate = W_GcCollectStepStats.STATE_USERDEL + self.finalizing = True + # + duration = -1 + return W_GcCollectStepStats( + count = 1, + duration = duration, + duration_min 
= duration, + duration_max = duration, + oldstate = oldstate, + newstate = newstate, + major_is_done = major_is_done) + + def _collect_step(self): + return rgc.collect_step() + + def _run_finalizers(self): + _run_finalizers(self.space) + +def collect_step(space): + """ + If the GC is incremental, run a single gc-collect-step. Return True when + the major collection is completed. + If the GC is not incremental, do a full collection and return True. + """ + sc = space.fromcache(StepCollector) + w_stats = sc.do() + return w_stats + # ____________________________________________________________ @unwrap_spec(filename='fsencode') diff --git a/pypy/module/gc/test/test_gc.py b/pypy/module/gc/test/test_gc.py --- a/pypy/module/gc/test/test_gc.py +++ b/pypy/module/gc/test/test_gc.py @@ -1,7 +1,20 @@ import py - +import pytest +from rpython.rlib import rgc +from pypy.interpreter.baseobjspace import ObjSpace +from pypy.interpreter.gateway import interp2app, unwrap_spec +from pypy.module.gc.interp_gc import StepCollector, W_GcCollectStepStats class AppTestGC(object): + + def setup_class(cls): + if cls.runappdirect: + pytest.skip("these tests cannot work with -A") + space = cls.space + def rgc_isenabled(space): + return space.newbool(rgc.isenabled()) + cls.w_rgc_isenabled = space.wrap(interp2app(rgc_isenabled)) + def test_collect(self): import gc gc.collect() # mostly a "does not crash" kind of test @@ -63,12 +76,16 @@ def test_enable(self): import gc assert gc.isenabled() + assert self.rgc_isenabled() gc.disable() assert not gc.isenabled() + assert not self.rgc_isenabled() gc.enable() assert gc.isenabled() + assert self.rgc_isenabled() gc.enable() assert gc.isenabled() + assert self.rgc_isenabled() def test_gc_collect_overrides_gc_disable(self): import gc @@ -83,6 +100,24 @@ assert deleted == [1] gc.enable() + def test_gc_collect_step(self): + import gc + + class X(object): + deleted = 0 + def __del__(self): + X.deleted += 1 + + gc.disable() + X(); X(); X(); + n = 0 + while 
True: + n += 1 + if gc.collect_step().major_is_done: + break + + assert n >= 2 # at least one step + 1 finalizing + assert X.deleted == 3 class AppTestGcDumpHeap(object): pytestmark = py.test.mark.xfail(run=False) @@ -156,3 +191,55 @@ gc.collect() # the classes C should all go away here for r in rlist: assert r() is None + + +def test_StepCollector(): + W = W_GcCollectStepStats + SCANNING = W.STATE_SCANNING + MARKING = W.STATE_MARKING + SWEEPING = W.STATE_SWEEPING + FINALIZING = W.STATE_FINALIZING + USERDEL = W.STATE_USERDEL + + class MyStepCollector(StepCollector): + my_steps = 0 + my_done = False + my_finalized = 0 + + def __init__(self): + StepCollector.__init__(self, space=None) + self._state_transitions = iter([ + (SCANNING, MARKING), + (MARKING, SWEEPING), + (SWEEPING, FINALIZING), + (FINALIZING, SCANNING)]) + + def _collect_step(self): + self.my_steps += 1 + try: + oldstate, newstate = next(self._state_transitions) + except StopIteration: + assert False, 'should not happen, did you call _collect_step too much?' 
+ return rgc._encode_states(oldstate, newstate) + + def _run_finalizers(self): + self.my_finalized += 1 + + sc = MyStepCollector() + transitions = [] + while True: + result = sc.do() + transitions.append((result.oldstate, result.newstate, sc.my_finalized)) + if result.major_is_done: + break + + assert transitions == [ + (SCANNING, MARKING, False), + (MARKING, SWEEPING, False), + (SWEEPING, FINALIZING, False), + (FINALIZING, USERDEL, False), + (USERDEL, SCANNING, True) + ] + # there is one more transition than actual step, because + # FINALIZING->USERDEL is "virtual" + assert sc.my_steps == len(transitions) - 1 diff --git a/pypy/module/gc/test/test_hook.py b/pypy/module/gc/test/test_hook.py --- a/pypy/module/gc/test/test_hook.py +++ b/pypy/module/gc/test/test_hook.py @@ -69,26 +69,29 @@ def test_on_gc_collect_step(self): import gc + SCANNING = 0 + MARKING = 1 + SWEEPING = 2 + FINALIZING = 3 lst = [] def on_gc_collect_step(stats): lst.append((stats.count, stats.duration, stats.oldstate, - stats.newstate)) + stats.newstate, + stats.major_is_done)) gc.hooks.on_gc_collect_step = on_gc_collect_step - self.fire_gc_collect_step(10, 20, 30) - self.fire_gc_collect_step(40, 50, 60) + self.fire_gc_collect_step(10, SCANNING, MARKING) + self.fire_gc_collect_step(40, FINALIZING, SCANNING) assert lst == [ - (1, 10, 20, 30), - (1, 40, 50, 60), + (1, 10, SCANNING, MARKING, False), + (1, 40, FINALIZING, SCANNING, True), ] # gc.hooks.on_gc_collect_step = None - self.fire_gc_collect_step(70, 80, 90) # won't fire - assert lst == [ - (1, 10, 20, 30), - (1, 40, 50, 60), - ] + oldlst = lst[:] + self.fire_gc_collect_step(70, SCANNING, MARKING) # won't fire + assert lst == oldlst def test_on_gc_collect(self): import gc @@ -123,7 +126,8 @@ assert S.STATE_MARKING == 1 assert S.STATE_SWEEPING == 2 assert S.STATE_FINALIZING == 3 - assert S.GC_STATES == ('SCANNING', 'MARKING', 'SWEEPING', 'FINALIZING') + assert S.GC_STATES == ('SCANNING', 'MARKING', 'SWEEPING', + 'FINALIZING', 'USERDEL') def 
test_cumulative(self): import gc @@ -176,3 +180,22 @@ assert gc.hooks.on_gc_minor is None assert gc.hooks.on_gc_collect_step is None assert gc.hooks.on_gc_collect is None + + def test_no_recursive(self): + import gc + lst = [] + def on_gc_minor(stats): + lst.append((stats.count, + stats.duration, + stats.total_memory_used, + stats.pinned_objects)) + self.fire_gc_minor(1, 2, 3) # won't fire NOW + gc.hooks.on_gc_minor = on_gc_minor + self.fire_gc_minor(10, 20, 30) + self.fire_gc_minor(40, 50, 60) + # the duration for the 2nd call is 41, because it also counts the 1 + # which was fired recursively + assert lst == [ + (1, 10, 20, 30), + (2, 41, 50, 60), + ] diff --git a/pypy/module/math/test/test_direct.py b/pypy/module/math/test/test_direct.py --- a/pypy/module/math/test/test_direct.py +++ b/pypy/module/math/test/test_direct.py @@ -6,11 +6,6 @@ from rpython.rtyper.lltypesystem.module.test.math_cases import (MathTests, get_tester) -consistent_host = True -if '__pypy__' not in sys.builtin_module_names: - if sys.version_info < (2, 6): - consistent_host = False - class TestDirect(MathTests): pass @@ -30,8 +25,6 @@ def make_test_case((fnname, args, expected), dict): # def test_func(self): - if not consistent_host: - py.test.skip("inconsistent behavior before 2.6") try: fn = getattr(math, fnname) except AttributeError: diff --git a/pypy/module/math/test/test_math.py b/pypy/module/math/test/test_math.py --- a/pypy/module/math/test/test_math.py +++ b/pypy/module/math/test/test_math.py @@ -18,7 +18,6 @@ filename = filename[:-1] space = cls.space cls.w_math_cases = space.wrap(filename) - cls.w_consistent_host = space.wrap(test_direct.consistent_host) @classmethod def make_callable_wrapper(cls, func): @@ -53,8 +52,6 @@ yield fnname, args, expected def test_all_cases(self): - if not self.consistent_host: - skip("please test this on top of PyPy or CPython >= 2.6") import math for fnname, args, expected in self.cases(): fn = getattr(math, fnname) diff --git 
a/pypy/objspace/std/smalllongobject.py b/pypy/objspace/std/smalllongobject.py --- a/pypy/objspace/std/smalllongobject.py +++ b/pypy/objspace/std/smalllongobject.py @@ -379,7 +379,7 @@ def _pow(space, iv, iw, iz): if iw < 0: if iz != 0: - raise oefmt(space.w_TypeError, + raise oefmt(space.w_ValueError, "pow() 2nd argument cannot be negative when 3rd " "argument specified") raise ValueError diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1105,9 +1105,8 @@ assert type(str(z)) is str assert str(z) == u'foobaz' # - # two completely corner cases where we differ from CPython: - #assert unicode(encoding='supposedly_the_encoding') == u'' - #assert unicode(errors='supposedly_the_error') == u'' + assert str(encoding='supposedly_the_encoding') == u'' + assert str(errors='supposedly_the_error') == u'' e = raises(TypeError, str, u'', 'supposedly_the_encoding') assert str(e.value) == 'decoding str is not supported' e = raises(TypeError, str, u'', errors='supposedly_the_error') diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -223,20 +223,18 @@ @staticmethod def descr_new(space, w_unicodetype, w_object=None, w_encoding=None, w_errors=None): - if w_object is None: - w_object = W_UnicodeObject.EMPTY - w_obj = w_object - encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors) - if encoding is None and errors is None: - # this is very quick if w_obj is already a w_unicode - w_value = unicode_from_object(space, w_obj) + if w_object is None: + w_value = W_UnicodeObject.EMPTY + elif encoding is None and errors is None: + # this is very quick if w_object is already a w_unicode + w_value = unicode_from_object(space, w_object) else: - if space.isinstance_w(w_obj, space.w_unicode): + if 
space.isinstance_w(w_object, space.w_unicode): raise oefmt(space.w_TypeError, "decoding str is not supported") - w_value = unicode_from_encoded_object(space, w_obj, + w_value = unicode_from_encoded_object(space, w_object, encoding, errors) if space.is_w(w_unicodetype, space.w_unicode): return w_value diff --git a/rpython/memory/gc/base.py b/rpython/memory/gc/base.py --- a/rpython/memory/gc/base.py +++ b/rpython/memory/gc/base.py @@ -149,6 +149,20 @@ def get_size_incl_hash(self, obj): return self.get_size(obj) + # these can be overriden by subclasses, called by the GCTransformer + def enable(self): + pass + + def disable(self): + pass + + def isenabled(self): + return True + + def collect_step(self): + self.collect() + return True + def malloc(self, typeid, length=0, zero=False): """NOT_RPYTHON For testing. The interface used by the gctransformer is diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -379,6 +379,11 @@ self.total_gc_time = 0.0 self.gc_state = STATE_SCANNING + + # if the GC is disabled, it runs only minor collections; major + # collections need to be manually triggered by explicitly calling + # collect() + self.enabled = True # # Two lists of all objects with finalizers. Actually they are lists # of pairs (finalization_queue_nr, object). "probably young objects" @@ -514,6 +519,15 @@ bigobj = self.nonlarge_max + 1 self.max_number_of_pinned_objects = self.nursery_size / (bigobj * 2) + def enable(self): + self.enabled = True + + def disable(self): + self.enabled = False + + def isenabled(self): + return self.enabled + def _nursery_memory_size(self): extra = self.nonlarge_max + 1 return self.nursery_size + extra @@ -750,22 +764,53 @@ """Do a minor (gen=0), start a major (gen=1), or do a full major (gen>=2) collection.""" if gen < 0: - self._minor_collection() # dangerous! 
no major GC cycle progress - elif gen <= 1: - self.minor_collection_with_major_progress() - if gen == 1 and self.gc_state == STATE_SCANNING: + # Dangerous! this makes no progress on the major GC cycle. + # If called too often, the memory usage will keep increasing, + # because we'll never completely fill the nursery (and so + # never run anything about the major collection). + self._minor_collection() + elif gen == 0: + # This runs a minor collection. This is basically what occurs + # when the nursery is full. If a major collection is in + # progress, it also runs one more step of it. It might also + # decide to start a major collection just now, depending on + # current memory pressure. + self.minor_collection_with_major_progress(force_enabled=True) + elif gen == 1: + # This is like gen == 0, but if no major collection is running, + # then it forces one to start now. + self.minor_collection_with_major_progress(force_enabled=True) + if self.gc_state == STATE_SCANNING: self.major_collection_step() else: + # This does a complete minor and major collection. self.minor_and_major_collection() self.rrc_invoke_callback() + def collect_step(self): + """ + Do a single major collection step. Return True when the major collection + is completed. - def minor_collection_with_major_progress(self, extrasize=0): - """Do a minor collection. Then, if there is already a major GC - in progress, run at least one major collection step. If there is - no major GC but the threshold is reached, start a major GC. + This is meant to be used together with gc.disable(), to have a + fine-grained control on when the GC runs. + """ + old_state = self.gc_state + self._minor_collection() + self.major_collection_step() + self.rrc_invoke_callback() + return rgc._encode_states(old_state, self.gc_state) + + def minor_collection_with_major_progress(self, extrasize=0, + force_enabled=False): + """Do a minor collection. 
Then, if the GC is enabled and there + is already a major GC in progress, run at least one major collection + step. If there is no major GC but the threshold is reached, start a + major GC. """ self._minor_collection() + if not self.enabled and not force_enabled: + return # If the gc_state is STATE_SCANNING, we're not in the middle # of an incremental major collection. In that case, wait @@ -2428,25 +2473,6 @@ # We also need to reset the GCFLAG_VISITED on prebuilt GC objects. self.prebuilt_root_objects.foreach(self._reset_gcflag_visited, None) # - # Print statistics - debug_start("gc-collect-done") - debug_print("arenas: ", - self.stat_ac_arenas_count, " => ", - self.ac.arenas_count) - debug_print("bytes used in arenas: ", - self.ac.total_memory_used) - debug_print("bytes raw-malloced: ", - self.stat_rawmalloced_total_size, " => ", - self.rawmalloced_total_size) - debug_stop("gc-collect-done") - self.hooks.fire_gc_collect( - num_major_collects=self.num_major_collects, - arenas_count_before=self.stat_ac_arenas_count, - arenas_count_after=self.ac.arenas_count, - arenas_bytes=self.ac.total_memory_used, - rawmalloc_bytes_before=self.stat_rawmalloced_total_size, - rawmalloc_bytes_after=self.rawmalloced_total_size) - # # Set the threshold for the next major collection to be when we # have allocated 'major_collection_threshold' times more than # we currently have -- but no more than 'max_delta' more than @@ -2460,6 +2486,27 @@ total_memory_used + self.max_delta), reserving_size) # + # Print statistics + debug_start("gc-collect-done") + debug_print("arenas: ", + self.stat_ac_arenas_count, " => ", + self.ac.arenas_count) + debug_print("bytes used in arenas: ", + self.ac.total_memory_used) + debug_print("bytes raw-malloced: ", + self.stat_rawmalloced_total_size, " => ", + self.rawmalloced_total_size) + debug_print("next major collection threshold: ", + self.next_major_collection_threshold) + debug_stop("gc-collect-done") + self.hooks.fire_gc_collect( + 
num_major_collects=self.num_major_collects, + arenas_count_before=self.stat_ac_arenas_count, + arenas_count_after=self.ac.arenas_count, + arenas_bytes=self.ac.total_memory_used, + rawmalloc_bytes_before=self.stat_rawmalloced_total_size, + rawmalloc_bytes_after=self.rawmalloced_total_size) + # # Max heap size: gives an upper bound on the threshold. If we # already have at least this much allocated, raise MemoryError. if bounded and self.threshold_reached(reserving_size): diff --git a/rpython/memory/gc/test/test_direct.py b/rpython/memory/gc/test/test_direct.py --- a/rpython/memory/gc/test/test_direct.py +++ b/rpython/memory/gc/test/test_direct.py @@ -13,6 +13,7 @@ from rpython.memory.gc import minimark, incminimark from rpython.memory.gctypelayout import zero_gc_pointers_inside, zero_gc_pointers from rpython.rlib.debug import debug_print +from rpython.rlib.test.test_debug import debuglog import pdb WORD = LONG_BIT // 8 @@ -770,4 +771,76 @@ assert elem.prev == lltype.nullptr(S) assert elem.next == lltype.nullptr(S) - + def test_collect_0(self, debuglog): + self.gc.collect(1) # start a major + debuglog.reset() + self.gc.collect(0) # do ONLY a minor + assert debuglog.summary() == {'gc-minor': 1} + + def test_enable_disable(self, debuglog): + def large_malloc(): + # malloc an object which is large enough to trigger a major collection + threshold = self.gc.next_major_collection_threshold + self.malloc(VAR, int(threshold/8)) + summary = debuglog.summary() + debuglog.reset() + return summary + # + summary = large_malloc() + assert sorted(summary.keys()) == ['gc-collect-step', 'gc-minor'] + # + self.gc.disable() + summary = large_malloc() + assert sorted(summary.keys()) == ['gc-minor'] + # + self.gc.enable() + summary = large_malloc() + assert sorted(summary.keys()) == ['gc-collect-step', 'gc-minor'] + + def test_call_collect_when_disabled(self, debuglog): + # malloc an object and put it the old generation + s = self.malloc(S) + s.x = 42 + self.stackroots.append(s) + 
self.gc.collect() + s = self.stackroots.pop() + # + self.gc.disable() + self.gc.collect(1) # start a major collect + assert sorted(debuglog.summary()) == ['gc-collect-step', 'gc-minor'] + assert s.x == 42 # s is not freed yet + # + debuglog.reset() + self.gc.collect(1) # run one more step + assert sorted(debuglog.summary()) == ['gc-collect-step', 'gc-minor'] + assert s.x == 42 # s is not freed yet + # + debuglog.reset() + self.gc.collect() # finish the major collection + summary = debuglog.summary() + assert sorted(debuglog.summary()) == ['gc-collect-step', 'gc-minor'] + # s is freed + py.test.raises(RuntimeError, 's.x') + + def test_collect_step(self, debuglog): + from rpython.rlib import rgc + n = 0 + states = [] + while True: + debuglog.reset() + val = self.gc.collect_step() + states.append((rgc.old_state(val), rgc.new_state(val))) + summary = debuglog.summary() + assert summary == {'gc-minor': 1, 'gc-collect-step': 1} + if rgc.is_done(val): + break + n += 1 + if n == 100: + assert False, 'this looks like an endless loop' + # + assert states == [ + (incminimark.STATE_SCANNING, incminimark.STATE_MARKING), + (incminimark.STATE_MARKING, incminimark.STATE_SWEEPING), + (incminimark.STATE_SWEEPING, incminimark.STATE_FINALIZING), + (incminimark.STATE_FINALIZING, incminimark.STATE_SCANNING) + ] diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -309,6 +309,12 @@ self.collect_ptr = getfn(GCClass.collect.im_func, [s_gc, annmodel.SomeInteger()], annmodel.s_None) + self.collect_step_ptr = getfn(GCClass.collect_step.im_func, [s_gc], + annmodel.SomeInteger()) + self.enable_ptr = getfn(GCClass.enable.im_func, [s_gc], annmodel.s_None) + self.disable_ptr = getfn(GCClass.disable.im_func, [s_gc], annmodel.s_None) + self.isenabled_ptr = getfn(GCClass.isenabled.im_func, [s_gc], + annmodel.s_Bool) self.can_move_ptr = 
getfn(GCClass.can_move.im_func, [s_gc, SomeAddress()], annmodel.SomeBool()) @@ -884,6 +890,28 @@ resultvar=op.result) self.pop_roots(hop, livevars) + def gct_gc__collect_step(self, hop): + op = hop.spaceop + livevars = self.push_roots(hop) + hop.genop("direct_call", [self.collect_step_ptr, self.c_const_gc], + resultvar=op.result) + self.pop_roots(hop, livevars) + + def gct_gc__enable(self, hop): + op = hop.spaceop + hop.genop("direct_call", [self.enable_ptr, self.c_const_gc], + resultvar=op.result) + + def gct_gc__disable(self, hop): + op = hop.spaceop + hop.genop("direct_call", [self.disable_ptr, self.c_const_gc], + resultvar=op.result) + + def gct_gc__isenabled(self, hop): + op = hop.spaceop + hop.genop("direct_call", [self.isenabled_ptr, self.c_const_gc], + resultvar=op.result) + def gct_gc_can_move(self, hop): op = hop.spaceop v_addr = hop.genop('cast_ptr_to_adr', diff --git a/rpython/rlib/debug.py b/rpython/rlib/debug.py --- a/rpython/rlib/debug.py +++ b/rpython/rlib/debug.py @@ -1,5 +1,6 @@ import sys import time +from collections import Counter from rpython.rlib.objectmodel import enforceargs from rpython.rtyper.extregistry import ExtRegistryEntry @@ -38,6 +39,23 @@ assert False, ("nesting error: no start corresponding to stop %r" % (category,)) + def reset(self): + # only for tests: empty the log + self[:] = [] + + def summary(self, flatten=False): + res = Counter() + def visit(lst): + for section, sublist in lst: + if section == 'debug_print': + continue + res[section] += 1 + if flatten: + visit(sublist) + # + visit(self) + return res + def __repr__(self): import pprint return pprint.pformat(list(self)) diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py --- a/rpython/rlib/rgc.py +++ b/rpython/rlib/rgc.py @@ -13,6 +13,50 @@ # General GC features collect = gc.collect +enable = gc.enable +disable = gc.disable +isenabled = gc.isenabled + +def collect_step(): + """ + If the GC is incremental, run a single gc-collect-step. 
+ + Return an integer which encodes the starting and ending GC state. Use + rgc.{old_state,new_state,is_done} to decode it. + + If the GC is not incremental, do a full collection and return a value on + which rgc.is_done() return True. + """ + gc.collect() + return _encode_states(1, 0) + +def _encode_states(oldstate, newstate): + return oldstate << 8 | newstate + +def old_state(states): + return (states & 0xFF00) >> 8 + +def new_state(states): + return states & 0xFF + +def is_done(states): + """ + Return True if the return value of collect_step signals the end of a major + collection + """ + old = old_state(states) + new = new_state(states) + return is_done__states(old, new) + +def is_done__states(oldstate, newstate): + "Like is_done, but takes oldstate and newstate explicitly" + # a collection is considered done when it ends up in the starting state + # (which is usually represented as 0). This logic works for incminimark, + # which is currently the only gc actually used and for which collect_step + # is implemented. In case we add more GC in the future, we might want to + # delegate this logic to the GC itself, but for now it is MUCH simpler to + # just write it in plain RPython. + return oldstate != 0 and newstate == 0 def set_max_heap_size(nbytes): """Limit the heap size to n bytes. 
@@ -131,6 +175,44 @@ args_v = hop.inputargs(lltype.Signed) return hop.genop('gc__collect', args_v, resulttype=hop.r_result) + +class EnableDisableEntry(ExtRegistryEntry): + _about_ = (gc.enable, gc.disable) + + def compute_result_annotation(self): + from rpython.annotator import model as annmodel + return annmodel.s_None + + def specialize_call(self, hop): + hop.exception_cannot_occur() + opname = self.instance.__name__ + return hop.genop('gc__%s' % opname, hop.args_v, resulttype=hop.r_result) + + +class IsEnabledEntry(ExtRegistryEntry): + _about_ = gc.isenabled + + def compute_result_annotation(self): + from rpython.annotator import model as annmodel + return annmodel.s_Bool + + def specialize_call(self, hop): + hop.exception_cannot_occur() + return hop.genop('gc__isenabled', hop.args_v, resulttype=hop.r_result) + + +class CollectStepEntry(ExtRegistryEntry): + _about_ = collect_step + + def compute_result_annotation(self): + from rpython.annotator import model as annmodel + return annmodel.SomeInteger() + + def specialize_call(self, hop): + hop.exception_cannot_occur() + return hop.genop('gc__collect_step', hop.args_v, resulttype=hop.r_result) + + class SetMaxHeapSizeEntry(ExtRegistryEntry): _about_ = set_max_heap_size diff --git a/rpython/rlib/test/test_debug.py b/rpython/rlib/test/test_debug.py --- a/rpython/rlib/test/test_debug.py +++ b/rpython/rlib/test/test_debug.py @@ -1,5 +1,5 @@ - import py +import pytest from rpython.rlib.debug import (check_annotation, make_sure_not_resized, debug_print, debug_start, debug_stop, have_debug_prints, debug_offset, debug_flush, @@ -10,6 +10,12 @@ from rpython.rlib import debug from rpython.rtyper.test.test_llinterp import interpret, gengraph + at pytest.fixture +def debuglog(monkeypatch): + dlog = debug.DebugLog() + monkeypatch.setattr(debug, '_log', dlog) + return dlog + def test_check_annotation(): class Error(Exception): pass @@ -94,7 +100,7 @@ py.test.raises(NotAListOfChars, "interpret(g, [3])") -def 
test_debug_print_start_stop(): +def test_debug_print_start_stop(debuglog): def f(x): debug_start("mycat") debug_print("foo", 2, "bar", x) @@ -103,22 +109,27 @@ debug_offset() # should not explode at least return have_debug_prints() - try: - debug._log = dlog = debug.DebugLog() - res = f(3) - assert res is True - finally: - debug._log = None - assert dlog == [("mycat", [('debug_print', 'foo', 2, 'bar', 3)])] + res = f(3) + assert res is True + assert debuglog == [("mycat", [('debug_print', 'foo', 2, 'bar', 3)])] + debuglog.reset() - try: - debug._log = dlog = debug.DebugLog() - res = interpret(f, [3]) - assert res is True - finally: - debug._log = None - assert dlog == [("mycat", [('debug_print', 'foo', 2, 'bar', 3)])] + res = interpret(f, [3]) + assert res is True + assert debuglog == [("mycat", [('debug_print', 'foo', 2, 'bar', 3)])] +def test_debuglog_summary(debuglog): + debug_start('foo') + debug_start('bar') # this is nested, so not counted in the summary by default + debug_stop('bar') + debug_stop('foo') + debug_start('foo') + debug_stop('foo') + debug_start('bar') + debug_stop('bar') + # + assert debuglog.summary() == {'foo': 2, 'bar': 1} + assert debuglog.summary(flatten=True) == {'foo': 2, 'bar': 2} def test_debug_start_stop_timestamp(): import time diff --git a/rpython/rlib/test/test_rgc.py b/rpython/rlib/test/test_rgc.py --- a/rpython/rlib/test/test_rgc.py +++ b/rpython/rlib/test/test_rgc.py @@ -39,6 +39,45 @@ assert res is None +def test_enable_disable(): + def f(): + gc.enable() + a = gc.isenabled() + gc.disable() + b = gc.isenabled() + return a and not b + + t, typer, graph = gengraph(f, []) + blockops = list(graph.iterblockops()) + opnames = [op.opname for block, op in blockops + if op.opname.startswith('gc__')] + assert opnames == ['gc__enable', 'gc__isenabled', + 'gc__disable', 'gc__isenabled'] + res = interpret(f, []) + assert res + +def test_collect_step(): + def f(): + return rgc.collect_step() + + assert f() + t, typer, graph = gengraph(f, []) 
+ blockops = list(graph.iterblockops()) + opnames = [op.opname for block, op in blockops + if op.opname.startswith('gc__')] + assert opnames == ['gc__collect_step'] + res = interpret(f, []) + assert res + +def test__encode_states(): + val = rgc._encode_states(42, 43) + assert rgc.old_state(val) == 42 + assert rgc.new_state(val) == 43 + assert not rgc.is_done(val) + # + val = rgc.collect_step() + assert rgc.is_done(val) + def test_can_move(): T0 = lltype.GcStruct('T') T1 = lltype.GcArray(lltype.Float) diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py --- a/rpython/rtyper/llinterp.py +++ b/rpython/rtyper/llinterp.py @@ -819,6 +819,18 @@ def op_gc__collect(self, *gen): self.heap.collect(*gen) + def op_gc__collect_step(self): + return self.heap.collect_step() + + def op_gc__enable(self): + self.heap.enable() + + def op_gc__disable(self): + self.heap.disable() + + def op_gc__isenabled(self): + return self.heap.isenabled() + def op_gc_heap_stats(self): raise NotImplementedError diff --git a/rpython/rtyper/lltypesystem/llheap.py b/rpython/rtyper/lltypesystem/llheap.py --- a/rpython/rtyper/lltypesystem/llheap.py +++ b/rpython/rtyper/lltypesystem/llheap.py @@ -5,7 +5,7 @@ setfield = setattr from operator import setitem as setarrayitem -from rpython.rlib.rgc import can_move, collect, add_memory_pressure +from rpython.rlib.rgc import can_move, collect, enable, disable, isenabled, add_memory_pressure, collect_step def setinterior(toplevelcontainer, inneraddr, INNERTYPE, newvalue, offsets=None): diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py --- a/rpython/rtyper/lltypesystem/lloperation.py +++ b/rpython/rtyper/lltypesystem/lloperation.py @@ -456,6 +456,10 @@ # __________ GC operations __________ 'gc__collect': LLOp(canmallocgc=True), + 'gc__collect_step': LLOp(canmallocgc=True), + 'gc__enable': LLOp(), + 'gc__disable': LLOp(), + 'gc__isenabled': LLOp(), 'gc_free': LLOp(), 'gc_fetch_exception': LLOp(), 
'gc_restore_exception': LLOp(), diff --git a/rpython/translator/c/test/test_newgc.py b/rpython/translator/c/test/test_newgc.py --- a/rpython/translator/c/test/test_newgc.py +++ b/rpython/translator/c/test/test_newgc.py @@ -1812,6 +1812,92 @@ res = self.run("ignore_finalizer") assert res == 1 # translated: x1 is removed from the list + def define_enable_disable(self): + class Counter(object): + val = 0 + counter = Counter() + class X(object): + def __del__(self): + counter.val += 1 + def f(should_disable): + x1 = X() + rgc.collect() # make x1 old + assert not rgc.can_move(x1) + x1 = None + # + if should_disable: + gc.disable() + assert not gc.isenabled() + # try to trigger a major collection + N = 100 # this should be enough, increase if not + lst = [] + for i in range(N): + lst.append(chr(i%256) * (1024*1024)) + #print i, counter.val + # + gc.enable() + assert gc.isenabled() + return counter.val + return f + + def test_enable_disable(self): + # first, run with normal gc. If the assert fails it means that in the + # loop we don't allocate enough mem to trigger a major collection. Try + # to increase N + deleted = self.run("enable_disable", 0) + assert deleted == 1, 'This should not fail, try to increment N' + # + # now, run with gc.disable: this should NOT free x1 + deleted = self.run("enable_disable", 1) + assert deleted == 0 + + def define_collect_step(self): + class Counter(object): + val = 0 + counter = Counter() + class X(object): + def __del__(self): + counter.val += 1 + def f(): + x1 = X() + rgc.collect() # make x1 old + assert not rgc.can_move(x1) + x1 = None + # + gc.disable() + n = 0 + states = [] + while True: + n += 1 + val = rgc.collect_step() + states.append((rgc.old_state(val), rgc.new_state(val))) + if rgc.is_done(val): + break + if n == 100: + print 'Endless loop!' + assert False, 'this looks like an endless loop' + + if n < 4: # we expect at least 4 steps + print 'Too few steps! 
n =', n + assert False + + # check that the state transitions are reasonable + first_state, _ = states[0] + for i, (old_state, new_state) in enumerate(states): + is_last = (i == len(states) - 1) + is_valid = False + if is_last: + assert old_state != new_state == first_state + else: + assert new_state == old_state or new_state == old_state+1 + + return counter.val + return f + + def test_collect_step(self): + deleted = self.run("collect_step") + assert deleted == 1 + def define_total_gc_time(cls): def f(): l = [] diff --git a/rpython/translator/goal/gcbench.py b/rpython/translator/goal/gcbench.py --- a/rpython/translator/goal/gcbench.py +++ b/rpython/translator/goal/gcbench.py @@ -44,8 +44,9 @@ # - Results are sensitive to locking cost, but we dont # check for proper locking import time +import gc -USAGE = """gcbench [num_repetitions] [--depths=N,N,N..] [--threads=N]""" +USAGE = """gcbench [num_repetitions] [--depths=N,N,N..] [--threads=N] [--gc=off|--gc=manual]""" ENABLE_THREADS = True @@ -173,6 +174,7 @@ depths = DEFAULT_DEPTHS threads = 0 repeatcount = 1 + gc_policy = 'on' for arg in argv[1:]: if arg.startswith('--threads='): arg = arg[len('--threads='):] @@ -189,13 +191,22 @@ depths = [int(s) for s in arg] except ValueError: return argerror() + elif arg.startswith('--gc=off'): + gc_policy = 'off' + elif arg.startswith('--gc=manual'): + gc_policy = 'manual' else: try: repeatcount = int(arg) except ValueError: return argerror() + # + if gc_policy == 'off' or gc_policy == 'manual': + gc.disable() for i in range(repeatcount): main(depths, threads) + if gc_policy == 'manual': + gc.collect(1) return 0 From pypy.commits at gmail.com Tue Jan 1 13:29:08 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Jan 2019 10:29:08 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: test, fix formating '%c' Message-ID: <5c2bb174.1c69fb81.30bf9.3555@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95560:9a0c90346239 Date: 2019-01-01 19:19 +0200 
http://bitbucket.org/pypy/pypy/changeset/9a0c90346239/ Log: test, fix formating '%c' diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -455,6 +455,8 @@ self.prec = -1 # just because space = self.space if space.isinstance_w(w_value, space.w_bytes): + if do_unicode: + w_value = w_value.descr_decode(space, space.newtext('ascii')) s = space.bytes_w(w_value) if len(s) != 1: raise oefmt(space.w_TypeError, "%c requires int or char") @@ -463,7 +465,7 @@ if not do_unicode: raise NeedUnicodeFormattingError ustr = space.utf8_w(w_value) - if len(ustr) != 1: + if space.len_w(w_value) != 1: raise oefmt(space.w_TypeError, "%c requires int or unichar") self.std_wp(ustr, False) else: @@ -516,7 +518,7 @@ formatter = UnicodeFormatter(space, fmt, values_w, w_valuedict) result = formatter.format() # this can force strings, not sure if it's a problem or not - lgt = rutf8.check_utf8(result, True) + lgt = rutf8.codepoints_in_utf8(result) return space.newutf8(result, lgt) def mod_format(space, w_format, w_values, do_unicode=False): diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1125,6 +1125,16 @@ return u'\u1234' '%s' % X() + def test_formatting_char(self): + for num in range(0x80,0x100): + uchar = unichr(num) + print num + assert uchar == u"%c" % num # works only with ints + assert uchar == u"%c" % uchar # and unicode chars + # the implicit decoding should fail for non-ascii chars + raises(UnicodeDecodeError, u"%c".__mod__, chr(num)) + raises(UnicodeDecodeError, u"%s".__mod__, chr(num)) + def test_str_subclass(self): class Foo9(str): def __unicode__(self): From pypy.commits at gmail.com Tue Jan 1 13:29:09 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Jan 2019 10:29:09 -0800 (PST) Subject: [pypy-commit] pypy 
unicode-utf8: add failing test from unicode-utf8-py3, and minimize diff Message-ID: <5c2bb175.1c69fb81.7fc30.9ca1@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95561:347997cdfae4 Date: 2019-01-01 19:49 +0200 http://bitbucket.org/pypy/pypy/changeset/347997cdfae4/ Log: add failing test from unicode-utf8-py3, and minimize diff diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -140,6 +140,98 @@ assert unicode_escape_encode(u'abc') == (u'abc'.encode('unicode_escape'), 3) assert unicode_escape_decode('abc') == (u'abc'.decode('unicode_escape'), 3) assert unicode_escape_decode('\\x61\\x62\\x63') == (u'abc', 12) + def test_unicode_replace(self): + # CPython #8271: during the decoding of an invalid UTF-8 byte sequence, + # only the start byte and the continuation byte(s) are now considered + # invalid, instead of the number of bytes specified by the start byte. + # See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf (page 42, + # table 3-8, Row 2) for more information about the algorithm used. 
+ FFFD = u'\ufffd' + sequences = [ + # invalid start bytes + (b'\x80', FFFD), # continuation byte + (b'\x80\x80', FFFD*2), # 2 continuation bytes + (b'\xc0', FFFD), + (b'\xc0\xc0', FFFD*2), + (b'\xc1', FFFD), + (b'\xc1\xc0', FFFD*2), + (b'\xc0\xc1', FFFD*2), + # with start byte of a 2-byte sequence + (b'\xc2', FFFD), # only the start byte + (b'\xc2\xc2', FFFD*2), # 2 start bytes + (b'\xc2\xc2\xc2', FFFD*3), # 3 start bytes + (b'\xc2\x41', FFFD+'A'), # invalid continuation byte + # with start byte of a 3-byte sequence + (b'\xe1', FFFD), # only the start byte + (b'\xe1\xe1', FFFD*2), # 2 start bytes + (b'\xe1\xe1\xe1', FFFD*3), # 3 start bytes + (b'\xe1\xe1\xe1\xe1', FFFD*4), # 4 start bytes + (b'\xe1\x80', FFFD), # only 1 continuation byte + (b'\xe1\x41', FFFD+'A'), # invalid continuation byte + (b'\xe1\x41\x80', FFFD+'A'+FFFD), # invalid cb followed by valid cb + (b'\xe1\x41\x41', FFFD+'AA'), # 2 invalid continuation bytes + (b'\xe1\x80\x41', FFFD+'A'), # only 1 valid continuation byte + (b'\xe1\x80\xe1\x41', FFFD*2+'A'), # 1 valid and the other invalid + (b'\xe1\x41\xe1\x80', FFFD+'A'+FFFD), # 1 invalid and the other valid + # with start byte of a 4-byte sequence + (b'\xf1', FFFD), # only the start byte + (b'\xf1\xf1', FFFD*2), # 2 start bytes + (b'\xf1\xf1\xf1', FFFD*3), # 3 start bytes + (b'\xf1\xf1\xf1\xf1', FFFD*4), # 4 start bytes + (b'\xf1\xf1\xf1\xf1\xf1', FFFD*5), # 5 start bytes + (b'\xf1\x80', FFFD), # only 1 continuation bytes + (b'\xf1\x80\x80', FFFD), # only 2 continuation bytes + (b'\xf1\x80\x41', FFFD+'A'), # 1 valid cb and 1 invalid + (b'\xf1\x80\x41\x41', FFFD+'AA'), # 1 valid cb and 1 invalid + (b'\xf1\x80\x80\x41', FFFD+'A'), # 2 valid cb and 1 invalid + (b'\xf1\x41\x80', FFFD+'A'+FFFD), # 1 invalid cv and 1 valid + (b'\xf1\x41\x80\x80', FFFD+'A'+FFFD*2), # 1 invalid cb and 2 invalid + (b'\xf1\x41\x80\x41', FFFD+'A'+FFFD+'A'), # 2 invalid cb and 1 invalid + (b'\xf1\x41\x41\x80', FFFD+'AA'+FFFD), # 1 valid cb and 1 invalid + (b'\xf1\x41\xf1\x80', 
FFFD+'A'+FFFD), + (b'\xf1\x41\x80\xf1', FFFD+'A'+FFFD*2), + (b'\xf1\xf1\x80\x41', FFFD*2+'A'), + (b'\xf1\x41\xf1\xf1', FFFD+'A'+FFFD*2), + # with invalid start byte of a 4-byte sequence (rfc2279) + (b'\xf5', FFFD), # only the start byte + (b'\xf5\xf5', FFFD*2), # 2 start bytes + (b'\xf5\x80', FFFD*2), # only 1 continuation byte + (b'\xf5\x80\x80', FFFD*3), # only 2 continuation byte + (b'\xf5\x80\x80\x80', FFFD*4), # 3 continuation bytes + (b'\xf5\x80\x41', FFFD*2+'A'), # 1 valid cb and 1 invalid + (b'\xf5\x80\x41\xf5', FFFD*2+'A'+FFFD), + (b'\xf5\x41\x80\x80\x41', FFFD+'A'+FFFD*2+'A'), + # with invalid start byte of a 5-byte sequence (rfc2279) + (b'\xf8', FFFD), # only the start byte + (b'\xf8\xf8', FFFD*2), # 2 start bytes + (b'\xf8\x80', FFFD*2), # only one continuation byte + (b'\xf8\x80\x41', FFFD*2 + 'A'), # 1 valid cb and 1 invalid + (b'\xf8\x80\x80\x80\x80', FFFD*5), # invalid 5 bytes seq with 5 bytes + # with invalid start byte of a 6-byte sequence (rfc2279) + (b'\xfc', FFFD), # only the start byte + (b'\xfc\xfc', FFFD*2), # 2 start bytes + (b'\xfc\x80\x80', FFFD*3), # only 2 continuation bytes + (b'\xfc\x80\x80\x80\x80\x80', FFFD*6), # 6 continuation bytes + # invalid start byte + (b'\xfe', FFFD), + (b'\xfe\x80\x80', FFFD*3), + # other sequences + (b'\xf1\x80\x41\x42\x43', '\ufffd\x41\x42\x43'), + (b'\xf1\x80\xff\x42\x43', '\ufffd\ufffd\x42\x43'), + (b'\xf1\x80\xc2\x81\x43', '\ufffd\x81\x43'), + (b'\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64', + '\x61\uFFFD\uFFFD\uFFFD\x62\uFFFD\x63\uFFFD\uFFFD\x64'), + ] + for n, (seq, res) in enumerate(sequences): + print(seq, res) + raises(UnicodeDecodeError, seq.decode, 'utf-8', 'strict') + uni = seq.decode('utf-8', 'replace') + assert uni == res + uni = (seq+b'b').decode('utf-8', 'replace') + assert uni == res+'b' + uni = seq.decode('utf-8', 'ignore') + assert uni == res.replace(u'\uFFFD', '') + class AppTestPartialEvaluation: @@ -359,32 +451,31 @@ def search_function(encoding): def f(input, 
errors="strict"): return 42 - print encoding if encoding == 'test.mytestenc': return (f, f, None, None) return None _codecs.register(search_function) - raises(TypeError, "hello".decode, "test.mytestenc") + raises(TypeError, b"hello".decode, "test.mytestenc") raises(TypeError, u"hello".encode, "test.mytestenc") def test_cpytest_decode(self): import codecs - assert codecs.decode('\xe4\xf6\xfc', 'latin-1') == u'\xe4\xf6\xfc' + assert codecs.decode(b'\xe4\xf6\xfc', 'latin-1') == u'\xe4\xf6\xfc' raises(TypeError, codecs.decode) - assert codecs.decode('abc') == u'abc' - raises(UnicodeDecodeError, codecs.decode, '\xff', 'ascii') + assert codecs.decode(b'abc') == u'abc' + raises(UnicodeDecodeError, codecs.decode, b'\xff', 'ascii') def test_bad_errorhandler_return(self): import codecs def baddecodereturn1(exc): return 42 codecs.register_error("test.baddecodereturn1", baddecodereturn1) - raises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn1") - raises(TypeError, "\\".decode, "unicode-escape", "test.baddecodereturn1") - raises(TypeError, "\\x0".decode, "unicode-escape", "test.baddecodereturn1") - raises(TypeError, "\\x0y".decode, "unicode-escape", "test.baddecodereturn1") - raises(TypeError, "\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1") - raises(TypeError, "\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1") + raises(TypeError, b"\xff".decode, "ascii", "test.baddecodereturn1") + raises(TypeError, b"\\".decode, "unicode-escape", "test.baddecodereturn1") + raises(TypeError, b"\\x0".decode, "unicode-escape", "test.baddecodereturn1") + raises(TypeError, b"\\x0y".decode, "unicode-escape", "test.baddecodereturn1") + raises(TypeError, b"\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1") + raises(TypeError, b"\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1") def test_cpy_bug1175396(self): import codecs, StringIO @@ -420,7 +511,7 @@ ' self.abort("cannot load articles")\r\n', ] stream = 
StringIO.StringIO("".join(s).encode("utf7")) - assert "aborrt" not in stream.getvalue() + assert b"aborrt" not in stream.getvalue() reader = codecs.getreader("utf7")(stream) for (i, line) in enumerate(reader): assert line == s[i] @@ -439,30 +530,44 @@ s = u"spam" assert d.decode(s.encode("utf-8-sig")) == s + def test_decoder_state(self): + import codecs + encoding = 'utf16' + u = 'abc123' + s = u.encode(encoding) + for i in range(len(u) + 1): + d = codecs.getincrementalencoder(encoding)() + part1 = d.encode(u[:i]) + state = d.getstate() + d = codecs.getincrementalencoder(encoding)() + d.setstate(state) + part2 = d.encode(u[i:], True) + assert s == part1 + part2 + def test_escape_decode_escaped_newline(self): import _codecs - s = '\\\n' + s = b'\\\n' decoded = _codecs.unicode_escape_decode(s)[0] assert decoded == '' def test_charmap_decode_1(self): import codecs - assert codecs.charmap_encode(u'xxx') == ('xxx', 3) - assert codecs.charmap_encode(u'xxx', 'strict', {ord('x'): 'XX'}) == ('XXXXXX', 3) + assert codecs.charmap_encode(u'xxx') == (b'xxx', 3) + assert codecs.charmap_encode(u'xxx', 'strict', {ord('x'): b'XX'}) == (b'XXXXXX', 3) - res = codecs.charmap_decode("\x00\x01\x02", "replace", u"ab") + res = codecs.charmap_decode(b"\x00\x01\x02", "replace", u"ab") assert res == (u"ab\ufffd", 3) - res = codecs.charmap_decode("\x00\x01\x02", "replace", u"ab\ufffe") + res = codecs.charmap_decode(b"\x00\x01\x02", "replace", u"ab\ufffe") assert res == (u'ab\ufffd', 3) def test_decode_errors(self): import sys if sys.maxunicode > 0xffff: try: - "\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal") + b"\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal") except UnicodeDecodeError as ex: assert "unicode_internal" == ex.encoding - assert "\x00\x00\x00\x00\x00\x11\x11\x00" == ex.object + assert b"\x00\x00\x00\x00\x00\x11\x11\x00" == ex.object assert ex.start == 4 assert ex.end == 8 else: @@ -473,14 +578,14 @@ assert codecs.replace_errors(UnicodeEncodeError( 
"ascii", u"\u3042", 0, 1, "ouch")) == (u"?", 1) assert codecs.replace_errors(UnicodeDecodeError( - "ascii", "\xff", 0, 1, "ouch")) == (u"\ufffd", 1) + "ascii", b"\xff", 0, 1, "ouch")) == (u"\ufffd", 1) assert codecs.replace_errors(UnicodeTranslateError( u"\u3042", 0, 1, "ouch")) == (u"\ufffd", 1) assert codecs.replace_errors(UnicodeEncodeError( "ascii", u"\u3042\u3042", 0, 2, "ouch")) == (u"??", 2) assert codecs.replace_errors(UnicodeDecodeError( - "ascii", "\xff\xff", 0, 2, "ouch")) == (u"\ufffd", 2) + "ascii", b"\xff\xff", 0, 2, "ouch")) == (u"\ufffd", 2) assert codecs.replace_errors(UnicodeTranslateError( u"\u3042\u3042", 0, 2, "ouch")) == (u"\ufffd\ufffd", 2) @@ -498,13 +603,13 @@ # A UnicodeDecodeError object without an end attribute class NoEndUnicodeDecodeError(UnicodeDecodeError): def __init__(self): - UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad") + UnicodeDecodeError.__init__(self, "ascii", b"", 0, 1, "bad") del self.end # A UnicodeDecodeError object with a bad object attribute class BadObjectUnicodeDecodeError(UnicodeDecodeError): def __init__(self): - UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad") + UnicodeDecodeError.__init__(self, "ascii", b"", 0, 1, "bad") self.object = [] # A UnicodeTranslateError object without a start attribute @@ -536,18 +641,18 @@ # With the correct exception, "replace" returns an "?" 
or u"\ufffd" replacement def test_decode_ignore(self): - assert '\xff'.decode('utf-7', 'ignore') == '' - assert '\x00'.decode('unicode-internal', 'ignore') == '' + assert b'\xff'.decode('utf-7', 'ignore') == '' + assert b'\x00'.decode('unicode-internal', 'ignore') == '' def test_backslashreplace(self): import sys sin = u"a\xac\u1234\u20ac\u8000\U0010ffff" if sys.maxunicode > 65535: - expected_ascii = "a\\xac\\u1234\\u20ac\\u8000\\U0010ffff" - expected_8859 = "a\xac\\u1234\xa4\\u8000\\U0010ffff" + expected_ascii = b"a\\xac\\u1234\\u20ac\\u8000\\U0010ffff" + expected_8859 = b"a\xac\\u1234\xa4\\u8000\\U0010ffff" else: - expected_ascii = "a\\xac\\u1234\\u20ac\\u8000\\udbff\\udfff" - expected_8859 = "a\xac\\u1234\xa4\\u8000\\udbff\\udfff" + expected_ascii = b"a\\xac\\u1234\\u20ac\\u8000\\udbff\\udfff" + expected_8859 = b"a\xac\\u1234\xa4\\u8000\\udbff\\udfff" assert sin.encode('ascii', 'backslashreplace') == expected_ascii assert sin.encode("iso-8859-15", "backslashreplace") == expected_8859 @@ -566,10 +671,10 @@ "test.badhandler" ) for (enc, bytes) in ( - ("utf-8", "\xff"), - ("ascii", "\xff"), - ("utf-7", "+x-"), - ("unicode-internal", "\x00"), + ("utf-8", b"\xff"), + ("ascii", b"\xff"), + ("utf-7", b"+x-"), + ("unicode-internal", b"\x00"), ): raises( TypeError, @@ -578,17 +683,25 @@ "test.badhandler" ) + def test_badhandler_longindex(self): + import codecs + import sys + errors = 'test.badhandler_longindex' + codecs.register_error(errors, lambda x: (u'', sys.maxsize + 1)) + # CPython raises OverflowError here + raises((IndexError, OverflowError), b'apple\x92ham\x93spam'.decode, 'utf-8', errors) + def test_unicode_internal(self): import codecs import sys try: - '\x00'.decode('unicode-internal') + b'\x00'.decode('unicode-internal') except UnicodeDecodeError: pass else: raise Exception("DID NOT RAISE") - res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace") + res = b"\x00\x00\x00\x00\x00".decode("unicode-internal", "replace") if sys.maxunicode > 65535: 
assert res == u"\u0000\ufffd" # UCS4 build else: @@ -605,7 +718,7 @@ raise TypeError("don't know how to handle %r" % exc) return (u"\x01", 5) codecs.register_error("test.hui", handler_unicodeinternal) - res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui") + res = b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui") if sys.maxunicode > 65535: assert res == u"\u0000\u0001" # UCS4 build else: @@ -720,18 +833,18 @@ x = u'123abc' if sys.byteorder == 'big': assert codecs.getencoder('utf-16')(x) == ( - '\xfe\xff\x001\x002\x003\x00a\x00b\x00c', 6) + b'\xfe\xff\x001\x002\x003\x00a\x00b\x00c', 6) assert codecs.getdecoder('utf-16')( - '\xfe\xff\x001\x002\x003\x00a\x00b\x00c') == (x, 14) + b'\xfe\xff\x001\x002\x003\x00a\x00b\x00c') == (x, 14) else: assert codecs.getencoder('utf-16')(x) == ( - '\xff\xfe1\x002\x003\x00a\x00b\x00c\x00', 6) + b'\xff\xfe1\x002\x003\x00a\x00b\x00c\x00', 6) assert codecs.getdecoder('utf-16')( - '\xff\xfe1\x002\x003\x00a\x00b\x00c\x00') == (x, 14) + b'\xff\xfe1\x002\x003\x00a\x00b\x00c\x00') == (x, 14) def test_unicode_escape(self): assert u'\\'.encode('unicode-escape') == '\\\\' - assert '\\\\'.decode('unicode-escape') == u'\\' + assert b'\\\\'.decode('unicode-escape') == u'\\' assert u'\ud801'.encode('unicode-escape') == '\\ud801' assert u'\u0013'.encode('unicode-escape') == '\\x13' From pypy.commits at gmail.com Tue Jan 1 13:29:11 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Jan 2019 10:29:11 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: apply fix from 0cca4bcffdbf, reduce diff to unicode-utf8-py3, fix test Message-ID: <5c2bb177.1c69fb81.43f6c.285a@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95562:5d13e76c2ee0 Date: 2019-01-01 20:28 +0200 http://bitbucket.org/pypy/pypy/changeset/5d13e76c2ee0/ Log: apply fix from 0cca4bcffdbf, reduce diff to unicode-utf8-py3, fix test diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- 
a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -3,7 +3,7 @@ from pypy.interpreter.error import OperationError, oefmt from rpython.rlib.objectmodel import specialize from rpython.rlib.rstring import StringBuilder -from rpython.rlib import rutf8 +from rpython.rlib import rutf8, runicode from rpython.rlib.rarithmetic import r_uint, intmask from rpython.rtyper.lltypesystem import rffi from pypy.module.unicodedata import unicodedb @@ -21,6 +21,11 @@ space.newtext(msg)])) return raise_unicode_exception_decode +def decode_never_raise(errors, encoding, msg, s, startingpos, endingpos): + assert startingpos >= 0 + ux = ['\ux' + hex(ord(x))[2:].upper() for x in s[startingpos:endingpos]] + return ''.join(ux), endingpos, 'b' + @specialize.memo() def encode_error_handler(space): # Fast version of the "strict" errors handler. @@ -35,6 +40,23 @@ space.newtext(msg)])) return raise_unicode_exception_encode + at specialize.memo() +def encode_unicode_error_handler(space): + # Fast version of the "strict" errors handler. 
+ def raise_unicode_exception_encode(errors, encoding, msg, uni, + startingpos, endingpos): + assert isinstance(uni, unicode) + u_len = len(uni) + utf8 = runicode.unicode_encode_utf8sp(uni, u_len) + raise OperationError(space.w_UnicodeEncodeError, + space.newtuple([space.newtext(encoding), + space.newtext(utf8, u_len), + space.newint(startingpos), + space.newint(endingpos), + space.newtext(msg)])) + return u'', None, 0 + return raise_unicode_exception_encode + def default_error_encode( errors, encoding, msg, u, startingpos, endingpos): """A default handler, for tests""" @@ -45,10 +67,10 @@ return '', endingpos raise ValueError -def convert_arg_to_w_unicode(space, w_arg, strict=None): - return space.convert_arg_to_w_unicode(w_arg) +# ____________________________________________________________ +_WIN32 = sys.platform == 'win32' +_MACOSX = sys.platform == 'darwin' -# ____________________________________________________________ def encode(space, w_data, encoding=None, errors='strict'): from pypy.objspace.std.unicodeobject import encode_object @@ -245,18 +267,21 @@ res = StringBuilder(slen) pos = 0 end = len(s) + suppressing = False # we are in a chain of "bad" unicode, only emit one fix while pos < end: ordch1 = ord(s[pos]) # fast path for ASCII if ordch1 <= 0x7F: pos += 1 res.append(chr(ordch1)) + suppressing = False continue if ordch1 <= 0xC1: r, pos = errorhandler(errors, "utf8", "invalid start byte", s, pos, pos + 1) - res.append(r) + if not suppressing: + res.append(r) continue pos += 1 @@ -268,14 +293,16 @@ break r, pos = errorhandler(errors, "utf8", "unexpected end of data", s, pos - 1, pos) - res.append(r) + if not suppressing: + res.append(r) continue ordch2 = ord(s[pos]) if rutf8._invalid_byte_2_of_2(ordch2): r, pos = errorhandler(errors, "utf8", "invalid continuation byte", s, pos - 1, pos) - res.append(r) + if not suppressing: + res.append(r) continue # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz pos += 1 @@ -289,8 +316,9 @@ pos -= 1 break r, pos = 
errorhandler(errors, "utf8", "unexpected end of data", - s, pos - 1, pos + 1) + s, pos - 1, pos) res.append(r) + suppressing = True continue ordch2 = ord(s[pos]) ordch3 = ord(s[pos + 1]) @@ -298,12 +326,14 @@ if rutf8._invalid_byte_2_of_3(ordch1, ordch2, True): r, pos = errorhandler(errors, "utf8", "invalid continuation byte", s, pos - 1, pos) - res.append(r) + if not suppressing: + res.append(r) continue elif rutf8._invalid_byte_3_of_3(ordch3): r, pos = errorhandler(errors, "utf8", "invalid continuation byte", s, pos - 1, pos + 1) - res.append(r) + if not suppressing: + res.append(r) continue pos += 2 @@ -311,6 +341,7 @@ res.append(chr(ordch1)) res.append(chr(ordch2)) res.append(chr(ordch3)) + suppressing = False continue if ordch1 <= 0xF4: @@ -321,6 +352,7 @@ r, pos = errorhandler(errors, "utf8", "unexpected end of data", s, pos - 1, pos) res.append(r) + suppressing = True continue ordch2 = ord(s[pos]) ordch3 = ord(s[pos + 1]) @@ -329,7 +361,8 @@ if rutf8._invalid_byte_2_of_4(ordch1, ordch2): r, pos = errorhandler(errors, "utf8", "invalid continuation byte", s, pos - 1, pos) - res.append(r) + if not suppressing: + res.append(r) continue elif rutf8._invalid_byte_3_of_4(ordch3): r, pos = errorhandler(errors, "utf8", "invalid continuation byte", @@ -339,7 +372,8 @@ elif rutf8._invalid_byte_4_of_4(ordch4): r, pos = errorhandler(errors, "utf8", "invalid continuation byte", s, pos - 1, pos + 2) - res.append(r) + if not suppressing: + res.append(r) continue pos += 3 @@ -348,11 +382,13 @@ res.append(chr(ordch2)) res.append(chr(ordch3)) res.append(chr(ordch4)) + suppressing = False continue r, pos = errorhandler(errors, "utf8", "invalid start byte", s, pos - 1, pos) - res.append(r) + if not suppressing: + res.append(r) r = res.build() return r, pos, rutf8.check_utf8(r, True) @@ -899,6 +935,33 @@ return result.build() +def encode_utf8(space, uni, allow_surrogates=False): + # Note that Python3 tends to forbid *all* surrogates in utf-8. 
+ # If allow_surrogates=True, then revert to the Python 2 behavior + # which never raises UnicodeEncodeError. Surrogate pairs are then + # allowed, either paired or lone. A paired surrogate is considered + # like the non-BMP character it stands for. See also *_utf8sp(). + assert isinstance(uni, unicode) + return runicode.unicode_encode_utf_8( + uni, len(uni), "strict", + errorhandler=encode_unicode_error_handler(space), + allow_surrogates=allow_surrogates) + +def encode_utf8sp(space, uni, allow_surrogates=True): + # Surrogate-preserving utf-8 encoding. Any surrogate character + # turns into its 3-bytes encoding, whether it is paired or not. + # This should always be reversible, and the reverse is + # decode_utf8sp(). + return runicode.unicode_encode_utf8sp(uni, len(uni)) + +def decode_utf8sp(space, string): + # Surrogate-preserving utf-8 decoding. Assuming there is no + # encoding error, it should always be reversible, and the reverse is + # encode_utf8sp(). + return str_decode_utf8(string, "string", True, decode_never_raise, + allow_surrogates=True) + + # ____________________________________________________________ # utf-16 diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -221,7 +221,7 @@ if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): w_obj = space.getattr(w_exc, space.newtext('object')) space.realutf8_w(w_obj) # weeoes - w_obj = unicodehelper.convert_arg_to_w_unicode(space, w_obj) + w_obj = space.convert_arg_to_w_unicode(w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) @@ -250,7 +250,7 @@ if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): w_obj = space.getattr(w_exc, space.newtext('object')) space.realutf8_w(w_obj) # for errors - w_obj = unicodehelper.convert_arg_to_w_unicode(space, w_obj) + w_obj = 
space.convert_arg_to_w_unicode(w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) @@ -395,7 +395,7 @@ def wrap_encoder(space, w_arg, errors="strict"): from pypy.interpreter import unicodehelper - w_arg = unicodehelper.convert_arg_to_w_unicode(space, w_arg, rname) + w_arg = space.convert_arg_to_w_unicode(w_arg) if errors is None: errors = 'strict' state = space.fromcache(CodecState) @@ -650,7 +650,7 @@ mapping = Charmap_Encode(space, w_mapping) state = space.fromcache(CodecState) - w_uni = unicodehelper.convert_arg_to_w_unicode(space, w_unicode) + w_uni = space.convert_arg_to_w_unicode(w_unicode) result = unicodehelper.utf8_encode_charmap( space.utf8_w(w_uni), errors, state.encode_error_handler, mapping) return space.newtuple([space.newbytes(result), space.newint(w_uni._len())]) diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -140,6 +140,7 @@ assert unicode_escape_encode(u'abc') == (u'abc'.encode('unicode_escape'), 3) assert unicode_escape_decode('abc') == (u'abc'.decode('unicode_escape'), 3) assert unicode_escape_decode('\\x61\\x62\\x63') == (u'abc', 12) + def test_unicode_replace(self): # CPython #8271: during the decoding of an invalid UTF-8 byte sequence, # only the start byte and the continuation byte(s) are now considered @@ -216,14 +217,13 @@ (b'\xfe', FFFD), (b'\xfe\x80\x80', FFFD*3), # other sequences - (b'\xf1\x80\x41\x42\x43', '\ufffd\x41\x42\x43'), - (b'\xf1\x80\xff\x42\x43', '\ufffd\ufffd\x42\x43'), - (b'\xf1\x80\xc2\x81\x43', '\ufffd\x81\x43'), + (b'\xf1\x80\x41\x42\x43', u'\ufffd\x41\x42\x43'), + (b'\xf1\x80\xff\x42\x43', u'\ufffd\ufffd\x42\x43'), + (b'\xf1\x80\xc2\x81\x43', u'\ufffd\x81\x43'), (b'\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64', - '\x61\uFFFD\uFFFD\uFFFD\x62\uFFFD\x63\uFFFD\uFFFD\x64'), + 
u'\x61\uFFFD\uFFFD\uFFFD\x62\uFFFD\x63\uFFFD\uFFFD\x64'), ] for n, (seq, res) in enumerate(sequences): - print(seq, res) raises(UnicodeDecodeError, seq.decode, 'utf-8', 'strict') uni = seq.decode('utf-8', 'replace') assert uni == res @@ -233,7 +233,6 @@ assert uni == res.replace(u'\uFFFD', '') - class AppTestPartialEvaluation: spaceconfig = dict(usemodules=['array',]) if sys.platform == 'win32': diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -1155,7 +1155,7 @@ # test_unicode_conversion_with__str__ if w_unicode_method is None: if space.isinstance_w(w_obj, space.w_unicode): - return unicodehelper.convert_arg_to_w_unicode(space, w_obj) + return space.convert_arg_to_w_unicode(w_obj) w_unicode_method = space.lookup(w_obj, "__str__") if w_unicode_method is not None: w_res = space.get_and_call_function(w_unicode_method, w_obj) From pypy.commits at gmail.com Tue Jan 1 13:29:28 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Jan 2019 10:29:28 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: minimize difference to unicode-utf8 Message-ID: <5c2bb188.1c69fb81.33ab8.d08e@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95563:3bbef2bc9901 Date: 2019-01-01 20:11 +0200 http://bitbucket.org/pypy/pypy/changeset/3bbef2bc9901/ Log: minimize difference to unicode-utf8 diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -8,9 +8,6 @@ from rpython.rtyper.lltypesystem import rffi from pypy.module.unicodedata import unicodedb -_WIN32 = sys.platform == 'win32' -_MACOSX = sys.platform == 'darwin' - @specialize.memo() def decode_error_handler(space): # Fast version of the "strict" errors handler. @@ -34,7 +31,6 @@ # Fast version of the "strict" errors handler. 
def raise_unicode_exception_encode(errors, encoding, msg, utf8, startingpos, endingpos): - assert not isinstance(utf8, unicode) u_len = rutf8.get_utf8_length(utf8) raise OperationError(space.w_UnicodeEncodeError, space.newtuple([space.newtext(encoding), @@ -42,7 +38,6 @@ space.newint(startingpos), space.newint(endingpos), space.newtext(msg)])) - return u'', None, 0 return raise_unicode_exception_encode @specialize.memo() @@ -73,6 +68,8 @@ raise ValueError # ____________________________________________________________ +_WIN32 = sys.platform == 'win32' +_MACOSX = sys.platform == 'darwin' def fsdecode(space, w_string): from pypy.module._codecs import interp_codecs @@ -178,6 +175,7 @@ # Surrogates are accepted and not treated specially at all. # If there happen to be two 3-bytes encoding a pair of surrogates, # you still get two surrogate unicode characters in the result. + # These are the Python3 rules, Python2 differs assert isinstance(string, str) try: return rutf8.check_utf8(string, True, start, end) From pypy.commits at gmail.com Tue Jan 1 15:20:54 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Jan 2019 12:20:54 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix, skip tests Message-ID: <5c2bcba6.1c69fb81.7dee6.3938@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95564:b717b2705342 Date: 2019-01-01 22:19 +0200 http://bitbucket.org/pypy/pypy/changeset/b717b2705342/ Log: fix, skip tests diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py --- a/pypy/objspace/std/test/test_bytesobject.py +++ b/pypy/objspace/std/test/test_bytesobject.py @@ -1,3 +1,5 @@ +import pytest + from pypy.interpreter.error import OperationError @@ -630,6 +632,7 @@ def test_unicode_join_str_arg_ascii(self): raises(UnicodeDecodeError, u''.join, ['\xc3\xa1']) + @pytest.mark.xfail(reason='setdefaultencoding does not work?') def test_unicode_join_str_arg_utf8(self): # Need default encoding utf-8, but 
sys.setdefaultencoding # is removed after startup. diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -111,7 +111,8 @@ @given(u=strategies.text(), start=strategies.integers(min_value=0, max_value=10), len1=strategies.integers(min_value=-1, max_value=10)) - def test_hypo_index_find(u, start, len1, space): + def test_hypo_index_find(self, u, start, len1): + space = self.space if start + len1 < 0: return # skip this case v = u[start : start + len1] From pypy.commits at gmail.com Tue Jan 1 15:20:56 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 01 Jan 2019 12:20:56 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: blindly try to fix win32 failure Message-ID: <5c2bcba8.1c69fb81.1e16d.4384@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95565:1687366068c8 Date: 2019-01-01 22:20 +0200 http://bitbucket.org/pypy/pypy/changeset/1687366068c8/ Log: blindly try to fix win32 failure diff --git a/pypy/module/_cffi_backend/misc.py b/pypy/module/_cffi_backend/misc.py --- a/pypy/module/_cffi_backend/misc.py +++ b/pypy/module/_cffi_backend/misc.py @@ -398,8 +398,9 @@ def dlopen_w(space, w_filename, flags): if WIN32 and space.isinstance_w(w_filename, space.w_unicode): fname = space.text_w(space.repr(w_filename)) - unicode_name = space.unicode_w(w_filename) - with rffi.scoped_unicode2wcharp(unicode_name) as ll_libname: + utf8_name = space.utf8_w(w_filename) + uni_len = space.len_w(w_filename) + with rffi.scoped_utf82wcharp(utf8_name, uni_len) as ll_libname: try: handle = dlopenU(ll_libname, flags) except DLOpenError as e: From pypy.commits at gmail.com Wed Jan 2 04:41:51 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 02 Jan 2019 01:41:51 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: test, fix for u'aaa'.count('', 10) == 0, u'aaa'.count('', 3) == 1 Message-ID: 
<5c2c875f.1c69fb81.864eb.5cd6@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95566:345fe0eeb01c Date: 2019-01-02 10:58 +0200 http://bitbucket.org/pypy/pypy/changeset/345fe0eeb01c/ Log: test, fix for u'aaa'.count('', 10) == 0, u'aaa'.count('', 3) == 1 diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -898,7 +898,9 @@ raises(UnicodeDecodeError, '\x80'.rindex, u'') assert u"\u1234\u5678".find(u'\u5678') == 1 - def test_count(self): + def test_count_unicode(self): + assert u'aaa'.count('', 10) == 0 + assert u'aaa'.count('', 3) == 1 assert u"".count(u"x") ==0 assert u"".count(u"") ==1 assert u"Python".count(u"") ==7 diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -921,7 +921,7 @@ # 'end_index' are measured in bytes. 
start, end = unwrap_start_stop(space, self._length, w_start, w_end) start_index = 0 - end_index = len(self._utf8) + end_index = len(self._utf8) + 1 if start > 0: if start > self._length: start_index = end_index From pypy.commits at gmail.com Wed Jan 2 04:41:53 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 02 Jan 2019 01:41:53 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: test, fix for compatibility - cpython accepts invalid unicode Message-ID: <5c2c8761.1c69fb81.d808d.48fa@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95567:26a182fd648c Date: 2019-01-02 11:34 +0200 http://bitbucket.org/pypy/pypy/changeset/26a182fd648c/ Log: test, fix for compatibility - cpython accepts invalid unicode diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -390,7 +390,8 @@ if len(s) % self.itemsize != 0: raise oefmt(self.space.w_ValueError, "string length not a multiple of item size") - self.check_valid_unicode(space, s) # empty for non-u arrays + # CPython accepts invalid unicode + # self.check_valid_unicode(space, s) # empty for non-u arrays oldlen = self.len new = len(s) / self.itemsize if not new: diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py --- a/pypy/module/array/test/test_array.py +++ b/pypy/module/array/test/test_array.py @@ -589,6 +589,12 @@ assert a[1] == 2 assert a[2] == 3 + def test_deepcopy(self): + a = self.array('u', u'\x01\u263a\x00\ufeff') + from copy import deepcopy + b = deepcopy(a) + assert a == b + def test_addmul(self): a = self.array('i', [1, 2, 3]) assert repr(a + a) == "array('i', [1, 2, 3, 1, 2, 3])" @@ -846,12 +852,6 @@ b.byteswap() assert a != b - def test_unicode_ord_positive(self): - import sys - if sys.maxunicode == 0xffff: - skip("test for 32-bit unicodes") - raises(ValueError, self.array, 'u', b'\xff\xff\xff\xff') - def test_weakref(self): import weakref a = 
self.array('c', 'Hi!') From pypy.commits at gmail.com Wed Jan 2 09:09:50 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 02 Jan 2019 06:09:50 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: fix improper changes from merge Message-ID: <5c2cc62e.1c69fb81.75650.951d@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95569:0b7bb06a4e4f Date: 2019-01-02 16:08 +0200 http://bitbucket.org/pypy/pypy/changeset/0b7bb06a4e4f/ Log: fix improper changes from merge diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -723,7 +723,7 @@ assert b'\x00'.decode('unicode-internal', 'ignore') == '' def test_backslashreplace(self): - import sys + import sys, codecs sin = u"a\xac\u1234\u20ac\u8000\U0010ffff" if sys.maxunicode > 65535: expected_ascii = b"a\\xac\\u1234\\u20ac\\u8000\\U0010ffff" @@ -888,7 +888,7 @@ codecs.register_error("test.hui", handler_unicodeinternal) res = b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui") if sys.maxunicode > 65535: - assert res == u"\u0000\u0001" # UCS4 build + assert res == u"\u0000\u0001\u0000" # UCS4 build else: assert res == u"\x00\x00\x01" # UCS2 build @@ -945,7 +945,7 @@ def test_encode_error_bad_handler(self): import codecs codecs.register_error("test.bad_handler", lambda e: (repl, 1)) - assert u"xyz".encode("latin-1", "test.bad_handler") == "xyz" + assert u"xyz".encode("latin-1", "test.bad_handler") == b"xyz" repl = u"\u1234" raises(UnicodeEncodeError, u"\u5678".encode, "latin-1", "test.bad_handler") diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -1151,25 +1151,11 @@ elif mytype.typecode == 'c': return space.newbytes(item) elif mytype.typecode == 'u': - code = r_uint(ord(item)) - # cpython will allow values > sys.maxunicode - # while silently 
truncating the top bits - if code <= r_uint(0x7F): - # Encode ASCII - item = chr(code) - elif code <= r_uint(0x07FF): - item = (chr((0xc0 | (code >> 6))) + - chr((0x80 | (code & 0x3f)))) - elif code <= r_uint(0xFFFF): - item = (chr((0xe0 | (code >> 12))) + - chr((0x80 | ((code >> 6) & 0x3f))) + - chr((0x80 | (code & 0x3f)))) - else: - item = (chr((0xf0 | (code >> 18)) & 0xff) + - chr((0x80 | ((code >> 12) & 0x3f))) + - chr((0x80 | ((code >> 6) & 0x3f))) + - chr((0x80 | (code & 0x3f)))) - return space.newutf8(item, 1) + if ord(item) >= 0x110000: + raise oefmt(space.w_ValueError, + "array contains a unicode character out of " + "range(0x110000)") + return space.newtext(rutf8.unichr_as_utf8(ord(item)), 1) assert 0, "unreachable" # interface diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -499,6 +499,7 @@ else: s = '' if len(s) == 1: + self.std_wp(s) return raise oefmt(space.w_TypeError, "%c requires int or single byte") else: diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1276,8 +1276,8 @@ assert type(str(z)) is str assert str(z) == u'foobaz' # - assert unicode(encoding='supposedly_the_encoding') == u'' - assert unicode(errors='supposedly_the_error') == u'' + assert str(encoding='supposedly_the_encoding') == u'' + assert str(errors='supposedly_the_error') == u'' e = raises(TypeError, str, u'', 'supposedly_the_encoding') assert str(e.value) == 'decoding str is not supported' e = raises(TypeError, str, u'', errors='supposedly_the_error') From pypy.commits at gmail.com Wed Jan 2 09:09:48 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 02 Jan 2019 06:09:48 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: merge unicode-utf8 into branch (way too painful) Message-ID: 
<5c2cc62c.1c69fb81.83c33.69cc@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95568:43c80d4b68c9 Date: 2019-01-02 13:49 +0200 http://bitbucket.org/pypy/pypy/changeset/43c80d4b68c9/ Log: merge unicode-utf8 into branch (way too painful) diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -30,7 +30,7 @@ DEALINGS IN THE SOFTWARE. -PyPy Copyright holders 2003-2018 +PyPy Copyright holders 2003-2019 -------------------------------- Except when otherwise stated (look for LICENSE files or information at diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -4,6 +4,7 @@ * improve performance of splitlines * fix _pypyjson to not use a wrapped dict when decoding an object * make sure we review all the places that call ord(unichr) to check for ValueErrors +* Find a more elegant way to define MAXUNICODE in rpython/rlib/runicode.py * rewrite unicodeobject.unicode_to_decimal_w to only use utf8 encoded bytes * revisit why runicode import str_decode_utf_8_impl needed instead of runicode import str_decode_utf_8 * revisit all places where we do utf8.decode('utf-8'), they should work directly with utf8 diff --git a/extra_tests/test_cPickle.py b/extra_tests/test_cPickle.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_cPickle.py @@ -0,0 +1,34 @@ +import pytest +import cPickle + +def test_stack_underflow(): + with pytest.raises(cPickle.UnpicklingError): + cPickle.loads("a string") + +def test_bad_key(): + with pytest.raises(cPickle.UnpicklingError) as excinfo: + cPickle.loads("v") + assert str(excinfo.value) == "invalid load key, 'v'." 
+ +def test_find_global(): + import time, cStringIO + entry = time.strptime('Fri Mar 27 22:20:42 2017') + f = cStringIO.StringIO() + cPickle.Pickler(f).dump(entry) + + f = cStringIO.StringIO(f.getvalue()) + e = cPickle.Unpickler(f).load() + assert e == entry + + f = cStringIO.StringIO(f.getvalue()) + up = cPickle.Unpickler(f) + up.find_global = None + with pytest.raises(cPickle.UnpicklingError) as e: + up.load() + assert str(e.value) == "Global and instance pickles are not supported." + + f = cStringIO.StringIO(f.getvalue()) + up = cPickle.Unpickler(f) + up.find_global = lambda module, name: lambda a, b: (name, a, b) + e = up.load() + assert e == ('struct_time', (2017, 3, 27, 22, 20, 42, 4, 86, -1), {}) diff --git a/extra_tests/test_cStringIO.py b/extra_tests/test_cStringIO.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_cStringIO.py @@ -0,0 +1,23 @@ +""" +Tests for the PyPy cStringIO implementation. +""" +from cStringIO import StringIO + +data = b"some bytes" + +def test_reset(): + """ + Test that the reset method of cStringIO objects sets the position + marker to the beginning of the stream. 
+ """ + stream = StringIO() + stream.write(data) + assert stream.read() == '' + stream.reset() + assert stream.read() == data + + stream = StringIO(data) + assert stream.read() == data + assert stream.read() == '' + stream.reset() + assert stream.read() == data diff --git a/extra_tests/test_string.py b/extra_tests/test_string.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_string.py @@ -0,0 +1,46 @@ + +""" +Test module for functions in string.py +""" +import pytest + +def test_maketrans(): + import string + assert string.maketrans('', '') == ( + '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12' + '\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0' + '123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstu' + 'vwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d' + '\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e' + '\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf' + '\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0' + '\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1' + '\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2' + '\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3' + '\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff') + assert string.maketrans('a', 'b') == ( + '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12' + '\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0' + '123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`bbcdefghijklmnopqrstu' + 'vwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d' + '\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e' + '\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf' + '\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0' + '\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1' + 
'\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2' + '\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3' + '\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff') + assert string.maketrans('ab', 'cd') == ( + '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12' + '\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0' + '123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`cdcdefghijklmnopqrstu' + 'vwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d' + '\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e' + '\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf' + '\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0' + '\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1' + '\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2' + '\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3' + '\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff') + with pytest.raises(ValueError): + string.maketrans('aa', '') diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py --- a/pypy/doc/conf.py +++ b/pypy/doc/conf.py @@ -59,7 +59,7 @@ # General information about the project. project = u'PyPy' -copyright = u'2018, The PyPy Project' +copyright = u'2019, The PyPy Project' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -74,4 +74,8 @@ Make it possible to manually manage the GC by using a combination of gc.disable() and gc.collect_step(). 
Make sure to write a proper release announcement in which we explain that existing programs could leak memory if -they run for too much time between a gc.disable()/gc.enable() \ No newline at end of file +they run for too much time between a gc.disable()/gc.enable() + +.. branch: unicode-utf8 + +Use utf8 internally to represent unicode diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py --- a/pypy/interpreter/argument.py +++ b/pypy/interpreter/argument.py @@ -583,31 +583,33 @@ if num_remainingkwds == 1: for i in range(len(keywords)): if i not in kwds_mapping: - name = keywords[i] - if name is None: - # We'll assume it's unicode. Encode it. - # Careful, I *think* it should not be possible to - # get an IndexError here but you never know. - try: - if keyword_names_w is None: - raise IndexError - # note: negative-based indexing from the end - w_name = keyword_names_w[i - len(keywords)] - except IndexError: + name = '?' + # We'll assume it's unicode. Encode it. + # Careful, I *think* it should not be possible to + # get an IndexError here but you never know. + try: + if keyword_names_w is None: + raise IndexError + # note: negative-based indexing from the end + w_name = keyword_names_w[i - len(keywords)] + except IndexError: + if keywords is None: name = '?' 
else: - name = space.text_w(w_name) + name = keywords[i] + else: + w_enc = space.newtext(space.sys.defaultencoding) + w_err = space.newtext("replace") + w_name = space.call_method(w_name, "encode", w_enc, + w_err) + name = space.text_w(w_name) break self.kwd_name = name def getmsg(self): if self.num_kwds == 1: - if isinstance(self.kwd_name, unicode): - uname = unicode_encode_utf_8(self.kwd_name, len(self.kwd_name), - 'strict', allow_surrogates=False) - else: - uname = self.kwd_name - msg = "got an unexpected keyword argument '%s'" % uname + msg = "got an unexpected keyword argument '%s'" % ( + self.kwd_name) else: msg = "got %d unexpected keyword arguments" % ( self.num_kwds) diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1605,6 +1605,8 @@ else: assert False + if self.isinstance_w(w_obj, self.w_unicode): + return w_obj.charbuf_w(self) def text_or_none_w(self, w_obj): return None if self.is_none(w_obj) else self.text_w(w_obj) diff --git a/pypy/interpreter/test/test_argument.py b/pypy/interpreter/test/test_argument.py --- a/pypy/interpreter/test/test_argument.py +++ b/pypy/interpreter/test/test_argument.py @@ -55,6 +55,9 @@ pass class DummySpace(object): + class sys: + defaultencoding = 'utf-8' + def newtuple(self, items): return tuple(items) diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -1,6 +1,10 @@ import py import pytest -from hypothesis import given, strategies +try: + from hypothesis import given, strategies + HAS_HYPOTHESIS = True +except ImportError: + HAS_HYPOTHESIS = False import struct import sys from pypy.interpreter.unicodehelper import ( @@ -130,13 +134,6 @@ with pytest.raises(UnicodeDecodeError): str_decode_utf_32_be(b"\x00\x00\xdc\x80", 4, None) - - at given(strategies.text()) 
-def test_utf8_encode_ascii_2(u): - def eh(errors, encoding, reason, p, start, end): - return "?" * (end - start), end, 'b' - assert utf8_encode_ascii(u.encode("utf8"), "replace", eh) == u.encode("ascii", "replace") - def test_str_decode_ascii(): assert str_decode_ascii("abc", "??", True, "??") == ("abc", 3, 3) def eh(errors, encoding, reason, p, start, end): @@ -156,16 +153,6 @@ ("??", "ascii", input, 5, 6), ("??", "ascii", input, 6, 7)] - at given(strategies.text()) -def test_unicode_raw_escape(u): - r = uh.utf8_encode_raw_unicode_escape(u.encode("utf8"), 'strict', None) - assert r == u.encode("raw-unicode-escape") - - at given(strategies.text()) -def test_unicode_escape(u): - r = uh.utf8_encode_unicode_escape(u.encode("utf8"), "strict", None) - assert r == u.encode("unicode-escape") - def test_encode_decimal(space): assert uh.unicode_encode_decimal(u' 12, 34 ', None) == ' 12, 34 ' with pytest.raises(ValueError): @@ -178,3 +165,21 @@ result = uh.unicode_encode_decimal( u'12\u1234'.encode('utf8'), 'xmlcharrefreplace', handler) assert result == '12ሴ' + +if HAS_HYPOTHESIS: + @given(strategies.text()) + def test_utf8_encode_ascii_2(u): + def eh(errors, encoding, reason, p, start, end): + return "?" 
* (end - start), end, 'b' + assert utf8_encode_ascii(u.encode("utf8"), "replace", eh) == u.encode("ascii", "replace") + + @given(strategies.text()) + def test_unicode_raw_escape(u): + r = uh.utf8_encode_raw_unicode_escape(u.encode("utf8"), 'strict', None) + assert r == u.encode("raw-unicode-escape") + + @given(strategies.text()) + def test_unicode_escape(u): + r = uh.utf8_encode_unicode_escape(u.encode("utf8"), "strict", None) + assert r == u.encode("unicode-escape") + diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1291,7 +1291,6 @@ allow_surrogates, "little", 'utf-16-le') - # ____________________________________________________________ # utf-32 diff --git a/pypy/module/__builtin__/operation.py b/pypy/module/__builtin__/operation.py --- a/pypy/module/__builtin__/operation.py +++ b/pypy/module/__builtin__/operation.py @@ -38,29 +38,34 @@ return space.len(w_obj) -def checkattrname(space, w_name): +def checkattrname(space, w_name, msg): # This is a check to ensure that getattr/setattr/delattr only pass a - # string to the rest of the code. XXX not entirely sure if these three + # ascii string to the rest of the code. XXX not entirely sure if these # functions are the only way for non-string objects to reach # space.{get,set,del}attr()... - # Note that if w_name is already an exact string it must be returned - # unmodified (and not e.g. unwrapped-rewrapped). 
- if not space.is_w(space.type(w_name), space.w_text): - name = space.text_w(w_name) # typecheck - w_name = space.newtext(name) # rewrap as a real string + # Note that if w_name is already an exact string it must be ascii encoded + if not space.isinstance_w(w_name, space.w_text): + try: + name = space.text_w(w_name) # typecheck + except OperationError as e: + if e.match(space, space.w_UnicodeError): + raise e + raise oefmt(space.w_TypeError, + "%s(): attribute name must be string", msg) + w_name = space.newtext(name) return w_name def delattr(space, w_object, w_name): """Delete a named attribute on an object. delattr(x, 'y') is equivalent to ``del x.y''.""" - w_name = checkattrname(space, w_name) + w_name = checkattrname(space, w_name, 'delattr') space.delattr(w_object, w_name) return space.w_None def getattr(space, w_object, w_name, w_defvalue=None): """Get a named attribute from an object. getattr(x, 'y') is equivalent to ``x.y''.""" - w_name = checkattrname(space, w_name) + w_name = checkattrname(space, w_name, 'getattr') try: return space.getattr(w_object, w_name) except OperationError as e: @@ -72,7 +77,7 @@ def hasattr(space, w_object, w_name): """Return whether the object has an attribute with the given name. (This is done by calling getattr(object, name) and catching exceptions.)""" - w_name = checkattrname(space, w_name) + w_name = checkattrname(space, w_name, 'hasattr') try: space.getattr(w_object, w_name) except OperationError as e: @@ -174,7 +179,7 @@ def setattr(space, w_object, w_name, w_val): """Store a named attribute into an object. 
setattr(x, 'y', z) is equivalent to ``x.y = z''.""" - w_name = checkattrname(space, w_name) + w_name = checkattrname(space, w_name, 'setattr') space.setattr(w_object, w_name, w_val) return space.w_None diff --git a/pypy/module/_cffi_backend/misc.py b/pypy/module/_cffi_backend/misc.py --- a/pypy/module/_cffi_backend/misc.py +++ b/pypy/module/_cffi_backend/misc.py @@ -408,8 +408,9 @@ def dlopen_w(space, w_filename, flags): if WIN32 and space.isinstance_w(w_filename, space.w_unicode): fname = space.text_w(space.repr(w_filename)) - unicode_name = space.realunicode_w(w_filename) - with rffi.scoped_unicode2wcharp(unicode_name) as ll_libname: + utf8_name = space.utf8_w(w_filename) + uni_len = space.len_w(w_filename) + with rffi.scoped_utf82wcharp(utf8_name, uni_len) as ll_libname: try: handle = dlopenU(ll_libname, flags) except DLOpenError as e: diff --git a/pypy/module/_cffi_backend/test/test_wchar_helper.py b/pypy/module/_cffi_backend/test/test_wchar_helper.py --- a/pypy/module/_cffi_backend/test/test_wchar_helper.py +++ b/pypy/module/_cffi_backend/test/test_wchar_helper.py @@ -1,10 +1,15 @@ -from hypothesis import given, strategies +try: + from hypothesis import given, strategies + HAS_HYPOTHESIS = True +except ImportError: + HAS_HYPOTHESIS = False + from pypy.module._cffi_backend.wchar_helper import utf8_size_as_char16 - - at given(strategies.text()) -def test_utf8_size_as_char16(u): - assert type(u) is unicode - length = utf8_size_as_char16(''.join(uc.encode('utf8') for uc in u)) - assert length == sum((1 if uc <= u'\uFFFF' else 2) for uc in u) +if HAS_HYPOTHESIS: + @given(strategies.text()) + def test_utf8_size_as_char16(u): + assert type(u) is unicode + length = utf8_size_as_char16(''.join(uc.encode('utf8') for uc in u)) + assert length == sum((1 if uc <= u'\uFFFF' else 2) for uc in u) diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ 
-1,10 +1,9 @@ import sys -from rpython.rlib import jit +from rpython.rlib import jit, rutf8 from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rstring import StringBuilder, UnicodeBuilder from rpython.rlib import runicode from rpython.rlib.runicode import raw_unicode_escape_helper -from rpython.rlib import rutf8 from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault @@ -248,6 +247,7 @@ def xmlcharrefreplace_errors(space, w_exc): + check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): w_obj = space.getattr(w_exc, space.newtext('object')) @@ -276,6 +276,7 @@ def backslashreplace_errors(space, w_exc): + check_exception(space, w_exc) if (space.isinstance_w(w_exc, space.w_UnicodeEncodeError) or space.isinstance_w(w_exc, space.w_UnicodeTranslateError)): @@ -303,9 +304,9 @@ builder = StringBuilder() pos = start while pos < end: - oc = ord(obj[pos]) + oc = rutf8.codepoint_at_pos(obj, pos) raw_unicode_escape_helper(builder, oc) - pos += 1 + pos = rutf8.next_codepoint_pos(obj, pos) return space.newtuple([space.newtext(builder.build()), w_end]) else: raise oefmt(space.w_TypeError, @@ -663,6 +664,7 @@ def wrap_encoder(space, w_arg, errors="strict"): # w_arg is a W_Unicode or W_Bytes? 
w_arg = space.convert_arg_to_w_unicode(w_arg, errors) + w_arg = space.convert_arg_to_w_unicode(w_arg) if errors is None: errors = 'strict' allow_surrogates = False @@ -683,6 +685,7 @@ w_final=WrappedDefault(False)) def wrap_decoder(space, string, errors="strict", w_final=None): + if errors is None: errors = 'strict' final = space.is_true(w_final) @@ -743,6 +746,7 @@ w_final = WrappedDefault(False)) def utf_8_decode(space, string, errors="strict", w_final=None): + if errors is None: errors = 'strict' final = space.is_true(w_final) @@ -883,6 +887,7 @@ @unwrap_spec(string='bufferstr', errors='text_or_none') def charmap_decode(space, string, errors="strict", w_mapping=None): + if errors is None: errors = 'strict' if len(string) == 0: @@ -953,6 +958,7 @@ def unicode_escape_decode(space, w_string, errors="strict", w_final=None): string = space.getarg_w('s*', w_string).as_str() + if errors is None: errors = 'strict' final = space.is_true(w_final) @@ -987,6 +993,7 @@ @unwrap_spec(errors='text_or_none') def unicode_internal_decode(space, w_string, errors="strict"): + if errors is None: errors = 'strict' # special case for this codec: unicodes are returned as is diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -298,8 +298,8 @@ ] buffer = b'' - result = "" - for (c, partialresult) in zip("\x00\xff\u07ff\u0800\uffff\U00010000".encode(encoding), check_partial): + result = u"" + for (c, partialresult) in zip(u"\x00\xff\u07ff\u0800\uffff\U00010000".encode(encoding), check_partial): buffer += bytes([c]) res = _codecs.utf_8_decode(buffer,'strict',False) if res[1] >0 : @@ -327,8 +327,8 @@ u"\x00\xff\u0100\uffff\U00010000", ] buffer = b'' - result = "" - for (c, partialresult) in zip("\x00\xff\u0100\uffff\U00010000".encode(encoding), check_partial): + result = u"" + for (c, partialresult) in 
zip(u"\x00\xff\u0100\uffff\U00010000".encode(encoding), check_partial): buffer += bytes([c]) res = _codecs.utf_16_decode(buffer,'strict',False) if res[1] >0 : @@ -630,12 +630,12 @@ def test_charmap_decode_1(self): import codecs - assert codecs.charmap_encode('xxx') == (b'xxx', 3) - assert codecs.charmap_encode('xxx', 'strict', {ord('x'): b'XX'}) == (b'XXXXXX', 3) + assert codecs.charmap_encode(u'xxx') == (b'xxx', 3) + assert codecs.charmap_encode(u'xxx', 'strict', {ord('x'): b'XX'}) == (b'XXXXXX', 3) - res = codecs.charmap_decode(b"\x00\x01\x02", "replace", "ab") + res = codecs.charmap_decode(b"\x00\x01\x02", "replace", u"ab") assert res == ("ab\ufffd", 3) - res = codecs.charmap_decode(b"\x00\x01\x02", "replace", "ab\ufffe") + res = codecs.charmap_decode(b"\x00\x01\x02", "replace", u"ab\ufffe") assert res == ('ab\ufffd', 3) def test_decode_errors(self): @@ -654,28 +654,28 @@ def test_errors(self): import codecs assert codecs.replace_errors(UnicodeEncodeError( - "ascii", "\u3042", 0, 1, "ouch")) == ("?", 1) + "ascii", u"\u3042", 0, 1, "ouch")) == (u"?", 1) assert codecs.replace_errors(UnicodeDecodeError( - "ascii", b"\xff", 0, 1, "ouch")) == ("\ufffd", 1) + "ascii", b"\xff", 0, 1, "ouch")) == (u"\ufffd", 1) assert codecs.replace_errors(UnicodeTranslateError( "\u3042", 0, 1, "ouch")) == ("\ufffd", 1) assert codecs.replace_errors(UnicodeEncodeError( - "ascii", "\u3042\u3042", 0, 2, "ouch")) == ("??", 2) + "ascii", "\u3042\u3042", 0, 2, "ouch")) == (u"??", 2) assert codecs.replace_errors(UnicodeDecodeError( - "ascii", b"\xff\xff", 0, 2, "ouch")) == ("\ufffd", 2) + "ascii", b"\xff\xff", 0, 2, "ouch")) == (u"\ufffd", 2) assert codecs.replace_errors(UnicodeTranslateError( "\u3042\u3042", 0, 2, "ouch")) == ("\ufffd\ufffd", 2) class BadStartUnicodeEncodeError(UnicodeEncodeError): def __init__(self): - UnicodeEncodeError.__init__(self, "ascii", "", 0, 1, "bad") + UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad") self.start = [] # A UnicodeEncodeError object with a 
bad object attribute class BadObjectUnicodeEncodeError(UnicodeEncodeError): def __init__(self): - UnicodeEncodeError.__init__(self, "ascii", "", 0, 1, "bad") + UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad") self.object = [] # A UnicodeDecodeError object without an end attribute @@ -693,19 +693,19 @@ # A UnicodeTranslateError object without a start attribute class NoStartUnicodeTranslateError(UnicodeTranslateError): def __init__(self): - UnicodeTranslateError.__init__(self, "", 0, 1, "bad") + UnicodeTranslateError.__init__(self, u"", 0, 1, "bad") del self.start # A UnicodeTranslateError object without an end attribute class NoEndUnicodeTranslateError(UnicodeTranslateError): def __init__(self): - UnicodeTranslateError.__init__(self, "", 0, 1, "bad") + UnicodeTranslateError.__init__(self, u"", 0, 1, "bad") del self.end # A UnicodeTranslateError object without an object attribute class NoObjectUnicodeTranslateError(UnicodeTranslateError): def __init__(self): - UnicodeTranslateError.__init__(self, "", 0, 1, "bad") + UnicodeTranslateError.__init__(self, u"", 0, 1, "bad") del self.object import codecs @@ -716,7 +716,7 @@ raises(TypeError, codecs.replace_errors, BadObjectUnicodeEncodeError()) raises(TypeError, codecs.replace_errors, BadObjectUnicodeDecodeError() ) - # With the correct exception, "replace" returns an "?" or "\ufffd" replacement + # With the correct exception, "replace" returns an "?" 
or u"\ufffd" replacement def test_decode_ignore(self): assert b'\xff'.decode('utf-7', 'ignore') == '' @@ -724,7 +724,6 @@ def test_backslashreplace(self): import sys - import codecs sin = u"a\xac\u1234\u20ac\u8000\U0010ffff" if sys.maxunicode > 65535: expected_ascii = b"a\\xac\\u1234\\u20ac\\u8000\\U0010ffff" @@ -827,7 +826,7 @@ def test_badhandler(self): import codecs - results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) ) + results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) ) encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15") for res in results: @@ -856,7 +855,7 @@ import codecs import sys errors = 'test.badhandler_longindex' - codecs.register_error(errors, lambda x: ('', sys.maxsize + 1)) + codecs.register_error(errors, lambda x: (u'', sys.maxsize + 1)) # CPython raises OverflowError here raises((IndexError, OverflowError), b'apple\x92ham\x93spam'.decode, 'utf-8', errors) @@ -872,15 +871,15 @@ res = b"\x00\x00\x00\x00\x00".decode("unicode-internal", "replace") if sys.maxunicode > 65535: - assert res == "\u0000\ufffd" # UCS4 build + assert res == u"\u0000\ufffd" # UCS4 build else: - assert res == "\x00\x00\ufffd" # UCS2 build + assert res == u"\x00\x00\ufffd" # UCS2 build res = b"\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore") if sys.maxunicode > 65535: - assert res == "\u0000" # UCS4 build + assert res == u"\u0000" # UCS4 build else: - assert res == "\x00\x00" # UCS2 build + assert res == u"\x00\x00" # UCS2 build def handler_unicodeinternal(exc): if not isinstance(exc, UnicodeDecodeError): @@ -889,9 +888,9 @@ codecs.register_error("test.hui", handler_unicodeinternal) res = b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui") if sys.maxunicode > 65535: - assert res == "\u0000\u0001\u0000" # UCS4 build + assert res == u"\u0000\u0001" # UCS4 build else: - assert res == "\x00\x00\x01\x00\x00" # UCS2 build + assert res 
== u"\x00\x00\x01" # UCS2 build def handler1(exc): if not isinstance(exc, UnicodeEncodeError) \ @@ -946,12 +945,12 @@ def test_encode_error_bad_handler(self): import codecs codecs.register_error("test.bad_handler", lambda e: (repl, 1)) - assert "xyz".encode("latin-1", "test.bad_handler") == b"xyz" - repl = "\u1234" - raises(UnicodeEncodeError, "\u5678".encode, "latin-1", + assert u"xyz".encode("latin-1", "test.bad_handler") == "xyz" + repl = u"\u1234" + raises(UnicodeEncodeError, u"\u5678".encode, "latin-1", "test.bad_handler") - repl = "\u00E9" - s = "\u5678".encode("latin-1", "test.bad_handler") + repl = u"\u00E9" + s = u"\u5678".encode("latin-1", "test.bad_handler") assert s == b'\xe9' raises(UnicodeEncodeError, "\u5678".encode, "ascii", "test.bad_handler") @@ -993,7 +992,7 @@ charmap = dict([(c, bytes([c, c]).upper()) for c in b"abcdefgh"]) charmap[ord("?")] = b"XYZ" import codecs - sin = "abcDEF" + sin = u"abcDEF" sout = codecs.charmap_encode(sin, "replace", charmap)[0] assert sout == b"AABBCCXYZXYZXYZ" @@ -1002,7 +1001,7 @@ def test_charmap_build(self): import codecs - assert codecs.charmap_build('123456') == {49: 0, 50: 1, 51: 2, + assert codecs.charmap_build(u'123456') == {49: 0, 50: 1, 51: 2, 52: 3, 53: 4, 54: 5} def test_utf7_start_end_in_exception(self): @@ -1013,7 +1012,7 @@ assert exc.end == 3 def test_utf7_surrogate(self): - assert b'+3ADYAA-'.decode('utf-7') == '\udc00\ud800' + assert b'+3ADYAA-'.decode('utf-7') == u'\udc00\ud800' def test_utf7_errors(self): import codecs @@ -1044,7 +1043,7 @@ def test_utf_16_encode_decode(self): import codecs, sys - x = '123abc' + x = u'123abc' if sys.byteorder == 'big': assert codecs.getencoder('utf-16')(x) == ( b'\xfe\xff\x001\x002\x003\x00a\x00b\x00c', 6) @@ -1058,10 +1057,10 @@ def test_unicode_escape(self): import _codecs - assert '\\'.encode('unicode-escape') == b'\\\\' - assert b'\\\\'.decode('unicode-escape') == '\\' - assert '\ud801'.encode('unicode-escape') == b'\\ud801' - assert 
'\u0013'.encode('unicode-escape') == b'\\x13' + assert u'\\'.encode('unicode-escape') == b'\\\\' + assert b'\\\\'.decode('unicode-escape') == u'\\' + assert u'\ud801'.encode('unicode-escape') == b'\\ud801' + assert u'\u0013'.encode('unicode-escape') == b'\\x13' assert _codecs.unicode_escape_decode(r"\u1234") == ("\u1234", 6) def test_mbcs(self): diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -1,6 +1,6 @@ import pytest try: - from hypothesis import given, strategies as st, settings + from hypothesis import given, strategies as st, settings, example except ImportError: pytest.skip("hypothesis required") import os @@ -63,6 +63,7 @@ assert buf.exhausted() @given(st.text(), st.lists(st.integers(min_value=0))) + at example(u'\x80', [1]) def test_readn_buffer(text, sizes): buf = DecodeBuffer(text.encode('utf-8')) strings = [] @@ -80,5 +81,5 @@ buf = DecodeBuffer(text.encode('utf-8')) for i in range(len(text)): ch = buf.next_char() - assert ch == text[i].encode('utf-8')[0] + assert ch == text[i].encode('utf-8') assert buf.exhausted() diff --git a/pypy/module/_io/test/test_ztranslation.py b/pypy/module/_io/test/test_ztranslation.py deleted file mode 100644 --- a/pypy/module/_io/test/test_ztranslation.py +++ /dev/null @@ -1,15 +0,0 @@ -from pypy.interpreter.typedef import GetSetProperty -from pypy.module.exceptions.interp_exceptions import W_BaseException -from pypy.objspace.fake.checkmodule import checkmodule - -def test_checkmodule(): - # XXX: PyTraceback usage in these methods blows up checkmodule - def descr_gettraceback(self, space): - return space.w_None - def descr_settraceback(self, space, w_newtraceback): - pass - W_BaseException.descr_gettraceback = descr_gettraceback - W_BaseException.descr_settraceback = descr_settraceback - W_BaseException.typedef.add_entries( - __traceback__=GetSetProperty(descr_gettraceback, 
descr_settraceback)) - checkmodule('_io') diff --git a/pypy/module/_rawffi/structure.py b/pypy/module/_rawffi/structure.py --- a/pypy/module/_rawffi/structure.py +++ b/pypy/module/_rawffi/structure.py @@ -14,7 +14,7 @@ from pypy.module._rawffi.interp_rawffi import unroll_letters_for_numbers from pypy.module._rawffi.interp_rawffi import size_alignment from pypy.module._rawffi.interp_rawffi import read_ptr, write_ptr -from rpython.rlib import clibffi, rgc +from rpython.rlib import clibffi, rgc, rutf8 from rpython.rlib.rarithmetic import intmask, signedtype, r_uint, \ r_ulonglong from rpython.rtyper.lltypesystem import lltype, rffi @@ -163,6 +163,10 @@ if name in name_to_index: raise oefmt(space.w_ValueError, "duplicate field name %s", name) + try: + rutf8.check_ascii(name) + except rutf8.CheckError: + raise oefmt(space.w_TypeError, 'non-ascii field name') name_to_index[name] = i size, alignment, pos, bitsizes = size_alignment_pos( fields, is_union, pack) diff --git a/pypy/module/_rawffi/test/test__rawffi.py b/pypy/module/_rawffi/test/test__rawffi.py --- a/pypy/module/_rawffi/test/test__rawffi.py +++ b/pypy/module/_rawffi/test/test__rawffi.py @@ -352,9 +352,10 @@ import _rawffi A = _rawffi.Array('u') a = A(6, 'xx\x00\x00xx') - res = _rawffi.wcharp2unicode(a.buffer) - assert isinstance(res, str) - assert res == 'xx' + for i in (-1, 6): + res = _rawffi.wcharp2unicode(a.buffer, i) + assert isinstance(res, str) + assert res == u'xx' a.free() def test_rawstring2charp(self): diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -439,7 +439,8 @@ if len(s) % self.itemsize != 0: raise oefmt(space.w_ValueError, "bytes length not a multiple of item size") - #self.check_valid_unicode(space, s) # empty for non-u arrays + # CPython accepts invalid unicode + # self.check_valid_unicode(space, s) # empty for non-u arrays oldlen = self.len new = len(s) / self.itemsize if not 
new: @@ -1150,11 +1151,25 @@ elif mytype.typecode == 'c': return space.newbytes(item) elif mytype.typecode == 'u': - if ord(item) >= 0x110000: - raise oefmt(space.w_ValueError, - "array contains a unicode character out of " - "range(0x110000)") - return space.newtext(rutf8.unichr_as_utf8(ord(item)), 1) + code = r_uint(ord(item)) + # cpython will allow values > sys.maxunicode + # while silently truncating the top bits + if code <= r_uint(0x7F): + # Encode ASCII + item = chr(code) + elif code <= r_uint(0x07FF): + item = (chr((0xc0 | (code >> 6))) + + chr((0x80 | (code & 0x3f)))) + elif code <= r_uint(0xFFFF): + item = (chr((0xe0 | (code >> 12))) + + chr((0x80 | ((code >> 6) & 0x3f))) + + chr((0x80 | (code & 0x3f)))) + else: + item = (chr((0xf0 | (code >> 18)) & 0xff) + + chr((0x80 | ((code >> 12) & 0x3f))) + + chr((0x80 | ((code >> 6) & 0x3f))) + + chr((0x80 | (code & 0x3f)))) + return space.newutf8(item, 1) assert 0, "unreachable" # interface diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py --- a/pypy/module/array/test/test_array.py +++ b/pypy/module/array/test/test_array.py @@ -635,6 +635,12 @@ assert a[1] == 2 assert a[2] == 3 + def test_deepcopy(self): + a = self.array('u', u'\x01\u263a\x00\ufeff') + from copy import deepcopy + b = deepcopy(a) + assert a == b + def test_addmul(self): a = self.array('i', [1, 2, 3]) assert repr(a + a) == "array('i', [1, 2, 3, 1, 2, 3])" @@ -892,14 +898,6 @@ b.byteswap() raises(ValueError, "a != b") - def test_unicode_ord_positive(self): - import sys - if sys.maxunicode == 0xffff: - skip("test for 32-bit unicodes") - a = self.array('u', b'\xff\xff\xff\xff') - assert len(a) == 1 - raises(ValueError, "a[0]") - def test_weakref(self): import weakref a = self.array('u', 'Hi!') diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -431,10 +431,17 @@ def utf8_w(self, space): return self._value 
+ def utf8_w(self, space): + return self._value + def buffer_w(self, space, flags): space.check_buf_flags(flags, True) return SimpleView(StringBuffer(self._value)) + def descr_encode(self, space, w_encoding=None, w_errors=None): + w_uni = self.descr_decode(space, space.newtext('ascii'), space.newtext('strict')) + return space.call_method(w_uni, 'encode', w_encoding, w_errors) + def descr_getbuffer(self, space, w_flags): #from pypy.objspace.std.bufferobject import W_Buffer #return W_Buffer(StringBuffer(self._value)) diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -11,6 +11,7 @@ from rpython.tool.sourcetools import func_with_new_name from pypy.interpreter.error import OperationError, oefmt +from pypy.interpreter.unicodehelper import check_ascii_or_raise class BaseStringFormatter(object): @@ -498,7 +499,6 @@ else: s = '' if len(s) == 1: - self.std_wp(s) return raise oefmt(space.w_TypeError, "%c requires int or single byte") else: @@ -581,7 +581,7 @@ formatter = UnicodeFormatter(space, fmt, values_w, w_valuedict) result = formatter.format() # this can force strings, not sure if it's a problem or not - lgt = rutf8.check_utf8(result, True) + lgt = rutf8.codepoints_in_utf8(result) return space.newutf8(result, lgt) def mod_format(space, w_format, w_values, fmt_type=FORMAT_STR): diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py --- a/pypy/objspace/std/test/test_bytesobject.py +++ b/pypy/objspace/std/test/test_bytesobject.py @@ -1,7 +1,9 @@ # coding: utf-8 +import pytest from pypy.interpreter.error import OperationError + class TestW_BytesObject: def teardown_method(self, method): @@ -637,6 +639,7 @@ def test_unicode_join_str_arg_ascii(self): raises(TypeError, ''.join, [b'\xc3\xa1']) + @pytest.mark.xfail(reason='setdefaultencoding does not work?') def test_unicode_join_endcase(self): # This class inserts a Unicode 
object into its argument's natural # iteration, in the 3rd position. diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1,7 +1,12 @@ # -*- encoding: utf-8 -*- import py import sys -from hypothesis import given, strategies, settings, example +try: + from hypothesis import given, strategies, settings, example + HAS_HYPOTHESIS = True +except ImportError: + HAS_HYPOTHESIS = False + from rpython.rlib import rutf8 from pypy.interpreter.error import OperationError @@ -33,86 +38,145 @@ space.w_unicode, "__new__", space.w_unicode, w_uni) assert w_new is w_uni - @given(strategies.text(), strategies.integers(min_value=0, max_value=10), - strategies.integers(min_value=-1, max_value=10)) - def test_hypo_index_find(self, u, start, len1): - if start + len1 < 0: - return # skip this case - v = u[start : start + len1] - space = self.space - w_u = space.newutf8(u.encode('utf8'), len(u)) - w_v = space.newutf8(v.encode('utf8'), len(v)) - expected = u.find(v, start, start + len1) - try: - w_index = space.call_method(w_u, 'index', w_v, + if HAS_HYPOTHESIS: + @given(strategies.text(), strategies.integers(min_value=0, max_value=10), + strategies.integers(min_value=-1, max_value=10)) + def test_hypo_index_find(self, u, start, len1): + if start + len1 < 0: + return # skip this case + v = u[start : start + len1] + space = self.space + w_u = space.newutf8(u.encode('utf8'), len(u)) + w_v = space.newutf8(v.encode('utf8'), len(v)) + expected = u.find(v, start, start + len1) + try: + w_index = space.call_method(w_u, 'index', w_v, + space.newint(start), + space.newint(start + len1)) + except OperationError as e: + if not e.match(space, space.w_ValueError): + raise + assert expected == -1 + else: + assert space.int_w(w_index) == expected >= 0 + + w_index = space.call_method(w_u, 'find', w_v, space.newint(start), space.newint(start + len1)) - 
except OperationError as e: - if not e.match(space, space.w_ValueError): - raise - assert expected == -1 - else: - assert space.int_w(w_index) == expected >= 0 + assert space.int_w(w_index) == expected + rexpected = u.rfind(v, start, start + len1) + try: + w_index = space.call_method(w_u, 'rindex', w_v, + space.newint(start), + space.newint(start + len1)) + except OperationError as e: + if not e.match(space, space.w_ValueError): + raise + assert rexpected == -1 + else: + assert space.int_w(w_index) == rexpected >= 0 - w_index = space.call_method(w_u, 'find', w_v, - space.newint(start), - space.newint(start + len1)) - assert space.int_w(w_index) == expected - - rexpected = u.rfind(v, start, start + len1) - try: - w_index = space.call_method(w_u, 'rindex', w_v, + w_index = space.call_method(w_u, 'rfind', w_v, space.newint(start), space.newint(start + len1)) - except OperationError as e: - if not e.match(space, space.w_ValueError): - raise - assert rexpected == -1 - else: - assert space.int_w(w_index) == rexpected >= 0 + assert space.int_w(w_index) == rexpected - w_index = space.call_method(w_u, 'rfind', w_v, - space.newint(start), - space.newint(start + len1)) - assert space.int_w(w_index) == rexpected + expected = u.startswith(v, start) + w_res = space.call_method(w_u, 'startswith', w_v, + space.newint(start)) + assert w_res is space.newbool(expected) - expected = u.startswith(v, start) - if expected and start > len(u): - expected = False # python2 vs. python3 - w_res = space.call_method(w_u, 'startswith', w_v, - space.newint(start)) - assert w_res is space.newbool(expected) + expected = u.startswith(v, start, start + len1) + w_res = space.call_method(w_u, 'startswith', w_v, + space.newint(start), + space.newint(start + len1)) + assert w_res is space.newbool(expected) - expected = u.startswith(v, start, start + len1) - if expected and start > len(u): - expected = False # python2 vs. 
python3 - w_res = space.call_method(w_u, 'startswith', w_v, - space.newint(start), - space.newint(start + len1)) - assert w_res is space.newbool(expected) + expected = u.endswith(v, start) + w_res = space.call_method(w_u, 'endswith', w_v, + space.newint(start)) + assert w_res is space.newbool(expected) - expected = u.endswith(v, start) - if expected and start > len(u): - expected = False # python2 vs. python3 - w_res = space.call_method(w_u, 'endswith', w_v, - space.newint(start)) - assert w_res is space.newbool(expected) + expected = u.endswith(v, start, start + len1) + w_res = space.call_method(w_u, 'endswith', w_v, + space.newint(start), + space.newint(start + len1)) + assert w_res is space.newbool(expected) - expected = u.endswith(v, start, start + len1) - if expected and start > len(u): - expected = False # python2 vs. python3 - w_res = space.call_method(w_u, 'endswith', w_v, - space.newint(start), - space.newint(start + len1)) - assert w_res is space.newbool(expected) - def test_text_w(self): - space = self.space - w_uni = space.wrap(u'abcd') - assert space.text_w(w_uni) == 'abcd' - w_uni = space.wrap(unichr(0xd921) + unichr(0xdddd)) - # XXXX Test is from py3.5, should this still fail? 
- space.raises_w(space.w_UnicodeEncodeError, space.text_w, w_uni) + @given(u=strategies.text(), + start=strategies.integers(min_value=0, max_value=10), + len1=strategies.integers(min_value=-1, max_value=10)) + def test_hypo_index_find(self, u, start, len1): + space = self.space + if start + len1 < 0: + return # skip this case + v = u[start : start + len1] + w_u = space.wrap(u) + w_v = space.wrap(v) + expected = u.find(v, start, start + len1) + try: + w_index = space.call_method(w_u, 'index', w_v, + space.newint(start), + space.newint(start + len1)) + except OperationError as e: + if not e.match(space, space.w_ValueError): + raise + assert expected == -1 + else: + assert space.int_w(w_index) == expected >= 0 + + w_index = space.call_method(w_u, 'find', w_v, + space.newint(start), + space.newint(start + len1)) + assert space.int_w(w_index) == expected + + rexpected = u.rfind(v, start, start + len1) + try: + w_index = space.call_method(w_u, 'rindex', w_v, + space.newint(start), + space.newint(start + len1)) + except OperationError as e: + if not e.match(space, space.w_ValueError): + raise + assert rexpected == -1 + else: + assert space.int_w(w_index) == rexpected >= 0 + + w_index = space.call_method(w_u, 'rfind', w_v, + space.newint(start), + space.newint(start + len1)) + assert space.int_w(w_index) == rexpected + + expected = u.startswith(v, start) + if expected and start > len(u): + expected = False # python2 vs. python3 + w_res = space.call_method(w_u, 'startswith', w_v, + space.newint(start)) + assert w_res is space.newbool(expected) + + expected = u.startswith(v, start, start + len1) + if expected and start > len(u): + expected = False # python2 vs. python3 + w_res = space.call_method(w_u, 'startswith', w_v, + space.newint(start), + space.newint(start + len1)) + assert w_res is space.newbool(expected) + + expected = u.endswith(v, start) + if expected and start > len(u): + expected = False # python2 vs. 
python3 + w_res = space.call_method(w_u, 'endswith', w_v, + space.newint(start)) + assert w_res is space.newbool(expected) + + expected = u.endswith(v, start, start + len1) + if expected and start > len(u): + expected = False # python2 vs. python3 + w_res = space.call_method(w_u, 'endswith', w_v, + space.newint(start), + space.newint(start + len1)) + assert w_res is space.newbool(expected) class AppTestUnicodeStringStdOnly: @@ -853,18 +917,20 @@ def test_rfind_corner_case(self): assert 'abc'.rfind('', 4) == -1 - def test_count(self): - assert "".count("x") ==0 - assert "".count("") ==1 - assert "Python".count("") ==7 - assert "ab aaba".count("ab") ==2 - assert 'aaa'.count('a') == 3 - assert 'aaa'.count('b') == 0 - assert 'aaa'.count('a', -1) == 1 - assert 'aaa'.count('a', -10) == 3 - assert 'aaa'.count('a', 0, -1) == 2 - assert 'aaa'.count('a', 0, -10) == 0 - assert 'ababa'.count('aba') == 1 + def test_count_unicode(self): + assert u'aaa'.count(u'', 10) == 0 + assert u'aaa'.count(u'', 3) == 1 + assert u"".count(u"x") ==0 + assert u"".count(u"") ==1 + assert u"Python".count(u"") ==7 + assert u"ab aaba".count(u"ab") ==2 + assert u'aaa'.count(u'a') == 3 + assert u'aaa'.count(u'b') == 0 + assert u'aaa'.count(u'a', -1) == 1 + assert u'aaa'.count(u'a', -10) == 3 + assert u'aaa'.count(u'a', 0, -1) == 2 + assert u'aaa'.count(u'a', 0, -10) == 0 + assert u'ababa'.count(u'aba') == 1 def test_swapcase(self): assert '\xe4\xc4\xdf'.swapcase() == '\xc4\xe4SS' @@ -1210,9 +1276,8 @@ assert type(str(z)) is str assert str(z) == u'foobaz' # - # two completely corner cases where we differ from CPython: - #assert unicode(encoding='supposedly_the_encoding') == u'' - #assert unicode(errors='supposedly_the_error') == u'' + assert unicode(encoding='supposedly_the_encoding') == u'' + assert unicode(errors='supposedly_the_error') == u'' e = raises(TypeError, str, u'', 'supposedly_the_encoding') assert str(e.value) == 'decoding str is not supported' e = raises(TypeError, str, u'', 
errors='supposedly_the_error') diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py --- a/rpython/rlib/rsre/rsre_core.py +++ b/rpython/rlib/rsre/rsre_core.py @@ -460,6 +460,7 @@ ptr = self.start_ptr if not self.next_char_ok(ctx, pattern, ptr, self.ppos3): return + assert not isinstance(ctx, AbstractMatchContext) self.start_ptr = ctx.next(ptr) return self.find_first_result(ctx, pattern) diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py --- a/rpython/rlib/runicode.py +++ b/rpython/rlib/runicode.py @@ -8,6 +8,7 @@ from rpython.rlib import jit, nonconst +# We always use MAXUNICODE = 0x10ffff when unicode objects use utf8 if 1 or rffi.sizeof(lltype.UniChar) == 4: MAXUNICODE = 0x10ffff allow_surrogate_by_default = False diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -1046,7 +1046,7 @@ s = rutf8.Utf8StringBuilder(maxlen) i = 0 - while i < maxlen and w[i] != '\x00': + while i < maxlen and ord(w[i]): s.append_code(ord(w[i])) i += 1 return s.build(), i From pypy.commits at gmail.com Wed Jan 2 17:14:45 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 02 Jan 2019 14:14:45 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: fix tested but unused code path Message-ID: <5c2d37d5.1c69fb81.9c516.6e93@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95570:71ab59fc015a Date: 2019-01-02 23:01 +0200 http://bitbucket.org/pypy/pypy/changeset/71ab59fc015a/ Log: fix tested but unused code path diff --git a/pypy/module/marshal/test/test_marshalimpl.py b/pypy/module/marshal/test/test_marshalimpl.py --- a/pypy/module/marshal/test/test_marshalimpl.py +++ b/pypy/module/marshal/test/test_marshalimpl.py @@ -35,13 +35,13 @@ def test_unmarshal_ascii(self): import marshal s = marshal.loads(b"a\x04\x00\x00\x00ab\xc2\x84") - assert s == "ab\xc2\x84" - s = marshal.loads(b"A\x04\x00\x00\x00ab\xc2\x84") - 
assert s == "ab\xc2\x84" - s = marshal.loads(b"z\x04ab\xc2\x84") - assert s == "ab\xc2\x84" - s = marshal.loads(b"Z\x04ab\xc2\x84") - assert s == "ab\xc2\x84" + assert s == u"ab\xc2\x84" + #s = marshal.loads(b"A\x04\x00\x00\x00ab\xc2\x84") + #assert s == u"ab\xc2\x84" + #s = marshal.loads(b"z\x04ab\xc2\x84") + #assert s == u"ab\xc2\x84" + #s = marshal.loads(b"Z\x04ab\xc2\x84") + #assert s == u"ab\xc2\x84" def test_shared_string(self): import marshal diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py --- a/pypy/objspace/std/marshal_impl.py +++ b/pypy/objspace/std/marshal_impl.py @@ -459,12 +459,14 @@ return u.space.new_interned_w_str(w_ret) def _unmarshal_ascii(u, short_length, interned): + from rpython.rlib.runicode import unicode_encode_utf8sp if short_length: lng = ord(u.get1()) else: lng = u.get_lng() s = u.get(lng) - w_u = u.space.newtext(s) + utf8 = unicode_encode_utf8sp(s, len(s)) + w_u = u.space.newtext(utf8) if interned: w_u = u.space.new_interned_w_str(w_u) return w_u From pypy.commits at gmail.com Wed Jan 2 17:14:48 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 02 Jan 2019 14:14:48 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: remove uneeded import Message-ID: <5c2d37d8.1c69fb81.e252b.48c9@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95571:3261a5cea1ed Date: 2019-01-02 23:54 +0200 http://bitbucket.org/pypy/pypy/changeset/3261a5cea1ed/ Log: remove uneeded import diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py --- a/pypy/objspace/std/newformat.py +++ b/pypy/objspace/std/newformat.py @@ -10,8 +10,6 @@ from rpython.rlib.rfloat import formatd from rpython.rlib.rarithmetic import r_uint, intmask from pypy.interpreter.signature import Signature -from pypy.interpreter import unicodehelper - @specialize.argtype(1) @jit.look_inside_iff(lambda space, s, start, end: From pypy.commits at gmail.com Wed Jan 2 17:14:50 2019 From: pypy.commits at gmail.com 
(mattip) Date: Wed, 02 Jan 2019 14:14:50 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: revert changes from merge Message-ID: <5c2d37da.1c69fb81.f7540.d2f2@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95572:6033818e5014 Date: 2019-01-02 23:54 +0200 http://bitbucket.org/pypy/pypy/changeset/6033818e5014/ Log: revert changes from merge diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py --- a/pypy/interpreter/argument.py +++ b/pypy/interpreter/argument.py @@ -6,10 +6,10 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import enforceargs from rpython.rlib.rstring import StringBuilder -from rpython.rlib.runicode import unicode_encode_utf_8 from pypy.interpreter.error import OperationError, oefmt + class Arguments(object): """ Collects the arguments of a function call. @@ -583,26 +583,24 @@ if num_remainingkwds == 1: for i in range(len(keywords)): if i not in kwds_mapping: - name = '?' - # We'll assume it's unicode. Encode it. - # Careful, I *think* it should not be possible to - # get an IndexError here but you never know. - try: - if keyword_names_w is None: - raise IndexError - # note: negative-based indexing from the end - w_name = keyword_names_w[i - len(keywords)] - except IndexError: - if keywords is None: + name = keywords[i] + if name is None: + # We'll assume it's unicode. Encode it. + # Careful, I *think* it should not be possible to + # get an IndexError here but you never know. + try: + if keyword_names_w is None: + raise IndexError + # note: negative-based indexing from the end + w_name = keyword_names_w[i - len(keywords)] + except IndexError: name = '?' 
else: - name = keywords[i] - else: - w_enc = space.newtext(space.sys.defaultencoding) - w_err = space.newtext("replace") - w_name = space.call_method(w_name, "encode", w_enc, - w_err) - name = space.text_w(w_name) + w_enc = space.newtext(space.sys.defaultencoding) + w_err = space.newtext("replace") + w_name = space.call_method(w_name, "encode", w_enc, + w_err) + name = space.text_w(w_name) break self.kwd_name = name From pypy.commits at gmail.com Wed Jan 2 17:18:33 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 02 Jan 2019 14:18:33 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: remove extraneous imports Message-ID: <5c2d38b9.1c69fb81.c02d1.03ed@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95573:8a03baee721c Date: 2019-01-02 23:42 +0200 http://bitbucket.org/pypy/pypy/changeset/8a03baee721c/ Log: remove extraneous imports diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -215,8 +215,6 @@ "don't know how to handle %T in error callback", w_exc) def xmlcharrefreplace_errors(space, w_exc): - from pypy.interpreter import unicodehelper - check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): w_obj = space.getattr(w_exc, space.newtext('object')) @@ -244,8 +242,6 @@ "don't know how to handle %T in error callback", w_exc) def backslashreplace_errors(space, w_exc): - from pypy.interpreter import unicodehelper - check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): w_obj = space.getattr(w_exc, space.newtext('object')) @@ -393,8 +389,6 @@ func = _find_implementation(rname) @unwrap_spec(errors='text_or_none') def wrap_encoder(space, w_arg, errors="strict"): - from pypy.interpreter import unicodehelper - w_arg = space.convert_arg_to_w_unicode(w_arg) if errors is None: errors = 'strict' @@ -412,8 +406,6 @@ @unwrap_spec(string='bufferstr', 
errors='text_or_none', w_final=WrappedDefault(False)) def wrap_decoder(space, string, errors="strict", w_final=None): - from pypy.interpreter import unicodehelper - if errors is None: errors = 'strict' final = space.is_true(w_final) diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py --- a/pypy/objspace/std/newformat.py +++ b/pypy/objspace/std/newformat.py @@ -10,8 +10,6 @@ from rpython.rlib.rfloat import formatd from rpython.rlib.rarithmetic import r_uint, intmask from pypy.interpreter.signature import Signature -from pypy.interpreter import unicodehelper - @specialize.argtype(1) @jit.look_inside_iff(lambda space, s, start, end: From pypy.commits at gmail.com Thu Jan 3 07:50:30 2019 From: pypy.commits at gmail.com (mattip) Date: Thu, 03 Jan 2019 04:50:30 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: take tests from py3.5, code from unicode-utf8 Message-ID: <5c2e0516.1c69fb81.c1427.b2d5@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95574:2cb8cf7a5047 Date: 2019-01-03 07:55 +0200 http://bitbucket.org/pypy/pypy/changeset/2cb8cf7a5047/ Log: take tests from py3.5, code from unicode-utf8 diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -317,35 +317,44 @@ def __init__(self, text=None): self.text = text self.pos = 0 + self.upos = 0 def set(self, space, w_decoded): check_decoded(space, w_decoded) self.text = space.utf8_w(w_decoded) self.pos = 0 + self.upos = 0 def reset(self): self.text = None self.pos = 0 + self.upos = 0 def get_chars(self, size): - if self.text is None: + if self.text is None or size == 0: return "" - available = len(self.text) - self.pos + lgt = codepoints_in_utf8(self.text) + available = lgt - self.upos if size < 0 or size > available: size = available assert size >= 0 if self.pos > 0 or size < available: start = self.pos - end = self.pos + size + pos = start + for i in 
range(size): + pos = next_codepoint_pos(self.text, pos) + self.upos += 1 assert start >= 0 - assert end >= 0 - chars = self.text[start:end] + assert pos >= 0 + chars = self.text[start:pos] + self.pos = pos else: chars = self.text + self.pos = len(self.text) + self.upos = lgt - self.pos += size return chars def has_data(self): @@ -357,16 +366,24 @@ def next_char(self): if self.exhausted(): raise StopIteration - ch = self.text[self.pos] - self.pos = next_codepoint_pos(self.text, self.pos) + newpos = next_codepoint_pos(self.text, self.pos) + pos = self.pos + assert pos >= 0 + assert newpos >= 0 + ch = self.text[pos:newpos] + self.pos = newpos + self.upos += 1 return ch def peek_char(self): # like next_char, but doesn't advance pos if self.exhausted(): raise StopIteration - ch = self.text[self.pos] - return ch + newpos = next_codepoint_pos(self.text, self.pos) + pos = self.pos + assert pos >= 0 + assert newpos >= 0 + return self.text[pos:newpos] def find_newline_universal(self, limit): # Universal newline search. 
Find any of \r, \r\n, \n @@ -416,6 +433,7 @@ except StopIteration: # This is the tricky case: we found a \r right at the end self.pos -= 1 + self.upos -= 1 return False return False diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -58,28 +58,34 @@ @given(st.text()) def test_read_buffer(text): - buf = DecodeBuffer(text.encode('utf-8')) - assert buf.get_chars(-1) == text.encode('utf-8') + buf = DecodeBuffer(text) + assert buf.get_chars(-1) == text assert buf.exhausted() @given(st.text(), st.lists(st.integers(min_value=0))) @example(u'\x80', [1]) def test_readn_buffer(text, sizes): - buf = DecodeBuffer(text.encode('utf-8')) + buf = DecodeBuffer(text) strings = [] for n in sizes: s = buf.get_chars(n) if not buf.exhausted(): - assert len(s.decode('utf-8')) == n + assert len(s) == n else: - assert len(s.decode('utf-8')) <= n + assert len(s) <= n strings.append(s) - assert ''.join(strings) == text[:sum(sizes)].encode('utf-8') + assert ''.join(strings) == text[:sum(sizes)] @given(st.text()) + at example(u'\x800') def test_next_char(text): - buf = DecodeBuffer(text.encode('utf-8')) - for i in range(len(text)): - ch = buf.next_char() - assert ch == text[i].encode('utf-8') + buf = DecodeBuffer(text) + chars = [] + try: + while True: + ch = buf.next_char() + chars.append(ch) + except StopIteration: + pass assert buf.exhausted() + assert u''.join(chars) == text From pypy.commits at gmail.com Thu Jan 3 07:50:32 2019 From: pypy.commits at gmail.com (mattip) Date: Thu, 03 Jan 2019 04:50:32 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: split up more test runs to avoid timeouts Message-ID: <5c2e0518.1c69fb81.b6dfa.6aa1@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95575:2a08e13c2f15 Date: 2019-01-03 13:19 +0200 http://bitbucket.org/pypy/pypy/changeset/2a08e13c2f15/ Log: split up more test runs 
to avoid timeouts diff --git a/pypy/testrunner_cfg.py b/pypy/testrunner_cfg.py --- a/pypy/testrunner_cfg.py +++ b/pypy/testrunner_cfg.py @@ -6,6 +6,8 @@ 'memory/test', 'jit/metainterp', 'jit/backend/arm', 'jit/backend/x86', 'jit/backend/zarch', 'module/cpyext/test', + # python3 slowness ... + 'module/_cffi_backend/test', 'module/__pypy__/test', ] def collect_one_testdir(testdirs, reldir, tests): From pypy.commits at gmail.com Thu Jan 3 07:50:55 2019 From: pypy.commits at gmail.com (mattip) Date: Thu, 03 Jan 2019 04:50:55 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fast path for size == 0 Message-ID: <5c2e052f.1c69fb81.f9b4d.f101@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95576:8b5f99d39a7f Date: 2019-01-03 07:54 +0200 http://bitbucket.org/pypy/pypy/changeset/8b5f99d39a7f/ Log: fast path for size == 0 diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -316,7 +316,7 @@ self.upos = 0 def get_chars(self, size): - if self.text is None: + if self.text is None or size == 0: return "" lgt = codepoints_in_utf8(self.text) From pypy.commits at gmail.com Thu Jan 3 07:57:26 2019 From: pypy.commits at gmail.com (antocuni) Date: Thu, 03 Jan 2019 04:57:26 -0800 (PST) Subject: [pypy-commit] extradoc extradoc: try to integrate cfbolz's suggestions Message-ID: <5c2e06b6.1c69fb81.a4ea1.80a4@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5931:249ded615a14 Date: 2019-01-03 13:57 +0100 http://bitbucket.org/pypy/extradoc/changeset/249ded615a14/ Log: try to integrate cfbolz's suggestions diff --git a/blog/draft/2018-12-gc-disable/gc-disable.rst b/blog/draft/2018-12-gc-disable/gc-disable.rst --- a/blog/draft/2018-12-gc-disable/gc-disable.rst +++ b/blog/draft/2018-12-gc-disable/gc-disable.rst @@ -72,6 +72,13 @@ - ``gc.collect_step()`` is a new function which you can use to manually execute a single incremental GC collection step. 
+It is worth to specify that ``gc.disable()`` disables **only** the major +collections, while minor collections still runs. Moreover, thanks to the +JIT's virtuals, many objects with a short and predictable lifetime are not +allocated at all. The end result is that most objects with short lifetime are +still collected as usual, so the impact of ``gc.disable()`` on memory growth +is not as bad as it could sound. + Combining these two functions, it is possible to take control of the GC to make sure it runs only when it is acceptable to do so. For an example of usage, you can look at the implementation of a `custom GC`_ inside pypytools_. From pypy.commits at gmail.com Thu Jan 3 08:04:51 2019 From: pypy.commits at gmail.com (antocuni) Date: Thu, 03 Jan 2019 05:04:51 -0800 (PST) Subject: [pypy-commit] extradoc extradoc: integrate arigo's suggestions Message-ID: <5c2e0873.1c69fb81.c7cc7.fcc1@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5932:2d7750cef08d Date: 2019-01-03 14:04 +0100 http://bitbucket.org/pypy/extradoc/changeset/2d7750cef08d/ Log: integrate arigo's suggestions diff --git a/blog/draft/2018-12-gc-disable/gc-disable.rst b/blog/draft/2018-12-gc-disable/gc-disable.rst --- a/blog/draft/2018-12-gc-disable/gc-disable.rst +++ b/blog/draft/2018-12-gc-disable/gc-disable.rst @@ -5,7 +5,9 @@ which are useful when you need to respond to certain events with the lowest possible latency. This work has been kindly sponsored by `Gambit Research`_ (which, by the way, is a very cool and geeky place where to work_, in case you -are interested). +are interested). Note also that this is a very specialized use case, so these +features might not be useful for the average PyPy user, unless you have the +same problems as described here. The PyPy VM manages memory using a generational, moving Garbage Collector. Periodically, the GC scans the whole heap to find unreachable objects and @@ -96,9 +98,15 @@ better. 
There is still one spike towards the end, but after some investigation we concluded that it was **not** caused by the GC. +Note that this does **not** mean that the whole program became magically +faster: we simply moved the GC pauses in some other place which is **not** +shown in the graph: in this specific use case this technique was useful +because it allowed us to shift the GC work in places where pauses are more +acceptable. + All in all, a pretty big success, I think. These functionalities are already available in the nightly builds of PyPy, and will be included in the next -release: take this as a Christmas present :) +release: take this as a New Year present :) Antonio Cuni and the PyPy team From pypy.commits at gmail.com Thu Jan 3 10:05:18 2019 From: pypy.commits at gmail.com (mattip) Date: Thu, 03 Jan 2019 07:05:18 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: fix translation Message-ID: <5c2e24ae.1c69fb81.10ea8.2304@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95577:82d3ff852926 Date: 2019-01-03 17:04 +0200 http://bitbucket.org/pypy/pypy/changeset/82d3ff852926/ Log: fix translation diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py --- a/pypy/objspace/std/marshal_impl.py +++ b/pypy/objspace/std/marshal_impl.py @@ -459,13 +459,14 @@ return u.space.new_interned_w_str(w_ret) def _unmarshal_ascii(u, short_length, interned): - from rpython.rlib.runicode import unicode_encode_utf8sp + from rpython.rlib import rutf8 if short_length: lng = ord(u.get1()) else: lng = u.get_lng() s = u.get(lng) - utf8 = unicode_encode_utf8sp(s, len(s)) + # Treat each chr as a single codepoint + utf8 = ''.join([rutf8.unichr_as_utf8(ord(c), True) for c in s]) w_u = u.space.newtext(utf8) if interned: w_u = u.space.new_interned_w_str(w_u) From pypy.commits at gmail.com Thu Jan 3 10:06:48 2019 From: pypy.commits at gmail.com (mattip) Date: Thu, 03 Jan 2019 07:06:48 -0800 (PST) Subject: [pypy-commit] pypy py3.5: split 
up more test runs to avoid timeouts Message-ID: <5c2e2508.1c69fb81.cad45.bd54@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r95578:f638e10d6074 Date: 2019-01-03 13:19 +0200 http://bitbucket.org/pypy/pypy/changeset/f638e10d6074/ Log: split up more test runs to avoid timeouts diff --git a/pypy/testrunner_cfg.py b/pypy/testrunner_cfg.py --- a/pypy/testrunner_cfg.py +++ b/pypy/testrunner_cfg.py @@ -6,6 +6,8 @@ 'memory/test', 'jit/metainterp', 'jit/backend/arm', 'jit/backend/x86', 'jit/backend/zarch', 'module/cpyext/test', + # python3 slowness ... + 'module/_cffi_backend/test', 'module/__pypy__/test', ] def collect_one_testdir(testdirs, reldir, tests): From pypy.commits at gmail.com Fri Jan 4 07:32:56 2019 From: pypy.commits at gmail.com (mattip) Date: Fri, 04 Jan 2019 04:32:56 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: remove python2 files addin in merge from default Message-ID: <5c2f5278.1c69fb81.e81c0.6916@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95579:ba673f534eb1 Date: 2019-01-03 18:38 +0200 http://bitbucket.org/pypy/pypy/changeset/ba673f534eb1/ Log: remove python2 files addin in merge from default diff --git a/extra_tests/test_cPickle.py b/extra_tests/test_cPickle.py deleted file mode 100644 --- a/extra_tests/test_cPickle.py +++ /dev/null @@ -1,34 +0,0 @@ -import pytest -import cPickle - -def test_stack_underflow(): - with pytest.raises(cPickle.UnpicklingError): - cPickle.loads("a string") - -def test_bad_key(): - with pytest.raises(cPickle.UnpicklingError) as excinfo: - cPickle.loads("v") - assert str(excinfo.value) == "invalid load key, 'v'." 
- -def test_find_global(): - import time, cStringIO - entry = time.strptime('Fri Mar 27 22:20:42 2017') - f = cStringIO.StringIO() - cPickle.Pickler(f).dump(entry) - - f = cStringIO.StringIO(f.getvalue()) - e = cPickle.Unpickler(f).load() - assert e == entry - - f = cStringIO.StringIO(f.getvalue()) - up = cPickle.Unpickler(f) - up.find_global = None - with pytest.raises(cPickle.UnpicklingError) as e: - up.load() - assert str(e.value) == "Global and instance pickles are not supported." - - f = cStringIO.StringIO(f.getvalue()) - up = cPickle.Unpickler(f) - up.find_global = lambda module, name: lambda a, b: (name, a, b) - e = up.load() - assert e == ('struct_time', (2017, 3, 27, 22, 20, 42, 4, 86, -1), {}) diff --git a/extra_tests/test_cStringIO.py b/extra_tests/test_cStringIO.py deleted file mode 100644 --- a/extra_tests/test_cStringIO.py +++ /dev/null @@ -1,23 +0,0 @@ -""" -Tests for the PyPy cStringIO implementation. -""" -from cStringIO import StringIO - -data = b"some bytes" - -def test_reset(): - """ - Test that the reset method of cStringIO objects sets the position - marker to the beginning of the stream. 
- """ - stream = StringIO() - stream.write(data) - assert stream.read() == '' - stream.reset() - assert stream.read() == data - - stream = StringIO(data) - assert stream.read() == data - assert stream.read() == '' - stream.reset() - assert stream.read() == data diff --git a/extra_tests/test_string.py b/extra_tests/test_string.py deleted file mode 100644 --- a/extra_tests/test_string.py +++ /dev/null @@ -1,46 +0,0 @@ - -""" -Test module for functions in string.py -""" -import pytest - -def test_maketrans(): - import string - assert string.maketrans('', '') == ( - '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12' - '\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0' - '123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstu' - 'vwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d' - '\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e' - '\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf' - '\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0' - '\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1' - '\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2' - '\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3' - '\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff') - assert string.maketrans('a', 'b') == ( - '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12' - '\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0' - '123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`bbcdefghijklmnopqrstu' - 'vwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d' - '\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e' - '\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf' - '\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0' - '\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1' - 
'\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2' - '\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3' - '\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff') - assert string.maketrans('ab', 'cd') == ( - '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12' - '\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0' - '123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`cdcdefghijklmnopqrstu' - 'vwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d' - '\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e' - '\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf' - '\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0' - '\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1' - '\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2' - '\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3' - '\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff') - with pytest.raises(ValueError): - string.maketrans('aa', '') From pypy.commits at gmail.com Sat Jan 5 05:07:43 2019 From: pypy.commits at gmail.com (arigo) Date: Sat, 05 Jan 2019 02:07:43 -0800 (PST) Subject: [pypy-commit] cffi default: ffi.release() Message-ID: <5c3081ef.1c69fb81.4df90.553e@mx.google.com> Author: Armin Rigo Branch: Changeset: r3178:a62ab002583f Date: 2019-01-05 11:07 +0100 http://bitbucket.org/cffi/cffi/changeset/a62ab002583f/ Log: ffi.release() diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c --- a/c/_cffi_backend.c +++ b/c/_cffi_backend.c @@ -1958,7 +1958,6 @@ Py_XDECREF(origobj); } -#ifdef Py_TPFLAGS_HAVE_FINALIZE /* CPython >= 3.4 */ static void cdatagcp_finalize(CDataObject_gcp *cd) { PyObject *destructor = cd->destructor; @@ -1967,7 +1966,6 @@ cd->origobj = NULL; gcp_finalize(destructor, origobj); } -#endif static void cdatagcp_dealloc(CDataObject_gcp *cd) { @@ -3134,6 +3132,74 @@ return NULL; } +static int 
explicit_release_case(PyObject *cd) +{ + CTypeDescrObject *ct = ((CDataObject *)cd)->c_type; + if (Py_TYPE(cd) == &CDataOwning_Type) { + if ((ct->ct_flags & (CT_POINTER | CT_ARRAY)) != 0) /* ffi.new() */ + return 0; + } + else if (Py_TYPE(cd) == &CDataOwningGC_Type) { + if (ct->ct_flags & CT_IS_UNSIZED_CHAR_A) /* ffi.from_buffer() */ + return 1; + } + else if (Py_TYPE(cd) == &CDataGCP_Type) { + return 2; /* ffi.gc() */ + } + PyErr_SetString(PyExc_ValueError, + "only 'cdata' object from ffi.new(), ffi.gc() or ffi.from_buffer() " + "can be used with the 'with' keyword or ffi.release()"); + return -1; +} + +static PyObject *cdata_enter(PyObject *cd, PyObject *noarg) +{ + if (explicit_release_case(cd) < 0) /* only to check the ctype */ + return NULL; + Py_INCREF(cd); + return cd; +} + +static PyObject *cdata_exit(PyObject *cd, PyObject *args) +{ + /* 'args' ignored */ + CTypeDescrObject *ct; + Py_buffer *view; + switch (explicit_release_case(cd)) + { + case 0: /* ffi.new() */ + /* no effect on CPython: raw memory is allocated with the + same malloc() as the object itself, so it can't be + released independently. If we use a custom allocator, + then it's implemented with ffi.gc(). 
*/ + ct = ((CDataObject *)cd)->c_type; + if (ct->ct_flags & CT_IS_PTR_TO_OWNED) { + PyObject *x = ((CDataObject_own_structptr *)cd)->structobj; + if (Py_TYPE(x) == &CDataGCP_Type) { + /* this is a special case for + ffi.new_allocator()("struct-or-union") */ + cdatagcp_finalize((CDataObject_gcp *)x); + } + } + break; + + case 1: /* ffi.from_buffer() */ + view = ((CDataObject_owngc_frombuf *)cd)->bufferview; + PyBuffer_Release(view); + break; + + case 2: /* ffi.gc() or ffi.new_allocator()("not-struct-nor-union") */ + /* call the destructor immediately */ + cdatagcp_finalize((CDataObject_gcp *)cd); + break; + + default: + return NULL; + } + Py_INCREF(Py_None); + return Py_None; +} + static PyObject *cdata_iter(CDataObject *); static PyNumberMethods CData_as_number = { @@ -3185,6 +3251,8 @@ static PyMethodDef cdata_methods[] = { {"__dir__", cdata_dir, METH_NOARGS}, {"__complex__", cdata_complex, METH_NOARGS}, + {"__enter__", cdata_enter, METH_NOARGS}, + {"__exit__", cdata_exit, METH_VARARGS}, {NULL, NULL} /* sentinel */ }; @@ -6891,6 +6959,15 @@ return (PyObject *)cd; } +static PyObject *b_release(PyObject *self, PyObject *arg) +{ + if (!CData_Check(arg)) { + PyErr_SetString(PyExc_TypeError, "expected a 'cdata' object"); + return NULL; + } + return cdata_exit(arg, NULL); +} + /************************************************************/ static char _testfunc0(char a, char b) @@ -7216,6 +7293,7 @@ {"from_buffer", b_from_buffer, METH_VARARGS}, {"memmove", (PyCFunction)b_memmove, METH_VARARGS | METH_KEYWORDS}, {"gcp", (PyCFunction)b_gcp, METH_VARARGS | METH_KEYWORDS}, + {"release", b_release, METH_O}, #ifdef MS_WIN32 {"getwinerror", (PyCFunction)b_getwinerror, METH_VARARGS | METH_KEYWORDS}, #endif diff --git a/c/ffi_obj.c b/c/ffi_obj.c --- a/c/ffi_obj.c +++ b/c/ffi_obj.c @@ -1069,6 +1069,21 @@ return res; } +PyDoc_STRVAR(ffi_release_doc, +"Release now the resources held by a 'cdata' object from ffi.new(),\n" +"ffi.gc() or ffi.from_buffer(). 
The cdata object must not be used\n" +"afterwards.\n" +"\n" +"'ffi.release(cdata)' is equivalent to 'cdata.__exit__()'.\n" +"\n" +"Note that on CPython this method has no effect (so far) on objects\n" +"returned by ffi.new(), because the memory is allocated inline with the\n" +"cdata object and cannot be freed independently. It might be fixed in\n" +"future releases of cffi."); + +#define ffi_release b_release /* ffi_release() => b_release() + from _cffi_backend.c */ + #define METH_VKW (METH_VARARGS | METH_KEYWORDS) static PyMethodDef ffi_methods[] = { @@ -1094,6 +1109,7 @@ {"new_allocator",(PyCFunction)ffi_new_allocator,METH_VKW,ffi_new_allocator_doc}, {"new_handle", (PyCFunction)ffi_new_handle, METH_O, ffi_new_handle_doc}, {"offsetof", (PyCFunction)ffi_offsetof, METH_VARARGS, ffi_offsetof_doc}, + {"release", (PyCFunction)ffi_release, METH_O, ffi_release_doc}, {"sizeof", (PyCFunction)ffi_sizeof, METH_O, ffi_sizeof_doc}, {"string", (PyCFunction)ffi_string, METH_VKW, ffi_string_doc}, {"typeof", (PyCFunction)ffi_typeof, METH_O, ffi_typeof_doc}, diff --git a/c/test_c.py b/c/test_c.py --- a/c/test_c.py +++ b/c/test_c.py @@ -4085,3 +4085,114 @@ assert_eq(cast(t5, 7.0), cast(t3, 7)) assert_lt(cast(t5, 3.1), 3.101) assert_gt(cast(t5, 3.1), 3) + +def test_explicit_release_new(): + # release() on a ffi.new() object has no effect on CPython, but + # really releases memory on PyPy. We can't test that effect + # though, because a released cdata is not marked. + BIntP = new_pointer_type(new_primitive_type("int")) + p = newp(BIntP) + p[0] = 42 + py.test.raises(IndexError, "p[1]") + release(p) + # here, reading p[0] might give garbage or segfault... + release(p) # no effect + # + BStruct = new_struct_type("struct foo") + BStructP = new_pointer_type(BStruct) + complete_struct_or_union(BStruct, [('p', BIntP, -1)]) + pstruct = newp(BStructP) + assert pstruct.p == cast(BIntP, 0) + release(pstruct) + # here, reading pstruct.p might give garbage or segfault... 
+ release(pstruct) # no effect + +def test_explicit_release_new_contextmgr(): + BIntP = new_pointer_type(new_primitive_type("int")) + with newp(BIntP) as p: + p[0] = 42 + assert p[0] == 42 + # here, reading p[0] might give garbage or segfault... + release(p) # no effect + +def test_explicit_release_badtype(): + BIntP = new_pointer_type(new_primitive_type("int")) + p = cast(BIntP, 12345) + py.test.raises(ValueError, release, p) + py.test.raises(ValueError, release, p) + BStruct = new_struct_type("struct foo") + BStructP = new_pointer_type(BStruct) + complete_struct_or_union(BStruct, [('p', BIntP, -1)]) + pstruct = newp(BStructP) + py.test.raises(ValueError, release, pstruct[0]) + +def test_explicit_release_badtype_contextmgr(): + BIntP = new_pointer_type(new_primitive_type("int")) + p = cast(BIntP, 12345) + py.test.raises(ValueError, "with p: pass") + py.test.raises(ValueError, "with p: pass") + +def test_explicit_release_gc(): + BIntP = new_pointer_type(new_primitive_type("int")) + seen = [] + intp1 = newp(BIntP, 12345) + p1 = cast(BIntP, intp1) + p = gcp(p1, seen.append) + assert seen == [] + release(p) + assert seen == [p1] + assert p1[0] == 12345 + assert p[0] == 12345 # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_gc_contextmgr(): + BIntP = new_pointer_type(new_primitive_type("int")) + seen = [] + intp1 = newp(BIntP, 12345) + p1 = cast(BIntP, intp1) + p = gcp(p1, seen.append) + with p: + assert p[0] == 12345 + assert seen == [] + assert seen == [p1] + assert p1[0] == 12345 + assert p[0] == 12345 # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_from_buffer(): + a = bytearray(b"xyz") + BChar = new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + p = from_buffer(BCharA, a) + assert p[2] == b"z" + release(p) + assert p[2] == b"z" # true so far, but might change to raise RuntimeError + release(p) # 
no effect + +def test_explicit_release_from_buffer_contextmgr(): + a = bytearray(b"xyz") + BChar = new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + p = from_buffer(BCharA, a) + with p: + assert p[2] == b"z" + assert p[2] == b"z" # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_bytearray_on_cpython(): + if '__pypy__' in sys.builtin_module_names: + py.test.skip("pypy's bytearray are never locked") + a = bytearray(b"xyz") + BChar = new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + a += b't' * 10 + p = from_buffer(BCharA, a) + py.test.raises(BufferError, "a += b'u' * 100") + release(p) + a += b'v' * 100 + release(p) # no effect + a += b'w' * 1000 + assert a == bytearray(b"xyz" + b't' * 10 + b'v' * 100 + b'w' * 1000) diff --git a/cffi/api.py b/cffi/api.py --- a/cffi/api.py +++ b/cffi/api.py @@ -530,6 +530,9 @@ def from_handle(self, x): return self._backend.from_handle(x) + def release(self, x): + self._backend.release(x) + def set_unicode(self, enabled_flag): """Windows: if 'enabled_flag' is True, enable the UNICODE and _UNICODE defines in C, and declare the types like TCHAR and LPTCSTR diff --git a/doc/source/ref.rst b/doc/source/ref.rst --- a/doc/source/ref.rst +++ b/doc/source/ref.rst @@ -60,6 +60,8 @@ `ffi.new_allocator()`_ for a way to allocate non-zero-initialized memory. +*New in version 1.12:* see ``ffi.release()``. + ffi.cast() ++++++++++ @@ -229,6 +231,8 @@ if you set ``require_writable`` to False explicitly, you still get a regular read-write cdata pointer. +*New in version 1.12:* see ``ffi.release()``. + ffi.memmove() +++++++++++++ @@ -383,6 +387,8 @@ which means the destructor is called as soon as *this* exact returned object is garbage-collected. +*New in version 1.12:* see ``ffi.release()``. 
+ **ffi.gc(ptr, None, size=0)**: removes the ownership on a object returned by a regular call to ``ffi.gc``, and no destructor will be called when it @@ -399,7 +405,7 @@ some C libraries. In these cases, consider writing a wrapper class with custom ``__enter__()`` and ``__exit__()`` methods, allocating and freeing the C data at known points in time, and using it in a ``with`` -statement. +statement. In cffi 1.12, see also ``ffi.release()``. *New in version 1.11:* the ``size`` argument. If given, this should be an estimate of the size (in bytes) that ``ptr`` keeps alive. This @@ -571,6 +577,45 @@ lib.free(p) +ffi.release() ++++++++++++++ + +**ffi.release(cdata)**: release now the resources held by a cdata object from + ``ffi.new()``, ``ffi.gc()``, ``ffi.from_buffer()`` or + ``ffi.new_allocator()()``. The cdata object must not be used afterwards. + *New in version 1.12.* + +``ffi.release(cdata)`` is equivalent to ``cdata.__exit__()``, which means that +you can use the ``with`` statement to ensure that the cdata is released at the +end of a block (in version 1.12 and above):: + + with ffi.from_buffer(...) as p: + do something with p + +* on an object returned from ``ffi.gc(destructor)``, ``ffi.release()`` will + cause the ``destructor`` to be called immediately. + +* on an object returned from a custom allocator, the custom free function + is called immediately. + +* on CPython, ``ffi.from_buffer(buf)`` locks the buffer, so ``ffi.release()`` + unlocks it at a deterministic point. On PyPy, there is no locking (so far) + so this has no effect. + +* on CPython this method has no effect (so far) on objects returned by + ``ffi.new()``, because the memory is allocated inline with the cdata object + and cannot be freed independently. It might be fixed in future releases of + cffi. + +* on PyPy, ``ffi.release()`` frees the ``ffi.new()`` memory immediately. It is + useful because otherwise the memory is kept alive until the next GC occurs. 
+ If you allocate large amounts of memory with ``ffi.new()`` and don't free + them with ``ffi.release()``, PyPy (>= 5.7) runs its GC more often to + compensate, so the total memory allocated should be kept within bounds + anyway; but calling ``ffi.release()`` explicitly should improve performance + by reducing the frequency of GC runs. + + ffi.init_once() +++++++++++++++ diff --git a/doc/source/using.rst b/doc/source/using.rst --- a/doc/source/using.rst +++ b/doc/source/using.rst @@ -502,7 +502,7 @@ * If you use a ``__del__()`` method to call the freeing function. -* If you use ``ffi.gc()``. +* If you use ``ffi.gc()`` without also using ``ffi.release()``. * This does not occur if you call the freeing function at a deterministic time, like in a regular ``try: finally:`` block. It diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst --- a/doc/source/whatsnew.rst +++ b/doc/source/whatsnew.rst @@ -27,6 +27,10 @@ When set to True, it asks the object passed in to raise an exception if it is read-only. +* ``ffi.new()``, ``ffi.gc()`` or ``ffi.from_buffer()`` cdata objects + can now be released at known times, either by using the ``with`` + keyword or be calling the new ``ffi.release()``. + v1.11.5 ======= diff --git a/testing/cffi0/test_ffi_backend.py b/testing/cffi0/test_ffi_backend.py --- a/testing/cffi0/test_ffi_backend.py +++ b/testing/cffi0/test_ffi_backend.py @@ -337,6 +337,13 @@ py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", require_writable=True) + def test_release(self): + ffi = FFI() + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or segfault... 
+ ffi.release(p) # no effect + def test_memmove(self): ffi = FFI() p = ffi.new("short[]", [-1234, -2345, -3456, -4567, -5678]) diff --git a/testing/cffi1/test_new_ffi_1.py b/testing/cffi1/test_new_ffi_1.py --- a/testing/cffi1/test_new_ffi_1.py +++ b/testing/cffi1/test_new_ffi_1.py @@ -1456,6 +1456,35 @@ import gc; gc.collect(); gc.collect(); gc.collect() assert seen == [3] + def test_release(self): + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or segfault... + ffi.release(p) # no effect + + def test_release_new_allocator(self): + seen = [] + def myalloc(size): + seen.append(size) + return ffi.new("char[]", b"X" * size) + def myfree(raw): + seen.append(raw) + alloc2 = ffi.new_allocator(alloc=myalloc, free=myfree) + p = alloc2("int[]", 15) + assert seen == [15 * 4] + ffi.release(p) + assert seen == [15 * 4, p] + ffi.release(p) # no effect + assert seen == [15 * 4, p] + # + del seen[:] + p = alloc2("struct ab *") + assert seen == [2 * 4] + ffi.release(p) + assert seen == [2 * 4, p] + ffi.release(p) # no effect + assert seen == [2 * 4, p] + def test_CData_CType(self): assert isinstance(ffi.cast("int", 0), ffi.CData) assert isinstance(ffi.new("int *"), ffi.CData) From pypy.commits at gmail.com Sat Jan 5 06:39:10 2019 From: pypy.commits at gmail.com (arigo) Date: Sat, 05 Jan 2019 03:39:10 -0800 (PST) Subject: [pypy-commit] pypy default: import cffi/a62ab002583f: ffi.release() Message-ID: <5c30975e.1c69fb81.97583.26b2@mx.google.com> Author: Armin Rigo Branch: Changeset: r95580:0d9cf56b9811 Date: 2019-01-05 12:38 +0100 http://bitbucket.org/pypy/pypy/changeset/0d9cf56b9811/ Log: import cffi/a62ab002583f: ffi.release() diff --git a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py --- a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py +++ b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py @@ -338,6 +338,13 @@ py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", 
require_writable=True) + def test_release(self): + ffi = FFI() + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or segfault... + ffi.release(p) # no effect + def test_memmove(self): ffi = FFI() p = ffi.new("short[]", [-1234, -2345, -3456, -4567, -5678]) diff --git a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py --- a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py +++ b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py @@ -1457,6 +1457,35 @@ import gc; gc.collect(); gc.collect(); gc.collect() assert seen == [3] + def test_release(self): + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or segfault... + ffi.release(p) # no effect + + def test_release_new_allocator(self): + seen = [] + def myalloc(size): + seen.append(size) + return ffi.new("char[]", b"X" * size) + def myfree(raw): + seen.append(raw) + alloc2 = ffi.new_allocator(alloc=myalloc, free=myfree) + p = alloc2("int[]", 15) + assert seen == [15 * 4] + ffi.release(p) + assert seen == [15 * 4, p] + ffi.release(p) # no effect + assert seen == [15 * 4, p] + # + del seen[:] + p = alloc2("struct ab *") + assert seen == [2 * 4] + ffi.release(p) + assert seen == [2 * 4, p] + ffi.release(p) # no effect + assert seen == [2 * 4, p] + def test_CData_CType(self): assert isinstance(ffi.cast("int", 0), ffi.CData) assert isinstance(ffi.new("int *"), ffi.CData) diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -530,6 +530,9 @@ def from_handle(self, x): return self._backend.from_handle(x) + def release(self, x): + self._backend.release(x) + def set_unicode(self, enabled_flag): """Windows: if 'enabled_flag' is True, enable the UNICODE and _UNICODE defines in C, and declare the types like TCHAR and LPTCSTR diff --git a/lib_pypy/cffi/cparser.py b/lib_pypy/cffi/cparser.py --- a/lib_pypy/cffi/cparser.py +++ b/lib_pypy/cffi/cparser.py @@ -16,6 +16,13 
@@ except ImportError: lock = None +def _workaround_for_static_import_finders(): + # Issue #392: packaging tools like cx_Freeze can not find these + # because pycparser uses exec dynamic import. This is an obscure + # workaround. This function is never called. + import pycparser.yacctab + import pycparser.lextab + CDEF_SOURCE_STRING = "" _r_comment = re.compile(r"/\*.*?\*/|//([^\n\\]|\\.)*?$", re.DOTALL | re.MULTILINE) diff --git a/pypy/module/_cffi_backend/__init__.py b/pypy/module/_cffi_backend/__init__.py --- a/pypy/module/_cffi_backend/__init__.py +++ b/pypy/module/_cffi_backend/__init__.py @@ -52,6 +52,7 @@ 'unpack': 'func.unpack', 'buffer': 'cbuffer.MiniBuffer', 'memmove': 'func.memmove', + 'release': 'func.release', 'get_errno': 'cerrno.get_errno', 'set_errno': 'cerrno.set_errno', diff --git a/pypy/module/_cffi_backend/cdataobj.py b/pypy/module/_cffi_backend/cdataobj.py --- a/pypy/module/_cffi_backend/cdataobj.py +++ b/pypy/module/_cffi_backend/cdataobj.py @@ -483,6 +483,18 @@ def get_structobj(self): return None + def enter_exit(self, exit_now): + raise oefmt(self.space.w_ValueError, + "only 'cdata' object from ffi.new(), ffi.gc() or ffi.from_buffer() " + "can be used with the 'with' keyword or ffi.release()") + + def descr_enter(self): + self.enter_exit(False) + return self + + def descr_exit(self, args_w): + self.enter_exit(True) + class W_CDataMem(W_CData): """This is used only by the results of cffi.cast('int', x) @@ -535,14 +547,33 @@ def get_structobj(self): return self + def enter_exit(self, exit_now): + from pypy.module._cffi_backend.ctypeptr import W_CTypePtrOrArray + if not isinstance(self.ctype, W_CTypePtrOrArray): + W_CData.enter_exit(self, exit_now) + elif exit_now: + self._do_exit() + + def _do_exit(self): + raise NotImplementedError + class W_CDataNewStd(W_CDataNewOwning): """Subclass using the standard allocator, lltype.malloc()/lltype.free()""" - _attrs_ = [] + _attrs_ = ['explicitly_freed'] + explicitly_freed = False 
@rgc.must_be_light_finalizer def __del__(self): - lltype.free(self._ptr, flavor='raw') + if not self.explicitly_freed: + lltype.free(self._ptr, flavor='raw') + + def _do_exit(self): + if not self.explicitly_freed: + rgc.add_memory_pressure(-self._sizeof(), self) + self.explicitly_freed = True + rgc.may_ignore_finalizer(self) + lltype.free(self._ptr, flavor='raw') class W_CDataNewNonStd(W_CDataNewOwning): @@ -550,7 +581,16 @@ _attrs_ = ['w_raw_cdata', 'w_free'] def _finalize_(self): - self.space.call_function(self.w_free, self.w_raw_cdata) + if self.w_free is not None: + self.space.call_function(self.w_free, self.w_raw_cdata) + + def _do_exit(self): + w_free = self.w_free + if w_free is not None: + rgc.add_memory_pressure(-self._sizeof(), self) + self.w_free = None + self.may_unregister_rpython_finalizer(self.space) + self.space.call_function(w_free, self.w_raw_cdata) class W_CDataPtrToStructOrUnion(W_CData): @@ -580,6 +620,12 @@ else: return None + def enter_exit(self, exit_now): + if exit_now: + structobj = self.structobj + if isinstance(structobj, W_CDataNewOwning): + structobj._do_exit() + class W_CDataSliced(W_CData): """Subclass with an explicit length, for slices.""" @@ -634,6 +680,9 @@ return "buffer len %d from '%s' object" % ( self.length, self.space.type(self.w_keepalive).name) + def enter_exit(self, exit_now): + pass # for now, no effect on PyPy + class W_CDataGCP(W_CData): """For ffi.gc().""" @@ -647,6 +696,9 @@ self.register_finalizer(space) def _finalize_(self): + self.invoke_finalizer() + + def invoke_finalizer(self): w_destructor = self.w_destructor if w_destructor is not None: self.w_destructor = None @@ -656,6 +708,11 @@ self.w_destructor = None self.may_unregister_rpython_finalizer(self.space) + def enter_exit(self, exit_now): + if exit_now: + self.may_unregister_rpython_finalizer(self.space) + self.invoke_finalizer() + W_CData.typedef = TypeDef( '_cffi_backend.CData', @@ -686,5 +743,7 @@ __iter__ = interp2app(W_CData.iter), __weakref__ = 
make_weakref_descr(W_CData), __dir__ = interp2app(W_CData.dir), + __enter__ = interp2app(W_CData.descr_enter), + __exit__ = interp2app(W_CData.descr_exit), ) W_CData.typedef.acceptable_as_base_class = False diff --git a/pypy/module/_cffi_backend/ffi_obj.py b/pypy/module/_cffi_backend/ffi_obj.py --- a/pypy/module/_cffi_backend/ffi_obj.py +++ b/pypy/module/_cffi_backend/ffi_obj.py @@ -703,6 +703,16 @@ pass return w_res + @unwrap_spec(w_cdata=W_CData) + def descr_release(self, w_cdata): + """\ +Release now the resources held by a 'cdata' object from ffi.new(), +ffi.gc() or ffi.from_buffer(). The cdata object must not be used +afterwards. + +'ffi.release(cdata)' is equivalent to 'cdata.__exit__()'.""" + w_cdata.enter_exit(True) + class W_InitOnceLock(W_Root): def __init__(self, space): @@ -777,6 +787,7 @@ new_allocator = interp2app(W_FFIObject.descr_new_allocator), new_handle = interp2app(W_FFIObject.descr_new_handle), offsetof = interp2app(W_FFIObject.descr_offsetof), + release = interp2app(W_FFIObject.descr_release), sizeof = interp2app(W_FFIObject.descr_sizeof), string = interp2app(W_FFIObject.descr_string), typeof = interp2app(W_FFIObject.descr_typeof), diff --git a/pypy/module/_cffi_backend/func.py b/pypy/module/_cffi_backend/func.py --- a/pypy/module/_cffi_backend/func.py +++ b/pypy/module/_cffi_backend/func.py @@ -264,3 +264,7 @@ @unwrap_spec(w_cdata=cdataobj.W_CData, size=int) def gcp(space, w_cdata, w_destructor, size=0): return w_cdata.with_gc(w_destructor, size) + + at unwrap_spec(w_cdata=cdataobj.W_CData) +def release(space, w_cdata): + w_cdata.enter_exit(True) diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -4074,3 +4074,114 @@ assert_eq(cast(t5, 7.0), cast(t3, 7)) assert_lt(cast(t5, 3.1), 3.101) assert_gt(cast(t5, 3.1), 3) + +def test_explicit_release_new(): + # release() on a 
ffi.new() object has no effect on CPython, but + # really releases memory on PyPy. We can't test that effect + # though, because a released cdata is not marked. + BIntP = new_pointer_type(new_primitive_type("int")) + p = newp(BIntP) + p[0] = 42 + py.test.raises(IndexError, "p[1]") + release(p) + # here, reading p[0] might give garbage or segfault... + release(p) # no effect + # + BStruct = new_struct_type("struct foo") + BStructP = new_pointer_type(BStruct) + complete_struct_or_union(BStruct, [('p', BIntP, -1)]) + pstruct = newp(BStructP) + assert pstruct.p == cast(BIntP, 0) + release(pstruct) + # here, reading pstruct.p might give garbage or segfault... + release(pstruct) # no effect + +def test_explicit_release_new_contextmgr(): + BIntP = new_pointer_type(new_primitive_type("int")) + with newp(BIntP) as p: + p[0] = 42 + assert p[0] == 42 + # here, reading p[0] might give garbage or segfault... + release(p) # no effect + +def test_explicit_release_badtype(): + BIntP = new_pointer_type(new_primitive_type("int")) + p = cast(BIntP, 12345) + py.test.raises(ValueError, release, p) + py.test.raises(ValueError, release, p) + BStruct = new_struct_type("struct foo") + BStructP = new_pointer_type(BStruct) + complete_struct_or_union(BStruct, [('p', BIntP, -1)]) + pstruct = newp(BStructP) + py.test.raises(ValueError, release, pstruct[0]) + +def test_explicit_release_badtype_contextmgr(): + BIntP = new_pointer_type(new_primitive_type("int")) + p = cast(BIntP, 12345) + py.test.raises(ValueError, "with p: pass") + py.test.raises(ValueError, "with p: pass") + +def test_explicit_release_gc(): + BIntP = new_pointer_type(new_primitive_type("int")) + seen = [] + intp1 = newp(BIntP, 12345) + p1 = cast(BIntP, intp1) + p = gcp(p1, seen.append) + assert seen == [] + release(p) + assert seen == [p1] + assert p1[0] == 12345 + assert p[0] == 12345 # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_gc_contextmgr(): + BIntP = 
new_pointer_type(new_primitive_type("int")) + seen = [] + intp1 = newp(BIntP, 12345) + p1 = cast(BIntP, intp1) + p = gcp(p1, seen.append) + with p: + assert p[0] == 12345 + assert seen == [] + assert seen == [p1] + assert p1[0] == 12345 + assert p[0] == 12345 # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_from_buffer(): + a = bytearray(b"xyz") + BChar = new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + p = from_buffer(BCharA, a) + assert p[2] == b"z" + release(p) + assert p[2] == b"z" # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_from_buffer_contextmgr(): + a = bytearray(b"xyz") + BChar = new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + p = from_buffer(BCharA, a) + with p: + assert p[2] == b"z" + assert p[2] == b"z" # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_bytearray_on_cpython(): + if '__pypy__' in sys.builtin_module_names: + py.test.skip("pypy's bytearray are never locked") + a = bytearray(b"xyz") + BChar = new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + a += b't' * 10 + p = from_buffer(BCharA, a) + py.test.raises(BufferError, "a += b'u' * 100") + release(p) + a += b'v' * 100 + release(p) # no effect + a += b'w' * 1000 + assert a == bytearray(b"xyz" + b't' * 10 + b'v' * 100 + b'w' * 1000) diff --git a/pypy/module/_cffi_backend/test/test_recompiler.py b/pypy/module/_cffi_backend/test/test_recompiler.py --- a/pypy/module/_cffi_backend/test/test_recompiler.py +++ b/pypy/module/_cffi_backend/test/test_recompiler.py @@ -2107,3 +2107,36 @@ else: assert lib.__loader__ is None assert lib.__spec__ is None + + def test_release(self): + ffi, lib = self.prepare("", "test_release", "") + p = ffi.new("int[]", 123) + 
ffi.release(p) + # here, reading p[0] might give garbage or segfault... + ffi.release(p) # no effect + + def test_release_new_allocator(self): + ffi, lib = self.prepare("struct ab { int a, b; };", + "test_release_new_allocator", + "struct ab { int a, b; };") + seen = [] + def myalloc(size): + seen.append(size) + return ffi.new("char[]", b"X" * size) + def myfree(raw): + seen.append(raw) + alloc2 = ffi.new_allocator(alloc=myalloc, free=myfree) + p = alloc2("int[]", 15) + assert seen == [15 * 4] + ffi.release(p) + assert seen == [15 * 4, p] + ffi.release(p) # no effect + assert seen == [15 * 4, p] + # + del seen[:] + p = alloc2("struct ab *") + assert seen == [2 * 4] + ffi.release(p) + assert seen == [2 * 4, p] + ffi.release(p) # no effect + assert seen == [2 * 4, p] From pypy.commits at gmail.com Sun Jan 6 02:57:09 2019 From: pypy.commits at gmail.com (mattip) Date: Sat, 05 Jan 2019 23:57:09 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: skip tests if no hypothesis Message-ID: <5c31b4d5.1c69fb81.d7041.e4bd@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95581:690d8f9f4b72 Date: 2019-01-04 15:19 +0200 http://bitbucket.org/pypy/pypy/changeset/690d8f9f4b72/ Log: skip tests if no hypothesis diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -68,16 +68,16 @@ ("??", "ascii", input, 1, 2), ("??", "ascii", input, 5, 6), ("??", "ascii", input, 6, 7)] +if HAS_HYPOTHESIS: + @given(strategies.text()) + def test_unicode_raw_escape(u): + r = uh.utf8_encode_raw_unicode_escape(u.encode("utf8"), 'strict', None) + assert r == u.encode("raw-unicode-escape") - at given(strategies.text()) -def test_unicode_raw_escape(u): - r = uh.utf8_encode_raw_unicode_escape(u.encode("utf8"), 'strict', None) - assert r == u.encode("raw-unicode-escape") - - at given(strategies.text()) -def test_unicode_escape(u): - r = 
uh.utf8_encode_unicode_escape(u.encode("utf8"), "strict", None) - assert r == u.encode("unicode-escape") + @given(strategies.text()) + def test_unicode_escape(u): + r = uh.utf8_encode_unicode_escape(u.encode("utf8"), "strict", None) + assert r == u.encode("unicode-escape") def test_encode_decimal(space): assert uh.unicode_encode_decimal(u' 12, 34 ', None) == ' 12, 34 ' From pypy.commits at gmail.com Sun Jan 6 02:57:11 2019 From: pypy.commits at gmail.com (mattip) Date: Sat, 05 Jan 2019 23:57:11 -0800 (PST) Subject: [pypy-commit] pypy default: add passing test Message-ID: <5c31b4d7.1c69fb81.36483.eab5@mx.google.com> Author: Matti Picus Branch: Changeset: r95582:1d1e6e0d47c4 Date: 2019-01-06 09:32 +0200 http://bitbucket.org/pypy/pypy/changeset/1d1e6e0d47c4/ Log: add passing test diff --git a/pypy/module/__pypy__/test/test_builders.py b/pypy/module/__pypy__/test/test_builders.py --- a/pypy/module/__pypy__/test/test_builders.py +++ b/pypy/module/__pypy__/test/test_builders.py @@ -9,9 +9,11 @@ b.append(u"1") s = b.build() assert s == u"abc1231" + assert type(s) is unicode assert b.build() == s b.append(u"123") assert b.build() == s + u"123" + assert type(b.build()) is unicode def test_preallocate(self): from __pypy__.builders import UnicodeBuilder @@ -20,6 +22,7 @@ b.append(u"123") s = b.build() assert s == u"abc123" + assert type(s) is unicode def test_append_slice(self): from __pypy__.builders import UnicodeBuilder @@ -28,8 +31,11 @@ raises(ValueError, b.append_slice, u"1", 2, 1) s = b.build() assert s == u"cde" + assert type(s) is unicode b.append_slice(u"abc", 1, 2) - assert b.build() == u"cdeb" + s = b.build() + assert s == u"cdeb" + assert type(s) is unicode def test_stringbuilder(self): from __pypy__.builders import StringBuilder @@ -42,3 +48,8 @@ assert len(b) == 16 assert s == "abc123you and me" assert b.build() == s + + def test_encode(self): + from __pypy__.builders import UnicodeBuilder + b = UnicodeBuilder() + raises(UnicodeDecodeError, b.append, b'\xc0') 
From pypy.commits at gmail.com Sun Jan 6 02:57:13 2019 From: pypy.commits at gmail.com (mattip) Date: Sat, 05 Jan 2019 23:57:13 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge default into branch Message-ID: <5c31b4d9.1c69fb81.95983.e1e1@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95583:a05d741a4652 Date: 2019-01-06 09:33 +0200 http://bitbucket.org/pypy/pypy/changeset/a05d741a4652/ Log: merge default into branch diff --git a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py --- a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py +++ b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py @@ -338,6 +338,13 @@ py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", require_writable=True) + def test_release(self): + ffi = FFI() + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or segfault... + ffi.release(p) # no effect + def test_memmove(self): ffi = FFI() p = ffi.new("short[]", [-1234, -2345, -3456, -4567, -5678]) diff --git a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py --- a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py +++ b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py @@ -1457,6 +1457,35 @@ import gc; gc.collect(); gc.collect(); gc.collect() assert seen == [3] + def test_release(self): + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or segfault... 
+ ffi.release(p) # no effect + + def test_release_new_allocator(self): + seen = [] + def myalloc(size): + seen.append(size) + return ffi.new("char[]", b"X" * size) + def myfree(raw): + seen.append(raw) + alloc2 = ffi.new_allocator(alloc=myalloc, free=myfree) + p = alloc2("int[]", 15) + assert seen == [15 * 4] + ffi.release(p) + assert seen == [15 * 4, p] + ffi.release(p) # no effect + assert seen == [15 * 4, p] + # + del seen[:] + p = alloc2("struct ab *") + assert seen == [2 * 4] + ffi.release(p) + assert seen == [2 * 4, p] + ffi.release(p) # no effect + assert seen == [2 * 4, p] + def test_CData_CType(self): assert isinstance(ffi.cast("int", 0), ffi.CData) assert isinstance(ffi.new("int *"), ffi.CData) diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -530,6 +530,9 @@ def from_handle(self, x): return self._backend.from_handle(x) + def release(self, x): + self._backend.release(x) + def set_unicode(self, enabled_flag): """Windows: if 'enabled_flag' is True, enable the UNICODE and _UNICODE defines in C, and declare the types like TCHAR and LPTCSTR diff --git a/lib_pypy/cffi/cparser.py b/lib_pypy/cffi/cparser.py --- a/lib_pypy/cffi/cparser.py +++ b/lib_pypy/cffi/cparser.py @@ -16,6 +16,13 @@ except ImportError: lock = None +def _workaround_for_static_import_finders(): + # Issue #392: packaging tools like cx_Freeze can not find these + # because pycparser uses exec dynamic import. This is an obscure + # workaround. This function is never called. 
+ import pycparser.yacctab + import pycparser.lextab + CDEF_SOURCE_STRING = "" _r_comment = re.compile(r"/\*.*?\*/|//([^\n\\]|\\.)*?$", re.DOTALL | re.MULTILINE) diff --git a/pypy/module/__pypy__/test/test_builders.py b/pypy/module/__pypy__/test/test_builders.py --- a/pypy/module/__pypy__/test/test_builders.py +++ b/pypy/module/__pypy__/test/test_builders.py @@ -48,3 +48,8 @@ assert len(b) == 16 assert s == "abc123you and me" assert b.build() == s + + def test_encode(self): + from __pypy__.builders import UnicodeBuilder + b = UnicodeBuilder() + raises(UnicodeDecodeError, b.append, b'\xc0') diff --git a/pypy/module/_cffi_backend/__init__.py b/pypy/module/_cffi_backend/__init__.py --- a/pypy/module/_cffi_backend/__init__.py +++ b/pypy/module/_cffi_backend/__init__.py @@ -52,6 +52,7 @@ 'unpack': 'func.unpack', 'buffer': 'cbuffer.MiniBuffer', 'memmove': 'func.memmove', + 'release': 'func.release', 'get_errno': 'cerrno.get_errno', 'set_errno': 'cerrno.set_errno', diff --git a/pypy/module/_cffi_backend/cdataobj.py b/pypy/module/_cffi_backend/cdataobj.py --- a/pypy/module/_cffi_backend/cdataobj.py +++ b/pypy/module/_cffi_backend/cdataobj.py @@ -483,6 +483,18 @@ def get_structobj(self): return None + def enter_exit(self, exit_now): + raise oefmt(self.space.w_ValueError, + "only 'cdata' object from ffi.new(), ffi.gc() or ffi.from_buffer() " + "can be used with the 'with' keyword or ffi.release()") + + def descr_enter(self): + self.enter_exit(False) + return self + + def descr_exit(self, args_w): + self.enter_exit(True) + class W_CDataMem(W_CData): """This is used only by the results of cffi.cast('int', x) @@ -535,14 +547,33 @@ def get_structobj(self): return self + def enter_exit(self, exit_now): + from pypy.module._cffi_backend.ctypeptr import W_CTypePtrOrArray + if not isinstance(self.ctype, W_CTypePtrOrArray): + W_CData.enter_exit(self, exit_now) + elif exit_now: + self._do_exit() + + def _do_exit(self): + raise NotImplementedError + class 
W_CDataNewStd(W_CDataNewOwning): """Subclass using the standard allocator, lltype.malloc()/lltype.free()""" - _attrs_ = [] + _attrs_ = ['explicitly_freed'] + explicitly_freed = False @rgc.must_be_light_finalizer def __del__(self): - lltype.free(self._ptr, flavor='raw') + if not self.explicitly_freed: + lltype.free(self._ptr, flavor='raw') + + def _do_exit(self): + if not self.explicitly_freed: + rgc.add_memory_pressure(-self._sizeof(), self) + self.explicitly_freed = True + rgc.may_ignore_finalizer(self) + lltype.free(self._ptr, flavor='raw') class W_CDataNewNonStd(W_CDataNewOwning): @@ -550,7 +581,16 @@ _attrs_ = ['w_raw_cdata', 'w_free'] def _finalize_(self): - self.space.call_function(self.w_free, self.w_raw_cdata) + if self.w_free is not None: + self.space.call_function(self.w_free, self.w_raw_cdata) + + def _do_exit(self): + w_free = self.w_free + if w_free is not None: + rgc.add_memory_pressure(-self._sizeof(), self) + self.w_free = None + self.may_unregister_rpython_finalizer(self.space) + self.space.call_function(w_free, self.w_raw_cdata) class W_CDataPtrToStructOrUnion(W_CData): @@ -580,6 +620,12 @@ else: return None + def enter_exit(self, exit_now): + if exit_now: + structobj = self.structobj + if isinstance(structobj, W_CDataNewOwning): + structobj._do_exit() + class W_CDataSliced(W_CData): """Subclass with an explicit length, for slices.""" @@ -634,6 +680,9 @@ return "buffer len %d from '%s' object" % ( self.length, self.space.type(self.w_keepalive).name) + def enter_exit(self, exit_now): + pass # for now, no effect on PyPy + class W_CDataGCP(W_CData): """For ffi.gc().""" @@ -647,6 +696,9 @@ self.register_finalizer(space) def _finalize_(self): + self.invoke_finalizer() + + def invoke_finalizer(self): w_destructor = self.w_destructor if w_destructor is not None: self.w_destructor = None @@ -656,6 +708,11 @@ self.w_destructor = None self.may_unregister_rpython_finalizer(self.space) + def enter_exit(self, exit_now): + if exit_now: + 
self.may_unregister_rpython_finalizer(self.space) + self.invoke_finalizer() + W_CData.typedef = TypeDef( '_cffi_backend.CData', @@ -686,5 +743,7 @@ __iter__ = interp2app(W_CData.iter), __weakref__ = make_weakref_descr(W_CData), __dir__ = interp2app(W_CData.dir), + __enter__ = interp2app(W_CData.descr_enter), + __exit__ = interp2app(W_CData.descr_exit), ) W_CData.typedef.acceptable_as_base_class = False diff --git a/pypy/module/_cffi_backend/ffi_obj.py b/pypy/module/_cffi_backend/ffi_obj.py --- a/pypy/module/_cffi_backend/ffi_obj.py +++ b/pypy/module/_cffi_backend/ffi_obj.py @@ -703,6 +703,16 @@ pass return w_res + @unwrap_spec(w_cdata=W_CData) + def descr_release(self, w_cdata): + """\ +Release now the resources held by a 'cdata' object from ffi.new(), +ffi.gc() or ffi.from_buffer(). The cdata object must not be used +afterwards. + +'ffi.release(cdata)' is equivalent to 'cdata.__exit__()'.""" + w_cdata.enter_exit(True) + class W_InitOnceLock(W_Root): def __init__(self, space): @@ -777,6 +787,7 @@ new_allocator = interp2app(W_FFIObject.descr_new_allocator), new_handle = interp2app(W_FFIObject.descr_new_handle), offsetof = interp2app(W_FFIObject.descr_offsetof), + release = interp2app(W_FFIObject.descr_release), sizeof = interp2app(W_FFIObject.descr_sizeof), string = interp2app(W_FFIObject.descr_string), typeof = interp2app(W_FFIObject.descr_typeof), diff --git a/pypy/module/_cffi_backend/func.py b/pypy/module/_cffi_backend/func.py --- a/pypy/module/_cffi_backend/func.py +++ b/pypy/module/_cffi_backend/func.py @@ -264,3 +264,7 @@ @unwrap_spec(w_cdata=cdataobj.W_CData, size=int) def gcp(space, w_cdata, w_destructor, size=0): return w_cdata.with_gc(w_destructor, size) + + at unwrap_spec(w_cdata=cdataobj.W_CData) +def release(space, w_cdata): + w_cdata.enter_exit(True) diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ 
b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -4074,3 +4074,114 @@ assert_eq(cast(t5, 7.0), cast(t3, 7)) assert_lt(cast(t5, 3.1), 3.101) assert_gt(cast(t5, 3.1), 3) + +def test_explicit_release_new(): + # release() on a ffi.new() object has no effect on CPython, but + # really releases memory on PyPy. We can't test that effect + # though, because a released cdata is not marked. + BIntP = new_pointer_type(new_primitive_type("int")) + p = newp(BIntP) + p[0] = 42 + py.test.raises(IndexError, "p[1]") + release(p) + # here, reading p[0] might give garbage or segfault... + release(p) # no effect + # + BStruct = new_struct_type("struct foo") + BStructP = new_pointer_type(BStruct) + complete_struct_or_union(BStruct, [('p', BIntP, -1)]) + pstruct = newp(BStructP) + assert pstruct.p == cast(BIntP, 0) + release(pstruct) + # here, reading pstruct.p might give garbage or segfault... + release(pstruct) # no effect + +def test_explicit_release_new_contextmgr(): + BIntP = new_pointer_type(new_primitive_type("int")) + with newp(BIntP) as p: + p[0] = 42 + assert p[0] == 42 + # here, reading p[0] might give garbage or segfault... 
+ release(p) # no effect + +def test_explicit_release_badtype(): + BIntP = new_pointer_type(new_primitive_type("int")) + p = cast(BIntP, 12345) + py.test.raises(ValueError, release, p) + py.test.raises(ValueError, release, p) + BStruct = new_struct_type("struct foo") + BStructP = new_pointer_type(BStruct) + complete_struct_or_union(BStruct, [('p', BIntP, -1)]) + pstruct = newp(BStructP) + py.test.raises(ValueError, release, pstruct[0]) + +def test_explicit_release_badtype_contextmgr(): + BIntP = new_pointer_type(new_primitive_type("int")) + p = cast(BIntP, 12345) + py.test.raises(ValueError, "with p: pass") + py.test.raises(ValueError, "with p: pass") + +def test_explicit_release_gc(): + BIntP = new_pointer_type(new_primitive_type("int")) + seen = [] + intp1 = newp(BIntP, 12345) + p1 = cast(BIntP, intp1) + p = gcp(p1, seen.append) + assert seen == [] + release(p) + assert seen == [p1] + assert p1[0] == 12345 + assert p[0] == 12345 # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_gc_contextmgr(): + BIntP = new_pointer_type(new_primitive_type("int")) + seen = [] + intp1 = newp(BIntP, 12345) + p1 = cast(BIntP, intp1) + p = gcp(p1, seen.append) + with p: + assert p[0] == 12345 + assert seen == [] + assert seen == [p1] + assert p1[0] == 12345 + assert p[0] == 12345 # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_from_buffer(): + a = bytearray(b"xyz") + BChar = new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + p = from_buffer(BCharA, a) + assert p[2] == b"z" + release(p) + assert p[2] == b"z" # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_from_buffer_contextmgr(): + a = bytearray(b"xyz") + BChar = new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + p = from_buffer(BCharA, a) + with p: + 
assert p[2] == b"z" + assert p[2] == b"z" # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_bytearray_on_cpython(): + if '__pypy__' in sys.builtin_module_names: + py.test.skip("pypy's bytearray are never locked") + a = bytearray(b"xyz") + BChar = new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + a += b't' * 10 + p = from_buffer(BCharA, a) + py.test.raises(BufferError, "a += b'u' * 100") + release(p) + a += b'v' * 100 + release(p) # no effect + a += b'w' * 1000 + assert a == bytearray(b"xyz" + b't' * 10 + b'v' * 100 + b'w' * 1000) diff --git a/pypy/module/_cffi_backend/test/test_recompiler.py b/pypy/module/_cffi_backend/test/test_recompiler.py --- a/pypy/module/_cffi_backend/test/test_recompiler.py +++ b/pypy/module/_cffi_backend/test/test_recompiler.py @@ -2107,3 +2107,36 @@ else: assert lib.__loader__ is None assert lib.__spec__ is None + + def test_release(self): + ffi, lib = self.prepare("", "test_release", "") + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or segfault... 
+ ffi.release(p) # no effect + + def test_release_new_allocator(self): + ffi, lib = self.prepare("struct ab { int a, b; };", + "test_release_new_allocator", + "struct ab { int a, b; };") + seen = [] + def myalloc(size): + seen.append(size) + return ffi.new("char[]", b"X" * size) + def myfree(raw): + seen.append(raw) + alloc2 = ffi.new_allocator(alloc=myalloc, free=myfree) + p = alloc2("int[]", 15) + assert seen == [15 * 4] + ffi.release(p) + assert seen == [15 * 4, p] + ffi.release(p) # no effect + assert seen == [15 * 4, p] + # + del seen[:] + p = alloc2("struct ab *") + assert seen == [2 * 4] + ffi.release(p) + assert seen == [2 * 4, p] + ffi.release(p) # no effect + assert seen == [2 * 4, p] From pypy.commits at gmail.com Sun Jan 6 02:57:15 2019 From: pypy.commits at gmail.com (mattip) Date: Sat, 05 Jan 2019 23:57:15 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix failing test, like append_slice Message-ID: <5c31b4db.1c69fb81.8c71a.7292@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95584:06c2230fec3c Date: 2019-01-06 09:56 +0200 http://bitbucket.org/pypy/pypy/changeset/06c2230fec3c/ Log: fix failing test, like append_slice diff --git a/pypy/module/__pypy__/interp_builders.py b/pypy/module/__pypy__/interp_builders.py --- a/pypy/module/__pypy__/interp_builders.py +++ b/pypy/module/__pypy__/interp_builders.py @@ -64,8 +64,9 @@ def descr__new__(space, w_subtype, size=-1): return W_UnicodeBuilder(space, 3 * size) - @unwrap_spec(s='utf8') - def descr_append(self, space, s): + def descr_append(self, space, w_s): + w_unicode = W_UnicodeObject.convert_arg_to_w_unicode(space, w_s) + s = space.utf8_w(w_unicode) self.builder.append(s) @unwrap_spec(start=int, end=int) From pypy.commits at gmail.com Sun Jan 6 09:26:48 2019 From: pypy.commits at gmail.com (Alexander Schremmer) Date: Sun, 06 Jan 2019 06:26:48 -0800 (PST) Subject: [pypy-commit] extradoc extradoc: Attendance, guest room available! 
Message-ID: <5c321028.1c69fb81.300bd.f5c9@mx.google.com> Author: Alexander Schremmer Branch: extradoc Changeset: r5933:a13f0d58394c Date: 2019-01-06 15:26 +0100 http://bitbucket.org/pypy/extradoc/changeset/a13f0d58394c/ Log: Attendance, guest room available! diff --git a/sprintinfo/ddorf2019/people.txt b/sprintinfo/ddorf2019/people.txt --- a/sprintinfo/ddorf2019/people.txt +++ b/sprintinfo/ddorf2019/people.txt @@ -5,15 +5,16 @@ column are known to be coming but there are no details available yet from them. -============================ ============== ====================== +============================ ============== =========================== Name Arrive/Depart Accomodation -============================ ============== ====================== +============================ ============== =========================== Carl Friedrich Bolz-Tereick always there private Matti Picus Feb 4? - 9? any suggestions?? Manuel? Feb 4 - 7 share a room? Antonio Cuni Feb 3 - 9 airbnb Andrew Lawrence Feb 3 - 9 backpackers Düsseldorf -============================ ============== ====================== +Alexander Schremmer Feb 4 - 8 Essen, guest room available +============================ ============== =========================== From pypy.commits at gmail.com Sun Jan 6 13:31:42 2019 From: pypy.commits at gmail.com (mattip) Date: Sun, 06 Jan 2019 10:31:42 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: test, fix bug in unicode.title Message-ID: <5c32498e.1c69fb81.82d21.8ba6@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95585:fb8915bddd8d Date: 2019-01-06 20:30 +0200 http://bitbucket.org/pypy/pypy/changeset/fb8915bddd8d/ Log: test, fix bug in unicode.title diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -372,6 +372,8 @@ assert u"bro!wn fox".title() == u"Bro!Wn Fox" assert u"brow\u4321n fox".title() == 
u"Brow\u4321N Fox" assert u'\ud800'.title() == u'\ud800' + assert (unichr(0x345) + u'abc').title() == u'\u0399Abc' + assert (unichr(0x345) + u'ABC').title() == u'\u0399Abc' def test_istitle(self): assert u"".istitle() == False diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -345,13 +345,13 @@ input = self._utf8 builder = rutf8.Utf8StringBuilder(len(input)) previous_is_cased = False - for ch in rutf8.Utf8StringIterator(input): + for ch0 in rutf8.Utf8StringIterator(input): if not previous_is_cased: - ch = unicodedb.totitle(ch) + ch1 = unicodedb.totitle(ch0) else: - ch = unicodedb.tolower(ch) - builder.append_code(ch) - previous_is_cased = unicodedb.iscased(ch) + ch1 = unicodedb.tolower(ch0) + builder.append_code(ch1) + previous_is_cased = unicodedb.iscased(ch0) return self.from_utf8builder(builder) def descr_translate(self, space, w_table): From pypy.commits at gmail.com Mon Jan 7 02:16:06 2019 From: pypy.commits at gmail.com (mattip) Date: Sun, 06 Jan 2019 23:16:06 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: merge unitcode-utf8 into branch Message-ID: <5c32fcb6.1c69fb81.c1957.4cb9@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95586:2186945d3c34 Date: 2019-01-07 08:42 +0200 http://bitbucket.org/pypy/pypy/changeset/2186945d3c34/ Log: merge unitcode-utf8 into branch diff --git a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py --- a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py +++ b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py @@ -338,6 +338,13 @@ py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", require_writable=True) + def test_release(self): + ffi = FFI() + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or segfault... 
+ ffi.release(p) # no effect + def test_memmove(self): ffi = FFI() p = ffi.new("short[]", [-1234, -2345, -3456, -4567, -5678]) diff --git a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py --- a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py +++ b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py @@ -1457,6 +1457,35 @@ import gc; gc.collect(); gc.collect(); gc.collect() assert seen == [3] + def test_release(self): + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or segfault... + ffi.release(p) # no effect + + def test_release_new_allocator(self): + seen = [] + def myalloc(size): + seen.append(size) + return ffi.new("char[]", b"X" * size) + def myfree(raw): + seen.append(raw) + alloc2 = ffi.new_allocator(alloc=myalloc, free=myfree) + p = alloc2("int[]", 15) + assert seen == [15 * 4] + ffi.release(p) + assert seen == [15 * 4, p] + ffi.release(p) # no effect + assert seen == [15 * 4, p] + # + del seen[:] + p = alloc2("struct ab *") + assert seen == [2 * 4] + ffi.release(p) + assert seen == [2 * 4, p] + ffi.release(p) # no effect + assert seen == [2 * 4, p] + def test_CData_CType(self): assert isinstance(ffi.cast("int", 0), ffi.CData) assert isinstance(ffi.new("int *"), ffi.CData) diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -530,6 +530,9 @@ def from_handle(self, x): return self._backend.from_handle(x) + def release(self, x): + self._backend.release(x) + def set_unicode(self, enabled_flag): """Windows: if 'enabled_flag' is True, enable the UNICODE and _UNICODE defines in C, and declare the types like TCHAR and LPTCSTR diff --git a/lib_pypy/cffi/cparser.py b/lib_pypy/cffi/cparser.py --- a/lib_pypy/cffi/cparser.py +++ b/lib_pypy/cffi/cparser.py @@ -16,6 +16,13 @@ except ImportError: lock = None +def _workaround_for_static_import_finders(): + # Issue #392: packaging tools like cx_Freeze can not find these + # because 
pycparser uses exec dynamic import. This is an obscure + # workaround. This function is never called. + import pycparser.yacctab + import pycparser.lextab + CDEF_SOURCE_STRING = "" _r_comment = re.compile(r"/\*.*?\*/|//([^\n\\]|\\.)*?$", re.DOTALL | re.MULTILINE) diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -152,6 +152,22 @@ ("??", "ascii", input, 1, 2), ("??", "ascii", input, 5, 6), ("??", "ascii", input, 6, 7)] +if HAS_HYPOTHESIS: + @given(strategies.text()) + def test_unicode_raw_escape(u): + r = uh.utf8_encode_raw_unicode_escape(u.encode("utf8"), 'strict', None) + assert r == u.encode("raw-unicode-escape") + + @given(strategies.text()) + def test_unicode_escape(u): + r = uh.utf8_encode_unicode_escape(u.encode("utf8"), "strict", None) + assert r == u.encode("unicode-escape") + + @given(strategies.text()) + def test_utf8_encode_ascii_2(u): + def eh(errors, encoding, reason, p, start, end): + return "?" * (end - start), end, 'b' + assert utf8_encode_ascii(u.encode("utf8"), "replace", eh) == u.encode("ascii", "replace") def test_encode_decimal(space): assert uh.unicode_encode_decimal(u' 12, 34 ', None) == ' 12, 34 ' @@ -166,20 +182,3 @@ u'12\u1234'.encode('utf8'), 'xmlcharrefreplace', handler) assert result == '12ሴ' -if HAS_HYPOTHESIS: - @given(strategies.text()) - def test_utf8_encode_ascii_2(u): - def eh(errors, encoding, reason, p, start, end): - return "?" 
* (end - start), end, 'b' - assert utf8_encode_ascii(u.encode("utf8"), "replace", eh) == u.encode("ascii", "replace") - - @given(strategies.text()) - def test_unicode_raw_escape(u): - r = uh.utf8_encode_raw_unicode_escape(u.encode("utf8"), 'strict', None) - assert r == u.encode("raw-unicode-escape") - - @given(strategies.text()) - def test_unicode_escape(u): - r = uh.utf8_encode_unicode_escape(u.encode("utf8"), "strict", None) - assert r == u.encode("unicode-escape") - diff --git a/pypy/module/__pypy__/interp_builders.py b/pypy/module/__pypy__/interp_builders.py --- a/pypy/module/__pypy__/interp_builders.py +++ b/pypy/module/__pypy__/interp_builders.py @@ -63,8 +63,9 @@ def descr__new__(space, w_subtype, size=-1): return W_UnicodeBuilder(space, 3 * size) - @unwrap_spec(s='utf8') - def descr_append(self, space, s): + def descr_append(self, space, w_s): + w_unicode = W_UnicodeObject.convert_arg_to_w_unicode(space, w_s) + s = space.utf8_w(w_unicode) self.builder.append(s) @unwrap_spec(start=int, end=int) diff --git a/pypy/module/__pypy__/test/test_builders.py b/pypy/module/__pypy__/test/test_builders.py --- a/pypy/module/__pypy__/test/test_builders.py +++ b/pypy/module/__pypy__/test/test_builders.py @@ -45,3 +45,8 @@ assert len(b) == 16 assert s == b"abc123you and me" assert b.build() == s + + def test_encode(self): + from __pypy__.builders import UnicodeBuilder + b = UnicodeBuilder() + raises(UnicodeDecodeError, b.append, b'\xc0') diff --git a/pypy/module/_cffi_backend/__init__.py b/pypy/module/_cffi_backend/__init__.py --- a/pypy/module/_cffi_backend/__init__.py +++ b/pypy/module/_cffi_backend/__init__.py @@ -52,6 +52,7 @@ 'unpack': 'func.unpack', 'buffer': 'cbuffer.MiniBuffer', 'memmove': 'func.memmove', + 'release': 'func.release', 'get_errno': 'cerrno.get_errno', 'set_errno': 'cerrno.set_errno', diff --git a/pypy/module/_cffi_backend/cdataobj.py b/pypy/module/_cffi_backend/cdataobj.py --- a/pypy/module/_cffi_backend/cdataobj.py +++ 
b/pypy/module/_cffi_backend/cdataobj.py @@ -476,6 +476,18 @@ def get_structobj(self): return None + def enter_exit(self, exit_now): + raise oefmt(self.space.w_ValueError, + "only 'cdata' object from ffi.new(), ffi.gc() or ffi.from_buffer() " + "can be used with the 'with' keyword or ffi.release()") + + def descr_enter(self): + self.enter_exit(False) + return self + + def descr_exit(self, args_w): + self.enter_exit(True) + class W_CDataMem(W_CData): """This is used only by the results of cffi.cast('int', x) @@ -528,14 +540,33 @@ def get_structobj(self): return self + def enter_exit(self, exit_now): + from pypy.module._cffi_backend.ctypeptr import W_CTypePtrOrArray + if not isinstance(self.ctype, W_CTypePtrOrArray): + W_CData.enter_exit(self, exit_now) + elif exit_now: + self._do_exit() + + def _do_exit(self): + raise NotImplementedError + class W_CDataNewStd(W_CDataNewOwning): """Subclass using the standard allocator, lltype.malloc()/lltype.free()""" - _attrs_ = [] + _attrs_ = ['explicitly_freed'] + explicitly_freed = False @rgc.must_be_light_finalizer def __del__(self): - lltype.free(self._ptr, flavor='raw') + if not self.explicitly_freed: + lltype.free(self._ptr, flavor='raw') + + def _do_exit(self): + if not self.explicitly_freed: + rgc.add_memory_pressure(-self._sizeof(), self) + self.explicitly_freed = True + rgc.may_ignore_finalizer(self) + lltype.free(self._ptr, flavor='raw') class W_CDataNewNonStd(W_CDataNewOwning): @@ -543,7 +574,16 @@ _attrs_ = ['w_raw_cdata', 'w_free'] def _finalize_(self): - self.space.call_function(self.w_free, self.w_raw_cdata) + if self.w_free is not None: + self.space.call_function(self.w_free, self.w_raw_cdata) + + def _do_exit(self): + w_free = self.w_free + if w_free is not None: + rgc.add_memory_pressure(-self._sizeof(), self) + self.w_free = None + self.may_unregister_rpython_finalizer(self.space) + self.space.call_function(w_free, self.w_raw_cdata) class W_CDataPtrToStructOrUnion(W_CData): @@ -573,6 +613,12 @@ else: return None 
+ def enter_exit(self, exit_now): + if exit_now: + structobj = self.structobj + if isinstance(structobj, W_CDataNewOwning): + structobj._do_exit() + class W_CDataSliced(W_CData): """Subclass with an explicit length, for slices.""" @@ -627,6 +673,9 @@ return "buffer len %d from '%s' object" % ( self.length, self.space.type(self.w_keepalive).name) + def enter_exit(self, exit_now): + pass # for now, no effect on PyPy + class W_CDataGCP(W_CData): """For ffi.gc().""" @@ -640,6 +689,9 @@ self.register_finalizer(space) def _finalize_(self): + self.invoke_finalizer() + + def invoke_finalizer(self): w_destructor = self.w_destructor if w_destructor is not None: self.w_destructor = None @@ -649,6 +701,11 @@ self.w_destructor = None self.may_unregister_rpython_finalizer(self.space) + def enter_exit(self, exit_now): + if exit_now: + self.may_unregister_rpython_finalizer(self.space) + self.invoke_finalizer() + W_CData.typedef = TypeDef( '_cffi_backend.CData', @@ -678,5 +735,7 @@ __iter__ = interp2app(W_CData.iter), __weakref__ = make_weakref_descr(W_CData), __dir__ = interp2app(W_CData.dir), + __enter__ = interp2app(W_CData.descr_enter), + __exit__ = interp2app(W_CData.descr_exit), ) W_CData.typedef.acceptable_as_base_class = False diff --git a/pypy/module/_cffi_backend/ffi_obj.py b/pypy/module/_cffi_backend/ffi_obj.py --- a/pypy/module/_cffi_backend/ffi_obj.py +++ b/pypy/module/_cffi_backend/ffi_obj.py @@ -703,6 +703,16 @@ pass return w_res + @unwrap_spec(w_cdata=W_CData) + def descr_release(self, w_cdata): + """\ +Release now the resources held by a 'cdata' object from ffi.new(), +ffi.gc() or ffi.from_buffer(). The cdata object must not be used +afterwards. 
+ +'ffi.release(cdata)' is equivalent to 'cdata.__exit__()'.""" + w_cdata.enter_exit(True) + class W_InitOnceLock(W_Root): def __init__(self, space): @@ -777,6 +787,7 @@ new_allocator = interp2app(W_FFIObject.descr_new_allocator), new_handle = interp2app(W_FFIObject.descr_new_handle), offsetof = interp2app(W_FFIObject.descr_offsetof), + release = interp2app(W_FFIObject.descr_release), sizeof = interp2app(W_FFIObject.descr_sizeof), string = interp2app(W_FFIObject.descr_string), typeof = interp2app(W_FFIObject.descr_typeof), diff --git a/pypy/module/_cffi_backend/func.py b/pypy/module/_cffi_backend/func.py --- a/pypy/module/_cffi_backend/func.py +++ b/pypy/module/_cffi_backend/func.py @@ -264,3 +264,7 @@ @unwrap_spec(w_cdata=cdataobj.W_CData, size=int) def gcp(space, w_cdata, w_destructor, size=0): return w_cdata.with_gc(w_destructor, size) + + at unwrap_spec(w_cdata=cdataobj.W_CData) +def release(space, w_cdata): + w_cdata.enter_exit(True) diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -4074,3 +4074,114 @@ assert_eq(cast(t5, 7.0), cast(t3, 7)) assert_lt(cast(t5, 3.1), 3.101) assert_gt(cast(t5, 3.1), 3) + +def test_explicit_release_new(): + # release() on a ffi.new() object has no effect on CPython, but + # really releases memory on PyPy. We can't test that effect + # though, because a released cdata is not marked. + BIntP = new_pointer_type(new_primitive_type("int")) + p = newp(BIntP) + p[0] = 42 + py.test.raises(IndexError, "p[1]") + release(p) + # here, reading p[0] might give garbage or segfault... 
+ release(p) # no effect + # + BStruct = new_struct_type("struct foo") + BStructP = new_pointer_type(BStruct) + complete_struct_or_union(BStruct, [('p', BIntP, -1)]) + pstruct = newp(BStructP) + assert pstruct.p == cast(BIntP, 0) + release(pstruct) + # here, reading pstruct.p might give garbage or segfault... + release(pstruct) # no effect + +def test_explicit_release_new_contextmgr(): + BIntP = new_pointer_type(new_primitive_type("int")) + with newp(BIntP) as p: + p[0] = 42 + assert p[0] == 42 + # here, reading p[0] might give garbage or segfault... + release(p) # no effect + +def test_explicit_release_badtype(): + BIntP = new_pointer_type(new_primitive_type("int")) + p = cast(BIntP, 12345) + py.test.raises(ValueError, release, p) + py.test.raises(ValueError, release, p) + BStruct = new_struct_type("struct foo") + BStructP = new_pointer_type(BStruct) + complete_struct_or_union(BStruct, [('p', BIntP, -1)]) + pstruct = newp(BStructP) + py.test.raises(ValueError, release, pstruct[0]) + +def test_explicit_release_badtype_contextmgr(): + BIntP = new_pointer_type(new_primitive_type("int")) + p = cast(BIntP, 12345) + py.test.raises(ValueError, "with p: pass") + py.test.raises(ValueError, "with p: pass") + +def test_explicit_release_gc(): + BIntP = new_pointer_type(new_primitive_type("int")) + seen = [] + intp1 = newp(BIntP, 12345) + p1 = cast(BIntP, intp1) + p = gcp(p1, seen.append) + assert seen == [] + release(p) + assert seen == [p1] + assert p1[0] == 12345 + assert p[0] == 12345 # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_gc_contextmgr(): + BIntP = new_pointer_type(new_primitive_type("int")) + seen = [] + intp1 = newp(BIntP, 12345) + p1 = cast(BIntP, intp1) + p = gcp(p1, seen.append) + with p: + assert p[0] == 12345 + assert seen == [] + assert seen == [p1] + assert p1[0] == 12345 + assert p[0] == 12345 # true so far, but might change to raise RuntimeError + release(p) # no effect + +def 
test_explicit_release_from_buffer(): + a = bytearray(b"xyz") + BChar = new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + p = from_buffer(BCharA, a) + assert p[2] == b"z" + release(p) + assert p[2] == b"z" # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_from_buffer_contextmgr(): + a = bytearray(b"xyz") + BChar = new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + p = from_buffer(BCharA, a) + with p: + assert p[2] == b"z" + assert p[2] == b"z" # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_bytearray_on_cpython(): + if '__pypy__' in sys.builtin_module_names: + py.test.skip("pypy's bytearray are never locked") + a = bytearray(b"xyz") + BChar = new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + a += b't' * 10 + p = from_buffer(BCharA, a) + py.test.raises(BufferError, "a += b'u' * 100") + release(p) + a += b'v' * 100 + release(p) # no effect + a += b'w' * 1000 + assert a == bytearray(b"xyz" + b't' * 10 + b'v' * 100 + b'w' * 1000) diff --git a/pypy/module/_cffi_backend/test/test_recompiler.py b/pypy/module/_cffi_backend/test/test_recompiler.py --- a/pypy/module/_cffi_backend/test/test_recompiler.py +++ b/pypy/module/_cffi_backend/test/test_recompiler.py @@ -2108,3 +2108,36 @@ else: assert lib.__loader__ is None assert lib.__spec__ is None + + def test_release(self): + ffi, lib = self.prepare("", "test_release", "") + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or segfault... 
+ ffi.release(p) # no effect + + def test_release_new_allocator(self): + ffi, lib = self.prepare("struct ab { int a, b; };", + "test_release_new_allocator", + "struct ab { int a, b; };") + seen = [] + def myalloc(size): + seen.append(size) + return ffi.new("char[]", b"X" * size) + def myfree(raw): + seen.append(raw) + alloc2 = ffi.new_allocator(alloc=myalloc, free=myfree) + p = alloc2("int[]", 15) + assert seen == [15 * 4] + ffi.release(p) + assert seen == [15 * 4, p] + ffi.release(p) # no effect + assert seen == [15 * 4, p] + # + del seen[:] + p = alloc2("struct ab *") + assert seen == [2 * 4] + ffi.release(p) + assert seen == [2 * 4, p] + ffi.release(p) # no effect + assert seen == [2 * 4, p] diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -247,7 +247,6 @@ def xmlcharrefreplace_errors(space, w_exc): - check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): w_obj = space.getattr(w_exc, space.newtext('object')) @@ -276,7 +275,6 @@ def backslashreplace_errors(space, w_exc): - check_exception(space, w_exc) if (space.isinstance_w(w_exc, space.w_UnicodeEncodeError) or space.isinstance_w(w_exc, space.w_UnicodeTranslateError)): @@ -664,7 +662,6 @@ def wrap_encoder(space, w_arg, errors="strict"): # w_arg is a W_Unicode or W_Bytes? 
w_arg = space.convert_arg_to_w_unicode(w_arg, errors) - w_arg = space.convert_arg_to_w_unicode(w_arg) if errors is None: errors = 'strict' allow_surrogates = False @@ -684,8 +681,6 @@ @unwrap_spec(string='bufferstr', errors='text_or_none', w_final=WrappedDefault(False)) def wrap_decoder(space, string, errors="strict", w_final=None): - - if errors is None: errors = 'strict' final = space.is_true(w_final) From pypy.commits at gmail.com Mon Jan 7 18:04:51 2019 From: pypy.commits at gmail.com (arigo) Date: Mon, 07 Jan 2019 15:04:51 -0800 (PST) Subject: [pypy-commit] cffi default: Implement ffi.from_buffer("foo[]", x) Message-ID: <5c33db13.1c69fb81.a0ddf.27c1@mx.google.com> Author: Armin Rigo Branch: Changeset: r3179:097f3540b5aa Date: 2019-01-07 23:23 +0100 http://bitbucket.org/cffi/cffi/changeset/097f3540b5aa/ Log: Implement ffi.from_buffer("foo[]", x) Also contains some improvements to the documentation about other recent additions diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c --- a/c/_cffi_backend.c +++ b/c/_cffi_backend.c @@ -175,7 +175,7 @@ #define CT_IS_FILE 0x00100000 #define CT_IS_VOID_PTR 0x00200000 #define CT_WITH_VAR_ARRAY 0x00400000 -#define CT_IS_UNSIZED_CHAR_A 0x00800000 +/* unused 0x00800000 */ #define CT_LAZY_FIELD_LIST 0x01000000 #define CT_WITH_PACKED_CHANGE 0x02000000 #define CT_IS_SIGNED_WCHAR 0x04000000 @@ -1870,7 +1870,7 @@ cffi_closure_free(closure); #endif } - else if (cd->c_type->ct_flags & CT_IS_UNSIZED_CHAR_A) { /* from_buffer */ + else if (cd->c_type->ct_flags & CT_ARRAY) { /* from_buffer */ Py_buffer *view = ((CDataObject_owngc_frombuf *)cd)->bufferview; PyBuffer_Release(view); PyObject_Free(view); @@ -1889,7 +1889,7 @@ PyObject *args = (PyObject *)(closure->user_data); Py_VISIT(args); } - else if (cd->c_type->ct_flags & CT_IS_UNSIZED_CHAR_A) { /* from_buffer */ + else if (cd->c_type->ct_flags & CT_ARRAY) { /* from_buffer */ Py_buffer *view = ((CDataObject_owngc_frombuf *)cd)->bufferview; Py_VISIT(view->obj); } @@ -1911,7 +1911,7 @@ 
closure->user_data = NULL; Py_XDECREF(args); } - else if (cd->c_type->ct_flags & CT_IS_UNSIZED_CHAR_A) { /* from_buffer */ + else if (cd->c_type->ct_flags & CT_ARRAY) { /* from_buffer */ Py_buffer *view = ((CDataObject_owngc_frombuf *)cd)->bufferview; PyBuffer_Release(view); } @@ -2125,7 +2125,7 @@ else return _cdata_repr2(cd, "calling", PyTuple_GET_ITEM(args, 1)); } - else if (cd->c_type->ct_flags & CT_IS_UNSIZED_CHAR_A) { /* from_buffer */ + else if (cd->c_type->ct_flags & CT_ARRAY) { /* from_buffer */ Py_buffer *view = ((CDataObject_owngc_frombuf *)cd)->bufferview; Py_ssize_t buflen = get_array_length(cd); return PyText_FromFormat( @@ -2369,7 +2369,7 @@ else if (cd->c_type->ct_flags & CT_ARRAY) { if (i < 0) { PyErr_SetString(PyExc_IndexError, - "negative index not supported"); + "negative index"); return NULL; } if (i >= get_array_length(cd)) { @@ -2422,7 +2422,7 @@ if (ct->ct_flags & CT_ARRAY) { if (start < 0) { PyErr_SetString(PyExc_IndexError, - "negative index not supported"); + "negative index"); return NULL; } if (stop > get_array_length(cd)) { @@ -3140,7 +3140,7 @@ return 0; } else if (Py_TYPE(cd) == &CDataOwningGC_Type) { - if (ct->ct_flags & CT_IS_UNSIZED_CHAR_A) /* ffi.from_buffer() */ + if (ct->ct_flags & CT_ARRAY) /* ffi.from_buffer() */ return 1; } else if (Py_TYPE(cd) == &CDataGCP_Type) { @@ -3309,24 +3309,24 @@ 0, /* tp_setattr */ 0, /* tp_compare */ (reprfunc)cdataowning_repr, /* tp_repr */ - 0, /* tp_as_number */ + 0, /* inherited */ /* tp_as_number */ 0, /* tp_as_sequence */ &CDataOwn_as_mapping, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ + 0, /* inherited */ /* tp_hash */ + 0, /* inherited */ /* tp_call */ 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ + 0, /* inherited */ /* tp_getattro */ + 0, /* inherited */ /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES, /* tp_flags */ 0, /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset 
*/ - 0, /* tp_iter */ + 0, /* inherited */ /* tp_richcompare */ + 0, /* inherited */ /* tp_weaklistoffset */ + 0, /* inherited */ /* tp_iter */ 0, /* tp_iternext */ - 0, /* tp_methods */ + 0, /* inherited */ /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ &CData_Type, /* tp_base */ @@ -3351,25 +3351,25 @@ 0, /* tp_setattr */ 0, /* tp_compare */ (reprfunc)cdataowninggc_repr, /* tp_repr */ - 0, /* tp_as_number */ + 0, /* inherited */ /* tp_as_number */ 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ + 0, /* inherited */ /* tp_as_mapping */ + 0, /* inherited */ /* tp_hash */ + 0, /* inherited */ /* tp_call */ 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ + 0, /* inherited */ /* tp_getattro */ + 0, /* inherited */ /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES /* tp_flags */ | Py_TPFLAGS_HAVE_GC, 0, /* tp_doc */ (traverseproc)cdataowninggc_traverse, /* tp_traverse */ (inquiry)cdataowninggc_clear, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ + 0, /* inherited */ /* tp_richcompare */ + 0, /* inherited */ /* tp_weaklistoffset */ + 0, /* inherited */ /* tp_iter */ 0, /* tp_iternext */ - 0, /* tp_methods */ + 0, /* inherited */ /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ &CDataOwning_Type, /* tp_base */ @@ -3393,15 +3393,15 @@ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ + 0, /* inherited */ /* tp_repr */ + 0, /* inherited */ /* tp_as_number */ 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ + 0, /* inherited */ /* tp_as_mapping */ + 0, /* inherited */ /* tp_hash */ + 0, /* inherited */ /* tp_call */ 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ + 0, /* inherited */ /* tp_getattro */ + 0, /* inherited */ /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES /* tp_flags */ #ifdef 
Py_TPFLAGS_HAVE_FINALIZE @@ -3411,11 +3411,11 @@ 0, /* tp_doc */ (traverseproc)cdatagcp_traverse, /* tp_traverse */ 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ + 0, /* inherited */ /* tp_richcompare */ + 0, /* inherited */ /* tp_weaklistoffset */ + 0, /* inherited */ /* tp_iter */ 0, /* tp_iternext */ - 0, /* tp_methods */ + 0, /* inherited */ /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ &CData_Type, /* tp_base */ @@ -3427,7 +3427,7 @@ 0, /* tp_init */ 0, /* tp_alloc */ 0, /* tp_new */ - 0, /* tp_free */ + 0, /* inherited */ /* tp_free */ 0, /* tp_is_gc */ 0, /* tp_bases */ 0, /* tp_mro */ @@ -3527,6 +3527,8 @@ CTypeDescrObject *ct, int dont_clear) { + /* note: objects with &CDataOwning_Type are always allocated with + either a plain malloc() or calloc(), and freed with free(). */ CDataObject *cd; if (dont_clear) cd = malloc(size); @@ -4688,9 +4690,6 @@ sprintf(extra_text, "[]"); length = -1; arraysize = -1; - if ((ctitem->ct_flags & CT_PRIMITIVE_CHAR) && - ctitem->ct_size == sizeof(char)) - flags |= CT_IS_UNSIZED_CHAR_A; } else { sprintf(extra_text, "[%llu]", (unsigned PY_LONG_LONG)length); @@ -6815,6 +6814,13 @@ { CDataObject *cd; Py_buffer *view; + Py_ssize_t arraylength; + + if (!(ct->ct_flags & CT_ARRAY)) { + PyErr_Format(PyExc_TypeError, "expected an array ctype, got '%s'", + ct->ct_name); + return NULL; + } /* PyPy 5.7 can obtain buffers for string (python 2) or bytes (python 3). from_buffer(u"foo") is disallowed. @@ -6834,6 +6840,41 @@ if (_my_PyObject_GetContiguousBuffer(x, view, require_writable) < 0) goto error1; + if (ct->ct_length >= 0) { + /* it's an array with a fixed length; make sure that the + buffer contains enough bytes. 
*/ + if (view->len < ct->ct_size) { + PyErr_Format(PyExc_ValueError, + "buffer is too small (%zd bytes) for '%s' (%zd bytes)", + view->len, ct->ct_name, ct->ct_size); + goto error1; + } + arraylength = ct->ct_length; + } + else { + /* it's an open 'array[]' */ + if (ct->ct_itemdescr->ct_size == 1) { + /* fast path, performance only */ + arraylength = view->len; + } + else if (ct->ct_itemdescr->ct_size > 0) { + /* give it as many items as fit the buffer. Ignore a + partial last element. */ + arraylength = view->len / ct->ct_itemdescr->ct_size; + } + else { + /* it's an array 'empty[]'. Unsupported obscure case: + the problem is that setting the length of the result + to anything large (like SSIZE_T_MAX) is dangerous, + because if someone tries to loop over it, it will + turn effectively into an infinite loop. */ + PyErr_Format(PyExc_ZeroDivisionError, + "from_buffer('%s', ..): the actual length of the array " + "cannot be computed", ct->ct_name); + goto error1; + } + } + cd = (CDataObject *)PyObject_GC_New(CDataObject_owngc_frombuf, &CDataOwningGC_Type); if (cd == NULL) @@ -6843,7 +6884,7 @@ cd->c_type = ct; cd->c_data = view->buf; cd->c_weakreflist = NULL; - ((CDataObject_owngc_frombuf *)cd)->length = view->len; + ((CDataObject_owngc_frombuf *)cd)->length = arraylength; ((CDataObject_owngc_frombuf *)cd)->bufferview = view; PyObject_GC_Track(cd); return (PyObject *)cd; @@ -6865,10 +6906,6 @@ &require_writable)) return NULL; - if (!(ct->ct_flags & CT_IS_UNSIZED_CHAR_A)) { - PyErr_Format(PyExc_TypeError, "needs 'char[]', got '%s'", ct->ct_name); - return NULL; - } return direct_from_buffer(ct, x, require_writable); } diff --git a/c/ffi_obj.c b/c/ffi_obj.c --- a/c/ffi_obj.c +++ b/c/ffi_obj.c @@ -697,16 +697,29 @@ "containing large quantities of raw data in some other format, like\n" "'array.array' or numpy arrays."); -static PyObject *ffi_from_buffer(PyObject *self, PyObject *args, PyObject *kwds) +static PyObject *ffi_from_buffer(FFIObject *self, PyObject *args, + 
PyObject *kwds) { - PyObject *arg; + PyObject *cdecl, *python_buf = NULL; + CTypeDescrObject *ct; int require_writable = 0; - static char *keywords[] = {"python_buffer", "require_writable", NULL}; + static char *keywords[] = {"cdecl", "python_buffer", + "require_writable", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:from_buffer", keywords, - &arg, &require_writable)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oi:from_buffer", keywords, + &cdecl, &python_buf, &require_writable)) return NULL; - return direct_from_buffer(g_ct_chararray, arg, require_writable); + + if (python_buf == NULL) { + python_buf = cdecl; + ct = g_ct_chararray; + } + else { + ct = _ffi_type(self, cdecl, ACCEPT_STRING|ACCEPT_CTYPE); + if (ct == NULL) + return NULL; + } + return direct_from_buffer(ct, python_buf, require_writable); } PyDoc_STRVAR(ffi_gc_doc, diff --git a/c/test_c.py b/c/test_c.py --- a/c/test_c.py +++ b/c/test_c.py @@ -3753,6 +3753,64 @@ p1[0] = b"g" assert ba == b"goo" +def test_from_buffer_types(): + BInt = new_primitive_type("int") + BIntP = new_pointer_type(BInt) + BIntA = new_array_type(BIntP, None) + lst = [-12345678, 87654321, 489148] + bytestring = buffer(newp(BIntA, lst))[:] + b'XYZ' + # + p1 = from_buffer(BIntA, bytestring) # int[] + assert typeof(p1) is BIntA + assert len(p1) == 3 + assert p1[0] == lst[0] + assert p1[1] == lst[1] + assert p1[2] == lst[2] + py.test.raises(IndexError, "p1[3]") + py.test.raises(IndexError, "p1[-1]") + # + py.test.raises(TypeError, from_buffer, BInt, bytestring) + py.test.raises(TypeError, from_buffer, BIntP, bytestring) + # + BIntA2 = new_array_type(BIntP, 2) + p2 = from_buffer(BIntA2, bytestring) # int[2] + assert typeof(p2) is BIntA2 + assert len(p2) == 2 + assert p2[0] == lst[0] + assert p2[1] == lst[1] + py.test.raises(IndexError, "p2[2]") + py.test.raises(IndexError, "p2[-1]") + assert p2 == p1 + # + BIntA4 = new_array_type(BIntP, 4) # int[4]: too big + py.test.raises(ValueError, from_buffer, BIntA4, bytestring) 
+ # + BStruct = new_struct_type("foo") + complete_struct_or_union(BStruct, [('a1', BInt, -1), + ('a2', BInt, -1)]) + BStructP = new_pointer_type(BStruct) + BStructA = new_array_type(BStructP, None) + p1 = from_buffer(BStructA, bytestring) # struct[] + assert len(p1) == 1 + assert typeof(p1) is BStructA + assert p1[0].a1 == lst[0] + assert p1[0].a2 == lst[1] + py.test.raises(IndexError, "p1[1]") + # + BEmptyStruct = new_struct_type("empty") + complete_struct_or_union(BEmptyStruct, [], Ellipsis, 0) + assert sizeof(BEmptyStruct) == 0 + BEmptyStructP = new_pointer_type(BEmptyStruct) + BEmptyStructA = new_array_type(BEmptyStructP, None) + py.test.raises(ZeroDivisionError, from_buffer, # empty[] + BEmptyStructA, bytestring) + # + BEmptyStructA5 = new_array_type(BEmptyStructP, 5) + p1 = from_buffer(BEmptyStructA5, bytestring) # struct empty[5] + assert typeof(p1) is BEmptyStructA5 + assert len(p1) == 5 + assert cast(BIntP, p1) == from_buffer(BIntA, bytestring) + def test_memmove(): Short = new_primitive_type("short") ShortA = new_array_type(new_pointer_type(Short), None) diff --git a/cffi/api.py b/cffi/api.py --- a/cffi/api.py +++ b/cffi/api.py @@ -16,6 +16,8 @@ # Python 3.x basestring = str +_unspecified = object() + class FFI(object): @@ -341,15 +343,22 @@ # """ # note that 'buffer' is a type, set on this instance by __init__ - def from_buffer(self, python_buffer, require_writable=False): - """Return a that points to the data of the + def from_buffer(self, cdecl, python_buffer=_unspecified, + require_writable=False): + """Return a cdata of the given type pointing to the data of the given Python object, which must support the buffer interface. Note that this is not meant to be used on the built-in types str or unicode (you can build 'char[]' arrays explicitly) but only on objects containing large quantities of raw data in some other format, like 'array.array' or numpy arrays. + + The first argument is optional and default to 'char[]'. 
""" - return self._backend.from_buffer(self.BCharA, python_buffer, + if python_buffer is _unspecified: + cdecl, python_buffer = self.BCharA, cdecl + elif isinstance(cdecl, basestring): + cdecl = self._typeof(cdecl) + return self._backend.from_buffer(cdecl, python_buffer, require_writable) def memmove(self, dest, src, n): diff --git a/doc/source/ref.rst b/doc/source/ref.rst --- a/doc/source/ref.rst +++ b/doc/source/ref.rst @@ -60,7 +60,7 @@ `ffi.new_allocator()`_ for a way to allocate non-zero-initialized memory. -*New in version 1.12:* see ``ffi.release()``. +*New in version 1.12:* see also ``ffi.release()``. ffi.cast() @@ -190,8 +190,8 @@ *New in version 1.10:* ``ffi.buffer`` is now the type of the returned buffer objects; ``ffi.buffer()`` actually calls the constructor. -**ffi.from_buffer(python_buffer, require_writable=False)**: -return a ```` that +**ffi.from_buffer([cdecl,] python_buffer, require_writable=False)**: +return an array cdata (by default a ````) that points to the data of the given Python object, which must support the buffer interface. This is the opposite of ``ffi.buffer()``. It gives a reference to the existing data, not a copy. @@ -219,19 +219,33 @@ resize the bytearray, the ```` object will point to freed memory); and byte strings were supported in version 1.8 onwards. -*New in version 1.12:* added the ``require_writable`` argument. If set to -True, the function fails if the buffer obtained from ``python_buffer`` is -read-only (e.g. if ``python_buffer`` is a byte string). The exact exception is -raised by the object itself, and for things like bytes it varies with the -Python version, so don't rely on it. (Before version 1.12, the same effect can -be achieved with a hack: call ``ffi.memmove(python_buffer, b"", 0)``. This has -no effect if the object is writable, but fails if it is read-only.) 
+*New in version 1.12:* added the optional *first* argument ``cdecl``, and +the keyword argument ``require_writable``: -Please keep in mind that CFFI does not implement the C keyword ``const``: even -if you set ``require_writable`` to False explicitly, you still get a regular -read-write cdata pointer. +* ``cdecl`` defaults to ``"char[]"``, but a different array type can be + specified for the result. A value like ``"int[]"`` will return an array of + ints instead of chars, and its length will be set to the number of ints + that fit in the buffer (rounded down if the division is not exact). Values + like ``"int[42]"`` or ``"int[2][3]"`` will return an array of exactly 42 + (resp. 2-by-3) ints, raising a ValueError if the buffer is too small. The + difference between specifying ``"int[]"`` and using the older code ``p1 = + ffi.from_buffer(x); p2 = ffi.cast("int *", p1)`` is that the older code + needs to keep ``p1`` alive as long as ``p2`` is in use, because only ``p1`` + keeps the underlying Python object alive and locked. (In addition, + ``ffi.from_buffer("int[]", x)`` gives better array bound checking.) -*New in version 1.12:* see ``ffi.release()``. +* if ``require_writable`` is set to True, the function fails if the buffer + obtained from ``python_buffer`` is read-only (e.g. if ``python_buffer`` is + a byte string). The exact exception is raised by the object itself, and + for things like bytes it varies with the Python version, so don't rely on + it. (Before version 1.12, the same effect can be achieved with a hack: + call ``ffi.memmove(python_buffer, b"", 0)``. This has no effect if the + object is writable, but fails if it is read-only.) Please keep in mind + that CFFI does not implement the C keyword ``const``: even if you set + ``require_writable`` to False explicitly, you still get a regular + read-write cdata pointer. + +*New in version 1.12:* see also ``ffi.release()``. 
ffi.memmove() @@ -387,7 +401,7 @@ which means the destructor is called as soon as *this* exact returned object is garbage-collected. -*New in version 1.12:* see ``ffi.release()``. +*New in version 1.12:* see also ``ffi.release()``. **ffi.gc(ptr, None, size=0)**: removes the ownership on a object returned by a @@ -569,21 +583,34 @@ and then call these two functions manually:: - p = lib.malloc(bigsize) + p = lib.malloc(n * ffi.sizeof("int")) try: - my_array = ffi.cast("some_other_type_than_void*", p) + my_array = ffi.cast("int *", p) ... finally: lib.free(p) +In cffi version 1.12 you can indeed use ``ffi.new_allocator()`` but use the +``with`` statement (see ``ffi.release()``) to force the free function to be +called at a known point. The above is equivalent to this code:: -ffi.release() -+++++++++++++ + my_new = ffi.new_allocator(lib.malloc, lib.free) # at global level + ... + with my_new("int[]", n) as my_array: + ... -**ffi.release(cdata)**: release now the resources held by a cdata object from - ``ffi.new()``, ``ffi.gc()``, ``ffi.from_buffer()`` or - ``ffi.new_allocator()()``. The cdata object must not be used afterwards. - *New in version 1.12.* + +.. _ffi-release: + +ffi.release() and the context manager ++++++++++++++++++++++++++++++++++++++ + +**ffi.release(cdata)**: release the resources held by a cdata object from +``ffi.new()``, ``ffi.gc()``, ``ffi.from_buffer()`` or +``ffi.new_allocator()()``. The cdata object must not be used afterwards. +The regular destructor of the cdata object releases the same resources, +but this allows the operation to occur at a known time. +*New in version 1.12.* ``ffi.release(cdata)`` is equivalent to ``cdata.__exit__()``, which means that you can use the ``with`` statement to ensure that the cdata is released at the @@ -592,6 +619,8 @@ with ffi.from_buffer(...) 
as p: do something with p +The effect is more precisely as follows: + * on an object returned from ``ffi.gc(destructor)``, ``ffi.release()`` will cause the ``destructor`` to be called immediately. @@ -599,8 +628,8 @@ is called immediately. * on CPython, ``ffi.from_buffer(buf)`` locks the buffer, so ``ffi.release()`` - unlocks it at a deterministic point. On PyPy, there is no locking (so far) - so this has no effect. + can be used to unlock it at a known time. On PyPy, there is no locking + (so far) so this has no effect. * on CPython this method has no effect (so far) on objects returned by ``ffi.new()``, because the memory is allocated inline with the cdata object @@ -615,6 +644,11 @@ anyway; but calling ``ffi.release()`` explicitly should improve performance by reducing the frequency of GC runs. +After ``ffi.release(x)``, do not use anything pointed to by ``x`` any longer. +As an exception to this rule, you can call ``ffi.release(x)`` several times +for the exact same cdata object ``x``; the calls after the first one are +ignored. + ffi.init_once() +++++++++++++++ diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst --- a/doc/source/whatsnew.rst +++ b/doc/source/whatsnew.rst @@ -23,8 +23,9 @@ * CPython 2.x: ``ffi.dlopen()`` failed with non-ascii file names on Posix -* ``ffi.from_buffer()`` takes a new keyword argument ``require_writable``. - When set to True, it asks the object passed in to raise an exception if +* ``ffi.from_buffer()`` takes two new arguments: an optional *first* argument + gives the array type of the result; and the keyword argument + ``require_writable`` can ask the object passed in to raise an exception if it is read-only. 
* ``ffi.new()``, ``ffi.gc()`` or ``ffi.from_buffer()`` cdata objects diff --git a/testing/cffi0/test_ffi_backend.py b/testing/cffi0/test_ffi_backend.py --- a/testing/cffi0/test_ffi_backend.py +++ b/testing/cffi0/test_ffi_backend.py @@ -324,16 +324,22 @@ a = array.array('H', [10000, 20000, 30000]) c = ffi.from_buffer(a) assert ffi.typeof(c) is ffi.typeof("char[]") + assert len(c) == 6 ffi.cast("unsigned short *", c)[1] += 500 assert list(a) == [10000, 20500, 30000] - assert c == ffi.from_buffer(a, True) + assert c == ffi.from_buffer("char[]", a, True) assert c == ffi.from_buffer(a, require_writable=True) # + c = ffi.from_buffer("unsigned short[]", a) + assert len(c) == 3 + assert c[1] == 20500 + # p = ffi.from_buffer(b"abcd") assert p[2] == b"c" # - assert p == ffi.from_buffer(b"abcd", False) - py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) + assert p == ffi.from_buffer(b"abcd", require_writable=False) + py.test.raises((TypeError, BufferError), ffi.from_buffer, + "char[]", b"abcd", True) py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", require_writable=True) diff --git a/testing/cffi1/test_ffi_obj.py b/testing/cffi1/test_ffi_obj.py --- a/testing/cffi1/test_ffi_obj.py +++ b/testing/cffi1/test_ffi_obj.py @@ -238,19 +238,31 @@ def test_ffi_from_buffer(): import array ffi = _cffi1_backend.FFI() - a = array.array('H', [10000, 20000, 30000]) + a = array.array('H', [10000, 20000, 30000, 40000]) c = ffi.from_buffer(a) assert ffi.typeof(c) is ffi.typeof("char[]") + assert len(c) == 8 ffi.cast("unsigned short *", c)[1] += 500 - assert list(a) == [10000, 20500, 30000] - assert c == ffi.from_buffer(a, True) + assert list(a) == [10000, 20500, 30000, 40000] + py.test.raises(TypeError, ffi.from_buffer, a, True) + assert c == ffi.from_buffer("char[]", a, True) assert c == ffi.from_buffer(a, require_writable=True) # + c = ffi.from_buffer("unsigned short[]", a) + assert len(c) == 4 + assert c[1] == 20500 + # + c = ffi.from_buffer("unsigned 
short[2][2]", a) + assert len(c) == 2 + assert len(c[0]) == 2 + assert c[0][1] == 20500 + # p = ffi.from_buffer(b"abcd") assert p[2] == b"c" # - assert p == ffi.from_buffer(b"abcd", False) - py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) + assert p == ffi.from_buffer(b"abcd", require_writable=False) + py.test.raises((TypeError, BufferError), ffi.from_buffer, + "char[]", b"abcd", True) py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", require_writable=True) diff --git a/testing/cffi1/test_new_ffi_1.py b/testing/cffi1/test_new_ffi_1.py --- a/testing/cffi1/test_new_ffi_1.py +++ b/testing/cffi1/test_new_ffi_1.py @@ -1675,24 +1675,6 @@ py.test.raises(TypeError, len, q.a) py.test.raises(TypeError, list, q.a) - def test_from_buffer(self): - import array - a = array.array('H', [10000, 20000, 30000]) - c = ffi.from_buffer(a) - assert ffi.typeof(c) is ffi.typeof("char[]") - ffi.cast("unsigned short *", c)[1] += 500 - assert list(a) == [10000, 20500, 30000] - assert c == ffi.from_buffer(a, True) - assert c == ffi.from_buffer(a, require_writable=True) - # - p = ffi.from_buffer(b"abcd") - assert p[2] == b"c" - # - assert p == ffi.from_buffer(b"abcd", False) - py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) - py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", - require_writable=True) - def test_all_primitives(self): assert set(PRIMITIVE_TO_INDEX) == set([ "char", From pypy.commits at gmail.com Tue Jan 8 03:10:50 2019 From: pypy.commits at gmail.com (arigo) Date: Tue, 08 Jan 2019 00:10:50 -0800 (PST) Subject: [pypy-commit] pypy default: Fix test Message-ID: <5c345b0a.1c69fb81.9e287.b29c@mx.google.com> Author: Armin Rigo Branch: Changeset: r95587:0916788c705e Date: 2019-01-08 09:10 +0100 http://bitbucket.org/pypy/pypy/changeset/0916788c705e/ Log: Fix test diff --git a/pypy/module/pypyjit/test_pypy_c/test_ffi.py b/pypy/module/pypyjit/test_pypy_c/test_ffi.py --- 
a/pypy/module/pypyjit/test_pypy_c/test_ffi.py +++ b/pypy/module/pypyjit/test_pypy_c/test_ffi.py @@ -407,6 +407,7 @@ i138 = call_i(ConstClass(_ll_1_raw_malloc_varsize_zero__Signed), 6, descr=...) check_memory_error(i138) setfield_gc(p132, i138, descr=...) + setfield_gc(p132, 0, descr=...) setfield_gc(p132, ConstPtr(ptr139), descr=...) setfield_gc(p132, -1, descr=...) setfield_gc(p0, p133, descr=...) From pypy.commits at gmail.com Tue Jan 8 03:21:56 2019 From: pypy.commits at gmail.com (arigo) Date: Tue, 08 Jan 2019 00:21:56 -0800 (PST) Subject: [pypy-commit] cffi default: Implement a limited form of from_buffer-release() on pypy Message-ID: <5c345da4.1c69fb81.b715b.2801@mx.google.com> Author: Armin Rigo Branch: Changeset: r3180:c25914172239 Date: 2019-01-08 09:21 +0100 http://bitbucket.org/cffi/cffi/changeset/c25914172239/ Log: Implement a limited form of from_buffer-release() on pypy diff --git a/doc/source/ref.rst b/doc/source/ref.rst --- a/doc/source/ref.rst +++ b/doc/source/ref.rst @@ -629,7 +629,9 @@ * on CPython, ``ffi.from_buffer(buf)`` locks the buffer, so ``ffi.release()`` can be used to unlock it at a known time. On PyPy, there is no locking - (so far) so this has no effect. + (so far); the effect of ``ffi.release()`` is limited to removing the link, + allowing the original buffer object to be garbage-collected even if the + cdata object stays alive. 
* on CPython this method has no effect (so far) on objects returned by ``ffi.new()``, because the memory is allocated inline with the cdata object From pypy.commits at gmail.com Tue Jan 8 03:26:13 2019 From: pypy.commits at gmail.com (arigo) Date: Tue, 08 Jan 2019 00:26:13 -0800 (PST) Subject: [pypy-commit] cffi default: improve error message Message-ID: <5c345ea5.1c69fb81.7bfae.09ba@mx.google.com> Author: Armin Rigo Branch: Changeset: r3181:eb6ae6bf3c61 Date: 2019-01-08 09:26 +0100 http://bitbucket.org/cffi/cffi/changeset/eb6ae6bf3c61/ Log: improve error message diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c --- a/c/_cffi_backend.c +++ b/c/_cffi_backend.c @@ -3147,8 +3147,9 @@ return 2; /* ffi.gc() */ } PyErr_SetString(PyExc_ValueError, - "only 'cdata' object from ffi.new(), ffi.gc() or ffi.from_buffer() " - "can be used with the 'with' keyword or ffi.release()"); + "only 'cdata' object from ffi.new(), ffi.gc(), ffi.from_buffer() " + "or ffi.new_allocator()() can be used with the 'with' keyword or " + "ffi.release()"); return -1; } From pypy.commits at gmail.com Tue Jan 8 04:01:16 2019 From: pypy.commits at gmail.com (arigo) Date: Tue, 08 Jan 2019 01:01:16 -0800 (PST) Subject: [pypy-commit] cffi default: Oops, fix leak on errors Message-ID: <5c3466dc.1c69fb81.d026b.7a82@mx.google.com> Author: Armin Rigo Branch: Changeset: r3182:1bba8ce1cb04 Date: 2019-01-08 10:00 +0100 http://bitbucket.org/cffi/cffi/changeset/1bba8ce1cb04/ Log: Oops, fix leak on errors diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c --- a/c/_cffi_backend.c +++ b/c/_cffi_backend.c @@ -6848,7 +6848,7 @@ PyErr_Format(PyExc_ValueError, "buffer is too small (%zd bytes) for '%s' (%zd bytes)", view->len, ct->ct_name, ct->ct_size); - goto error1; + goto error2; } arraylength = ct->ct_length; } @@ -6872,7 +6872,7 @@ PyErr_Format(PyExc_ZeroDivisionError, "from_buffer('%s', ..): the actual length of the array " "cannot be computed", ct->ct_name); - goto error1; + goto error2; } } From pypy.commits at 
gmail.com Tue Jan 8 04:57:20 2019 From: pypy.commits at gmail.com (arigo) Date: Tue, 08 Jan 2019 01:57:20 -0800 (PST) Subject: [pypy-commit] pypy default: update to cffi/1bba8ce1cb04: from_buffer("type", buf) Message-ID: <5c347400.1c69fb81.55481.dc1f@mx.google.com> Author: Armin Rigo Branch: Changeset: r95588:76ec5a848264 Date: 2019-01-08 10:56 +0100 http://bitbucket.org/pypy/pypy/changeset/76ec5a848264/ Log: update to cffi/1bba8ce1cb04: from_buffer("type",buf) diff --git a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py --- a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py +++ b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py @@ -325,16 +325,22 @@ a = array.array('H', [10000, 20000, 30000]) c = ffi.from_buffer(a) assert ffi.typeof(c) is ffi.typeof("char[]") + assert len(c) == 6 ffi.cast("unsigned short *", c)[1] += 500 assert list(a) == [10000, 20500, 30000] - assert c == ffi.from_buffer(a, True) + assert c == ffi.from_buffer("char[]", a, True) assert c == ffi.from_buffer(a, require_writable=True) # + c = ffi.from_buffer("unsigned short[]", a) + assert len(c) == 3 + assert c[1] == 20500 + # p = ffi.from_buffer(b"abcd") assert p[2] == b"c" # - assert p == ffi.from_buffer(b"abcd", False) - py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) + assert p == ffi.from_buffer(b"abcd", require_writable=False) + py.test.raises((TypeError, BufferError), ffi.from_buffer, + "char[]", b"abcd", True) py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", require_writable=True) diff --git a/extra_tests/cffi_tests/cffi1/test_ffi_obj.py b/extra_tests/cffi_tests/cffi1/test_ffi_obj.py --- a/extra_tests/cffi_tests/cffi1/test_ffi_obj.py +++ b/extra_tests/cffi_tests/cffi1/test_ffi_obj.py @@ -239,19 +239,31 @@ def test_ffi_from_buffer(): import array ffi = _cffi1_backend.FFI() - a = array.array('H', [10000, 20000, 30000]) + a = array.array('H', [10000, 20000, 30000, 40000]) c = ffi.from_buffer(a) 
assert ffi.typeof(c) is ffi.typeof("char[]") + assert len(c) == 8 ffi.cast("unsigned short *", c)[1] += 500 - assert list(a) == [10000, 20500, 30000] - assert c == ffi.from_buffer(a, True) + assert list(a) == [10000, 20500, 30000, 40000] + py.test.raises(TypeError, ffi.from_buffer, a, True) + assert c == ffi.from_buffer("char[]", a, True) assert c == ffi.from_buffer(a, require_writable=True) # + c = ffi.from_buffer("unsigned short[]", a) + assert len(c) == 4 + assert c[1] == 20500 + # + c = ffi.from_buffer("unsigned short[2][2]", a) + assert len(c) == 2 + assert len(c[0]) == 2 + assert c[0][1] == 20500 + # p = ffi.from_buffer(b"abcd") assert p[2] == b"c" # - assert p == ffi.from_buffer(b"abcd", False) - py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) + assert p == ffi.from_buffer(b"abcd", require_writable=False) + py.test.raises((TypeError, BufferError), ffi.from_buffer, + "char[]", b"abcd", True) py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", require_writable=True) diff --git a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py --- a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py +++ b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py @@ -1676,24 +1676,6 @@ py.test.raises(TypeError, len, q.a) py.test.raises(TypeError, list, q.a) - def test_from_buffer(self): - import array - a = array.array('H', [10000, 20000, 30000]) - c = ffi.from_buffer(a) - assert ffi.typeof(c) is ffi.typeof("char[]") - ffi.cast("unsigned short *", c)[1] += 500 - assert list(a) == [10000, 20500, 30000] - assert c == ffi.from_buffer(a, True) - assert c == ffi.from_buffer(a, require_writable=True) - # - p = ffi.from_buffer(b"abcd") - assert p[2] == b"c" - # - assert p == ffi.from_buffer(b"abcd", False) - py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) - py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", - require_writable=True) - def test_all_primitives(self): assert 
set(PRIMITIVE_TO_INDEX) == set([ "char", diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -16,6 +16,8 @@ # Python 3.x basestring = str +_unspecified = object() + class FFI(object): @@ -341,15 +343,22 @@ # """ # note that 'buffer' is a type, set on this instance by __init__ - def from_buffer(self, python_buffer, require_writable=False): - """Return a that points to the data of the + def from_buffer(self, cdecl, python_buffer=_unspecified, + require_writable=False): + """Return a cdata of the given type pointing to the data of the given Python object, which must support the buffer interface. Note that this is not meant to be used on the built-in types str or unicode (you can build 'char[]' arrays explicitly) but only on objects containing large quantities of raw data in some other format, like 'array.array' or numpy arrays. + + The first argument is optional and default to 'char[]'. """ - return self._backend.from_buffer(self.BCharA, python_buffer, + if python_buffer is _unspecified: + cdecl, python_buffer = self.BCharA, cdecl + elif isinstance(cdecl, basestring): + cdecl = self._typeof(cdecl) + return self._backend.from_buffer(cdecl, python_buffer, require_writable) def memmove(self, dest, src, n): diff --git a/pypy/module/_cffi_backend/cdataobj.py b/pypy/module/_cffi_backend/cdataobj.py --- a/pypy/module/_cffi_backend/cdataobj.py +++ b/pypy/module/_cffi_backend/cdataobj.py @@ -485,8 +485,9 @@ def enter_exit(self, exit_now): raise oefmt(self.space.w_ValueError, - "only 'cdata' object from ffi.new(), ffi.gc() or ffi.from_buffer() " - "can be used with the 'with' keyword or ffi.release()") + "only 'cdata' object from ffi.new(), ffi.gc(), ffi.from_buffer() " + "or ffi.new_allocator()() can be used with the 'with' keyword or " + "ffi.release()") def descr_enter(self): self.enter_exit(False) @@ -664,24 +665,28 @@ class W_CDataFromBuffer(W_CData): _attrs_ = ['buf', 'length', 'w_keepalive'] - 
_immutable_fields_ = ['buf', 'length', 'w_keepalive'] + _immutable_fields_ = ['buf', 'length'] - def __init__(self, space, cdata, ctype, buf, w_object): + def __init__(self, space, cdata, length, ctype, buf, w_object): W_CData.__init__(self, space, cdata, ctype) self.buf = buf - self.length = buf.getlength() + self.length = length self.w_keepalive = w_object def get_array_length(self): return self.length def _repr_extra(self): - w_repr = self.space.repr(self.w_keepalive) - return "buffer len %d from '%s' object" % ( - self.length, self.space.type(self.w_keepalive).name) + if self.w_keepalive is not None: + name = self.space.type(self.w_keepalive).name + else: + name = "(released)" + return "buffer len %d from '%s' object" % (self.length, name) def enter_exit(self, exit_now): - pass # for now, no effect on PyPy + # for now, limited effect on PyPy + if exit_now: + self.w_keepalive = None class W_CDataGCP(W_CData): diff --git a/pypy/module/_cffi_backend/ctypearray.py b/pypy/module/_cffi_backend/ctypearray.py --- a/pypy/module/_cffi_backend/ctypearray.py +++ b/pypy/module/_cffi_backend/ctypearray.py @@ -25,7 +25,7 @@ assert isinstance(ctptr, W_CTypePointer) W_CTypePtrOrArray.__init__(self, space, arraysize, extra, 0, ctptr.ctitem) - self.length = length + self.length = length # -1 if no length is given, e.g. 
'int[]' self.ctptr = ctptr def _alignof(self): @@ -86,7 +86,7 @@ def _check_subscript_index(self, w_cdata, i): space = self.space if i < 0: - raise oefmt(space.w_IndexError, "negative index not supported") + raise oefmt(space.w_IndexError, "negative index") if i >= w_cdata.get_array_length(): raise oefmt(space.w_IndexError, "index too large for cdata '%s' (expected %d < %d)", @@ -96,7 +96,7 @@ def _check_slice_index(self, w_cdata, start, stop): space = self.space if start < 0: - raise oefmt(space.w_IndexError, "negative index not supported") + raise oefmt(space.w_IndexError, "negative index") if stop > w_cdata.get_array_length(): raise oefmt(space.w_IndexError, "index too large (expected %d <= %d)", diff --git a/pypy/module/_cffi_backend/ffi_obj.py b/pypy/module/_cffi_backend/ffi_obj.py --- a/pypy/module/_cffi_backend/ffi_obj.py +++ b/pypy/module/_cffi_backend/ffi_obj.py @@ -328,7 +328,8 @@ @unwrap_spec(require_writable=int) - def descr_from_buffer(self, w_python_buffer, require_writable=0): + def descr_from_buffer(self, w_cdecl, w_python_buffer=None, + require_writable=0): """\ Return a that points to the data of the given Python object, which must support the buffer interface. 
Note that this is @@ -337,9 +338,13 @@ containing large quantities of raw data in some other format, like 'array.array' or numpy arrays.""" # - w_ctchara = newtype._new_chara_type(self.space) - return func._from_buffer(self.space, w_ctchara, w_python_buffer, - require_writable) + if w_python_buffer is None: + w_python_buffer = w_cdecl + w_ctype = newtype._new_chara_type(self.space) + else: + w_ctype = self.ffi_type(w_cdecl, ACCEPT_STRING | ACCEPT_CTYPE) + return func.from_buffer(self.space, w_ctype, w_python_buffer, + require_writable) @unwrap_spec(w_arg=W_CData) diff --git a/pypy/module/_cffi_backend/func.py b/pypy/module/_cffi_backend/func.py --- a/pypy/module/_cffi_backend/func.py +++ b/pypy/module/_cffi_backend/func.py @@ -112,16 +112,10 @@ @unwrap_spec(w_ctype=ctypeobj.W_CType, require_writable=int) def from_buffer(space, w_ctype, w_x, require_writable=0): - from pypy.module._cffi_backend import ctypearray, ctypeprim - # - if (not isinstance(w_ctype, ctypearray.W_CTypeArray) or - not isinstance(w_ctype.ctptr.ctitem, ctypeprim.W_CTypePrimitiveChar)): - raise oefmt(space.w_TypeError, - "needs 'char[]', got '%s'", w_ctype.name) - # - return _from_buffer(space, w_ctype, w_x, require_writable) - -def _from_buffer(space, w_ctype, w_x, require_writable): + from pypy.module._cffi_backend import ctypearray + if not isinstance(w_ctype, ctypearray.W_CTypeArray): + raise oefmt(space.w_TypeError, "expected an array ctype, got '%s'", + w_ctype.name) if space.isinstance_w(w_x, space.w_unicode): raise oefmt(space.w_TypeError, "from_buffer() cannot return the address of a unicode object") @@ -140,7 +134,37 @@ "buffer interface but cannot be rendered as a plain " "raw address on PyPy", w_x) # - return cdataobj.W_CDataFromBuffer(space, _cdata, w_ctype, buf, w_x) + buffersize = buf.getlength() + arraylength = w_ctype.length + if arraylength >= 0: + # it's an array with a fixed length; make sure that the + # buffer contains enough bytes. 
+ if buffersize < w_ctype.size: + raise oefmt(space.w_ValueError, + "buffer is too small (%d bytes) for '%s' (%d bytes)", + buffersize, w_ctype.name, w_ctype.size) + else: + # it's an open 'array[]' + itemsize = w_ctype.ctitem.size + if itemsize == 1: + # fast path, performance only + arraylength = buffersize + elif itemsize > 0: + # give it as many items as fit the buffer. Ignore a + # partial last element. + arraylength = buffersize / itemsize + else: + # it's an array 'empty[]'. Unsupported obscure case: + # the problem is that setting the length of the result + # to anything large (like SSIZE_T_MAX) is dangerous, + # because if someone tries to loop over it, it will + # turn effectively into an infinite loop. + raise oefmt(space.w_ZeroDivisionError, + "from_buffer('%s', ..): the actual length of the array " + "cannot be computed", w_ctype.name) + # + return cdataobj.W_CDataFromBuffer(space, _cdata, arraylength, + w_ctype, buf, w_x) # ____________________________________________________________ diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -3742,6 +3742,64 @@ p1[0] = b"g" assert ba == b"goo" +def test_from_buffer_types(): + BInt = new_primitive_type("int") + BIntP = new_pointer_type(BInt) + BIntA = new_array_type(BIntP, None) + lst = [-12345678, 87654321, 489148] + bytestring = buffer(newp(BIntA, lst))[:] + b'XYZ' + # + p1 = from_buffer(BIntA, bytestring) # int[] + assert typeof(p1) is BIntA + assert len(p1) == 3 + assert p1[0] == lst[0] + assert p1[1] == lst[1] + assert p1[2] == lst[2] + py.test.raises(IndexError, "p1[3]") + py.test.raises(IndexError, "p1[-1]") + # + py.test.raises(TypeError, from_buffer, BInt, bytestring) + py.test.raises(TypeError, from_buffer, BIntP, bytestring) + # + BIntA2 = new_array_type(BIntP, 2) + p2 = from_buffer(BIntA2, bytestring) # int[2] + assert 
typeof(p2) is BIntA2 + assert len(p2) == 2 + assert p2[0] == lst[0] + assert p2[1] == lst[1] + py.test.raises(IndexError, "p2[2]") + py.test.raises(IndexError, "p2[-1]") + assert p2 == p1 + # + BIntA4 = new_array_type(BIntP, 4) # int[4]: too big + py.test.raises(ValueError, from_buffer, BIntA4, bytestring) + # + BStruct = new_struct_type("foo") + complete_struct_or_union(BStruct, [('a1', BInt, -1), + ('a2', BInt, -1)]) + BStructP = new_pointer_type(BStruct) + BStructA = new_array_type(BStructP, None) + p1 = from_buffer(BStructA, bytestring) # struct[] + assert len(p1) == 1 + assert typeof(p1) is BStructA + assert p1[0].a1 == lst[0] + assert p1[0].a2 == lst[1] + py.test.raises(IndexError, "p1[1]") + # + BEmptyStruct = new_struct_type("empty") + complete_struct_or_union(BEmptyStruct, [], Ellipsis, 0) + assert sizeof(BEmptyStruct) == 0 + BEmptyStructP = new_pointer_type(BEmptyStruct) + BEmptyStructA = new_array_type(BEmptyStructP, None) + py.test.raises(ZeroDivisionError, from_buffer, # empty[] + BEmptyStructA, bytestring) + # + BEmptyStructA5 = new_array_type(BEmptyStructP, 5) + p1 = from_buffer(BEmptyStructA5, bytestring) # struct empty[5] + assert typeof(p1) is BEmptyStructA5 + assert len(p1) == 5 + assert cast(BIntP, p1) == from_buffer(BIntA, bytestring) + def test_memmove(): Short = new_primitive_type("short") ShortA = new_array_type(new_pointer_type(Short), None) diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py --- a/pypy/module/_cffi_backend/test/test_ffi_obj.py +++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py @@ -282,19 +282,31 @@ import _cffi_backend as _cffi1_backend import array ffi = _cffi1_backend.FFI() - a = array.array('H', [10000, 20000, 30000]) + a = array.array('H', [10000, 20000, 30000, 40000]) c = ffi.from_buffer(a) assert ffi.typeof(c) is ffi.typeof("char[]") + assert len(c) == 8 ffi.cast("unsigned short *", c)[1] += 500 - assert list(a) == [10000, 20500, 30000] - assert c == 
ffi.from_buffer(a, True) + assert list(a) == [10000, 20500, 30000, 40000] + raises(TypeError, ffi.from_buffer, a, True) + assert c == ffi.from_buffer("char[]", a, True) assert c == ffi.from_buffer(a, require_writable=True) # + c = ffi.from_buffer("unsigned short[]", a) + assert len(c) == 4 + assert c[1] == 20500 + # + c = ffi.from_buffer("unsigned short[2][2]", a) + assert len(c) == 2 + assert len(c[0]) == 2 + assert c[0][1] == 20500 + # p = ffi.from_buffer(b"abcd") assert p[2] == b"c" # - assert p == ffi.from_buffer(b"abcd", False) - raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) + assert p == ffi.from_buffer(b"abcd", require_writable=False) + raises((TypeError, BufferError), ffi.from_buffer, + "char[]", b"abcd", True) raises((TypeError, BufferError), ffi.from_buffer, b"abcd", require_writable=True) From pypy.commits at gmail.com Wed Jan 9 12:13:18 2019 From: pypy.commits at gmail.com (rlamy) Date: Wed, 09 Jan 2019 09:13:18 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Remove unused parameter 'flags' from make_ctx() Message-ID: <5c362bae.1c69fb81.f9b4d.cd8b@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r95589:fc63a4a7eff5 Date: 2019-01-09 17:12 +0000 http://bitbucket.org/pypy/pypy/changeset/fc63a4a7eff5/ Log: Remove unused parameter 'flags' from make_ctx() diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -169,7 +169,7 @@ assert length >= 0 return (length, unicodestr, string, buf) - def make_ctx(self, w_string, pos=0, endpos=sys.maxint, flags=0): + def make_ctx(self, w_string, pos=0, endpos=sys.maxint): """Make a StrMatchContext, BufMatchContext or a UnicodeMatchContext for searching in the given w_string object.""" space = self.space @@ -182,7 +182,6 @@ endpos = pos elif endpos > length: endpos = length - flags = self.flags | flags # if unicodestr is not None: if self.is_known_bytes(): @@ -190,7 +189,7 @@ "can't use a bytes 
pattern on a string-like " "object") return rsre_core.UnicodeMatchContext(unicodestr, - pos, endpos, flags) + pos, endpos, self.flags) else: if self.is_known_unicode(): raise oefmt(space.w_TypeError, @@ -198,10 +197,10 @@ "object") if string is not None: return rsre_core.StrMatchContext(string, - pos, endpos, flags) + pos, endpos, self.flags) else: return rsre_core.BufMatchContext(buf, - pos, endpos, flags) + pos, endpos, self.flags) def getmatch(self, ctx, found): if found: From pypy.commits at gmail.com Wed Jan 9 13:10:47 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 09 Jan 2019 10:10:47 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: revert 4ef833b2310d Message-ID: <5c363927.1c69fb81.fa7bf.aee9@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95590:082a51c10570 Date: 2019-01-09 08:08 +0200 http://bitbucket.org/pypy/pypy/changeset/082a51c10570/ Log: revert 4ef833b2310d diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -6,15 +6,14 @@ from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault from pypy.interpreter.error import OperationError, oefmt from rpython.rlib.rarithmetic import intmask -from rpython.rlib import jit -from rpython.rlib.rstring import StringBuilder, UnicodeBuilder -from rpython.rlib.runicode import unicode_encode_utf_8 +from rpython.rlib import jit, rutf8 +from rpython.rlib.rstring import StringBuilder # ____________________________________________________________ # # Constants and exposed functions -from rpython.rlib.rsre import rsre_core, rsre_char +from rpython.rlib.rsre import rsre_core, rsre_char, rsre_utf8 from rpython.rlib.rsre.rsre_char import CODESIZE, MAXREPEAT, MAXGROUPS, getlower, set_unicode_db @@ -35,17 +34,21 @@ def slice_w(space, ctx, start, end, w_default): - if 0 <= start <= end: + # 'start' and 'end' are byte positions + if ctx.ZERO <= start <= end: if 
isinstance(ctx, rsre_core.BufMatchContext): return space.newbytes(ctx._buffer.getslice(start, end, 1, end-start)) if isinstance(ctx, rsre_core.StrMatchContext): + start = ctx._real_pos(start) + end = ctx._real_pos(end) return space.newbytes(ctx._string[start:end]) + elif isinstance(ctx, rsre_utf8.Utf8MatchContext): + s = ctx._utf8[start:end] + lgt = rutf8.get_utf8_length(s) + return space.newutf8(s, lgt) elif isinstance(ctx, rsre_core.UnicodeMatchContext): - uni = ctx._unicodestr[start:end] - uni_utf8 = unicode_encode_utf_8(uni, len(uni), 'strict', - allow_surrogates=True) - return space.newtext(uni_utf8, len(uni)) + return space.newtext(ctx._unicodestr[start:end]) else: # unreachable raise SystemError @@ -57,6 +60,7 @@ # Returns a list of RPython-level integers. # Unlike the app-level groups() method, groups are numbered from 0 # and the returned list does not start with the whole match range. + # The integers are byte positions, not character indexes (for utf8). if num_groups == 0: return None result = [-1] * (2 * num_groups) @@ -109,7 +113,7 @@ def repr_w(self): space = self.space - u = space.utf8_w(space.repr(self.w_pattern)) + u = space.utf8_w(space.repr(self.w_pattern)).decode() if len(u) > 200: u = u[:200] flag_items = [] @@ -127,12 +131,12 @@ if flags != 0: flag_items.append('0x%x' % flags) if len(flag_items) == 0: - usep = '' - uflags = '' + usep = u'' + uflags = u'' else: - usep = ', ' - uflags = '|'.join(flag_items) - return space.newtext('re.compile(%s%s%s)' % (u, usep, uflags)) + usep = u', ' + uflags = u'|'.join([item.decode('latin-1') for item in flag_items]) + return space.newtext(u're.compile(%s%s%s)' % (u, usep, uflags)) def fget_groupindex(self, space): w_groupindex = self.w_groupindex @@ -162,7 +166,7 @@ buf = None space = self.space if space.isinstance_w(w_string, space.w_unicode): - unicodestr = space.realunicode_w(w_string) + unicodestr = space.utf8_w(w_string).decode('utf8') length = len(unicodestr) elif space.isinstance_w(w_string, 
space.w_bytes): string = space.bytes_w(w_string) @@ -174,7 +178,7 @@ return (length, unicodestr, string, buf) def make_ctx(self, w_string, pos=0, endpos=sys.maxint, flags=0): - """Make a StrMatchContext, BufMatchContext or a UnicodeMatchContext for + """Make a StrMatchContext, BufMatchContext or a Utf8MatchContext for searching in the given w_string object.""" space = self.space length, unicodestr, string, buf = self.getstring(w_string) @@ -207,6 +211,27 @@ return rsre_core.BufMatchContext(buf, pos, endpos, flags) + def fresh_copy(self, ctx): + if isinstance(ctx, rsre_utf8.Utf8MatchContext): + result = rsre_utf8.Utf8MatchContext( + ctx._utf8, ctx.match_start, ctx.end, ctx.flags) + result.w_unicode_obj = ctx.w_unicode_obj + elif isinstance(ctx, rsre_core.StrMatchContext): + result = self._make_str_match_context( + ctx._string, ctx.match_start, ctx.end) + elif isinstance(ctx, rsre_core.BufMatchContext): + result = rsre_core.BufMatchContext( + ctx._buffer, ctx.match_start, ctx.end, ctx.flags) + else: + raise AssertionError("bad ctx type") + result.match_end = ctx.match_end + return result + + def _make_str_match_context(self, str, pos, endpos): + # for tests to override + return rsre_core.StrMatchContext(str, + pos, endpos, self.flags) + def getmatch(self, ctx, found): if found: return W_SRE_Match(self, ctx) @@ -234,7 +259,7 @@ space = self.space matchlist_w = [] ctx = self.make_ctx(w_string, pos, endpos) - while ctx.match_start <= ctx.end: + while True: if not searchcontext(space, ctx, self.code): break num_groups = self.num_groups @@ -251,8 +276,12 @@ w_item = allgroups_w(space, ctx, fmarks, num_groups, w_emptystr) matchlist_w.append(w_item) - no_progress = (ctx.match_start == ctx.match_end) - ctx.reset(ctx.match_end + no_progress) + reset_at = ctx.match_end + if ctx.match_start == ctx.match_end: + if reset_at == ctx.end: + break + reset_at = ctx.next_indirect(reset_at) + ctx.reset(reset_at) return space.newlist(matchlist_w) @unwrap_spec(pos=int, endpos=int) @@ 
-277,15 +306,15 @@ # splitlist = [] n = 0 - last = 0 ctx = self.make_ctx(w_string) + last = ctx.ZERO while not maxsplit or n < maxsplit: if not searchcontext(space, ctx, self.code): break if ctx.match_start == ctx.match_end: # zero-width match if ctx.match_start == ctx.end: # or end of string break - ctx.reset(ctx.match_end + 1) + ctx.reset(ctx.next_indirect(ctx.match_end)) continue splitlist.append(slice_w(space, ctx, last, ctx.match_start, space.w_None)) @@ -314,27 +343,31 @@ def subx(self, w_ptemplate, w_string, count): space = self.space - # use a (much faster) string/unicode builder if w_ptemplate and + # use a (much faster) string builder (possibly utf8) if w_ptemplate and # w_string are both string or both unicode objects, and if w_ptemplate # is a literal - use_builder = False - filter_as_unicode = filter_as_string = None + use_builder = '\x00' # or 'S'tring or 'U'nicode/UTF8 + filter_as_string = None if space.is_true(space.callable(w_ptemplate)): w_filter = w_ptemplate filter_is_callable = True else: - length, filter_as_unicode, filter_as_string, buf = ( - self.getstring(w_ptemplate)) - if filter_as_unicode is not None: - literal = u'\\' not in filter_as_unicode - use_builder = ( - space.isinstance_w(w_string, space.w_unicode) and literal) + if space.isinstance_w(w_ptemplate, space.w_unicode): + filter_as_string = space.utf8_w(w_ptemplate) + literal = '\\' not in filter_as_string + if space.isinstance_w(w_string, space.w_unicode) and literal: + use_builder = 'U' else: - if buf is not None: - filter_as_string = buf.as_str() - literal = '\\' not in filter_as_string - use_builder = ( - space.isinstance_w(w_string, space.w_bytes) and literal) + try: + filter_as_string = space.bytes_w(w_ptemplate) + except OperationError as e: + if e.async(space): + raise + literal = False + else: + literal = '\\' not in filter_as_string + if space.isinstance_w(w_string, space.w_bytes) and literal: + use_builder = 'S' if literal: w_filter = w_ptemplate filter_is_callable = 
False @@ -351,18 +384,16 @@ # # XXX this is a bit of a mess, but it improves performance a lot ctx = self.make_ctx(w_string) - sublist_w = strbuilder = unicodebuilder = None - if use_builder: - if filter_as_unicode is not None: - unicodebuilder = UnicodeBuilder(ctx.end) - else: - assert filter_as_string is not None - strbuilder = StringBuilder(ctx.end) + sublist_w = strbuilder = None + if use_builder != '\x00': + assert filter_as_string is not None + strbuilder = StringBuilder(ctx.end) else: sublist_w = [] - n = last_pos = 0 - pattern = self.code + n = 0 + last_pos = ctx.ZERO while not count or n < count: + pattern = self.code sub_jitdriver.jit_merge_point( self=self, use_builder=use_builder, @@ -371,9 +402,7 @@ ctx=ctx, pattern=pattern, w_filter=w_filter, strbuilder=strbuilder, - unicodebuilder=unicodebuilder, filter_as_string=filter_as_string, - filter_as_unicode=filter_as_unicode, count=count, w_string=w_string, n=n, last_pos=last_pos, sublist_w=sublist_w @@ -384,10 +413,7 @@ if last_pos < ctx.match_start: _sub_append_slice( ctx, space, use_builder, sublist_w, - strbuilder, unicodebuilder, last_pos, ctx.match_start) - start = ctx.match_end - if start == ctx.match_start: - start += 1 + strbuilder, last_pos, ctx.match_start) if not (last_pos == ctx.match_start == ctx.match_end and n > 0): # the above ignores empty matches on latest position @@ -395,40 +421,48 @@ if filter_is_callable: w_match = self.getmatch(ctx, True) # make a copy of 'ctx'; see test_sub_matches_stay_valid - ctx = ctx.fresh_copy(start) # match_start/match_end dropped + ctx = self.fresh_copy(ctx) w_piece = space.call_function(w_filter, w_match) if not space.is_w(w_piece, space.w_None): - assert strbuilder is None and unicodebuilder is None - assert not use_builder + assert strbuilder is None + assert use_builder == '\x00' sublist_w.append(w_piece) else: - if use_builder: - if strbuilder is not None: - assert filter_as_string is not None - strbuilder.append(filter_as_string) - else: - assert 
unicodebuilder is not None - assert filter_as_unicode is not None - unicodebuilder.append(filter_as_unicode) + if use_builder != '\x00': + assert filter_as_string is not None + assert strbuilder is not None + strbuilder.append(filter_as_string) else: sublist_w.append(w_filter) n += 1 elif last_pos >= ctx.end: break # empty match at the end: finished + + start = ctx.match_end + if start == ctx.match_start: + if start == ctx.end: + break + start = ctx.next_indirect(start) ctx.reset(start) if last_pos < ctx.end: _sub_append_slice(ctx, space, use_builder, sublist_w, - strbuilder, unicodebuilder, last_pos, ctx.end) - if use_builder: - if strbuilder is not None: - return space.newbytes(strbuilder.build()), n + strbuilder, last_pos, ctx.end) + if use_builder != '\x00': + assert strbuilder is not None + result_bytes = strbuilder.build() + if use_builder == 'S': + assert not isinstance(ctx, rsre_utf8.Utf8MatchContext) + return space.newbytes(result_bytes), n + elif use_builder == 'U': + assert isinstance(ctx, rsre_utf8.Utf8MatchContext) + return space.newutf8(result_bytes, + rutf8.get_utf8_length(result_bytes)), n else: - assert unicodebuilder is not None - return space.newtext(unicodebuilder.build()), n + raise AssertionError(use_builder) else: if space.isinstance_w(w_string, space.w_unicode): - w_emptystr = space.newtext('') + w_emptystr = space.newutf8('', 0) else: w_emptystr = space.newbytes('') w_item = space.call_method(w_emptystr, 'join', @@ -438,26 +472,28 @@ sub_jitdriver = jit.JitDriver( reds="""count n last_pos ctx w_filter - strbuilder unicodebuilder + strbuilder filter_as_string - filter_as_unicode w_string sublist_w self""".split(), greens=["filter_is_callable", "use_builder", "filter_type", "pattern"]) def _sub_append_slice(ctx, space, use_builder, sublist_w, - strbuilder, unicodebuilder, start, end): - if use_builder: + strbuilder, start, end): + if use_builder != '\x00': + assert strbuilder is not None if isinstance(ctx, rsre_core.BufMatchContext): - assert 
strbuilder is not None + assert use_builder == 'S' return strbuilder.append(ctx._buffer.getslice(start, end, 1, end-start)) if isinstance(ctx, rsre_core.StrMatchContext): - assert strbuilder is not None + assert use_builder == 'S' + start = ctx._real_pos(start) + end = ctx._real_pos(end) return strbuilder.append_slice(ctx._string, start, end) - elif isinstance(ctx, rsre_core.UnicodeMatchContext): - assert unicodebuilder is not None - return unicodebuilder.append_slice(ctx._unicodestr, start, end) + elif isinstance(ctx, rsre_utf8.Utf8MatchContext): + assert use_builder == 'U' + return strbuilder.append_slice(ctx._utf8, start, end) assert 0, "unreachable" else: sublist_w.append(slice_w(space, ctx, start, end, space.w_None)) @@ -532,10 +568,10 @@ ctx = self.ctx start, end = ctx.match_start, ctx.match_end w_s = slice_w(space, ctx, start, end, space.w_None) - u = space.utf8_w(space.repr(w_s)) + u = space.utf8_w(space.repr(w_s)).decode() if len(u) > 50: u = u[:50] - return space.newtext('<_sre.SRE_Match object; span=(%d, %d), match=%s>' % + return space.newtext(u'<_sre.SRE_Match object; span=(%d, %d), match=%s>' % (start, end, u)) def cannot_copy_w(self): @@ -593,19 +629,38 @@ @unwrap_spec(w_groupnum=WrappedDefault(0)) def start_w(self, w_groupnum): start, end = self.do_span(w_groupnum) + start = self.bytepos_to_charindex(start) return self.space.newint(start) @unwrap_spec(w_groupnum=WrappedDefault(0)) def end_w(self, w_groupnum): start, end = self.do_span(w_groupnum) + end = self.bytepos_to_charindex(end) return self.space.newint(end) @unwrap_spec(w_groupnum=WrappedDefault(0)) def span_w(self, w_groupnum): start, end = self.do_span(w_groupnum) + return self.new_charindex_tuple(start, end) + + def new_charindex_tuple(self, start, end): + start = self.bytepos_to_charindex(start) + end = self.bytepos_to_charindex(end) return self.space.newtuple([self.space.newint(start), self.space.newint(end)]) + def bytepos_to_charindex(self, bytepos): + # Transform a 'byte position', as 
returned by all methods from + # rsre_core, back into a 'character index'. This is for UTF8 + # handling. + ctx = self.ctx + if isinstance(ctx, rsre_utf8.Utf8MatchContext): + index_storage = ctx.w_unicode_obj._get_index_storage() + return rutf8.codepoint_index_at_byte_position( + ctx.w_unicode_obj._utf8, index_storage, bytepos) + else: + return bytepos + def flatten_marks(self): if self.flatten_cache is None: num_groups = self.srepat.num_groups @@ -613,6 +668,8 @@ return self.flatten_cache def do_span(self, w_arg): + # return a pair of integers, which are byte positions, not + # character indexes (for utf8) space = self.space try: groupnum = space.int_w(w_arg) @@ -660,10 +717,10 @@ return space.w_None def fget_pos(self, space): - return space.newint(self.ctx.original_pos) + return space.newint(self.bytepos_to_charindex(self.ctx.original_pos)) def fget_endpos(self, space): - return space.newint(self.ctx.end) + return space.newint(self.bytepos_to_charindex(self.ctx.end)) def fget_regs(self, space): space = self.space @@ -671,11 +728,11 @@ num_groups = self.srepat.num_groups result_w = [None] * (num_groups + 1) ctx = self.ctx - result_w[0] = space.newtuple([space.newint(ctx.match_start), - space.newint(ctx.match_end)]) + result_w[0] = self.new_charindex_tuple(ctx.match_start, + ctx.match_end) for i in range(num_groups): - result_w[i + 1] = space.newtuple([space.newint(fmarks[i*2]), - space.newint(fmarks[i*2+1])]) + result_w[i + 1] = self.new_charindex_tuple(fmarks[i*2], + fmarks[i*2+1]) return space.newtuple(result_w) def fget_string(self, space): @@ -684,6 +741,9 @@ return space.newbytes(ctx._buffer.as_str()) elif isinstance(ctx, rsre_core.StrMatchContext): return space.newbytes(ctx._string) + elif isinstance(ctx, rsre_utf8.Utf8MatchContext): + lgt = rutf8.get_utf8_length(ctx._utf8) + return space.newutf8(ctx._utf8, lgt) elif isinstance(ctx, rsre_core.UnicodeMatchContext): return space.newtext(ctx._unicodestr) else: @@ -726,38 +786,53 @@ self.ctx = ctx self.code = 
code # 'self.ctx' is always a fresh context in which no searching - # or matching succeeded so far. + # or matching succeeded so far. It is None when the iterator is + # exhausted. def iter_w(self): return self def next_w(self): - if self.ctx.match_start > self.ctx.end: + if self.ctx is None: raise OperationError(self.space.w_StopIteration, self.space.w_None) if not searchcontext(self.space, self.ctx, self.code): raise OperationError(self.space.w_StopIteration, self.space.w_None) return self.getmatch(True) def match_w(self): - if self.ctx.match_start > self.ctx.end: + if self.ctx is None: return self.space.w_None return self.getmatch(matchcontext(self.space, self.ctx, self.code)) def search_w(self): - if self.ctx.match_start > self.ctx.end: + if self.ctx is None: return self.space.w_None return self.getmatch(searchcontext(self.space, self.ctx, self.code)) def getmatch(self, found): + ctx = self.ctx + assert ctx is not None if found: - ctx = self.ctx nextstart = ctx.match_end - nextstart += (ctx.match_start == nextstart) - self.ctx = ctx.fresh_copy(nextstart) + exhausted = False + if ctx.match_start == nextstart: + if nextstart == ctx.end: + exhausted = True + else: + nextstart = ctx.next_indirect(nextstart) + if exhausted: + self.ctx = None + else: + self.ctx = self.srepat.fresh_copy(ctx) + self.ctx.match_start = nextstart match = W_SRE_Match(self.srepat, ctx) return match else: - self.ctx.match_start += 1 # obscure corner case + # obscure corner case + if ctx.match_start == ctx.end: + self.ctx = None + else: + ctx.match_start = ctx.next_indirect(ctx.match_start) return None W_SRE_Scanner.typedef = TypeDef( From pypy.commits at gmail.com Wed Jan 9 13:10:49 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 09 Jan 2019 10:10:49 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: fix the obvious problems with make_ctx and subx Message-ID: <5c363929.1c69fb81.cc6e4.670c@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: 
r95591:b27cae03176c Date: 2019-01-09 20:05 +0200 http://bitbucket.org/pypy/pypy/changeset/b27cae03176c/ Log: fix the obvious problems with make_ctx and subx diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -181,35 +181,52 @@ """Make a StrMatchContext, BufMatchContext or a Utf8MatchContext for searching in the given w_string object.""" space = self.space - length, unicodestr, string, buf = self.getstring(w_string) if pos < 0: pos = 0 - elif pos > length: - pos = length if endpos < pos: endpos = pos - elif endpos > length: - endpos = length flags = self.flags | flags - # - if unicodestr is not None: - if self.is_known_bytes(): - raise oefmt(space.w_TypeError, - "can't use a bytes pattern on a string-like " - "object") - return rsre_core.UnicodeMatchContext(unicodestr, - pos, endpos, flags) + if space.isinstance_w(w_string, space.w_unicode): + w_unicode_obj = space.convert_arg_to_w_unicode(w_string) + utf8str = w_unicode_obj._utf8 + length = w_unicode_obj._len() + if pos <= 0: + bytepos = 0 + elif pos >= length: + bytepos = len(utf8str) + else: + index_storage = w_unicode_obj._get_index_storage() + bytepos = rutf8.codepoint_position_at_index(utf8str, + index_storage, pos) + if endpos >= length: + endbytepos = len(utf8str) + else: + index_storage = w_unicode_obj._get_index_storage() + endbytepos = rutf8.codepoint_position_at_index(utf8str, + index_storage, endpos) + ctx = rsre_utf8.Utf8MatchContext( + utf8str, bytepos, endbytepos, flags) + # xxx we store the w_string on the ctx too, for + # W_SRE_Match.bytepos_to_charindex() + ctx.w_unicode_obj = w_unicode_obj + return ctx + elif space.isinstance_w(w_string, space.w_bytes): + str = space.bytes_w(w_string) + if pos > len(str): + pos = len(str) + if endpos > len(str): + endpos = len(str) + return self._make_str_match_context(str, pos, endpos) else: - if self.is_known_unicode(): - raise oefmt(space.w_TypeError, - 
"can't use a string pattern on a bytes-like " - "object") - if string is not None: - return rsre_core.StrMatchContext(string, - pos, endpos, flags) - else: - return rsre_core.BufMatchContext(buf, - pos, endpos, flags) + buf = space.readbuf_w(w_string) + size = buf.getlength() + assert size >= 0 + if pos > size: + pos = size + if endpos > size: + endpos = size + return rsre_core.BufMatchContext(buf, + pos, endpos, flags) def fresh_copy(self, ctx): if isinstance(ctx, rsre_utf8.Utf8MatchContext): @@ -347,7 +364,16 @@ # w_string are both string or both unicode objects, and if w_ptemplate # is a literal use_builder = '\x00' # or 'S'tring or 'U'nicode/UTF8 + print w_ptemplate, w_string, count filter_as_string = None + if space.isinstance_w(w_string, space.w_unicode): + if not self.is_known_unicode(): + raise oefmt(space.w_TypeError, + "cannot use a bytes pattern on a string-like object") + else: + if self.is_known_unicode(): + raise oefmt(space.w_TypeError, + "cannot use a string pattern on a bytes-like object") if space.is_true(space.callable(w_ptemplate)): w_filter = w_ptemplate filter_is_callable = True @@ -357,26 +383,20 @@ literal = '\\' not in filter_as_string if space.isinstance_w(w_string, space.w_unicode) and literal: use_builder = 'U' + elif space.isinstance_w(w_ptemplate, space.w_bytes): + filter_as_string = space.bytes_w(w_ptemplate) + literal = '\\' not in filter_as_string + if space.isinstance_w(w_string, space.w_bytes) and literal: + use_builder = 'S' else: - try: - filter_as_string = space.bytes_w(w_ptemplate) - except OperationError as e: - if e.async(space): - raise - literal = False - else: - literal = '\\' not in filter_as_string - if space.isinstance_w(w_string, space.w_bytes) and literal: - use_builder = 'S' + filter_as_string = space.readbuf_w(w_ptemplate).as_str() + w_ptemplate = space.newbytes(filter_as_string) + literal = False if literal: w_filter = w_ptemplate filter_is_callable = False else: # not a literal; hand it over to the template 
compiler - # FIX for a CPython 3.5 bug: if w_ptemplate is a buffer - # (e.g. a bytearray), convert it to a byte string here. - if buf is not None: - w_ptemplate = space.newbytes(filter_as_string) w_re = import_re(space) w_filter = space.call_method(w_re, '_subx', self, w_ptemplate) From pypy.commits at gmail.com Wed Jan 9 14:10:30 2019 From: pypy.commits at gmail.com (rlamy) Date: Wed, 09 Jan 2019 11:10:30 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Reduce diff with default Message-ID: <5c364726.1c69fb81.4f1ab.5025@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r95592:3e25d6001d03 Date: 2019-01-09 17:54 +0000 http://bitbucket.org/pypy/pypy/changeset/3e25d6001d03/ Log: Reduce diff with default diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -148,59 +148,48 @@ return False return space.isinstance_w(self.w_pattern, space.w_unicode) - def getstring(self, w_string): - """Accepts a string-like object (str, bytes, bytearray, buffer...) - and returns a tuple (len, rpython_unicode, rpython_str, rpython_buf), - where only one of the rpython_xxx is non-None. 
- """ - unicodestr = None - string = None - buf = None - space = self.space - if space.isinstance_w(w_string, space.w_unicode): - unicodestr = space.unicode_w(w_string) - length = len(unicodestr) - elif space.isinstance_w(w_string, space.w_bytes): - string = space.bytes_w(w_string) - length = len(string) - else: - buf = space.readbuf_w(w_string) - length = buf.getlength() - assert length >= 0 - return (length, unicodestr, string, buf) - def make_ctx(self, w_string, pos=0, endpos=sys.maxint): """Make a StrMatchContext, BufMatchContext or a UnicodeMatchContext for searching in the given w_string object.""" space = self.space - length, unicodestr, string, buf = self.getstring(w_string) if pos < 0: pos = 0 - elif pos > length: - pos = length if endpos < pos: endpos = pos - elif endpos > length: - endpos = length - # - if unicodestr is not None: + if space.isinstance_w(w_string, space.w_unicode): if self.is_known_bytes(): raise oefmt(space.w_TypeError, "can't use a bytes pattern on a string-like " "object") - return rsre_core.UnicodeMatchContext(unicodestr, - pos, endpos, self.flags) + unicodestr = space.unicode_w(w_string) + length = len(unicodestr) + if pos > length: + pos = length + if endpos > length: + endpos = length + return rsre_core.UnicodeMatchContext( + unicodestr, pos, endpos, self.flags) + elif self.is_known_unicode(): + raise oefmt(space.w_TypeError, + "can't use a string pattern on a bytes-like " + "object") + elif space.isinstance_w(w_string, space.w_bytes): + string = space.bytes_w(w_string) + length = len(string) + if pos > length: + pos = length + if endpos > length: + endpos = length + return rsre_core.StrMatchContext(string, pos, endpos, self.flags) else: - if self.is_known_unicode(): - raise oefmt(space.w_TypeError, - "can't use a string pattern on a bytes-like " - "object") - if string is not None: - return rsre_core.StrMatchContext(string, - pos, endpos, self.flags) - else: - return rsre_core.BufMatchContext(buf, - pos, endpos, self.flags) + buf 
= space.readbuf_w(w_string) + size = buf.getlength() + assert size >= 0 + if pos > size: + pos = size + if endpos > size: + endpos = size + return rsre_core.BufMatchContext(buf, pos, endpos, self.flags) def getmatch(self, ctx, found): if found: @@ -313,20 +302,23 @@ # w_string are both string or both unicode objects, and if w_ptemplate # is a literal use_builder = False + is_buffer = False filter_as_unicode = filter_as_string = None if space.is_true(space.callable(w_ptemplate)): w_filter = w_ptemplate filter_is_callable = True else: - length, filter_as_unicode, filter_as_string, buf = ( - self.getstring(w_ptemplate)) - if filter_as_unicode is not None: + if space.isinstance_w(w_ptemplate, space.w_unicode): + filter_as_unicode = space.unicode_w(w_ptemplate) literal = u'\\' not in filter_as_unicode use_builder = ( space.isinstance_w(w_string, space.w_unicode) and literal) else: - if buf is not None: - filter_as_string = buf.as_str() + if space.isinstance_w(w_ptemplate, space.w_bytes): + filter_as_string = space.bytes_w(w_ptemplate) + else: + filter_as_string = space.readbuf_w(w_ptemplate).as_str() + is_buffer = True literal = '\\' not in filter_as_string use_builder = ( space.isinstance_w(w_string, space.w_bytes) and literal) @@ -337,7 +329,7 @@ # not a literal; hand it over to the template compiler # FIX for a CPython 3.5 bug: if w_ptemplate is a buffer # (e.g. a bytearray), convert it to a byte string here. 
- if buf is not None: + if is_buffer: w_ptemplate = space.newbytes(filter_as_string) w_re = import_re(space) w_filter = space.call_method(w_re, '_subx', From pypy.commits at gmail.com Wed Jan 9 14:10:32 2019 From: pypy.commits at gmail.com (rlamy) Date: Wed, 09 Jan 2019 11:10:32 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: hg merge py3.5 Message-ID: <5c364728.1c69fb81.3b09c.f66d@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8-py3 Changeset: r95593:c56cd272e570 Date: 2019-01-09 18:25 +0000 http://bitbucket.org/pypy/pypy/changeset/c56cd272e570/ Log: hg merge py3.5 diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -152,16 +152,19 @@ return False return space.isinstance_w(self.w_pattern, space.w_unicode) - def getstring(self, w_string): - """Accepts a string-like object (str, bytes, bytearray, buffer...) - and returns a tuple (len, rpython_unicode, rpython_str, rpython_buf), - where only one of the rpython_xxx is non-None. 
- """ - unicodestr = None - string = None - buf = None + def make_ctx(self, w_string, pos=0, endpos=sys.maxint): + """Make a StrMatchContext, BufMatchContext or a UnicodeMatchContext for + searching in the given w_string object.""" space = self.space + if pos < 0: + pos = 0 + if endpos < pos: + endpos = pos if space.isinstance_w(w_string, space.w_unicode): + if self.is_known_bytes(): + raise oefmt(space.w_TypeError, + "can't use a bytes pattern on a string-like " + "object") unicodestr = space.realunicode_w(w_string) length = len(unicodestr) elif space.isinstance_w(w_string, space.w_bytes): @@ -169,43 +172,13 @@ length = len(string) else: buf = space.readbuf_w(w_string) - length = buf.getlength() - assert length >= 0 - return (length, unicodestr, string, buf) - - def make_ctx(self, w_string, pos=0, endpos=sys.maxint, flags=0): - """Make a StrMatchContext, BufMatchContext or a UnicodeMatchContext for - searching in the given w_string object.""" - space = self.space - length, unicodestr, string, buf = self.getstring(w_string) - if pos < 0: - pos = 0 - elif pos > length: - pos = length - if endpos < pos: - endpos = pos - elif endpos > length: - endpos = length - flags = self.flags | flags - # - if unicodestr is not None: - if self.is_known_bytes(): - raise oefmt(space.w_TypeError, - "can't use a bytes pattern on a string-like " - "object") - return rsre_core.UnicodeMatchContext(unicodestr, - pos, endpos, flags) - else: - if self.is_known_unicode(): - raise oefmt(space.w_TypeError, - "can't use a string pattern on a bytes-like " - "object") - if string is not None: - return rsre_core.StrMatchContext(string, - pos, endpos, flags) - else: - return rsre_core.BufMatchContext(buf, - pos, endpos, flags) + size = buf.getlength() + assert size >= 0 + if pos > size: + pos = size + if endpos > size: + endpos = size + return rsre_core.BufMatchContext(buf, pos, endpos, self.flags) def getmatch(self, ctx, found): if found: @@ -318,20 +291,23 @@ # w_string are both string or both 
unicode objects, and if w_ptemplate # is a literal use_builder = False + is_buffer = False filter_as_unicode = filter_as_string = None if space.is_true(space.callable(w_ptemplate)): w_filter = w_ptemplate filter_is_callable = True else: - length, filter_as_unicode, filter_as_string, buf = ( - self.getstring(w_ptemplate)) - if filter_as_unicode is not None: + if space.isinstance_w(w_ptemplate, space.w_unicode): + filter_as_unicode = space.realunicode_w(w_ptemplate) literal = u'\\' not in filter_as_unicode use_builder = ( space.isinstance_w(w_string, space.w_unicode) and literal) else: - if buf is not None: - filter_as_string = buf.as_str() + if space.isinstance_w(w_ptemplate, space.w_bytes): + filter_as_string = space.bytes_w(w_ptemplate) + else: + filter_as_string = space.readbuf_w(w_ptemplate).as_str() + is_buffer = True literal = '\\' not in filter_as_string use_builder = ( space.isinstance_w(w_string, space.w_bytes) and literal) @@ -342,7 +318,7 @@ # not a literal; hand it over to the template compiler # FIX for a CPython 3.5 bug: if w_ptemplate is a buffer # (e.g. a bytearray), convert it to a byte string here. 
- if buf is not None: + if is_buffer: w_ptemplate = space.newbytes(filter_as_string) w_re = import_re(space) w_filter = space.call_method(w_re, '_subx', From pypy.commits at gmail.com Wed Jan 9 14:10:34 2019 From: pypy.commits at gmail.com (rlamy) Date: Wed, 09 Jan 2019 11:10:34 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: Backed out changeset 4ef833b2310d Message-ID: <5c36472a.1c69fb81.75650.486e@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8-py3 Changeset: r95594:b77770dc5c23 Date: 2019-01-09 18:32 +0000 http://bitbucket.org/pypy/pypy/changeset/b77770dc5c23/ Log: Backed out changeset 4ef833b2310d diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -6,15 +6,15 @@ from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault from pypy.interpreter.error import OperationError, oefmt from rpython.rlib.rarithmetic import intmask -from rpython.rlib import jit -from rpython.rlib.rstring import StringBuilder, UnicodeBuilder +from rpython.rlib import jit, rutf8 +from rpython.rlib.rstring import StringBuilder from rpython.rlib.runicode import unicode_encode_utf_8 # ____________________________________________________________ # # Constants and exposed functions -from rpython.rlib.rsre import rsre_core, rsre_char +from rpython.rlib.rsre import rsre_core, rsre_char, rsre_utf8 from rpython.rlib.rsre.rsre_char import CODESIZE, MAXREPEAT, MAXGROUPS, getlower, set_unicode_db @@ -35,12 +35,19 @@ def slice_w(space, ctx, start, end, w_default): - if 0 <= start <= end: + # 'start' and 'end' are byte positions + if ctx.ZERO <= start <= end: if isinstance(ctx, rsre_core.BufMatchContext): return space.newbytes(ctx._buffer.getslice(start, end, 1, end-start)) if isinstance(ctx, rsre_core.StrMatchContext): + start = ctx._real_pos(start) + end = ctx._real_pos(end) return space.newbytes(ctx._string[start:end]) + elif isinstance(ctx, 
rsre_utf8.Utf8MatchContext): + s = ctx._utf8[start:end] + lgt = rutf8.get_utf8_length(s) + return space.newutf8(s, lgt) elif isinstance(ctx, rsre_core.UnicodeMatchContext): uni = ctx._unicodestr[start:end] uni_utf8 = unicode_encode_utf_8(uni, len(uni), 'strict', @@ -57,6 +64,7 @@ # Returns a list of RPython-level integers. # Unlike the app-level groups() method, groups are numbered from 0 # and the returned list does not start with the whole match range. + # The integers are byte positions, not character indexes (for utf8). if num_groups == 0: return None result = [-1] * (2 * num_groups) @@ -180,6 +188,27 @@ endpos = size return rsre_core.BufMatchContext(buf, pos, endpos, self.flags) + def fresh_copy(self, ctx): + if isinstance(ctx, rsre_utf8.Utf8MatchContext): + result = rsre_utf8.Utf8MatchContext( + ctx._utf8, ctx.match_start, ctx.end, ctx.flags) + result.w_unicode_obj = ctx.w_unicode_obj + elif isinstance(ctx, rsre_core.StrMatchContext): + result = self._make_str_match_context( + ctx._string, ctx.match_start, ctx.end) + elif isinstance(ctx, rsre_core.BufMatchContext): + result = rsre_core.BufMatchContext( + ctx._buffer, ctx.match_start, ctx.end, ctx.flags) + else: + raise AssertionError("bad ctx type") + result.match_end = ctx.match_end + return result + + def _make_str_match_context(self, str, pos, endpos): + # for tests to override + return rsre_core.StrMatchContext(str, + pos, endpos, self.flags) + def getmatch(self, ctx, found): if found: return W_SRE_Match(self, ctx) @@ -207,7 +236,7 @@ space = self.space matchlist_w = [] ctx = self.make_ctx(w_string, pos, endpos) - while ctx.match_start <= ctx.end: + while True: if not searchcontext(space, ctx, self.code): break num_groups = self.num_groups @@ -224,8 +253,12 @@ w_item = allgroups_w(space, ctx, fmarks, num_groups, w_emptystr) matchlist_w.append(w_item) - no_progress = (ctx.match_start == ctx.match_end) - ctx.reset(ctx.match_end + no_progress) + reset_at = ctx.match_end + if ctx.match_start == ctx.match_end: 
+ if reset_at == ctx.end: + break + reset_at = ctx.next_indirect(reset_at) + ctx.reset(reset_at) return space.newlist(matchlist_w) @unwrap_spec(pos=int, endpos=int) @@ -250,15 +283,15 @@ # splitlist = [] n = 0 - last = 0 ctx = self.make_ctx(w_string) + last = ctx.ZERO while not maxsplit or n < maxsplit: if not searchcontext(space, ctx, self.code): break if ctx.match_start == ctx.match_end: # zero-width match if ctx.match_start == ctx.end: # or end of string break - ctx.reset(ctx.match_end + 1) + ctx.reset(ctx.next_indirect(ctx.match_end)) continue splitlist.append(slice_w(space, ctx, last, ctx.match_start, space.w_None)) @@ -287,21 +320,20 @@ def subx(self, w_ptemplate, w_string, count): space = self.space - # use a (much faster) string/unicode builder if w_ptemplate and + # use a (much faster) string builder (possibly utf8) if w_ptemplate and # w_string are both string or both unicode objects, and if w_ptemplate # is a literal - use_builder = False - is_buffer = False - filter_as_unicode = filter_as_string = None + use_builder = '\x00' # or 'S'tring or 'U'nicode/UTF8 + filter_as_string = None if space.is_true(space.callable(w_ptemplate)): w_filter = w_ptemplate filter_is_callable = True else: if space.isinstance_w(w_ptemplate, space.w_unicode): - filter_as_unicode = space.realunicode_w(w_ptemplate) - literal = u'\\' not in filter_as_unicode - use_builder = ( - space.isinstance_w(w_string, space.w_unicode) and literal) + filter_as_string = space.utf8_w(w_ptemplate) + literal = '\\' not in filter_as_string + if space.isinstance_w(w_string, space.w_unicode) and literal: + use_builder = 'U' else: if space.isinstance_w(w_ptemplate, space.w_bytes): filter_as_string = space.bytes_w(w_ptemplate) @@ -309,8 +341,8 @@ filter_as_string = space.readbuf_w(w_ptemplate).as_str() is_buffer = True literal = '\\' not in filter_as_string - use_builder = ( - space.isinstance_w(w_string, space.w_bytes) and literal) + if space.isinstance_w(w_string, space.w_bytes) and literal: + 
use_builder = 'S' if literal: w_filter = w_ptemplate filter_is_callable = False @@ -327,18 +359,16 @@ # # XXX this is a bit of a mess, but it improves performance a lot ctx = self.make_ctx(w_string) - sublist_w = strbuilder = unicodebuilder = None - if use_builder: - if filter_as_unicode is not None: - unicodebuilder = UnicodeBuilder(ctx.end) - else: - assert filter_as_string is not None - strbuilder = StringBuilder(ctx.end) + sublist_w = strbuilder = None + if use_builder != '\x00': + assert filter_as_string is not None + strbuilder = StringBuilder(ctx.end) else: sublist_w = [] - n = last_pos = 0 - pattern = self.code + n = 0 + last_pos = ctx.ZERO while not count or n < count: + pattern = self.code sub_jitdriver.jit_merge_point( self=self, use_builder=use_builder, @@ -347,9 +377,7 @@ ctx=ctx, pattern=pattern, w_filter=w_filter, strbuilder=strbuilder, - unicodebuilder=unicodebuilder, filter_as_string=filter_as_string, - filter_as_unicode=filter_as_unicode, count=count, w_string=w_string, n=n, last_pos=last_pos, sublist_w=sublist_w @@ -360,10 +388,7 @@ if last_pos < ctx.match_start: _sub_append_slice( ctx, space, use_builder, sublist_w, - strbuilder, unicodebuilder, last_pos, ctx.match_start) - start = ctx.match_end - if start == ctx.match_start: - start += 1 + strbuilder, last_pos, ctx.match_start) if not (last_pos == ctx.match_start == ctx.match_end and n > 0): # the above ignores empty matches on latest position @@ -371,37 +396,45 @@ if filter_is_callable: w_match = self.getmatch(ctx, True) # make a copy of 'ctx'; see test_sub_matches_stay_valid - ctx = ctx.fresh_copy(start) # match_start/match_end dropped + ctx = self.fresh_copy(ctx) w_piece = space.call_function(w_filter, w_match) if not space.is_w(w_piece, space.w_None): - assert strbuilder is None and unicodebuilder is None - assert not use_builder + assert strbuilder is None + assert use_builder == '\x00' sublist_w.append(w_piece) else: - if use_builder: - if strbuilder is not None: - assert filter_as_string 
is not None - strbuilder.append(filter_as_string) - else: - assert unicodebuilder is not None - assert filter_as_unicode is not None - unicodebuilder.append(filter_as_unicode) + if use_builder != '\x00': + assert filter_as_string is not None + assert strbuilder is not None + strbuilder.append(filter_as_string) else: sublist_w.append(w_filter) n += 1 elif last_pos >= ctx.end: break # empty match at the end: finished + + start = ctx.match_end + if start == ctx.match_start: + if start == ctx.end: + break + start = ctx.next_indirect(start) ctx.reset(start) if last_pos < ctx.end: _sub_append_slice(ctx, space, use_builder, sublist_w, - strbuilder, unicodebuilder, last_pos, ctx.end) - if use_builder: - if strbuilder is not None: - return space.newbytes(strbuilder.build()), n + strbuilder, last_pos, ctx.end) + if use_builder != '\x00': + assert strbuilder is not None + result_bytes = strbuilder.build() + if use_builder == 'S': + assert not isinstance(ctx, rsre_utf8.Utf8MatchContext) + return space.newbytes(result_bytes), n + elif use_builder == 'U': + assert isinstance(ctx, rsre_utf8.Utf8MatchContext) + return space.newutf8(result_bytes, + rutf8.get_utf8_length(result_bytes)), n else: - assert unicodebuilder is not None - return space.newtext(unicodebuilder.build()), n + raise AssertionError(use_builder) else: if space.isinstance_w(w_string, space.w_unicode): w_emptystr = space.newtext('') @@ -414,26 +447,28 @@ sub_jitdriver = jit.JitDriver( reds="""count n last_pos ctx w_filter - strbuilder unicodebuilder + strbuilder filter_as_string - filter_as_unicode w_string sublist_w self""".split(), greens=["filter_is_callable", "use_builder", "filter_type", "pattern"]) def _sub_append_slice(ctx, space, use_builder, sublist_w, - strbuilder, unicodebuilder, start, end): - if use_builder: + strbuilder, start, end): + if use_builder != '\x00': + assert strbuilder is not None if isinstance(ctx, rsre_core.BufMatchContext): - assert strbuilder is not None + assert use_builder == 'S' 
return strbuilder.append(ctx._buffer.getslice(start, end, 1, end-start)) if isinstance(ctx, rsre_core.StrMatchContext): - assert strbuilder is not None + assert use_builder == 'S' + start = ctx._real_pos(start) + end = ctx._real_pos(end) return strbuilder.append_slice(ctx._string, start, end) - elif isinstance(ctx, rsre_core.UnicodeMatchContext): - assert unicodebuilder is not None - return unicodebuilder.append_slice(ctx._unicodestr, start, end) + elif isinstance(ctx, rsre_utf8.Utf8MatchContext): + assert use_builder == 'U' + return strbuilder.append_slice(ctx._utf8, start, end) assert 0, "unreachable" else: sublist_w.append(slice_w(space, ctx, start, end, space.w_None)) @@ -569,19 +604,38 @@ @unwrap_spec(w_groupnum=WrappedDefault(0)) def start_w(self, w_groupnum): start, end = self.do_span(w_groupnum) + start = self.bytepos_to_charindex(start) return self.space.newint(start) @unwrap_spec(w_groupnum=WrappedDefault(0)) def end_w(self, w_groupnum): start, end = self.do_span(w_groupnum) + end = self.bytepos_to_charindex(end) return self.space.newint(end) @unwrap_spec(w_groupnum=WrappedDefault(0)) def span_w(self, w_groupnum): start, end = self.do_span(w_groupnum) + return self.new_charindex_tuple(start, end) + + def new_charindex_tuple(self, start, end): + start = self.bytepos_to_charindex(start) + end = self.bytepos_to_charindex(end) return self.space.newtuple([self.space.newint(start), self.space.newint(end)]) + def bytepos_to_charindex(self, bytepos): + # Transform a 'byte position', as returned by all methods from + # rsre_core, back into a 'character index'. This is for UTF8 + # handling. 
+ ctx = self.ctx + if isinstance(ctx, rsre_utf8.Utf8MatchContext): + index_storage = ctx.w_unicode_obj._get_index_storage() + return rutf8.codepoint_index_at_byte_position( + ctx.w_unicode_obj._utf8, index_storage, bytepos) + else: + return bytepos + def flatten_marks(self): if self.flatten_cache is None: num_groups = self.srepat.num_groups @@ -589,6 +643,8 @@ return self.flatten_cache def do_span(self, w_arg): + # return a pair of integers, which are byte positions, not + # character indexes (for utf8) space = self.space try: groupnum = space.int_w(w_arg) @@ -636,10 +692,10 @@ return space.w_None def fget_pos(self, space): - return space.newint(self.ctx.original_pos) + return space.newint(self.bytepos_to_charindex(self.ctx.original_pos)) def fget_endpos(self, space): - return space.newint(self.ctx.end) + return space.newint(self.bytepos_to_charindex(self.ctx.end)) def fget_regs(self, space): space = self.space @@ -647,11 +703,11 @@ num_groups = self.srepat.num_groups result_w = [None] * (num_groups + 1) ctx = self.ctx - result_w[0] = space.newtuple([space.newint(ctx.match_start), - space.newint(ctx.match_end)]) + result_w[0] = self.new_charindex_tuple(ctx.match_start, + ctx.match_end) for i in range(num_groups): - result_w[i + 1] = space.newtuple([space.newint(fmarks[i*2]), - space.newint(fmarks[i*2+1])]) + result_w[i + 1] = self.new_charindex_tuple(fmarks[i*2], + fmarks[i*2+1]) return space.newtuple(result_w) def fget_string(self, space): @@ -660,6 +716,9 @@ return space.newbytes(ctx._buffer.as_str()) elif isinstance(ctx, rsre_core.StrMatchContext): return space.newbytes(ctx._string) + elif isinstance(ctx, rsre_utf8.Utf8MatchContext): + lgt = rutf8.get_utf8_length(ctx._utf8) + return space.newutf8(ctx._utf8, lgt) elif isinstance(ctx, rsre_core.UnicodeMatchContext): return space.newtext(ctx._unicodestr) else: @@ -702,38 +761,53 @@ self.ctx = ctx self.code = code # 'self.ctx' is always a fresh context in which no searching - # or matching succeeded so far. 
+ # or matching succeeded so far. It is None when the iterator is + # exhausted. def iter_w(self): return self def next_w(self): - if self.ctx.match_start > self.ctx.end: + if self.ctx is None: raise OperationError(self.space.w_StopIteration, self.space.w_None) if not searchcontext(self.space, self.ctx, self.code): raise OperationError(self.space.w_StopIteration, self.space.w_None) return self.getmatch(True) def match_w(self): - if self.ctx.match_start > self.ctx.end: + if self.ctx is None: return self.space.w_None return self.getmatch(matchcontext(self.space, self.ctx, self.code)) def search_w(self): - if self.ctx.match_start > self.ctx.end: + if self.ctx is None: return self.space.w_None return self.getmatch(searchcontext(self.space, self.ctx, self.code)) def getmatch(self, found): + ctx = self.ctx + assert ctx is not None if found: - ctx = self.ctx nextstart = ctx.match_end - nextstart += (ctx.match_start == nextstart) - self.ctx = ctx.fresh_copy(nextstart) + exhausted = False + if ctx.match_start == nextstart: + if nextstart == ctx.end: + exhausted = True + else: + nextstart = ctx.next_indirect(nextstart) + if exhausted: + self.ctx = None + else: + self.ctx = self.srepat.fresh_copy(ctx) + self.ctx.match_start = nextstart match = W_SRE_Match(self.srepat, ctx) return match else: - self.ctx.match_start += 1 # obscure corner case + # obscure corner case + if ctx.match_start == ctx.end: + self.ctx = None + else: + ctx.match_start = ctx.next_indirect(ctx.match_start) return None W_SRE_Scanner.typedef = TypeDef( From pypy.commits at gmail.com Wed Jan 9 14:10:35 2019 From: pypy.commits at gmail.com (rlamy) Date: Wed, 09 Jan 2019 11:10:35 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: fix merge Message-ID: <5c36472b.1c69fb81.fcc65.d465@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8-py3 Changeset: r95595:682209b9fc7a Date: 2019-01-09 18:56 +0000 http://bitbucket.org/pypy/pypy/changeset/682209b9fc7a/ Log: fix merge diff --git 
a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -173,11 +173,41 @@ raise oefmt(space.w_TypeError, "can't use a bytes pattern on a string-like " "object") - unicodestr = space.realunicode_w(w_string) - length = len(unicodestr) + w_unicode_obj = space.convert_arg_to_w_unicode(w_string) + utf8str = w_unicode_obj._utf8 + length = w_unicode_obj._len() + if pos <= 0: + bytepos = 0 + elif pos >= length: + bytepos = len(utf8str) + else: + index_storage = w_unicode_obj._get_index_storage() + bytepos = rutf8.codepoint_position_at_index(utf8str, + index_storage, pos) + if endpos >= length: + endbytepos = len(utf8str) + else: + index_storage = w_unicode_obj._get_index_storage() + endbytepos = rutf8.codepoint_position_at_index(utf8str, + index_storage, endpos) + ctx = rsre_utf8.Utf8MatchContext( + utf8str, bytepos, endbytepos, self.flags) + # xxx we store the w_string on the ctx too, for + # W_SRE_Match.bytepos_to_charindex() + ctx.w_unicode_obj = w_unicode_obj + return ctx + elif self.is_known_unicode(): + raise oefmt(space.w_TypeError, + "can't use a string pattern on a bytes-like " + "object") elif space.isinstance_w(w_string, space.w_bytes): string = space.bytes_w(w_string) length = len(string) + if pos > length: + pos = length + if endpos > length: + endpos = length + return rsre_core.StrMatchContext(string, pos, endpos, self.flags) else: buf = space.readbuf_w(w_string) size = buf.getlength() @@ -324,6 +354,7 @@ # w_string are both string or both unicode objects, and if w_ptemplate # is a literal use_builder = '\x00' # or 'S'tring or 'U'nicode/UTF8 + is_buffer = False filter_as_string = None if space.is_true(space.callable(w_ptemplate)): w_filter = w_ptemplate From pypy.commits at gmail.com Wed Jan 9 14:10:37 2019 From: pypy.commits at gmail.com (rlamy) Date: Wed, 09 Jan 2019 11:10:37 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: merge heads Message-ID: 
<5c36472d.1c69fb81.c6e72.ca9a@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8-py3 Changeset: r95596:a56140430aa7 Date: 2019-01-09 19:03 +0000 http://bitbucket.org/pypy/pypy/changeset/a56140430aa7/ Log: merge heads diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -8,7 +8,6 @@ from rpython.rlib.rarithmetic import intmask from rpython.rlib import jit, rutf8 from rpython.rlib.rstring import StringBuilder -from rpython.rlib.runicode import unicode_encode_utf_8 # ____________________________________________________________ # @@ -49,10 +48,7 @@ lgt = rutf8.get_utf8_length(s) return space.newutf8(s, lgt) elif isinstance(ctx, rsre_core.UnicodeMatchContext): - uni = ctx._unicodestr[start:end] - uni_utf8 = unicode_encode_utf_8(uni, len(uni), 'strict', - allow_surrogates=True) - return space.newtext(uni_utf8, len(uni)) + return space.newtext(ctx._unicodestr[start:end]) else: # unreachable raise SystemError @@ -356,6 +352,14 @@ use_builder = '\x00' # or 'S'tring or 'U'nicode/UTF8 is_buffer = False filter_as_string = None + if space.isinstance_w(w_string, space.w_unicode): + if not self.is_known_unicode(): + raise oefmt(space.w_TypeError, + "cannot use a bytes pattern on a string-like object") + else: + if self.is_known_unicode(): + raise oefmt(space.w_TypeError, + "cannot use a string pattern on a bytes-like object") if space.is_true(space.callable(w_ptemplate)): w_filter = w_ptemplate filter_is_callable = True @@ -365,6 +369,11 @@ literal = '\\' not in filter_as_string if space.isinstance_w(w_string, space.w_unicode) and literal: use_builder = 'U' + elif space.isinstance_w(w_ptemplate, space.w_bytes): + filter_as_string = space.bytes_w(w_ptemplate) + literal = '\\' not in filter_as_string + if space.isinstance_w(w_string, space.w_bytes) and literal: + use_builder = 'S' else: if space.isinstance_w(w_ptemplate, space.w_bytes): filter_as_string = 
space.bytes_w(w_ptemplate) @@ -468,7 +477,7 @@ raise AssertionError(use_builder) else: if space.isinstance_w(w_string, space.w_unicode): - w_emptystr = space.newtext('') + w_emptystr = space.newutf8('', 0) else: w_emptystr = space.newbytes('') w_item = space.call_method(w_emptystr, 'join', From pypy.commits at gmail.com Wed Jan 9 19:22:22 2019 From: pypy.commits at gmail.com (rlamy) Date: Wed, 09 Jan 2019 16:22:22 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: fix translation Message-ID: <5c36903e.1c69fb81.da3dd.e10c@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8-py3 Changeset: r95597:316992f1c55d Date: 2019-01-10 00:21 +0000 http://bitbucket.org/pypy/pypy/changeset/316992f1c55d/ Log: fix translation diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -47,8 +47,6 @@ s = ctx._utf8[start:end] lgt = rutf8.get_utf8_length(s) return space.newutf8(s, lgt) - elif isinstance(ctx, rsre_core.UnicodeMatchContext): - return space.newtext(ctx._unicodestr[start:end]) else: # unreachable raise SystemError @@ -157,7 +155,7 @@ return space.isinstance_w(self.w_pattern, space.w_unicode) def make_ctx(self, w_string, pos=0, endpos=sys.maxint): - """Make a StrMatchContext, BufMatchContext or a UnicodeMatchContext for + """Make a StrMatchContext, BufMatchContext or a Utf8MatchContext for searching in the given w_string object.""" space = self.space if pos < 0: @@ -759,8 +757,6 @@ elif isinstance(ctx, rsre_utf8.Utf8MatchContext): lgt = rutf8.get_utf8_length(ctx._utf8) return space.newutf8(ctx._utf8, lgt) - elif isinstance(ctx, rsre_core.UnicodeMatchContext): - return space.newtext(ctx._unicodestr) else: raise SystemError From pypy.commits at gmail.com Thu Jan 10 04:33:46 2019 From: pypy.commits at gmail.com (cfbolz) Date: Thu, 10 Jan 2019 01:33:46 -0800 (PST) Subject: [pypy-commit] =?utf-8?q?extradoc_extradoc=3A_Tim_is_coming_to_th?= =?utf-8?q?e_D=C3=BCsseldorf_sprint?= 
Message-ID: <5c37117a.1c69fb81.8bd3b.3de3@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: extradoc Changeset: r5934:57a845f89133 Date: 2019-01-10 09:33 +0000 http://bitbucket.org/pypy/extradoc/changeset/57a845f89133/ Log: Tim is coming to the Düsseldorf sprint diff --git a/sprintinfo/ddorf2019/people.txt b/sprintinfo/ddorf2019/people.txt --- a/sprintinfo/ddorf2019/people.txt +++ b/sprintinfo/ddorf2019/people.txt @@ -12,8 +12,9 @@ Matti Picus Feb 4? - 9? any suggestions?? Manuel? Feb 4 - 7 share a room? Antonio Cuni Feb 3 - 9 airbnb -Andrew Lawrence Feb 3 - 9 backpackers Düsseldorf +Andrew Lawrence Feb 3 - 9 backpackers D�sseldorf Alexander Schremmer Feb 4 - 8 Essen, guest room available +Tim Felgentreff Feb 4 - 9 ? ============================ ============== =========================== From pypy.commits at gmail.com Thu Jan 10 04:47:05 2019 From: pypy.commits at gmail.com (cfbolz) Date: Thu, 10 Jan 2019 01:47:05 -0800 (PST) Subject: [pypy-commit] =?utf-8?q?extradoc_extradoc=3A_Add_=C5=81ukas_and_?= =?utf-8?q?Maciek_to_ddorf_sprint?= Message-ID: <5c371499.1c69fb81.5b3ea.4723@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: extradoc Changeset: r5935:36e6157b5dac Date: 2019-01-10 09:46 +0000 http://bitbucket.org/pypy/extradoc/changeset/36e6157b5dac/ Log: Add Łukas and Maciek to ddorf sprint diff --git a/sprintinfo/ddorf2019/people.txt b/sprintinfo/ddorf2019/people.txt --- a/sprintinfo/ddorf2019/people.txt +++ b/sprintinfo/ddorf2019/people.txt @@ -12,9 +12,11 @@ Matti Picus Feb 4? - 9? any suggestions?? Manuel? Feb 4 - 7 share a room? Antonio Cuni Feb 3 - 9 airbnb -Andrew Lawrence Feb 3 - 9 backpackers D�sseldorf +Andrew Lawrence Feb 3 - 9 backpackers Düsseldorf Alexander Schremmer Feb 4 - 8 Essen, guest room available Tim Felgentreff Feb 4 - 9 ? 
+Maciej Fijałkowski Feb 3 - 9 airbnb +Łukasz Langa Feb 3 - 9 airbnb ============================ ============== =========================== From pypy.commits at gmail.com Thu Jan 10 05:23:48 2019 From: pypy.commits at gmail.com (fijal) Date: Thu, 10 Jan 2019 02:23:48 -0800 (PST) Subject: [pypy-commit] extradoc extradoc: matti is staying with us Message-ID: <5c371d34.1c69fb81.d7a29.de6c@mx.google.com> Author: fijal Branch: extradoc Changeset: r5936:c65a069376b9 Date: 2019-01-10 11:23 +0100 http://bitbucket.org/pypy/extradoc/changeset/c65a069376b9/ Log: matti is staying with us diff --git a/sprintinfo/ddorf2019/people.txt b/sprintinfo/ddorf2019/people.txt --- a/sprintinfo/ddorf2019/people.txt +++ b/sprintinfo/ddorf2019/people.txt @@ -9,7 +9,7 @@ Name Arrive/Depart Accomodation ============================ ============== =========================== Carl Friedrich Bolz-Tereick always there private -Matti Picus Feb 4? - 9? any suggestions?? +Matti Picus Feb 4? - 9? airbnb Manuel? Feb 4 - 7 share a room? Antonio Cuni Feb 3 - 9 airbnb Andrew Lawrence Feb 3 - 9 backpackers Düsseldorf From pypy.commits at gmail.com Thu Jan 10 16:37:18 2019 From: pypy.commits at gmail.com (arigo) Date: Thu, 10 Jan 2019 13:37:18 -0800 (PST) Subject: [pypy-commit] extradoc extradoc: add myself Message-ID: <5c37bb0e.1c69fb81.da3dd.c62e@mx.google.com> Author: Armin Rigo Branch: extradoc Changeset: r5937:3fbf4e3aa39c Date: 2019-01-10 22:37 +0100 http://bitbucket.org/pypy/extradoc/changeset/3fbf4e3aa39c/ Log: add myself diff --git a/sprintinfo/ddorf2019/people.txt b/sprintinfo/ddorf2019/people.txt --- a/sprintinfo/ddorf2019/people.txt +++ b/sprintinfo/ddorf2019/people.txt @@ -17,6 +17,7 @@ Tim Felgentreff Feb 4 - 9 ? 
Maciej Fijałkowski Feb 3 - 9 airbnb Łukasz Langa Feb 3 - 9 airbnb +Armin Rigo Feb 3 - 9 airbnb ============================ ============== =========================== From pypy.commits at gmail.com Fri Jan 11 05:38:55 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 02:38:55 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Added additional flags for objects Message-ID: <5c38723f.1c69fb81.5b3ea.e7ce@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95598:c03fe327893a Date: 2018-03-19 10:52 +0100 http://bitbucket.org/pypy/pypy/changeset/c03fe327893a/ Log: Added additional flags for objects Implemented refcount overhead (for non-cyclic refcount) Implemented buffer for potential roots of cycles Fixed assert to allow for recursive cpyext calls Added some cycle detection tests from experimental branch (disabled now) diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -208,10 +208,11 @@ # running and should not themselves release the GIL). # # **make_generic_cpy_call():** RPython to C, with the GIL held. Before -# the call, must assert that the global variable is 0 and set the -# current thread identifier into the global variable. After the call, -# assert that the global variable still contains the current thread id, -# and reset it to 0. +# the call, must assert that the global variable is 0 or the current +# thread identifier (recursive call) and set the current thread identifier +# into the global variable. After the call, assert that the global variable +# still contains the current thread id, and reset it to the value it held +# before the call. 
# # **make_wrapper():** C to RPython; by default assume that the GIL is # held, but accepts gil="acquire", "release", "around", @@ -1763,7 +1764,8 @@ # see "Handling of the GIL" above tid = rthread.get_ident() - assert cpyext_glob_tid_ptr[0] == 0 + tid_before = cpyext_glob_tid_ptr[0] + assert tid_before == 0 or tid_before == tid cpyext_glob_tid_ptr[0] = tid preexist_error = PyErr_Occurred(space) @@ -1772,7 +1774,7 @@ result = call_external_function(func, *boxed_args) finally: assert cpyext_glob_tid_ptr[0] == tid - cpyext_glob_tid_ptr[0] = 0 + cpyext_glob_tid_ptr[0] = tid_before for i, ARG in unrolling_arg_types: # note that this loop is nicely unrolled statically by RPython _pyobj = to_decref[i] diff --git a/pypy/module/cpyext/include/boolobject.h b/pypy/module/cpyext/include/boolobject.h --- a/pypy/module/cpyext/include/boolobject.h +++ b/pypy/module/cpyext/include/boolobject.h @@ -13,8 +13,8 @@ #define Py_True ((PyObject *) &_Py_TrueStruct) /* Macros for returning Py_True or Py_False, respectively */ -#define Py_RETURN_TRUE return Py_INCREF(Py_True), Py_True -#define Py_RETURN_FALSE return Py_INCREF(Py_False), Py_False +#define Py_RETURN_TRUE do { Py_INCREF(Py_True); return Py_True; } while(0) +#define Py_RETURN_FALSE do { Py_INCREF(Py_False); return Py_False; } while(0) #ifdef __cplusplus } diff --git a/pypy/module/cpyext/include/object.h b/pypy/module/cpyext/include/object.h --- a/pypy/module/cpyext/include/object.h +++ b/pypy/module/cpyext/include/object.h @@ -2,6 +2,7 @@ #define Py_OBJECT_H #include +#include #ifdef __cplusplus extern "C" { @@ -12,7 +13,13 @@ #define PY_SSIZE_T_MAX ((Py_ssize_t)(((size_t)-1)>>1)) #define PY_SSIZE_T_MIN (-PY_SSIZE_T_MAX-1) -#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None +#define PY_REFCNT_FROM_PYPY (4L << ((long)(log(PY_SSIZE_T_MAX) / log(2) - 2))) +#define PY_REFCNT_GREEN (4L << ((long)(log(PY_SSIZE_T_MAX) / log(2) - 7))) +#define PY_REFCNT_OVERFLOW (1L << ((long)(log(PY_SSIZE_T_MAX) / log(2) - 7) / 2L - 1L)) 
+#define PY_REFCNT_MASK ((PY_REFCNT_OVERFLOW << 1L) - 1L) +#define Py_RETURN_NONE return (((((PyObject *)(Py_None))->ob_refcnt & PY_REFCNT_OVERFLOW) == 0) ? \ + ((PyObject *)(Py_None))->ob_refcnt++ : Py_IncRef((PyObject *)(Py_None))), Py_None + /* CPython has this for backwards compatibility with really old extensions, and now @@ -34,14 +41,21 @@ #define Py_XDECREF(ob) (Py_DecRef((PyObject *)(ob))) #else /* Fast version */ -#define Py_INCREF(ob) (((PyObject *)(ob))->ob_refcnt++) -#define Py_DECREF(op) \ - do { \ - if (--((PyObject *)(op))->ob_refcnt != 0) \ - ; \ - else \ - _Py_Dealloc((PyObject *)(op)); \ - } while (0) +#define Py_INCREF(ob) do { \ + if (!(((PyObject *)(ob))->ob_refcnt & PY_REFCNT_OVERFLOW)) \ + ((PyObject *)(ob))->ob_refcnt++; \ + else \ + Py_IncRef((PyObject *)(ob)); \ + } while (0) +#define Py_DECREF(ob) do { \ + if (!(((PyObject *)(ob))->ob_refcnt & PY_REFCNT_GREEN) || \ + (((PyObject *)(ob))->ob_refcnt & PY_REFCNT_OVERFLOW)) \ + Py_DecRef((PyObject *)(ob)); \ + else if (--((PyObject *)(ob))->ob_refcnt & PY_REFCNT_MASK) \ + ; \ + else if ((!((PyObject *)(ob))->ob_refcnt) & PY_REFCNT_FROM_PYPY) \ + _Py_Dealloc((PyObject *)(ob)); \ + } while (0) #define Py_XINCREF(op) do { if ((op) == NULL) ; else Py_INCREF(op); } while (0) #define Py_XDECREF(op) do { if ((op) == NULL) ; else Py_DECREF(op); } while (0) @@ -61,7 +75,8 @@ } \ } while (0) -#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt) +#define Py_REFCNT(ob) ((((PyObject *)(ob))->ob_refcnt & PY_REFCNT_OVERFLOW == 0) ? 
\ + (((PyObject*)(ob))->ob_refcnt & PY_REFCNT_MASK) : _Py_RefCnt_Overflow(ob)) #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size) diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py --- a/pypy/module/cpyext/object.py +++ b/pypy/module/cpyext/object.py @@ -11,6 +11,7 @@ from pypy.module.cpyext.pyerrors import PyErr_NoMemory, PyErr_BadInternalCall from pypy.objspace.std.typeobject import W_TypeObject from pypy.interpreter.error import OperationError, oefmt +from rpython.rlib.rawrefcount import REFCNT_MASK import pypy.module.__builtin__.operation as operation @@ -50,7 +51,7 @@ def _dealloc(space, obj): # This frees an object after its refcount dropped to zero, so we # assert that it is really zero here. - assert obj.c_ob_refcnt == 0 + assert obj.c_ob_refcnt & REFCNT_MASK == 0 pto = obj.c_ob_type obj_voidp = rffi.cast(rffi.VOIDP, obj) generic_cpy_call(space, pto.c_tp_free, obj_voidp) diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py --- a/pypy/module/cpyext/pyobject.py +++ b/pypy/module/cpyext/pyobject.py @@ -17,8 +17,13 @@ from rpython.rlib.objectmodel import keepalive_until_here from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_base_ptr from rpython.rlib import rawrefcount, jit -from rpython.rlib.debug import ll_assert, fatalerror - +from rpython.rlib.debug import ll_assert, fatalerror, debug_print +from rpython.rlib.rawrefcount import ( + REFCNT_MASK, REFCNT_FROM_PYPY, REFCNT_OVERFLOW, REFCNT_CYCLE_BUFFERED, + REFCNT_CLR_MASK, REFCNT_CLR_GREEN, REFCNT_CLR_PURPLE, + W_MARKER_DEALLOCATING) +from pypy.module.cpyext.api import slot_function +from pypy.module.cpyext.typeobjectdefs import visitproc #________________________________________________________ # type description @@ -249,8 +254,6 @@ w_obj._cpyext_attach_pyobj(space, py_obj) -w_marker_deallocating = W_Root() - @jit.dont_look_inside def from_ref(space, ref): """ @@ -262,7 +265,7 @@ return None w_obj = 
rawrefcount.to_obj(W_Root, ref) if w_obj is not None: - if w_obj is not w_marker_deallocating: + if w_obj is not W_MARKER_DEALLOCATING: return w_obj fatalerror( "*** Invalid usage of a dying CPython object ***\n" @@ -315,7 +318,7 @@ def pyobj_has_w_obj(pyobj): w_obj = rawrefcount.to_obj(W_Root, pyobj) - return w_obj is not None and w_obj is not w_marker_deallocating + return w_obj is not None and w_obj is not W_MARKER_DEALLOCATING def w_obj_has_pyobj(w_obj): return bool(rawrefcount.from_obj(PyObject, w_obj)) @@ -341,7 +344,7 @@ pyobj = as_pyobj(space, w_obj, w_userdata, immortal=immortal) if pyobj: # != NULL assert pyobj.c_ob_refcnt >= rawrefcount.REFCNT_FROM_PYPY - pyobj.c_ob_refcnt += 1 + rawrefcount.incref(pyobj) keepalive_until_here(w_obj) return pyobj @@ -375,7 +378,7 @@ pyobj = rffi.cast(PyObject, pyobj) w_obj = from_ref(space, pyobj) if pyobj: - pyobj.c_ob_refcnt -= 1 + rawrefcount.decref(pyobj) assert pyobj.c_ob_refcnt >= rawrefcount.REFCNT_FROM_PYPY keepalive_until_here(w_obj) return w_obj @@ -386,7 +389,7 @@ assert is_pyobj(pyobj) pyobj = rffi.cast(PyObject, pyobj) assert pyobj.c_ob_refcnt >= 1 - pyobj.c_ob_refcnt += 1 + rawrefcount.incref(pyobj) @specialize.ll() def decref(space, pyobj): @@ -394,23 +397,64 @@ assert is_pyobj(pyobj) pyobj = rffi.cast(PyObject, pyobj) if pyobj: - assert pyobj.c_ob_refcnt > 0 - assert (pyobj.c_ob_pypy_link == 0 or - pyobj.c_ob_refcnt > rawrefcount.REFCNT_FROM_PYPY) - pyobj.c_ob_refcnt -= 1 - if pyobj.c_ob_refcnt == 0: - state = space.fromcache(State) - generic_cpy_call(space, state.C._Py_Dealloc, pyobj) + rawrefcount.decref(pyobj) + rc = pyobj.c_ob_refcnt + if rc & REFCNT_MASK == 0: + if rc & REFCNT_FROM_PYPY == 0 and rc & REFCNT_CLR_MASK != REFCNT_CLR_PURPLE: + state = space.fromcache(State) + generic_cpy_call(space, state.C._Py_Dealloc, pyobj) + elif rc & REFCNT_CLR_MASK != REFCNT_CLR_GREEN: + possible_root(space, pyobj) #else: # w_obj = rawrefcount.to_obj(W_Root, ref) # if w_obj is not None: # assert pyobj.c_ob_refcnt >= 
rawrefcount.REFCNT_FROM_PYPY + at jit.dont_look_inside +def possible_root(space, obj): + #debug_print("possible root", obj) + rc = obj.c_ob_refcnt + if not obj.c_ob_type or not obj.c_ob_type.c_tp_traverse: + #debug_print("mark green", obj) + rc = rc & ~REFCNT_CLR_MASK | REFCNT_CLR_GREEN + elif rc & REFCNT_CLR_MASK != REFCNT_CLR_PURPLE: + rc = rc & ~REFCNT_CLR_MASK | REFCNT_CLR_PURPLE + if rc & REFCNT_CYCLE_BUFFERED == 0: + #debug_print("mark purple", obj) + rawrefcount.buffer_pyobj(obj) + rc = rc | REFCNT_CYCLE_BUFFERED + obj.c_ob_refcnt = rc + + at cpython_api([PyObject], lltype.Void) +def Py_IncRef(space, obj): + incref(space, obj) + + at cpython_api([PyObject], lltype.Void) +def Py_DecRef(space, obj): + decref(space, obj) + + at cpython_api([PyObject], lltype.SignedLongLong, error=CANNOT_FAIL) +def _Py_RefCnt_Overflow(space, obj): + return refcnt_overflow(space, obj) + + at specialize.ll() +def refcnt_overflow(space, obj): + if is_pyobj(obj): + pyobj = rffi.cast(PyObject, obj) + else: + pyobj = as_pyobj(space, obj, None) + if pyobj: + if pyobj.c_ob_refcnt & REFCNT_MASK == REFCNT_OVERFLOW: + return REFCNT_OVERFLOW + else: + return (pyobj.c_ob_refcnt & REFCNT_MASK) + \ + rawrefcount.overflow_get(pyobj) + return 0 @init_function def write_w_marker_deallocating(space): if we_are_translated(): - llptr = cast_instance_to_base_ptr(w_marker_deallocating) + llptr = cast_instance_to_base_ptr(W_MARKER_DEALLOCATING) state = space.fromcache(State) state.C.set_marker(rffi.cast(Py_ssize_t, llptr)) diff --git a/pypy/module/cpyext/src/object.c b/pypy/module/cpyext/src/object.c --- a/pypy/module/cpyext/src/object.c +++ b/pypy/module/cpyext/src/object.c @@ -5,6 +5,7 @@ extern void _PyPy_Free(void *ptr); extern void *_PyPy_Malloc(Py_ssize_t size); +/* void Py_IncRef(PyObject *o) { @@ -16,6 +17,7 @@ { Py_XDECREF(o); } +*/ /* * The actual value of this variable will be the address of diff --git a/pypy/module/cpyext/test/test_bytesobject.py 
b/pypy/module/cpyext/test/test_bytesobject.py --- a/pypy/module/cpyext/test/test_bytesobject.py +++ b/pypy/module/cpyext/test/test_bytesobject.py @@ -9,10 +9,12 @@ PyString_ConcatAndDel, PyString_Format, PyString_InternFromString, PyString_AsEncodedObject, PyString_AsDecodedObject, _PyString_Eq, _PyString_Join) -from pypy.module.cpyext.api import PyObjectP, PyObject, Py_ssize_tP, generic_cpy_call -from pypy.module.cpyext.pyobject import decref, from_ref, make_ref +from pypy.module.cpyext.api import ( + PyObjectP, PyObject, Py_ssize_tP, generic_cpy_call) +from pypy.module.cpyext.pyobject import ( + Py_DecRef, Py_IncRef, _Py_RefCnt_Overflow, from_ref, make_ref, decref) from pypy.module.cpyext.buffer import PyObject_AsCharBuffer -from pypy.module.cpyext.api import PyTypeObjectPtr +from rpython.rlib import rawrefcount class AppTestBytesObject(AppTestCpythonExtensionBase): @@ -510,9 +512,9 @@ ref = make_ref(space, space.wrap('abc')) ptr = lltype.malloc(PyObjectP.TO, 1, flavor='raw') ptr[0] = ref - prev_refcnt = ref.c_ob_refcnt + prev_refcnt = ref.c_ob_refcnt & rawrefcount.REFCNT_MASK PyString_Concat(space, ptr, space.wrap('def')) - assert ref.c_ob_refcnt == prev_refcnt - 1 + assert ref.c_ob_refcnt & rawrefcount.REFCNT_MASK == prev_refcnt - 1 assert space.str_w(from_ref(space, ptr[0])) == 'abcdef' with pytest.raises(OperationError): PyString_Concat(space, ptr, space.w_None) @@ -548,9 +550,9 @@ w_text = space.wrap("text") ref = make_ref(space, w_text) - prev_refcnt = ref.c_ob_refcnt + prev_refcnt = ref.c_ob_refcnt & rawrefcount.REFCNT_MASK assert PyObject_AsCharBuffer(space, ref, bufp, lenp) == 0 - assert ref.c_ob_refcnt == prev_refcnt + assert ref.c_ob_refcnt & rawrefcount.REFCNT_MASK == prev_refcnt assert lenp[0] == 4 assert rffi.charp2str(bufp[0]) == 'text' lltype.free(bufp, flavor='raw') @@ -609,3 +611,53 @@ w_seq = space.wrap(['a', 'b']) w_joined = _PyString_Join(space, w_sep, w_seq) assert space.unwrap(w_joined) == 'ab' + + def test_refcnt_overflow(self, space): + 
ref1 = make_ref(space, space.wrap('foo')) + ref1.c_ob_refcnt = rawrefcount.REFCNT_OVERFLOW - 1 + + Py_IncRef(space, ref1) + assert ref1.c_ob_refcnt & rawrefcount.REFCNT_MASK \ + == rawrefcount.REFCNT_OVERFLOW + assert _Py_RefCnt_Overflow(space, ref1) \ + == rawrefcount.REFCNT_OVERFLOW + + Py_IncRef(space, ref1) + assert ref1.c_ob_refcnt & rawrefcount.REFCNT_MASK \ + == rawrefcount.REFCNT_OVERFLOW + 1 + assert _Py_RefCnt_Overflow(space, ref1) \ + == rawrefcount.REFCNT_OVERFLOW + 1 + + Py_IncRef(space, ref1) + assert ref1.c_ob_refcnt & rawrefcount.REFCNT_MASK \ + == rawrefcount.REFCNT_OVERFLOW + 1 + assert _Py_RefCnt_Overflow(space, ref1) \ + == rawrefcount.REFCNT_OVERFLOW + 2 + + Py_IncRef(space, ref1) + assert ref1.c_ob_refcnt & rawrefcount.REFCNT_MASK \ + == rawrefcount.REFCNT_OVERFLOW + 1 + assert _Py_RefCnt_Overflow(space, ref1) \ + == rawrefcount.REFCNT_OVERFLOW + 3 + + Py_DecRef(space, ref1) + assert ref1.c_ob_refcnt & rawrefcount.REFCNT_MASK \ + == rawrefcount.REFCNT_OVERFLOW + 1 + assert _Py_RefCnt_Overflow(space, ref1) \ + == rawrefcount.REFCNT_OVERFLOW + 2 + + Py_DecRef(space, ref1) + assert ref1.c_ob_refcnt & rawrefcount.REFCNT_MASK \ + == rawrefcount.REFCNT_OVERFLOW + 1 + assert _Py_RefCnt_Overflow(space, ref1) \ + == rawrefcount.REFCNT_OVERFLOW + 1 + + Py_DecRef(space, ref1) + assert ref1.c_ob_refcnt & rawrefcount.REFCNT_MASK \ + == rawrefcount.REFCNT_OVERFLOW + assert _Py_RefCnt_Overflow(space, ref1) \ + == rawrefcount.REFCNT_OVERFLOW + + Py_DecRef(space, ref1) + assert ref1.c_ob_refcnt & rawrefcount.REFCNT_MASK \ + == rawrefcount.REFCNT_OVERFLOW - 1 diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -74,6 +74,8 @@ from rpython.rlib.objectmodel import specialize from rpython.rlib import rgc from rpython.memory.gc.minimarkpage import out_of_memory +from pypy.module.cpyext.api import slot_function, PyObject +from rpython.rtyper.lltypesystem 
import rffi # # Handles the objects in 2 generations: @@ -188,6 +190,16 @@ ('forw', llmemory.Address)) FORWARDSTUBPTR = lltype.Ptr(FORWARDSTUB) NURSARRAY = lltype.Array(llmemory.Address) +VISIT_FUNCTYPE = rffi.CCallback([PyObject, rffi.VOIDP], + rffi.INT_real) + +def traverse(obj, func_ptr): + from pypy.module.cpyext.api import generic_cpy_call + from pypy.module.cpyext.typeobjectdefs import visitproc + if obj.c_ob_type and obj.c_ob_type.c_tp_traverse: + visitproc_ptr = rffi.cast(visitproc, func_ptr) + generic_cpy_call(True, obj.c_ob_type.c_tp_traverse, obj, + visitproc_ptr, rffi.cast(rffi.VOIDP, obj)) # ____________________________________________________________ @@ -2990,13 +3002,13 @@ _ADDRARRAY = lltype.Array(llmemory.Address, hints={'nolength': True}) PYOBJ_HDR = lltype.Struct('GCHdr_PyObject', - ('ob_refcnt', lltype.Signed), - ('ob_pypy_link', lltype.Signed)) + ('c_ob_refcnt', lltype.Signed), + ('c_ob_pypy_link', lltype.Signed)) PYOBJ_HDR_PTR = lltype.Ptr(PYOBJ_HDR) RAWREFCOUNT_DEALLOC_TRIGGER = lltype.Ptr(lltype.FuncType([], lltype.Void)) def _pyobj(self, pyobjaddr): - return llmemory.cast_adr_to_ptr(pyobjaddr, self.PYOBJ_HDR_PTR) + return llmemory.cast_adr_to_ptr(pyobjaddr, lltype.Ptr(PyObject.TO)) def rawrefcount_init(self, dealloc_trigger_callback): # see pypy/doc/discussion/rawrefcount.rst @@ -3005,6 +3017,7 @@ self.rrc_p_list_old = self.AddressStack() self.rrc_o_list_young = self.AddressStack() self.rrc_o_list_old = self.AddressStack() + self.rrc_buffered = self.AddressStack() self.rrc_p_dict = self.AddressDict() # non-nursery keys only self.rrc_p_dict_nurs = self.AddressDict() # nursery keys only self.rrc_dealloc_trigger_callback = dealloc_trigger_callback @@ -3026,7 +3039,7 @@ ll_assert(self.rrc_enabled, "rawrefcount.init not called") obj = llmemory.cast_ptr_to_adr(gcobj) objint = llmemory.cast_adr_to_int(obj, "symbolic") - self._pyobj(pyobject).ob_pypy_link = objint + self._pyobj(pyobject).c_ob_pypy_link = objint # lst = self.rrc_p_list_young if 
self.is_in_nursery(obj): @@ -3046,14 +3059,17 @@ else: self.rrc_o_list_old.append(pyobject) objint = llmemory.cast_adr_to_int(obj, "symbolic") - self._pyobj(pyobject).ob_pypy_link = objint + self._pyobj(pyobject).c_ob_pypy_link = objint # there is no rrc_o_dict def rawrefcount_mark_deallocating(self, gcobj, pyobject): ll_assert(self.rrc_enabled, "rawrefcount.init not called") obj = llmemory.cast_ptr_to_adr(gcobj) # should be a prebuilt obj objint = llmemory.cast_adr_to_int(obj, "symbolic") - self._pyobj(pyobject).ob_pypy_link = objint + self._pyobj(pyobject).c_ob_pypy_link = objint + + def rawrefcount_buffer_pyobj(self, pyobject): + self.rrc_buffered.append(pyobject) def rawrefcount_from_obj(self, gcobj): obj = llmemory.cast_ptr_to_adr(gcobj) @@ -3064,7 +3080,7 @@ return dct.get(obj) def rawrefcount_to_obj(self, pyobject): - obj = llmemory.cast_int_to_adr(self._pyobj(pyobject).ob_pypy_link) + obj = llmemory.cast_int_to_adr(self._pyobj(pyobject).c_ob_pypy_link) return llmemory.cast_adr_to_ptr(obj, llmemory.GCREF) def rawrefcount_next_dead(self): @@ -3085,15 +3101,14 @@ self.singleaddr) def _rrc_minor_trace(self, pyobject, singleaddr): - from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY - from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT + from rpython.rlib.rawrefcount import REFCNT_MASK # - rc = self._pyobj(pyobject).ob_refcnt - if rc == REFCNT_FROM_PYPY or rc == REFCNT_FROM_PYPY_LIGHT: + rc = self._pyobj(pyobject).c_ob_refcnt + if rc & REFCNT_MASK == 0: pass # the corresponding object may die else: # force the corresponding object to be alive - intobj = self._pyobj(pyobject).ob_pypy_link + intobj = self._pyobj(pyobject).c_ob_pypy_link singleaddr.address[0] = llmemory.cast_int_to_adr(intobj) self._trace_drag_out1(singleaddr) @@ -3110,14 +3125,14 @@ no_o_dict) def _rrc_minor_free(self, pyobject, surviving_list, surviving_dict): - intobj = self._pyobj(pyobject).ob_pypy_link + intobj = self._pyobj(pyobject).c_ob_pypy_link obj = 
llmemory.cast_int_to_adr(intobj) if self.is_in_nursery(obj): if self.is_forwarded(obj): # Common case: survives and moves obj = self.get_forwarding_address(obj) intobj = llmemory.cast_adr_to_int(obj, "symbolic") - self._pyobj(pyobject).ob_pypy_link = intobj + self._pyobj(pyobject).c_ob_pypy_link = intobj surviving = True if surviving_dict: # Surviving nursery object: was originally in @@ -3148,23 +3163,24 @@ def _rrc_free(self, pyobject): from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT + from rpython.rlib.rawrefcount import REFCNT_MASK # - rc = self._pyobj(pyobject).ob_refcnt + rc = self._pyobj(pyobject).c_ob_refcnt if rc >= REFCNT_FROM_PYPY_LIGHT: rc -= REFCNT_FROM_PYPY_LIGHT - if rc == 0: + if rc & REFCNT_MASK == 0: lltype.free(self._pyobj(pyobject), flavor='raw') else: # can only occur if LIGHT is used in create_link_pyobj() - self._pyobj(pyobject).ob_refcnt = rc - self._pyobj(pyobject).ob_pypy_link = 0 + self._pyobj(pyobject).c_ob_refcnt = rc + self._pyobj(pyobject).c_ob_pypy_link = 0 else: ll_assert(rc >= REFCNT_FROM_PYPY, "refcount underflow?") ll_assert(rc < int(REFCNT_FROM_PYPY_LIGHT * 0.99), "refcount underflow from REFCNT_FROM_PYPY_LIGHT?") rc -= REFCNT_FROM_PYPY - self._pyobj(pyobject).ob_pypy_link = 0 - if rc == 0: + self._pyobj(pyobject).c_ob_pypy_link = 0 + if rc & REFCNT_MASK == 0: self.rrc_dealloc_pending.append(pyobject) # an object with refcnt == 0 cannot stay around waiting # for its deallocator to be called. Some code (lxml) @@ -3175,22 +3191,21 @@ # because after a Py_INCREF()/Py_DECREF() on it, its # tp_dealloc is also called! 
rc = 1 - self._pyobj(pyobject).ob_refcnt = rc + self._pyobj(pyobject).c_ob_refcnt = rc _rrc_free._always_inline_ = True def rrc_major_collection_trace(self): self.rrc_p_list_old.foreach(self._rrc_major_trace, None) def _rrc_major_trace(self, pyobject, ignore): - from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY - from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT + from rpython.rlib.rawrefcount import REFCNT_MASK # - rc = self._pyobj(pyobject).ob_refcnt - if rc == REFCNT_FROM_PYPY or rc == REFCNT_FROM_PYPY_LIGHT: + rc = self._pyobj(pyobject).c_ob_refcnt + if rc & REFCNT_MASK == 0: pass # the corresponding object may die else: # force the corresponding object to be alive - intobj = self._pyobj(pyobject).ob_pypy_link + intobj = self._pyobj(pyobject).c_ob_pypy_link obj = llmemory.cast_int_to_adr(intobj) self.objects_to_trace.append(obj) self.visit_all_objects() @@ -3220,7 +3235,7 @@ # This is true if the obj has one of the following two flags: # * GCFLAG_VISITED: was seen during tracing # * GCFLAG_NO_HEAP_PTRS: immortal object never traced (so far) - intobj = self._pyobj(pyobject).ob_pypy_link + intobj = self._pyobj(pyobject).c_ob_pypy_link obj = llmemory.cast_int_to_adr(intobj) if self.header(obj).tid & (GCFLAG_VISITED | GCFLAG_NO_HEAP_PTRS): surviving_list.append(pyobject) diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py --- a/rpython/memory/gc/test/test_rawrefcount.py +++ b/rpython/memory/gc/test/test_rawrefcount.py @@ -2,9 +2,15 @@ from rpython.rtyper.lltypesystem import lltype, llmemory from rpython.memory.gc.incminimark import IncrementalMiniMarkGC from rpython.memory.gc.test.test_direct import BaseDirectGCTest -from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY -from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT - +from rpython.rlib.rawrefcount import (REFCNT_FROM_PYPY, REFCNT_FROM_PYPY_LIGHT, + REFCNT_MASK) +from pypy.module.cpyext.api import (PyObject, PyTypeObject, PyTypeObjectPtr, 
+ PyObjectFields, cpython_struct) +from pypy.module.cpyext.complexobject import PyComplexObject +from rpython.rtyper.lltypesystem import rffi +from pypy.module.cpyext.typeobjectdefs import visitproc, traverseproc +from rpython.rtyper.annlowlevel import llhelper +from rpython.rtyper.tool import rffi_platform PYOBJ_HDR = IncrementalMiniMarkGC.PYOBJ_HDR PYOBJ_HDR_PTR = IncrementalMiniMarkGC.PYOBJ_HDR_PTR @@ -14,6 +20,17 @@ ('prev', lltype.Ptr(S)), ('next', lltype.Ptr(S)))) +T = lltype.Ptr(lltype.ForwardReference()) +T.TO.become(lltype.Struct('test', + ('base', PyObject.TO), + ('next', T), + ('prev', T), + ('value', lltype.Signed))) + +TRAVERSE_FUNCTYPE = rffi.CCallback([PyObject, visitproc, rffi.VOIDP], + rffi.INT_real) +t1 = lltype.malloc(PyTypeObject, flavor='raw', immortal=True) + class TestRawRefCount(BaseDirectGCTest): GCClass = IncrementalMiniMarkGC @@ -56,21 +73,22 @@ self._collect(major=False) p1 = self.stackroots.pop() p1ref = lltype.cast_opaque_ptr(llmemory.GCREF, p1) - r1 = lltype.malloc(PYOBJ_HDR, flavor='raw', immortal=create_immortal) - r1.ob_refcnt = rc - r1.ob_pypy_link = 0 + r1 = lltype.malloc(PyObject.TO, flavor='raw', immortal=create_immortal) + r1.c_ob_refcnt = rc + r1.c_ob_pypy_link = 0 + r1.c_ob_type = lltype.nullptr(PyTypeObject) r1addr = llmemory.cast_ptr_to_adr(r1) if is_pyobj: assert not is_light self.gc.rawrefcount_create_link_pyobj(p1ref, r1addr) else: self.gc.rawrefcount_create_link_pypy(p1ref, r1addr) - assert r1.ob_refcnt == rc - assert r1.ob_pypy_link != 0 + assert r1.c_ob_refcnt == rc + assert r1.c_ob_pypy_link != 0 def check_alive(extra_refcount): - assert r1.ob_refcnt == rc + extra_refcount - assert r1.ob_pypy_link != 0 + assert r1.c_ob_refcnt == rc + extra_refcount + assert r1.c_ob_pypy_link != 0 p1ref = self.gc.rawrefcount_to_obj(r1addr) p1 = lltype.cast_opaque_ptr(lltype.Ptr(S), p1ref) assert p1.x == intval @@ -81,19 +99,53 @@ return p1 return p1, p1ref, r1, r1addr, check_alive + def _rawrefcount_cycle_obj(self): + + def 
test_tp_traverse(obj, visit, args): + test = rffi.cast(T, obj) + vret = 0 + if llmemory.cast_ptr_to_adr(test.next).ptr is not None: + next = rffi.cast(PyObject, test.next) + vret = visit(next, args) + if vret != 0: + return vret + if llmemory.cast_ptr_to_adr(test.prev).ptr is not None: + next = rffi.cast(PyObject, test.prev) + vret = visit(next, args) + if vret != 0: + return vret + return vret + + func_ptr = llhelper(TRAVERSE_FUNCTYPE, test_tp_traverse) + rffi_func_ptr = rffi.cast(traverseproc, func_ptr) + t1.c_tp_traverse = rffi_func_ptr + + r1 = lltype.malloc(T.TO, flavor='raw', immortal=True) + r1.base.c_ob_pypy_link = 0 + r1.base.c_ob_type = t1 + r1.base.c_ob_refcnt = 1 + return r1 + + def _rawrefcount_buffer_obj(self, obj): + from rpython.rlib.rawrefcount import REFCNT_CLR_MASK, REFCNT_CLR_PURPLE + rc = obj.base.c_ob_refcnt + obj.base.c_ob_refcnt = rc & ~REFCNT_CLR_MASK | REFCNT_CLR_PURPLE + objaddr = llmemory.cast_ptr_to_adr(obj) + self.gc.rawrefcount_buffer_pyobj(objaddr) + def test_rawrefcount_objects_basic(self, old=False): p1, p1ref, r1, r1addr, check_alive = ( self._rawrefcount_pair(42, is_light=True, create_old=old)) p2 = self.malloc(S) p2.x = 84 p2ref = lltype.cast_opaque_ptr(llmemory.GCREF, p2) - r2 = lltype.malloc(PYOBJ_HDR, flavor='raw') - r2.ob_refcnt = 1 - r2.ob_pypy_link = 0 + r2 = lltype.malloc(PyObject.TO, flavor='raw') + r2.c_ob_refcnt = 1 + r2.c_ob_pypy_link = 0 r2addr = llmemory.cast_ptr_to_adr(r2) # p2 and r2 are not linked - assert r1.ob_pypy_link != 0 - assert r2.ob_pypy_link == 0 + assert r1.c_ob_pypy_link != 0 + assert r2.c_ob_pypy_link == 0 assert self.gc.rawrefcount_from_obj(p1ref) == r1addr assert self.gc.rawrefcount_from_obj(p2ref) == llmemory.NULL assert self.gc.rawrefcount_to_obj(r1addr) == p1ref @@ -106,16 +158,16 @@ p1, p1ref, r1, r1addr, check_alive = ( self._rawrefcount_pair(42, is_light=True, create_old=old)) check_alive(0) - r1.ob_refcnt += 1 + r1.c_ob_refcnt += 1 self._collect(major=False) check_alive(+1) 
self._collect(major=True) check_alive(+1) - r1.ob_refcnt -= 1 + r1.c_ob_refcnt -= 1 self._collect(major=False) p1 = check_alive(0) self._collect(major=True) - py.test.raises(RuntimeError, "r1.ob_refcnt") # dead + py.test.raises(RuntimeError, "r1.c_ob_refcnt") # dead py.test.raises(RuntimeError, "p1.x") # dead self.gc.check_no_more_rawrefcount_state() assert self.trigger == [] @@ -129,7 +181,7 @@ if old: check_alive(0) self._collect(major=True) - py.test.raises(RuntimeError, "r1.ob_refcnt") # dead + py.test.raises(RuntimeError, "r1.c_ob_refcnt") # dead py.test.raises(RuntimeError, "p1.x") # dead self.gc.check_no_more_rawrefcount_state() @@ -147,7 +199,7 @@ check_alive(0) assert p1.x == 42 self._collect(major=True) - py.test.raises(RuntimeError, "r1.ob_refcnt") # dead + py.test.raises(RuntimeError, "r1.c_ob_refcnt") # dead py.test.raises(RuntimeError, "p1.x") # dead self.gc.check_no_more_rawrefcount_state() @@ -164,18 +216,18 @@ p1, p1ref, r1, r1addr, check_alive = ( self._rawrefcount_pair(42, is_light=False, create_old=old)) check_alive(0) - r1.ob_refcnt += 1 + r1.c_ob_refcnt += 1 self._collect(major=False) check_alive(+1) self._collect(major=True) check_alive(+1) - r1.ob_refcnt -= 1 + r1.c_ob_refcnt -= 1 self._collect(major=False) p1 = check_alive(0) self._collect(major=True, expected_trigger=1) py.test.raises(RuntimeError, "p1.x") # dead - assert r1.ob_refcnt == 1 # in the pending list - assert r1.ob_pypy_link == 0 + assert r1.c_ob_refcnt == 1 # in the pending list + assert r1.c_ob_pypy_link == 0 assert self.gc.rawrefcount_next_dead() == r1addr assert self.gc.rawrefcount_next_dead() == llmemory.NULL assert self.gc.rawrefcount_next_dead() == llmemory.NULL @@ -197,8 +249,8 @@ assert p1.x == 42 self._collect(major=True, expected_trigger=1) py.test.raises(RuntimeError, "p1.x") # dead - assert r1.ob_refcnt == 1 - assert r1.ob_pypy_link == 0 + assert r1.c_ob_refcnt == 1 + assert r1.c_ob_pypy_link == 0 assert self.gc.rawrefcount_next_dead() == r1addr 
self.gc.check_no_more_rawrefcount_state() lltype.free(r1, flavor='raw') @@ -214,8 +266,8 @@ else: self._collect(major=False, expected_trigger=1) py.test.raises(RuntimeError, "p1.x") # dead - assert r1.ob_refcnt == 1 - assert r1.ob_pypy_link == 0 + assert r1.c_ob_refcnt == 1 + assert r1.c_ob_pypy_link == 0 assert self.gc.rawrefcount_next_dead() == r1addr self.gc.check_no_more_rawrefcount_state() lltype.free(r1, flavor='raw') @@ -232,10 +284,10 @@ p1, p1ref, r1, r1addr, check_alive = ( self._rawrefcount_pair(42, is_pyobj=True, force_external=external)) check_alive(0) - r1.ob_refcnt += 1 # the pyobject is kept alive + r1.c_ob_refcnt += 1 # the pyobject is kept alive self._collect(major=False) - assert r1.ob_refcnt == 1 # refcnt dropped to 1 - assert r1.ob_pypy_link == 0 # detached + assert r1.c_ob_refcnt == 1 # refcnt dropped to 1 + assert r1.c_ob_pypy_link == 0 # detached self.gc.check_no_more_rawrefcount_state() lltype.free(r1, flavor='raw') @@ -252,8 +304,8 @@ self._collect(major=True, expected_trigger=1) else: self._collect(major=False, expected_trigger=1) - assert r1.ob_refcnt == 1 # refcnt 1, in the pending list - assert r1.ob_pypy_link == 0 # detached + assert r1.c_ob_refcnt == 1 # refcnt 1, in the pending list + assert r1.c_ob_pypy_link == 0 # detached assert self.gc.rawrefcount_next_dead() == r1addr self.gc.check_no_more_rawrefcount_state() lltype.free(r1, flavor='raw') @@ -277,8 +329,8 @@ assert self.trigger == [] self._collect(major=True, expected_trigger=1) py.test.raises(RuntimeError, "p1.x") # dead - assert r1.ob_refcnt == 1 - assert r1.ob_pypy_link == 0 + assert r1.c_ob_refcnt == 1 + assert r1.c_ob_pypy_link == 0 assert self.gc.rawrefcount_next_dead() == r1addr self.gc.check_no_more_rawrefcount_state() lltype.free(r1, flavor='raw') @@ -289,3 +341,146 @@ check_alive(0) self._collect(major=True) check_alive(0) + + # def test_cycle_self_reference_free(self): + # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + # r1 = self._rawrefcount_cycle_obj() 
+ # r1.next = r1 + # self._rawrefcount_buffer_obj(r1) + # self.gc.rrc_collect_cycles() + # assert r1.base.c_ob_refcnt & REFCNT_MASK == 0 + # + # def test_cycle_self_reference_not_free(self): + # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + # r1 = self._rawrefcount_cycle_obj() + # r1.base.c_ob_refcnt += 1 + # r1.next = r1 + # self._rawrefcount_buffer_obj(r1) + # self.gc.rrc_collect_cycles() + # assert r1.base.c_ob_refcnt & REFCNT_MASK == 2 + # + # def test_simple_cycle_free(self): + # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + # r1 = self._rawrefcount_cycle_obj() + # r2 = self._rawrefcount_cycle_obj() + # r1.next = r2 + # r2.next = r1 + # self._rawrefcount_buffer_obj(r1) + # self.gc.rrc_collect_cycles() + # assert r1.base.c_ob_refcnt & REFCNT_MASK == 0 + # assert r2.base.c_ob_refcnt & REFCNT_MASK == 0 + # + # def test_simple_cycle_not_free(self): + # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + # r1 = self._rawrefcount_cycle_obj() + # r2 = self._rawrefcount_cycle_obj() + # r1.next = r2 + # r2.next = r1 + # r2.base.c_ob_refcnt += 1 + # self._rawrefcount_buffer_obj(r1) + # self.gc.rrc_collect_cycles() + # assert r1.base.c_ob_refcnt & REFCNT_MASK == 1 + # assert r2.base.c_ob_refcnt & REFCNT_MASK == 2 + # + # def test_complex_cycle_free(self): + # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + # r1 = self._rawrefcount_cycle_obj() + # r2 = self._rawrefcount_cycle_obj() + # r3 = self._rawrefcount_cycle_obj() + # r1.next = r2 + # r1.prev = r2 + # r2.base.c_ob_refcnt += 1 + # r2.next = r3 + # r3.prev = r1 + # self._rawrefcount_buffer_obj(r1) + # self.gc.rrc_collect_cycles() + # assert r1.base.c_ob_refcnt & REFCNT_MASK == 0 + # assert r2.base.c_ob_refcnt & REFCNT_MASK == 0 + # assert r3.base.c_ob_refcnt & REFCNT_MASK == 0 + # + # def test_complex_cycle_not_free(self): + # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + # r1 = self._rawrefcount_cycle_obj() + # r2 = self._rawrefcount_cycle_obj() + # r3 = 
self._rawrefcount_cycle_obj() + # r1.next = r2 + # r1.prev = r2 + # r2.base.c_ob_refcnt += 1 + # r2.next = r3 + # r3.prev = r1 + # r3.base.c_ob_refcnt += 1 + # self._rawrefcount_buffer_obj(r1) + # self.gc.rrc_collect_cycles() + # assert r1.base.c_ob_refcnt & REFCNT_MASK == 1 + # assert r2.base.c_ob_refcnt & REFCNT_MASK == 2 + # assert r3.base.c_ob_refcnt & REFCNT_MASK == 2 + # + # def test_cycle_2_buffered_free(self): + # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + # r1 = self._rawrefcount_cycle_obj() + # r2 = self._rawrefcount_cycle_obj() + # r1.next = r2 + # r2.prev = r1 + # self._rawrefcount_buffer_obj(r1) + # self._rawrefcount_buffer_obj(r2) + # self.gc.rrc_collect_cycles() + # assert r1.base.c_ob_refcnt & REFCNT_MASK == 0 + # assert r2.base.c_ob_refcnt & REFCNT_MASK == 0 + # + # def test_cycle_2_buffered_not_free(self): + # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + # r1 = self._rawrefcount_cycle_obj() + # r2 = self._rawrefcount_cycle_obj() + # r1.next = r2 + # r2.prev = r1 + # r1.base.c_ob_refcnt += 1 + # self._rawrefcount_buffer_obj(r1) + # self._rawrefcount_buffer_obj(r2) + # self.gc.rrc_collect_cycles() + # assert r1.base.c_ob_refcnt & REFCNT_MASK == 2 + # assert r2.base.c_ob_refcnt & REFCNT_MASK == 1 + # + # def test_multiple_cycles_partial_free(self): + # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + # r1 = self._rawrefcount_cycle_obj() + # r2 = self._rawrefcount_cycle_obj() + # r3 = self._rawrefcount_cycle_obj() + # r4 = self._rawrefcount_cycle_obj() + # r5 = self._rawrefcount_cycle_obj() + # r1.next = r2 + # r2.next = r3 + # r3.next = r1 + # r2.prev = r5 + # r5.next = r4 + # r4.next = r5 + # r5.base.c_ob_refcnt += 1 + # r4.base.c_ob_refcnt += 1 + # self._rawrefcount_buffer_obj(r1) + # self.gc.rrc_collect_cycles() + # assert r1.base.c_ob_refcnt & REFCNT_MASK == 0 + # assert r2.base.c_ob_refcnt & REFCNT_MASK == 0 + # assert r3.base.c_ob_refcnt & REFCNT_MASK == 0 + # assert r4.base.c_ob_refcnt & REFCNT_MASK == 2 
+ # assert r5.base.c_ob_refcnt & REFCNT_MASK == 1 + # + # def test_multiple_cycles_all_free(self): + # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + # r1 = self._rawrefcount_cycle_obj() + # r2 = self._rawrefcount_cycle_obj() + # r3 = self._rawrefcount_cycle_obj() + # r4 = self._rawrefcount_cycle_obj() + # r5 = self._rawrefcount_cycle_obj() + # r1.next = r2 + # r2.next = r3 + # r3.next = r1 + # r2.prev = r5 + # r5.next = r4 + # r4.next = r5 + # r5.base.c_ob_refcnt += 1 + # self._rawrefcount_buffer_obj(r1) + # self.gc.rrc_collect_cycles() + # assert r1.base.c_ob_refcnt & REFCNT_MASK == 0 + # assert r2.base.c_ob_refcnt & REFCNT_MASK == 0 + # assert r3.base.c_ob_refcnt & REFCNT_MASK == 0 + # assert r4.base.c_ob_refcnt & REFCNT_MASK == 0 + # assert r5.base.c_ob_refcnt & REFCNT_MASK == 0 diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -489,6 +489,10 @@ GCClass.rawrefcount_mark_deallocating, [s_gc, s_gcref, SomeAddress()], annmodel.s_None) + self.rawrefcount_buffer_pyobj = getfn( + GCClass.rawrefcount_buffer_pyobj, + [s_gc, SomeAddress()], + annmodel.s_None) self.rawrefcount_from_obj_ptr = getfn( GCClass.rawrefcount_from_obj, [s_gc, s_gcref], SomeAddress(), inline = True) @@ -1339,6 +1343,13 @@ [self.rawrefcount_mark_deallocating, self.c_const_gc, v_gcobj, v_pyobject]) + def gct_gc_rawrefcount_buffer_pyobj(self, hop): + [v_pyobject] = hop.spaceop.args + assert v_pyobject.concretetype == llmemory.Address + hop.genop("direct_call", + [self.rawrefcount_buffer_pyobj, self.c_const_gc, + v_pyobject]) + def gct_gc_rawrefcount_from_obj(self, hop): [v_gcobj] = hop.spaceop.args assert v_gcobj.concretetype == llmemory.GCREF diff --git a/rpython/rlib/rawrefcount.py b/rpython/rlib/rawrefcount.py --- a/rpython/rlib/rawrefcount.py +++ b/rpython/rlib/rawrefcount.py @@ -4,18 +4,49 @@ # This is meant for pypy's cpyext module, but is a 
generally # useful interface over our GC. XXX "pypy" should be removed here # -import sys, weakref, py +import sys, weakref, py, math from rpython.rtyper.lltypesystem import lltype, llmemory, rffi from rpython.rlib.objectmodel import we_are_translated, specialize, not_rpython from rpython.rtyper.extregistry import ExtRegistryEntry from rpython.translator.tool.cbuild import ExternalCompilationInfo -from rpython.rlib import rgc +from rpython.rlib import rgc, objectmodel +from pypy.interpreter.baseobjspace import W_Root -REFCNT_FROM_PYPY = sys.maxint // 4 + 1 -REFCNT_FROM_PYPY_LIGHT = REFCNT_FROM_PYPY + (sys.maxint // 2 + 1) +MAX_BIT = int(math.log(sys.maxint, 2)) + +# Flags +REFCNT_FROM_PYPY = 1 << MAX_BIT - 2 # Reference from a pypy object +REFCNT_FROM_PYPY_LIGHT = (1 << MAX_BIT - 1) + REFCNT_FROM_PYPY # Light reference from a pypy object +REFCNT_CYCLE_BUFFERED = 1 << MAX_BIT - 3 # Object in roots buffer (for potential cycles) +REFCNT_IN_WAVEFRONT = 1 << MAX_BIT - 4 # Object in any wavefront + +# Offsets and sizes +REFCNT_CLR_OFFS = MAX_BIT - 7 +REFCNT_CRC_OFFS = REFCNT_CLR_OFFS / 2 +REFCNT_BITS = REFCNT_CRC_OFFS - 1 + +# Concurrent cycle collection colors +REFCNT_CLR_BLACK = 0 << REFCNT_CLR_OFFS # In use or free (default) +REFCNT_CLR_GRAY = 1 << REFCNT_CLR_OFFS # Possible member of cycle +REFCNT_CLR_YELLOW = 2 << REFCNT_CLR_OFFS # Member of garbage cycle +REFCNT_CLR_PURPLE = 3 << REFCNT_CLR_OFFS # Possible root of cycle +REFCNT_CLR_GREEN = 4 << REFCNT_CLR_OFFS # Acyclic +REFCNT_CLR_ORANGE = 5 << REFCNT_CLR_OFFS # In orange wavefront (might change to YELLOW + IN_WAVEFRONT + phase = 3) +REFCNT_CLR_MASK = 7 << REFCNT_CLR_OFFS + +# Cyclic reference count with overflow bit +REFCNT_CRC_OVERFLOW = 1 << REFCNT_CRC_OFFS + REFCNT_BITS +REFCNT_CRC_MASK = (1 << REFCNT_CRC_OFFS + REFCNT_BITS + 1) - 1 +REFCNT_CRC = 1 < REFCNT_CRC_OFFS + +# True reference count with overflow bit +REFCNT_OVERFLOW = 1 << REFCNT_BITS +REFCNT_MASK = (1 << REFCNT_BITS + 1) - 1 + 
RAWREFCOUNT_DEALLOC_TRIGGER = lltype.Ptr(lltype.FuncType([], lltype.Void)) +W_MARKER_DEALLOCATING = W_Root() def _build_pypy_link(p): @@ -23,6 +54,47 @@ _adr2pypy.append(p) return res +def incref(pyobj): + if pyobj.c_ob_refcnt & REFCNT_OVERFLOW == 0: + pyobj.c_ob_refcnt += 1 + else: + if pyobj.c_ob_refcnt & REFCNT_MASK == REFCNT_OVERFLOW: + pyobj.c_ob_refcnt += 1 + overflow_new(pyobj) + else: + overflow_add(pyobj) + +def decref(pyobj): + if pyobj.c_ob_refcnt & REFCNT_OVERFLOW == 0: + pyobj.c_ob_refcnt -= 1 + else: + if pyobj.c_ob_refcnt & REFCNT_MASK == REFCNT_OVERFLOW: + pyobj.c_ob_refcnt -= 1 + elif overflow_sub(pyobj): + pyobj.c_ob_refcnt -= 1 + +_refcount_overflow = dict() + +def overflow_new(obj): + _refcount_overflow[objectmodel.current_object_addr_as_int(obj)] = 0 + +def overflow_add(obj): + _refcount_overflow[objectmodel.current_object_addr_as_int(obj)] += 1 + +def overflow_sub(obj): + addr = objectmodel.current_object_addr_as_int(obj) + c = _refcount_overflow[addr] + if c > 0: + _refcount_overflow[addr] = c - 1 + return False + else: + _refcount_overflow.pop(addr) + return True + +def overflow_get(obj): + return _refcount_overflow[objectmodel.current_object_addr_as_int(obj)] + +# TODO: _cyclic_refcount_overflow = dict() @not_rpython def init(dealloc_trigger_callback=None): @@ -72,6 +144,10 @@ ob.c_ob_pypy_link = _build_pypy_link(marker) @not_rpython +def buffer_pyobj(ob): + pass # TODO: implement? 
+ + at not_rpython def from_obj(OB_PTR_TYPE, p): ob = _pypy2ob.get(p) if ob is None: @@ -122,7 +198,8 @@ wr_p_list = [] new_p_list = [] for ob in reversed(_p_list): - if ob.c_ob_refcnt not in (REFCNT_FROM_PYPY, REFCNT_FROM_PYPY_LIGHT): + if ob.c_ob_refcnt & REFCNT_MASK > 0 \ + or ob.c_ob_refcnt & REFCNT_FROM_PYPY == 0: new_p_list.append(ob) else: p = detach(ob, wr_p_list) @@ -155,7 +232,8 @@ if ob.c_ob_refcnt >= REFCNT_FROM_PYPY_LIGHT: ob.c_ob_refcnt -= REFCNT_FROM_PYPY_LIGHT ob.c_ob_pypy_link = 0 - if ob.c_ob_refcnt == 0: + if ob.c_ob_refcnt & REFCNT_MASK == 0 \ + and ob.c_ob_refcnt < REFCNT_FROM_PYPY: lltype.free(ob, flavor='raw', track_allocation=track_allocation) else: @@ -163,8 +241,9 @@ assert ob.c_ob_refcnt < int(REFCNT_FROM_PYPY_LIGHT * 0.99) ob.c_ob_refcnt -= REFCNT_FROM_PYPY ob.c_ob_pypy_link = 0 - if ob.c_ob_refcnt == 0: - ob.c_ob_refcnt = 1 + if ob.c_ob_refcnt & REFCNT_MASK == 0 \ + and ob.c_ob_refcnt < REFCNT_FROM_PYPY: + ob.c_ob_refcnt += 1 _d_list.append(ob) return None @@ -252,6 +331,17 @@ func_boehm_eci) hop.genop('direct_call', [c_func]) +class Entry(ExtRegistryEntry): + _about_ = buffer_pyobj + + def compute_result_annotation(self, s_ob): + pass + + def specialize_call(self, hop): + name = 'gc_rawrefcount_buffer_pyobj' + hop.exception_cannot_occur() + v_ob = hop.inputarg(hop.args_r[0], arg=0) + hop.genop(name, [_unspec_ob(hop, v_ob)]) class Entry(ExtRegistryEntry): _about_ = from_obj diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py --- a/rpython/rtyper/llinterp.py +++ b/rpython/rtyper/llinterp.py @@ -969,6 +969,9 @@ def op_gc_rawrefcount_mark_deallocating(self, *args): raise NotImplementedError("gc_rawrefcount_mark_deallocating") + def op_gc_rawrefcount_buffer_pyobj(self, *args): + raise NotImplementedError("gc_rawrefcount_buffer_pyobj") + def op_gc_rawrefcount_next_dead(self, *args): raise NotImplementedError("gc_rawrefcount_next_dead") diff --git a/rpython/rtyper/lltypesystem/lloperation.py 
b/rpython/rtyper/lltypesystem/lloperation.py --- a/rpython/rtyper/lltypesystem/lloperation.py +++ b/rpython/rtyper/lltypesystem/lloperation.py @@ -494,6 +494,7 @@ 'gc_rawrefcount_create_link_pypy': LLOp(), 'gc_rawrefcount_create_link_pyobj': LLOp(), 'gc_rawrefcount_mark_deallocating': LLOp(), + 'gc_rawrefcount_buffer_pyobj': LLOp(), 'gc_rawrefcount_from_obj': LLOp(sideeffects=False), 'gc_rawrefcount_to_obj': LLOp(sideeffects=False), 'gc_rawrefcount_next_dead': LLOp(), diff --git a/rpython/rtyper/lltypesystem/lltype.py b/rpython/rtyper/lltypesystem/lltype.py --- a/rpython/rtyper/lltypesystem/lltype.py +++ b/rpython/rtyper/lltypesystem/lltype.py @@ -564,7 +564,7 @@ def _container_example(self): def ex(*args): return self.RESULT._defl() - return _func(self, _callable=ex) + return _func(self, {'_callable': ex}) def _trueargs(self): return [arg for arg in self.ARGS if arg is not Void] @@ -2094,7 +2094,7 @@ class _func(_container): - def __init__(self, TYPE, **attrs): + def __init__(self, TYPE, attrs): attrs.setdefault('_TYPE', TYPE) attrs.setdefault('_name', '?') attrs.setdefault('_callable', None) @@ -2303,7 +2303,8 @@ hash(tuple(attrs.items())) except TypeError: raise TypeError("'%r' must be hashable"%attrs) - o = _func(TYPE, _name=name, **attrs) + attrs['_name'] = name + o = _func(TYPE, attrs) return _ptr(Ptr(TYPE), o) def _getconcretetype(v): From pypy.commits at gmail.com Fri Jan 11 05:38:57 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 02:38:57 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Call tp_traverse from incminimark Message-ID: <5c387241.1c69fb81.50a0a.f124@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95599:fb1c6fe11349 Date: 2018-03-20 16:38 +0100 http://bitbucket.org/pypy/pypy/changeset/fb1c6fe11349/ Log: Call tp_traverse from incminimark Mark cpython objects reachable by pypy objects diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ 
b/pypy/module/cpyext/api.py @@ -1293,7 +1293,10 @@ # if do tuple_attach of the prebuilt empty tuple, we need to call # _PyPy_Malloc) builder.attach_all(space) - + + #import rpython.rlib.rawrefcount + #rawrefcount.init_traverse(generic_cpy_call_gc) + setup_init_functions(eci, prefix) return modulename.new(ext='') @@ -1716,6 +1719,11 @@ return make_generic_cpy_call(FT, False)(space, func, *args) @specialize.ll() +def generic_cpy_call_gc(func, *args): + FT = lltype.typeOf(func).TO + return make_generic_cpy_call_gc(FT, False)(func, *args) + + at specialize.ll() def generic_cpy_call_expect_null(space, func, *args): FT = lltype.typeOf(func).TO return make_generic_cpy_call(FT, True)(space, func, *args) @@ -1815,3 +1823,75 @@ return result return generic_cpy_call + + at specialize.memo() +def make_generic_cpy_call_gc(FT, expect_null): + from pypy.module.cpyext.pyobject import is_pyobj, make_ref, decref + from pypy.module.cpyext.pyobject import get_w_obj_and_decref + from pypy.module.cpyext.pyerrors import PyErr_Occurred + unrolling_arg_types = unrolling_iterable(enumerate(FT.ARGS)) + RESULT_TYPE = FT.RESULT + + # copied and modified from rffi.py + # We need tons of care to ensure that no GC operation and no + # exception checking occurs in call_external_function. + argnames = ', '.join(['a%d' % i for i in range(len(FT.ARGS))]) + source = py.code.Source(""" + def cpy_call_external(funcptr, %(argnames)s): + # NB. it is essential that no exception checking occurs here! 
+ res = funcptr(%(argnames)s) + return res + """ % locals()) + miniglobals = {'__name__': __name__, # for module name propagation + } + exec source.compile() in miniglobals + call_external_function = specialize.ll()(miniglobals['cpy_call_external']) + call_external_function._dont_inline_ = True + call_external_function._gctransformer_hint_close_stack_ = True + # don't inline, as a hack to guarantee that no GC pointer is alive + # anywhere in call_external_function + + @specialize.ll() + def generic_cpy_call(func, *args): + boxed_args = () + to_decref = () + assert len(args) == len(FT.ARGS) + for i, ARG in unrolling_arg_types: + arg = args[i] + _pyobj = None + if is_PyObject(ARG): + assert is_pyobj(arg) + + boxed_args += (arg,) + to_decref += (_pyobj,) + + # see "Handling of the GIL" above + tid = rthread.get_ident() + tid_before = cpyext_glob_tid_ptr[0] + assert tid_before == 0 or tid_before == tid + cpyext_glob_tid_ptr[0] = tid + + try: + # Call the function + result = call_external_function(func, *boxed_args) + finally: + assert cpyext_glob_tid_ptr[0] == tid + cpyext_glob_tid_ptr[0] = tid_before + for i, ARG in unrolling_arg_types: + # note that this loop is nicely unrolled statically by RPython + _pyobj = to_decref[i] + if _pyobj is not None: + pyobj = rffi.cast(PyObject, _pyobj) + rawrefcount.decref(pyobj) + + if is_PyObject(RESULT_TYPE): + ret = None + + # Check for exception consistency + # XXX best attempt, will miss preexisting error that is + # overwritten with a new error of the same type + + return ret + return result + + return generic_cpy_call \ No newline at end of file diff --git a/pypy/module/cpyext/include/object.h b/pypy/module/cpyext/include/object.h --- a/pypy/module/cpyext/include/object.h +++ b/pypy/module/cpyext/include/object.h @@ -321,6 +321,8 @@ #define _PyGC_FINALIZED(o) 1 #define PyType_IS_GC(tp) 1 +/* TODO: implement like in cpython + (see 
https://github.com/python/cpython/blob/517da1e58f4c489d4b31579852cde5f7113da08e/Include/objimpl.h#L295) */ #define PyObject_GC_Track(o) do { } while(0) #define PyObject_GC_UnTrack(o) do { } while(0) #define _PyObject_GC_TRACK(o) do { } while(0) diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -193,13 +193,27 @@ VISIT_FUNCTYPE = rffi.CCallback([PyObject, rffi.VOIDP], rffi.INT_real) -def traverse(obj, func_ptr): - from pypy.module.cpyext.api import generic_cpy_call - from pypy.module.cpyext.typeobjectdefs import visitproc - if obj.c_ob_type and obj.c_ob_type.c_tp_traverse: - visitproc_ptr = rffi.cast(visitproc, func_ptr) - generic_cpy_call(True, obj.c_ob_type.c_tp_traverse, obj, - visitproc_ptr, rffi.cast(rffi.VOIDP, obj)) + +def visit_trace_non_rc_roots(pyobj, self_ptr): + from rpython.rlib.rawrefcount import (REFCNT_CLR_BLACK, + REFCNT_CLR_MASK) + from rpython.rtyper.annlowlevel import cast_adr_to_nongc_instance + + self_adr = rffi.cast(llmemory.Address, self_ptr) + self = cast_adr_to_nongc_instance(IncrementalMiniMarkGC, self_adr) + + # if the pyobj is not marked, remember it and if there is a linked pypy + # object also remember it + if pyobj.c_ob_refcnt & REFCNT_CLR_MASK != REFCNT_CLR_BLACK: + pyobject = llmemory.cast_ptr_to_adr(pyobj) + self.rrc_more_pyobjects_to_scan.append(pyobject) + intobj = pyobj.c_ob_pypy_link + if intobj != 0: + obj = llmemory.cast_int_to_adr(intobj) + hdr = self.header(obj) + if not (hdr.tid & GCFLAG_VISITED): + self.objects_to_trace.append(obj) + return rffi.cast(rffi.INT_real, 0) # ____________________________________________________________ @@ -2346,7 +2360,7 @@ self.visit_all_objects() # if self.rrc_enabled: - self.rrc_major_collection_trace() + self.rrc_major_collection_trace() # ll_assert(not (self.probably_young_objects_with_finalizers .non_empty()), @@ -3022,6 +3036,9 @@ self.rrc_p_dict_nurs = self.AddressDict() # 
nursery keys only self.rrc_dealloc_trigger_callback = dealloc_trigger_callback self.rrc_dealloc_pending = self.AddressStack() + self.rrc_pyobjects_to_scan = self.AddressStack() + self.rrc_more_pyobjects_to_scan = self.AddressStack() + self.rrc_pyobjects_to_trace = self.AddressStack() self.rrc_enabled = True def check_no_more_rawrefcount_state(self): @@ -3194,8 +3211,13 @@ self._pyobj(pyobject).c_ob_refcnt = rc _rrc_free._always_inline_ = True + NO_CYCLE_DETECTION = False + def rrc_major_collection_trace(self): - self.rrc_p_list_old.foreach(self._rrc_major_trace, None) + if self.NO_CYCLE_DETECTION: + self.rrc_p_list_old.foreach(self._rrc_major_trace, None) + else: + self.rrc_major_collection_trace_cycle() def _rrc_major_trace(self, pyobject, ignore): from rpython.rlib.rawrefcount import REFCNT_MASK @@ -3210,6 +3232,71 @@ self.objects_to_trace.append(obj) self.visit_all_objects() + def rrc_major_collection_trace_cycle(self): + assert not self.objects_to_trace.non_empty() + assert not self.rrc_pyobjects_to_scan.non_empty() + assert not self.rrc_more_pyobjects_to_scan.non_empty() + assert not self.rrc_pyobjects_to_trace.non_empty() + + # initially, scan all old pyobjects which are linked to objects + self.rrc_p_list_old.foreach(self._rrc_major_scan_non_rc_roots, None) + + # as long as we find new pyobjects which should be marked, recursively + # mark them + while self.rrc_pyobjects_to_trace.non_empty(): + while self.rrc_pyobjects_to_trace.non_empty(): + pyobj = self.rrc_pyobjects_to_trace.pop() + self._rrc_major_trace_non_rc_roots(pyobj) + + # see if we found new pypy objects to trace + if self.objects_to_trace.non_empty(): + self.visit_all_objects() + self.objects_to_trace.delete() + + # look if there are some pyobjects with linked objects which were + # not marked previously, but are marked now + swap = self.rrc_pyobjects_to_scan + self.rrc_pyobjects_to_scan = self.rrc_more_pyobjects_to_scan + self.rrc_more_pyobjects_to_scan = swap + 
self.rrc_pyobjects_to_scan.foreach( + self._rrc_major_scan_non_rc_roots, None) + self.rrc_pyobjects_to_scan.delete() + + def traverse(self, pyobject, func_ptr): + from pypy.module.cpyext.api import generic_cpy_call_gc + from pypy.module.cpyext.typeobjectdefs import visitproc + from rpython.rtyper.annlowlevel import cast_nongc_instance_to_adr + self_addr = cast_nongc_instance_to_adr(self) + pyobj = self._pyobj(pyobject) + if pyobj.c_ob_type and pyobj.c_ob_type.c_tp_traverse: + visitproc_ptr = rffi.cast(visitproc, func_ptr) + generic_cpy_call_gc(pyobj.c_ob_type.c_tp_traverse, pyobj, + visitproc_ptr, rffi.cast(rffi.VOIDP, self_addr)) + #cast_nongc_instance_to_adr(self) + + def _rrc_major_trace_non_rc_roots(self, pyobject): + from rpython.rtyper.annlowlevel import llhelper + func_ptr = llhelper(VISIT_FUNCTYPE, visit_trace_non_rc_roots) + self.traverse(pyobject, func_ptr) + + def _rrc_major_scan_non_rc_roots(self, pyobject, ignore): + from rpython.rlib.rawrefcount import (REFCNT_CLR_BLACK, + REFCNT_CLR_MASK) + # check in the object header of the linked pypy object, if it is marked + # or not + pyobj = self._pyobj(pyobject) + intobj = pyobj.c_ob_pypy_link + obj = llmemory.cast_int_to_adr(intobj) + hdr = self.header(obj) + if hdr.tid & GCFLAG_VISITED: + if pyobj.c_ob_refcnt & REFCNT_CLR_MASK != REFCNT_CLR_BLACK: + # process the pyobject now + self.rrc_pyobjects_to_trace.append(pyobject) + else: + # save the pyobject for later, in case its linked object becomes + # marked + self.rrc_more_pyobjects_to_scan.append(pyobject) + def rrc_major_collection_free(self): ll_assert(self.rrc_p_dict_nurs.length() == 0, "p_dict_nurs not empty 2") length_estimate = self.rrc_p_dict.length() diff --git a/rpython/rlib/rawrefcount.py b/rpython/rlib/rawrefcount.py --- a/rpython/rlib/rawrefcount.py +++ b/rpython/rlib/rawrefcount.py @@ -112,6 +112,15 @@ _d_marker = None _dealloc_trigger_callback = dealloc_trigger_callback +# def init_traverse(traverse_cpy_call): +# global _traverse_cpy_call +# 
_traverse_cpy_call = traverse_cpy_call +# +# def traverse_cpy_call(pyobj, visitproc_ptr, arg): +# global _traverse_cpy_call +# _traverse_cpy_call(pyobj.c_ob_type.c_tp_traverse, pyobj, +# visitproc_ptr, arg) + @not_rpython def create_link_pypy(p, ob): "a link where the PyPy object contains some or all the data" From pypy.commits at gmail.com Fri Jan 11 05:38:59 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 02:38:59 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Refactored call to tp_traverse from incminimark so there are no dependencies to pypy Message-ID: <5c387243.1c69fb81.d639f.4b82@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95600:94b062729ca4 Date: 2018-03-23 11:46 +0100 http://bitbucket.org/pypy/pypy/changeset/94b062729ca4/ Log: Refactored call to tp_traverse from incminimark so there are no dependencies to pypy diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -1719,11 +1719,6 @@ return make_generic_cpy_call(FT, False)(space, func, *args) @specialize.ll() -def generic_cpy_call_gc(func, *args): - FT = lltype.typeOf(func).TO - return make_generic_cpy_call_gc(FT, False)(func, *args) - - at specialize.ll() def generic_cpy_call_expect_null(space, func, *args): FT = lltype.typeOf(func).TO return make_generic_cpy_call(FT, True)(space, func, *args) diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py --- a/pypy/module/cpyext/pyobject.py +++ b/pypy/module/cpyext/pyobject.py @@ -20,8 +20,7 @@ from rpython.rlib.debug import ll_assert, fatalerror, debug_print from rpython.rlib.rawrefcount import ( REFCNT_MASK, REFCNT_FROM_PYPY, REFCNT_OVERFLOW, REFCNT_CYCLE_BUFFERED, - REFCNT_CLR_MASK, REFCNT_CLR_GREEN, REFCNT_CLR_PURPLE, - W_MARKER_DEALLOCATING) + REFCNT_CLR_MASK, REFCNT_CLR_GREEN, REFCNT_CLR_PURPLE) from pypy.module.cpyext.api import slot_function from pypy.module.cpyext.typeobjectdefs import 
visitproc @@ -254,6 +253,8 @@ w_obj._cpyext_attach_pyobj(space, py_obj) +w_marker_deallocating = W_Root() + @jit.dont_look_inside def from_ref(space, ref): """ @@ -265,7 +266,7 @@ return None w_obj = rawrefcount.to_obj(W_Root, ref) if w_obj is not None: - if w_obj is not W_MARKER_DEALLOCATING: + if w_obj is not w_marker_deallocating: return w_obj fatalerror( "*** Invalid usage of a dying CPython object ***\n" @@ -318,7 +319,7 @@ def pyobj_has_w_obj(pyobj): w_obj = rawrefcount.to_obj(W_Root, pyobj) - return w_obj is not None and w_obj is not W_MARKER_DEALLOCATING + return w_obj is not None and w_obj is not w_marker_deallocating def w_obj_has_pyobj(w_obj): return bool(rawrefcount.from_obj(PyObject, w_obj)) @@ -454,7 +455,7 @@ @init_function def write_w_marker_deallocating(space): if we_are_translated(): - llptr = cast_instance_to_base_ptr(W_MARKER_DEALLOCATING) + llptr = cast_instance_to_base_ptr(w_marker_deallocating) state = space.fromcache(State) state.C.set_marker(rffi.cast(Py_ssize_t, llptr)) diff --git a/pypy/module/cpyext/state.py b/pypy/module/cpyext/state.py --- a/pypy/module/cpyext/state.py +++ b/pypy/module/cpyext/state.py @@ -1,8 +1,8 @@ from rpython.rlib.objectmodel import we_are_translated, specialize -from rpython.rtyper.lltypesystem import rffi, lltype +from rpython.rtyper.lltypesystem import rffi, lltype, llmemory from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter import executioncontext -from rpython.rtyper.annlowlevel import llhelper +from rpython.rtyper.annlowlevel import llhelper, llhelper_args from rpython.rlib.rdynload import DLLHANDLE from rpython.rlib import rawrefcount import sys @@ -70,7 +70,10 @@ decref(space, ob) print 'dealloc_trigger DONE' return "RETRY" - rawrefcount.init(dealloc_trigger) + def tp_traverse(obj_addr, callback, args): + # TODO: implement + pass + rawrefcount.init(dealloc_trigger, tp_traverse) else: if space.config.translation.gc == "boehm": action = BoehmPyObjDeallocAction(space) @@ -80,6 
+83,25 @@ pyobj_dealloc_action = PyObjDeallocAction(space) self.dealloc_trigger = lambda: pyobj_dealloc_action.fire() + def _rawrefcount_tp_traverse(space, pyobj_ptr, callback, args): + from pypy.module.cpyext.api import (generic_cpy_call, + PyObject) + from pypy.module.cpyext.typeobjectdefs import visitproc + # convert to pointers with correct types (PyObject) + callback_addr = llmemory.cast_ptr_to_adr(callback) + callback_ptr = llmemory.cast_adr_to_ptr(callback_addr, + visitproc) + pyobj_addr = llmemory.cast_ptr_to_adr(pyobj_ptr) + pyobj = llmemory.cast_adr_to_ptr(pyobj_addr, PyObject) + # now call tp_traverse (if possible) + if pyobj.c_ob_type and pyobj.c_ob_type.c_tp_traverse: + generic_cpy_call(space, pyobj.c_ob_type.c_tp_traverse, + pyobj, + callback_ptr, args) + self.tp_traverse = (lambda o, v, a: + _rawrefcount_tp_traverse(self.space, + o, v, a)) + def build_api(self): """NOT_RPYTHON This function is called when at object space creation, @@ -111,7 +133,9 @@ # does something different. Sigh. 
rawrefcount.init( llhelper(rawrefcount.RAWREFCOUNT_DEALLOC_TRIGGER, - self.dealloc_trigger)) + self.dealloc_trigger), + llhelper(rawrefcount.RAWREFCOUNT_TRAVERSE, + self.tp_traverse)) self.builder.attach_all(space) setup_new_method_def(space) diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -74,7 +74,6 @@ from rpython.rlib.objectmodel import specialize from rpython.rlib import rgc from rpython.memory.gc.minimarkpage import out_of_memory -from pypy.module.cpyext.api import slot_function, PyObject from rpython.rtyper.lltypesystem import rffi # @@ -190,30 +189,6 @@ ('forw', llmemory.Address)) FORWARDSTUBPTR = lltype.Ptr(FORWARDSTUB) NURSARRAY = lltype.Array(llmemory.Address) -VISIT_FUNCTYPE = rffi.CCallback([PyObject, rffi.VOIDP], - rffi.INT_real) - - -def visit_trace_non_rc_roots(pyobj, self_ptr): - from rpython.rlib.rawrefcount import (REFCNT_CLR_BLACK, - REFCNT_CLR_MASK) - from rpython.rtyper.annlowlevel import cast_adr_to_nongc_instance - - self_adr = rffi.cast(llmemory.Address, self_ptr) - self = cast_adr_to_nongc_instance(IncrementalMiniMarkGC, self_adr) - - # if the pyobj is not marked, remember it and if there is a linked pypy - # object also remember it - if pyobj.c_ob_refcnt & REFCNT_CLR_MASK != REFCNT_CLR_BLACK: - pyobject = llmemory.cast_ptr_to_adr(pyobj) - self.rrc_more_pyobjects_to_scan.append(pyobject) - intobj = pyobj.c_ob_pypy_link - if intobj != 0: - obj = llmemory.cast_int_to_adr(intobj) - hdr = self.header(obj) - if not (hdr.tid & GCFLAG_VISITED): - self.objects_to_trace.append(obj) - return rffi.cast(rffi.INT_real, 0) # ____________________________________________________________ @@ -3020,11 +2995,17 @@ ('c_ob_pypy_link', lltype.Signed)) PYOBJ_HDR_PTR = lltype.Ptr(PYOBJ_HDR) RAWREFCOUNT_DEALLOC_TRIGGER = lltype.Ptr(lltype.FuncType([], lltype.Void)) + VISIT_FUNCTYPE = lltype.Ptr(lltype.FuncType([PYOBJ_HDR_PTR, rffi.VOIDP], + 
rffi.INT_real)) + RAWREFCOUNT_TRAVERSE = lltype.Ptr(lltype.FuncType([PYOBJ_HDR_PTR, + VISIT_FUNCTYPE, + rffi.VOIDP], + lltype.Void)) def _pyobj(self, pyobjaddr): - return llmemory.cast_adr_to_ptr(pyobjaddr, lltype.Ptr(PyObject.TO)) + return llmemory.cast_adr_to_ptr(pyobjaddr, self.PYOBJ_HDR_PTR) - def rawrefcount_init(self, dealloc_trigger_callback): + def rawrefcount_init(self, dealloc_trigger_callback, tp_traverse): # see pypy/doc/discussion/rawrefcount.rst if not self.rrc_enabled: self.rrc_p_list_young = self.AddressStack() @@ -3035,6 +3016,7 @@ self.rrc_p_dict = self.AddressDict() # non-nursery keys only self.rrc_p_dict_nurs = self.AddressDict() # nursery keys only self.rrc_dealloc_trigger_callback = dealloc_trigger_callback + self.rrc_tp_traverse = tp_traverse self.rrc_dealloc_pending = self.AddressStack() self.rrc_pyobjects_to_scan = self.AddressStack() self.rrc_more_pyobjects_to_scan = self.AddressStack() @@ -3214,10 +3196,13 @@ NO_CYCLE_DETECTION = False def rrc_major_collection_trace(self): + debug_start("gc-rrc-trace") if self.NO_CYCLE_DETECTION: self.rrc_p_list_old.foreach(self._rrc_major_trace, None) else: self.rrc_major_collection_trace_cycle() + self.rrc_p_list_old.foreach(self._rrc_major_trace, None) # for now, remove later + debug_stop("gc-rrc-trace") def _rrc_major_trace(self, pyobject, ignore): from rpython.rlib.rawrefcount import REFCNT_MASK @@ -3238,20 +3223,22 @@ assert not self.rrc_more_pyobjects_to_scan.non_empty() assert not self.rrc_pyobjects_to_trace.non_empty() - # initially, scan all old pyobjects which are linked to objects - self.rrc_p_list_old.foreach(self._rrc_major_scan_non_rc_roots, None) + # initially, scan all real pyobjects (not proxies) which are linked to objects + #self.rrc_p_list_old.foreach(self._rrc_major_scan_non_rc_roots, None) + self.rrc_o_list_old.foreach(self._rrc_major_scan_non_rc_roots, None) # as long as we find new pyobjects which should be marked, recursively # mark them while 
self.rrc_pyobjects_to_trace.non_empty(): while self.rrc_pyobjects_to_trace.non_empty(): - pyobj = self.rrc_pyobjects_to_trace.pop() - self._rrc_major_trace_non_rc_roots(pyobj) + pyobject = self.rrc_pyobjects_to_trace.pop() + self._rrc_traverse(pyobject) # see if we found new pypy objects to trace if self.objects_to_trace.non_empty(): self.visit_all_objects() self.objects_to_trace.delete() + self.objects_to_trace = self.AddressStack() # look if there are some pyobjects with linked objects which were # not marked previously, but are marked now @@ -3261,26 +3248,29 @@ self.rrc_pyobjects_to_scan.foreach( self._rrc_major_scan_non_rc_roots, None) self.rrc_pyobjects_to_scan.delete() + self.rrc_pyobjects_to_scan = self.AddressStack() - def traverse(self, pyobject, func_ptr): - from pypy.module.cpyext.api import generic_cpy_call_gc - from pypy.module.cpyext.typeobjectdefs import visitproc - from rpython.rtyper.annlowlevel import cast_nongc_instance_to_adr - self_addr = cast_nongc_instance_to_adr(self) - pyobj = self._pyobj(pyobject) - if pyobj.c_ob_type and pyobj.c_ob_type.c_tp_traverse: - visitproc_ptr = rffi.cast(visitproc, func_ptr) - generic_cpy_call_gc(pyobj.c_ob_type.c_tp_traverse, pyobj, - visitproc_ptr, rffi.cast(rffi.VOIDP, self_addr)) - #cast_nongc_instance_to_adr(self) + self.rrc_more_pyobjects_to_scan.delete() + self.rrc_more_pyobjects_to_scan = self.AddressStack() - def _rrc_major_trace_non_rc_roots(self, pyobject): - from rpython.rtyper.annlowlevel import llhelper - func_ptr = llhelper(VISIT_FUNCTYPE, visit_trace_non_rc_roots) - self.traverse(pyobject, func_ptr) + def _rrc_mark_cpyobj(self, pyobj): + from rpython.rlib.rawrefcount import (REFCNT_CLR_GRAY, + REFCNT_CLR_MASK) + # if the pyobj is not marked, remember it and if there is a linked pypy + # object also remember it + if pyobj.c_ob_refcnt & REFCNT_CLR_MASK != REFCNT_CLR_GRAY: + pyobj.c_ob_refcnt = REFCNT_CLR_GRAY + pyobject = llmemory.cast_ptr_to_adr(pyobj) + 
self.rrc_more_pyobjects_to_scan.append(pyobject) + intobj = pyobj.c_ob_pypy_link + if intobj != 0: + obj = llmemory.cast_int_to_adr(intobj) + hdr = self.header(obj) + if not (hdr.tid & GCFLAG_VISITED): + self.objects_to_trace.append(obj) def _rrc_major_scan_non_rc_roots(self, pyobject, ignore): - from rpython.rlib.rawrefcount import (REFCNT_CLR_BLACK, + from rpython.rlib.rawrefcount import (REFCNT_CLR_GRAY, REFCNT_CLR_MASK) # check in the object header of the linked pypy object, if it is marked # or not @@ -3289,8 +3279,9 @@ obj = llmemory.cast_int_to_adr(intobj) hdr = self.header(obj) if hdr.tid & GCFLAG_VISITED: - if pyobj.c_ob_refcnt & REFCNT_CLR_MASK != REFCNT_CLR_BLACK: + if pyobj.c_ob_refcnt & REFCNT_CLR_MASK != REFCNT_CLR_GRAY: # TODO change to black, but make white default # process the pyobject now + pyobj.c_ob_refcnt = REFCNT_CLR_GRAY self.rrc_pyobjects_to_trace.append(pyobject) else: # save the pyobject for later, in case its linked object becomes @@ -3330,3 +3321,22 @@ surviving_dict.insertclean(obj, pyobject) else: self._rrc_free(pyobject) + + def _rrc_visit(pyobj, self_ptr): + from rpython.rtyper.annlowlevel import cast_adr_to_nongc_instance + # + debug_print("visit called!") + self_adr = rffi.cast(llmemory.Address, self_ptr) + self = cast_adr_to_nongc_instance(IncrementalMiniMarkGC, self_adr) + self._rrc_mark_cpyobj(pyobj) + return rffi.cast(rffi.INT_real, 0) + + def _rrc_traverse(self, pyobject): + from rpython.rtyper.annlowlevel import (cast_nongc_instance_to_adr, + llhelper) + # + pyobj = self._pyobj(pyobject) + callback_ptr = llhelper(self.VISIT_FUNCTYPE, + IncrementalMiniMarkGC._rrc_visit) + self_ptr = rffi.cast(rffi.VOIDP, cast_nongc_instance_to_adr(self)) + self.rrc_tp_traverse(pyobj, callback_ptr, self_ptr) \ No newline at end of file diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py --- a/rpython/memory/gc/test/test_rawrefcount.py +++ b/rpython/memory/gc/test/test_rawrefcount.py @@ -53,7 
+53,9 @@ else: rc = REFCNT_FROM_PYPY self.trigger = [] - self.gc.rawrefcount_init(lambda: self.trigger.append(1)) + self.trigger2 = [] + self.gc.rawrefcount_init(lambda: self.trigger.append(1), + lambda: self.trigger2.append(1)) # if create_immortal: p1 = lltype.malloc(S, immortal=True) diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -475,7 +475,8 @@ if hasattr(GCClass, 'rawrefcount_init'): self.rawrefcount_init_ptr = getfn( GCClass.rawrefcount_init, - [s_gc, SomePtr(GCClass.RAWREFCOUNT_DEALLOC_TRIGGER)], + [s_gc, SomePtr(GCClass.RAWREFCOUNT_DEALLOC_TRIGGER), + SomePtr(GCClass.RAWREFCOUNT_TRAVERSE)], annmodel.s_None) self.rawrefcount_create_link_pypy_ptr = getfn( GCClass.rawrefcount_create_link_pypy, @@ -1314,10 +1315,12 @@ self.pop_roots(hop, livevars) def gct_gc_rawrefcount_init(self, hop): - [v_fnptr] = hop.spaceop.args + [v_fnptr, v_fnptr2] = hop.spaceop.args assert v_fnptr.concretetype == self.GCClass.RAWREFCOUNT_DEALLOC_TRIGGER + assert v_fnptr2.concretetype == self.GCClass.RAWREFCOUNT_TRAVERSE hop.genop("direct_call", - [self.rawrefcount_init_ptr, self.c_const_gc, v_fnptr]) + [self.rawrefcount_init_ptr, self.c_const_gc, v_fnptr, + v_fnptr2]) def gct_gc_rawrefcount_create_link_pypy(self, hop): [v_gcobj, v_pyobject] = hop.spaceop.args diff --git a/rpython/rlib/rawrefcount.py b/rpython/rlib/rawrefcount.py --- a/rpython/rlib/rawrefcount.py +++ b/rpython/rlib/rawrefcount.py @@ -10,7 +10,6 @@ from rpython.rtyper.extregistry import ExtRegistryEntry from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.rlib import rgc, objectmodel -from pypy.interpreter.baseobjspace import W_Root MAX_BIT = int(math.log(sys.maxint, 2)) @@ -44,9 +43,17 @@ REFCNT_OVERFLOW = 1 << REFCNT_BITS REFCNT_MASK = (1 << REFCNT_BITS + 1) - 1 - +PYOBJ_HDR = lltype.Struct('GCHdr_PyObject', + ('c_ob_refcnt', lltype.Signed), + 
('c_ob_pypy_link', lltype.Signed)) +PYOBJ_HDR_PTR = lltype.Ptr(PYOBJ_HDR) RAWREFCOUNT_DEALLOC_TRIGGER = lltype.Ptr(lltype.FuncType([], lltype.Void)) -W_MARKER_DEALLOCATING = W_Root() +VISIT_FUNCTYPE = lltype.Ptr(lltype.FuncType([PYOBJ_HDR_PTR, rffi.VOIDP], + rffi.INT_real)) +RAWREFCOUNT_TRAVERSE = lltype.Ptr(lltype.FuncType([PYOBJ_HDR_PTR, + VISIT_FUNCTYPE, + rffi.VOIDP], + lltype.Void)) def _build_pypy_link(p): @@ -97,12 +104,12 @@ # TODO: _cyclic_refcount_overflow = dict() @not_rpython -def init(dealloc_trigger_callback=None): +def init(dealloc_trigger_callback=None, tp_traverse=None): """set up rawrefcount with the GC. This is only used for tests; it should not be called at all during translation. """ global _p_list, _o_list, _adr2pypy, _pypy2ob, _pypy2ob_rev - global _d_list, _dealloc_trigger_callback + global _d_list, _dealloc_trigger_callback, _tp_traverse _p_list = [] _o_list = [] _adr2pypy = [None] @@ -111,6 +118,7 @@ _d_list = [] _d_marker = None _dealloc_trigger_callback = dealloc_trigger_callback + _tp_traverse = tp_traverse # def init_traverse(traverse_cpy_call): # global _traverse_cpy_call @@ -308,14 +316,15 @@ class Entry(ExtRegistryEntry): _about_ = init - def compute_result_annotation(self, s_dealloc_callback): + def compute_result_annotation(self, s_dealloc_callback, tp_traverse): from rpython.rtyper.llannotation import SomePtr assert isinstance(s_dealloc_callback, SomePtr) # ll-ptr-to-function + # add assert? 
def specialize_call(self, hop): hop.exception_cannot_occur() - [v_dealloc_callback] = hop.inputargs(hop.args_r[0]) - hop.genop('gc_rawrefcount_init', [v_dealloc_callback]) + v_dealloc_callback, v_tp_traverse = hop.inputargs(*hop.args_r) + hop.genop('gc_rawrefcount_init', [v_dealloc_callback, v_tp_traverse]) class Entry(ExtRegistryEntry): From pypy.commits at gmail.com Fri Jan 11 05:39:01 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 02:39:01 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Directly call tp_traverse instead of via generic_cpy_call Message-ID: <5c387245.1c69fb81.a0f38.aef4@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95601:fd6699184d11 Date: 2018-03-23 12:53 +0100 http://bitbucket.org/pypy/pypy/changeset/fd6699184d11/ Log: Directly call tp_traverse instead of via generic_cpy_call diff --git a/pypy/module/cpyext/state.py b/pypy/module/cpyext/state.py --- a/pypy/module/cpyext/state.py +++ b/pypy/module/cpyext/state.py @@ -83,9 +83,8 @@ pyobj_dealloc_action = PyObjDeallocAction(space) self.dealloc_trigger = lambda: pyobj_dealloc_action.fire() - def _rawrefcount_tp_traverse(space, pyobj_ptr, callback, args): - from pypy.module.cpyext.api import (generic_cpy_call, - PyObject) + def _rawrefcount_tp_traverse(pyobj_ptr, callback, args): + from pypy.module.cpyext.api import PyObject from pypy.module.cpyext.typeobjectdefs import visitproc # convert to pointers with correct types (PyObject) callback_addr = llmemory.cast_ptr_to_adr(callback) @@ -95,12 +94,10 @@ pyobj = llmemory.cast_adr_to_ptr(pyobj_addr, PyObject) # now call tp_traverse (if possible) if pyobj.c_ob_type and pyobj.c_ob_type.c_tp_traverse: - generic_cpy_call(space, pyobj.c_ob_type.c_tp_traverse, - pyobj, - callback_ptr, args) + pyobj.c_ob_type.c_tp_traverse(pyobj, callback_ptr, + args) self.tp_traverse = (lambda o, v, a: - _rawrefcount_tp_traverse(self.space, - o, v, a)) + _rawrefcount_tp_traverse(o, v, a)) def build_api(self): 
"""NOT_RPYTHON From pypy.commits at gmail.com Fri Jan 11 05:39:03 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 02:39:03 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Removed unnecessary code Message-ID: <5c387247.1c69fb81.dff1b.f548@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95602:65ead3f78618 Date: 2018-04-12 10:21 +0200 http://bitbucket.org/pypy/pypy/changeset/65ead3f78618/ Log: Removed unnecessary code diff --git a/pypy/module/cpyext/include/object.h b/pypy/module/cpyext/include/object.h --- a/pypy/module/cpyext/include/object.h +++ b/pypy/module/cpyext/include/object.h @@ -14,8 +14,7 @@ #define PY_SSIZE_T_MIN (-PY_SSIZE_T_MAX-1) #define PY_REFCNT_FROM_PYPY (4L << ((long)(log(PY_SSIZE_T_MAX) / log(2) - 2))) -#define PY_REFCNT_GREEN (4L << ((long)(log(PY_SSIZE_T_MAX) / log(2) - 7))) -#define PY_REFCNT_OVERFLOW (1L << ((long)(log(PY_SSIZE_T_MAX) / log(2) - 7) / 2L - 1L)) +#define PY_REFCNT_OVERFLOW (1L << ((long)(log(PY_SSIZE_T_MAX) / log(2) - 4) - 1L)) #define PY_REFCNT_MASK ((PY_REFCNT_OVERFLOW << 1L) - 1L) #define Py_RETURN_NONE return (((((PyObject *)(Py_None))->ob_refcnt & PY_REFCNT_OVERFLOW) == 0) ? 
\ ((PyObject *)(Py_None))->ob_refcnt++ : Py_IncRef((PyObject *)(Py_None))), Py_None @@ -48,12 +47,11 @@ Py_IncRef((PyObject *)(ob)); \ } while (0) #define Py_DECREF(ob) do { \ - if (!(((PyObject *)(ob))->ob_refcnt & PY_REFCNT_GREEN) || \ - (((PyObject *)(ob))->ob_refcnt & PY_REFCNT_OVERFLOW)) \ - Py_DecRef((PyObject *)(ob)); \ + if ((((PyObject *)(ob))->ob_refcnt & PY_REFCNT_OVERFLOW)) \ + Py_DecRef((PyObject *)(ob)); \ else if (--((PyObject *)(ob))->ob_refcnt & PY_REFCNT_MASK) \ ; \ - else if ((!((PyObject *)(ob))->ob_refcnt) & PY_REFCNT_FROM_PYPY) \ + else \ _Py_Dealloc((PyObject *)(ob)); \ } while (0) diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py --- a/pypy/module/cpyext/pyobject.py +++ b/pypy/module/cpyext/pyobject.py @@ -18,9 +18,8 @@ from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_base_ptr from rpython.rlib import rawrefcount, jit from rpython.rlib.debug import ll_assert, fatalerror, debug_print -from rpython.rlib.rawrefcount import ( - REFCNT_MASK, REFCNT_FROM_PYPY, REFCNT_OVERFLOW, REFCNT_CYCLE_BUFFERED, - REFCNT_CLR_MASK, REFCNT_CLR_GREEN, REFCNT_CLR_PURPLE) +from rpython.rlib.rawrefcount import (REFCNT_MASK, REFCNT_FROM_PYPY, + REFCNT_OVERFLOW) from pypy.module.cpyext.api import slot_function from pypy.module.cpyext.typeobjectdefs import visitproc @@ -401,31 +400,13 @@ rawrefcount.decref(pyobj) rc = pyobj.c_ob_refcnt if rc & REFCNT_MASK == 0: - if rc & REFCNT_FROM_PYPY == 0 and rc & REFCNT_CLR_MASK != REFCNT_CLR_PURPLE: - state = space.fromcache(State) - generic_cpy_call(space, state.C._Py_Dealloc, pyobj) - elif rc & REFCNT_CLR_MASK != REFCNT_CLR_GREEN: - possible_root(space, pyobj) + state = space.fromcache(State) + generic_cpy_call(space, state.C._Py_Dealloc, pyobj) #else: # w_obj = rawrefcount.to_obj(W_Root, ref) # if w_obj is not None: # assert pyobj.c_ob_refcnt >= rawrefcount.REFCNT_FROM_PYPY - at jit.dont_look_inside -def possible_root(space, obj): - #debug_print("possible root", obj) - rc = 
obj.c_ob_refcnt - if not obj.c_ob_type or not obj.c_ob_type.c_tp_traverse: - #debug_print("mark green", obj) - rc = rc & ~REFCNT_CLR_MASK | REFCNT_CLR_GREEN - elif rc & REFCNT_CLR_MASK != REFCNT_CLR_PURPLE: - rc = rc & ~REFCNT_CLR_MASK | REFCNT_CLR_PURPLE - if rc & REFCNT_CYCLE_BUFFERED == 0: - #debug_print("mark purple", obj) - rawrefcount.buffer_pyobj(obj) - rc = rc | REFCNT_CYCLE_BUFFERED - obj.c_ob_refcnt = rc - @cpython_api([PyObject], lltype.Void) def Py_IncRef(space, obj): incref(space, obj) diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -3067,9 +3067,6 @@ objint = llmemory.cast_adr_to_int(obj, "symbolic") self._pyobj(pyobject).c_ob_pypy_link = objint - def rawrefcount_buffer_pyobj(self, pyobject): - self.rrc_buffered.append(pyobject) - def rawrefcount_from_obj(self, gcobj): obj = llmemory.cast_ptr_to_adr(gcobj) if self.is_in_nursery(obj): @@ -3254,12 +3251,11 @@ self.rrc_more_pyobjects_to_scan = self.AddressStack() def _rrc_mark_cpyobj(self, pyobj): - from rpython.rlib.rawrefcount import (REFCNT_CLR_GRAY, - REFCNT_CLR_MASK) + from rpython.rlib.rawrefcount import REFCNT_VISITED # if the pyobj is not marked, remember it and if there is a linked pypy # object also remember it - if pyobj.c_ob_refcnt & REFCNT_CLR_MASK != REFCNT_CLR_GRAY: - pyobj.c_ob_refcnt = REFCNT_CLR_GRAY + if pyobj.c_ob_refcnt & REFCNT_VISITED != REFCNT_VISITED: + pyobj.c_ob_refcnt |= REFCNT_VISITED pyobject = llmemory.cast_ptr_to_adr(pyobj) self.rrc_more_pyobjects_to_scan.append(pyobject) intobj = pyobj.c_ob_pypy_link @@ -3270,8 +3266,7 @@ self.objects_to_trace.append(obj) def _rrc_major_scan_non_rc_roots(self, pyobject, ignore): - from rpython.rlib.rawrefcount import (REFCNT_CLR_GRAY, - REFCNT_CLR_MASK) + from rpython.rlib.rawrefcount import REFCNT_VISITED # check in the object header of the linked pypy object, if it is marked # or not pyobj = self._pyobj(pyobject) @@ -3279,9 
+3274,9 @@ obj = llmemory.cast_int_to_adr(intobj) hdr = self.header(obj) if hdr.tid & GCFLAG_VISITED: - if pyobj.c_ob_refcnt & REFCNT_CLR_MASK != REFCNT_CLR_GRAY: # TODO change to black, but make white default + if pyobj.c_ob_refcnt & REFCNT_VISITED != REFCNT_VISITED: # process the pyobject now - pyobj.c_ob_refcnt = REFCNT_CLR_GRAY + pyobj.c_ob_refcnt |= REFCNT_VISITED self.rrc_pyobjects_to_trace.append(pyobject) else: # save the pyobject for later, in case its linked object becomes diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py --- a/rpython/memory/gc/test/test_rawrefcount.py +++ b/rpython/memory/gc/test/test_rawrefcount.py @@ -128,13 +128,6 @@ r1.base.c_ob_refcnt = 1 return r1 - def _rawrefcount_buffer_obj(self, obj): - from rpython.rlib.rawrefcount import REFCNT_CLR_MASK, REFCNT_CLR_PURPLE - rc = obj.base.c_ob_refcnt - obj.base.c_ob_refcnt = rc & ~REFCNT_CLR_MASK | REFCNT_CLR_PURPLE - objaddr = llmemory.cast_ptr_to_adr(obj) - self.gc.rawrefcount_buffer_pyobj(objaddr) - def test_rawrefcount_objects_basic(self, old=False): p1, p1ref, r1, r1addr, check_alive = ( self._rawrefcount_pair(42, is_light=True, create_old=old)) diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -490,10 +490,6 @@ GCClass.rawrefcount_mark_deallocating, [s_gc, s_gcref, SomeAddress()], annmodel.s_None) - self.rawrefcount_buffer_pyobj = getfn( - GCClass.rawrefcount_buffer_pyobj, - [s_gc, SomeAddress()], - annmodel.s_None) self.rawrefcount_from_obj_ptr = getfn( GCClass.rawrefcount_from_obj, [s_gc, s_gcref], SomeAddress(), inline = True) @@ -1346,13 +1342,6 @@ [self.rawrefcount_mark_deallocating, self.c_const_gc, v_gcobj, v_pyobject]) - def gct_gc_rawrefcount_buffer_pyobj(self, hop): - [v_pyobject] = hop.spaceop.args - assert v_pyobject.concretetype == llmemory.Address - hop.genop("direct_call", - 
[self.rawrefcount_buffer_pyobj, self.c_const_gc, - v_pyobject]) - def gct_gc_rawrefcount_from_obj(self, hop): [v_gcobj] = hop.spaceop.args assert v_gcobj.concretetype == llmemory.GCREF diff --git a/rpython/rlib/rawrefcount.py b/rpython/rlib/rawrefcount.py --- a/rpython/rlib/rawrefcount.py +++ b/rpython/rlib/rawrefcount.py @@ -17,31 +17,9 @@ # Flags REFCNT_FROM_PYPY = 1 << MAX_BIT - 2 # Reference from a pypy object REFCNT_FROM_PYPY_LIGHT = (1 << MAX_BIT - 1) + REFCNT_FROM_PYPY # Light reference from a pypy object -REFCNT_CYCLE_BUFFERED = 1 << MAX_BIT - 3 # Object in roots buffer (for potential cycles) -REFCNT_IN_WAVEFRONT = 1 << MAX_BIT - 4 # Object in any wavefront - -# Offsets and sizes -REFCNT_CLR_OFFS = MAX_BIT - 7 -REFCNT_CRC_OFFS = REFCNT_CLR_OFFS / 2 -REFCNT_BITS = REFCNT_CRC_OFFS - 1 - -# Concurrent cycle collection colors -REFCNT_CLR_BLACK = 0 << REFCNT_CLR_OFFS # In use or free (default) -REFCNT_CLR_GRAY = 1 << REFCNT_CLR_OFFS # Possible member of cycle -REFCNT_CLR_YELLOW = 2 << REFCNT_CLR_OFFS # Member of garbage cycle -REFCNT_CLR_PURPLE = 3 << REFCNT_CLR_OFFS # Possible root of cycle -REFCNT_CLR_GREEN = 4 << REFCNT_CLR_OFFS # Acyclic -REFCNT_CLR_ORANGE = 5 << REFCNT_CLR_OFFS # In orange wavefront (might change to YELLOW + IN_WAVEFRONT + phase = 3) -REFCNT_CLR_MASK = 7 << REFCNT_CLR_OFFS - -# Cyclic reference count with overflow bit -REFCNT_CRC_OVERFLOW = 1 << REFCNT_CRC_OFFS + REFCNT_BITS -REFCNT_CRC_MASK = (1 << REFCNT_CRC_OFFS + REFCNT_BITS + 1) - 1 -REFCNT_CRC = 1 < REFCNT_CRC_OFFS - -# True reference count with overflow bit -REFCNT_OVERFLOW = 1 << REFCNT_BITS -REFCNT_MASK = (1 << REFCNT_BITS + 1) - 1 +REFCNT_VISITED = 1 << MAX_BIT - 3 # Object visited during marking +REFCNT_OVERFLOW = 1 << MAX_BIT - 4 # Overflow bit for reference count +REFCNT_MASK = (REFCNT_OVERFLOW << 1) - 1 # Mask for reference count (including overflow bit) PYOBJ_HDR = lltype.Struct('GCHdr_PyObject', ('c_ob_refcnt', lltype.Signed), @@ -101,7 +79,6 @@ def overflow_get(obj): return 
_refcount_overflow[objectmodel.current_object_addr_as_int(obj)] -# TODO: _cyclic_refcount_overflow = dict() @not_rpython def init(dealloc_trigger_callback=None, tp_traverse=None): @@ -161,10 +138,6 @@ ob.c_ob_pypy_link = _build_pypy_link(marker) @not_rpython -def buffer_pyobj(ob): - pass # TODO: implement? - - at not_rpython def from_obj(OB_PTR_TYPE, p): ob = _pypy2ob.get(p) if ob is None: @@ -350,18 +323,6 @@ hop.genop('direct_call', [c_func]) class Entry(ExtRegistryEntry): - _about_ = buffer_pyobj - - def compute_result_annotation(self, s_ob): - pass - - def specialize_call(self, hop): - name = 'gc_rawrefcount_buffer_pyobj' - hop.exception_cannot_occur() - v_ob = hop.inputarg(hop.args_r[0], arg=0) - hop.genop(name, [_unspec_ob(hop, v_ob)]) - -class Entry(ExtRegistryEntry): _about_ = from_obj def compute_result_annotation(self, s_OB_PTR_TYPE, s_p): diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py --- a/rpython/rtyper/llinterp.py +++ b/rpython/rtyper/llinterp.py @@ -969,9 +969,6 @@ def op_gc_rawrefcount_mark_deallocating(self, *args): raise NotImplementedError("gc_rawrefcount_mark_deallocating") - def op_gc_rawrefcount_buffer_pyobj(self, *args): - raise NotImplementedError("gc_rawrefcount_buffer_pyobj") - def op_gc_rawrefcount_next_dead(self, *args): raise NotImplementedError("gc_rawrefcount_next_dead") diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py --- a/rpython/rtyper/lltypesystem/lloperation.py +++ b/rpython/rtyper/lltypesystem/lloperation.py @@ -494,7 +494,6 @@ 'gc_rawrefcount_create_link_pypy': LLOp(), 'gc_rawrefcount_create_link_pyobj': LLOp(), 'gc_rawrefcount_mark_deallocating': LLOp(), - 'gc_rawrefcount_buffer_pyobj': LLOp(), 'gc_rawrefcount_from_obj': LLOp(sideeffects=False), 'gc_rawrefcount_to_obj': LLOp(sideeffects=False), 'gc_rawrefcount_next_dead': LLOp(), From pypy.commits at gmail.com Fri Jan 11 05:39:05 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 
02:39:05 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Removed unnecessary code Message-ID: <5c387249.1c69fb81.f31b1.bf2a@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95603:b74906a7ac4b Date: 2018-05-10 11:12 +0200 http://bitbucket.org/pypy/pypy/changeset/b74906a7ac4b/ Log: Removed unnecessary code diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -43,6 +43,7 @@ from rpython.rlib import rstackovf from pypy.objspace.std.typeobject import W_TypeObject, find_best_base from pypy.module.cpyext.cparser import CTypeSpace +from rpython.rlib.debug import ll_assert, debug_print, debug_start, debug_stop DEBUG_WRAPPER = True @@ -974,6 +975,8 @@ # we hope that malloc removal removes the newtuple() that is # inserted exactly here by the varargs specializer + print "start to pypy" + # see "Handling of the GIL" above (careful, we don't have the GIL here) tid = rthread.get_or_make_ident() _gil_auto = False @@ -1085,6 +1088,9 @@ rffi.stackcounter.stacks_counter -= 1 _restore_gil_state(pygilstate_release, gilstate, gil_release, _gil_auto, tid) + + print "end to pypy" + return retval wrapper_second_level._dont_inline_ = True @@ -1773,8 +1779,10 @@ preexist_error = PyErr_Occurred(space) try: + print "start cpyext_call" # Call the function result = call_external_function(func, *boxed_args) + print "end cpyext_call" finally: assert cpyext_glob_tid_ptr[0] == tid cpyext_glob_tid_ptr[0] = tid_before diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py --- a/rpython/config/translationoption.py +++ b/rpython/config/translationoption.py @@ -105,6 +105,14 @@ "asmgcc": [("translation.gctransformer", "framework"), ("translation.backend", "c")], }), + ChoiceOption("cpyextgc", "Garbage Collection Strategy for cpyext", + ["boehm", "ref", "ref_trialdel", "none"], + default="ref", + requires={ + "boehm": [("translation.gc", "incminimark")], + 
"ref_trialdel": [("translation.gc", "incminimark")], + }, + cmdline="--cpyextgc"), # other noticeable options BoolOption("thread", "enable use of threading primitives", diff --git a/rpython/translator/c/genc.py b/rpython/translator/c/genc.py --- a/rpython/translator/c/genc.py +++ b/rpython/translator/c/genc.py @@ -177,6 +177,8 @@ defines = defines.copy() if self.config.translation.countmallocs: defines['COUNT_OP_MALLOCS'] = 1 + if self.config.translation.cpyextgc == "boehm": + defines['CPYEXT_BOEHM'] = 1 if self.config.translation.sandbox: defines['RPY_SANDBOXED'] = 1 if CBuilder.have___thread is None: From pypy.commits at gmail.com Fri Jan 11 05:39:06 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 02:39:06 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Implemented cpython-like GC list for cpyext Message-ID: <5c38724a.1c69fb81.e655.a6c0@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95604:e5ba3fd47f96 Date: 2018-07-04 17:56 +0200 http://bitbucket.org/pypy/pypy/changeset/e5ba3fd47f96/ Log: Implemented cpython-like GC list for cpyext Added some code for cpyext-only boehm GC support diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -751,6 +751,8 @@ PyVarObjectFields = PyObjectFields + (("ob_size", Py_ssize_t), ) PyVarObjectStruct = cts.gettype('PyVarObject') PyVarObject = cts.gettype('PyVarObject *') +PyGC_Head = cts.gettype('PyGC_Head') +PyGC_HeadPtr = cts.gettype('PyGC_Head *') Py_buffer = cts.gettype('Py_buffer') Py_bufferP = cts.gettype('Py_buffer *') @@ -1173,6 +1175,9 @@ state.C._PyPy_object_dealloc = rffi.llexternal( '_PyPy_object_dealloc', [PyObject], lltype.Void, compilation_info=eci, _nowrapper=True) + state.C._PyPy_InitPyObjList = rffi.llexternal( + '_PyPy_InitPyObjList', [], PyGC_HeadPtr, + compilation_info=eci, _nowrapper=True) def init_function(func): @@ -1294,6 +1299,9 @@ 
ll2ctypes.lltype2ctypes(func.get_llhelper(space)), ctypes.c_void_p) + # initialize the pyobj_list for the gc + space.fromcache(State).C._PyPy_InitPyObjList() + # we need to call this *after* the init code above, because it might # indirectly call some functions which are attached to pypyAPI (e.g., we # if do tuple_attach of the prebuilt empty tuple, we need to call @@ -1826,75 +1834,3 @@ return result return generic_cpy_call - - at specialize.memo() -def make_generic_cpy_call_gc(FT, expect_null): - from pypy.module.cpyext.pyobject import is_pyobj, make_ref, decref - from pypy.module.cpyext.pyobject import get_w_obj_and_decref - from pypy.module.cpyext.pyerrors import PyErr_Occurred - unrolling_arg_types = unrolling_iterable(enumerate(FT.ARGS)) - RESULT_TYPE = FT.RESULT - - # copied and modified from rffi.py - # We need tons of care to ensure that no GC operation and no - # exception checking occurs in call_external_function. - argnames = ', '.join(['a%d' % i for i in range(len(FT.ARGS))]) - source = py.code.Source(""" - def cpy_call_external(funcptr, %(argnames)s): - # NB. it is essential that no exception checking occurs here! 
- res = funcptr(%(argnames)s) - return res - """ % locals()) - miniglobals = {'__name__': __name__, # for module name propagation - } - exec source.compile() in miniglobals - call_external_function = specialize.ll()(miniglobals['cpy_call_external']) - call_external_function._dont_inline_ = True - call_external_function._gctransformer_hint_close_stack_ = True - # don't inline, as a hack to guarantee that no GC pointer is alive - # anywhere in call_external_function - - @specialize.ll() - def generic_cpy_call(func, *args): - boxed_args = () - to_decref = () - assert len(args) == len(FT.ARGS) - for i, ARG in unrolling_arg_types: - arg = args[i] - _pyobj = None - if is_PyObject(ARG): - assert is_pyobj(arg) - - boxed_args += (arg,) - to_decref += (_pyobj,) - - # see "Handling of the GIL" above - tid = rthread.get_ident() - tid_before = cpyext_glob_tid_ptr[0] - assert tid_before == 0 or tid_before == tid - cpyext_glob_tid_ptr[0] = tid - - try: - # Call the function - result = call_external_function(func, *boxed_args) - finally: - assert cpyext_glob_tid_ptr[0] == tid - cpyext_glob_tid_ptr[0] = tid_before - for i, ARG in unrolling_arg_types: - # note that this loop is nicely unrolled statically by RPython - _pyobj = to_decref[i] - if _pyobj is not None: - pyobj = rffi.cast(PyObject, _pyobj) - rawrefcount.decref(pyobj) - - if is_PyObject(RESULT_TYPE): - ret = None - - # Check for exception consistency - # XXX best attempt, will miss preexisting error that is - # overwritten with a new error of the same type - - return ret - return result - - return generic_cpy_call \ No newline at end of file diff --git a/pypy/module/cpyext/include/object.h b/pypy/module/cpyext/include/object.h --- a/pypy/module/cpyext/include/object.h +++ b/pypy/module/cpyext/include/object.h @@ -279,7 +279,7 @@ ) & ~(SIZEOF_VOID_P - 1) \ ) - + #define PyObject_INIT(op, typeobj) \ ( Py_TYPE(op) = (typeobj), ((PyObject *)(op))->ob_refcnt = 1,\ ((PyObject *)(op))->ob_pypy_link = 0, (op) ) @@ -309,22 +309,65 
@@ #define PyObject_GC_NewVar(type, typeobj, n) \ ( (type *) _PyObject_GC_NewVar((typeobj), (n)) ) - -/* A dummy PyGC_Head, just to please some tests. Don't use it! */ -typedef union _gc_head { - char dummy; -} PyGC_Head; -/* dummy GC macros */ -#define _PyGC_FINALIZED(o) 1 -#define PyType_IS_GC(tp) 1 +extern PyGC_Head *_pypy_rawrefcount_pyobj_list; -/* TODO: implement like in cpython - (see https://github.com/python/cpython/blob/517da1e58f4c489d4b31579852cde5f7113da08e/Include/objimpl.h#L295) */ -#define PyObject_GC_Track(o) do { } while(0) -#define PyObject_GC_UnTrack(o) do { } while(0) -#define _PyObject_GC_TRACK(o) do { } while(0) -#define _PyObject_GC_UNTRACK(o) do { } while(0) +#define _Py_AS_GC(o) ((PyGC_Head *)(o)-1) +#define _Py_FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1)) + +/* Bit 0 is set when tp_finalize is called */ +#define _PyGC_REFS_MASK_FINALIZED (1 << 0) +/* The (N-1) most significant bits contain the gc state / refcount */ +#define _PyGC_REFS_SHIFT (1) +#define _PyGC_REFS_MASK (((size_t) -1) << _PyGC_REFS_SHIFT) + +#define _PyGCHead_REFS(g) ((g)->gc_refs >> _PyGC_REFS_SHIFT) +#define _PyGCHead_SET_REFS(g, v) do { \ + (g)->gc_refs = ((g)->gc_refs & ~_PyGC_REFS_MASK) \ + | (((size_t)(v)) << _PyGC_REFS_SHIFT); \ + } while (0) +#define _PyGCHead_DECREF(g) ((g)->gc_refs -= 1 << _PyGC_REFS_SHIFT) + +#define _PyGCHead_FINALIZED(g) (((g)->gc_refs & _PyGC_REFS_MASK_FINALIZED) != 0) +#define _PyGCHead_SET_FINALIZED(g, v) do { \ + (g)->gc_refs = ((g)->gc_refs & ~_PyGC_REFS_MASK_FINALIZED) \ + | (v != 0); \ + } while (0) + +#define _PyGC_FINALIZED(o) _PyGCHead_FINALIZED(_Py_AS_GC(o)) +#define _PyGC_SET_FINALIZED(o, v) _PyGCHead_SET_FINALIZED(_Py_AS_GC(o), v) + +#define _PyGC_REFS(o) _PyGCHead_REFS(_Py_AS_GC(o)) + +#define _PyGC_REFS_UNTRACKED (-2) +#define _PyGC_REFS_REACHABLE (-3) +#define _PyGC_REFS_TENTATIVELY_UNREACHABLE (-4) + +#define _PyGC_IS_TRACKED(o) (_PyGC_REFS(o) != _PyGC_REFS_UNTRACKED) + +#define PyType_IS_GC(t) PyType_HasFeature((t), 
Py_TPFLAGS_HAVE_GC) + +PyAPI_FUNC(void) PyObject_GC_Track(void *); +PyAPI_FUNC(void) PyObject_GC_UnTrack(void *); + +#define _PyObject_GC_TRACK(o) do { \ + PyGC_Head *g = _Py_AS_GC(o); \ + if (_PyGCHead_REFS(g) != _PyGC_REFS_UNTRACKED) \ + Py_FatalError("GC object already tracked"); \ + _PyGCHead_SET_REFS(g, _PyGC_REFS_REACHABLE); \ + g->gc_next = _pypy_rawrefcount_pyobj_list; \ + g->gc_prev = _pypy_rawrefcount_pyobj_list->gc_prev; \ + ((PyGC_Head *)g->gc_prev)->gc_next = g; \ + _pypy_rawrefcount_pyobj_list->gc_prev = g; \ + } while(0) +#define _PyObject_GC_UNTRACK(o) do { \ + PyGC_Head *g = _Py_AS_GC(o); \ + assert(_PyGCHead_REFS(g) != _PyGC_REFS_UNTRACKED); \ + _PyGCHead_SET_REFS(g, _PyGC_REFS_UNTRACKED); \ + ((PyGC_Head *)g->gc_prev)->gc_next = g->gc_next; \ + ((PyGC_Head *)g->gc_next)->gc_prev = g->gc_prev; \ + g->gc_next = NULL; \ + } while(0) /* Utility macro to help write tp_traverse functions. * To use this macro, the tp_traverse function must name its arguments @@ -405,7 +448,7 @@ #define _PyObject_GC_Del PyObject_GC_Del PyAPI_FUNC(void) _PyPy_subtype_dealloc(PyObject *); PyAPI_FUNC(void) _PyPy_object_dealloc(PyObject *); - +PyAPI_FUNC(PyGC_Head *) _PyPy_InitPyObjList(); #ifdef __cplusplus } diff --git a/pypy/module/cpyext/parse/cpyext_object.h b/pypy/module/cpyext/parse/cpyext_object.h --- a/pypy/module/cpyext/parse/cpyext_object.h +++ b/pypy/module/cpyext/parse/cpyext_object.h @@ -321,3 +321,10 @@ PyBufferProcs as_buffer; PyObject *ht_name, *ht_slots; } PyHeapTypeObject; + + +typedef struct _gc_head { + void *gc_next; + void *gc_prev; + Py_ssize_t gc_refs; +} PyGC_Head; \ No newline at end of file diff --git a/pypy/module/cpyext/src/object.c b/pypy/module/cpyext/src/object.c --- a/pypy/module/cpyext/src/object.c +++ b/pypy/module/cpyext/src/object.c @@ -2,6 +2,12 @@ #include "Python.h" +/* Get an object's GC head */ +#define AS_GC(o) ((PyGC_Head *)(o)-1) + +/* Get the object given the GC head */ +#define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1)) + 
extern void _PyPy_Free(void *ptr); extern void *_PyPy_Malloc(Py_ssize_t size); @@ -30,6 +36,17 @@ */ Py_ssize_t _pypy_rawrefcount_w_marker_deallocating = 0xDEADFFF; +static PyGC_Head _internal_pyobj_list; +PyGC_Head *_pypy_rawrefcount_pyobj_list = &_internal_pyobj_list; + +PyGC_Head * +_PyPy_InitPyObjList() +{ + _pypy_rawrefcount_pyobj_list->gc_next = _pypy_rawrefcount_pyobj_list; + _pypy_rawrefcount_pyobj_list->gc_prev = _pypy_rawrefcount_pyobj_list; + return _pypy_rawrefcount_pyobj_list; +} + void _Py_Dealloc(PyObject *obj) { @@ -57,9 +74,25 @@ } void +PyObject_GC_Track(void *obj) +{ + _PyObject_GC_TRACK(obj); +} + +void +PyObject_GC_UnTrack(void *obj) +{ + if (_PyGC_IS_TRACKED(obj)) + _PyObject_GC_UNTRACK(obj); +} + +void PyObject_GC_Del(void *obj) { - _PyPy_Free(obj); + PyGC_Head *g = AS_GC(obj); + if (_PyGC_IS_TRACKED(obj)) + _PyObject_GC_UNTRACK(obj); + _PyPy_Free(g); } PyObject * @@ -74,14 +107,51 @@ return (PyObject*)_PyObject_NewVar(type, 0); } +static PyObject * +_generic_gc_alloc(PyTypeObject *type, Py_ssize_t nitems) +{ + Py_ssize_t size; + PyObject *pyobj; + PyGC_Head *g; + if (type->tp_flags & Py_TPFLAGS_HEAPTYPE) + Py_INCREF(type); + + size = sizeof(PyGC_Head) + type->tp_basicsize; + if (type->tp_itemsize) + size += nitems * type->tp_itemsize; + + g = (PyObject*)_PyPy_Malloc(size); + if (g == NULL) + return NULL; + g->gc_refs = 0; + _PyGCHead_SET_REFS(g, _PyGC_REFS_UNTRACKED); + + pyobj = FROM_GC(g); + if (type->tp_itemsize) + ((PyVarObject*)pyobj)->ob_size = nitems; + + pyobj->ob_refcnt = 1; + /* pyobj->ob_pypy_link should get assigned very quickly */ + pyobj->ob_type = type; + return pyobj; +} + + PyObject * _PyObject_GC_New(PyTypeObject *type) { - return _PyObject_New(type); + return (PyObject*)_PyObject_GC_NewVar(type, 0); } PyVarObject * _PyObject_GC_NewVar(PyTypeObject *type, Py_ssize_t nitems) { - return _PyObject_NewVar(type, nitems); + PyObject *py_obj = _generic_gc_alloc(type, nitems); + if (!py_obj) + return (PyVarObject*)PyErr_NoMemory(); 
+ + if (type->tp_itemsize == 0) + return (PyVarObject*)PyObject_INIT(py_obj, type); + else + return PyObject_INIT_VAR((PyVarObject*)py_obj, type, nitems); } static PyObject * @@ -141,4 +211,4 @@ { obj->ob_size = size; return (PyVarObject*)PyObject_Init((PyObject*)obj, type); -} +} \ No newline at end of file diff --git a/pypy/module/cpyext/state.py b/pypy/module/cpyext/state.py --- a/pypy/module/cpyext/state.py +++ b/pypy/module/cpyext/state.py @@ -59,6 +59,7 @@ def setup_rawrefcount(self): space = self.space if not self.space.config.translating: + from pypy.module.cpyext.api import PyGC_HeadPtr def dealloc_trigger(): from pypy.module.cpyext.pyobject import PyObject, decref print 'dealloc_trigger...' @@ -73,7 +74,9 @@ def tp_traverse(obj_addr, callback, args): # TODO: implement pass - rawrefcount.init(dealloc_trigger, tp_traverse) + pyobj_list = lltype.malloc(PyGC_HeadPtr.TO, + flavor='raw', immortal=True, zero=True) + rawrefcount.init(dealloc_trigger, tp_traverse, pyobj_list) else: if space.config.translation.gc == "boehm": action = BoehmPyObjDeallocAction(space) @@ -128,11 +131,13 @@ if space.config.translation.gc != "boehm": # This must be called in RPython, the untranslated version # does something different. Sigh. 
+ pypyobj_list = self.C._PyPy_InitPyObjList() rawrefcount.init( llhelper(rawrefcount.RAWREFCOUNT_DEALLOC_TRIGGER, self.dealloc_trigger), llhelper(rawrefcount.RAWREFCOUNT_TRAVERSE, - self.tp_traverse)) + self.tp_traverse), + pypyobj_list) self.builder.attach_all(space) setup_new_method_def(space) diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -2994,6 +2994,12 @@ ('c_ob_refcnt', lltype.Signed), ('c_ob_pypy_link', lltype.Signed)) PYOBJ_HDR_PTR = lltype.Ptr(PYOBJ_HDR) + PYOBJ_GC_HDR = lltype.Struct('PyGC_Head', + ('c_gc_next', rffi.VOIDP), + ('c_gc_prev', rffi.VOIDP), + ('c_gc_refs', lltype.Signed)) + PYOBJ_GC_HDR_PTR = lltype.Ptr(PYOBJ_GC_HDR) + RAWREFCOUNT_DEALLOC_TRIGGER = lltype.Ptr(lltype.FuncType([], lltype.Void)) VISIT_FUNCTYPE = lltype.Ptr(lltype.FuncType([PYOBJ_HDR_PTR, rffi.VOIDP], rffi.INT_real)) @@ -3004,8 +3010,11 @@ def _pyobj(self, pyobjaddr): return llmemory.cast_adr_to_ptr(pyobjaddr, self.PYOBJ_HDR_PTR) + def _pygchdr(self, pygchdraddr): + return llmemory.cast_adr_to_ptr(pygchdraddr, self.PYOBJ_GC_HDR_PTR) - def rawrefcount_init(self, dealloc_trigger_callback, tp_traverse): + def rawrefcount_init(self, dealloc_trigger_callback, tp_traverse, + pyobj_list): # see pypy/doc/discussion/rawrefcount.rst if not self.rrc_enabled: self.rrc_p_list_young = self.AddressStack() @@ -3021,6 +3030,7 @@ self.rrc_pyobjects_to_scan = self.AddressStack() self.rrc_more_pyobjects_to_scan = self.AddressStack() self.rrc_pyobjects_to_trace = self.AddressStack() + self.rrc_pyobj_list = self._pygchdr(pyobj_list) self.rrc_enabled = True def check_no_more_rawrefcount_state(self): @@ -3220,6 +3230,8 @@ assert not self.rrc_more_pyobjects_to_scan.non_empty() assert not self.rrc_pyobjects_to_trace.non_empty() + self._rrc_gc_print_list() + # initially, scan all real pyobjects (not proxies) which are linked to objects 
#self.rrc_p_list_old.foreach(self._rrc_major_scan_non_rc_roots, None) self.rrc_o_list_old.foreach(self._rrc_major_scan_non_rc_roots, None) @@ -3334,4 +3346,15 @@ callback_ptr = llhelper(self.VISIT_FUNCTYPE, IncrementalMiniMarkGC._rrc_visit) self_ptr = rffi.cast(rffi.VOIDP, cast_nongc_instance_to_adr(self)) - self.rrc_tp_traverse(pyobj, callback_ptr, self_ptr) \ No newline at end of file + self.rrc_tp_traverse(pyobj, callback_ptr, self_ptr) + + def _rrc_gc_list_init(self, pygclist): + pygclist.c_gc_next = rffi.cast(rffi.VOIDP, pygclist) + pygclist.c_gc_prev = rffi.cast(rffi.VOIDP, pygclist) + + def _rrc_gc_print_list(self): + debug_print("gc_print_list start!") + curr = rffi.cast(self.PYOBJ_GC_HDR_PTR, self.rrc_pyobj_list.c_gc_next) + while curr != self.rrc_pyobj_list: + debug_print("gc_print_list: ", curr) + curr = rffi.cast(self.PYOBJ_GC_HDR_PTR, curr.c_gc_next) diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py --- a/rpython/memory/gc/test/test_rawrefcount.py +++ b/rpython/memory/gc/test/test_rawrefcount.py @@ -5,7 +5,7 @@ from rpython.rlib.rawrefcount import (REFCNT_FROM_PYPY, REFCNT_FROM_PYPY_LIGHT, REFCNT_MASK) from pypy.module.cpyext.api import (PyObject, PyTypeObject, PyTypeObjectPtr, - PyObjectFields, cpython_struct) + PyObjectFields, cpython_struct, PyGC_Head) from pypy.module.cpyext.complexobject import PyComplexObject from rpython.rtyper.lltypesystem import rffi from pypy.module.cpyext.typeobjectdefs import visitproc, traverseproc @@ -54,8 +54,11 @@ rc = REFCNT_FROM_PYPY self.trigger = [] self.trigger2 = [] + self.pyobj_list = lltype.malloc(PyGC_Head.TO, flavor='raw', + immortal=True) self.gc.rawrefcount_init(lambda: self.trigger.append(1), - lambda: self.trigger2.append(1)) + lambda: self.trigger2.append(1), + self.pyobj_list) # if create_immortal: p1 = lltype.malloc(S, immortal=True) diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- 
a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -476,7 +476,7 @@ self.rawrefcount_init_ptr = getfn( GCClass.rawrefcount_init, [s_gc, SomePtr(GCClass.RAWREFCOUNT_DEALLOC_TRIGGER), - SomePtr(GCClass.RAWREFCOUNT_TRAVERSE)], + SomePtr(GCClass.RAWREFCOUNT_TRAVERSE), SomeAddress()], annmodel.s_None) self.rawrefcount_create_link_pypy_ptr = getfn( GCClass.rawrefcount_create_link_pypy, @@ -1311,12 +1311,13 @@ self.pop_roots(hop, livevars) def gct_gc_rawrefcount_init(self, hop): - [v_fnptr, v_fnptr2] = hop.spaceop.args + [v_fnptr, v_fnptr2, v_pyobj_list] = hop.spaceop.args assert v_fnptr.concretetype == self.GCClass.RAWREFCOUNT_DEALLOC_TRIGGER assert v_fnptr2.concretetype == self.GCClass.RAWREFCOUNT_TRAVERSE + # TODO add assert for v_pyobj_list hop.genop("direct_call", [self.rawrefcount_init_ptr, self.c_const_gc, v_fnptr, - v_fnptr2]) + v_fnptr2, v_pyobj_list]) def gct_gc_rawrefcount_create_link_pypy(self, hop): [v_gcobj, v_pyobject] = hop.spaceop.args diff --git a/rpython/rlib/rawrefcount.py b/rpython/rlib/rawrefcount.py --- a/rpython/rlib/rawrefcount.py +++ b/rpython/rlib/rawrefcount.py @@ -81,12 +81,12 @@ @not_rpython -def init(dealloc_trigger_callback=None, tp_traverse=None): +def init(dealloc_trigger_callback=None, tp_traverse=None, pyobj_list=None): """set up rawrefcount with the GC. This is only used for tests; it should not be called at all during translation. 
""" global _p_list, _o_list, _adr2pypy, _pypy2ob, _pypy2ob_rev - global _d_list, _dealloc_trigger_callback, _tp_traverse + global _d_list, _dealloc_trigger_callback, _tp_traverse, _pygclist _p_list = [] _o_list = [] _adr2pypy = [None] @@ -96,6 +96,7 @@ _d_marker = None _dealloc_trigger_callback = dealloc_trigger_callback _tp_traverse = tp_traverse + _pygclist = pyobj_list # def init_traverse(traverse_cpy_call): # global _traverse_cpy_call @@ -289,15 +290,18 @@ class Entry(ExtRegistryEntry): _about_ = init - def compute_result_annotation(self, s_dealloc_callback, tp_traverse): + def compute_result_annotation(self, s_dealloc_callback, tp_traverse, + pyobj_list): from rpython.rtyper.llannotation import SomePtr assert isinstance(s_dealloc_callback, SomePtr) # ll-ptr-to-function # add assert? def specialize_call(self, hop): hop.exception_cannot_occur() - v_dealloc_callback, v_tp_traverse = hop.inputargs(*hop.args_r) - hop.genop('gc_rawrefcount_init', [v_dealloc_callback, v_tp_traverse]) + v_dealloc_callback, v_tp_traverse, v_pyobj_list = \ + hop.inputargs(*hop.args_r) + hop.genop('gc_rawrefcount_init', [v_dealloc_callback, v_tp_traverse, + v_pyobj_list]) class Entry(ExtRegistryEntry): diff --git a/rpython/translator/c/src/mem.h b/rpython/translator/c/src/mem.h --- a/rpython/translator/c/src/mem.h +++ b/rpython/translator/c/src/mem.h @@ -2,11 +2,32 @@ /************************************************************/ /*** C header subsection: operations on LowLevelTypes ***/ -#include +#ifdef CPYEXT_BOEHM +/* use boehm for cpyext -> redirect all calls to malloc + - use __libc_malloc / __libc_calloc for pypy + - cpyext modules will use redirected malloc +*/ +#define REDIRECT_MALLOC=GC_malloc +/* TODO: use own version of boehm, where before sweep new roots can be added */ +#include -/* used by rpython.rlib.rstack, but also by asmgcc */ -#define OP_STACK_CURRENT(r) r = (Signed)&r +static void (*real_malloc)(size_t) = NULL; +static void (*real_calloc)(size_t, size_t) = NULL; +/* 
TODO: fix, does not work. but patch should be made here */ +#define OP_RAW_MALLOC(size, zero, result) { \ + if (zero) { \ + real_write = dlsym(RTLD_NEXT, "write"); + real_write(fd, buf, count); + result = __libc_calloc(size, 1); \ + } else \ + result = __libc_malloc(size); \ + if (result != NULL) { \ + COUNT_MALLOC; \ + } \ + } + +#else #define OP_RAW_MALLOC(size, zero, result) { \ if (zero) \ @@ -18,6 +39,13 @@ } \ } +#endif + +#include + +/* used by rpython.rlib.rstack, but also by asmgcc */ +#define OP_STACK_CURRENT(r) r = (Signed)&r + #define OP_RAW_FREE(p, r) free(p); COUNT_FREE; #define OP_RAW_MEMCLEAR(p, size, r) memset((void*)p, 0, size) From pypy.commits at gmail.com Fri Jan 11 05:39:08 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 02:39:08 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Removed extra flags in cpython refcount Message-ID: <5c38724c.1c69fb81.ca43.7a78@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95605:878ff32d88a1 Date: 2018-07-05 13:01 +0200 http://bitbucket.org/pypy/pypy/changeset/878ff32d88a1/ Log: Removed extra flags in cpython refcount Fixed tests in test_rawrefcount and test_cpyext Removed references from rawrefcount to cpyext Added some comments diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -1787,10 +1787,10 @@ preexist_error = PyErr_Occurred(space) try: - print "start cpyext_call" + #print "start cpyext_call" # Call the function result = call_external_function(func, *boxed_args) - print "end cpyext_call" + #print "end cpyext_call" finally: assert cpyext_glob_tid_ptr[0] == tid cpyext_glob_tid_ptr[0] = tid_before diff --git a/pypy/module/cpyext/include/boolobject.h b/pypy/module/cpyext/include/boolobject.h --- a/pypy/module/cpyext/include/boolobject.h +++ b/pypy/module/cpyext/include/boolobject.h @@ -13,8 +13,8 @@ #define Py_True ((PyObject *) &_Py_TrueStruct) /* Macros for returning 
Py_True or Py_False, respectively */ -#define Py_RETURN_TRUE do { Py_INCREF(Py_True); return Py_True; } while(0) -#define Py_RETURN_FALSE do { Py_INCREF(Py_False); return Py_False; } while(0) +#define Py_RETURN_TRUE return Py_INCREF(Py_True), Py_True +#define Py_RETURN_FALSE return Py_INCREF(Py_False), Py_False #ifdef __cplusplus } diff --git a/pypy/module/cpyext/include/object.h b/pypy/module/cpyext/include/object.h --- a/pypy/module/cpyext/include/object.h +++ b/pypy/module/cpyext/include/object.h @@ -2,7 +2,6 @@ #define Py_OBJECT_H #include -#include #ifdef __cplusplus extern "C" { @@ -13,12 +12,7 @@ #define PY_SSIZE_T_MAX ((Py_ssize_t)(((size_t)-1)>>1)) #define PY_SSIZE_T_MIN (-PY_SSIZE_T_MAX-1) -#define PY_REFCNT_FROM_PYPY (4L << ((long)(log(PY_SSIZE_T_MAX) / log(2) - 2))) -#define PY_REFCNT_OVERFLOW (1L << ((long)(log(PY_SSIZE_T_MAX) / log(2) - 4) - 1L)) -#define PY_REFCNT_MASK ((PY_REFCNT_OVERFLOW << 1L) - 1L) -#define Py_RETURN_NONE return (((((PyObject *)(Py_None))->ob_refcnt & PY_REFCNT_OVERFLOW) == 0) ? 
\ - ((PyObject *)(Py_None))->ob_refcnt++ : Py_IncRef((PyObject *)(Py_None))), Py_None - +#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None /* CPython has this for backwards compatibility with really old extensions, and now @@ -40,20 +34,14 @@ #define Py_XDECREF(ob) (Py_DecRef((PyObject *)(ob))) #else /* Fast version */ -#define Py_INCREF(ob) do { \ - if (!(((PyObject *)(ob))->ob_refcnt & PY_REFCNT_OVERFLOW)) \ - ((PyObject *)(ob))->ob_refcnt++; \ - else \ - Py_IncRef((PyObject *)(ob)); \ - } while (0) -#define Py_DECREF(ob) do { \ - if ((((PyObject *)(ob))->ob_refcnt & PY_REFCNT_OVERFLOW)) \ - Py_DecRef((PyObject *)(ob)); \ - else if (--((PyObject *)(ob))->ob_refcnt & PY_REFCNT_MASK) \ - ; \ - else \ - _Py_Dealloc((PyObject *)(ob)); \ - } while (0) +#define Py_INCREF(ob) (((PyObject *)(ob))->ob_refcnt++) +#define Py_DECREF(op) \ + do { \ + if (--((PyObject *)(op))->ob_refcnt != 0) \ + ; \ + else \ + _Py_Dealloc((PyObject *)(op)); \ + } while (0) #define Py_XINCREF(op) do { if ((op) == NULL) ; else Py_INCREF(op); } while (0) #define Py_XDECREF(op) do { if ((op) == NULL) ; else Py_DECREF(op); } while (0) @@ -73,8 +61,7 @@ } \ } while (0) -#define Py_REFCNT(ob) ((((PyObject *)(ob))->ob_refcnt & PY_REFCNT_OVERFLOW == 0) ? 
\ - (((PyObject*)(ob))->ob_refcnt & PY_REFCNT_MASK) : _Py_RefCnt_Overflow(ob)) +#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt) #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size) diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py --- a/pypy/module/cpyext/object.py +++ b/pypy/module/cpyext/object.py @@ -11,7 +11,6 @@ from pypy.module.cpyext.pyerrors import PyErr_NoMemory, PyErr_BadInternalCall from pypy.objspace.std.typeobject import W_TypeObject from pypy.interpreter.error import OperationError, oefmt -from rpython.rlib.rawrefcount import REFCNT_MASK import pypy.module.__builtin__.operation as operation @@ -51,7 +50,7 @@ def _dealloc(space, obj): # This frees an object after its refcount dropped to zero, so we # assert that it is really zero here. - assert obj.c_ob_refcnt & REFCNT_MASK == 0 + assert obj.c_ob_refcnt == 0 pto = obj.c_ob_type obj_voidp = rffi.cast(rffi.VOIDP, obj) generic_cpy_call(space, pto.c_tp_free, obj_voidp) diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py --- a/pypy/module/cpyext/pyobject.py +++ b/pypy/module/cpyext/pyobject.py @@ -17,11 +17,8 @@ from rpython.rlib.objectmodel import keepalive_until_here from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_base_ptr from rpython.rlib import rawrefcount, jit -from rpython.rlib.debug import ll_assert, fatalerror, debug_print -from rpython.rlib.rawrefcount import (REFCNT_MASK, REFCNT_FROM_PYPY, - REFCNT_OVERFLOW) -from pypy.module.cpyext.api import slot_function -from pypy.module.cpyext.typeobjectdefs import visitproc +from rpython.rlib.debug import ll_assert, fatalerror + #________________________________________________________ # type description @@ -344,7 +341,7 @@ pyobj = as_pyobj(space, w_obj, w_userdata, immortal=immortal) if pyobj: # != NULL assert pyobj.c_ob_refcnt >= rawrefcount.REFCNT_FROM_PYPY - rawrefcount.incref(pyobj) + pyobj.c_ob_refcnt += 1 keepalive_until_here(w_obj) 
return pyobj @@ -378,7 +375,7 @@ pyobj = rffi.cast(PyObject, pyobj) w_obj = from_ref(space, pyobj) if pyobj: - rawrefcount.decref(pyobj) + pyobj.c_ob_refcnt -= 1 assert pyobj.c_ob_refcnt >= rawrefcount.REFCNT_FROM_PYPY keepalive_until_here(w_obj) return w_obj @@ -389,7 +386,7 @@ assert is_pyobj(pyobj) pyobj = rffi.cast(PyObject, pyobj) assert pyobj.c_ob_refcnt >= 1 - rawrefcount.incref(pyobj) + pyobj.c_ob_refcnt += 1 @specialize.ll() def decref(space, pyobj): @@ -397,9 +394,11 @@ assert is_pyobj(pyobj) pyobj = rffi.cast(PyObject, pyobj) if pyobj: - rawrefcount.decref(pyobj) - rc = pyobj.c_ob_refcnt - if rc & REFCNT_MASK == 0: + assert pyobj.c_ob_refcnt > 0 + assert (pyobj.c_ob_pypy_link == 0 or + pyobj.c_ob_refcnt > rawrefcount.REFCNT_FROM_PYPY) + pyobj.c_ob_refcnt -= 1 + if pyobj.c_ob_refcnt == 0: state = space.fromcache(State) generic_cpy_call(space, state.C._Py_Dealloc, pyobj) #else: @@ -407,32 +406,6 @@ # if w_obj is not None: # assert pyobj.c_ob_refcnt >= rawrefcount.REFCNT_FROM_PYPY - at cpython_api([PyObject], lltype.Void) -def Py_IncRef(space, obj): - incref(space, obj) - - at cpython_api([PyObject], lltype.Void) -def Py_DecRef(space, obj): - decref(space, obj) - - at cpython_api([PyObject], lltype.SignedLongLong, error=CANNOT_FAIL) -def _Py_RefCnt_Overflow(space, obj): - return refcnt_overflow(space, obj) - - at specialize.ll() -def refcnt_overflow(space, obj): - if is_pyobj(obj): - pyobj = rffi.cast(PyObject, obj) - else: - pyobj = as_pyobj(space, obj, None) - if pyobj: - if pyobj.c_ob_refcnt & REFCNT_MASK == REFCNT_OVERFLOW: - return REFCNT_OVERFLOW - else: - return (pyobj.c_ob_refcnt & REFCNT_MASK) + \ - rawrefcount.overflow_get(pyobj) - return 0 - @init_function def write_w_marker_deallocating(space): if we_are_translated(): diff --git a/pypy/module/cpyext/src/object.c b/pypy/module/cpyext/src/object.c --- a/pypy/module/cpyext/src/object.c +++ b/pypy/module/cpyext/src/object.c @@ -11,7 +11,6 @@ extern void _PyPy_Free(void *ptr); extern void 
*_PyPy_Malloc(Py_ssize_t size); -/* void Py_IncRef(PyObject *o) { @@ -23,7 +22,6 @@ { Py_XDECREF(o); } -*/ /* * The actual value of this variable will be the address of diff --git a/pypy/module/cpyext/state.py b/pypy/module/cpyext/state.py --- a/pypy/module/cpyext/state.py +++ b/pypy/module/cpyext/state.py @@ -74,8 +74,12 @@ def tp_traverse(obj_addr, callback, args): # TODO: implement pass + # Warning: This list ist different than the list actually used + # by the extension modules (see _PyPy_InitPyObjList). pyobj_list = lltype.malloc(PyGC_HeadPtr.TO, flavor='raw', immortal=True, zero=True) + pyobj_list.c_gc_next = rffi.cast(rffi.VOIDP, pyobj_list); + pyobj_list.c_gc_next = rffi.cast(rffi.VOIDP, pyobj_list); rawrefcount.init(dealloc_trigger, tp_traverse, pyobj_list) else: if space.config.translation.gc == "boehm": diff --git a/pypy/module/cpyext/test/test_bytesobject.py b/pypy/module/cpyext/test/test_bytesobject.py --- a/pypy/module/cpyext/test/test_bytesobject.py +++ b/pypy/module/cpyext/test/test_bytesobject.py @@ -9,12 +9,11 @@ PyString_ConcatAndDel, PyString_Format, PyString_InternFromString, PyString_AsEncodedObject, PyString_AsDecodedObject, _PyString_Eq, _PyString_Join) -from pypy.module.cpyext.api import ( - PyObjectP, PyObject, Py_ssize_tP, generic_cpy_call) -from pypy.module.cpyext.pyobject import ( - Py_DecRef, Py_IncRef, _Py_RefCnt_Overflow, from_ref, make_ref, decref) +from pypy.module.cpyext.api import PyObjectP, PyObject, Py_ssize_tP, \ + generic_cpy_call +from pypy.module.cpyext.pyobject import decref, from_ref, make_ref from pypy.module.cpyext.buffer import PyObject_AsCharBuffer -from rpython.rlib import rawrefcount +from pypy.module.cpyext.api import PyTypeObjectPtr class AppTestBytesObject(AppTestCpythonExtensionBase): @@ -512,9 +511,9 @@ ref = make_ref(space, space.wrap('abc')) ptr = lltype.malloc(PyObjectP.TO, 1, flavor='raw') ptr[0] = ref - prev_refcnt = ref.c_ob_refcnt & rawrefcount.REFCNT_MASK + prev_refcnt = ref.c_ob_refcnt 
PyString_Concat(space, ptr, space.wrap('def')) - assert ref.c_ob_refcnt & rawrefcount.REFCNT_MASK == prev_refcnt - 1 + assert ref.c_ob_refcnt == prev_refcnt - 1 assert space.str_w(from_ref(space, ptr[0])) == 'abcdef' with pytest.raises(OperationError): PyString_Concat(space, ptr, space.w_None) @@ -550,9 +549,9 @@ w_text = space.wrap("text") ref = make_ref(space, w_text) - prev_refcnt = ref.c_ob_refcnt & rawrefcount.REFCNT_MASK + prev_refcnt = ref.c_ob_refcnt assert PyObject_AsCharBuffer(space, ref, bufp, lenp) == 0 - assert ref.c_ob_refcnt & rawrefcount.REFCNT_MASK == prev_refcnt + assert ref.c_ob_refcnt == prev_refcnt assert lenp[0] == 4 assert rffi.charp2str(bufp[0]) == 'text' lltype.free(bufp, flavor='raw') @@ -611,53 +610,3 @@ w_seq = space.wrap(['a', 'b']) w_joined = _PyString_Join(space, w_sep, w_seq) assert space.unwrap(w_joined) == 'ab' - - def test_refcnt_overflow(self, space): - ref1 = make_ref(space, space.wrap('foo')) - ref1.c_ob_refcnt = rawrefcount.REFCNT_OVERFLOW - 1 - - Py_IncRef(space, ref1) - assert ref1.c_ob_refcnt & rawrefcount.REFCNT_MASK \ - == rawrefcount.REFCNT_OVERFLOW - assert _Py_RefCnt_Overflow(space, ref1) \ - == rawrefcount.REFCNT_OVERFLOW - - Py_IncRef(space, ref1) - assert ref1.c_ob_refcnt & rawrefcount.REFCNT_MASK \ - == rawrefcount.REFCNT_OVERFLOW + 1 - assert _Py_RefCnt_Overflow(space, ref1) \ - == rawrefcount.REFCNT_OVERFLOW + 1 - - Py_IncRef(space, ref1) - assert ref1.c_ob_refcnt & rawrefcount.REFCNT_MASK \ - == rawrefcount.REFCNT_OVERFLOW + 1 - assert _Py_RefCnt_Overflow(space, ref1) \ - == rawrefcount.REFCNT_OVERFLOW + 2 - - Py_IncRef(space, ref1) - assert ref1.c_ob_refcnt & rawrefcount.REFCNT_MASK \ - == rawrefcount.REFCNT_OVERFLOW + 1 - assert _Py_RefCnt_Overflow(space, ref1) \ - == rawrefcount.REFCNT_OVERFLOW + 3 - - Py_DecRef(space, ref1) - assert ref1.c_ob_refcnt & rawrefcount.REFCNT_MASK \ - == rawrefcount.REFCNT_OVERFLOW + 1 - assert _Py_RefCnt_Overflow(space, ref1) \ - == rawrefcount.REFCNT_OVERFLOW + 2 - - 
Py_DecRef(space, ref1) - assert ref1.c_ob_refcnt & rawrefcount.REFCNT_MASK \ - == rawrefcount.REFCNT_OVERFLOW + 1 - assert _Py_RefCnt_Overflow(space, ref1) \ - == rawrefcount.REFCNT_OVERFLOW + 1 - - Py_DecRef(space, ref1) - assert ref1.c_ob_refcnt & rawrefcount.REFCNT_MASK \ - == rawrefcount.REFCNT_OVERFLOW - assert _Py_RefCnt_Overflow(space, ref1) \ - == rawrefcount.REFCNT_OVERFLOW - - Py_DecRef(space, ref1) - assert ref1.c_ob_refcnt & rawrefcount.REFCNT_MASK \ - == rawrefcount.REFCNT_OVERFLOW - 1 diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -756,7 +756,7 @@ module = self.import_module(name='foo', init=init, body=body) # uncaught interplevel exceptions are turned into SystemError - expected = "ZeroDivisionError('integer division or modulo by zero',)" + expected = "ZeroDivisionError('integer division by zero',)" exc = raises(SystemError, module.crash1) assert exc.value[0] == expected diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -3107,10 +3107,11 @@ self.singleaddr) def _rrc_minor_trace(self, pyobject, singleaddr): - from rpython.rlib.rawrefcount import REFCNT_MASK + from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY + from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT # rc = self._pyobj(pyobject).c_ob_refcnt - if rc & REFCNT_MASK == 0: + if rc == REFCNT_FROM_PYPY or rc == REFCNT_FROM_PYPY_LIGHT: pass # the corresponding object may die else: # force the corresponding object to be alive @@ -3169,12 +3170,11 @@ def _rrc_free(self, pyobject): from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT - from rpython.rlib.rawrefcount import REFCNT_MASK # rc = self._pyobj(pyobject).c_ob_refcnt if rc >= REFCNT_FROM_PYPY_LIGHT: rc -= 
REFCNT_FROM_PYPY_LIGHT - if rc & REFCNT_MASK == 0: + if rc == 0: lltype.free(self._pyobj(pyobject), flavor='raw') else: # can only occur if LIGHT is used in create_link_pyobj() @@ -3186,7 +3186,7 @@ "refcount underflow from REFCNT_FROM_PYPY_LIGHT?") rc -= REFCNT_FROM_PYPY self._pyobj(pyobject).c_ob_pypy_link = 0 - if rc & REFCNT_MASK == 0: + if rc == 0: self.rrc_dealloc_pending.append(pyobject) # an object with refcnt == 0 cannot stay around waiting # for its deallocator to be called. Some code (lxml) @@ -3212,10 +3212,11 @@ debug_stop("gc-rrc-trace") def _rrc_major_trace(self, pyobject, ignore): - from rpython.rlib.rawrefcount import REFCNT_MASK + from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY + from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT # rc = self._pyobj(pyobject).c_ob_refcnt - if rc & REFCNT_MASK == 0: + if rc == REFCNT_FROM_PYPY or rc == REFCNT_FROM_PYPY_LIGHT: pass # the corresponding object may die else: # force the corresponding object to be alive @@ -3263,11 +3264,11 @@ self.rrc_more_pyobjects_to_scan = self.AddressStack() def _rrc_mark_cpyobj(self, pyobj): - from rpython.rlib.rawrefcount import REFCNT_VISITED # if the pyobj is not marked, remember it and if there is a linked pypy # object also remember it - if pyobj.c_ob_refcnt & REFCNT_VISITED != REFCNT_VISITED: - pyobj.c_ob_refcnt |= REFCNT_VISITED + visited = True # TODO: check if visited (via 'cast' to PyGC_Head) + if not visited: + # TODO: mark visited pyobject = llmemory.cast_ptr_to_adr(pyobj) self.rrc_more_pyobjects_to_scan.append(pyobject) intobj = pyobj.c_ob_pypy_link @@ -3278,7 +3279,6 @@ self.objects_to_trace.append(obj) def _rrc_major_scan_non_rc_roots(self, pyobject, ignore): - from rpython.rlib.rawrefcount import REFCNT_VISITED # check in the object header of the linked pypy object, if it is marked # or not pyobj = self._pyobj(pyobject) @@ -3286,9 +3286,10 @@ obj = llmemory.cast_int_to_adr(intobj) hdr = self.header(obj) if hdr.tid & GCFLAG_VISITED: - if 
pyobj.c_ob_refcnt & REFCNT_VISITED != REFCNT_VISITED: + visited = True # TODO: check if visited + if not visited: # process the pyobject now - pyobj.c_ob_refcnt |= REFCNT_VISITED + # TODO: mark visited self.rrc_pyobjects_to_trace.append(pyobject) else: # save the pyobject for later, in case its linked object becomes diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py --- a/rpython/memory/gc/test/test_rawrefcount.py +++ b/rpython/memory/gc/test/test_rawrefcount.py @@ -2,17 +2,15 @@ from rpython.rtyper.lltypesystem import lltype, llmemory from rpython.memory.gc.incminimark import IncrementalMiniMarkGC from rpython.memory.gc.test.test_direct import BaseDirectGCTest -from rpython.rlib.rawrefcount import (REFCNT_FROM_PYPY, REFCNT_FROM_PYPY_LIGHT, - REFCNT_MASK) -from pypy.module.cpyext.api import (PyObject, PyTypeObject, PyTypeObjectPtr, - PyObjectFields, cpython_struct, PyGC_Head) -from pypy.module.cpyext.complexobject import PyComplexObject +from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY +from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT from rpython.rtyper.lltypesystem import rffi -from pypy.module.cpyext.typeobjectdefs import visitproc, traverseproc from rpython.rtyper.annlowlevel import llhelper -from rpython.rtyper.tool import rffi_platform +#from pypy.module.cpyext.api import (PyTypeObject) +#from pypy.module.cpyext.typeobjectdefs import visitproc, traverseproc PYOBJ_HDR = IncrementalMiniMarkGC.PYOBJ_HDR PYOBJ_HDR_PTR = IncrementalMiniMarkGC.PYOBJ_HDR_PTR +PYOBJ_GC_HDR_PTR = IncrementalMiniMarkGC.PYOBJ_GC_HDR_PTR S = lltype.GcForwardReference() S.become(lltype.GcStruct('S', @@ -22,14 +20,14 @@ T = lltype.Ptr(lltype.ForwardReference()) T.TO.become(lltype.Struct('test', - ('base', PyObject.TO), + ('base', PYOBJ_HDR_PTR.TO), ('next', T), ('prev', T), ('value', lltype.Signed))) -TRAVERSE_FUNCTYPE = rffi.CCallback([PyObject, visitproc, rffi.VOIDP], - rffi.INT_real) -t1 = lltype.malloc(PyTypeObject, 
flavor='raw', immortal=True) +#TRAVERSE_FUNCTYPE = rffi.CCallback([PYOBJ_HDR_PTR, visitproc, rffi.VOIDP], +# rffi.INT_real) +#t1 = lltype.malloc(PyTypeObject, flavor='raw', immortal=True) class TestRawRefCount(BaseDirectGCTest): @@ -54,11 +52,13 @@ rc = REFCNT_FROM_PYPY self.trigger = [] self.trigger2 = [] - self.pyobj_list = lltype.malloc(PyGC_Head.TO, flavor='raw', + self.pyobj_list = lltype.malloc(PYOBJ_GC_HDR_PTR.TO, flavor='raw', immortal=True) + self.pyobj_list.c_gc_next = rffi.cast(rffi.VOIDP, self.pyobj_list); + self.pyobj_list.c_gc_next = rffi.cast(rffi.VOIDP, self.pyobj_list); self.gc.rawrefcount_init(lambda: self.trigger.append(1), lambda: self.trigger2.append(1), - self.pyobj_list) + llmemory.cast_ptr_to_adr(self.pyobj_list)) # if create_immortal: p1 = lltype.malloc(S, immortal=True) @@ -78,10 +78,10 @@ self._collect(major=False) p1 = self.stackroots.pop() p1ref = lltype.cast_opaque_ptr(llmemory.GCREF, p1) - r1 = lltype.malloc(PyObject.TO, flavor='raw', immortal=create_immortal) + r1 = lltype.malloc(PYOBJ_HDR_PTR.TO, flavor='raw', immortal=create_immortal) r1.c_ob_refcnt = rc r1.c_ob_pypy_link = 0 - r1.c_ob_type = lltype.nullptr(PyTypeObject) + #r1.c_ob_type = lltype.nullptr(PyTypeObject) r1addr = llmemory.cast_ptr_to_adr(r1) if is_pyobj: assert not is_light @@ -104,40 +104,13 @@ return p1 return p1, p1ref, r1, r1addr, check_alive - def _rawrefcount_cycle_obj(self): - - def test_tp_traverse(obj, visit, args): - test = rffi.cast(T, obj) - vret = 0 - if llmemory.cast_ptr_to_adr(test.next).ptr is not None: - next = rffi.cast(PyObject, test.next) - vret = visit(next, args) - if vret != 0: - return vret - if llmemory.cast_ptr_to_adr(test.prev).ptr is not None: - next = rffi.cast(PyObject, test.prev) - vret = visit(next, args) - if vret != 0: - return vret - return vret - - func_ptr = llhelper(TRAVERSE_FUNCTYPE, test_tp_traverse) - rffi_func_ptr = rffi.cast(traverseproc, func_ptr) - t1.c_tp_traverse = rffi_func_ptr - - r1 = lltype.malloc(T.TO, flavor='raw', 
immortal=True) - r1.base.c_ob_pypy_link = 0 - r1.base.c_ob_type = t1 - r1.base.c_ob_refcnt = 1 - return r1 - def test_rawrefcount_objects_basic(self, old=False): p1, p1ref, r1, r1addr, check_alive = ( self._rawrefcount_pair(42, is_light=True, create_old=old)) p2 = self.malloc(S) p2.x = 84 p2ref = lltype.cast_opaque_ptr(llmemory.GCREF, p2) - r2 = lltype.malloc(PyObject.TO, flavor='raw') + r2 = lltype.malloc(PYOBJ_HDR_PTR.TO, flavor='raw') r2.c_ob_refcnt = 1 r2.c_ob_pypy_link = 0 r2addr = llmemory.cast_ptr_to_adr(r2) @@ -340,6 +313,33 @@ self._collect(major=True) check_alive(0) + # def _rawrefcount_cycle_obj(self): + # + # def test_tp_traverse(obj, visit, args): + # test = rffi.cast(T, obj) + # vret = 0 + # if llmemory.cast_ptr_to_adr(test.next).ptr is not None: + # next = rffi.cast(PYOBJ_HDR_PTR, test.next) + # vret = visit(next, args) + # if vret != 0: + # return vret + # if llmemory.cast_ptr_to_adr(test.prev).ptr is not None: + # next = rffi.cast(PYOBJ_HDR_PTR, test.prev) + # vret = visit(next, args) + # if vret != 0: + # return vret + # return vret + # + # func_ptr = llhelper(TRAVERSE_FUNCTYPE, test_tp_traverse) + # rffi_func_ptr = rffi.cast(traverseproc, func_ptr) + # t1.c_tp_traverse = rffi_func_ptr + # + # r1 = lltype.malloc(T.TO, flavor='raw', immortal=True) + # r1.base.c_ob_pypy_link = 0 + # r1.base.c_ob_type = t1 + # r1.base.c_ob_refcnt = 1 + # return r1 + # # def test_cycle_self_reference_free(self): # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) # r1 = self._rawrefcount_cycle_obj() diff --git a/rpython/rlib/rawrefcount.py b/rpython/rlib/rawrefcount.py --- a/rpython/rlib/rawrefcount.py +++ b/rpython/rlib/rawrefcount.py @@ -4,22 +4,16 @@ # This is meant for pypy's cpyext module, but is a generally # useful interface over our GC. 
XXX "pypy" should be removed here # -import sys, weakref, py, math +import sys, weakref, py from rpython.rtyper.lltypesystem import lltype, llmemory, rffi from rpython.rlib.objectmodel import we_are_translated, specialize, not_rpython from rpython.rtyper.extregistry import ExtRegistryEntry from rpython.translator.tool.cbuild import ExternalCompilationInfo -from rpython.rlib import rgc, objectmodel +from rpython.rlib import rgc -MAX_BIT = int(math.log(sys.maxint, 2)) - -# Flags -REFCNT_FROM_PYPY = 1 << MAX_BIT - 2 # Reference from a pypy object -REFCNT_FROM_PYPY_LIGHT = (1 << MAX_BIT - 1) + REFCNT_FROM_PYPY # Light reference from a pypy object -REFCNT_VISITED = 1 << MAX_BIT - 3 # Object visited during marking -REFCNT_OVERFLOW = 1 << MAX_BIT - 4 # Overflow bit for reference count -REFCNT_MASK = (REFCNT_OVERFLOW << 1) - 1 # Mask for reference count (including overflow bit) +REFCNT_FROM_PYPY = sys.maxint // 4 + 1 +REFCNT_FROM_PYPY_LIGHT = REFCNT_FROM_PYPY + (sys.maxint // 2 + 1) PYOBJ_HDR = lltype.Struct('GCHdr_PyObject', ('c_ob_refcnt', lltype.Signed), @@ -39,46 +33,6 @@ _adr2pypy.append(p) return res -def incref(pyobj): - if pyobj.c_ob_refcnt & REFCNT_OVERFLOW == 0: - pyobj.c_ob_refcnt += 1 - else: - if pyobj.c_ob_refcnt & REFCNT_MASK == REFCNT_OVERFLOW: - pyobj.c_ob_refcnt += 1 - overflow_new(pyobj) - else: - overflow_add(pyobj) - -def decref(pyobj): - if pyobj.c_ob_refcnt & REFCNT_OVERFLOW == 0: - pyobj.c_ob_refcnt -= 1 - else: - if pyobj.c_ob_refcnt & REFCNT_MASK == REFCNT_OVERFLOW: - pyobj.c_ob_refcnt -= 1 - elif overflow_sub(pyobj): - pyobj.c_ob_refcnt -= 1 - -_refcount_overflow = dict() - -def overflow_new(obj): - _refcount_overflow[objectmodel.current_object_addr_as_int(obj)] = 0 - -def overflow_add(obj): - _refcount_overflow[objectmodel.current_object_addr_as_int(obj)] += 1 - -def overflow_sub(obj): - addr = objectmodel.current_object_addr_as_int(obj) - c = _refcount_overflow[addr] - if c > 0: - _refcount_overflow[addr] = c - 1 - return False - else: - 
_refcount_overflow.pop(addr) - return True - -def overflow_get(obj): - return _refcount_overflow[objectmodel.current_object_addr_as_int(obj)] - @not_rpython def init(dealloc_trigger_callback=None, tp_traverse=None, pyobj_list=None): @@ -189,8 +143,7 @@ wr_p_list = [] new_p_list = [] for ob in reversed(_p_list): - if ob.c_ob_refcnt & REFCNT_MASK > 0 \ - or ob.c_ob_refcnt & REFCNT_FROM_PYPY == 0: + if ob.c_ob_refcnt not in (REFCNT_FROM_PYPY, REFCNT_FROM_PYPY_LIGHT): new_p_list.append(ob) else: p = detach(ob, wr_p_list) @@ -223,8 +176,7 @@ if ob.c_ob_refcnt >= REFCNT_FROM_PYPY_LIGHT: ob.c_ob_refcnt -= REFCNT_FROM_PYPY_LIGHT ob.c_ob_pypy_link = 0 - if ob.c_ob_refcnt & REFCNT_MASK == 0 \ - and ob.c_ob_refcnt < REFCNT_FROM_PYPY: + if ob.c_ob_refcnt == 0: lltype.free(ob, flavor='raw', track_allocation=track_allocation) else: @@ -232,9 +184,8 @@ assert ob.c_ob_refcnt < int(REFCNT_FROM_PYPY_LIGHT * 0.99) ob.c_ob_refcnt -= REFCNT_FROM_PYPY ob.c_ob_pypy_link = 0 - if ob.c_ob_refcnt & REFCNT_MASK == 0 \ - and ob.c_ob_refcnt < REFCNT_FROM_PYPY: - ob.c_ob_refcnt += 1 + if ob.c_ob_refcnt == 0: + ob.c_ob_refcnt = 1 _d_list.append(ob) return None diff --git a/rpython/rtyper/rclass.py b/rpython/rtyper/rclass.py --- a/rpython/rtyper/rclass.py +++ b/rpython/rtyper/rclass.py @@ -915,6 +915,10 @@ lst.append(str(g)) g = seen.get(g) lst.append('') + # TODO: remove code (see below) to make this check pass + # pypy/module/cpyext/api.py: + # print "start cpyext_call" + # print "end cpyext_call" raise TyperError("the RPython-level __del__() method " "in %r calls:%s" % (graph, '\n\t'.join(lst[::-1]))) From pypy.commits at gmail.com Fri Jan 11 05:39:10 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 02:39:10 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Implemented pyobj_list for rawrefcount (to be used in cpyext tests) Message-ID: <5c38724e.1c69fb81.d147c.efeb@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95606:9e5001a6604b Date: 
2018-07-05 15:54 +0200 http://bitbucket.org/pypy/pypy/changeset/9e5001a6604b/ Log: Implemented pyobj_list for rawrefcount (to be used in cpyext tests) Added own cpyext test file for GC-related tests diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -1300,7 +1300,8 @@ ctypes.c_void_p) # initialize the pyobj_list for the gc - space.fromcache(State).C._PyPy_InitPyObjList() + pyobj_list = space.fromcache(State).C._PyPy_InitPyObjList() + rawrefcount._init_pyobj_list(pyobj_list) # we need to call this *after* the init code above, because it might # indirectly call some functions which are attached to pypyAPI (e.g., we diff --git a/pypy/module/cpyext/state.py b/pypy/module/cpyext/state.py --- a/pypy/module/cpyext/state.py +++ b/pypy/module/cpyext/state.py @@ -59,28 +59,23 @@ def setup_rawrefcount(self): space = self.space if not self.space.config.translating: - from pypy.module.cpyext.api import PyGC_HeadPtr def dealloc_trigger(): - from pypy.module.cpyext.pyobject import PyObject, decref + from pypy.module.cpyext.pyobject import PyObject, decref, cts print 'dealloc_trigger...' while True: ob = rawrefcount.next_dead(PyObject) if not ob: break - print 'deallocating PyObject', ob + pto = ob.c_ob_type + name = rffi.charp2str(cts.cast('char*', pto.c_tp_name)) + print 'deallocating PyObject', ob, 'of type', name decref(space, ob) print 'dealloc_trigger DONE' return "RETRY" def tp_traverse(obj_addr, callback, args): # TODO: implement pass - # Warning: This list ist different than the list actually used - # by the extension modules (see _PyPy_InitPyObjList). 
- pyobj_list = lltype.malloc(PyGC_HeadPtr.TO, - flavor='raw', immortal=True, zero=True) - pyobj_list.c_gc_next = rffi.cast(rffi.VOIDP, pyobj_list); - pyobj_list.c_gc_next = rffi.cast(rffi.VOIDP, pyobj_list); - rawrefcount.init(dealloc_trigger, tp_traverse, pyobj_list) + rawrefcount.init(dealloc_trigger, tp_traverse) else: if space.config.translation.gc == "boehm": action = BoehmPyObjDeallocAction(space) diff --git a/pypy/module/cpyext/test/test_cpyext_gc.py b/pypy/module/cpyext/test/test_cpyext_gc.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_cpyext_gc.py @@ -0,0 +1,801 @@ +import sys +import weakref + +import pytest + +from pypy.tool.cpyext.extbuild import ( + SystemCompilationInfo, HERE, get_sys_info_app) +from pypy.interpreter.gateway import unwrap_spec, interp2app +from rpython.rtyper.lltypesystem import lltype, ll2ctypes +from pypy.module.cpyext import api +from pypy.module.cpyext.state import State +from rpython.tool.identity_dict import identity_dict +from rpython.tool import leakfinder +from rpython.rlib import rawrefcount +from rpython.tool.udir import udir + +only_pypy ="config.option.runappdirect and '__pypy__' not in sys.builtin_module_names" + + at api.cpython_api([], api.PyObject) +def PyPy_Crash1(space): + 1/0 + + at api.cpython_api([], lltype.Signed, error=-1) +def PyPy_Crash2(space): + 1/0 + +class SpaceCompiler(SystemCompilationInfo): + """Extension compiler for regular (untranslated PyPy) mode""" + def __init__(self, space, *args, **kwargs): + self.space = space + SystemCompilationInfo.__init__(self, *args, **kwargs) + + def load_module(self, mod, name): + space = self.space + api.load_extension_module(space, mod, name) + return space.getitem( + space.sys.get('modules'), space.wrap(name)) + + +def get_cpyext_info(space): + from pypy.module.imp.importing import get_so_extension + state = space.fromcache(State) + api_library = state.api_lib + if sys.platform == 'win32': + libraries = [api_library] + # '%s' undefined; 
assuming extern returning int + compile_extra = ["/we4013"] + # prevent linking with PythonXX.lib + w_maj, w_min = space.fixedview(space.sys.get('version_info'), 5)[:2] + link_extra = ["/NODEFAULTLIB:Python%d%d.lib" % + (space.int_w(w_maj), space.int_w(w_min))] + else: + libraries = [] + if sys.platform.startswith('linux'): + compile_extra = [ + "-Werror", "-g", "-O0", "-Wp,-U_FORTIFY_SOURCE", "-fPIC"] + link_extra = ["-g"] + else: + compile_extra = link_extra = None + return SpaceCompiler(space, + builddir_base=udir, + include_extra=api.include_dirs, + compile_extra=compile_extra, + link_extra=link_extra, + extra_libs=libraries, + ext=get_so_extension(space)) + + +def freeze_refcnts(self): + rawrefcount._dont_free_any_more() + return #ZZZ + state = self.space.fromcache(RefcountState) + self.frozen_refcounts = {} + for w_obj, obj in state.py_objects_w2r.iteritems(): + self.frozen_refcounts[w_obj] = obj.c_ob_refcnt + #state.print_refcounts() + self.frozen_ll2callocations = set(ll2ctypes.ALLOCATED.values()) + +class LeakCheckingTest(object): + """Base class for all cpyext tests.""" + spaceconfig = dict(usemodules=['cpyext', 'thread', 'struct', 'array', + 'itertools', 'time', 'binascii', + 'micronumpy', 'mmap' + ]) + + enable_leak_checking = True + + @staticmethod + def cleanup_references(space): + return #ZZZ + state = space.fromcache(RefcountState) + + import gc; gc.collect() + # Clear all lifelines, objects won't resurrect + for w_obj, obj in state.lifeline_dict._dict.items(): + if w_obj not in state.py_objects_w2r: + state.lifeline_dict.set(w_obj, None) + del obj + import gc; gc.collect() + + + for w_obj in state.non_heaptypes_w: + w_obj.c_ob_refcnt -= 1 + state.non_heaptypes_w[:] = [] + state.reset_borrowed_references() + + def check_and_print_leaks(self): + rawrefcount._collect() + # check for sane refcnts + import gc + + if 1: #ZZZ not self.enable_leak_checking: + leakfinder.stop_tracking_allocations(check=False) + return False + + leaking = False + state = 
self.space.fromcache(RefcountState) + gc.collect() + lost_objects_w = identity_dict() + lost_objects_w.update((key, None) for key in self.frozen_refcounts.keys()) + + for w_obj, obj in state.py_objects_w2r.iteritems(): + base_refcnt = self.frozen_refcounts.get(w_obj) + delta = obj.c_ob_refcnt + if base_refcnt is not None: + delta -= base_refcnt + lost_objects_w.pop(w_obj) + if delta != 0: + leaking = True + print >>sys.stderr, "Leaking %r: %i references" % (w_obj, delta) + try: + weakref.ref(w_obj) + except TypeError: + lifeline = None + else: + lifeline = state.lifeline_dict.get(w_obj) + if lifeline is not None: + refcnt = lifeline.pyo.c_ob_refcnt + if refcnt > 0: + print >>sys.stderr, "\tThe object also held by C code." + else: + referrers_repr = [] + for o in gc.get_referrers(w_obj): + try: + repr_str = repr(o) + except TypeError as e: + repr_str = "%s (type of o is %s)" % (str(e), type(o)) + referrers_repr.append(repr_str) + referrers = ", ".join(referrers_repr) + print >>sys.stderr, "\tThe object is referenced by these objects:", \ + referrers + for w_obj in lost_objects_w: + print >>sys.stderr, "Lost object %r" % (w_obj, ) + leaking = True + # the actual low-level leak checking is done by pypy.tool.leakfinder, + # enabled automatically by pypy.conftest. + return leaking + +class AppTestApi(LeakCheckingTest): + def setup_class(cls): + from rpython.rlib.clibffi import get_libc_name + if cls.runappdirect: + cls.libc = get_libc_name() + else: + cls.w_libc = cls.space.wrap(get_libc_name()) + + def setup_method(self, meth): + if not self.runappdirect: + freeze_refcnts(self) + + def teardown_method(self, meth): + if self.runappdirect: + return + self.space.getexecutioncontext().cleanup_cpyext_state() + self.cleanup_references(self.space) + # XXX: like AppTestCpythonExtensionBase.teardown_method: + # find out how to disable check_and_print_leaks() if the + # test failed + assert not self.check_and_print_leaks(), ( + "Test leaks or loses object(s). 
You should also check if " + "the test actually passed in the first place; if it failed " + "it is likely to reach this place.") + + +def _unwrap_include_dirs(space, w_include_dirs): + if w_include_dirs is None: + return None + else: + return [space.str_w(s) for s in space.listview(w_include_dirs)] + +def debug_collect(space): + rawrefcount._collect() + +class AppTestCpythonExtensionBase(LeakCheckingTest): + + def setup_class(cls): + space = cls.space + cls.w_here = space.wrap(str(HERE)) + cls.w_udir = space.wrap(str(udir)) + cls.w_runappdirect = space.wrap(cls.runappdirect) + if not cls.runappdirect: + cls.sys_info = get_cpyext_info(space) + space.getbuiltinmodule("cpyext") + # 'import os' to warm up reference counts + w_import = space.builtin.getdictvalue(space, '__import__') + space.call_function(w_import, space.wrap("os")) + #state = cls.space.fromcache(RefcountState) ZZZ + #state.non_heaptypes_w[:] = [] + cls.w_debug_collect = space.wrap(interp2app(debug_collect)) + else: + def w_import_module(self, name, init=None, body='', filename=None, + include_dirs=None, PY_SSIZE_T_CLEAN=False): + from extbuild import get_sys_info_app + sys_info = get_sys_info_app(self.udir) + return sys_info.import_module( + name, init=init, body=body, filename=filename, + include_dirs=include_dirs, + PY_SSIZE_T_CLEAN=PY_SSIZE_T_CLEAN) + cls.w_import_module = w_import_module + + def w_import_extension(self, modname, functions, prologue="", + include_dirs=None, more_init="", PY_SSIZE_T_CLEAN=False): + from extbuild import get_sys_info_app + sys_info = get_sys_info_app(self.udir) + return sys_info.import_extension( + modname, functions, prologue=prologue, + include_dirs=include_dirs, more_init=more_init, + PY_SSIZE_T_CLEAN=PY_SSIZE_T_CLEAN) + cls.w_import_extension = w_import_extension + + def w_compile_module(self, name, + source_files=None, source_strings=None): + from extbuild import get_sys_info_app + sys_info = get_sys_info_app(self.udir) + return 
sys_info.compile_extension_module(name, + source_files=source_files, source_strings=source_strings) + cls.w_compile_module = w_compile_module + + def w_load_module(self, mod, name): + from extbuild import get_sys_info_app + sys_info = get_sys_info_app(self.udir) + return sys_info.load_module(mod, name) + cls.w_load_module = w_load_module + + def w_debug_collect(self): + import gc + gc.collect() + gc.collect() + gc.collect() + cls.w_debug_collect = w_debug_collect + + + def record_imported_module(self, name): + """ + Record a module imported in a test so that it can be cleaned up in + teardown before the check for leaks is done. + + name gives the name of the module in the space's sys.modules. + """ + self.imported_module_names.append(name) + + def setup_method(self, func): + if self.runappdirect: + return + + @unwrap_spec(name='text') + def compile_module(space, name, + w_source_files=None, + w_source_strings=None): + """ + Build an extension module linked against the cpyext api library. 
+ """ + if not space.is_none(w_source_files): + source_files = space.listview_bytes(w_source_files) + else: + source_files = None + if not space.is_none(w_source_strings): + source_strings = space.listview_bytes(w_source_strings) + else: + source_strings = None + pydname = self.sys_info.compile_extension_module( + name, + source_files=source_files, + source_strings=source_strings) + + # hackish, but tests calling compile_module() always end up + # importing the result + self.record_imported_module(name) + + return space.wrap(pydname) + + @unwrap_spec(name='text', init='text_or_none', body='text', + filename='fsencode_or_none', PY_SSIZE_T_CLEAN=bool) + def import_module(space, name, init=None, body='', + filename=None, w_include_dirs=None, + PY_SSIZE_T_CLEAN=False): + include_dirs = _unwrap_include_dirs(space, w_include_dirs) + w_result = self.sys_info.import_module( + name, init, body, filename, include_dirs, PY_SSIZE_T_CLEAN) + self.record_imported_module(name) + return w_result + + + @unwrap_spec(mod='text', name='text') + def load_module(space, mod, name): + return self.sys_info.load_module(mod, name) + + @unwrap_spec(modname='text', prologue='text', + more_init='text', PY_SSIZE_T_CLEAN=bool) + def import_extension(space, modname, w_functions, prologue="", + w_include_dirs=None, more_init="", PY_SSIZE_T_CLEAN=False): + functions = space.unwrap(w_functions) + include_dirs = _unwrap_include_dirs(space, w_include_dirs) + w_result = self.sys_info.import_extension( + modname, functions, prologue, include_dirs, more_init, + PY_SSIZE_T_CLEAN) + self.record_imported_module(modname) + return w_result + + # A list of modules which the test caused to be imported (in + # self.space). These will be cleaned up automatically in teardown. 
+ self.imported_module_names = [] + + wrap = self.space.wrap + self.w_compile_module = wrap(interp2app(compile_module)) + self.w_load_module = wrap(interp2app(load_module)) + self.w_import_module = wrap(interp2app(import_module)) + self.w_import_extension = wrap(interp2app(import_extension)) + + # create the file lock before we count allocations + self.space.call_method(self.space.sys.get("stdout"), "flush") + + freeze_refcnts(self) + #self.check_and_print_leaks() + + def unimport_module(self, name): + """ + Remove the named module from the space's sys.modules. + """ + w_modules = self.space.sys.get('modules') + w_name = self.space.wrap(name) + self.space.delitem(w_modules, w_name) + + def teardown_method(self, func): + if self.runappdirect: + return + for name in self.imported_module_names: + self.unimport_module(name) + self.space.getexecutioncontext().cleanup_cpyext_state() + self.cleanup_references(self.space) + # XXX: find out how to disable check_and_print_leaks() if the + # test failed... + assert not self.check_and_print_leaks(), ( + "Test leaks or loses object(s). 
You should also check if " + "the test actually passed in the first place; if it failed " + "it is likely to reach this place.") + +def collect(space): + import gc + rawrefcount._collect() + gc.collect(2) + +def print_pyobj_list(space): + rawrefcount._print_pyobj_list() + +class AppTestCpythonExtensionCycleGC(AppTestCpythonExtensionBase): + + def setup_method(self, func): + if self.runappdirect: + return + + @unwrap_spec(methods='text') + def import_cycle_module(space, methods): + init = """ + if (Py_IsInitialized()) { + PyObject* m; + if (PyType_Ready(&CycleType) < 0) + return; + m = Py_InitModule("cycle", module_methods); + if (m == NULL) + return; + Py_INCREF(&CycleType); + PyModule_AddObject(m, "Cycle", (PyObject *)&CycleType); + } + """ + body = """ + #include + #include "structmember.h" + typedef struct { + PyObject_HEAD + PyObject *next; + PyObject *val; + } Cycle; + static PyTypeObject CycleType; + static int Cycle_traverse(Cycle *self, visitproc visit, void *arg) + { + int vret; + if (self->next) { + vret = visit(self->next, arg); + if (vret != 0) + return vret; + } + if (self->val) { + vret = visit(self->val, arg); + if (vret != 0) + return vret; + } + return 0; + } + static int Cycle_clear(Cycle *self) + { + PyObject *tmp; + tmp = self->next; + self->next = NULL; + Py_XDECREF(tmp); + tmp = self->val; + self->val = NULL; + Py_XDECREF(tmp); + return 0; + } + static void Cycle_dealloc(Cycle* self) + { + Cycle_clear(self); + Py_TYPE(self)->tp_free((PyObject*)self); + } + static PyObject* Cycle_new(PyTypeObject *type, PyObject *args, + PyObject *kwds) + { + Cycle *self; + self = (Cycle *)type->tp_alloc(type, 0); + if (self != NULL) { + self->next = PyString_FromString(""); + if (self->next == NULL) { + Py_DECREF(self); + return NULL; + } + } + PyObject_GC_Track(self); + return (PyObject *)self; + } + static int Cycle_init(Cycle *self, PyObject *args, PyObject *kwds) + { + PyObject *next=NULL, *tmp; + static char *kwlist[] = {"next", NULL}; + if (! 
PyArg_ParseTupleAndKeywords(args, kwds, "|O", kwlist, + &next)) + return -1; + if (next) { + tmp = self->next; + Py_INCREF(next); + self->next = next; + Py_XDECREF(tmp); + } + return 0; + } + static PyMemberDef Cycle_members[] = { + {"next", T_OBJECT_EX, offsetof(Cycle, next), 0, "next"}, + {"val", T_OBJECT_EX, offsetof(Cycle, val), 0, "val"}, + {NULL} /* Sentinel */ + }; + static PyMethodDef Cycle_methods[] = { + {NULL} /* Sentinel */ + }; + static PyTypeObject CycleType = { + PyVarObject_HEAD_INIT(NULL, 0) + "Cycle.Cycle", /* tp_name */ + sizeof(Cycle), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)Cycle_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | + Py_TPFLAGS_BASETYPE | + Py_TPFLAGS_HAVE_GC, /* tp_flags */ + "Cycle objects", /* tp_doc */ + (traverseproc)Cycle_traverse, /* tp_traverse */ + (inquiry)Cycle_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Cycle_methods, /* tp_methods */ + Cycle_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)Cycle_init, /* tp_init */ + 0, /* tp_alloc */ + Cycle_new, /* tp_new */ + }; + """ + w_result = self.sys_info.import_module("cycle", init, + body + methods, + None, None, False) + return w_result + + self.imported_module_names = [] + + wrap = self.space.wrap + self.w_import_cycle_module = wrap(interp2app(import_cycle_module)) + self.w_collect = wrap(interp2app(collect)) + self.w_print_pyobj_list = wrap(interp2app(print_pyobj_list)) + + # def test_free_self_reference_cycle_child_pypyobj(self): + # cycle = 
self.import_cycle_module(""" + # static Cycle *c; + # static PyObject * Cycle_cc(Cycle *self, PyObject *val) + # { + # c = PyObject_GC_New(Cycle, &CycleType); + # if (c == NULL) + # return NULL; + # Py_INCREF(val); + # c->val = val; // set value + # Py_INCREF(c); + # c->next = (PyObject *)c; // create self reference + # Py_INCREF(Py_None); + # return Py_None; + # } + # static PyObject * Cycle_cd(Cycle *self) + # { + # Py_DECREF(c); // throw cycle away + # Py_INCREF(Py_None); + # return Py_None; + # } + # static PyMethodDef module_methods[] = { + # {"createCycle", (PyCFunction)Cycle_cc, METH_OLDARGS, ""}, + # {"discardCycle", (PyCFunction)Cycle_cd, METH_NOARGS, ""}, + # {NULL} /* Sentinel */ + # }; + # """) + # + # class Example(object): + # del_called = -1 + # + # def __init__(self, val): + # self.val = val + # Example.del_called = 0 + # + # def __del__(self): + # Example.del_called = self.val + # + # # don't keep any reference in pypy + # cycle.createCycle(Example(42)) + # self.collect() + # assert Example.del_called == 0 + # cycle.discardCycle() + # self.collect() + # assert Example.del_called == 42 + # + # # keep a temporary reference in pypy + # e = Example(43) + # cycle.createCycle(e) + # cycle.discardCycle() + # self.collect() + # assert Example.del_called == 0 + # e = None + # self.collect() + # assert Example.del_called == 43 + # + # # keep a reference in pypy, free afterwards + # e = Example(44) + # cycle.createCycle(e) + # self.collect() + # assert Example.del_called == 0 + # e = None + # self.collect() + # assert Example.del_called == 0 + # cycle.discardCycle() + # self.collect() + # assert Example.del_called == 44 + # + # def test_free_self_reference_cycle_parent_pypyobj(self): + # # create and return a second object which references the cycle, because + # # otherwise we will end up with a cycle that spans across cpy/pypy, + # # which we don't want to test here + # cycle = self.import_cycle_module(""" + # static PyObject * Cycle_cc(Cycle *self, PyObject 
*val) + # { + # Cycle *c = PyObject_GC_New(Cycle, &CycleType); + # if (c == NULL) + # return NULL; + # Cycle *c2 = PyObject_GC_New(Cycle, &CycleType); + # if (c2 == NULL) + # return NULL; + # Py_INCREF(val); + # c2->val = val; // set value + # Py_INCREF(c2); + # c2->next = (PyObject *)c2; // create self reference + # c->next = (PyObject *)c2; + # return (PyObject *)c; // return other object + # } + # static PyMethodDef module_methods[] = { + # {"createCycle", (PyCFunction)Cycle_cc, METH_OLDARGS, ""}, + # {NULL} /* Sentinel */ + # }; + # """) + # + # class Example(object): + # del_called = -1 + # + # def __init__(self, val): + # self.val = val + # Example.del_called = 0 + # + # def __del__(self): + # Example.del_called = self.val + # + # c = cycle.createCycle(Example(42)) + # self.collect() + # assert Example.del_called == 0 + # c = None + # self.collect() + # assert Example.del_called == 42 + # + # def test_free_simple_cycle_child_pypyobj(self): + # cycle = self.import_cycle_module(""" + # static Cycle *c; + # static PyObject * Cycle_cc(Cycle *self, PyObject *val) + # { + # c = PyObject_GC_New(Cycle, &CycleType); + # if (c == NULL) + # return NULL; + # Cycle *c2 = PyObject_GC_New(Cycle, &CycleType); + # if (c2 == NULL) + # return NULL; + # Py_INCREF(val); + # c->val = val; // set value + # c->next = (PyObject *)c2; + # Py_INCREF(c); + # c2->next = (PyObject *)c; // simple cycle across two objects + # Py_INCREF(Py_None); + # return Py_None; + # } + # static PyObject * Cycle_cd(Cycle *self) + # { + # Py_DECREF(c); // throw cycle away + # Py_INCREF(Py_None); + # return Py_None; + # } + # static PyMethodDef module_methods[] = { + # {"createCycle", (PyCFunction)Cycle_cc, METH_OLDARGS, ""}, + # {"discardCycle", (PyCFunction)Cycle_cd, METH_NOARGS, ""}, + # {NULL} /* Sentinel */ + # }; + # """) + # + # class Example(object): + # del_called = -1 + # + # def __init__(self, val): + # self.val = val + # Example.del_called = 0 + # + # def __del__(self): + # Example.del_called = 
self.val + # + # # don't keep any reference in pypy + # cycle.createCycle(Example(42)) + # self.collect() + # cycle.discardCycle() + # assert Example.del_called == 0 + # self.collect() + # assert Example.del_called == 42 + # + # # keep a temporary reference in pypy + # e = Example(43) + # cycle.createCycle(e) + # cycle.discardCycle() + # self.collect() + # assert Example.del_called == 0 + # e = None + # self.collect() + # assert Example.del_called == 43 + # + # # keep a reference in pypy, free afterwards + # e = Example(44) + # cycle.createCycle(e) + # self.collect() + # assert Example.del_called == 0 + # e = None + # self.collect() + # assert Example.del_called == 0 + # cycle.discardCycle() + # self.collect() + # assert Example.del_called == 44 + # + # + # def test_free_complex_cycle_child_pypyobj(self): + # cycle = self.import_cycle_module(""" + # static PyObject * Cycle_cc(Cycle *self, PyObject *val) + # { + # Cycle *c = PyObject_GC_New(Cycle, &CycleType); + # if (c == NULL) + # return NULL; + # Cycle *c2 = PyObject_GC_New(Cycle, &CycleType); + # if (c2 == NULL) + # return NULL; + # Cycle *c3 = PyObject_GC_New(Cycle, &CycleType); + # if (c3 == NULL) + # return NULL; + # Py_INCREF(val); + # c->val = val; // set value + # Py_INCREF(val); + # c3->val = val; // set value + # Py_INCREF(c2); + # c->next = (PyObject *)c2; + # Py_INCREF(c); + # c2->next = (PyObject *)c; // inner cycle + # Py_INCREF(c3); + # c2->val = (PyObject *)c3; + # Py_INCREF(c); + # c3->next = (PyObject *)c; // outer cycle + # Py_DECREF(c); + # Py_DECREF(c2); + # Py_DECREF(c3); // throw all objects away + # Py_INCREF(Py_None); + # return Py_None; + # } + # static PyMethodDef module_methods[] = { + # {"createCycle", (PyCFunction)Cycle_cc, METH_OLDARGS, ""}, + # {NULL} /* Sentinel */ + # }; + # """) + # + # class Example(object): + # del_called = -1 + # + # def __init__(self, val): + # self.val = val + # Example.del_called = 0 + # + # def __del__(self): + # Example.del_called = self.val + # + # # 
don't keep any reference in pypy + # cycle.createCycle(Example(42)) + # assert Example.del_called == 0 + # self.collect() + # assert Example.del_called == 42 + # + # # keep a temporary reference in pypy + # e = Example(43) + # cycle.createCycle(e) + # e = None + # assert Example.del_called == 0 + # self.collect() + # assert Example.del_called == 43 + # + # # keep a reference in pypy, free afterwards + # e = Example(44) + # cycle.createCycle(e) + # self.collect() + # assert Example.del_called == 0 + # e = None + # self.collect() + # assert Example.del_called == 44 + + def test_objects_in_global_list(self): + cycle = self.import_cycle_module(""" + static PyObject * Cycle_Create(Cycle *self, PyObject *val) + { + Cycle *c = PyObject_GC_New(Cycle, &CycleType); + if (c == NULL) + return NULL; + c->next = val; + return (PyObject *)c; + } + static PyMethodDef module_methods[] = { + {"create", (PyCFunction)Cycle_Create, METH_OLDARGS, ""}, + {NULL} /* Sentinel */ + }; + """) + + class Example(object): + def __init__(self, val): + self.val = val + + c = cycle.create(Example(41)) + + self.print_pyobj_list() + c = cycle.create(Example(42)) + self.print_pyobj_list() + + # TODO: fix rawrefcount, so that the Cycle objects are properly added + # to the ALLOCATED list of leakfinder or alternatively not freed + # by collect diff --git a/rpython/rlib/rawrefcount.py b/rpython/rlib/rawrefcount.py --- a/rpython/rlib/rawrefcount.py +++ b/rpython/rlib/rawrefcount.py @@ -19,6 +19,11 @@ ('c_ob_refcnt', lltype.Signed), ('c_ob_pypy_link', lltype.Signed)) PYOBJ_HDR_PTR = lltype.Ptr(PYOBJ_HDR) +PYOBJ_GC_HDR = lltype.Struct('PyGC_Head', + ('c_gc_next', rffi.VOIDP), + ('c_gc_prev', rffi.VOIDP), + ('c_gc_refs', lltype.Signed)) +PYOBJ_GC_HDR_PTR = lltype.Ptr(PYOBJ_GC_HDR) RAWREFCOUNT_DEALLOC_TRIGGER = lltype.Ptr(lltype.FuncType([], lltype.Void)) VISIT_FUNCTYPE = lltype.Ptr(lltype.FuncType([PYOBJ_HDR_PTR, rffi.VOIDP], rffi.INT_real)) @@ -41,16 +46,22 @@ """ global _p_list, _o_list, _adr2pypy, 
_pypy2ob, _pypy2ob_rev global _d_list, _dealloc_trigger_callback, _tp_traverse, _pygclist + global _pyobj_list _p_list = [] _o_list = [] _adr2pypy = [None] _pypy2ob = {} _pypy2ob_rev = {} _d_list = [] - _d_marker = None _dealloc_trigger_callback = dealloc_trigger_callback _tp_traverse = tp_traverse - _pygclist = pyobj_list + if pyobj_list is not None: + _init_pyobj_list(pyobj_list) + + at not_rpython +def _init_pyobj_list(pyobj_list): + global _pyobj_list + _pyobj_list = rffi.cast(PYOBJ_GC_HDR_PTR, pyobj_list) # def init_traverse(traverse_cpy_call): # global _traverse_cpy_call @@ -214,6 +225,17 @@ _keepalive_forever.add(to_obj(object, ob)) del _d_list[:] + at not_rpython +def _print_pyobj_list(): + "for tests only" + # TODO: change to get_pyobj_list, that returns a list of PyObjects + global _pyobj_list + print "_print_pyobj_list start!" + curr = rffi.cast(PYOBJ_GC_HDR_PTR, _pyobj_list.c_gc_next) + while curr != _pyobj_list: + print "_print_pyobj_list: ", curr + curr = rffi.cast(PYOBJ_GC_HDR_PTR, curr.c_gc_next) + # ____________________________________________________________ From pypy.commits at gmail.com Fri Jan 11 05:39:12 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 02:39:12 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Implemented pyobj as gc and vice-versa Message-ID: <5c387250.1c69fb81.40e21.93c4@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95607:a189719f68e4 Date: 2018-07-06 23:56 +0200 http://bitbucket.org/pypy/pypy/changeset/a189719f68e4/ Log: Implemented pyobj as gc and vice-versa Cleaned cpyext and gc/rawrefcount tests Cleaned translation options diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -43,7 +43,6 @@ from rpython.rlib import rstackovf from pypy.objspace.std.typeobject import W_TypeObject, find_best_base from pypy.module.cpyext.cparser import CTypeSpace -from rpython.rlib.debug import ll_assert, 
debug_print, debug_start, debug_stop DEBUG_WRAPPER = True @@ -753,6 +752,7 @@ PyVarObject = cts.gettype('PyVarObject *') PyGC_Head = cts.gettype('PyGC_Head') PyGC_HeadPtr = cts.gettype('PyGC_Head *') +GCHdr_PyObject = cts.gettype('GCHdr_PyObject *') Py_buffer = cts.gettype('Py_buffer') Py_bufferP = cts.gettype('Py_buffer *') @@ -1175,8 +1175,17 @@ state.C._PyPy_object_dealloc = rffi.llexternal( '_PyPy_object_dealloc', [PyObject], lltype.Void, compilation_info=eci, _nowrapper=True) - state.C._PyPy_InitPyObjList = rffi.llexternal( - '_PyPy_InitPyObjList', [], PyGC_HeadPtr, + state.C._PyPy_subtype_dealloc = rffi.llexternal( + '_PyPy_subtype_dealloc', [PyObject], lltype.Void, + compilation_info=eci, _nowrapper=True) + state.C._PyPy_init_pyobj_list = rffi.llexternal( + '_PyPy_init_pyobj_list', [], PyGC_HeadPtr, + compilation_info=eci, _nowrapper=True) + state.C._PyPy_gc_as_pyobj = rffi.llexternal( + '_PyPy_gc_as_pyobj', [PyGC_HeadPtr], GCHdr_PyObject, + compilation_info=eci, _nowrapper=True) + state.C._PyPy_pyobj_as_gc = rffi.llexternal( + '_PyPy_pyobj_as_gc', [GCHdr_PyObject], PyGC_HeadPtr, compilation_info=eci, _nowrapper=True) @@ -1300,7 +1309,7 @@ ctypes.c_void_p) # initialize the pyobj_list for the gc - pyobj_list = space.fromcache(State).C._PyPy_InitPyObjList() + pyobj_list = space.fromcache(State).C._PyPy_init_pyobj_list() rawrefcount._init_pyobj_list(pyobj_list) # we need to call this *after* the init code above, because it might @@ -1309,9 +1318,6 @@ # _PyPy_Malloc) builder.attach_all(space) - #import rpython.rlib.rawrefcount - #rawrefcount.init_traverse(generic_cpy_call_gc) - setup_init_functions(eci, prefix) return modulename.new(ext='') diff --git a/pypy/module/cpyext/include/object.h b/pypy/module/cpyext/include/object.h --- a/pypy/module/cpyext/include/object.h +++ b/pypy/module/cpyext/include/object.h @@ -326,8 +326,8 @@ #define _PyGC_REFS(o) _PyGCHead_REFS(_Py_AS_GC(o)) -#define _PyGC_REFS_UNTRACKED (-2) -#define _PyGC_REFS_REACHABLE (-3) +#define 
_PyGC_REFS_UNTRACKED (-2) +#define _PyGC_REFS_REACHABLE (-3) #define _PyGC_REFS_TENTATIVELY_UNREACHABLE (-4) #define _PyGC_IS_TRACKED(o) (_PyGC_REFS(o) != _PyGC_REFS_UNTRACKED) @@ -435,7 +435,9 @@ #define _PyObject_GC_Del PyObject_GC_Del PyAPI_FUNC(void) _PyPy_subtype_dealloc(PyObject *); PyAPI_FUNC(void) _PyPy_object_dealloc(PyObject *); -PyAPI_FUNC(PyGC_Head *) _PyPy_InitPyObjList(); +PyAPI_FUNC(PyGC_Head *) _PyPy_init_pyobj_list(); +PyAPI_FUNC(GCHdr_PyObject *) _PyPy_gc_as_pyobj(PyGC_Head *); +PyAPI_FUNC(PyGC_Head *) _PyPy_pyobj_as_gc(GCHdr_PyObject *); #ifdef __cplusplus } diff --git a/pypy/module/cpyext/parse/cpyext_object.h b/pypy/module/cpyext/parse/cpyext_object.h --- a/pypy/module/cpyext/parse/cpyext_object.h +++ b/pypy/module/cpyext/parse/cpyext_object.h @@ -324,7 +324,12 @@ typedef struct _gc_head { - void *gc_next; - void *gc_prev; + struct _gc_head *gc_next; + struct _gc_head *gc_prev; Py_ssize_t gc_refs; -} PyGC_Head; \ No newline at end of file +} PyGC_Head; + +typedef struct _gchdr_pyobject { + Py_ssize_t ob_refcnt; + Py_ssize_t ob_pypy_link; +} GCHdr_PyObject; \ No newline at end of file diff --git a/pypy/module/cpyext/src/object.c b/pypy/module/cpyext/src/object.c --- a/pypy/module/cpyext/src/object.c +++ b/pypy/module/cpyext/src/object.c @@ -38,13 +38,25 @@ PyGC_Head *_pypy_rawrefcount_pyobj_list = &_internal_pyobj_list; PyGC_Head * -_PyPy_InitPyObjList() +_PyPy_init_pyobj_list() { _pypy_rawrefcount_pyobj_list->gc_next = _pypy_rawrefcount_pyobj_list; _pypy_rawrefcount_pyobj_list->gc_prev = _pypy_rawrefcount_pyobj_list; return _pypy_rawrefcount_pyobj_list; } +GCHdr_PyObject * +_PyPy_gc_as_pyobj(PyGC_Head *g) +{ + return (GCHdr_PyObject *)FROM_GC(g); +} + +PyGC_Head * +_PyPy_pyobj_as_gc(GCHdr_PyObject *obj) +{ + return AS_GC(obj); +} + void _Py_Dealloc(PyObject *obj) { @@ -118,7 +130,7 @@ if (type->tp_itemsize) size += nitems * type->tp_itemsize; - g = (PyObject*)_PyPy_Malloc(size); + g = (PyGC_Head*)_PyPy_Malloc(size); if (g == NULL) return NULL; 
g->gc_refs = 0; diff --git a/pypy/module/cpyext/state.py b/pypy/module/cpyext/state.py --- a/pypy/module/cpyext/state.py +++ b/pypy/module/cpyext/state.py @@ -2,7 +2,7 @@ from rpython.rtyper.lltypesystem import rffi, lltype, llmemory from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter import executioncontext -from rpython.rtyper.annlowlevel import llhelper, llhelper_args +from rpython.rtyper.annlowlevel import llhelper from rpython.rlib.rdynload import DLLHANDLE from rpython.rlib import rawrefcount import sys @@ -85,7 +85,7 @@ pyobj_dealloc_action = PyObjDeallocAction(space) self.dealloc_trigger = lambda: pyobj_dealloc_action.fire() - def _rawrefcount_tp_traverse(pyobj_ptr, callback, args): + def _tp_traverse(pyobj_ptr, callback, args): from pypy.module.cpyext.api import PyObject from pypy.module.cpyext.typeobjectdefs import visitproc # convert to pointers with correct types (PyObject) @@ -98,8 +98,8 @@ if pyobj.c_ob_type and pyobj.c_ob_type.c_tp_traverse: pyobj.c_ob_type.c_tp_traverse(pyobj, callback_ptr, args) - self.tp_traverse = (lambda o, v, a: - _rawrefcount_tp_traverse(o, v, a)) + + self.tp_traverse = (lambda o, v, a:_tp_traverse(o, v, a)) def build_api(self): """NOT_RPYTHON @@ -130,13 +130,14 @@ if space.config.translation.gc != "boehm": # This must be called in RPython, the untranslated version # does something different. Sigh. 
- pypyobj_list = self.C._PyPy_InitPyObjList() + pypyobj_list = self.C._PyPy_init_pyobj_list() rawrefcount.init( llhelper(rawrefcount.RAWREFCOUNT_DEALLOC_TRIGGER, - self.dealloc_trigger), + self.dealloc_trigger), llhelper(rawrefcount.RAWREFCOUNT_TRAVERSE, - self.tp_traverse), - pypyobj_list) + self.tp_traverse), + pypyobj_list, + self.C._PyPy_gc_as_pyobj, self.C._PyPy_pyobj_as_gc) self.builder.attach_all(space) setup_new_method_def(space) diff --git a/pypy/module/cpyext/test/test_bytesobject.py b/pypy/module/cpyext/test/test_bytesobject.py --- a/pypy/module/cpyext/test/test_bytesobject.py +++ b/pypy/module/cpyext/test/test_bytesobject.py @@ -9,8 +9,7 @@ PyString_ConcatAndDel, PyString_Format, PyString_InternFromString, PyString_AsEncodedObject, PyString_AsDecodedObject, _PyString_Eq, _PyString_Join) -from pypy.module.cpyext.api import PyObjectP, PyObject, Py_ssize_tP, \ - generic_cpy_call +from pypy.module.cpyext.api import PyObjectP, PyObject, Py_ssize_tP, generic_cpy_call from pypy.module.cpyext.pyobject import decref, from_ref, make_ref from pypy.module.cpyext.buffer import PyObject_AsCharBuffer from pypy.module.cpyext.api import PyTypeObjectPtr diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -214,6 +214,8 @@ def debug_collect(space): rawrefcount._collect() +def print_pyobj_list(space): + rawrefcount._print_pyobj_list() class AppTestCpythonExtensionBase(LeakCheckingTest): @@ -225,6 +227,7 @@ if not cls.runappdirect: cls.sys_info = get_cpyext_info(space) cls.w_debug_collect = space.wrap(interp2app(debug_collect)) + cls.w_print_pyobj_list = space.wrap(interp2app(print_pyobj_list)) cls.preload_builtins(space) else: def w_import_module(self, name, init=None, body='', filename=None, @@ -319,7 +322,6 @@ self.record_imported_module(name) return w_result - @unwrap_spec(mod='text', name='text') def load_module(space, mod, name): return 
self.sys_info.load_module(mod, name) @@ -926,3 +928,179 @@ ''' ), ]) + + def test_gc_pyobj_list(self): + """ + Test if Py_GC_Track and Py_GC_Untrack are adding and removing container + objects from the list of all garbage-collected PyObjects. + """ + if self.runappdirect: + skip('cannot import module with undefined functions') + + # TODO: remove unnecessary stuff, add tests for gc_untrack, add asserts + init = """ + if (Py_IsInitialized()) { + PyObject* m; + if (PyType_Ready(&CycleType) < 0) + return; + m = Py_InitModule("cycle", module_methods); + if (m == NULL) + return; + Py_INCREF(&CycleType); + PyModule_AddObject(m, "Cycle", (PyObject *)&CycleType); + } + """ + body = """ + #include + #include "structmember.h" + typedef struct { + PyObject_HEAD + PyObject *next; + PyObject *val; + } Cycle; + static PyTypeObject CycleType; + static int Cycle_traverse(Cycle *self, visitproc visit, void *arg) + { + int vret; + if (self->next) { + vret = visit(self->next, arg); + if (vret != 0) + return vret; + } + if (self->val) { + vret = visit(self->val, arg); + if (vret != 0) + return vret; + } + return 0; + } + static int Cycle_clear(Cycle *self) + { + PyObject *tmp; + tmp = self->next; + self->next = NULL; + Py_XDECREF(tmp); + tmp = self->val; + self->val = NULL; + Py_XDECREF(tmp); + return 0; + } + static void Cycle_dealloc(Cycle* self) + { + Cycle_clear(self); + Py_TYPE(self)->tp_free((PyObject*)self); + } + static PyObject* Cycle_new(PyTypeObject *type, PyObject *args, + PyObject *kwds) + { + Cycle *self; + self = (Cycle *)type->tp_alloc(type, 0); + if (self != NULL) { + self->next = PyString_FromString(""); + if (self->next == NULL) { + Py_DECREF(self); + return NULL; + } + } + PyObject_GC_Track(self); + return (PyObject *)self; + } + static int Cycle_init(Cycle *self, PyObject *args, PyObject *kwds) + { + PyObject *next=NULL, *tmp; + static char *kwlist[] = {"next", NULL}; + if (! 
PyArg_ParseTupleAndKeywords(args, kwds, "|O", kwlist, + &next)) + return -1; + if (next) { + tmp = self->next; + Py_INCREF(next); + self->next = next; + Py_XDECREF(tmp); + } + return 0; + } + static PyMemberDef Cycle_members[] = { + {"next", T_OBJECT_EX, offsetof(Cycle, next), 0, "next"}, + {"val", T_OBJECT_EX, offsetof(Cycle, val), 0, "val"}, + {NULL} /* Sentinel */ + }; + static PyMethodDef Cycle_methods[] = { + {NULL} /* Sentinel */ + }; + static PyTypeObject CycleType = { + PyVarObject_HEAD_INIT(NULL, 0) + "Cycle.Cycle", /* tp_name */ + sizeof(Cycle), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)Cycle_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | + Py_TPFLAGS_BASETYPE | + Py_TPFLAGS_HAVE_GC, /* tp_flags */ + "Cycle objects", /* tp_doc */ + (traverseproc)Cycle_traverse, /* tp_traverse */ + (inquiry)Cycle_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Cycle_methods, /* tp_methods */ + Cycle_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)Cycle_init, /* tp_init */ + 0, /* tp_alloc */ + Cycle_new, /* tp_new */ + }; + + extern PyGC_Head *_pypy_rawrefcount_pyobj_list; + + static PyObject * Cycle_Create(Cycle *self, PyObject *val) + { + Cycle *c = PyObject_GC_New(Cycle, &CycleType); + if (c == NULL) + return NULL; + c->next = val; + + // TODO: check if _pypy_rawrefcount_pyobj_list contains c + + return (PyObject *)c; + } + static PyMethodDef module_methods[] = { + {"create", (PyCFunction)Cycle_Create, METH_OLDARGS, ""}, + {NULL} /* Sentinel */ + }; + """ + 
module = self.import_module(name='cycle', init=init, body=body) + + class Example(object): + def __init__(self, val): + self.val = val + + c = module.create(Example(41)) + + self.print_pyobj_list() + c = module.create(Example(42)) + self.print_pyobj_list() + + # TODO: fix rawrefcount, so that the Cycle objects are properly added + # to the ALLOCATED list of leakfinder or alternatively not freed + # by collect diff --git a/pypy/module/cpyext/test/test_cpyext_gc.py b/pypy/module/cpyext/test/test_cpyext_gc.py deleted file mode 100644 --- a/pypy/module/cpyext/test/test_cpyext_gc.py +++ /dev/null @@ -1,801 +0,0 @@ -import sys -import weakref - -import pytest - -from pypy.tool.cpyext.extbuild import ( - SystemCompilationInfo, HERE, get_sys_info_app) -from pypy.interpreter.gateway import unwrap_spec, interp2app -from rpython.rtyper.lltypesystem import lltype, ll2ctypes -from pypy.module.cpyext import api -from pypy.module.cpyext.state import State -from rpython.tool.identity_dict import identity_dict -from rpython.tool import leakfinder -from rpython.rlib import rawrefcount -from rpython.tool.udir import udir - -only_pypy ="config.option.runappdirect and '__pypy__' not in sys.builtin_module_names" - - at api.cpython_api([], api.PyObject) -def PyPy_Crash1(space): - 1/0 - - at api.cpython_api([], lltype.Signed, error=-1) -def PyPy_Crash2(space): - 1/0 - -class SpaceCompiler(SystemCompilationInfo): - """Extension compiler for regular (untranslated PyPy) mode""" - def __init__(self, space, *args, **kwargs): - self.space = space - SystemCompilationInfo.__init__(self, *args, **kwargs) - - def load_module(self, mod, name): - space = self.space - api.load_extension_module(space, mod, name) - return space.getitem( - space.sys.get('modules'), space.wrap(name)) - - -def get_cpyext_info(space): - from pypy.module.imp.importing import get_so_extension - state = space.fromcache(State) - api_library = state.api_lib - if sys.platform == 'win32': - libraries = [api_library] - # '%s' 
undefined; assuming extern returning int - compile_extra = ["/we4013"] - # prevent linking with PythonXX.lib - w_maj, w_min = space.fixedview(space.sys.get('version_info'), 5)[:2] - link_extra = ["/NODEFAULTLIB:Python%d%d.lib" % - (space.int_w(w_maj), space.int_w(w_min))] - else: - libraries = [] - if sys.platform.startswith('linux'): - compile_extra = [ - "-Werror", "-g", "-O0", "-Wp,-U_FORTIFY_SOURCE", "-fPIC"] - link_extra = ["-g"] - else: - compile_extra = link_extra = None - return SpaceCompiler(space, - builddir_base=udir, - include_extra=api.include_dirs, - compile_extra=compile_extra, - link_extra=link_extra, - extra_libs=libraries, - ext=get_so_extension(space)) - - -def freeze_refcnts(self): - rawrefcount._dont_free_any_more() - return #ZZZ - state = self.space.fromcache(RefcountState) - self.frozen_refcounts = {} - for w_obj, obj in state.py_objects_w2r.iteritems(): - self.frozen_refcounts[w_obj] = obj.c_ob_refcnt - #state.print_refcounts() - self.frozen_ll2callocations = set(ll2ctypes.ALLOCATED.values()) - -class LeakCheckingTest(object): - """Base class for all cpyext tests.""" - spaceconfig = dict(usemodules=['cpyext', 'thread', 'struct', 'array', - 'itertools', 'time', 'binascii', - 'micronumpy', 'mmap' - ]) - - enable_leak_checking = True - - @staticmethod - def cleanup_references(space): - return #ZZZ - state = space.fromcache(RefcountState) - - import gc; gc.collect() - # Clear all lifelines, objects won't resurrect - for w_obj, obj in state.lifeline_dict._dict.items(): - if w_obj not in state.py_objects_w2r: - state.lifeline_dict.set(w_obj, None) - del obj - import gc; gc.collect() - - - for w_obj in state.non_heaptypes_w: - w_obj.c_ob_refcnt -= 1 - state.non_heaptypes_w[:] = [] - state.reset_borrowed_references() - - def check_and_print_leaks(self): - rawrefcount._collect() - # check for sane refcnts - import gc - - if 1: #ZZZ not self.enable_leak_checking: - leakfinder.stop_tracking_allocations(check=False) - return False - - leaking = False - 
state = self.space.fromcache(RefcountState) - gc.collect() - lost_objects_w = identity_dict() - lost_objects_w.update((key, None) for key in self.frozen_refcounts.keys()) - - for w_obj, obj in state.py_objects_w2r.iteritems(): - base_refcnt = self.frozen_refcounts.get(w_obj) - delta = obj.c_ob_refcnt - if base_refcnt is not None: - delta -= base_refcnt - lost_objects_w.pop(w_obj) - if delta != 0: - leaking = True - print >>sys.stderr, "Leaking %r: %i references" % (w_obj, delta) - try: - weakref.ref(w_obj) - except TypeError: - lifeline = None - else: - lifeline = state.lifeline_dict.get(w_obj) - if lifeline is not None: - refcnt = lifeline.pyo.c_ob_refcnt - if refcnt > 0: - print >>sys.stderr, "\tThe object also held by C code." - else: - referrers_repr = [] - for o in gc.get_referrers(w_obj): - try: - repr_str = repr(o) - except TypeError as e: - repr_str = "%s (type of o is %s)" % (str(e), type(o)) - referrers_repr.append(repr_str) - referrers = ", ".join(referrers_repr) - print >>sys.stderr, "\tThe object is referenced by these objects:", \ - referrers - for w_obj in lost_objects_w: - print >>sys.stderr, "Lost object %r" % (w_obj, ) - leaking = True - # the actual low-level leak checking is done by pypy.tool.leakfinder, - # enabled automatically by pypy.conftest. - return leaking - -class AppTestApi(LeakCheckingTest): - def setup_class(cls): - from rpython.rlib.clibffi import get_libc_name - if cls.runappdirect: - cls.libc = get_libc_name() - else: - cls.w_libc = cls.space.wrap(get_libc_name()) - - def setup_method(self, meth): - if not self.runappdirect: - freeze_refcnts(self) - - def teardown_method(self, meth): - if self.runappdirect: - return - self.space.getexecutioncontext().cleanup_cpyext_state() - self.cleanup_references(self.space) - # XXX: like AppTestCpythonExtensionBase.teardown_method: - # find out how to disable check_and_print_leaks() if the - # test failed - assert not self.check_and_print_leaks(), ( - "Test leaks or loses object(s). 
You should also check if " - "the test actually passed in the first place; if it failed " - "it is likely to reach this place.") - - -def _unwrap_include_dirs(space, w_include_dirs): - if w_include_dirs is None: - return None - else: - return [space.str_w(s) for s in space.listview(w_include_dirs)] - -def debug_collect(space): - rawrefcount._collect() - -class AppTestCpythonExtensionBase(LeakCheckingTest): - - def setup_class(cls): - space = cls.space - cls.w_here = space.wrap(str(HERE)) - cls.w_udir = space.wrap(str(udir)) - cls.w_runappdirect = space.wrap(cls.runappdirect) - if not cls.runappdirect: - cls.sys_info = get_cpyext_info(space) - space.getbuiltinmodule("cpyext") - # 'import os' to warm up reference counts - w_import = space.builtin.getdictvalue(space, '__import__') - space.call_function(w_import, space.wrap("os")) - #state = cls.space.fromcache(RefcountState) ZZZ - #state.non_heaptypes_w[:] = [] - cls.w_debug_collect = space.wrap(interp2app(debug_collect)) - else: - def w_import_module(self, name, init=None, body='', filename=None, - include_dirs=None, PY_SSIZE_T_CLEAN=False): - from extbuild import get_sys_info_app - sys_info = get_sys_info_app(self.udir) - return sys_info.import_module( - name, init=init, body=body, filename=filename, - include_dirs=include_dirs, - PY_SSIZE_T_CLEAN=PY_SSIZE_T_CLEAN) - cls.w_import_module = w_import_module - - def w_import_extension(self, modname, functions, prologue="", - include_dirs=None, more_init="", PY_SSIZE_T_CLEAN=False): - from extbuild import get_sys_info_app - sys_info = get_sys_info_app(self.udir) - return sys_info.import_extension( - modname, functions, prologue=prologue, - include_dirs=include_dirs, more_init=more_init, - PY_SSIZE_T_CLEAN=PY_SSIZE_T_CLEAN) - cls.w_import_extension = w_import_extension - - def w_compile_module(self, name, - source_files=None, source_strings=None): - from extbuild import get_sys_info_app - sys_info = get_sys_info_app(self.udir) - return 
sys_info.compile_extension_module(name, - source_files=source_files, source_strings=source_strings) - cls.w_compile_module = w_compile_module - - def w_load_module(self, mod, name): - from extbuild import get_sys_info_app - sys_info = get_sys_info_app(self.udir) - return sys_info.load_module(mod, name) - cls.w_load_module = w_load_module - - def w_debug_collect(self): - import gc - gc.collect() - gc.collect() - gc.collect() - cls.w_debug_collect = w_debug_collect - - - def record_imported_module(self, name): - """ - Record a module imported in a test so that it can be cleaned up in - teardown before the check for leaks is done. - - name gives the name of the module in the space's sys.modules. - """ - self.imported_module_names.append(name) - - def setup_method(self, func): - if self.runappdirect: - return - - @unwrap_spec(name='text') - def compile_module(space, name, - w_source_files=None, - w_source_strings=None): - """ - Build an extension module linked against the cpyext api library. 
- """ - if not space.is_none(w_source_files): - source_files = space.listview_bytes(w_source_files) - else: - source_files = None - if not space.is_none(w_source_strings): - source_strings = space.listview_bytes(w_source_strings) - else: - source_strings = None - pydname = self.sys_info.compile_extension_module( - name, - source_files=source_files, - source_strings=source_strings) - - # hackish, but tests calling compile_module() always end up - # importing the result - self.record_imported_module(name) - - return space.wrap(pydname) - - @unwrap_spec(name='text', init='text_or_none', body='text', - filename='fsencode_or_none', PY_SSIZE_T_CLEAN=bool) - def import_module(space, name, init=None, body='', - filename=None, w_include_dirs=None, - PY_SSIZE_T_CLEAN=False): - include_dirs = _unwrap_include_dirs(space, w_include_dirs) - w_result = self.sys_info.import_module( - name, init, body, filename, include_dirs, PY_SSIZE_T_CLEAN) - self.record_imported_module(name) - return w_result - - - @unwrap_spec(mod='text', name='text') - def load_module(space, mod, name): - return self.sys_info.load_module(mod, name) - - @unwrap_spec(modname='text', prologue='text', - more_init='text', PY_SSIZE_T_CLEAN=bool) - def import_extension(space, modname, w_functions, prologue="", - w_include_dirs=None, more_init="", PY_SSIZE_T_CLEAN=False): - functions = space.unwrap(w_functions) - include_dirs = _unwrap_include_dirs(space, w_include_dirs) - w_result = self.sys_info.import_extension( - modname, functions, prologue, include_dirs, more_init, - PY_SSIZE_T_CLEAN) - self.record_imported_module(modname) - return w_result - - # A list of modules which the test caused to be imported (in - # self.space). These will be cleaned up automatically in teardown. 
- self.imported_module_names = [] - - wrap = self.space.wrap - self.w_compile_module = wrap(interp2app(compile_module)) - self.w_load_module = wrap(interp2app(load_module)) - self.w_import_module = wrap(interp2app(import_module)) - self.w_import_extension = wrap(interp2app(import_extension)) - - # create the file lock before we count allocations - self.space.call_method(self.space.sys.get("stdout"), "flush") - - freeze_refcnts(self) - #self.check_and_print_leaks() - - def unimport_module(self, name): - """ - Remove the named module from the space's sys.modules. - """ - w_modules = self.space.sys.get('modules') - w_name = self.space.wrap(name) - self.space.delitem(w_modules, w_name) - - def teardown_method(self, func): - if self.runappdirect: - return - for name in self.imported_module_names: - self.unimport_module(name) - self.space.getexecutioncontext().cleanup_cpyext_state() - self.cleanup_references(self.space) - # XXX: find out how to disable check_and_print_leaks() if the - # test failed... - assert not self.check_and_print_leaks(), ( - "Test leaks or loses object(s). 
You should also check if " - "the test actually passed in the first place; if it failed " - "it is likely to reach this place.") - -def collect(space): - import gc - rawrefcount._collect() - gc.collect(2) - -def print_pyobj_list(space): - rawrefcount._print_pyobj_list() - -class AppTestCpythonExtensionCycleGC(AppTestCpythonExtensionBase): - - def setup_method(self, func): - if self.runappdirect: - return - - @unwrap_spec(methods='text') - def import_cycle_module(space, methods): - init = """ - if (Py_IsInitialized()) { - PyObject* m; - if (PyType_Ready(&CycleType) < 0) - return; - m = Py_InitModule("cycle", module_methods); - if (m == NULL) - return; - Py_INCREF(&CycleType); - PyModule_AddObject(m, "Cycle", (PyObject *)&CycleType); - } - """ - body = """ - #include - #include "structmember.h" - typedef struct { - PyObject_HEAD - PyObject *next; - PyObject *val; - } Cycle; - static PyTypeObject CycleType; - static int Cycle_traverse(Cycle *self, visitproc visit, void *arg) - { - int vret; - if (self->next) { - vret = visit(self->next, arg); - if (vret != 0) - return vret; - } - if (self->val) { - vret = visit(self->val, arg); - if (vret != 0) - return vret; - } - return 0; - } - static int Cycle_clear(Cycle *self) - { - PyObject *tmp; - tmp = self->next; - self->next = NULL; - Py_XDECREF(tmp); - tmp = self->val; - self->val = NULL; - Py_XDECREF(tmp); - return 0; - } - static void Cycle_dealloc(Cycle* self) - { - Cycle_clear(self); - Py_TYPE(self)->tp_free((PyObject*)self); - } - static PyObject* Cycle_new(PyTypeObject *type, PyObject *args, - PyObject *kwds) - { - Cycle *self; - self = (Cycle *)type->tp_alloc(type, 0); - if (self != NULL) { - self->next = PyString_FromString(""); - if (self->next == NULL) { - Py_DECREF(self); - return NULL; - } - } - PyObject_GC_Track(self); - return (PyObject *)self; - } - static int Cycle_init(Cycle *self, PyObject *args, PyObject *kwds) - { - PyObject *next=NULL, *tmp; - static char *kwlist[] = {"next", NULL}; - if (! 
PyArg_ParseTupleAndKeywords(args, kwds, "|O", kwlist, - &next)) - return -1; - if (next) { - tmp = self->next; - Py_INCREF(next); - self->next = next; - Py_XDECREF(tmp); - } - return 0; - } - static PyMemberDef Cycle_members[] = { - {"next", T_OBJECT_EX, offsetof(Cycle, next), 0, "next"}, - {"val", T_OBJECT_EX, offsetof(Cycle, val), 0, "val"}, - {NULL} /* Sentinel */ - }; - static PyMethodDef Cycle_methods[] = { - {NULL} /* Sentinel */ - }; - static PyTypeObject CycleType = { - PyVarObject_HEAD_INIT(NULL, 0) - "Cycle.Cycle", /* tp_name */ - sizeof(Cycle), /* tp_basicsize */ - 0, /* tp_itemsize */ - (destructor)Cycle_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | - Py_TPFLAGS_BASETYPE | - Py_TPFLAGS_HAVE_GC, /* tp_flags */ - "Cycle objects", /* tp_doc */ - (traverseproc)Cycle_traverse, /* tp_traverse */ - (inquiry)Cycle_clear, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - Cycle_methods, /* tp_methods */ - Cycle_members, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)Cycle_init, /* tp_init */ - 0, /* tp_alloc */ - Cycle_new, /* tp_new */ - }; - """ - w_result = self.sys_info.import_module("cycle", init, - body + methods, - None, None, False) - return w_result - - self.imported_module_names = [] - - wrap = self.space.wrap - self.w_import_cycle_module = wrap(interp2app(import_cycle_module)) - self.w_collect = wrap(interp2app(collect)) - self.w_print_pyobj_list = wrap(interp2app(print_pyobj_list)) - - # def test_free_self_reference_cycle_child_pypyobj(self): - # cycle = 
self.import_cycle_module(""" - # static Cycle *c; - # static PyObject * Cycle_cc(Cycle *self, PyObject *val) - # { - # c = PyObject_GC_New(Cycle, &CycleType); - # if (c == NULL) - # return NULL; - # Py_INCREF(val); - # c->val = val; // set value - # Py_INCREF(c); - # c->next = (PyObject *)c; // create self reference - # Py_INCREF(Py_None); - # return Py_None; - # } - # static PyObject * Cycle_cd(Cycle *self) - # { - # Py_DECREF(c); // throw cycle away - # Py_INCREF(Py_None); - # return Py_None; - # } - # static PyMethodDef module_methods[] = { - # {"createCycle", (PyCFunction)Cycle_cc, METH_OLDARGS, ""}, - # {"discardCycle", (PyCFunction)Cycle_cd, METH_NOARGS, ""}, - # {NULL} /* Sentinel */ - # }; - # """) - # - # class Example(object): - # del_called = -1 - # - # def __init__(self, val): - # self.val = val - # Example.del_called = 0 - # - # def __del__(self): - # Example.del_called = self.val - # - # # don't keep any reference in pypy - # cycle.createCycle(Example(42)) - # self.collect() - # assert Example.del_called == 0 - # cycle.discardCycle() - # self.collect() - # assert Example.del_called == 42 - # - # # keep a temporary reference in pypy - # e = Example(43) - # cycle.createCycle(e) - # cycle.discardCycle() - # self.collect() - # assert Example.del_called == 0 - # e = None - # self.collect() - # assert Example.del_called == 43 - # - # # keep a reference in pypy, free afterwards - # e = Example(44) - # cycle.createCycle(e) - # self.collect() - # assert Example.del_called == 0 - # e = None - # self.collect() - # assert Example.del_called == 0 - # cycle.discardCycle() - # self.collect() - # assert Example.del_called == 44 - # - # def test_free_self_reference_cycle_parent_pypyobj(self): - # # create and return a second object which references the cycle, because - # # otherwise we will end up with a cycle that spans across cpy/pypy, - # # which we don't want to test here - # cycle = self.import_cycle_module(""" - # static PyObject * Cycle_cc(Cycle *self, PyObject 
*val) - # { - # Cycle *c = PyObject_GC_New(Cycle, &CycleType); - # if (c == NULL) - # return NULL; - # Cycle *c2 = PyObject_GC_New(Cycle, &CycleType); - # if (c2 == NULL) - # return NULL; - # Py_INCREF(val); - # c2->val = val; // set value - # Py_INCREF(c2); - # c2->next = (PyObject *)c2; // create self reference - # c->next = (PyObject *)c2; - # return (PyObject *)c; // return other object - # } - # static PyMethodDef module_methods[] = { - # {"createCycle", (PyCFunction)Cycle_cc, METH_OLDARGS, ""}, - # {NULL} /* Sentinel */ - # }; - # """) - # - # class Example(object): - # del_called = -1 - # - # def __init__(self, val): - # self.val = val - # Example.del_called = 0 - # - # def __del__(self): - # Example.del_called = self.val - # - # c = cycle.createCycle(Example(42)) - # self.collect() - # assert Example.del_called == 0 - # c = None - # self.collect() - # assert Example.del_called == 42 - # - # def test_free_simple_cycle_child_pypyobj(self): - # cycle = self.import_cycle_module(""" - # static Cycle *c; - # static PyObject * Cycle_cc(Cycle *self, PyObject *val) - # { - # c = PyObject_GC_New(Cycle, &CycleType); - # if (c == NULL) - # return NULL; - # Cycle *c2 = PyObject_GC_New(Cycle, &CycleType); - # if (c2 == NULL) - # return NULL; - # Py_INCREF(val); - # c->val = val; // set value - # c->next = (PyObject *)c2; - # Py_INCREF(c); - # c2->next = (PyObject *)c; // simple cycle across two objects - # Py_INCREF(Py_None); - # return Py_None; - # } - # static PyObject * Cycle_cd(Cycle *self) - # { - # Py_DECREF(c); // throw cycle away - # Py_INCREF(Py_None); - # return Py_None; - # } - # static PyMethodDef module_methods[] = { - # {"createCycle", (PyCFunction)Cycle_cc, METH_OLDARGS, ""}, - # {"discardCycle", (PyCFunction)Cycle_cd, METH_NOARGS, ""}, - # {NULL} /* Sentinel */ - # }; - # """) - # - # class Example(object): - # del_called = -1 - # - # def __init__(self, val): - # self.val = val - # Example.del_called = 0 - # - # def __del__(self): - # Example.del_called = 
self.val - # - # # don't keep any reference in pypy - # cycle.createCycle(Example(42)) - # self.collect() - # cycle.discardCycle() - # assert Example.del_called == 0 - # self.collect() - # assert Example.del_called == 42 - # - # # keep a temporary reference in pypy - # e = Example(43) - # cycle.createCycle(e) - # cycle.discardCycle() - # self.collect() - # assert Example.del_called == 0 - # e = None - # self.collect() - # assert Example.del_called == 43 - # - # # keep a reference in pypy, free afterwards - # e = Example(44) - # cycle.createCycle(e) - # self.collect() - # assert Example.del_called == 0 - # e = None - # self.collect() - # assert Example.del_called == 0 - # cycle.discardCycle() - # self.collect() - # assert Example.del_called == 44 - # - # - # def test_free_complex_cycle_child_pypyobj(self): - # cycle = self.import_cycle_module(""" - # static PyObject * Cycle_cc(Cycle *self, PyObject *val) - # { - # Cycle *c = PyObject_GC_New(Cycle, &CycleType); - # if (c == NULL) - # return NULL; - # Cycle *c2 = PyObject_GC_New(Cycle, &CycleType); - # if (c2 == NULL) - # return NULL; - # Cycle *c3 = PyObject_GC_New(Cycle, &CycleType); - # if (c3 == NULL) - # return NULL; - # Py_INCREF(val); - # c->val = val; // set value - # Py_INCREF(val); - # c3->val = val; // set value - # Py_INCREF(c2); - # c->next = (PyObject *)c2; - # Py_INCREF(c); - # c2->next = (PyObject *)c; // inner cycle - # Py_INCREF(c3); - # c2->val = (PyObject *)c3; - # Py_INCREF(c); - # c3->next = (PyObject *)c; // outer cycle - # Py_DECREF(c); - # Py_DECREF(c2); - # Py_DECREF(c3); // throw all objects away - # Py_INCREF(Py_None); - # return Py_None; - # } - # static PyMethodDef module_methods[] = { - # {"createCycle", (PyCFunction)Cycle_cc, METH_OLDARGS, ""}, - # {NULL} /* Sentinel */ - # }; - # """) - # - # class Example(object): - # del_called = -1 - # - # def __init__(self, val): - # self.val = val - # Example.del_called = 0 - # - # def __del__(self): - # Example.del_called = self.val - # - # # 
don't keep any reference in pypy - # cycle.createCycle(Example(42)) - # assert Example.del_called == 0 - # self.collect() - # assert Example.del_called == 42 - # - # # keep a temporary reference in pypy - # e = Example(43) - # cycle.createCycle(e) - # e = None - # assert Example.del_called == 0 - # self.collect() - # assert Example.del_called == 43 - # - # # keep a reference in pypy, free afterwards - # e = Example(44) - # cycle.createCycle(e) - # self.collect() - # assert Example.del_called == 0 - # e = None - # self.collect() - # assert Example.del_called == 44 - - def test_objects_in_global_list(self): - cycle = self.import_cycle_module(""" - static PyObject * Cycle_Create(Cycle *self, PyObject *val) - { - Cycle *c = PyObject_GC_New(Cycle, &CycleType); - if (c == NULL) - return NULL; - c->next = val; - return (PyObject *)c; - } - static PyMethodDef module_methods[] = { - {"create", (PyCFunction)Cycle_Create, METH_OLDARGS, ""}, - {NULL} /* Sentinel */ - }; - """) - - class Example(object): - def __init__(self, val): - self.val = val - - c = cycle.create(Example(41)) - - self.print_pyobj_list() - c = cycle.create(Example(42)) - self.print_pyobj_list() - - # TODO: fix rawrefcount, so that the Cycle objects are properly added - # to the ALLOCATED list of leakfinder or alternatively not freed - # by collect diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py --- a/rpython/config/translationoption.py +++ b/rpython/config/translationoption.py @@ -106,11 +106,11 @@ ("translation.backend", "c")], }), ChoiceOption("cpyextgc", "Garbage Collection Strategy for cpyext", - ["boehm", "ref", "ref_trialdel", "none"], - default="ref", + ["boehm", "trialdeletion", "none"], + default="trialdeletion", requires={ "boehm": [("translation.gc", "incminimark")], - "ref_trialdel": [("translation.gc", "incminimark")], + "trialdeletion": [("translation.gc", "incminimark")], }, cmdline="--cpyextgc"), diff --git a/rpython/memory/gc/incminimark.py 
b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -2335,7 +2335,7 @@ self.visit_all_objects() # if self.rrc_enabled: - self.rrc_major_collection_trace() + self.rrc_major_collection_trace() # ll_assert(not (self.probably_young_objects_with_finalizers .non_empty()), @@ -2994,43 +2994,48 @@ ('c_ob_refcnt', lltype.Signed), ('c_ob_pypy_link', lltype.Signed)) PYOBJ_HDR_PTR = lltype.Ptr(PYOBJ_HDR) - PYOBJ_GC_HDR = lltype.Struct('PyGC_Head', - ('c_gc_next', rffi.VOIDP), - ('c_gc_prev', rffi.VOIDP), - ('c_gc_refs', lltype.Signed)) - PYOBJ_GC_HDR_PTR = lltype.Ptr(PYOBJ_GC_HDR) - RAWREFCOUNT_DEALLOC_TRIGGER = lltype.Ptr(lltype.FuncType([], lltype.Void)) - VISIT_FUNCTYPE = lltype.Ptr(lltype.FuncType([PYOBJ_HDR_PTR, rffi.VOIDP], - rffi.INT_real)) + RAWREFCOUNT_VISIT = lltype.Ptr(lltype.FuncType([PYOBJ_HDR_PTR, rffi.VOIDP], + rffi.INT_real)) RAWREFCOUNT_TRAVERSE = lltype.Ptr(lltype.FuncType([PYOBJ_HDR_PTR, - VISIT_FUNCTYPE, + RAWREFCOUNT_VISIT, rffi.VOIDP], lltype.Void)) + PYOBJ_GC_HDR_PTR = lltype.Ptr(lltype.ForwardReference()) + PYOBJ_GC_HDR = lltype.Struct('PyGC_Head', + ('c_gc_next', PYOBJ_GC_HDR_PTR), + ('c_gc_prev', PYOBJ_GC_HDR_PTR), + ('c_gc_refs', lltype.Signed)) + PYOBJ_GC_HDR_PTR.TO.become(PYOBJ_GC_HDR) + RAWREFCOUNT_GC_AS_PYOBJ = lltype.Ptr(lltype.FuncType([PYOBJ_GC_HDR_PTR], + PYOBJ_HDR_PTR)) + RAWREFCOUNT_PYOBJ_AS_GC = lltype.Ptr(lltype.FuncType([PYOBJ_HDR_PTR], + PYOBJ_GC_HDR_PTR)) def _pyobj(self, pyobjaddr): - return llmemory.cast_adr_to_ptr(pyobjaddr, self.PYOBJ_HDR_PTR) + return llmemory.cast_adr_to_ptr(pyobjaddr, self.PYOBJ_HDR_PTR) def _pygchdr(self, pygchdraddr): - return llmemory.cast_adr_to_ptr(pygchdraddr, self.PYOBJ_GC_HDR_PTR) + return llmemory.cast_adr_to_ptr(pygchdraddr, self.PYOBJ_GC_HDR_PTR) def rawrefcount_init(self, dealloc_trigger_callback, tp_traverse, - pyobj_list): + pyobj_list, gc_as_pyobj, pyobj_as_gc): # see pypy/doc/discussion/rawrefcount.rst if not self.rrc_enabled: 
self.rrc_p_list_young = self.AddressStack() self.rrc_p_list_old = self.AddressStack() self.rrc_o_list_young = self.AddressStack() self.rrc_o_list_old = self.AddressStack() - self.rrc_buffered = self.AddressStack() self.rrc_p_dict = self.AddressDict() # non-nursery keys only self.rrc_p_dict_nurs = self.AddressDict() # nursery keys only self.rrc_dealloc_trigger_callback = dealloc_trigger_callback + self.rrc_dealloc_pending = self.AddressStack() self.rrc_tp_traverse = tp_traverse - self.rrc_dealloc_pending = self.AddressStack() self.rrc_pyobjects_to_scan = self.AddressStack() self.rrc_more_pyobjects_to_scan = self.AddressStack() self.rrc_pyobjects_to_trace = self.AddressStack() self.rrc_pyobj_list = self._pygchdr(pyobj_list) + self.rrc_gc_as_pyobj = gc_as_pyobj + self.rrc_pyobj_as_gc = pyobj_as_gc self.rrc_enabled = True def check_no_more_rawrefcount_state(self): @@ -3344,18 +3349,21 @@ llhelper) # pyobj = self._pyobj(pyobject) - callback_ptr = llhelper(self.VISIT_FUNCTYPE, + callback_ptr = llhelper(self.RAWREFCOUNT_VISIT, IncrementalMiniMarkGC._rrc_visit) self_ptr = rffi.cast(rffi.VOIDP, cast_nongc_instance_to_adr(self)) self.rrc_tp_traverse(pyobj, callback_ptr, self_ptr) def _rrc_gc_list_init(self, pygclist): - pygclist.c_gc_next = rffi.cast(rffi.VOIDP, pygclist) - pygclist.c_gc_prev = rffi.cast(rffi.VOIDP, pygclist) + pygclist.c_gc_next = pygclist + pygclist.c_gc_prev = pygclist def _rrc_gc_print_list(self): debug_print("gc_print_list start!") - curr = rffi.cast(self.PYOBJ_GC_HDR_PTR, self.rrc_pyobj_list.c_gc_next) + curr = self.rrc_pyobj_list.c_gc_next while curr != self.rrc_pyobj_list: - debug_print("gc_print_list: ", curr) - curr = rffi.cast(self.PYOBJ_GC_HDR_PTR, curr.c_gc_next) + currobj = self.rrc_gc_as_pyobj(curr) + curr2 = self.rrc_pyobj_as_gc(currobj) + debug_print("gc_print_list: ", curr, ", obj:", currobj, ", curr: ", + curr2) + curr = curr.c_gc_next diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py --- 
a/rpython/memory/gc/test/test_rawrefcount.py +++ b/rpython/memory/gc/test/test_rawrefcount.py @@ -1,15 +1,13 @@ import py -from rpython.rtyper.lltypesystem import lltype, llmemory +from rpython.rtyper.lltypesystem import lltype, llmemory, rffi from rpython.memory.gc.incminimark import IncrementalMiniMarkGC from rpython.memory.gc.test.test_direct import BaseDirectGCTest from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT -from rpython.rtyper.lltypesystem import rffi -from rpython.rtyper.annlowlevel import llhelper -#from pypy.module.cpyext.api import (PyTypeObject) -#from pypy.module.cpyext.typeobjectdefs import visitproc, traverseproc PYOBJ_HDR = IncrementalMiniMarkGC.PYOBJ_HDR PYOBJ_HDR_PTR = IncrementalMiniMarkGC.PYOBJ_HDR_PTR +RAWREFCOUNT_VISIT = IncrementalMiniMarkGC.RAWREFCOUNT_VISIT +PYOBJ_GC_HDR = IncrementalMiniMarkGC.PYOBJ_GC_HDR PYOBJ_GC_HDR_PTR = IncrementalMiniMarkGC.PYOBJ_GC_HDR_PTR S = lltype.GcForwardReference() @@ -18,17 +16,6 @@ ('prev', lltype.Ptr(S)), ('next', lltype.Ptr(S)))) -T = lltype.Ptr(lltype.ForwardReference()) -T.TO.become(lltype.Struct('test', - ('base', PYOBJ_HDR_PTR.TO), - ('next', T), - ('prev', T), - ('value', lltype.Signed))) - -#TRAVERSE_FUNCTYPE = rffi.CCallback([PYOBJ_HDR_PTR, visitproc, rffi.VOIDP], -# rffi.INT_real) -#t1 = lltype.malloc(PyTypeObject, flavor='raw', immortal=True) - class TestRawRefCount(BaseDirectGCTest): GCClass = IncrementalMiniMarkGC @@ -51,14 +38,37 @@ else: rc = REFCNT_FROM_PYPY self.trigger = [] - self.trigger2 = [] + visit = self.gc._rrc_visit + self.pyobj_gc_map = {} + self.gc_pyobj_map = {} + + def rawrefcount_tp_traverse(obj, foo, args): + print "VISITED!!!!!!!!!!!!!!!!!!!!!1" + test = rffi.cast(S, obj) + if llmemory.cast_ptr_to_adr(test.next).ptr is not None: + next = rffi.cast(PYOBJ_HDR_PTR, test.next) + vret = visit(next, args) + if vret != 0: + return + if llmemory.cast_ptr_to_adr(test.prev).ptr is not None: + next = 
rffi.cast(PYOBJ_HDR_PTR, test.prev) + visit(next, args) + + def rawrefcount_gc_as_pyobj(gc): + return self.gc_pyobj_map[1] # TODO fix + + def rawrefcount_pyobj_as_gc(pyobj): + return self.pyobj_gc_map[1] # TODO fix + self.pyobj_list = lltype.malloc(PYOBJ_GC_HDR_PTR.TO, flavor='raw', immortal=True) - self.pyobj_list.c_gc_next = rffi.cast(rffi.VOIDP, self.pyobj_list); - self.pyobj_list.c_gc_next = rffi.cast(rffi.VOIDP, self.pyobj_list); + self.pyobj_list.c_gc_next = self.pyobj_list + self.pyobj_list.c_gc_next = self.pyobj_list self.gc.rawrefcount_init(lambda: self.trigger.append(1), - lambda: self.trigger2.append(1), - llmemory.cast_ptr_to_adr(self.pyobj_list)) + rawrefcount_tp_traverse, + llmemory.cast_ptr_to_adr(self.pyobj_list), + rawrefcount_gc_as_pyobj, + rawrefcount_pyobj_as_gc) # if create_immortal: p1 = lltype.malloc(S, immortal=True) @@ -78,11 +88,22 @@ self._collect(major=False) p1 = self.stackroots.pop() p1ref = lltype.cast_opaque_ptr(llmemory.GCREF, p1) - r1 = lltype.malloc(PYOBJ_HDR_PTR.TO, flavor='raw', immortal=create_immortal) + r1 = lltype.malloc(PYOBJ_HDR, flavor='raw', + immortal=create_immortal) r1.c_ob_refcnt = rc r1.c_ob_pypy_link = 0 - #r1.c_ob_type = lltype.nullptr(PyTypeObject) r1addr = llmemory.cast_ptr_to_adr(r1) + + r1gc = lltype.malloc(PYOBJ_GC_HDR, flavor='raw', + immortal=True) + r1gc.c_gc_next = self.pyobj_list + r1gc.c_gc_prev = self.pyobj_list + self.pyobj_list.c_gc_next = r1gc + self.pyobj_list.c_gc_prev = r1gc + + self.pyobj_gc_map[1] = r1gc + self.gc_pyobj_map[1] = r1 + if is_pyobj: assert not is_light self.gc.rawrefcount_create_link_pyobj(p1ref, r1addr) @@ -313,50 +334,24 @@ self._collect(major=True) check_alive(0) - # def _rawrefcount_cycle_obj(self): - # - # def test_tp_traverse(obj, visit, args): - # test = rffi.cast(T, obj) - # vret = 0 - # if llmemory.cast_ptr_to_adr(test.next).ptr is not None: - # next = rffi.cast(PYOBJ_HDR_PTR, test.next) - # vret = visit(next, args) - # if vret != 0: - # return vret - # if 
llmemory.cast_ptr_to_adr(test.prev).ptr is not None: - # next = rffi.cast(PYOBJ_HDR_PTR, test.prev) - # vret = visit(next, args) - # if vret != 0: - # return vret - # return vret - # - # func_ptr = llhelper(TRAVERSE_FUNCTYPE, test_tp_traverse) - # rffi_func_ptr = rffi.cast(traverseproc, func_ptr) - # t1.c_tp_traverse = rffi_func_ptr - # - # r1 = lltype.malloc(T.TO, flavor='raw', immortal=True) - # r1.base.c_ob_pypy_link = 0 - # r1.base.c_ob_type = t1 - # r1.base.c_ob_refcnt = 1 - # return r1 - # - # def test_cycle_self_reference_free(self): - # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) - # r1 = self._rawrefcount_cycle_obj() - # r1.next = r1 - # self._rawrefcount_buffer_obj(r1) - # self.gc.rrc_collect_cycles() - # assert r1.base.c_ob_refcnt & REFCNT_MASK == 0 - # - # def test_cycle_self_reference_not_free(self): - # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) - # r1 = self._rawrefcount_cycle_obj() - # r1.base.c_ob_refcnt += 1 - # r1.next = r1 - # self._rawrefcount_buffer_obj(r1) - # self.gc.rrc_collect_cycles() - # assert r1.base.c_ob_refcnt & REFCNT_MASK == 2 - # + def test_cycle_self_reference_free(self): + p1, p1ref, r1, r1addr, check_alive = ( + self._rawrefcount_pair(42, create_immortal=True)) + p1.next = p1 + check_alive(0) + self._collect(major=True) + py.test.raises(RuntimeError, "r1.c_ob_refcnt") # dead + py.test.raises(RuntimeError, "p1.x") # dead + + def test_cycle_self_reference_not_free(self): + p1, p1ref, r1, r1addr, check_alive = ( + self._rawrefcount_pair(42, create_immortal=True)) + r1.c_ob_refcnt += 1 # the pyobject is kept alive + p1.next = p1 + check_alive(+1) + self._collect(major=True) + check_alive(+1) + # def test_simple_cycle_free(self): # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) # r1 = self._rawrefcount_cycle_obj() diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ 
b/rpython/memory/gctransform/framework.py @@ -476,7 +476,9 @@ self.rawrefcount_init_ptr = getfn( GCClass.rawrefcount_init, [s_gc, SomePtr(GCClass.RAWREFCOUNT_DEALLOC_TRIGGER), - SomePtr(GCClass.RAWREFCOUNT_TRAVERSE), SomeAddress()], + SomePtr(GCClass.RAWREFCOUNT_TRAVERSE), SomeAddress(), + SomePtr(GCClass.RAWREFCOUNT_GC_AS_PYOBJ), + SomePtr(GCClass.RAWREFCOUNT_PYOBJ_AS_GC)], annmodel.s_None) self.rawrefcount_create_link_pypy_ptr = getfn( GCClass.rawrefcount_create_link_pypy, @@ -1311,13 +1313,15 @@ self.pop_roots(hop, livevars) def gct_gc_rawrefcount_init(self, hop): - [v_fnptr, v_fnptr2, v_pyobj_list] = hop.spaceop.args + [v_fnptr, v_fnptr2, v_pyobj_list, v_fnptr3, v_fnptr4] = hop.spaceop.args assert v_fnptr.concretetype == self.GCClass.RAWREFCOUNT_DEALLOC_TRIGGER assert v_fnptr2.concretetype == self.GCClass.RAWREFCOUNT_TRAVERSE - # TODO add assert for v_pyobj_list + # TODO add assert for v_pyobj_list, improve asserts (types not same but equal) + # assert v_fnptr3.concretetype == self.GCClass.RAWREFCOUNT_GC_AS_PYOBJ + # assert v_fnptr4.concretetype == self.GCClass.RAWREFCOUNT_PYOBJ_AS_GC hop.genop("direct_call", [self.rawrefcount_init_ptr, self.c_const_gc, v_fnptr, - v_fnptr2, v_pyobj_list]) + v_fnptr2, v_pyobj_list, v_fnptr3, v_fnptr4]) def gct_gc_rawrefcount_create_link_pypy(self, hop): [v_gcobj, v_pyobject] = hop.spaceop.args diff --git a/rpython/rlib/rawrefcount.py b/rpython/rlib/rawrefcount.py --- a/rpython/rlib/rawrefcount.py +++ b/rpython/rlib/rawrefcount.py @@ -19,11 +19,12 @@ ('c_ob_refcnt', lltype.Signed), ('c_ob_pypy_link', lltype.Signed)) PYOBJ_HDR_PTR = lltype.Ptr(PYOBJ_HDR) +PYOBJ_GC_HDR_PTR = lltype.Ptr(lltype.ForwardReference()) PYOBJ_GC_HDR = lltype.Struct('PyGC_Head', - ('c_gc_next', rffi.VOIDP), - ('c_gc_prev', rffi.VOIDP), + ('c_gc_next', PYOBJ_GC_HDR_PTR), + ('c_gc_prev', PYOBJ_GC_HDR_PTR), ('c_gc_refs', lltype.Signed)) -PYOBJ_GC_HDR_PTR = lltype.Ptr(PYOBJ_GC_HDR) +PYOBJ_GC_HDR_PTR.TO.become(PYOBJ_GC_HDR) RAWREFCOUNT_DEALLOC_TRIGGER = 
lltype.Ptr(lltype.FuncType([], lltype.Void)) VISIT_FUNCTYPE = lltype.Ptr(lltype.FuncType([PYOBJ_HDR_PTR, rffi.VOIDP], rffi.INT_real)) @@ -31,6 +32,10 @@ VISIT_FUNCTYPE, rffi.VOIDP], lltype.Void)) +RAWREFCOUNT_GC_AS_PYOBJ = lltype.Ptr(lltype.FuncType([PYOBJ_GC_HDR_PTR], + PYOBJ_HDR_PTR)) +RAWREFCOUNT_PYOBJ_AS_GC = lltype.Ptr(lltype.FuncType([PYOBJ_HDR_PTR], + PYOBJ_GC_HDR_PTR)) def _build_pypy_link(p): @@ -45,8 +50,7 @@ for tests; it should not be called at all during translation. """ global _p_list, _o_list, _adr2pypy, _pypy2ob, _pypy2ob_rev - global _d_list, _dealloc_trigger_callback, _tp_traverse, _pygclist - global _pyobj_list + global _d_list, _dealloc_trigger_callback, _tp_traverse _p_list = [] _o_list = [] _adr2pypy = [None] @@ -63,15 +67,6 @@ global _pyobj_list _pyobj_list = rffi.cast(PYOBJ_GC_HDR_PTR, pyobj_list) -# def init_traverse(traverse_cpy_call): -# global _traverse_cpy_call -# _traverse_cpy_call = traverse_cpy_call -# -# def traverse_cpy_call(pyobj, visitproc_ptr, arg): -# global _traverse_cpy_call -# _traverse_cpy_call(pyobj.c_ob_type.c_tp_traverse, pyobj, -# visitproc_ptr, arg) - @not_rpython def create_link_pypy(p, ob): "a link where the PyPy object contains some or all the data" @@ -229,12 +224,13 @@ def _print_pyobj_list(): "for tests only" # TODO: change to get_pyobj_list, that returns a list of PyObjects + # or alternatively checks if a certain object is in the list global _pyobj_list print "_print_pyobj_list start!" 
- curr = rffi.cast(PYOBJ_GC_HDR_PTR, _pyobj_list.c_gc_next) + curr = _pyobj_list.c_gc_next while curr != _pyobj_list: print "_print_pyobj_list: ", curr - curr = rffi.cast(PYOBJ_GC_HDR_PTR, curr.c_gc_next) + curr = curr.c_gc_next # ____________________________________________________________ @@ -263,18 +259,20 @@ class Entry(ExtRegistryEntry): _about_ = init - def compute_result_annotation(self, s_dealloc_callback, tp_traverse, - pyobj_list): + def compute_result_annotation(self, s_dealloc_callback, s_tp_traverse, + s_pyobj_list, s_as_gc, s_as_pyobj): from rpython.rtyper.llannotation import SomePtr assert isinstance(s_dealloc_callback, SomePtr) # ll-ptr-to-function - # add assert? + assert isinstance(s_tp_traverse, SomePtr) + assert isinstance(s_as_gc, SomePtr) + assert isinstance(s_as_pyobj, SomePtr) def specialize_call(self, hop): hop.exception_cannot_occur() - v_dealloc_callback, v_tp_traverse, v_pyobj_list = \ - hop.inputargs(*hop.args_r) + v_dealloc_callback, v_tp_traverse, v_pyobj_list, v_as_gc, \ + v_as_pyobj = hop.inputargs(*hop.args_r) hop.genop('gc_rawrefcount_init', [v_dealloc_callback, v_tp_traverse, - v_pyobj_list]) + v_pyobj_list, v_as_gc, v_as_pyobj]) class Entry(ExtRegistryEntry): diff --git a/rpython/rtyper/rclass.py b/rpython/rtyper/rclass.py --- a/rpython/rtyper/rclass.py +++ b/rpython/rtyper/rclass.py @@ -915,7 +915,7 @@ lst.append(str(g)) g = seen.get(g) lst.append('') - # TODO: remove code (see below) to make this check pass + # TODO: this check fails if this code is uncommented: # pypy/module/cpyext/api.py: # print "start cpyext_call" # print "end cpyext_call" diff --git a/rpython/translator/c/genc.py b/rpython/translator/c/genc.py --- a/rpython/translator/c/genc.py +++ b/rpython/translator/c/genc.py @@ -179,6 +179,8 @@ defines['COUNT_OP_MALLOCS'] = 1 if self.config.translation.cpyextgc == "boehm": defines['CPYEXT_BOEHM'] = 1 + if self.config.translation.cpyextgc == "trialdeletion": + defines['CPYEXT_TRIALDELETION'] = 1 if 
self.config.translation.sandbox: defines['RPY_SANDBOXED'] = 1 if CBuilder.have___thread is None: From pypy.commits at gmail.com Fri Jan 11 05:39:14 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 02:39:14 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Fixed cpyext test Message-ID: <5c387252.1c69fb81.1cddb.d558@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95608:ada4b64c0816 Date: 2018-08-02 10:59 +0200 http://bitbucket.org/pypy/pypy/changeset/ada4b64c0816/ Log: Fixed cpyext test diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -1069,6 +1069,7 @@ (initproc)Cycle_init, /* tp_init */ 0, /* tp_alloc */ Cycle_new, /* tp_new */ + PyObject_GC_Del, /* tp_free */ }; extern PyGC_Head *_pypy_rawrefcount_pyobj_list; @@ -1078,6 +1079,8 @@ Cycle *c = PyObject_GC_New(Cycle, &CycleType); if (c == NULL) return NULL; + + Py_INCREF(val); c->next = val; // TODO: check if _pypy_rawrefcount_pyobj_list contains c @@ -1100,7 +1103,3 @@ self.print_pyobj_list() c = module.create(Example(42)) self.print_pyobj_list() - - # TODO: fix rawrefcount, so that the Cycle objects are properly added - # to the ALLOCATED list of leakfinder or alternatively not freed - # by collect From pypy.commits at gmail.com Fri Jan 11 05:39:15 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 02:39:15 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Fixed cpyext test Message-ID: <5c387253.1c69fb81.51b65.4f57@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95609:860d9f8d29b6 Date: 2018-08-02 14:53 +0200 http://bitbucket.org/pypy/pypy/changeset/860d9f8d29b6/ Log: Fixed cpyext test diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -937,7 
+937,7 @@ if self.runappdirect: skip('cannot import module with undefined functions') - # TODO: remove unnecessary stuff, add tests for gc_untrack, add asserts + # TODO: remove unnecessary stuff, add gc_(un)track asserts init = """ if (Py_IsInitialized()) { PyObject* m; @@ -994,7 +994,7 @@ PyObject *kwds) { Cycle *self; - self = (Cycle *)type->tp_alloc(type, 0); + self = PyObject_GC_New(Cycle, type); if (self != NULL) { self->next = PyString_FromString(""); if (self->next == NULL) { @@ -1074,23 +1074,23 @@ extern PyGC_Head *_pypy_rawrefcount_pyobj_list; - static PyObject * Cycle_Create(Cycle *self, PyObject *val) - { - Cycle *c = PyObject_GC_New(Cycle, &CycleType); - if (c == NULL) - return NULL; - - Py_INCREF(val); - c->next = val; + static PyObject * Cycle_Create(Cycle *self, PyObject *val) + { + Cycle *c = (Cycle *)Cycle_new(&CycleType, NULL, NULL); + if (c == NULL) + return NULL; + + Py_INCREF(val); + c->next = val; - // TODO: check if _pypy_rawrefcount_pyobj_list contains c + // TODO: check if _pypy_rawrefcount_pyobj_list contains c - return (PyObject *)c; - } - static PyMethodDef module_methods[] = { - {"create", (PyCFunction)Cycle_Create, METH_OLDARGS, ""}, - {NULL} /* Sentinel */ - }; + return (PyObject *)c; + } + static PyMethodDef module_methods[] = { + {"create", (PyCFunction)Cycle_Create, METH_OLDARGS, ""}, + {NULL} /* Sentinel */ + }; """ module = self.import_module(name='cycle', init=init, body=body) From pypy.commits at gmail.com Fri Jan 11 05:39:17 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 02:39:17 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Fixed rawrefcount tests Message-ID: <5c387255.1c69fb81.75650.c7a2@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95610:c1219f8ea34b Date: 2018-08-03 09:43 +0200 http://bitbucket.org/pypy/pypy/changeset/c1219f8ea34b/ Log: Fixed rawrefcount tests diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py 
--- a/rpython/memory/gc/test/test_rawrefcount.py +++ b/rpython/memory/gc/test/test_rawrefcount.py @@ -334,146 +334,66 @@ self._collect(major=True) check_alive(0) - def test_cycle_self_reference_free(self): + def test_linked_cycle_self_reference_dies_without_external_reference(self): + p1, p1ref, r1, r1addr, check_alive = ( + self._rawrefcount_pair(42)) + r1.c_ob_refcnt += 1 + p1.next = p1 + check_alive(+1) + self._collect(major=True, expected_trigger=1) + py.test.raises(RuntimeError, "p1.x") # dead + assert r1.c_ob_refcnt == 1 # in the pending list + assert r1.c_ob_pypy_link == 0 + assert self.gc.rawrefcount_next_dead() == r1addr + assert self.gc.rawrefcount_next_dead() == llmemory.NULL + assert self.gc.rawrefcount_next_dead() == llmemory.NULL + self.gc.check_no_more_rawrefcount_state() + lltype.free(r1, flavor='raw') + + def test_linked_cycle_self_reference_survives_with_pyobj_reference(self): p1, p1ref, r1, r1addr, check_alive = ( self._rawrefcount_pair(42, create_immortal=True)) + r1.c_ob_refcnt += 2 # the pyobject is kept alive p1.next = p1 - check_alive(0) + check_alive(+2) self._collect(major=True) - py.test.raises(RuntimeError, "r1.c_ob_refcnt") # dead + check_alive(+2) + r1.c_ob_refcnt -= 1 # the external reference from pyobj is removed + check_alive(+1) + self._collect(major=True, expected_trigger=1) py.test.raises(RuntimeError, "p1.x") # dead + assert r1.c_ob_refcnt == 1 # in the pending list + assert r1.c_ob_pypy_link == 0 + assert self.gc.rawrefcount_next_dead() == r1addr + assert self.gc.rawrefcount_next_dead() == llmemory.NULL + assert self.gc.rawrefcount_next_dead() == llmemory.NULL + self.gc.check_no_more_rawrefcount_state() + lltype.free(r1, flavor='raw') - def test_cycle_self_reference_not_free(self): + def test_linked_cycle_self_reference_survives_with_pypy_reference(self): p1, p1ref, r1, r1addr, check_alive = ( self._rawrefcount_pair(42, create_immortal=True)) - r1.c_ob_refcnt += 1 # the pyobject is kept alive + r1.c_ob_refcnt += 1 p1.next = p1 
+ self.stackroots.append(p1) check_alive(+1) self._collect(major=True) + assert p1.x == 42 + assert self.trigger == [] check_alive(+1) + p1 = self.stackroots.pop() + check_alive(+1) + self._collect(major=True, expected_trigger=1) + py.test.raises(RuntimeError, "p1.x") # dead + assert r1.c_ob_refcnt == 1 + assert r1.c_ob_pypy_link == 0 + assert self.gc.rawrefcount_next_dead() == r1addr + self.gc.check_no_more_rawrefcount_state() + lltype.free(r1, flavor='raw') - # def test_simple_cycle_free(self): - # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) - # r1 = self._rawrefcount_cycle_obj() - # r2 = self._rawrefcount_cycle_obj() - # r1.next = r2 - # r2.next = r1 - # self._rawrefcount_buffer_obj(r1) - # self.gc.rrc_collect_cycles() - # assert r1.base.c_ob_refcnt & REFCNT_MASK == 0 - # assert r2.base.c_ob_refcnt & REFCNT_MASK == 0 - # - # def test_simple_cycle_not_free(self): - # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) - # r1 = self._rawrefcount_cycle_obj() - # r2 = self._rawrefcount_cycle_obj() - # r1.next = r2 - # r2.next = r1 - # r2.base.c_ob_refcnt += 1 - # self._rawrefcount_buffer_obj(r1) - # self.gc.rrc_collect_cycles() - # assert r1.base.c_ob_refcnt & REFCNT_MASK == 1 - # assert r2.base.c_ob_refcnt & REFCNT_MASK == 2 - # - # def test_complex_cycle_free(self): - # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) - # r1 = self._rawrefcount_cycle_obj() - # r2 = self._rawrefcount_cycle_obj() - # r3 = self._rawrefcount_cycle_obj() - # r1.next = r2 - # r1.prev = r2 - # r2.base.c_ob_refcnt += 1 - # r2.next = r3 - # r3.prev = r1 - # self._rawrefcount_buffer_obj(r1) - # self.gc.rrc_collect_cycles() - # assert r1.base.c_ob_refcnt & REFCNT_MASK == 0 - # assert r2.base.c_ob_refcnt & REFCNT_MASK == 0 - # assert r3.base.c_ob_refcnt & REFCNT_MASK == 0 - # - # def test_complex_cycle_not_free(self): - # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) - # r1 = self._rawrefcount_cycle_obj() - # r2 = self._rawrefcount_cycle_obj() - # r3 = 
self._rawrefcount_cycle_obj() - # r1.next = r2 - # r1.prev = r2 - # r2.base.c_ob_refcnt += 1 - # r2.next = r3 - # r3.prev = r1 - # r3.base.c_ob_refcnt += 1 - # self._rawrefcount_buffer_obj(r1) - # self.gc.rrc_collect_cycles() - # assert r1.base.c_ob_refcnt & REFCNT_MASK == 1 - # assert r2.base.c_ob_refcnt & REFCNT_MASK == 2 - # assert r3.base.c_ob_refcnt & REFCNT_MASK == 2 - # - # def test_cycle_2_buffered_free(self): - # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) - # r1 = self._rawrefcount_cycle_obj() - # r2 = self._rawrefcount_cycle_obj() - # r1.next = r2 - # r2.prev = r1 - # self._rawrefcount_buffer_obj(r1) - # self._rawrefcount_buffer_obj(r2) - # self.gc.rrc_collect_cycles() - # assert r1.base.c_ob_refcnt & REFCNT_MASK == 0 - # assert r2.base.c_ob_refcnt & REFCNT_MASK == 0 - # - # def test_cycle_2_buffered_not_free(self): - # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) - # r1 = self._rawrefcount_cycle_obj() - # r2 = self._rawrefcount_cycle_obj() - # r1.next = r2 - # r2.prev = r1 - # r1.base.c_ob_refcnt += 1 - # self._rawrefcount_buffer_obj(r1) - # self._rawrefcount_buffer_obj(r2) - # self.gc.rrc_collect_cycles() - # assert r1.base.c_ob_refcnt & REFCNT_MASK == 2 - # assert r2.base.c_ob_refcnt & REFCNT_MASK == 1 - # - # def test_multiple_cycles_partial_free(self): - # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) - # r1 = self._rawrefcount_cycle_obj() - # r2 = self._rawrefcount_cycle_obj() - # r3 = self._rawrefcount_cycle_obj() - # r4 = self._rawrefcount_cycle_obj() - # r5 = self._rawrefcount_cycle_obj() - # r1.next = r2 - # r2.next = r3 - # r3.next = r1 - # r2.prev = r5 - # r5.next = r4 - # r4.next = r5 - # r5.base.c_ob_refcnt += 1 - # r4.base.c_ob_refcnt += 1 - # self._rawrefcount_buffer_obj(r1) - # self.gc.rrc_collect_cycles() - # assert r1.base.c_ob_refcnt & REFCNT_MASK == 0 - # assert r2.base.c_ob_refcnt & REFCNT_MASK == 0 - # assert r3.base.c_ob_refcnt & REFCNT_MASK == 0 - # assert r4.base.c_ob_refcnt & REFCNT_MASK == 2 
- # assert r5.base.c_ob_refcnt & REFCNT_MASK == 1 - # - # def test_multiple_cycles_all_free(self): - # self.gc.rawrefcount_init(lambda: self.trigger.append(1)) - # r1 = self._rawrefcount_cycle_obj() - # r2 = self._rawrefcount_cycle_obj() - # r3 = self._rawrefcount_cycle_obj() - # r4 = self._rawrefcount_cycle_obj() - # r5 = self._rawrefcount_cycle_obj() - # r1.next = r2 - # r2.next = r3 - # r3.next = r1 - # r2.prev = r5 - # r5.next = r4 - # r4.next = r5 - # r5.base.c_ob_refcnt += 1 - # self._rawrefcount_buffer_obj(r1) - # self.gc.rrc_collect_cycles() - # assert r1.base.c_ob_refcnt & REFCNT_MASK == 0 - # assert r2.base.c_ob_refcnt & REFCNT_MASK == 0 - # assert r3.base.c_ob_refcnt & REFCNT_MASK == 0 - # assert r4.base.c_ob_refcnt & REFCNT_MASK == 0 - # assert r5.base.c_ob_refcnt & REFCNT_MASK == 0 +# TODO: pyobj_cycle_self_reference (without linked pypy object) +# TODO: linked_cycle_simple +# TODO: pyobj_cycle_simple +# TODO: linked_cycle_complex +# TODO: pyobj_cycle_complex +# TODO: pyobj_cycle_dies_including_linked_pypy From pypy.commits at gmail.com Fri Jan 11 05:39:19 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 02:39:19 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Fixed cpyext test Message-ID: <5c387257.1c69fb81.fcc65.d324@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95611:fa88e83164e0 Date: 2018-08-03 13:11 +0200 http://bitbucket.org/pypy/pypy/changeset/fa88e83164e0/ Log: Fixed cpyext test diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -3,7 +3,7 @@ import pytest from pypy.tool.cpyext.extbuild import SystemCompilationInfo, HERE -from pypy.interpreter.gateway import unwrap_spec, interp2app +from pypy.interpreter.gateway import unwrap_spec, interp2app, ObjSpace from pypy.interpreter.error import OperationError from rpython.rtyper.lltypesystem import lltype from 
pypy.module.cpyext import api @@ -214,8 +214,8 @@ def debug_collect(space): rawrefcount._collect() -def print_pyobj_list(space): - rawrefcount._print_pyobj_list() +def in_pygclist(space, int_addr): + return space.wrap(rawrefcount._in_pygclist(int_addr)) class AppTestCpythonExtensionBase(LeakCheckingTest): @@ -227,7 +227,8 @@ if not cls.runappdirect: cls.sys_info = get_cpyext_info(space) cls.w_debug_collect = space.wrap(interp2app(debug_collect)) - cls.w_print_pyobj_list = space.wrap(interp2app(print_pyobj_list)) + cls.w_in_pygclist = space.wrap( + interp2app(in_pygclist, unwrap_spec=[ObjSpace, int])) cls.preload_builtins(space) else: def w_import_module(self, name, init=None, body='', filename=None, @@ -929,7 +930,7 @@ ), ]) - def test_gc_pyobj_list(self): + def test_gc_track(self): """ Test if Py_GC_Track and Py_GC_Untrack are adding and removing container objects from the list of all garbage-collected PyObjects. @@ -937,17 +938,16 @@ if self.runappdirect: skip('cannot import module with undefined functions') - # TODO: remove unnecessary stuff, add gc_(un)track asserts init = """ if (Py_IsInitialized()) { PyObject* m; - if (PyType_Ready(&CycleType) < 0) + if (PyType_Ready(&FooType) < 0) return; - m = Py_InitModule("cycle", module_methods); + m = Py_InitModule("foo", module_methods); if (m == NULL) return; - Py_INCREF(&CycleType); - PyModule_AddObject(m, "Cycle", (PyObject *)&CycleType); + Py_INCREF(&FooType); + PyModule_AddObject(m, "Foo", (PyObject *)&FooType); } """ body = """ @@ -955,85 +955,22 @@ #include "structmember.h" typedef struct { PyObject_HEAD - PyObject *next; - PyObject *val; - } Cycle; - static PyTypeObject CycleType; - static int Cycle_traverse(Cycle *self, visitproc visit, void *arg) - { - int vret; - if (self->next) { - vret = visit(self->next, arg); - if (vret != 0) - return vret; - } - if (self->val) { - vret = visit(self->val, arg); - if (vret != 0) - return vret; - } - return 0; - } - static int Cycle_clear(Cycle *self) - { - PyObject *tmp; 
- tmp = self->next; - self->next = NULL; - Py_XDECREF(tmp); - tmp = self->val; - self->val = NULL; - Py_XDECREF(tmp); - return 0; - } - static void Cycle_dealloc(Cycle* self) - { - Cycle_clear(self); - Py_TYPE(self)->tp_free((PyObject*)self); - } - static PyObject* Cycle_new(PyTypeObject *type, PyObject *args, + } Foo; + static PyTypeObject FooType; + static PyObject* Foo_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - Cycle *self; - self = PyObject_GC_New(Cycle, type); - if (self != NULL) { - self->next = PyString_FromString(""); - if (self->next == NULL) { - Py_DECREF(self); - return NULL; - } - } + Foo *self; + self = PyObject_GC_New(Foo, type); PyObject_GC_Track(self); return (PyObject *)self; } - static int Cycle_init(Cycle *self, PyObject *args, PyObject *kwds) - { - PyObject *next=NULL, *tmp; - static char *kwlist[] = {"next", NULL}; - if (! PyArg_ParseTupleAndKeywords(args, kwds, "|O", kwlist, - &next)) - return -1; - if (next) { - tmp = self->next; - Py_INCREF(next); - self->next = next; - Py_XDECREF(tmp); - } - return 0; - } - static PyMemberDef Cycle_members[] = { - {"next", T_OBJECT_EX, offsetof(Cycle, next), 0, "next"}, - {"val", T_OBJECT_EX, offsetof(Cycle, val), 0, "val"}, - {NULL} /* Sentinel */ - }; - static PyMethodDef Cycle_methods[] = { - {NULL} /* Sentinel */ - }; - static PyTypeObject CycleType = { + static PyTypeObject FooType = { PyVarObject_HEAD_INIT(NULL, 0) - "Cycle.Cycle", /* tp_name */ - sizeof(Cycle), /* tp_basicsize */ + "foo.Foo", /* tp_name */ + sizeof(Foo), /* tp_basicsize */ 0, /* tp_itemsize */ - (destructor)Cycle_dealloc, /* tp_dealloc */ + 0, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ @@ -1049,57 +986,57 @@ 0, /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | - Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */ - "Cycle objects", /* tp_doc */ - (traverseproc)Cycle_traverse, /* tp_traverse */ - (inquiry)Cycle_clear, /* tp_clear */ + "", /* tp_doc */ + 0, /* tp_traverse 
*/ + 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ - Cycle_methods, /* tp_methods */ - Cycle_members, /* tp_members */ + 0, /* tp_methods */ + 0, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ - (initproc)Cycle_init, /* tp_init */ + 0, /* tp_init */ 0, /* tp_alloc */ - Cycle_new, /* tp_new */ + Foo_new, /* tp_new */ PyObject_GC_Del, /* tp_free */ }; + + static PyObject * Foo_pygchead(PyObject *self, Foo *foo) + { + PyGC_Head * gc_head = _PyPy_pyobj_as_gc((GCHdr_PyObject *)foo); + PyObject *obj = PyObject_MALLOC(sizeof(PyIntObject)); + obj = PyObject_Init(obj, &PyInt_Type); + if (obj != NULL) + ((PyIntObject *)obj)->ob_ival = (long)gc_head; + return obj; + } - extern PyGC_Head *_pypy_rawrefcount_pyobj_list; - - static PyObject * Cycle_Create(Cycle *self, PyObject *val) + static PyObject * Foo_untrack(PyObject *self, Foo *foo) { - Cycle *c = (Cycle *)Cycle_new(&CycleType, NULL, NULL); - if (c == NULL) - return NULL; - - Py_INCREF(val); - c->next = val; - - // TODO: check if _pypy_rawrefcount_pyobj_list contains c - - return (PyObject *)c; + PyObject_GC_UnTrack(foo); + Py_RETURN_NONE; } + static PyMethodDef module_methods[] = { - {"create", (PyCFunction)Cycle_Create, METH_OLDARGS, ""}, + {"pygchead", (PyCFunction)Foo_pygchead, METH_OLDARGS, ""}, + {"untrack", (PyCFunction)Foo_untrack, METH_OLDARGS, ""}, {NULL} /* Sentinel */ }; """ - module = self.import_module(name='cycle', init=init, body=body) + module = self.import_module(name='foo', init=init, body=body) - class Example(object): - def __init__(self, val): - self.val = val + f = module.Foo() + pygchead = module.pygchead(f) + result = self.in_pygclist(pygchead) + assert result - c = module.create(Example(41)) - - self.print_pyobj_list() - c = module.create(Example(42)) - self.print_pyobj_list() + module.untrack(f) + result = self.in_pygclist(pygchead) + assert not 
result diff --git a/rpython/rlib/rawrefcount.py b/rpython/rlib/rawrefcount.py --- a/rpython/rlib/rawrefcount.py +++ b/rpython/rlib/rawrefcount.py @@ -221,16 +221,17 @@ del _d_list[:] @not_rpython -def _print_pyobj_list(): - "for tests only" - # TODO: change to get_pyobj_list, that returns a list of PyObjects - # or alternatively checks if a certain object is in the list +def _in_pygclist(int_addr): + """For tests only. Checks if the address is in the gc list of pyobjects.""" global _pyobj_list - print "_print_pyobj_list start!" curr = _pyobj_list.c_gc_next while curr != _pyobj_list: - print "_print_pyobj_list: ", curr + curr_addr = llmemory.cast_ptr_to_adr(curr) + curr_int_addr = llmemory.cast_adr_to_int(curr_addr) + if int_addr == curr_int_addr: + return True curr = curr.c_gc_next + return False # ____________________________________________________________ From pypy.commits at gmail.com Fri Jan 11 05:39:20 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 02:39:20 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Cleaned up code in incminimark Message-ID: <5c387258.1c69fb81.fcc65.d32a@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95612:5b3e2b3a25bf Date: 2018-08-24 09:17 +0200 http://bitbucket.org/pypy/pypy/changeset/5b3e2b3a25bf/ Log: Cleaned up code in incminimark diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -3030,9 +3030,6 @@ self.rrc_dealloc_trigger_callback = dealloc_trigger_callback self.rrc_dealloc_pending = self.AddressStack() self.rrc_tp_traverse = tp_traverse - self.rrc_pyobjects_to_scan = self.AddressStack() - self.rrc_more_pyobjects_to_scan = self.AddressStack() - self.rrc_pyobjects_to_trace = self.AddressStack() self.rrc_pyobj_list = self._pygchdr(pyobj_list) self.rrc_gc_as_pyobj = gc_as_pyobj self.rrc_pyobj_as_gc = pyobj_as_gc @@ -3205,16 +3202,8 @@ 
self._pyobj(pyobject).c_ob_refcnt = rc _rrc_free._always_inline_ = True - NO_CYCLE_DETECTION = False - def rrc_major_collection_trace(self): - debug_start("gc-rrc-trace") - if self.NO_CYCLE_DETECTION: - self.rrc_p_list_old.foreach(self._rrc_major_trace, None) - else: - self.rrc_major_collection_trace_cycle() - self.rrc_p_list_old.foreach(self._rrc_major_trace, None) # for now, remove later - debug_stop("gc-rrc-trace") + self.rrc_p_list_old.foreach(self._rrc_major_trace, None) def _rrc_major_trace(self, pyobject, ignore): from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY @@ -3230,77 +3219,6 @@ self.objects_to_trace.append(obj) self.visit_all_objects() - def rrc_major_collection_trace_cycle(self): - assert not self.objects_to_trace.non_empty() - assert not self.rrc_pyobjects_to_scan.non_empty() - assert not self.rrc_more_pyobjects_to_scan.non_empty() - assert not self.rrc_pyobjects_to_trace.non_empty() - - self._rrc_gc_print_list() - - # initially, scan all real pyobjects (not proxies) which are linked to objects - #self.rrc_p_list_old.foreach(self._rrc_major_scan_non_rc_roots, None) - self.rrc_o_list_old.foreach(self._rrc_major_scan_non_rc_roots, None) - - # as long as we find new pyobjects which should be marked, recursively - # mark them - while self.rrc_pyobjects_to_trace.non_empty(): - while self.rrc_pyobjects_to_trace.non_empty(): - pyobject = self.rrc_pyobjects_to_trace.pop() - self._rrc_traverse(pyobject) - - # see if we found new pypy objects to trace - if self.objects_to_trace.non_empty(): - self.visit_all_objects() - self.objects_to_trace.delete() - self.objects_to_trace = self.AddressStack() - - # look if there are some pyobjects with linked objects which were - # not marked previously, but are marked now - swap = self.rrc_pyobjects_to_scan - self.rrc_pyobjects_to_scan = self.rrc_more_pyobjects_to_scan - self.rrc_more_pyobjects_to_scan = swap - self.rrc_pyobjects_to_scan.foreach( - self._rrc_major_scan_non_rc_roots, None) - 
self.rrc_pyobjects_to_scan.delete() - self.rrc_pyobjects_to_scan = self.AddressStack() - - self.rrc_more_pyobjects_to_scan.delete() - self.rrc_more_pyobjects_to_scan = self.AddressStack() - - def _rrc_mark_cpyobj(self, pyobj): - # if the pyobj is not marked, remember it and if there is a linked pypy - # object also remember it - visited = True # TODO: check if visited (via 'cast' to PyGC_Head) - if not visited: - # TODO: mark visited - pyobject = llmemory.cast_ptr_to_adr(pyobj) - self.rrc_more_pyobjects_to_scan.append(pyobject) - intobj = pyobj.c_ob_pypy_link - if intobj != 0: - obj = llmemory.cast_int_to_adr(intobj) - hdr = self.header(obj) - if not (hdr.tid & GCFLAG_VISITED): - self.objects_to_trace.append(obj) - - def _rrc_major_scan_non_rc_roots(self, pyobject, ignore): - # check in the object header of the linked pypy object, if it is marked - # or not - pyobj = self._pyobj(pyobject) - intobj = pyobj.c_ob_pypy_link - obj = llmemory.cast_int_to_adr(intobj) - hdr = self.header(obj) - if hdr.tid & GCFLAG_VISITED: - visited = True # TODO: check if visited - if not visited: - # process the pyobject now - # TODO: mark visited - self.rrc_pyobjects_to_trace.append(pyobject) - else: - # save the pyobject for later, in case its linked object becomes - # marked - self.rrc_more_pyobjects_to_scan.append(pyobject) - def rrc_major_collection_free(self): ll_assert(self.rrc_p_dict_nurs.length() == 0, "p_dict_nurs not empty 2") length_estimate = self.rrc_p_dict.length() @@ -3335,13 +3253,15 @@ else: self._rrc_free(pyobject) + def _rrc_visit_pyobj(self, pyobj): + pass + def _rrc_visit(pyobj, self_ptr): from rpython.rtyper.annlowlevel import cast_adr_to_nongc_instance # - debug_print("visit called!") self_adr = rffi.cast(llmemory.Address, self_ptr) self = cast_adr_to_nongc_instance(IncrementalMiniMarkGC, self_adr) - self._rrc_mark_cpyobj(pyobj) + self._rrc_visit_pyobj(pyobj) return rffi.cast(rffi.INT_real, 0) def _rrc_traverse(self, pyobject): @@ -3357,13 +3277,3 @@ def 
_rrc_gc_list_init(self, pygclist): pygclist.c_gc_next = pygclist pygclist.c_gc_prev = pygclist - - def _rrc_gc_print_list(self): - debug_print("gc_print_list start!") - curr = self.rrc_pyobj_list.c_gc_next - while curr != self.rrc_pyobj_list: - currobj = self.rrc_gc_as_pyobj(curr) - curr2 = self.rrc_pyobj_as_gc(currobj) - debug_print("gc_print_list: ", curr, ", obj:", currobj, ", curr: ", - curr2) - curr = curr.c_gc_next From pypy.commits at gmail.com Fri Jan 11 05:39:22 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 02:39:22 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Fixed some formatting issues Message-ID: <5c38725a.1c69fb81.73b0b.fe16@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95613:80824650968a Date: 2018-08-24 09:30 +0200 http://bitbucket.org/pypy/pypy/changeset/80824650968a/ Log: Fixed some formatting issues diff --git a/pypy/module/cpyext/parse/cpyext_object.h b/pypy/module/cpyext/parse/cpyext_object.h --- a/pypy/module/cpyext/parse/cpyext_object.h +++ b/pypy/module/cpyext/parse/cpyext_object.h @@ -332,4 +332,4 @@ typedef struct _gchdr_pyobject { Py_ssize_t ob_refcnt; Py_ssize_t ob_pypy_link; -} GCHdr_PyObject; \ No newline at end of file +} GCHdr_PyObject; diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py --- a/pypy/module/cpyext/pyobject.py +++ b/pypy/module/cpyext/pyobject.py @@ -406,6 +406,7 @@ # if w_obj is not None: # assert pyobj.c_ob_refcnt >= rawrefcount.REFCNT_FROM_PYPY + @init_function def write_w_marker_deallocating(space): if we_are_translated(): diff --git a/pypy/module/cpyext/src/object.c b/pypy/module/cpyext/src/object.c --- a/pypy/module/cpyext/src/object.c +++ b/pypy/module/cpyext/src/object.c @@ -221,4 +221,4 @@ { obj->ob_size = size; return (PyVarObject*)PyObject_Init((PyObject*)obj, type); -} \ No newline at end of file +} From pypy.commits at gmail.com Fri Jan 11 05:39:24 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 
02:39:24 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Added complex rawrefcount tests using dot files Message-ID: <5c38725c.1c69fb81.bf977.9a81@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95614:2e7b85611e30 Date: 2018-12-21 11:55 +0100 http://bitbucket.org/pypy/pypy/changeset/2e7b85611e30/ Log: Added complex rawrefcount tests using dot files Adapted traverse support in incminimark to support tests diff too long, truncating to 2000 out of 9197 lines diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -3253,9 +3253,6 @@ else: self._rrc_free(pyobject) - def _rrc_visit_pyobj(self, pyobj): - pass - def _rrc_visit(pyobj, self_ptr): from rpython.rtyper.annlowlevel import cast_adr_to_nongc_instance # @@ -3265,13 +3262,18 @@ return rffi.cast(rffi.INT_real, 0) def _rrc_traverse(self, pyobject): + from rpython.rlib.objectmodel import we_are_translated from rpython.rtyper.annlowlevel import (cast_nongc_instance_to_adr, llhelper) # pyobj = self._pyobj(pyobject) - callback_ptr = llhelper(self.RAWREFCOUNT_VISIT, - IncrementalMiniMarkGC._rrc_visit) - self_ptr = rffi.cast(rffi.VOIDP, cast_nongc_instance_to_adr(self)) + if we_are_translated(): + callback_ptr = llhelper(self.RAWREFCOUNT_VISIT, + IncrementalMiniMarkGC._rrc_visit) + self_ptr = rffi.cast(rffi.VOIDP, cast_nongc_instance_to_adr(self)) + else: + callback_ptr = self._rrc_visit_pyobj + self_ptr = None self.rrc_tp_traverse(pyobj, callback_ptr, self_ptr) def _rrc_gc_list_init(self, pygclist): diff --git a/rpython/memory/gc/test/__init__.py b/rpython/memory/gc/test/dot/__init__.py copy from rpython/memory/gc/test/__init__.py copy to rpython/memory/gc/test/dot/__init__.py diff --git a/rpython/memory/gc/test/dot/dot_parser.py b/rpython/memory/gc/test/dot/dot_parser.py new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/dot_parser.py @@ -0,0 +1,555 @@ +"""Graphviz's 
dot language parser. + +The dotparser parses GraphViz files in +dot and dot files and transforms them +into a class representation defined by `pydot`. + +Author: Michael Krause +Fixes by: Ero Carrera +""" +from __future__ import division +from __future__ import print_function +import sys + +from pyparsing import ( + nestedExpr, Literal, CaselessLiteral, + Word, OneOrMore, + Forward, + Group, Optional, Combine, + restOfLine, cStyleComment, nums, alphanums, + printables, + ParseException, ParseResults, CharsNotIn, + QuotedString) + +import pydot + +__author__ = ['Michael Krause', 'Ero Carrera'] +__license__ = 'MIT' + + +PY3 = sys.version_info >= (3, 0, 0) +if PY3: + str_type = str +else: + str_type = basestring + + +class P_AttrList(object): + + def __init__(self, toks): + + self.attrs = {} + i = 0 + + while i < len(toks): + attrname = toks[i] + if i+2 < len(toks) and toks[i+1] == '=': + attrvalue = toks[i+2] + i += 3 + else: + attrvalue = None + i += 1 + + self.attrs[attrname] = attrvalue + + + def __repr__(self): + + return "%s(%r)" % (self.__class__.__name__, self.attrs) + + + +class DefaultStatement(P_AttrList): + + def __init__(self, default_type, attrs): + + self.default_type = default_type + self.attrs = attrs + + def __repr__(self): + + return "%s(%s, %r)" % (self.__class__.__name__, + self.default_type, self.attrs) + + +top_graphs = list() + +def push_top_graph_stmt(str, loc, toks): + + attrs = {} + g = None + + for element in toks: + + if (isinstance(element, (ParseResults, tuple, list)) and + len(element) == 1 and + isinstance(element[0], str_type)): + + element = element[0] + + if element == 'strict': + attrs['strict'] = True + + elif element in ['graph', 'digraph']: + + attrs = {} + + g = pydot.Dot(graph_type=element, **attrs) + attrs['type'] = element + + top_graphs.append( g ) + + elif isinstance( element, str_type): + g.set_name( element ) + + elif isinstance(element, pydot.Subgraph): + + g.obj_dict['attributes'].update( element.obj_dict['attributes'] 
) + g.obj_dict['edges'].update( element.obj_dict['edges'] ) + g.obj_dict['nodes'].update( element.obj_dict['nodes'] ) + g.obj_dict['subgraphs'].update( element.obj_dict['subgraphs'] ) + + g.set_parent_graph(g) + + elif isinstance(element, P_AttrList): + attrs.update(element.attrs) + + elif isinstance(element, (ParseResults, list)): + add_elements(g, element) + + else: + raise ValueError( + 'Unknown element statement: {s}'.format(s=element)) + + + for g in top_graphs: + update_parent_graph_hierarchy(g) + + if len( top_graphs ) == 1: + return top_graphs[0] + + return top_graphs + + +def update_parent_graph_hierarchy(g, parent_graph=None, level=0): + + + if parent_graph is None: + parent_graph = g + + for key_name in ('edges',): + + if isinstance(g, pydot.frozendict): + item_dict = g + else: + item_dict = g.obj_dict + + if key_name not in item_dict: + continue + + for key, objs in item_dict[key_name].items(): + for obj in objs: + if ('parent_graph' in obj and + obj['parent_graph'].get_parent_graph()==g): + if obj['parent_graph'] is g: + pass + else: + obj['parent_graph'].set_parent_graph(parent_graph) + + if key_name == 'edges' and len(key) == 2: + for idx, vertex in enumerate( obj['points'] ): + if isinstance( vertex, + (pydot.Graph, + pydot.Subgraph, pydot.Cluster)): + vertex.set_parent_graph(parent_graph) + if isinstance( vertex, pydot.frozendict): + if vertex['parent_graph'] is g: + pass + else: + vertex['parent_graph'].set_parent_graph( + parent_graph) + + + +def add_defaults(element, defaults): + + d = element.__dict__ + for key, value in defaults.items(): + if not d.get(key): + d[key] = value + + + +def add_elements(g, toks, defaults_graph=None, + defaults_node=None, defaults_edge=None): + + if defaults_graph is None: + defaults_graph = {} + if defaults_node is None: + defaults_node = {} + if defaults_edge is None: + defaults_edge = {} + + for elm_idx, element in enumerate(toks): + + if isinstance(element, (pydot.Subgraph, pydot.Cluster)): + + 
add_defaults(element, defaults_graph) + g.add_subgraph(element) + + elif isinstance(element, pydot.Node): + + add_defaults(element, defaults_node) + g.add_node(element) + + elif isinstance(element, pydot.Edge): + + add_defaults(element, defaults_edge) + g.add_edge(element) + + elif isinstance(element, ParseResults): + + for e in element: + add_elements(g, [e], defaults_graph, + defaults_node, defaults_edge) + + elif isinstance(element, DefaultStatement): + + if element.default_type == 'graph': + + default_graph_attrs = pydot.Node('graph', **element.attrs) + g.add_node(default_graph_attrs) + + elif element.default_type == 'node': + + default_node_attrs = pydot.Node('node', **element.attrs) + g.add_node(default_node_attrs) + + elif element.default_type == 'edge': + + default_edge_attrs = pydot.Node('edge', **element.attrs) + g.add_node(default_edge_attrs) + defaults_edge.update(element.attrs) + + else: + raise ValueError( + 'Unknown DefaultStatement: {s}'.format( + s=element.default_type)) + + elif isinstance(element, P_AttrList): + + g.obj_dict['attributes'].update(element.attrs) + + else: + raise ValueError( + 'Unknown element statement: {s}'.format(s=element)) + + +def push_graph_stmt(str, loc, toks): + + g = pydot.Subgraph('') + add_elements(g, toks) + return g + + +def push_subgraph_stmt(str, loc, toks): + + g = pydot.Subgraph('') + for e in toks: + if len(e)==3: + e[2].set_name(e[1]) + if e[0] == 'subgraph': + e[2].obj_dict['show_keyword'] = True + return e[2] + else: + if e[0] == 'subgraph': + e[1].obj_dict['show_keyword'] = True + return e[1] + + return g + + +def push_default_stmt(str, loc, toks): + + # The pydot class instances should be marked as + # default statements to be inherited by actual + # graphs, nodes and edges. 
+ # + default_type = toks[0][0] + if len(toks) > 1: + attrs = toks[1].attrs + else: + attrs = {} + + if default_type in ['graph', 'node', 'edge']: + return DefaultStatement(default_type, attrs) + else: + raise ValueError( + 'Unknown default statement: {s}'.format(s=toks)) + + +def push_attr_list(str, loc, toks): + + p = P_AttrList(toks) + return p + + +def get_port(node): + + if len(node)>1: + if isinstance(node[1], ParseResults): + if len(node[1][0])==2: + if node[1][0][0]==':': + return node[1][0][1] + + return None + + +def do_node_ports(node): + + node_port = '' + if len(node) > 1: + node_port = ''.join( [str(a)+str(b) for a,b in node[1] ] ) + + return node_port + + +def push_edge_stmt(str, loc, toks): + + tok_attrs = [a for a in toks if isinstance(a, P_AttrList)] + attrs = {} + for a in tok_attrs: + attrs.update(a.attrs) + + e = [] + + if isinstance(toks[0][0], pydot.Graph): + + n_prev = pydot.frozendict(toks[0][0].obj_dict) + else: + n_prev = toks[0][0] + do_node_ports( toks[0] ) + + if isinstance(toks[2][0], ParseResults): + + n_next_list = [[n.get_name(),] for n in toks[2][0] ] + for n_next in [n for n in n_next_list]: + n_next_port = do_node_ports(n_next) + e.append(pydot.Edge(n_prev, n_next[0]+n_next_port, **attrs)) + + elif isinstance(toks[2][0], pydot.Graph): + + e.append(pydot.Edge(n_prev, + pydot.frozendict(toks[2][0].obj_dict), + **attrs)) + + elif isinstance(toks[2][0], pydot.Node): + + node = toks[2][0] + + if node.get_port() is not None: + name_port = node.get_name() + ":" + node.get_port() + else: + name_port = node.get_name() + + e.append(pydot.Edge(n_prev, name_port, **attrs)) + + # if the target of this edge is the name of a node + elif isinstance(toks[2][0], str_type): + + for n_next in [n for n in tuple(toks)[2::2]]: + + if (isinstance(n_next, P_AttrList) or + not isinstance(n_next[0], str_type)): + continue + + n_next_port = do_node_ports( n_next ) + e.append(pydot.Edge(n_prev, n_next[0]+n_next_port, **attrs)) + + n_prev = 
n_next[0]+n_next_port + else: + raise Exception( + 'Edge target {r} with type {s} unsupported.'.format( + r=toks[2][0], s=type(toks[2][0]))) + + return e + + + +def push_node_stmt(s, loc, toks): + + if len(toks) == 2: + attrs = toks[1].attrs + else: + attrs = {} + + node_name = toks[0] + if isinstance(node_name, list) or isinstance(node_name, tuple): + if len(node_name)>0: + node_name = node_name[0] + + n = pydot.Node(str(node_name), **attrs) + return n + + + + + + +graphparser = None + +def graph_definition(): + + global graphparser + + if not graphparser: + + # punctuation + colon = Literal(":") + lbrace = Literal("{") + rbrace = Literal("}") + lbrack = Literal("[") + rbrack = Literal("]") + lparen = Literal("(") + rparen = Literal(")") + equals = Literal("=") + comma = Literal(",") + dot = Literal(".") + slash = Literal("/") + bslash = Literal("\\") + star = Literal("*") + semi = Literal(";") + at = Literal("@") + minus = Literal("-") + + # keywords + strict_ = CaselessLiteral("strict") + graph_ = CaselessLiteral("graph") + digraph_ = CaselessLiteral("digraph") + subgraph_ = CaselessLiteral("subgraph") + node_ = CaselessLiteral("node") + edge_ = CaselessLiteral("edge") + + + # token definitions + + identifier = Word(alphanums + "_." 
).setName("identifier") + + double_quoted_string = QuotedString( + '"', multiline=True, unquoteResults=False, escChar='\\') # dblQuotedString + + noncomma = "".join([c for c in printables if c != ","]) + alphastring_ = OneOrMore(CharsNotIn(noncomma + ' ')) + + def parse_html(s, loc, toks): + return '<%s>' % ''.join(toks[0]) + + + opener = '<' + closer = '>' + html_text = nestedExpr( opener, closer, + ( CharsNotIn( opener + closer ) ) + ).setParseAction(parse_html).leaveWhitespace() + + ID = ( identifier | html_text | + double_quoted_string | #.setParseAction(strip_quotes) | + alphastring_ ).setName("ID") + + + float_number = Combine(Optional(minus) + + OneOrMore(Word(nums + "."))).setName("float_number") + + righthand_id = (float_number | ID ).setName("righthand_id") + + port_angle = (at + ID).setName("port_angle") + + port_location = (OneOrMore(Group(colon + ID)) | + Group(colon + lparen + + ID + comma + ID + rparen)).setName("port_location") + + port = (Group(port_location + Optional(port_angle)) | + Group(port_angle + Optional(port_location))).setName("port") + + node_id = (ID + Optional(port)) + a_list = OneOrMore(ID + Optional(equals + righthand_id) + + Optional(comma.suppress())).setName("a_list") + + attr_list = OneOrMore(lbrack.suppress() + Optional(a_list) + + rbrack.suppress()).setName("attr_list") + + attr_stmt = (Group(graph_ | node_ | edge_) + + attr_list).setName("attr_stmt") + + edgeop = (Literal("--") | Literal("->")).setName("edgeop") + + stmt_list = Forward() + graph_stmt = Group(lbrace.suppress() + Optional(stmt_list) + + rbrace.suppress() + + Optional(semi.suppress())).setName("graph_stmt") + + + edge_point = Forward() + + edgeRHS = OneOrMore(edgeop + edge_point) + edge_stmt = edge_point + edgeRHS + Optional(attr_list) + + subgraph = Group( + subgraph_ + Optional(ID) + graph_stmt).setName("subgraph") + + edge_point << Group( + subgraph | graph_stmt | node_id).setName('edge_point') + + node_stmt = ( + node_id + Optional(attr_list) + + 
Optional(semi.suppress())).setName("node_stmt") + + assignment = (ID + equals + righthand_id).setName("assignment") + stmt = (assignment | edge_stmt | attr_stmt | + subgraph | graph_stmt | node_stmt).setName("stmt") + stmt_list << OneOrMore(stmt + Optional(semi.suppress())) + + graphparser = OneOrMore( + (Optional(strict_) + Group((graph_ | digraph_)) + + Optional(ID) + graph_stmt).setResultsName("graph")) + + singleLineComment = Group( + "//" + restOfLine) | Group("#" + restOfLine) + + + # actions + + graphparser.ignore(singleLineComment) + graphparser.ignore(cStyleComment) + + assignment.setParseAction(push_attr_list) + a_list.setParseAction(push_attr_list) + edge_stmt.setParseAction(push_edge_stmt) + node_stmt.setParseAction(push_node_stmt) + attr_stmt.setParseAction(push_default_stmt) + + subgraph.setParseAction(push_subgraph_stmt) + graph_stmt.setParseAction(push_graph_stmt) + graphparser.setParseAction(push_top_graph_stmt) + + + return graphparser + + +def parse_dot_data(s): + """Parse DOT description in (unicode) string `s`. + + @return: Graphs that result from parsing. 
+ @rtype: `list` of `pydot.Dot` + """ + global top_graphs + top_graphs = list() + try: + graphparser = graph_definition() + graphparser.parseWithTabs() + tokens = graphparser.parseString(s) + return list(tokens) + except ParseException as err: + print( + err.line + + " "*(err.column-1) + "^" + + err) + return None diff --git a/rpython/memory/gc/test/dot/free_self_cpython.dot b/rpython/memory/gc/test/dot/free_self_cpython.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_self_cpython.dot @@ -0,0 +1,4 @@ +digraph G { + "a" [type=C, alive=n]; + "a" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/free_self_pypy.dot b/rpython/memory/gc/test/dot/free_self_pypy.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_self_pypy.dot @@ -0,0 +1,4 @@ +digraph G { + "a" [type=P, alive=n]; + "a" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/free_simple_cross.dot b/rpython/memory/gc/test/dot/free_simple_cross.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_simple_cross.dot @@ -0,0 +1,6 @@ +digraph G { + "a" [type=B, alive=n]; + "b" [type=C, alive=n]; + "a" -> "b"; + "b" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/keep_self_cpython.dot b/rpython/memory/gc/test/dot/keep_self_cpython.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/keep_self_cpython.dot @@ -0,0 +1,4 @@ +digraph G { + "a" [type=C, alive=y, ext_refcnt=1]; + "a" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/keep_self_pypy.dot b/rpython/memory/gc/test/dot/keep_self_pypy.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/keep_self_pypy.dot @@ -0,0 +1,4 @@ +digraph G { + "a" [type=P, alive=y, rooted=y]; + "a" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/pydot.py b/rpython/memory/gc/test/dot/pydot.py new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/pydot.py @@ -0,0 +1,1943 @@ +"""An interface to GraphViz.""" +from __future__ import division +from __future__ 
import print_function +import copy +import io +import errno +import os +import re +import subprocess +import sys +import tempfile +import warnings + +try: + import dot_parser +except Exception as e: + warnings.warn( + "Couldn't import dot_parser, " + "loading of dot files will not be possible.") + + +__author__ = 'Ero Carrera' +__version__ = '1.4.1.dev0' +__license__ = 'MIT' + + +PY3 = sys.version_info >= (3, 0, 0) +if PY3: + str_type = str +else: + str_type = basestring + + +GRAPH_ATTRIBUTES = { 'Damping', 'K', 'URL', 'aspect', 'bb', 'bgcolor', + 'center', 'charset', 'clusterrank', 'colorscheme', 'comment', 'compound', + 'concentrate', 'defaultdist', 'dim', 'dimen', 'diredgeconstraints', + 'dpi', 'epsilon', 'esep', 'fontcolor', 'fontname', 'fontnames', + 'fontpath', 'fontsize', 'id', 'label', 'labeljust', 'labelloc', + 'landscape', 'layers', 'layersep', 'layout', 'levels', 'levelsgap', + 'lheight', 'lp', 'lwidth', 'margin', 'maxiter', 'mclimit', 'mindist', + 'mode', 'model', 'mosek', 'nodesep', 'nojustify', 'normalize', 'nslimit', + 'nslimit1', 'ordering', 'orientation', 'outputorder', 'overlap', + 'overlap_scaling', 'pack', 'packmode', 'pad', 'page', 'pagedir', + 'quadtree', 'quantum', 'rankdir', 'ranksep', 'ratio', 'remincross', + 'repulsiveforce', 'resolution', 'root', 'rotate', 'searchsize', 'sep', + 'showboxes', 'size', 'smoothing', 'sortv', 'splines', 'start', + 'stylesheet', 'target', 'truecolor', 'viewport', 'voro_margin', + # for subgraphs + 'rank' } + + +EDGE_ATTRIBUTES = { 'URL', 'arrowhead', 'arrowsize', 'arrowtail', + 'color', 'colorscheme', 'comment', 'constraint', 'decorate', 'dir', + 'edgeURL', 'edgehref', 'edgetarget', 'edgetooltip', 'fontcolor', + 'fontname', 'fontsize', 'headURL', 'headclip', 'headhref', 'headlabel', + 'headport', 'headtarget', 'headtooltip', 'href', 'id', 'label', + 'labelURL', 'labelangle', 'labeldistance', 'labelfloat', 'labelfontcolor', + 'labelfontname', 'labelfontsize', 'labelhref', 'labeltarget', + 'labeltooltip', 
'layer', 'len', 'lhead', 'lp', 'ltail', 'minlen', + 'nojustify', 'penwidth', 'pos', 'samehead', 'sametail', 'showboxes', + 'style', 'tailURL', 'tailclip', 'tailhref', 'taillabel', 'tailport', + 'tailtarget', 'tailtooltip', 'target', 'tooltip', 'weight', + 'rank' } + + +NODE_ATTRIBUTES = { 'URL', 'color', 'colorscheme', 'comment', + 'distortion', 'fillcolor', 'fixedsize', 'fontcolor', 'fontname', + 'fontsize', 'group', 'height', 'id', 'image', 'imagescale', 'label', + 'labelloc', 'layer', 'margin', 'nojustify', 'orientation', 'penwidth', + 'peripheries', 'pin', 'pos', 'rects', 'regular', 'root', 'samplepoints', + 'shape', 'shapefile', 'showboxes', 'sides', 'skew', 'sortv', 'style', + 'target', 'tooltip', 'vertices', 'width', 'z', + # The following are attributes dot2tex + 'texlbl', 'texmode' } + + +CLUSTER_ATTRIBUTES = { 'K', 'URL', 'bgcolor', 'color', 'colorscheme', + 'fillcolor', 'fontcolor', 'fontname', 'fontsize', 'label', 'labeljust', + 'labelloc', 'lheight', 'lp', 'lwidth', 'nojustify', 'pencolor', + 'penwidth', 'peripheries', 'sortv', 'style', 'target', 'tooltip' } + + +DEFAULT_PROGRAMS = { + 'dot', + 'twopi', + 'neato', + 'circo', + 'fdp', + 'sfdp', +} + + +def is_windows(): + # type: () -> bool + return os.name == 'nt' + + +def is_anacoda(): + # type: () -> bool + return os.path.exists(os.path.join(sys.prefix, 'conda-meta')) + + +def get_executable_extension(): + # type: () -> str + if is_windows(): + return '.bat' if is_anacoda() else '.exe' + else: + return '' + + +def call_graphviz(program, arguments, working_dir, **kwargs): + # explicitly inherit `$PATH`, on Windows too, + # with `shell=False` + + if program in DEFAULT_PROGRAMS: + extension = get_executable_extension() + program += extension + + if arguments is None: + arguments = [] + + env = { + 'PATH': os.environ.get('PATH', ''), + 'LD_LIBRARY_PATH': os.environ.get('LD_LIBRARY_PATH', ''), + } + + program_with_args = [program, ] + arguments + + process = subprocess.Popen( + program_with_args, + 
env=env, + cwd=working_dir, + shell=False, + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + **kwargs + ) + stdout_data, stderr_data = process.communicate() + + return stdout_data, stderr_data, process + + +# +# Extended version of ASPN's Python Cookbook Recipe: +# Frozen dictionaries. +# https://code.activestate.com/recipes/414283/ +# +# This version freezes dictionaries used as values within dictionaries. +# +class frozendict(dict): + def _blocked_attribute(obj): + raise AttributeError('A frozendict cannot be modified.') + _blocked_attribute = property(_blocked_attribute) + + __delitem__ = __setitem__ = clear = _blocked_attribute + pop = popitem = setdefault = update = _blocked_attribute + + def __new__(cls, *args, **kw): + new = dict.__new__(cls) + + args_ = [] + for arg in args: + if isinstance(arg, dict): + arg = copy.copy(arg) + for k in arg: + v = arg[k] + if isinstance(v, frozendict): + arg[k] = v + elif isinstance(v, dict): + arg[k] = frozendict(v) + elif isinstance(v, list): + v_ = list() + for elm in v: + if isinstance(elm, dict): + v_.append( frozendict(elm) ) + else: + v_.append( elm ) + arg[k] = tuple(v_) + args_.append( arg ) + else: + args_.append( arg ) + + dict.__init__(new, *args_, **kw) + return new + + def __init__(self, *args, **kw): + pass + + def __hash__(self): + try: + return self._cached_hash + except AttributeError: + h = self._cached_hash = hash(tuple(sorted(self.items()))) + return h + + def __repr__(self): + return "frozendict(%s)" % dict.__repr__(self) + + +dot_keywords = ['graph', 'subgraph', 'digraph', 'node', 'edge', 'strict'] + +id_re_alpha_nums = re.compile('^[_a-zA-Z][a-zA-Z0-9_,]*$', re.UNICODE) +id_re_alpha_nums_with_ports = re.compile( + '^[_a-zA-Z][a-zA-Z0-9_,:\"]*[a-zA-Z0-9_,\"]+$', re.UNICODE) +id_re_num = re.compile('^[0-9,]+$', re.UNICODE) +id_re_with_port = re.compile('^([^:]*):([^:]*)$', re.UNICODE) +id_re_dbl_quoted = re.compile('^\".*\"$', re.S|re.UNICODE) +id_re_html = re.compile('^<.*>$', re.S|re.UNICODE) + + 
+def needs_quotes( s ): + """Checks whether a string is a dot language ID. + + It will check whether the string is solely composed + by the characters allowed in an ID or not. + If the string is one of the reserved keywords it will + need quotes too but the user will need to add them + manually. + """ + + # If the name is a reserved keyword it will need quotes but pydot + # can't tell when it's being used as a keyword or when it's simply + # a name. Hence the user needs to supply the quotes when an element + # would use a reserved keyword as name. This function will return + # false indicating that a keyword string, if provided as-is, won't + # need quotes. + if s in dot_keywords: + return False + + chars = [ord(c) for c in s if ord(c)>0x7f or ord(c)==0] + if chars and not id_re_dbl_quoted.match(s) and not id_re_html.match(s): + return True + + for test_re in [id_re_alpha_nums, id_re_num, + id_re_dbl_quoted, id_re_html, + id_re_alpha_nums_with_ports]: + if test_re.match(s): + return False + + m = id_re_with_port.match(s) + if m: + return needs_quotes(m.group(1)) or needs_quotes(m.group(2)) + + return True + + +def quote_if_necessary(s): + """Enclode attribute value in quotes, if needed.""" + if isinstance(s, bool): + if s is True: + return 'True' + return 'False' + + if not isinstance( s, str_type): + return s + + if not s: + return s + + if needs_quotes(s): + replace = {'"' : r'\"', + "\n" : r'\n', + "\r" : r'\r'} + for (a,b) in replace.items(): + s = s.replace(a, b) + + return '"' + s + '"' + + return s + + + +def graph_from_dot_data(s): + """Load graphs from DOT description in string `s`. + + @param s: string in [DOT language]( + https://en.wikipedia.org/wiki/DOT_(graph_description_language)) + + @return: Graphs that result from parsing. + @rtype: `list` of `pydot.Dot` + """ + return dot_parser.parse_dot_data(s) + + +def graph_from_dot_file(path, encoding=None): + """Load graphs from DOT file at `path`. 
+ + @param path: to DOT file + @param encoding: as passed to `io.open`. + For example, `'utf-8'`. + + @return: Graphs that result from parsing. + @rtype: `list` of `pydot.Dot` + """ + with io.open(path, 'rt', encoding=encoding) as f: + s = f.read() + if not PY3: + s = unicode(s) + graphs = graph_from_dot_data(s) + return graphs + + + +def graph_from_edges(edge_list, node_prefix='', directed=False): + """Creates a basic graph out of an edge list. + + The edge list has to be a list of tuples representing + the nodes connected by the edge. + The values can be anything: bool, int, float, str. + + If the graph is undirected by default, it is only + calculated from one of the symmetric halves of the matrix. + """ + + if directed: + graph = Dot(graph_type='digraph') + + else: + graph = Dot(graph_type='graph') + + for edge in edge_list: + + if isinstance(edge[0], str): + src = node_prefix + edge[0] + else: + src = node_prefix + str(edge[0]) + + if isinstance(edge[1], str): + dst = node_prefix + edge[1] + else: + dst = node_prefix + str(edge[1]) + + e = Edge( src, dst ) + graph.add_edge(e) + + return graph + + +def graph_from_adjacency_matrix(matrix, node_prefix= u'', directed=False): + """Creates a basic graph out of an adjacency matrix. + + The matrix has to be a list of rows of values + representing an adjacency matrix. + The values can be anything: bool, int, float, as long + as they can evaluate to True or False. + """ + + node_orig = 1 + + if directed: + graph = Dot(graph_type='digraph') + else: + graph = Dot(graph_type='graph') + + for row in matrix: + if not directed: + skip = matrix.index(row) + r = row[skip:] + else: + skip = 0 + r = row + node_dest = skip+1 + + for e in r: + if e: + graph.add_edge( + Edge( node_prefix + node_orig, + node_prefix + node_dest) ) + node_dest += 1 + node_orig += 1 + + return graph + + + +def graph_from_incidence_matrix(matrix, node_prefix='', directed=False): + """Creates a basic graph out of an incidence matrix. 
+ + The matrix has to be a list of rows of values + representing an incidence matrix. + The values can be anything: bool, int, float, as long + as they can evaluate to True or False. + """ + + node_orig = 1 + + if directed: + graph = Dot(graph_type='digraph') + else: + graph = Dot(graph_type='graph') + + for row in matrix: + nodes = [] + c = 1 + + for node in row: + if node: + nodes.append(c*node) + c += 1 + nodes.sort() + + if len(nodes) == 2: + graph.add_edge( + Edge( node_prefix + abs(nodes[0]), + node_prefix + nodes[1] )) + + if not directed: + graph.set_simplify(True) + + return graph + + +class Common(object): + """Common information to several classes. + + Should not be directly used, several classes are derived from + this one. + """ + + + def __getstate__(self): + + dict = copy.copy(self.obj_dict) + + return dict + + + def __setstate__(self, state): + + self.obj_dict = state + + + def __get_attribute__(self, attr): + """Look for default attributes for this node""" + + attr_val = self.obj_dict['attributes'].get(attr, None) + + if attr_val is None: + # get the defaults for nodes/edges + + default_node_name = self.obj_dict['type'] + + # The defaults for graphs are set on a node named 'graph' + if default_node_name in ('subgraph', 'digraph', 'cluster'): + default_node_name = 'graph' + + g = self.get_parent_graph() + if g is not None: + defaults = g.get_node( default_node_name ) + else: + return None + + # Multiple defaults could be set by having repeated 'graph [...]' + # 'node [...]', 'edge [...]' statements. In such case, if the + # same attribute is set in different statements, only the first + # will be returned. In order to get all, one would call the + # get_*_defaults() methods and handle those. Or go node by node + # (of the ones specifying defaults) and modify the attributes + # individually. 
+ # + if not isinstance(defaults, (list, tuple)): + defaults = [defaults] + + for default in defaults: + attr_val = default.obj_dict['attributes'].get(attr, None) + if attr_val: + return attr_val + else: + return attr_val + + return None + + + def set_parent_graph(self, parent_graph): + + self.obj_dict['parent_graph'] = parent_graph + + + def get_parent_graph(self): + + return self.obj_dict.get('parent_graph', None) + + + def set(self, name, value): + """Set an attribute value by name. + + Given an attribute 'name' it will set its value to 'value'. + There's always the possibility of using the methods: + + set_'name'(value) + + which are defined for all the existing attributes. + """ + + self.obj_dict['attributes'][name] = value + + + def get(self, name): + """Get an attribute value by name. + + Given an attribute 'name' it will get its value. + There's always the possibility of using the methods: + + get_'name'() + + which are defined for all the existing attributes. + """ + + return self.obj_dict['attributes'].get(name, None) + + + def get_attributes(self): + """""" + + return self.obj_dict['attributes'] + + + def set_sequence(self, seq): + + self.obj_dict['sequence'] = seq + + + def get_sequence(self): + + return self.obj_dict['sequence'] + + + def create_attribute_methods(self, obj_attributes): + + #for attr in self.obj_dict['attributes']: + for attr in obj_attributes: + + # Generate all the Setter methods. + # + self.__setattr__( + 'set_'+attr, + lambda x, a=attr : + self.obj_dict['attributes'].__setitem__(a, x) ) + + # Generate all the Getter methods. + # + self.__setattr__( + 'get_'+attr, lambda a=attr : self.__get_attribute__(a)) + + + +class Error(Exception): + """General error handling class. + """ + def __init__(self, value): + self.value = value + def __str__(self): + return self.value + + +class InvocationException(Exception): + """Indicate ploblem while running any GraphViz executable. 
+ """ + def __init__(self, value): + self.value = value + def __str__(self): + return self.value + + + +class Node(Common): + """A graph node. + + This class represents a graph's node with all its attributes. + + node(name, attribute=value, ...) + + name: node's name + + All the attributes defined in the Graphviz dot language should + be supported. + """ + + def __init__(self, name = '', obj_dict = None, **attrs): + + # + # Nodes will take attributes of + # all other types because the defaults + # for any GraphViz object are dealt with + # as if they were Node definitions + # + + if obj_dict is not None: + + self.obj_dict = obj_dict + + else: + + self.obj_dict = dict() + + # Copy the attributes + # + self.obj_dict[ 'attributes' ] = dict( attrs ) + self.obj_dict[ 'type' ] = 'node' + self.obj_dict[ 'parent_graph' ] = None + self.obj_dict[ 'parent_node_list' ] = None + self.obj_dict[ 'sequence' ] = None + + # Remove the compass point + # + port = None + if isinstance(name, str_type) and not name.startswith('"'): + idx = name.find(':') + if idx > 0 and idx+1 < len(name): + name, port = name[:idx], name[idx:] + + if isinstance(name, int): + name = str(name) + + self.obj_dict['name'] = quote_if_necessary(name) + self.obj_dict['port'] = port + + self.create_attribute_methods(NODE_ATTRIBUTES) + + def __str__(self): + return self.to_string() + + + def set_name(self, node_name): + """Set the node's name.""" + + self.obj_dict['name'] = node_name + + + def get_name(self): + """Get the node's name.""" + + return self.obj_dict['name'] + + + def get_port(self): + """Get the node's port.""" + + return self.obj_dict['port'] + + + def add_style(self, style): + + styles = self.obj_dict['attributes'].get('style', None) + if not styles and style: + styles = [ style ] + else: + styles = styles.split(',') + styles.append( style ) + + self.obj_dict['attributes']['style'] = ','.join( styles ) + + + def to_string(self): + """Return string representation of node in DOT language.""" + + + # 
RMF: special case defaults for node, edge and graph properties. + # + node = quote_if_necessary(self.obj_dict['name']) + + node_attr = list() + + for attr in sorted(self.obj_dict['attributes']): + value = self.obj_dict['attributes'][attr] + if value == '': + value = '""' + if value is not None: + node_attr.append( + '%s=%s' % (attr, quote_if_necessary(value) ) ) + else: + node_attr.append( attr ) + + + # No point in having nodes setting any defaults if the don't set + # any attributes... + # + if node in ('graph', 'node', 'edge') and len(node_attr) == 0: + return '' + + node_attr = ', '.join(node_attr) + + if node_attr: + node += ' [' + node_attr + ']' + + return node + ';' + + + +class Edge(Common): + """A graph edge. + + This class represents a graph's edge with all its attributes. + + edge(src, dst, attribute=value, ...) + + src: source node + dst: destination node + + `src` and `dst` can be specified as a `Node` object, + or as the node's name string. + + All the attributes defined in the Graphviz dot language should + be supported. + + Attributes can be set through the dynamically generated methods: + + set_[attribute name], i.e. set_label, set_fontname + + or directly by using the instance's special dictionary: + + Edge.obj_dict['attributes'][attribute name], i.e. 
+ + edge_instance.obj_dict['attributes']['label'] + edge_instance.obj_dict['attributes']['fontname'] + + """ + + def __init__(self, src='', dst='', obj_dict=None, **attrs): + self.obj_dict = dict() + if isinstance(src, Node): + src = src.get_name() + if isinstance(dst, Node): + dst = dst.get_name() + points = (quote_if_necessary(src), + quote_if_necessary(dst)) + self.obj_dict['points'] = points + if obj_dict is None: + # Copy the attributes + self.obj_dict[ 'attributes' ] = dict( attrs ) + self.obj_dict[ 'type' ] = 'edge' + self.obj_dict[ 'parent_graph' ] = None + self.obj_dict[ 'parent_edge_list' ] = None + self.obj_dict[ 'sequence' ] = None + else: + self.obj_dict = obj_dict + self.create_attribute_methods(EDGE_ATTRIBUTES) + + def __str__(self): + return self.to_string() + + + def get_source(self): + """Get the edges source node name.""" + + return self.obj_dict['points'][0] + + + def get_destination(self): + """Get the edge's destination node name.""" + + return self.obj_dict['points'][1] + + + def __hash__(self): + + return hash( hash(self.get_source()) + + hash(self.get_destination()) ) + + + def __eq__(self, edge): + """Compare two edges. + + If the parent graph is directed, arcs linking + node A to B are considered equal and A->B != B->A + + If the parent graph is undirected, any edge + connecting two nodes is equal to any other + edge connecting the same nodes, A->B == B->A + """ + + if not isinstance(edge, Edge): + raise Error('Can not compare and ' + 'edge to a non-edge object.') + + if self.get_parent_graph().get_top_graph_type() == 'graph': + + # If the graph is undirected, the edge has neither + # source nor destination. 
+ # + if ( ( self.get_source() == edge.get_source() and + self.get_destination() == edge.get_destination() ) or + ( edge.get_source() == self.get_destination() and + edge.get_destination() == self.get_source() ) ): + return True + + else: + + if (self.get_source()==edge.get_source() and + self.get_destination()==edge.get_destination()): + return True + + return False + + + + def parse_node_ref(self, node_str): + + if not isinstance(node_str, str): + return node_str + + if node_str.startswith('"') and node_str.endswith('"'): + + return node_str + + node_port_idx = node_str.rfind(':') + + if (node_port_idx>0 and node_str[0]=='"' and + node_str[node_port_idx-1]=='"'): + + return node_str + + if node_port_idx>0: + + a = node_str[:node_port_idx] + b = node_str[node_port_idx+1:] + + node = quote_if_necessary(a) + + node += ':'+quote_if_necessary(b) + + return node + + return node_str + + + def to_string(self): + """Return string representation of edge in DOT language.""" + + src = self.parse_node_ref( self.get_source() ) + dst = self.parse_node_ref( self.get_destination() ) + + if isinstance(src, frozendict): + edge = [ Subgraph(obj_dict=src).to_string() ] + elif isinstance(src, int): + edge = [ str(src) ] + else: + edge = [ src ] + + if (self.get_parent_graph() and + self.get_parent_graph().get_top_graph_type() and + self.get_parent_graph().get_top_graph_type() == 'digraph' ): + + edge.append( '->' ) + + else: + edge.append( '--' ) + + if isinstance(dst, frozendict): + edge.append( Subgraph(obj_dict=dst).to_string() ) + elif isinstance(dst, int): + edge.append( str(dst) ) + else: + edge.append( dst ) + + + edge_attr = list() + + for attr in sorted(self.obj_dict['attributes']): + value = self.obj_dict['attributes'][attr] + if value == '': + value = '""' + if value is not None: + edge_attr.append( + '%s=%s' % (attr, quote_if_necessary(value) ) ) + else: + edge_attr.append( attr ) + + edge_attr = ', '.join(edge_attr) + + if edge_attr: + edge.append( ' [' + edge_attr + ']' 
) + + return ' '.join(edge) + ';' + + + + + +class Graph(Common): + """Class representing a graph in Graphviz's dot language. + + This class implements the methods to work on a representation + of a graph in Graphviz's dot language. + + graph( graph_name='G', graph_type='digraph', + strict=False, suppress_disconnected=False, attribute=value, ...) + + graph_name: + the graph's name + graph_type: + can be 'graph' or 'digraph' + suppress_disconnected: + defaults to False, which will remove from the + graph any disconnected nodes. + simplify: + if True it will avoid displaying equal edges, i.e. + only one edge between two nodes. removing the + duplicated ones. + + All the attributes defined in the Graphviz dot language should + be supported. + + Attributes can be set through the dynamically generated methods: + + set_[attribute name], i.e. set_size, set_fontname + + or using the instance's attributes: + + Graph.obj_dict['attributes'][attribute name], i.e. + + graph_instance.obj_dict['attributes']['label'] + graph_instance.obj_dict['attributes']['fontname'] + """ + + + def __init__(self, graph_name='G', obj_dict=None, + graph_type='digraph', strict=False, + suppress_disconnected=False, simplify=False, **attrs): + + if obj_dict is not None: + self.obj_dict = obj_dict + + else: + + self.obj_dict = dict() + + self.obj_dict['attributes'] = dict(attrs) + + if graph_type not in ['graph', 'digraph']: + raise Error(( + 'Invalid type "{t}". 
' + 'Accepted graph types are: ' + 'graph, digraph').format(t=graph_type)) + + + self.obj_dict['name'] = quote_if_necessary(graph_name) + self.obj_dict['type'] = graph_type + + self.obj_dict['strict'] = strict + self.obj_dict['suppress_disconnected'] = suppress_disconnected + self.obj_dict['simplify'] = simplify + + self.obj_dict['current_child_sequence'] = 1 + self.obj_dict['nodes'] = dict() + self.obj_dict['edges'] = dict() + self.obj_dict['subgraphs'] = dict() + + self.set_parent_graph(self) + + + self.create_attribute_methods(GRAPH_ATTRIBUTES) + + def __str__(self): + return self.to_string() + + + def get_graph_type(self): + + return self.obj_dict['type'] + + + def get_top_graph_type(self): + + parent = self + while True: + parent_ = parent.get_parent_graph() + if parent_ == parent: + break + parent = parent_ + + return parent.obj_dict['type'] + + + def set_graph_defaults(self, **attrs): + + self.add_node( Node('graph', **attrs) ) + + + def get_graph_defaults(self, **attrs): + + graph_nodes = self.get_node('graph') + + if isinstance( graph_nodes, (list, tuple)): + return [ node.get_attributes() for node in graph_nodes ] + + return graph_nodes.get_attributes() + + + + def set_node_defaults(self, **attrs): + + self.add_node( Node('node', **attrs) ) + + + def get_node_defaults(self, **attrs): + + + graph_nodes = self.get_node('node') + + if isinstance( graph_nodes, (list, tuple)): + return [ node.get_attributes() for node in graph_nodes ] + + return graph_nodes.get_attributes() + + + def set_edge_defaults(self, **attrs): + + self.add_node( Node('edge', **attrs) ) + + + + def get_edge_defaults(self, **attrs): + + graph_nodes = self.get_node('edge') + + if isinstance( graph_nodes, (list, tuple)): + return [ node.get_attributes() for node in graph_nodes ] + + return graph_nodes.get_attributes() + + + + def set_simplify(self, simplify): + """Set whether to simplify or not. + + If True it will avoid displaying equal edges, i.e. + only one edge between two nodes. 
removing the + duplicated ones. + """ + + self.obj_dict['simplify'] = simplify + + + + def get_simplify(self): + """Get whether to simplify or not. + + Refer to set_simplify for more information. + """ + + return self.obj_dict['simplify'] + + + def set_type(self, graph_type): + """Set the graph's type, 'graph' or 'digraph'.""" + + self.obj_dict['type'] = graph_type + + + + def get_type(self): + """Get the graph's type, 'graph' or 'digraph'.""" + + return self.obj_dict['type'] + + + + def set_name(self, graph_name): + """Set the graph's name.""" + + self.obj_dict['name'] = graph_name + + + + def get_name(self): + """Get the graph's name.""" + + return self.obj_dict['name'] + + + + def set_strict(self, val): + """Set graph to 'strict' mode. + + This option is only valid for top level graphs. + """ + + self.obj_dict['strict'] = val + + + + def get_strict(self, val): + """Get graph's 'strict' mode (True, False). + + This option is only valid for top level graphs. + """ + + return self.obj_dict['strict'] + + + + def set_suppress_disconnected(self, val): + """Suppress disconnected nodes in the output graph. + + This option will skip nodes in + the graph with no incoming or outgoing + edges. This option works also + for subgraphs and has effect only in the + current graph/subgraph. + """ + + self.obj_dict['suppress_disconnected'] = val + + + + def get_suppress_disconnected(self, val): + """Get if suppress disconnected is set. + + Refer to set_suppress_disconnected for more information. + """ + + return self.obj_dict['suppress_disconnected'] + + + def get_next_sequence_number(self): + + seq = self.obj_dict['current_child_sequence'] + + self.obj_dict['current_child_sequence'] += 1 + + return seq + + + + def add_node(self, graph_node): + """Adds a node object to the graph. + + It takes a node object as its only argument and returns + None. 
+ """ + + if not isinstance(graph_node, Node): + raise TypeError( + 'add_node() received ' + + 'a non node class object: ' + str(graph_node)) + + + node = self.get_node(graph_node.get_name()) + + if not node: + + self.obj_dict['nodes'][graph_node.get_name()] = [ + graph_node.obj_dict ] + + #self.node_dict[graph_node.get_name()] = graph_node.attributes + graph_node.set_parent_graph(self.get_parent_graph()) + + else: + + self.obj_dict['nodes'][graph_node.get_name()].append( + graph_node.obj_dict ) + + graph_node.set_sequence(self.get_next_sequence_number()) + + + + def del_node(self, name, index=None): + """Delete a node from the graph. + + Given a node's name all node(s) with that same name + will be deleted if 'index' is not specified or set + to None. + If there are several nodes with that same name and + 'index' is given, only the node in that position + will be deleted. + + 'index' should be an integer specifying the position + of the node to delete. If index is larger than the + number of nodes with that name, no action is taken. + + If nodes are deleted it returns True. If no action + is taken it returns False. + """ + + if isinstance(name, Node): + name = name.get_name() + + if name in self.obj_dict['nodes']: + + if (index is not None and + index < len(self.obj_dict['nodes'][name])): + del self.obj_dict['nodes'][name][index] + return True + else: + del self.obj_dict['nodes'][name] + return True + + return False + + + def get_node(self, name): + """Retrieve a node from the graph. + + Given a node's name the corresponding Node + instance will be returned. + + If one or more nodes exist with that name a list of + Node instances is returned. + An empty list is returned otherwise. 
+ """ + + match = list() + + if name in self.obj_dict['nodes']: + + match.extend( + [Node(obj_dict=obj_dict) + for obj_dict in self.obj_dict['nodes'][name]]) + + return match + + + def get_nodes(self): + """Get the list of Node instances.""" + + return self.get_node_list() + + + def get_node_list(self): + """Get the list of Node instances. + + This method returns the list of Node instances + composing the graph. + """ + + node_objs = list() + + for node in self.obj_dict['nodes']: + obj_dict_list = self.obj_dict['nodes'][node] + node_objs.extend( [ Node( obj_dict = obj_d ) + for obj_d in obj_dict_list ] ) + + return node_objs + + + + def add_edge(self, graph_edge): + """Adds an edge object to the graph. + + It takes a edge object as its only argument and returns + None. + """ + + if not isinstance(graph_edge, Edge): + raise TypeError( + 'add_edge() received a non edge class object: ' + + str(graph_edge)) + + edge_points = ( graph_edge.get_source(), + graph_edge.get_destination() ) + + if edge_points in self.obj_dict['edges']: + + edge_list = self.obj_dict['edges'][edge_points] + edge_list.append(graph_edge.obj_dict) + + else: + + self.obj_dict['edges'][edge_points] = [ graph_edge.obj_dict ] + + + graph_edge.set_sequence( self.get_next_sequence_number() ) + + graph_edge.set_parent_graph( self.get_parent_graph() ) + + + + def del_edge(self, src_or_list, dst=None, index=None): + """Delete an edge from the graph. + + Given an edge's (source, destination) node names all + matching edges(s) will be deleted if 'index' is not + specified or set to None. + If there are several matching edges and 'index' is + given, only the edge in that position will be deleted. + + 'index' should be an integer specifying the position + of the edge to delete. If index is larger than the + number of matching edges, no action is taken. + + If edges are deleted it returns True. If no action + is taken it returns False. 
+ """ + + if isinstance( src_or_list, (list, tuple)): + if dst is not None and isinstance(dst, int): + index = dst + src, dst = src_or_list + else: + src, dst = src_or_list, dst + + if isinstance(src, Node): + src = src.get_name() + + if isinstance(dst, Node): + dst = dst.get_name() + + if (src, dst) in self.obj_dict['edges']: + + if (index is not None and + index < len(self.obj_dict['edges'][(src, dst)])): + del self.obj_dict['edges'][(src, dst)][index] + return True + else: + del self.obj_dict['edges'][(src, dst)] + return True + + return False + + + def get_edge(self, src_or_list, dst=None): + """Retrieved an edge from the graph. + + Given an edge's source and destination the corresponding + Edge instance(s) will be returned. + + If one or more edges exist with that source and destination + a list of Edge instances is returned. + An empty list is returned otherwise. + """ + + if isinstance( src_or_list, (list, tuple)) and dst is None: + edge_points = tuple(src_or_list) + edge_points_reverse = (edge_points[1], edge_points[0]) + else: + edge_points = (src_or_list, dst) + edge_points_reverse = (dst, src_or_list) + + match = list() + + if edge_points in self.obj_dict['edges'] or ( + self.get_top_graph_type() == 'graph' and + edge_points_reverse in self.obj_dict['edges']): + + edges_obj_dict = self.obj_dict['edges'].get( + edge_points, + self.obj_dict['edges'].get( edge_points_reverse, None )) + + for edge_obj_dict in edges_obj_dict: + match.append( + Edge(edge_points[0], + edge_points[1], + obj_dict=edge_obj_dict)) + + return match + + + def get_edges(self): + return self.get_edge_list() From pypy.commits at gmail.com Fri Jan 11 05:39:26 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 02:39:26 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Finished implementation of dot file tests for rawrefcount Message-ID: <5c38725e.1c69fb81.84614.11b0@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95615:48c91f03eaa6 Date: 
2018-12-27 16:22 +0100 http://bitbucket.org/pypy/pypy/changeset/48c91f03eaa6/ Log: Finished implementation of dot file tests for rawrefcount Removed obsolete tests, that will be replaced by dot tests diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py --- a/rpython/memory/gc/test/test_rawrefcount.py +++ b/rpython/memory/gc/test/test_rawrefcount.py @@ -387,66 +387,11 @@ self._collect(major=True) check_alive(0) - def test_linked_cycle_self_reference_dies_without_external_reference(self): - p1, p1ref, r1, r1addr, check_alive = ( - self._rawrefcount_pair(42)) - r1.c_ob_refcnt += 1 - p1.next = p1 - check_alive(+1) - self._collect(major=True, expected_trigger=1) - py.test.raises(RuntimeError, "p1.x") # dead - assert r1.c_ob_refcnt == 1 # in the pending list - assert r1.c_ob_pypy_link == 0 - assert self.gc.rawrefcount_next_dead() == r1addr - assert self.gc.rawrefcount_next_dead() == llmemory.NULL - assert self.gc.rawrefcount_next_dead() == llmemory.NULL - self.gc.check_no_more_rawrefcount_state() - lltype.free(r1, flavor='raw') - - def test_linked_cycle_self_reference_survives_with_pyobj_reference(self): - p1, p1ref, r1, r1addr, check_alive = ( - self._rawrefcount_pair(42, create_immortal=True)) - r1.c_ob_refcnt += 2 # the pyobject is kept alive - p1.next = p1 - check_alive(+2) - self._collect(major=True) - check_alive(+2) - r1.c_ob_refcnt -= 1 # the external reference from pyobj is removed - check_alive(+1) - self._collect(major=True, expected_trigger=1) - py.test.raises(RuntimeError, "p1.x") # dead - assert r1.c_ob_refcnt == 1 # in the pending list - assert r1.c_ob_pypy_link == 0 - assert self.gc.rawrefcount_next_dead() == r1addr - assert self.gc.rawrefcount_next_dead() == llmemory.NULL - assert self.gc.rawrefcount_next_dead() == llmemory.NULL - self.gc.check_no_more_rawrefcount_state() - lltype.free(r1, flavor='raw') - - def test_linked_cycle_self_reference_survives_with_pypy_reference(self): - p1, p1ref, r1, r1addr, 
check_alive = ( - self._rawrefcount_pair(42, create_immortal=True)) - r1.c_ob_refcnt += 1 - p1.next = p1 - self.stackroots.append(p1) - check_alive(+1) - self._collect(major=True) - assert p1.x == 42 - assert self.trigger == [] - check_alive(+1) - p1 = self.stackroots.pop() - check_alive(+1) - self._collect(major=True, expected_trigger=1) - py.test.raises(RuntimeError, "p1.x") # dead - assert r1.c_ob_refcnt == 1 - assert r1.c_ob_pypy_link == 0 - assert self.gc.rawrefcount_next_dead() == r1addr - self.gc.check_no_more_rawrefcount_state() - lltype.free(r1, flavor='raw') - dot_dir = os.path.join(os.path.realpath(os.path.dirname(__file__)), "dot") dot_files = [file for file in os.listdir(dot_dir) if file.endswith(".dot")] + @py.test.mark.dont_track_allocations('intentionally keep objects alive, ' + 'because we do the checks ourselves') @py.test.mark.parametrize("file", dot_files) def test_dots(self, file): from rpython.memory.gc.test.dot import pydot @@ -488,6 +433,7 @@ g = pydot.graph_from_dot_file(path)[0] nodes = {} + # create objects from graph for n in g.get_nodes(): name = n.get_name() attr = n.obj_dict['attributes'] @@ -512,6 +458,8 @@ r.c_ob_refcnt = ext_refcnt nodes[name] = BorderNode(p, pref, r, raddr, check_alive, info) pass + + # add references between objects from graph for e in g.get_edges(): source = nodes[e.get_source()] dest = nodes[e.get_destination()] @@ -526,33 +474,36 @@ else: assert False # only 2 refs supported from pypy obj + # quick self check, if traverse works properly + dests_by_source = {} + for e in g.get_edges(): + source = nodes[e.get_source()] + dest = nodes[e.get_destination()] + if source.info.type == "C" or dest.info.type == "C": + if not dests_by_source.has_key(source): + dests_by_source[source] = [] + dests_by_source[source].append(dest.r) + for source in dests_by_source: + dests_target = dests_by_source[source] + def append(self, pyobj): + dests_target.remove(pyobj) + self.gc._rrc_visit_pyobj = append + 
self.gc._rrc_traverse(source.raddr) + assert len(dests_target) == 0 + + # do collection self.gc.collect() - # def foo(self, pyobj): - # print "foo " + str(pyobj) - # self.gc._rrc_visit_pyobj = foo - # self.gc._rrc_traverse(nodes[u'"a"'].raddr) - + # check livelihood of objects, according to graph for name in nodes: n = nodes[name] - if n.info.alive: if n.info.type == "P": - print self.stackroots n.check_alive() else: n.check_alive(n.info.ext_refcnt) else: - if n.info.type == "C": - assert False - elif n.info.type == "P": + if n.info.type == "P": py.test.raises(RuntimeError, "n.p.x") # dead else: - assert False - -# TODO: pyobj_cycle_self_reference (without linked pypy object) -# TODO: linked_cycle_simple -# TODO: pyobj_cycle_simple -# TODO: linked_cycle_complex -# TODO: pyobj_cycle_complex -# TODO: pyobj_cycle_dies_including_linked_pypy + py.test.raises(RuntimeError, "n.r.c_ob_refcnt") # dead From pypy.commits at gmail.com Fri Jan 11 05:39:27 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 02:39:27 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Fixed dot tests by allocating all PyPy objects old Message-ID: <5c38725f.1c69fb81.75f4.2659@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95616:ea95f4bc807f Date: 2019-01-10 11:43 +0100 http://bitbucket.org/pypy/pypy/changeset/ea95f4bc807f/ Log: Fixed dot tests by allocating all PyPy objects old Add to stackroots instead of immortal PyPy objects for dot tests Sorted dot tests diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py --- a/rpython/memory/gc/test/test_rawrefcount.py +++ b/rpython/memory/gc/test/test_rawrefcount.py @@ -64,25 +64,16 @@ refs.append(pyobj_to) pyobj_to.c_ob_refcnt += 1 - def _rawrefcount_pypyobj(self, intval, create_old=False, - create_immortal=False, force_external=False): - if create_immortal: - p1 = lltype.malloc(S, immortal=True) - else: - saved = self.gc.nonlarge_max - try: - if 
force_external: - self.gc.nonlarge_max = 1 - p1 = self.malloc(S) - finally: - self.gc.nonlarge_max = saved + def _rawrefcount_pypyobj(self, intval, rooted=False, create_old=True): + p1 = self.malloc(S) p1.x = intval - if create_immortal: - self.consider_constant(p1) - elif create_old: + + if create_old: self.stackroots.append(p1) self._collect(major=False) p1 = self.stackroots.pop() + if rooted: + self.stackroots.append(p1) p1ref = lltype.cast_opaque_ptr(llmemory.GCREF, p1) def check_alive(): @@ -116,7 +107,7 @@ def _rawrefcount_pair(self, intval, is_light=False, is_pyobj=False, create_old=False, create_immortal=False, - force_external=False): + rooted=False, force_external=False): if is_light: rc = REFCNT_FROM_PYPY_LIGHT else: @@ -139,6 +130,8 @@ self.stackroots.append(p1) self._collect(major=False) p1 = self.stackroots.pop() + if rooted: + self.stackroots.append(p1) p1ref = lltype.cast_opaque_ptr(llmemory.GCREF, p1) r1 = lltype.malloc(PYOBJ_HDR, flavor='raw', immortal=create_immortal) @@ -389,6 +382,7 @@ dot_dir = os.path.join(os.path.realpath(os.path.dirname(__file__)), "dot") dot_files = [file for file in os.listdir(dot_dir) if file.endswith(".dot")] + dot_files.sort() @py.test.mark.dont_track_allocations('intentionally keep objects alive, ' 'because we do the checks ourselves') @@ -433,7 +427,8 @@ g = pydot.graph_from_dot_file(path)[0] nodes = {} - # create objects from graph + # create objects from graph (always create old to prevent moving) + i = 0 for n in g.get_nodes(): name = n.get_name() attr = n.obj_dict['attributes'] @@ -449,15 +444,18 @@ nodes[name] = CPythonNode(r, raddr, check_alive, info) elif type == "P": p, pref, check_alive = \ - self._rawrefcount_pypyobj(42, create_immortal=rooted) + self._rawrefcount_pypyobj(42 + i, rooted=rooted, + create_old=True) nodes[name] = PyPyNode(p, pref, check_alive, info) + i += 1 elif type == "B": p, pref, r, raddr, check_alive =\ - self._rawrefcount_pair(42, create_immortal=rooted) + self._rawrefcount_pair(42 + i, 
rooted=rooted, + create_old=True) if ext_refcnt > 0: r.c_ob_refcnt = ext_refcnt nodes[name] = BorderNode(p, pref, r, raddr, check_alive, info) - pass + i += 1 # add references between objects from graph for e in g.get_edges(): From pypy.commits at gmail.com Fri Jan 11 05:39:29 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 11 Jan 2019 02:39:29 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Added some dot tests Message-ID: <5c387261.1c69fb81.a0c43.0ada@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95617:c46c894a7c06 Date: 2019-01-11 10:40 +0100 http://bitbucket.org/pypy/pypy/changeset/c46c894a7c06/ Log: Added some dot tests diff --git a/rpython/memory/gc/test/dot/free_cpython_self.dot b/rpython/memory/gc/test/dot/free_cpython_self.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_cpython_self.dot @@ -0,0 +1,4 @@ +digraph G { + "a" [type=C, alive=n]; + "a" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/free_cross_multi_1a.dot b/rpython/memory/gc/test/dot/free_cross_multi_1a.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_cross_multi_1a.dot @@ -0,0 +1,9 @@ +digraph G { + "a" [type=B, alive=n]; + "b" [type=C, alive=n]; + "c" [type=C, alive=n]; + "a" -> "b"; + "b" -> "a"; + "a" -> "c"; + "c" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/free_cross_multi_1b.dot b/rpython/memory/gc/test/dot/free_cross_multi_1b.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_cross_multi_1b.dot @@ -0,0 +1,10 @@ +digraph G { + "a" [type=B, alive=n]; + "b" [type=C, alive=n]; + "c" [type=C, alive=n]; + "a" -> "b"; + "b" -> "a"; + "a" -> "c"; + "c" -> "a"; + "b" -> "c"; +} diff --git a/rpython/memory/gc/test/dot/free_cross_multi_2a.dot b/rpython/memory/gc/test/dot/free_cross_multi_2a.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_cross_multi_2a.dot @@ -0,0 +1,9 @@ +digraph G { + "a" [type=B, alive=n]; + "b" [type=C, 
alive=n]; + "c" [type=C, alive=n]; + "a" -> "b"; + "b" -> "c"; + "c" -> "a"; + "b" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/free_cross_multi_2b.dot b/rpython/memory/gc/test/dot/free_cross_multi_2b.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_cross_multi_2b.dot @@ -0,0 +1,11 @@ +digraph G { + "a" [type=B, alive=n]; + "b" [type=C, alive=n]; + "c" [type=C, alive=n]; + "d" [type=B, alive=n]; + "a" -> "b"; + "b" -> "c"; + "c" -> "d"; + "b" -> "a"; + "d" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/free_cross_multi_2c.dot b/rpython/memory/gc/test/dot/free_cross_multi_2c.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_cross_multi_2c.dot @@ -0,0 +1,13 @@ +digraph G { + "a" [type=B, alive=n]; + "b" [type=C, alive=n]; + "c" [type=C, alive=n]; + "d" [type=B, alive=n]; + "e" [type=P, alive=n]; + "a" -> "b"; + "b" -> "c"; + "c" -> "d"; + "b" -> "a"; + "d" -> "e"; + "e" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/free_cross_multi_3a.dot b/rpython/memory/gc/test/dot/free_cross_multi_3a.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_cross_multi_3a.dot @@ -0,0 +1,9 @@ +digraph G { + "a" [type=P, alive=n]; + "b" [type=B, alive=n]; + "c" [type=C, alive=n]; + "a" -> "b"; + "b" -> "a"; + "b" -> "c"; + "c" -> "b"; +} diff --git a/rpython/memory/gc/test/dot/free_cross_multi_3b.dot b/rpython/memory/gc/test/dot/free_cross_multi_3b.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_cross_multi_3b.dot @@ -0,0 +1,11 @@ +digraph G { + "a" [type=P, alive=n]; + "b" [type=B, alive=n]; + "c" [type=C, alive=n]; + "d" [type=C, alive=n]; + "a" -> "b"; + "b" -> "a"; + "b" -> "c"; + "c" -> "d"; + "d" -> "b"; +} diff --git a/rpython/memory/gc/test/dot/free_cross_multi_3c.dot b/rpython/memory/gc/test/dot/free_cross_multi_3c.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_cross_multi_3c.dot @@ -0,0 +1,11 @@ +digraph G { + "a" [type=P, 
alive=n]; + "b" [type=P, alive=n]; + "c" [type=B, alive=n]; + "d" [type=C, alive=n]; + "a" -> "b"; + "b" -> "c"; + "c" -> "d"; + "d" -> "c"; + "c" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/free_cross_simple_1.dot b/rpython/memory/gc/test/dot/free_cross_simple_1.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_cross_simple_1.dot @@ -0,0 +1,6 @@ +digraph G { + "a" [type=B, alive=n]; + "b" [type=C, alive=n]; + "a" -> "b"; + "b" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/free_cross_simple_2.dot b/rpython/memory/gc/test/dot/free_cross_simple_2.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_cross_simple_2.dot @@ -0,0 +1,10 @@ +digraph G { + "a" [type=P, alive=n]; + "b" [type=B, alive=n]; + "c" [type=C, alive=n]; + "d" [type=B, alive=n]; + "a" -> "b"; + "b" -> "c"; + "c" -> "d"; + "d" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/free_cross_simple_3.dot b/rpython/memory/gc/test/dot/free_cross_simple_3.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_cross_simple_3.dot @@ -0,0 +1,12 @@ +digraph G { + "a" [type=P, alive=n]; + "b" [type=P, alive=n]; + "c" [type=B, alive=n]; + "d" [type=C, alive=n]; + "e" [type=B, alive=n]; + "a" -> "b"; + "b" -> "c"; + "c" -> "d"; + "d" -> "e"; + "e" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/free_cross_simple_4.dot b/rpython/memory/gc/test/dot/free_cross_simple_4.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_cross_simple_4.dot @@ -0,0 +1,12 @@ +digraph G { + "a" [type=P, alive=n]; + "b" [type=B, alive=n]; + "c" [type=C, alive=n]; + "d" [type=C, alive=n]; + "e" [type=B, alive=n]; + "a" -> "b"; + "b" -> "c"; + "c" -> "d"; + "d" -> "e"; + "e" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/free_cross_special_1a.dot b/rpython/memory/gc/test/dot/free_cross_special_1a.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_cross_special_1a.dot @@ -0,0 +1,10 @@ +digraph G { + "a" 
[type=B, alive=n]; + "b" [type=C, alive=n]; + "c" [type=B, alive=n]; + "d" [type=C, alive=n]; + "a" -> "b"; + "b" -> "c"; + "c" -> "d"; + "d" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/free_cross_special_1b.dot b/rpython/memory/gc/test/dot/free_cross_special_1b.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_cross_special_1b.dot @@ -0,0 +1,14 @@ +digraph G { + "a" [type=B, alive=n]; + "b" [type=C, alive=n]; + "c" [type=B, alive=n]; + "d" [type=C, alive=n]; + "e" [type=B, alive=n]; + "f" [type=C, alive=n]; + "a" -> "b"; + "b" -> "c"; + "c" -> "d"; + "d" -> "e"; + "e" -> "f"; + "f" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/free_cross_special_1c.dot b/rpython/memory/gc/test/dot/free_cross_special_1c.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_cross_special_1c.dot @@ -0,0 +1,18 @@ +digraph G { + "a" [type=P, alive=n]; + "b" [type=B, alive=n]; + "c" [type=C, alive=n]; + "d" [type=B, alive=n]; + "e" [type=P, alive=n]; + "f" [type=B, alive=n]; + "g" [type=C, alive=n]; + "h" [type=B, alive=n]; + "a" -> "b"; + "b" -> "c"; + "c" -> "d"; + "d" -> "e"; + "e" -> "f"; + "f" -> "g"; + "g" -> "h"; + "h" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/free_cross_special_2a.dot b/rpython/memory/gc/test/dot/free_cross_special_2a.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_cross_special_2a.dot @@ -0,0 +1,19 @@ +digraph G { + "a" [type=P, alive=n]; + "b" [type=B, alive=n]; + "c" [type=C, alive=n]; + "d" [type=B, alive=n]; + "e" [type=P, alive=n]; + "f" [type=B, alive=n]; + "g" [type=C, alive=n]; + "h" [type=B, alive=n]; + "a" -> "b"; + "b" -> "c"; + "c" -> "d"; + "d" -> "e"; + "e" -> "f"; + "f" -> "g"; + "g" -> "h"; + "h" -> "a"; + "e" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/free_mixed_multi_1a.dot b/rpython/memory/gc/test/dot/free_mixed_multi_1a.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_mixed_multi_1a.dot @@ -0,0 +1,9 @@ 
+digraph G { + "a" [type=B, alive=n]; + "b" [type=C, alive=n]; + "c" [type=C, alive=n]; + "a" -> "b"; + "b" -> "c"; + "c" -> "a"; + "c" -> "b"; +} diff --git a/rpython/memory/gc/test/dot/free_pypy_self.dot b/rpython/memory/gc/test/dot/free_pypy_self.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/free_pypy_self.dot @@ -0,0 +1,4 @@ +digraph G { + "a" [type=P, alive=n]; + "a" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/free_self_cpython.dot b/rpython/memory/gc/test/dot/free_self_cpython.dot deleted file mode 100644 --- a/rpython/memory/gc/test/dot/free_self_cpython.dot +++ /dev/null @@ -1,4 +0,0 @@ -digraph G { - "a" [type=C, alive=n]; - "a" -> "a"; -} diff --git a/rpython/memory/gc/test/dot/free_self_pypy.dot b/rpython/memory/gc/test/dot/free_self_pypy.dot deleted file mode 100644 --- a/rpython/memory/gc/test/dot/free_self_pypy.dot +++ /dev/null @@ -1,4 +0,0 @@ -digraph G { - "a" [type=P, alive=n]; - "a" -> "a"; -} diff --git a/rpython/memory/gc/test/dot/free_simple_cross.dot b/rpython/memory/gc/test/dot/free_simple_cross.dot deleted file mode 100644 --- a/rpython/memory/gc/test/dot/free_simple_cross.dot +++ /dev/null @@ -1,6 +0,0 @@ -digraph G { - "a" [type=B, alive=n]; - "b" [type=C, alive=n]; - "a" -> "b"; - "b" -> "a"; -} diff --git a/rpython/memory/gc/test/dot/keep_cpython_self.dot b/rpython/memory/gc/test/dot/keep_cpython_self.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/keep_cpython_self.dot @@ -0,0 +1,4 @@ +digraph G { + "a" [type=C, alive=y, ext_refcnt=1]; + "a" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/keep_cross_simple_1a.dot b/rpython/memory/gc/test/dot/keep_cross_simple_1a.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/keep_cross_simple_1a.dot @@ -0,0 +1,6 @@ +digraph G { + "a" [type=B, alive=y, rooted=y]; + "b" [type=C, alive=y]; + "a" -> "b"; + "b" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/keep_cross_simple_1b.dot 
b/rpython/memory/gc/test/dot/keep_cross_simple_1b.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/keep_cross_simple_1b.dot @@ -0,0 +1,6 @@ +digraph G { + "a" [type=B, alive=y]; + "b" [type=C, alive=y, ext_refcnt=1]; + "a" -> "b"; + "b" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/keep_cross_simple_2a.dot b/rpython/memory/gc/test/dot/keep_cross_simple_2a.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/keep_cross_simple_2a.dot @@ -0,0 +1,10 @@ +digraph G { + "a" [type=P, alive=y, rooted=y]; + "b" [type=B, alive=y]; + "c" [type=C, alive=y]; + "d" [type=B, alive=y]; + "a" -> "b"; + "b" -> "c"; + "c" -> "d"; + "d" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/keep_cross_simple_2b.dot b/rpython/memory/gc/test/dot/keep_cross_simple_2b.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/keep_cross_simple_2b.dot @@ -0,0 +1,10 @@ +digraph G { + "a" [type=P, alive=y]; + "b" [type=B, alive=y, rooted=y]; + "c" [type=C, alive=y]; + "d" [type=B, alive=y]; + "a" -> "b"; + "b" -> "c"; + "c" -> "d"; + "d" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/keep_cross_simple_2c.dot b/rpython/memory/gc/test/dot/keep_cross_simple_2c.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/keep_cross_simple_2c.dot @@ -0,0 +1,10 @@ +digraph G { + "a" [type=P, alive=y]; + "b" [type=B, alive=y]; + "c" [type=C, alive=y, ext_refcnt=1]; + "d" [type=B, alive=y]; + "a" -> "b"; + "b" -> "c"; + "c" -> "d"; + "d" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/keep_cross_simple_2d.dot b/rpython/memory/gc/test/dot/keep_cross_simple_2d.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/keep_cross_simple_2d.dot @@ -0,0 +1,10 @@ +digraph G { + "a" [type=P, alive=y]; + "b" [type=B, alive=y]; + "c" [type=C, alive=y]; + "d" [type=B, alive=y, rooted=y]; + "a" -> "b"; + "b" -> "c"; + "c" -> "d"; + "d" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/keep_pypy_self.dot 
b/rpython/memory/gc/test/dot/keep_pypy_self.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/keep_pypy_self.dot @@ -0,0 +1,4 @@ +digraph G { + "a" [type=P, alive=y, rooted=y]; + "a" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/keep_pypy_simple.dot b/rpython/memory/gc/test/dot/keep_pypy_simple.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/keep_pypy_simple.dot @@ -0,0 +1,6 @@ +digraph G { + "a" [type=P, alive=y, rooted=y]; + "b" [type=P, alive=y]; + "a" -> "b"; + "b" -> "a"; +} diff --git a/rpython/memory/gc/test/dot/keep_self_cpython.dot b/rpython/memory/gc/test/dot/keep_self_cpython.dot deleted file mode 100644 --- a/rpython/memory/gc/test/dot/keep_self_cpython.dot +++ /dev/null @@ -1,4 +0,0 @@ -digraph G { - "a" [type=C, alive=y, ext_refcnt=1]; - "a" -> "a"; -} diff --git a/rpython/memory/gc/test/dot/keep_self_pypy.dot b/rpython/memory/gc/test/dot/keep_self_pypy.dot deleted file mode 100644 --- a/rpython/memory/gc/test/dot/keep_self_pypy.dot +++ /dev/null @@ -1,4 +0,0 @@ -digraph G { - "a" [type=P, alive=y, rooted=y]; - "a" -> "a"; -} diff --git a/rpython/memory/gc/test/dot/partial_free_cross_simple_1.dot b/rpython/memory/gc/test/dot/partial_free_cross_simple_1.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/partial_free_cross_simple_1.dot @@ -0,0 +1,10 @@ +digraph G { + "a" [type=B, alive=n]; + "b" [type=C, alive=n]; + "c" [type=C, alive=y, ext_refcnt=1]; + "d" [type=C, alive=n]; + "a" -> "b"; + "b" -> "a"; + "b" -> "c"; + "b" -> "d"; +} diff --git a/rpython/memory/gc/test/dot/partial_free_cross_simple_2.dot b/rpython/memory/gc/test/dot/partial_free_cross_simple_2.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/partial_free_cross_simple_2.dot @@ -0,0 +1,12 @@ +digraph G { + "a" [type=B, alive=n]; + "b" [type=C, alive=n]; + "c" [type=C, alive=y]; + "d" [type=C, alive=y, ext_refcnt=1]; + "e" [type=C, alive=n]; + "a" -> "b"; + "b" -> "a"; + "b" -> "c"; + "d" -> 
"c"; + "b" -> "e"; +} diff --git a/rpython/memory/gc/test/dot/partial_free_cross_simple_3.dot b/rpython/memory/gc/test/dot/partial_free_cross_simple_3.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/partial_free_cross_simple_3.dot @@ -0,0 +1,12 @@ +digraph G { + "a" [type=B, alive=n]; + "b" [type=C, alive=n]; + "c" [type=B, alive=y, rooted=y]; + "d" [type=C, alive=y]; + "e" [type=C, alive=n]; + "a" -> "b"; + "b" -> "a"; + "b" -> "d"; + "c" -> "d"; + "b" -> "e"; +} diff --git a/rpython/memory/gc/test/dot/partial_free_cross_simple_4.dot b/rpython/memory/gc/test/dot/partial_free_cross_simple_4.dot new file mode 100644 --- /dev/null +++ b/rpython/memory/gc/test/dot/partial_free_cross_simple_4.dot @@ -0,0 +1,14 @@ +digraph G { + "a" [type=B, alive=n]; + "b" [type=C, alive=n]; + "c" [type=P, alive=y, rooted=y]; + "d" [type=B, alive=y]; + "e" [type=C, alive=y]; + "f" [type=C, alive=n]; + "a" -> "b"; + "b" -> "a"; + "b" -> "e"; + "c" -> "d"; + "d" -> "e"; + "b" -> "f"; +} From pypy.commits at gmail.com Fri Jan 11 07:06:47 2019 From: pypy.commits at gmail.com (mattip) Date: Fri, 11 Jan 2019 04:06:47 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: vastly speed up own tests Message-ID: <5c3886d7.1c69fb81.f9b4d.6541@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95618:cc276d4cd166 Date: 2019-01-11 14:05 +0200 http://bitbucket.org/pypy/pypy/changeset/cc276d4cd166/ Log: vastly speed up own tests diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -42,9 +42,10 @@ self._index_storage = rutf8.null_storage() # XXX checking, remove before any performance measurments # ifdef not_running_in_benchmark - if not we_are_translated(): - lgt = rutf8.codepoints_in_utf8(utf8str) - assert lgt == length + # if not we_are_translated(): + # print 'UnicodeObject.__init__' + # lgt = rutf8.codepoints_in_utf8(utf8str) + # 
assert lgt == length @staticmethod def from_utf8builder(builder): From pypy.commits at gmail.com Fri Jan 11 08:57:11 2019 From: pypy.commits at gmail.com (mattip) Date: Fri, 11 Jan 2019 05:57:11 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: remove untranslated check for unicode object creation Message-ID: <5c38a0b7.1c69fb81.7d44d.43b0@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95619:baef7e3e3ac0 Date: 2019-01-11 15:56 +0200 http://bitbucket.org/pypy/pypy/changeset/baef7e3e3ac0/ Log: remove untranslated check for unicode object creation diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -41,15 +41,6 @@ self._utf8 = utf8str self._length = length self._index_storage = rutf8.null_storage() - # XXX checking, remove before any performance measurments - # ifdef not_running_in_benchmark - if not we_are_translated(): - try: - lgt = rutf8.check_utf8(utf8str, True) - assert lgt == length - except: - # array.array can return invalid unicode - pass @staticmethod def from_utf8builder(builder): From pypy.commits at gmail.com Fri Jan 11 16:50:05 2019 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 11 Jan 2019 13:50:05 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: three XXX, will try to fix this weekend Message-ID: <5c390f8d.1c69fb81.eefc6.146e@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8-py3 Changeset: r95620:1da3240effbd Date: 2019-01-11 22:48 +0100 http://bitbucket.org/pypy/pypy/changeset/1da3240effbd/ Log: three XXX, will try to fix this weekend diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -1171,6 +1171,8 @@ return self.space.utf8_w(wrapped) def is_correct_type(self, w_obj): + # XXX the ascii restriction needs to be lifted, otherwise the + # assumptions 
about get/setitem_str are just broken space = self.space return type(w_obj) is space.UnicodeObjectCls and w_obj.is_ascii() @@ -1189,11 +1191,14 @@ def setitem_str(self, w_dict, key, w_value): assert key is not None + # XXX this is not valid! UnicodeDictStrategy can right now only store ascii, but + # this path can lead to non-ascii utf8 strings ending up as keys self.unerase(w_dict.dstorage)[self.decodekey_str(key)] = w_value def getitem(self, w_dict, w_key): space = self.space # -- This is called extremely often. Hack for performance -- + # XXX this shortcut looks wrong to me if type(w_key) is space.StringObjectCls: return self.getitem_str(w_dict, w_key.unwrap(space)) # -- End of performance hack -- @@ -1201,6 +1206,7 @@ def getitem_str(self, w_dict, key): assert key is not None + # XXX why can't we just key here? return self.unerase(w_dict.dstorage).get(self.decodekey_str(key), None) def listview_utf8(self, w_dict): From pypy.commits at gmail.com Sat Jan 12 15:02:31 2019 From: pypy.commits at gmail.com (cfbolz) Date: Sat, 12 Jan 2019 12:02:31 -0800 (PST) Subject: [pypy-commit] pypy py3.5: IntDictStrategy has been re-enabled for a while Message-ID: <5c3a47d7.1c69fb81.dcc79.24e3@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: py3.5 Changeset: r95621:de91516349f5 Date: 2019-01-12 20:59 +0100 http://bitbucket.org/pypy/pypy/changeset/de91516349f5/ Log: IntDictStrategy has been re-enabled for a while diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py --- a/pypy/objspace/std/test/test_dictmultiobject.py +++ b/pypy/objspace/std/test/test_dictmultiobject.py @@ -1177,7 +1177,6 @@ assert type(list(d.keys())[0]) is str def test_empty_to_int(self): - skip('IntDictStrategy is disabled for now, re-enable it!') import sys d = {} d[1] = "hi" From pypy.commits at gmail.com Sun Jan 13 00:45:30 2019 From: pypy.commits at gmail.com (mattip) Date: Sat, 12 Jan 2019 21:45:30 -0800 (PST) Subject: [pypy-commit] pypy 
unicode-utf8-py3: os.fsencode('\x80') fails on python3 windows Message-ID: <5c3ad07a.1c69fb81.ddf95.d4d6@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95623:27b376c2d467 Date: 2019-01-12 19:53 +0200 http://bitbucket.org/pypy/pypy/changeset/27b376c2d467/ Log: os.fsencode('\x80') fails on python3 windows diff --git a/pypy/interpreter/test/test_gateway.py b/pypy/interpreter/test/test_gateway.py --- a/pypy/interpreter/test/test_gateway.py +++ b/pypy/interpreter/test/test_gateway.py @@ -502,15 +502,20 @@ space.mul(space.wrap(sys.maxint), space.wrap(-7))) def test_interp2app_unwrap_spec_fsencode(self): + import sys space = self.space w = space.wrap def f(filename): return space.newbytes(filename) app_f = gateway.interp2app_temp(f, unwrap_spec=['fsencode']) w_app_f = space.wrap(app_f) - assert space.eq_w( - space.call_function(w_app_f, w(u'\udc80')), - space.newbytes('\x80')) + if sys.platform == 'win32': + raises(gateway.OperationError, space.call_function, + w_app_f, w(u'\udc80')) + else: + assert space.eq_w( + space.call_function(w_app_f, w(u'\udc80')), + space.newbytes('\x80')) def test_interp2app_unwrap_spec_typechecks(self): from rpython.rlib.rarithmetic import r_longlong From pypy.commits at gmail.com Sun Jan 13 09:09:42 2019 From: pypy.commits at gmail.com (cfbolz) Date: Sun, 13 Jan 2019 06:09:42 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: merge py3.5 Message-ID: <5c3b46a6.1c69fb81.8bd3b.1afb@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8-py3 Changeset: r95624:b3a828a76f63 Date: 2019-01-12 22:30 +0100 http://bitbucket.org/pypy/pypy/changeset/b3a828a76f63/ Log: merge py3.5 diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py --- a/pypy/objspace/std/test/test_dictmultiobject.py +++ b/pypy/objspace/std/test/test_dictmultiobject.py @@ -1190,7 +1190,6 @@ assert d["ä"] == 2 def test_empty_to_int(self): - skip('IntDictStrategy is disabled for now, 
re-enable it!') import sys d = {} d[1] = "hi" From pypy.commits at gmail.com Sun Jan 13 09:09:44 2019 From: pypy.commits at gmail.com (cfbolz) Date: Sun, 13 Jan 2019 06:09:44 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: disable the fromkeys fast path, it's yet another way to get a non-ascii key Message-ID: <5c3b46a8.1c69fb81.91033.d4fc@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8-py3 Changeset: r95625:0e7fd7135bc7 Date: 2019-01-12 23:16 +0100 http://bitbucket.org/pypy/pypy/changeset/0e7fd7135bc7/ Log: disable the fromkeys fast path, it's yet another way to get a non- ascii key diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -122,17 +122,11 @@ if w_fill is None: w_fill = space.w_None if space.is_w(w_type, space.w_dict): - ulist = space.listview_utf8(w_keys) - if ulist is not None: - strategy = space.fromcache(UnicodeDictStrategy) - storage = strategy.get_storage_fromkeys(ulist, w_fill) - w_dict = space.allocate_instance(W_DictObject, w_type) - W_DictObject.__init__(w_dict, space, strategy, storage) - else: - w_dict = W_DictMultiObject.allocate_and_init_instance(space, - w_type) - for w_key in space.listview(w_keys): - w_dict.setitem(w_key, w_fill) + # XXX consider re-enabling a fast-path here + w_dict = W_DictMultiObject.allocate_and_init_instance(space, + w_type) + for w_key in space.listview(w_keys): + w_dict.setitem(w_key, w_fill) else: w_dict = space.call_function(w_type) for w_key in space.listview(w_keys): @@ -1217,14 +1211,6 @@ i += 1 return keys, values - def get_storage_fromkeys(self, keys_w, w_fill): - """Return an initialized storage with keys and fill values""" - storage = {} - mark_dict_non_null(storage) - for key in keys_w: - storage[key] = w_fill - return self.erase(storage) - create_iterator_classes(UnicodeDictStrategy) diff --git a/pypy/objspace/std/test/test_dictmultiobject.py 
b/pypy/objspace/std/test/test_dictmultiobject.py --- a/pypy/objspace/std/test/test_dictmultiobject.py +++ b/pypy/objspace/std/test/test_dictmultiobject.py @@ -125,6 +125,7 @@ assert self.space.eq_w(space.call_function(get, w("33"), w(44)), w(44)) def test_fromkeys_fastpath(self): + py.test.skip("doesn't make sense here") space = self.space w = space.wrap From pypy.commits at gmail.com Sun Jan 13 09:09:46 2019 From: pypy.commits at gmail.com (cfbolz) Date: Sun, 13 Jan 2019 06:09:46 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: fix the kwargsdict problem Message-ID: <5c3b46aa.1c69fb81.24671.0f05@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8-py3 Changeset: r95626:e75259de9a45 Date: 2019-01-13 15:08 +0100 http://bitbucket.org/pypy/pypy/changeset/e75259de9a45/ Log: fix the kwargsdict problem diff --git a/pypy/objspace/std/kwargsdict.py b/pypy/objspace/std/kwargsdict.py --- a/pypy/objspace/std/kwargsdict.py +++ b/pypy/objspace/std/kwargsdict.py @@ -3,7 +3,7 @@ Based on two lists containing unwrapped key value pairs. """ -from rpython.rlib import jit, rerased, objectmodel +from rpython.rlib import jit, rerased, objectmodel, rutf8 from pypy.objspace.std.dictmultiobject import ( DictStrategy, EmptyDictStrategy, ObjectDictStrategy, UnicodeDictStrategy, @@ -149,11 +149,12 @@ strategy = self.space.fromcache(UnicodeDictStrategy) keys, values_w = self.unerase(w_dict.dstorage) storage = strategy.get_empty_storage() - d_new = strategy.unerase(storage) - for i in range(len(keys)): - d_new[strategy.decodekey_str(keys[i])] = values_w[i] w_dict.set_strategy(strategy) w_dict.dstorage = storage + for i in range(len(keys)): + # NB: this can turn the dict into an object strategy, if a key is + # not ASCII! 
+ w_dict.setitem_str(keys[i], values_w[i]) def view_as_kwargs(self, w_dict): keys, values_w = self.unerase(w_dict.dstorage) diff --git a/pypy/objspace/std/test/test_kwargsdict.py b/pypy/objspace/std/test/test_kwargsdict.py --- a/pypy/objspace/std/test/test_kwargsdict.py +++ b/pypy/objspace/std/test/test_kwargsdict.py @@ -76,6 +76,14 @@ assert d.get_strategy() is not strategy assert "UnicodeDictStrategy" == d.get_strategy().__class__.__name__ +def test_limit_size_non_ascii(): + storage = strategy.get_empty_storage() + d = W_DictObject(space, strategy, storage) + for i in range(100): + assert d.setitem_str("ה%s" % i, 4) is None + assert d.get_strategy() is not strategy + assert "ObjectDictStrategy" == d.get_strategy().__class__.__name__ + def test_keys_doesnt_wrap(): space = FakeSpace() space.newlist = None From pypy.commits at gmail.com Sun Jan 13 09:09:47 2019 From: pypy.commits at gmail.com (cfbolz) Date: Sun, 13 Jan 2019 06:09:47 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: merge heads Message-ID: <5c3b46ab.1c69fb81.864eb.66b7@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8-py3 Changeset: r95627:59c5294dfb36 Date: 2019-01-13 15:08 +0100 http://bitbucket.org/pypy/pypy/changeset/59c5294dfb36/ Log: merge heads diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -122,17 +122,11 @@ if w_fill is None: w_fill = space.w_None if space.is_w(w_type, space.w_dict): - ulist = space.listview_utf8(w_keys) - if ulist is not None: - strategy = space.fromcache(UnicodeDictStrategy) - storage = strategy.get_storage_fromkeys(ulist, w_fill) - w_dict = space.allocate_instance(W_DictObject, w_type) - W_DictObject.__init__(w_dict, space, strategy, storage) - else: - w_dict = W_DictMultiObject.allocate_and_init_instance(space, - w_type) - for w_key in space.listview(w_keys): - w_dict.setitem(w_key, w_fill) + # XXX consider 
re-enabling a fast-path here + w_dict = W_DictMultiObject.allocate_and_init_instance(space, + w_type) + for w_key in space.listview(w_keys): + w_dict.setitem(w_key, w_fill) else: w_dict = space.call_function(w_type) for w_key in space.listview(w_keys): @@ -1217,14 +1211,6 @@ i += 1 return keys, values - def get_storage_fromkeys(self, keys_w, w_fill): - """Return an initialized storage with keys and fill values""" - storage = {} - mark_dict_non_null(storage) - for key in keys_w: - storage[key] = w_fill - return self.erase(storage) - create_iterator_classes(UnicodeDictStrategy) diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py --- a/pypy/objspace/std/test/test_dictmultiobject.py +++ b/pypy/objspace/std/test/test_dictmultiobject.py @@ -125,6 +125,7 @@ assert self.space.eq_w(space.call_function(get, w("33"), w(44)), w(44)) def test_fromkeys_fastpath(self): + py.test.skip("doesn't make sense here") space = self.space w = space.wrap @@ -1190,7 +1191,6 @@ assert d["ä"] == 2 def test_empty_to_int(self): - skip('IntDictStrategy is disabled for now, re-enable it!') import sys d = {} d[1] = "hi" From pypy.commits at gmail.com Sun Jan 13 13:22:39 2019 From: pypy.commits at gmail.com (arigo) Date: Sun, 13 Jan 2019 10:22:39 -0800 (PST) Subject: [pypy-commit] pypy default: Fix the tests for #2904 Message-ID: <5c3b81ef.1c69fb81.d8741.0abd@mx.google.com> Author: Armin Rigo Branch: Changeset: r95628:d0187cf2f1b7 Date: 2019-01-13 19:21 +0100 http://bitbucket.org/pypy/pypy/changeset/d0187cf2f1b7/ Log: Fix the tests for #2904 diff --git a/pypy/module/pypyjit/test_pypy_c/test_containers.py b/pypy/module/pypyjit/test_pypy_c/test_containers.py --- a/pypy/module/pypyjit/test_pypy_c/test_containers.py +++ b/pypy/module/pypyjit/test_pypy_c/test_containers.py @@ -20,7 +20,7 @@ assert log.result % 1000 == 0 loop, = log.loops_by_filename(self.filepath) ops = loop.ops_by_id('look') - assert log.opnames(ops) == [] + assert 
log.opnames(ops) == ['guard_nonnull_class'] def test_identitydict(self): def fn(n): diff --git a/pypy/module/pypyjit/test_pypy_c/test_instance.py b/pypy/module/pypyjit/test_pypy_c/test_instance.py --- a/pypy/module/pypyjit/test_pypy_c/test_instance.py +++ b/pypy/module/pypyjit/test_pypy_c/test_instance.py @@ -254,6 +254,7 @@ guard_no_exception(descr=...) i29 = int_lt(i26, 0) guard_true(i29, descr=...) + guard_nonnull_class(p58, ConstClass(W_IntObject), descr=...) ''') assert loop.match_by_id('loadattr2', "") # completely folded away diff --git a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py --- a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py +++ b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py @@ -243,6 +243,8 @@ f80 = raw_load_f(i67, i79, descr=) i81 = int_add(i71, 1) --TICK-- + i92 = int_le(i33, _) + guard_true(i92, descr=...) jump(..., descr=...) """) @@ -282,6 +284,8 @@ f86 = float_add(f74, f85) i87 = int_add(i76, 1) --TICK-- + i98 = int_le(i36, _) + guard_true(i98, descr=...) jump(..., descr=...) """) @@ -389,6 +393,8 @@ assert log.result == [0.] * N loop, = log.loops_by_filename(self.filepath) assert loop.match(""" + i4 = int_lt(i91, 0) + guard_false(i4, descr=...) i92 = int_ge(i91, i37) guard_false(i92, descr=...) i93 = int_add(i91, 1) diff --git a/pypy/module/pypyjit/test_pypy_c/test_misc.py b/pypy/module/pypyjit/test_pypy_c/test_misc.py --- a/pypy/module/pypyjit/test_pypy_c/test_misc.py +++ b/pypy/module/pypyjit/test_pypy_c/test_misc.py @@ -113,6 +113,7 @@ i12 = int_is_true(i4) guard_true(i12, descr=...) guard_not_invalidated(descr=...) + guard_nonnull_class(p10, ConstClass(W_IntObject), descr=...) i10p = getfield_gc_i(p10, descr=...) i10 = int_mul_ovf(2, i10p) guard_no_overflow(descr=...) @@ -148,6 +149,8 @@ setfield_gc(p9, i17, descr=<.* .*W_XRangeIterator.inst_current .*>) guard_not_invalidated(descr=...) i18 = force_token() + i83 = int_lt(0, i14) + guard_true(i83, descr=...) 
i84 = int_sub(i14, 1) i21 = int_lt(i10, 0) guard_false(i21, descr=...) @@ -175,12 +178,16 @@ loop, = log.loops_by_filename(self.filepath) assert loop.match(""" guard_not_invalidated? + i80 = int_lt(i11, 0) + guard_false(i80, descr=...) i16 = int_ge(i11, i12) guard_false(i16, descr=...) i20 = int_add(i11, 1) setfield_gc(p4, i20, descr=<.* .*W_AbstractSeqIterObject.inst_index .*>) guard_not_invalidated? i21 = force_token() + i89 = int_lt(0, i9) + guard_true(i89, descr=...) i88 = int_sub(i9, 1) i25 = int_ge(i11, i9) guard_false(i25, descr=...) @@ -214,6 +221,8 @@ setfield_gc(p4, i20, descr=<.* .*W_AbstractSeqIterObject.inst_index .*>) guard_not_invalidated? i21 = force_token() + i94 = int_lt(0, i9) + guard_true(i94, descr=...) i95 = int_sub(i9, 1) i23 = int_lt(i18, 0) guard_false(i23, descr=...) From pypy.commits at gmail.com Mon Jan 14 15:10:49 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 14 Jan 2019 12:10:49 -0800 (PST) Subject: [pypy-commit] pypy default: since this file is run with cpython in test_xpickle, use stdlib2.7.14 code here Message-ID: <5c3cecc9.1c69fb81.70cf8.24a8@mx.google.com> Author: Matti Picus Branch: Changeset: r95629:9cbebcb6df9a Date: 2019-01-14 22:10 +0200 http://bitbucket.org/pypy/pypy/changeset/9cbebcb6df9a/ Log: since this file is run with cpython in test_xpickle, use stdlib2.7.14 code here diff --git a/lib-python/2.7/test/pickletester.py b/lib-python/2.7/test/pickletester.py --- a/lib-python/2.7/test/pickletester.py +++ b/lib-python/2.7/test/pickletester.py @@ -158,7 +158,7 @@ # Shouldn't support the recursion itself return K, (self.value,) -import __main__ +__main__ = sys.modules['__main__'] __main__.C = C C.__module__ = "__main__" __main__.D = D From pypy.commits at gmail.com Mon Jan 14 23:43:21 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 14 Jan 2019 20:43:21 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: merge py3.5 into branch Message-ID: <5c3d64e9.1c69fb81.94828.398f@mx.google.com> Author: Matti Picus 
Branch: unicode-utf8-py3 Changeset: r95633:19dbd394fcf6 Date: 2019-01-15 06:39 +0200 http://bitbucket.org/pypy/pypy/changeset/19dbd394fcf6/ Log: merge py3.5 into branch diff --git a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py --- a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py +++ b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py @@ -325,16 +325,22 @@ a = array.array('H', [10000, 20000, 30000]) c = ffi.from_buffer(a) assert ffi.typeof(c) is ffi.typeof("char[]") + assert len(c) == 6 ffi.cast("unsigned short *", c)[1] += 500 assert list(a) == [10000, 20500, 30000] - assert c == ffi.from_buffer(a, True) + assert c == ffi.from_buffer("char[]", a, True) assert c == ffi.from_buffer(a, require_writable=True) # + c = ffi.from_buffer("unsigned short[]", a) + assert len(c) == 3 + assert c[1] == 20500 + # p = ffi.from_buffer(b"abcd") assert p[2] == b"c" # - assert p == ffi.from_buffer(b"abcd", False) - py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) + assert p == ffi.from_buffer(b"abcd", require_writable=False) + py.test.raises((TypeError, BufferError), ffi.from_buffer, + "char[]", b"abcd", True) py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", require_writable=True) diff --git a/extra_tests/cffi_tests/cffi1/test_ffi_obj.py b/extra_tests/cffi_tests/cffi1/test_ffi_obj.py --- a/extra_tests/cffi_tests/cffi1/test_ffi_obj.py +++ b/extra_tests/cffi_tests/cffi1/test_ffi_obj.py @@ -239,19 +239,31 @@ def test_ffi_from_buffer(): import array ffi = _cffi1_backend.FFI() - a = array.array('H', [10000, 20000, 30000]) + a = array.array('H', [10000, 20000, 30000, 40000]) c = ffi.from_buffer(a) assert ffi.typeof(c) is ffi.typeof("char[]") + assert len(c) == 8 ffi.cast("unsigned short *", c)[1] += 500 - assert list(a) == [10000, 20500, 30000] - assert c == ffi.from_buffer(a, True) + assert list(a) == [10000, 20500, 30000, 40000] + py.test.raises(TypeError, ffi.from_buffer, a, True) + assert c 
== ffi.from_buffer("char[]", a, True) assert c == ffi.from_buffer(a, require_writable=True) # + c = ffi.from_buffer("unsigned short[]", a) + assert len(c) == 4 + assert c[1] == 20500 + # + c = ffi.from_buffer("unsigned short[2][2]", a) + assert len(c) == 2 + assert len(c[0]) == 2 + assert c[0][1] == 20500 + # p = ffi.from_buffer(b"abcd") assert p[2] == b"c" # - assert p == ffi.from_buffer(b"abcd", False) - py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) + assert p == ffi.from_buffer(b"abcd", require_writable=False) + py.test.raises((TypeError, BufferError), ffi.from_buffer, + "char[]", b"abcd", True) py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", require_writable=True) diff --git a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py --- a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py +++ b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py @@ -1676,24 +1676,6 @@ py.test.raises(TypeError, len, q.a) py.test.raises(TypeError, list, q.a) - def test_from_buffer(self): - import array - a = array.array('H', [10000, 20000, 30000]) - c = ffi.from_buffer(a) - assert ffi.typeof(c) is ffi.typeof("char[]") - ffi.cast("unsigned short *", c)[1] += 500 - assert list(a) == [10000, 20500, 30000] - assert c == ffi.from_buffer(a, True) - assert c == ffi.from_buffer(a, require_writable=True) - # - p = ffi.from_buffer(b"abcd") - assert p[2] == b"c" - # - assert p == ffi.from_buffer(b"abcd", False) - py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) - py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", - require_writable=True) - def test_all_primitives(self): assert set(PRIMITIVE_TO_INDEX) == set([ "char", diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -16,6 +16,8 @@ # Python 3.x basestring = str +_unspecified = object() + class FFI(object): @@ -341,15 +343,22 @@ # """ # note that 'buffer' is a type, 
set on this instance by __init__ - def from_buffer(self, python_buffer, require_writable=False): - """Return a that points to the data of the + def from_buffer(self, cdecl, python_buffer=_unspecified, + require_writable=False): + """Return a cdata of the given type pointing to the data of the given Python object, which must support the buffer interface. Note that this is not meant to be used on the built-in types str or unicode (you can build 'char[]' arrays explicitly) but only on objects containing large quantities of raw data in some other format, like 'array.array' or numpy arrays. + + The first argument is optional and default to 'char[]'. """ - return self._backend.from_buffer(self.BCharA, python_buffer, + if python_buffer is _unspecified: + cdecl, python_buffer = self.BCharA, cdecl + elif isinstance(cdecl, basestring): + cdecl = self._typeof(cdecl) + return self._backend.from_buffer(cdecl, python_buffer, require_writable) def memmove(self, dest, src, n): diff --git a/pypy/module/__pypy__/test/test_builders.py b/pypy/module/__pypy__/test/test_builders.py --- a/pypy/module/__pypy__/test/test_builders.py +++ b/pypy/module/__pypy__/test/test_builders.py @@ -13,7 +13,6 @@ assert b.build() == s b.append("123") assert b.build() == s + "123" - assert type(b.build()) is str def test_preallocate(self): from __pypy__.builders import StringBuilder @@ -22,7 +21,6 @@ b.append("123") s = b.build() assert s == "abc123" - assert type(s) is str def test_append_slice(self): from __pypy__.builders import StringBuilder @@ -45,8 +43,3 @@ assert len(b) == 16 assert s == b"abc123you and me" assert b.build() == s - - def test_encode(self): - from __pypy__.builders import UnicodeBuilder - b = UnicodeBuilder() - raises(UnicodeDecodeError, b.append, b'\xc0') diff --git a/pypy/module/_cffi_backend/cdataobj.py b/pypy/module/_cffi_backend/cdataobj.py --- a/pypy/module/_cffi_backend/cdataobj.py +++ b/pypy/module/_cffi_backend/cdataobj.py @@ -478,8 +478,9 @@ def enter_exit(self, exit_now): 
raise oefmt(self.space.w_ValueError, - "only 'cdata' object from ffi.new(), ffi.gc() or ffi.from_buffer() " - "can be used with the 'with' keyword or ffi.release()") + "only 'cdata' object from ffi.new(), ffi.gc(), ffi.from_buffer() " + "or ffi.new_allocator()() can be used with the 'with' keyword or " + "ffi.release()") def descr_enter(self): self.enter_exit(False) @@ -657,24 +658,28 @@ class W_CDataFromBuffer(W_CData): _attrs_ = ['buf', 'length', 'w_keepalive'] - _immutable_fields_ = ['buf', 'length', 'w_keepalive'] + _immutable_fields_ = ['buf', 'length'] - def __init__(self, space, cdata, ctype, buf, w_object): + def __init__(self, space, cdata, length, ctype, buf, w_object): W_CData.__init__(self, space, cdata, ctype) self.buf = buf - self.length = buf.getlength() + self.length = length self.w_keepalive = w_object def get_array_length(self): return self.length def _repr_extra(self): - w_repr = self.space.repr(self.w_keepalive) - return "buffer len %d from '%s' object" % ( - self.length, self.space.type(self.w_keepalive).name) + if self.w_keepalive is not None: + name = self.space.type(self.w_keepalive).name + else: + name = "(released)" + return "buffer len %d from '%s' object" % (self.length, name) def enter_exit(self, exit_now): - pass # for now, no effect on PyPy + # for now, limited effect on PyPy + if exit_now: + self.w_keepalive = None class W_CDataGCP(W_CData): diff --git a/pypy/module/_cffi_backend/ctypearray.py b/pypy/module/_cffi_backend/ctypearray.py --- a/pypy/module/_cffi_backend/ctypearray.py +++ b/pypy/module/_cffi_backend/ctypearray.py @@ -25,7 +25,7 @@ assert isinstance(ctptr, W_CTypePointer) W_CTypePtrOrArray.__init__(self, space, arraysize, extra, 0, ctptr.ctitem) - self.length = length + self.length = length # -1 if no length is given, e.g. 
'int[]' self.ctptr = ctptr def _alignof(self): @@ -86,7 +86,7 @@ def _check_subscript_index(self, w_cdata, i): space = self.space if i < 0: - raise oefmt(space.w_IndexError, "negative index not supported") + raise oefmt(space.w_IndexError, "negative index") if i >= w_cdata.get_array_length(): raise oefmt(space.w_IndexError, "index too large for cdata '%s' (expected %d < %d)", @@ -96,7 +96,7 @@ def _check_slice_index(self, w_cdata, start, stop): space = self.space if start < 0: - raise oefmt(space.w_IndexError, "negative index not supported") + raise oefmt(space.w_IndexError, "negative index") if stop > w_cdata.get_array_length(): raise oefmt(space.w_IndexError, "index too large (expected %d <= %d)", diff --git a/pypy/module/_cffi_backend/ffi_obj.py b/pypy/module/_cffi_backend/ffi_obj.py --- a/pypy/module/_cffi_backend/ffi_obj.py +++ b/pypy/module/_cffi_backend/ffi_obj.py @@ -328,7 +328,8 @@ @unwrap_spec(require_writable=int) - def descr_from_buffer(self, w_python_buffer, require_writable=0): + def descr_from_buffer(self, w_cdecl, w_python_buffer=None, + require_writable=0): """\ Return a that points to the data of the given Python object, which must support the buffer interface. 
Note that this is @@ -337,9 +338,13 @@ containing large quantities of raw data in some other format, like 'array.array' or numpy arrays.""" # - w_ctchara = newtype._new_chara_type(self.space) - return func._from_buffer(self.space, w_ctchara, w_python_buffer, - require_writable) + if w_python_buffer is None: + w_python_buffer = w_cdecl + w_ctype = newtype._new_chara_type(self.space) + else: + w_ctype = self.ffi_type(w_cdecl, ACCEPT_STRING | ACCEPT_CTYPE) + return func.from_buffer(self.space, w_ctype, w_python_buffer, + require_writable) @unwrap_spec(w_arg=W_CData) diff --git a/pypy/module/_cffi_backend/func.py b/pypy/module/_cffi_backend/func.py --- a/pypy/module/_cffi_backend/func.py +++ b/pypy/module/_cffi_backend/func.py @@ -112,16 +112,10 @@ @unwrap_spec(w_ctype=ctypeobj.W_CType, require_writable=int) def from_buffer(space, w_ctype, w_x, require_writable=0): - from pypy.module._cffi_backend import ctypearray, ctypeprim - # - if (not isinstance(w_ctype, ctypearray.W_CTypeArray) or - not isinstance(w_ctype.ctptr.ctitem, ctypeprim.W_CTypePrimitiveChar)): - raise oefmt(space.w_TypeError, - "needs 'char[]', got '%s'", w_ctype.name) - # - return _from_buffer(space, w_ctype, w_x, require_writable) - -def _from_buffer(space, w_ctype, w_x, require_writable): + from pypy.module._cffi_backend import ctypearray + if not isinstance(w_ctype, ctypearray.W_CTypeArray): + raise oefmt(space.w_TypeError, "expected an array ctype, got '%s'", + w_ctype.name) if space.isinstance_w(w_x, space.w_unicode): raise oefmt(space.w_TypeError, "from_buffer() cannot return the address of a unicode object") @@ -140,7 +134,37 @@ "buffer interface but cannot be rendered as a plain " "raw address on PyPy", w_x) # - return cdataobj.W_CDataFromBuffer(space, _cdata, w_ctype, buf, w_x) + buffersize = buf.getlength() + arraylength = w_ctype.length + if arraylength >= 0: + # it's an array with a fixed length; make sure that the + # buffer contains enough bytes. 
+ if buffersize < w_ctype.size: + raise oefmt(space.w_ValueError, + "buffer is too small (%d bytes) for '%s' (%d bytes)", + buffersize, w_ctype.name, w_ctype.size) + else: + # it's an open 'array[]' + itemsize = w_ctype.ctitem.size + if itemsize == 1: + # fast path, performance only + arraylength = buffersize + elif itemsize > 0: + # give it as many items as fit the buffer. Ignore a + # partial last element. + arraylength = buffersize / itemsize + else: + # it's an array 'empty[]'. Unsupported obscure case: + # the problem is that setting the length of the result + # to anything large (like SSIZE_T_MAX) is dangerous, + # because if someone tries to loop over it, it will + # turn effectively into an infinite loop. + raise oefmt(space.w_ZeroDivisionError, + "from_buffer('%s', ..): the actual length of the array " + "cannot be computed", w_ctype.name) + # + return cdataobj.W_CDataFromBuffer(space, _cdata, arraylength, + w_ctype, buf, w_x) # ____________________________________________________________ diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -3742,6 +3742,64 @@ p1[0] = b"g" assert ba == b"goo" +def test_from_buffer_types(): + BInt = new_primitive_type("int") + BIntP = new_pointer_type(BInt) + BIntA = new_array_type(BIntP, None) + lst = [-12345678, 87654321, 489148] + bytestring = buffer(newp(BIntA, lst))[:] + b'XYZ' + # + p1 = from_buffer(BIntA, bytestring) # int[] + assert typeof(p1) is BIntA + assert len(p1) == 3 + assert p1[0] == lst[0] + assert p1[1] == lst[1] + assert p1[2] == lst[2] + py.test.raises(IndexError, "p1[3]") + py.test.raises(IndexError, "p1[-1]") + # + py.test.raises(TypeError, from_buffer, BInt, bytestring) + py.test.raises(TypeError, from_buffer, BIntP, bytestring) + # + BIntA2 = new_array_type(BIntP, 2) + p2 = from_buffer(BIntA2, bytestring) # int[2] + assert 
typeof(p2) is BIntA2 + assert len(p2) == 2 + assert p2[0] == lst[0] + assert p2[1] == lst[1] + py.test.raises(IndexError, "p2[2]") + py.test.raises(IndexError, "p2[-1]") + assert p2 == p1 + # + BIntA4 = new_array_type(BIntP, 4) # int[4]: too big + py.test.raises(ValueError, from_buffer, BIntA4, bytestring) + # + BStruct = new_struct_type("foo") + complete_struct_or_union(BStruct, [('a1', BInt, -1), + ('a2', BInt, -1)]) + BStructP = new_pointer_type(BStruct) + BStructA = new_array_type(BStructP, None) + p1 = from_buffer(BStructA, bytestring) # struct[] + assert len(p1) == 1 + assert typeof(p1) is BStructA + assert p1[0].a1 == lst[0] + assert p1[0].a2 == lst[1] + py.test.raises(IndexError, "p1[1]") + # + BEmptyStruct = new_struct_type("empty") + complete_struct_or_union(BEmptyStruct, [], Ellipsis, 0) + assert sizeof(BEmptyStruct) == 0 + BEmptyStructP = new_pointer_type(BEmptyStruct) + BEmptyStructA = new_array_type(BEmptyStructP, None) + py.test.raises(ZeroDivisionError, from_buffer, # empty[] + BEmptyStructA, bytestring) + # + BEmptyStructA5 = new_array_type(BEmptyStructP, 5) + p1 = from_buffer(BEmptyStructA5, bytestring) # struct empty[5] + assert typeof(p1) is BEmptyStructA5 + assert len(p1) == 5 + assert cast(BIntP, p1) == from_buffer(BIntA, bytestring) + def test_memmove(): Short = new_primitive_type("short") ShortA = new_array_type(new_pointer_type(Short), None) diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py --- a/pypy/module/_cffi_backend/test/test_ffi_obj.py +++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py @@ -282,19 +282,31 @@ import _cffi_backend as _cffi1_backend import array ffi = _cffi1_backend.FFI() - a = array.array('H', [10000, 20000, 30000]) + a = array.array('H', [10000, 20000, 30000, 40000]) c = ffi.from_buffer(a) assert ffi.typeof(c) is ffi.typeof("char[]") + assert len(c) == 8 ffi.cast("unsigned short *", c)[1] += 500 - assert list(a) == [10000, 20500, 30000] - assert c == 
ffi.from_buffer(a, True) + assert list(a) == [10000, 20500, 30000, 40000] + raises(TypeError, ffi.from_buffer, a, True) + assert c == ffi.from_buffer("char[]", a, True) assert c == ffi.from_buffer(a, require_writable=True) # + c = ffi.from_buffer("unsigned short[]", a) + assert len(c) == 4 + assert c[1] == 20500 + # + c = ffi.from_buffer("unsigned short[2][2]", a) + assert len(c) == 2 + assert len(c[0]) == 2 + assert c[0][1] == 20500 + # p = ffi.from_buffer(b"abcd") assert p[2] == b"c" # - assert p == ffi.from_buffer(b"abcd", False) - raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) + assert p == ffi.from_buffer(b"abcd", require_writable=False) + raises((TypeError, BufferError), ffi.from_buffer, + "char[]", b"abcd", True) raises((TypeError, BufferError), ffi.from_buffer, b"abcd", require_writable=True) diff --git a/pypy/module/pypyjit/test_pypy_c/test_containers.py b/pypy/module/pypyjit/test_pypy_c/test_containers.py --- a/pypy/module/pypyjit/test_pypy_c/test_containers.py +++ b/pypy/module/pypyjit/test_pypy_c/test_containers.py @@ -20,7 +20,7 @@ assert log.result % 1000 == 0 loop, = log.loops_by_filename(self.filepath) ops = loop.ops_by_id('look') - assert log.opnames(ops) == [] + assert log.opnames(ops) == ['guard_nonnull_class'] def test_identitydict(self): def fn(n): diff --git a/pypy/module/pypyjit/test_pypy_c/test_ffi.py b/pypy/module/pypyjit/test_pypy_c/test_ffi.py --- a/pypy/module/pypyjit/test_pypy_c/test_ffi.py +++ b/pypy/module/pypyjit/test_pypy_c/test_ffi.py @@ -407,6 +407,7 @@ i138 = call_i(ConstClass(_ll_1_raw_malloc_varsize_zero__Signed), 6, descr=...) check_memory_error(i138) setfield_gc(p132, i138, descr=...) + setfield_gc(p132, 0, descr=...) setfield_gc(p132, ConstPtr(ptr139), descr=...) setfield_gc(p132, -1, descr=...) setfield_gc(p0, p133, descr=...) 
diff --git a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py --- a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py +++ b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py @@ -244,6 +244,8 @@ f80 = raw_load_f(i67, i79, descr=) i81 = int_add(i71, 1) --TICK-- + i92 = int_le(i33, _) + guard_true(i92, descr=...) jump(..., descr=...) """) @@ -283,6 +285,8 @@ f86 = float_add(f74, f85) i87 = int_add(i76, 1) --TICK-- + i98 = int_le(i36, _) + guard_true(i98, descr=...) jump(..., descr=...) """) @@ -390,6 +394,8 @@ assert log.result == [0.] * N loop, = log.loops_by_filename(self.filepath) assert loop.match(""" + i4 = int_lt(i91, 0) + guard_false(i4, descr=...) i92 = int_ge(i91, i37) guard_false(i92, descr=...) i93 = int_add(i91, 1) diff --git a/pypy/module/pypyjit/test_pypy_c/test_misc.py b/pypy/module/pypyjit/test_pypy_c/test_misc.py --- a/pypy/module/pypyjit/test_pypy_c/test_misc.py +++ b/pypy/module/pypyjit/test_pypy_c/test_misc.py @@ -113,6 +113,7 @@ i12 = int_is_true(i4) guard_true(i12, descr=...) guard_not_invalidated(descr=...) + guard_nonnull_class(p10, ConstClass(W_IntObject), descr=...) i10p = getfield_gc_i(p10, descr=...) i10 = int_mul_ovf(2, i10p) guard_no_overflow(descr=...) @@ -146,12 +147,16 @@ RANGE_ITER_STEP_1 = """ guard_not_invalidated? # W_IntRangeStepOneIterator.next() + i80 = int_lt(i11, 0) + guard_false(i80, descr=...) i16 = int_lt(i11, i12) guard_true(i16, descr=...) i20 = int_add(i11, 1) setfield_gc(p4, i20, descr=<.* .*W_IntRangeIterator.inst_current .*>) guard_not_invalidated? i21 = force_token() + i89 = int_lt(0, i9) + guard_true(i89, descr=...) i88 = int_sub(i9, 1) # Compared with pypy2, we get these two operations extra. @@ -186,6 +191,8 @@ assert log.result == 1000 * 999 / 2 loop, = log.loops_by_filename(self.filepath) assert loop.match(self.RANGE_ITER_STEP_1) + i94 = int_lt(0, i9) + guard_true(i94, descr=...) 
def test_chain_of_guards(self): src = """ From pypy.commits at gmail.com Mon Jan 14 23:43:23 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 14 Jan 2019 20:43:23 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: merge unicode-utf8 into branch Message-ID: <5c3d64eb.1c69fb81.7f7ca.4cb7@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95634:ce6f5d27b597 Date: 2019-01-15 06:42 +0200 http://bitbucket.org/pypy/pypy/changeset/ce6f5d27b597/ Log: merge unicode-utf8 into branch diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -338,6 +338,8 @@ assert u'A\u03a3A'.title() == u'A\u03c3a' assert u"brow\u4321n fox".title() == u"Brow\u4321N Fox" assert u'\ud800'.title() == u'\ud800' + assert (unichr(0x345) + u'abc').title() == u'\u0399Abc' + assert (unichr(0x345) + u'ABC').title() == u'\u0399Abc' def test_istitle(self): assert u"".istitle() == False diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -40,12 +40,6 @@ self._utf8 = utf8str self._length = length self._index_storage = rutf8.null_storage() - # XXX checking, remove before any performance measurments - # ifdef not_running_in_benchmark - # if not we_are_translated(): - # print 'UnicodeObject.__init__' - # lgt = rutf8.codepoints_in_utf8(utf8str) - # assert lgt == length @staticmethod def from_utf8builder(builder): From pypy.commits at gmail.com Tue Jan 15 03:33:08 2019 From: pypy.commits at gmail.com (mjacob) Date: Tue, 15 Jan 2019 00:33:08 -0800 (PST) Subject: [pypy-commit] extradoc extradoc: Update. 
Message-ID: <5c3d9ac4.1c69fb81.e252b.a97c@mx.google.com> Author: Manuel Jacob Branch: extradoc Changeset: r5938:75aa58cd996b Date: 2019-01-15 09:32 +0100 http://bitbucket.org/pypy/extradoc/changeset/75aa58cd996b/ Log: Update. diff --git a/sprintinfo/ddorf2019/people.txt b/sprintinfo/ddorf2019/people.txt --- a/sprintinfo/ddorf2019/people.txt +++ b/sprintinfo/ddorf2019/people.txt @@ -10,7 +10,7 @@ ============================ ============== =========================== Carl Friedrich Bolz-Tereick always there private Matti Picus Feb 4? - 9? airbnb -Manuel? Feb 4 - 7 share a room? +Manuel Feb 3 - 7? share a room? Antonio Cuni Feb 3 - 9 airbnb Andrew Lawrence Feb 3 - 9 backpackers Düsseldorf Alexander Schremmer Feb 4 - 8 Essen, guest room available From pypy.commits at gmail.com Tue Jan 15 15:55:03 2019 From: pypy.commits at gmail.com (rlamy) Date: Tue, 15 Jan 2019 12:55:03 -0800 (PST) Subject: [pypy-commit] pypy default: Disable hypothesis deadlines Message-ID: <5c3e48a7.1c69fb81.a9c68.1f31@mx.google.com> Author: Ronan Lamy Branch: Changeset: r95637:9f51b370e8ca Date: 2019-01-15 20:54 +0000 http://bitbucket.org/pypy/pypy/changeset/9f51b370e8ca/ Log: Disable hypothesis deadlines diff --git a/pypy/conftest.py b/pypy/conftest.py --- a/pypy/conftest.py +++ b/pypy/conftest.py @@ -5,6 +5,14 @@ rsyncdirs = ['.', '../lib-python', '../lib_pypy', '../demo'] rsyncignore = ['_cache'] +try: + from hypothesis import settings +except ImportError: + pass +else: + settings.register_profile('default', deadline=None) + settings.load_profile('default') + # PyPy's command line extra options (these are added # to py.test's standard options) # From pypy.commits at gmail.com Tue Jan 15 18:18:20 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 15 Jan 2019 15:18:20 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: fix merge Message-ID: <5c3e6a3c.1c69fb81.c605f.98f2@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95638:bba58ab418cb Date: 2019-01-15 08:17 
+0200 http://bitbucket.org/pypy/pypy/changeset/bba58ab418cb/ Log: fix merge diff --git a/pypy/module/pypyjit/test_pypy_c/test_misc.py b/pypy/module/pypyjit/test_pypy_c/test_misc.py --- a/pypy/module/pypyjit/test_pypy_c/test_misc.py +++ b/pypy/module/pypyjit/test_pypy_c/test_misc.py @@ -191,8 +191,6 @@ assert log.result == 1000 * 999 / 2 loop, = log.loops_by_filename(self.filepath) assert loop.match(self.RANGE_ITER_STEP_1) - i94 = int_lt(0, i9) - guard_true(i94, descr=...) def test_chain_of_guards(self): src = """ From pypy.commits at gmail.com Tue Jan 15 18:18:22 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 15 Jan 2019 15:18:22 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: fix tests for python3 Message-ID: <5c3e6a3e.1c69fb81.fdee4.0101@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95639:ef1027524c11 Date: 2019-01-15 16:35 +0200 http://bitbucket.org/pypy/pypy/changeset/ef1027524c11/ Log: fix tests for python3 diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -58,17 +58,17 @@ @given(st.text()) def test_read_buffer(text): - buf = DecodeBuffer(text) - assert buf.get_chars(-1) == text + buf = DecodeBuffer(text.encode('utf8')) + assert buf.get_chars(-1).decode('utf8') == text assert buf.exhausted() @given(st.text(), st.lists(st.integers(min_value=0))) @example(u'\x80', [1]) def test_readn_buffer(text, sizes): - buf = DecodeBuffer(text) + buf = DecodeBuffer(text.encode('utf8')) strings = [] for n in sizes: - s = buf.get_chars(n) + s = buf.get_chars(n).decode('utf8') if not buf.exhausted(): assert len(s) == n else: @@ -79,11 +79,11 @@ @given(st.text()) @example(u'\x800') def test_next_char(text): - buf = DecodeBuffer(text) + buf = DecodeBuffer(text.encode('utf8')) chars = [] try: while True: - ch = buf.next_char() + ch = buf.next_char().decode('utf8') chars.append(ch) except 
StopIteration: pass diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -338,8 +338,8 @@ assert u'A\u03a3A'.title() == u'A\u03c3a' assert u"brow\u4321n fox".title() == u"Brow\u4321N Fox" assert u'\ud800'.title() == u'\ud800' - assert (unichr(0x345) + u'abc').title() == u'\u0399Abc' - assert (unichr(0x345) + u'ABC').title() == u'\u0399Abc' + assert (chr(0x345) + u'abc').title() == u'\u0399abc' + assert (chr(0x345) + u'ABC').title() == u'\u0399abc' def test_istitle(self): assert u"".istitle() == False From pypy.commits at gmail.com Tue Jan 15 18:18:24 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 15 Jan 2019 15:18:24 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: decoder returns result, len(utf8), not result, len(unicode) Message-ID: <5c3e6a40.1c69fb81.d95fc.0c22@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95640:4821ebc434d0 Date: 2019-01-15 20:09 +0200 http://bitbucket.org/pypy/pypy/changeset/4821ebc434d0/ Log: decoder returns result, len(utf8), not result, len(unicode) diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -1009,7 +1009,7 @@ string, errors, final, state.decode_error_handler) return space.newtuple([space.newutf8(result, lgt), - space.newint(lgt)]) + space.newint(len(string))]) @unwrap_spec(errors='text_or_none') def unicode_internal_encode(space, w_uni, errors="strict"): From pypy.commits at gmail.com Tue Jan 15 18:18:26 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 15 Jan 2019 15:18:26 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: more tests, fix backslashreplace_errors: decode handles utf8 not unicode Message-ID: <5c3e6a42.1c69fb81.50a0a.c453@mx.google.com> Author: Matti Picus Branch: 
unicode-utf8-py3 Changeset: r95641:f9702f2e0e19 Date: 2019-01-15 21:17 +0200 http://bitbucket.org/pypy/pypy/changeset/f9702f2e0e19/ Log: more tests, fix backslashreplace_errors: decode handles utf8 not unicode diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -302,9 +302,9 @@ builder = StringBuilder() pos = start while pos < end: - oc = rutf8.codepoint_at_pos(obj, pos) + oc = ord(obj[pos]) raw_unicode_escape_helper(builder, oc) - pos = rutf8.next_codepoint_pos(obj, pos) + pos += 1 return space.newtuple([space.newtext(builder.build()), w_end]) else: raise oefmt(space.w_TypeError, diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -971,12 +971,27 @@ ('utf-16-be', b'\xdc\x80'), ('utf-32-le', b'\x80\xdc\x00\x00'), ('utf-32-be', b'\x00\x00\xdc\x80')]: - before, after = "[", "]" - before_sequence = before.encode(encoding) - after_sequence = after.encode(encoding) - test_string = before + "\uDC80" + after - test_sequence = before_sequence + ill_surrogate + after_sequence - raises(UnicodeDecodeError, test_sequence.decode, encoding) + ill_formed_sequence_replace = "\ufffd" + if encoding == 'utf-8': + ill_formed_sequence_replace *= 3 + bom = "".encode(encoding) + for before, after in [("\U00010fff", "A"), ("[", "]"), + ("A", "\U00010fff")]: + before_sequence = before.encode(encoding)[len(bom):] + after_sequence = after.encode(encoding)[len(bom):] + test_string = before + "\uDC80" + after + test_sequence = (bom + before_sequence + ill_surrogate + after_sequence) + raises(UnicodeDecodeError, test_sequence.decode, encoding) + assert test_string.encode(encoding, 'surrogatepass') == test_sequence + assert test_sequence.decode(encoding, 'surrogatepass') == test_string + assert test_sequence.decode(encoding, 'ignore') 
== before + after + assert test_sequence.decode(encoding, 'replace') == (before + + ill_formed_sequence_replace + after), str( + (encoding, test_sequence, before + ill_formed_sequence_replace + after)) + backslashreplace = ''.join('\\x%02x' % b for b in ill_surrogate) + assert test_sequence.decode(encoding, "backslashreplace") == (before + + backslashreplace + after) + def test_charmap_encode(self): assert 'xxx'.encode('charmap') == b'xxx' From pypy.commits at gmail.com Tue Jan 15 18:18:28 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 15 Jan 2019 15:18:28 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: test, fix for format specification alignment repeats with a unicode codepoint Message-ID: <5c3e6a44.1c69fb81.474ed.4587@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95642:762bda764d4f Date: 2019-01-16 01:17 +0200 http://bitbucket.org/pypy/pypy/changeset/762bda764d4f/ Log: test, fix for format specification alignment repeats with a unicode codepoint diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py --- a/pypy/objspace/std/newformat.py +++ b/pypy/objspace/std/newformat.py @@ -455,11 +455,16 @@ i = 0 got_align = True got_fill_char = False - if length - i >= 2 and self._is_alignment(spec[i + 1]): - self._align = spec[i + 1] - self._fill_char = spec[i] + # The single character could be utf8-encoded unicode + if self.is_unicode: + after_i = rutf8.next_codepoint_pos(spec, i) + else: + after_i = i + 1 + if length - i >= 2 and self._is_alignment(spec[after_i]): + self._align = spec[after_i] + self._fill_char = spec[i:after_i] got_fill_char = True - i += 2 + i = after_i + 1 elif length - i >= 1 and self._is_alignment(spec[i]): self._align = spec[i] i += 1 @@ -552,7 +557,10 @@ return builder.build() def _builder(self): - return rstring.StringBuilder() + if self.is_unicode: + return rutf8.Utf8StringBuilder() + else: + return rstring.StringBuilder() def _unknown_presentation(self, tp): raise 
oefmt(self.space.w_ValueError, diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1130,6 +1130,10 @@ return u'\u1234' '%s' % X() + def test_format_repeat(self): + assert format(u"abc", u"z<5") == u"abczz" + assert format(u"abc", u"\u2007<5") == u"abc\u2007\u2007" + def test_formatting_char(self): for num in range(0x80,0x100): uchar = unichr(num) diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -706,7 +706,7 @@ return s_None def method_append_multiple_char(self, s_char, s_times): - assert isinstance(s_char, SomeChar) + assert isinstance(s_char, (SomeString, SomeChar)) assert isinstance(s_times, SomeInteger) return s_None diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -721,6 +721,11 @@ self._lgt += length @always_inline + def append_multiple_char(self, utf8, times): + self._s.append(utf8 * times) + self._lgt += times + + @always_inline def build(self): return self._s.build() From pypy.commits at gmail.com Tue Jan 15 18:20:12 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 15 Jan 2019 15:20:12 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: merge unicode-utf8 into branch Message-ID: <5c3e6aac.1c69fb81.9d9a6.d585@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95643:5dbc4374c8c0 Date: 2019-01-16 01:19 +0200 http://bitbucket.org/pypy/pypy/changeset/5dbc4374c8c0/ Log: merge unicode-utf8 into branch diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py --- a/pypy/objspace/std/newformat.py +++ b/pypy/objspace/std/newformat.py @@ -463,11 +463,16 @@ i = 0 got_align = True got_fill_char = False - if length - i >= 2 and self._is_alignment(spec[i + 1]): - self._align = spec[i + 1] - self._fill_char = spec[i] + 
# The single character could be utf8-encoded unicode + if self.is_unicode: + after_i = rutf8.next_codepoint_pos(spec, i) + else: + after_i = i + 1 + if length - i >= 2 and self._is_alignment(spec[after_i]): + self._align = spec[after_i] + self._fill_char = spec[i:after_i] got_fill_char = True - i += 2 + i = after_i + 1 elif length - i >= 1 and self._is_alignment(spec[i]): self._align = spec[i] i += 1 @@ -560,7 +565,10 @@ return builder.build() def _builder(self): - return rstring.StringBuilder() + if self.is_unicode: + return rutf8.Utf8StringBuilder() + else: + return rstring.StringBuilder() def _unknown_presentation(self, tp): raise oefmt(self.space.w_ValueError, diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1127,6 +1127,10 @@ return u'\u1234' '%s' % X() + def test_format_repeat(self): + assert format(u"abc", u"z<5") == u"abczz" + assert format(u"abc", u"\u2007<5") == u"abc\u2007\u2007" + def test_formatting_unicode__repr__(self): # Printable character assert '%r' % chr(0xe9) == "'\xe9'" diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -706,7 +706,7 @@ return s_None def method_append_multiple_char(self, s_char, s_times): - assert isinstance(s_char, SomeChar) + assert isinstance(s_char, (SomeString, SomeChar)) assert isinstance(s_times, SomeInteger) return s_None diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -723,6 +723,11 @@ self._lgt += length @always_inline + def append_multiple_char(self, utf8, times): + self._s.append(utf8 * times) + self._lgt += times + + @always_inline def build(self): return self._s.build() From pypy.commits at gmail.com Wed Jan 16 01:43:22 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 15 Jan 2019 22:43:22 -0800 (PST) 
Subject: [pypy-commit] pypy default: add even more debug to msvc compiler detection. Message-ID: <5c3ed28a.1c69fb81.6bd5d.1041@mx.google.com> Author: Matti Picus Branch: Changeset: r95644:e515811660b6 Date: 2019-01-16 08:42 +0200 http://bitbucket.org/pypy/pypy/changeset/e515811660b6/ Log: add even more debug to msvc compiler detection. We should rip this out and use distutils/setuptools. diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -86,12 +86,14 @@ stderr=subprocess.PIPE) stdout, stderr = popen.communicate() - if popen.wait() != 0: + if popen.wait() != 0 or stdout[:5].lower() == 'error': + log.msg('Running "%s" errored: \n\nstdout:\n%s\n\nstderr:\n%s' % ( + vcvars, stdout.split()[0], stderr)) return None - if stdout[:5].lower() == 'error': - log.msg('Running "%s" errored: %s' %(vcvars, stdout.split()[0])) - return None - except: + else: + log.msg('Running "%s" succeeded' %(vcvars,)) + except Exception as e: + log.msg('Running "%s" failed: "%s"', (vcvars, str(e))) return None stdout = stdout.replace("\r\n", "\n") @@ -180,8 +182,13 @@ self.cc = cc # detect version of current compiler - returncode, stdout, stderr = _run_subprocess(self.cc, [], + try: + returncode, stdout, stderr = _run_subprocess(self.cc, [], env=self.c_environ) + except EnvironmentError: + log.msg('Could not run %s using PATH=\n%s' %(self.cc, + '\n'.join(self.c_environ['PATH'].split(';')))) + raise r = re.search(r'Microsoft.+C/C\+\+.+\s([0-9]+)\.([0-9]+).*', stderr) if r is not None: self.version = int(''.join(r.groups())) / 10 - 60 From pypy.commits at gmail.com Wed Jan 16 01:51:11 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 15 Jan 2019 22:51:11 -0800 (PST) Subject: [pypy-commit] pypy default: more debug; make sure PATH is found Message-ID: <5c3ed45f.1c69fb81.47bb4.9612@mx.google.com> Author: Matti Picus Branch: Changeset: r95645:3e4d7fa709ba Date: 
2019-01-16 08:50 +0200 http://bitbucket.org/pypy/pypy/changeset/3e4d7fa709ba/ Log: more debug; make sure PATH is found diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -107,6 +107,8 @@ for key, value in vcdict.items(): if key.upper() in ['PATH', 'INCLUDE', 'LIB']: env[key.upper()] = value + if 'PATH' not in env: + log.msg('Did not find "PATH" in stdout\n%s' %(stdout)) log.msg("Updated environment with vsver %d, using x64 %s" % (vsver, x64flag,)) return env From pypy.commits at gmail.com Wed Jan 16 01:54:35 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 15 Jan 2019 22:54:35 -0800 (PST) Subject: [pypy-commit] pypy default: more debug; unconditionally print stdout Message-ID: <5c3ed52b.1c69fb81.bf977.9da5@mx.google.com> Author: Matti Picus Branch: Changeset: r95646:1c58fe28d95e Date: 2019-01-16 08:53 +0200 http://bitbucket.org/pypy/pypy/changeset/1c58fe28d95e/ Log: more debug; unconditionally print stdout diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -97,6 +97,7 @@ return None stdout = stdout.replace("\r\n", "\n") + log.msg('stdout\n%s' %(stdout)) vcdict = {} for line in stdout.split("\n"): if '=' not in line: @@ -107,8 +108,6 @@ for key, value in vcdict.items(): if key.upper() in ['PATH', 'INCLUDE', 'LIB']: env[key.upper()] = value - if 'PATH' not in env: - log.msg('Did not find "PATH" in stdout\n%s' %(stdout)) log.msg("Updated environment with vsver %d, using x64 %s" % (vsver, x64flag,)) return env From pypy.commits at gmail.com Wed Jan 16 11:27:06 2019 From: pypy.commits at gmail.com (rlamy) Date: Wed, 16 Jan 2019 08:27:06 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix merge and reduce diff Message-ID: <5c3f5b5a.1c69fb81.f1eb.7c48@mx.google.com> Author: Ronan Lamy Branch: py3.5 
Changeset: r95648:59d7c3f4eb6d Date: 2019-01-16 16:26 +0000 http://bitbucket.org/pypy/pypy/changeset/59d7c3f4eb6d/ Log: Fix merge and reduce diff diff --git a/pypy/module/__pypy__/test/test_builders.py b/pypy/module/__pypy__/test/test_builders.py --- a/pypy/module/__pypy__/test/test_builders.py +++ b/pypy/module/__pypy__/test/test_builders.py @@ -4,33 +4,32 @@ def test_simple(self): from __pypy__.builders import StringBuilder b = StringBuilder() - b.append("abc") - b.append("123") - b.append("1") + b.append(u"abc") + b.append(u"123") + b.append(u"1") s = b.build() - assert s == "abc1231" - assert type(s) is unicode + assert s == u"abc1231" assert b.build() == s - b.append("123") - assert b.build() == s + "123" + b.append(u"123") + assert b.build() == s + u"123" def test_preallocate(self): from __pypy__.builders import StringBuilder b = StringBuilder(10) - b.append("abc") - b.append("123") + b.append(u"abc") + b.append(u"123") s = b.build() - assert s == "abc123" + assert s == u"abc123" def test_append_slice(self): from __pypy__.builders import StringBuilder b = StringBuilder() - b.append_slice("abcdefgh", 2, 5) - raises(ValueError, b.append_slice, "1", 2, 1) + b.append_slice(u"abcdefgh", 2, 5) + raises(ValueError, b.append_slice, u"1", 2, 1) s = b.build() - assert s == "cde" - b.append_slice("abc", 1, 2) - assert b.build() == "cdeb" + assert s == u"cde" + b.append_slice(u"abc", 1, 2) + assert b.build() == u"cdeb" def test_stringbuilder(self): from __pypy__.builders import BytesBuilder From pypy.commits at gmail.com Wed Jan 16 15:10:13 2019 From: pypy.commits at gmail.com (rlamy) Date: Wed, 16 Jan 2019 12:10:13 -0800 (PST) Subject: [pypy-commit] pypy py3.5: py3 mappings don't have the 'has_key' method Message-ID: <5c3f8fa5.1c69fb81.82d21.c297@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r95650:f3d5c2e6f30d Date: 2019-01-16 18:54 +0000 http://bitbucket.org/pypy/pypy/changeset/f3d5c2e6f30d/ Log: py3 mappings don't have the 'has_key' method diff --git 
a/lib_pypy/_gdbm.py b/lib_pypy/_gdbm.py --- a/lib_pypy/_gdbm.py +++ b/lib_pypy/_gdbm.py @@ -74,12 +74,11 @@ self.__check_closed() key = _checkstr(key) return lib.pygdbm_exists(self.__ll_dbm, key, len(key)) - has_key = __contains__ def get(self, key, default=None): with _lock: self.__check_closed() - key = _checkstr(key) + key = _checkstr(key) drec = lib.pygdbm_fetch(self.__ll_dbm, key, len(key)) if not drec.dptr: return default From pypy.commits at gmail.com Wed Jan 16 15:10:15 2019 From: pypy.commits at gmail.com (rlamy) Date: Wed, 16 Jan 2019 12:10:15 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Relax test and add a note of the inconsistency in CPython Message-ID: <5c3f8fa7.1c69fb81.aef0d.c0b2@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r95651:1631e316d8aa Date: 2019-01-16 20:09 +0000 http://bitbucket.org/pypy/pypy/changeset/1631e316d8aa/ Log: Relax test and add a note of the inconsistency in CPython diff --git a/extra_tests/test_datetime.py b/extra_tests/test_datetime.py --- a/extra_tests/test_datetime.py +++ b/extra_tests/test_datetime.py @@ -33,7 +33,9 @@ (timedelta_safe(1, 2, 3), "timedelta_safe(1, 2, 3)"), ]) def test_repr(obj, expected): - assert repr(obj) == expected + # XXX: there's a discrepancy between datetime.py and CPython's _datetime + # for the repr() of Python-defined subclasses of datetime classes. 
+ assert repr(obj).endswith(expected) @pytest.mark.parametrize("obj", [ datetime.date.today(), From pypy.commits at gmail.com Wed Jan 16 15:28:29 2019 From: pypy.commits at gmail.com (rlamy) Date: Wed, 16 Jan 2019 12:28:29 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix extra_tests/ctypes_tests/test_extra.py::test_truth_value() Message-ID: <5c3f93ed.1c69fb81.47bda.1be0@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r95653:cd262738eee3 Date: 2019-01-16 20:27 +0000 http://bitbucket.org/pypy/pypy/changeset/cd262738eee3/ Log: Fix extra_tests/ctypes_tests/test_extra.py::test_truth_value() diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py --- a/lib_pypy/_ctypes/primitive.py +++ b/lib_pypy/_ctypes/primitive.py @@ -410,6 +410,6 @@ id(self)) def __bool__(self): - return self._buffer[0] not in (0, '\x00') + return self._buffer[0] not in (0, b'\x00') from _ctypes.function import CFuncPtr From pypy.commits at gmail.com Wed Jan 16 15:42:19 2019 From: pypy.commits at gmail.com (rlamy) Date: Wed, 16 Jan 2019 12:42:19 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5c3f972b.1c69fb81.2ba73.3862@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r95654:f7b6aeba6694 Date: 2019-01-16 20:36 +0000 http://bitbucket.org/pypy/pypy/changeset/f7b6aeba6694/ Log: hg merge default diff --git a/pypy/conftest.py b/pypy/conftest.py --- a/pypy/conftest.py +++ b/pypy/conftest.py @@ -14,6 +14,14 @@ rsyncdirs = ['.', '../lib-python', '../lib_pypy', '../demo'] rsyncignore = ['_cache'] +try: + from hypothesis import settings +except ImportError: + pass +else: + settings.register_profile('default', deadline=None) + settings.load_profile('default') + # PyPy's command line extra options (these are added # to py.test's standard options) # diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -83,12 
+83,14 @@ stderr=subprocess.PIPE) stdout, stderr = popen.communicate() - if popen.wait() != 0: + if popen.wait() != 0 or stdout[:5].lower() == 'error': + log.msg('Running "%s" errored: \n\nstdout:\n%s\n\nstderr:\n%s' % ( + vcvars, stdout.split()[0], stderr)) return None - if stdout[:5].lower() == 'error': - log.msg('Running "%s" errored: %s' %(vcvars, stdout.split()[0])) - return None - except: + else: + log.msg('Running "%s" succeeded' %(vcvars,)) + except Exception as e: + log.msg('Running "%s" failed: "%s"', (vcvars, str(e))) return None stdout = stdout.replace("\r\n", "\n") @@ -189,8 +191,13 @@ self.cc = cc # detect version of current compiler - returncode, stdout, stderr = _run_subprocess(self.cc, [], + try: + returncode, stdout, stderr = _run_subprocess(self.cc, [], env=self.c_environ) + except EnvironmentError: + log.msg('Could not run %s using PATH=\n%s' %(self.cc, + '\n'.join(self.c_environ['PATH'].split(';')))) + raise r = re.search(r'Microsoft.+C/C\+\+.+\s([0-9]+)\.([0-9]+).*', stderr) if r is not None: self.version = int(''.join(r.groups())) / 10 - 60 From pypy.commits at gmail.com Wed Jan 16 17:40:22 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 16 Jan 2019 14:40:22 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: test, fix for StringIO(unicode).read(cnt) Message-ID: <5c3fb2d6.1c69fb81.4bd74.7f57@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95655:6185982e509f Date: 2019-01-16 23:42 +0200 http://bitbucket.org/pypy/pypy/changeset/6185982e509f/ Log: test, fix for StringIO(unicode).read(cnt) diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -1,4 +1,4 @@ -from rpython.rlib.rutf8 import get_utf8_length +from rpython.rlib.rutf8 import get_utf8_length, next_codepoint_pos from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.typedef import ( @@ -11,8 +11,16 @@ class 
UnicodeIO(object): def __init__(self, data=None, pos=0): if data is None: - data = [] - self.data = data + data = '' + self.data = [] + self.pos = 0 + # break the data into unicode codepoints + _pos = 0 + while _pos < pos: + _pos = next_codepoint_pos(data, _pos) + if _pos >= len(data): + break + self.write(data[_pos:]) self.pos = pos def resize(self, newlength): @@ -85,12 +93,14 @@ return result def write(self, string): - length = len(string) + length = get_utf8_length(string) if self.pos + length > len(self.data): self.resize(self.pos + length) - + pos = 0 for i in range(length): - self.data[self.pos + i] = string[i] + nextpos = next_codepoint_pos(string, pos) + self.data[self.pos + i] = string[pos:nextpos] + pos = nextpos self.pos += length def seek(self, pos): @@ -186,7 +196,7 @@ if pos < 0: raise oefmt(space.w_ValueError, "position value cannot be negative") - self.buf = UnicodeIO(list(initval), pos) + self.buf = UnicodeIO(initval, pos) if not space.is_w(w_dict, space.w_None): if not space.isinstance_w(w_dict, space.w_dict): raise oefmt( diff --git a/pypy/module/_io/test/test_stringio.py b/pypy/module/_io/test/test_stringio.py --- a/pypy/module/_io/test/test_stringio.py +++ b/pypy/module/_io/test/test_stringio.py @@ -42,6 +42,17 @@ assert buf[5:] == sio.read(900) assert u"" == sio.read() + def test_read_binary(self): + # data is from a test_imghdr test for a GIF file + import io + buf_in = (u'\x47\x49\x46\x38\x39\x61\x10\x00\x10\x00\xf6\x64\x00\xeb' + u'\xbb\x18\xeb\xbe\x21\xf3\xc1\x1a\xfa\xc7\x19\xfd\xcb\x1b' + u'\xff\xcc\x1c\xeb') + assert len(buf_in) == 32 + sio = io.StringIO(buf_in) + buf_out = sio.read(32) + assert buf_in == buf_out + def test_readline(self): import io sio = io.StringIO(u'123\n456') From pypy.commits at gmail.com Wed Jan 16 17:44:37 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 16 Jan 2019 14:44:37 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: fix utf_7_decode(b'+') which should return (u'', 0) Message-ID: 
<5c3fb3d5.1c69fb81.13d75.00b8@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95656:a57fddf5aa3c Date: 2019-01-16 11:48 +0200 http://bitbucket.org/pypy/pypy/changeset/a57fddf5aa3c/ Log: fix utf_7_decode(b'+') which should return (u'', 0) diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -949,11 +949,11 @@ # end of string final_length = result.getlength() if inShift and final: # in shift sequence, no more to follow - # if we're in an inconsistent state, that's an error inShift = 0 if (surrogate or base64bits >= 6 or (base64bits > 0 and base64buffer != 0)): + # if we're in an inconsistent state, that's an error msg = "unterminated shift sequence" r, pos, rettype = errorhandler(errors, 'utf7', msg, s, shiftOutStartPos, pos) reslen = rutf8.check_utf8(r, True) @@ -961,7 +961,7 @@ result.append(r) final_length = result.getlength() elif inShift: - pos = startinpos + size = startinpos final_length = shiftOutStartPos # back off output assert final_length >= 0 diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -686,7 +686,7 @@ final = space.is_true(w_final) state = space.fromcache(CodecState) result, length, pos = func(string, errors, final, state.decode_error_handler) - # must return bytes, len_of_original_string + # must return bytes, pos return space.newtuple([space.newutf8(result, length), space.newint(pos)]) wrap_decoder.__name__ = func.__name__ globals()[name] = wrap_decoder diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -1029,6 +1029,11 @@ def test_utf7_surrogate(self): assert b'+3ADYAA-'.decode('utf-7') == u'\udc00\ud800' + def test_utf_7_decode(self): + from 
_codecs import utf_7_decode + res = utf_7_decode(b'+') + assert res == (u'', 0) + def test_utf7_errors(self): import codecs tests = [ From pypy.commits at gmail.com Wed Jan 16 17:44:38 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 16 Jan 2019 14:44:38 -0800 (PST) Subject: [pypy-commit] pypy default: backport changes to win32 from py3.5, cleanup Message-ID: <5c3fb3d6.1c69fb81.98724.d026@mx.google.com> Author: Matti Picus Branch: Changeset: r95657:b55b33a08ee8 Date: 2019-01-16 23:43 +0200 http://bitbucket.org/pypy/pypy/changeset/b55b33a08ee8/ Log: backport changes to win32 from py3.5, cleanup diff --git a/rpython/rlib/rwinreg.py b/rpython/rlib/rwinreg.py --- a/rpython/rlib/rwinreg.py +++ b/rpython/rlib/rwinreg.py @@ -47,7 +47,7 @@ HKEY = rwin32.HANDLE PHKEY = rffi.CArrayPtr(HKEY) REGSAM = rwin32.DWORD -suffix = 'W' + def get_traits(suffix): RegSetValue = external( 'RegSetValue' + suffix, diff --git a/rpython/translator/platform/test/test_makefile.py b/rpython/translator/platform/test/test_makefile.py --- a/rpython/translator/platform/test/test_makefile.py +++ b/rpython/translator/platform/test/test_makefile.py @@ -85,7 +85,8 @@ txt = '#include \n' for i in range(ncfiles): txt += "int func%03d();\n" % i - txt += "\nint main(int argc, char * argv[])\n" + txt += "\n__declspec(dllexport) int\n" + txt += "pypy_main_startup(int argc, char * argv[])\n" txt += "{\n int i=0;\n" for i in range(ncfiles): txt += " i += func%03d();\n" % i @@ -119,7 +120,7 @@ clean = ('clean', '', 'rm -f $(OBJECTS) $(TARGET) ') get_time = time.time #write a non-precompiled header makefile - mk = self.platform.gen_makefile(cfiles, eci, path=tmpdir) + mk = self.platform.gen_makefile(cfiles, eci, path=tmpdir, shared=True) mk.rule(*clean) mk.write() t0 = get_time() @@ -128,7 +129,7 @@ t_normal = t1 - t0 self.platform.execute_makefile(mk, extra_opts=['clean']) # Write a super-duper makefile with precompiled headers - mk = self.platform.gen_makefile(cfiles, eci, path=tmpdir, + mk = 
self.platform.gen_makefile(cfiles, eci, path=tmpdir, shared=True, headers_to_precompile=cfiles_precompiled_headers,) mk.rule(*clean) mk.write() diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -9,14 +9,14 @@ import rpython rpydir = str(py.path.local(rpython.__file__).join('..')) -def _get_compiler_type(cc, x64_flag, ver0=None): +def _get_compiler_type(cc, x64_flag): if not cc: cc = os.environ.get('CC','') if not cc: - return MsvcPlatform(x64=x64_flag, ver0=ver0) + return MsvcPlatform(x64=x64_flag) elif cc.startswith('mingw') or cc == 'gcc': return MingwPlatform(cc) - return MsvcPlatform(cc=cc, x64=x64_flag, ver0=ver0) + return MsvcPlatform(cc=cc, x64=x64_flag) def _get_vcver0(): # try to get the compiler which served to compile python @@ -28,17 +28,13 @@ return vsver return None -def Windows(cc=None, ver0=None): - if ver0 is None: - ver0 = _get_vcver0() - return _get_compiler_type(cc, False, ver0=ver0) +def Windows(cc=None): + return _get_compiler_type(cc, False) def Windows_x64(cc=None, ver0=None): raise Exception("Win64 is not supported. 
You must either build for Win32" " or contribute the missing support in PyPy.") - if ver0 is None: - ver0 = _get_vcver0() - return _get_compiler_type(cc, True, ver0=ver0) + return _get_compiler_type(cc, True) def _find_vcvarsall(version, x64flag): import rpython.tool.setuptools_msvc as msvc @@ -46,19 +42,16 @@ arch = 'x64' else: arch = 'x86' - if version == 140: + if version >= 140: return msvc.msvc14_get_vc_env(arch) else: return msvc.msvc9_query_vcvarsall(version / 10.0, arch) - + def _get_msvc_env(vsver, x64flag): vcdict = None toolsdir = None try: - if vsver < 140: - toolsdir = os.environ['VS%sCOMNTOOLS' % vsver] - else: - raise KeyError('always use registry values') + toolsdir = os.environ['VS%sCOMNTOOLS' % vsver] except KeyError: # use setuptools from python3 to find tools try: @@ -76,8 +69,8 @@ if not os.path.exists(vcvars): # even msdn does not know which to run # see https://msdn.microsoft.com/en-us/library/1700bbwd(v=vs.90).aspx - # wich names both - vcvars = os.path.join(toolsdir, 'vcvars32.bat') + # which names both + vcvars = os.path.join(toolsdir, 'vcvars32.bat') import subprocess try: @@ -109,15 +102,27 @@ env[key.upper()] = value if 'PATH' not in env: log.msg('Did not find "PATH" in stdout\n%s' %(stdout)) + if not _find_executable('mt.exe', env['PATH']): + # For some reason the sdk bin path is missing? 
+ # put it together from some other env variables that happened to exist + # on the buildbot where this occurred + if 'WindowsSDKVersion' in vcdict and 'WindowsSdkDir' in vcdict: + binpath = vcdict['WindowsSdkDir'] + '\\bin\\' + vcdict['WindowsSDKVersion'] + 'x86' + env['PATH'] += ';' + binpath + if not _find_executable('mt.exe', env['PATH']): + log.msg('Could not find mt.exe on path=%s' % env['PATH']) + log.msg('Running vsver %s set this env' % vsver) + for key, value in vcdict.items(): + log.msg('%s=%s' %(key, value)) log.msg("Updated environment with vsver %d, using x64 %s" % (vsver, x64flag,)) return env def find_msvc_env(x64flag=False, ver0=None): - vcvers = [140, 90, 100] + vcvers = [140, 141, 150, 90, 100] if ver0 in vcvers: vcvers.insert(0, ver0) errs = [] - for vsver in vcvers: + for vsver in vcvers: env = _get_msvc_env(vsver, x64flag) if env is not None: return env, vsver @@ -172,6 +177,9 @@ patch_os_env(self.externals) self.c_environ = os.environ.copy() if cc is None: + # prefer compiler used to build host. 
Python2 only + if ver0 is None: + ver0 = _get_vcver0() msvc_compiler_environ, self.vsver = find_msvc_env(x64, ver0=ver0) Platform.__init__(self, 'cl.exe') if msvc_compiler_environ: @@ -276,23 +284,21 @@ if not standalone: args = self._args_for_shared(args) - if self.version >= 80: - # Tell the linker to generate a manifest file - temp_manifest = exe_name.dirpath().join( - exe_name.purebasename + '.manifest') - args += ["/MANIFEST", "/MANIFESTFILE:%s" % (temp_manifest,)] + # Tell the linker to generate a manifest file + temp_manifest = exe_name.dirpath().join( + exe_name.purebasename + '.manifest') + args += ["/MANIFEST", "/MANIFESTFILE:%s" % (temp_manifest,)] self._execute_c_compiler(self.link, args, exe_name) - if self.version >= 80: - # Now, embed the manifest into the program - if standalone: - mfid = 1 - else: - mfid = 2 - out_arg = '-outputresource:%s;%s' % (exe_name, mfid) - args = ['-nologo', '-manifest', str(temp_manifest), out_arg] - self._execute_c_compiler('mt.exe', args, exe_name) + # Now, embed the manifest into the program + if standalone: + mfid = 1 + else: + mfid = 2 + out_arg = '-outputresource:%s;%s' % (exe_name, mfid) + args = ['-nologo', '-manifest', str(temp_manifest), out_arg] + self._execute_c_compiler('mt.exe', args, exe_name) return exe_name @@ -396,7 +402,8 @@ if len(headers_to_precompile)>0: if shared: - no_precompile_cfiles += ['main.c', 'wmain.c'] + no_precompile_cfiles += [m.makefile_dir / 'main.c', + m.makefile_dir / 'wmain.c'] stdafx_h = path.join('stdafx.h') txt = '#ifndef PYPY_STDAFX_H\n' txt += '#define PYPY_STDAFX_H\n' From pypy.commits at gmail.com Wed Jan 16 17:44:40 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 16 Jan 2019 14:44:40 -0800 (PST) Subject: [pypy-commit] pypy py3.5: merge default into branch Message-ID: <5c3fb3d8.1c69fb81.f4993.6b26@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r95658:48303f7469f9 Date: 2019-01-17 00:28 +0200 http://bitbucket.org/pypy/pypy/changeset/48303f7469f9/ Log: merge 
default into branch diff --git a/pypy/conftest.py b/pypy/conftest.py --- a/pypy/conftest.py +++ b/pypy/conftest.py @@ -14,6 +14,14 @@ rsyncdirs = ['.', '../lib-python', '../lib_pypy', '../demo'] rsyncignore = ['_cache'] +try: + from hypothesis import settings +except ImportError: + pass +else: + settings.register_profile('default', deadline=None) + settings.load_profile('default') + # PyPy's command line extra options (these are added # to py.test's standard options) # diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -9,14 +9,14 @@ import rpython rpydir = str(py.path.local(rpython.__file__).join('..')) -def _get_compiler_type(cc, x64_flag, ver0=None): +def _get_compiler_type(cc, x64_flag): if not cc: cc = os.environ.get('CC','') if not cc: - return MsvcPlatform(x64=x64_flag, ver0=ver0) + return MsvcPlatform(x64=x64_flag) elif cc.startswith('mingw') or cc == 'gcc': return MingwPlatform(cc) - return MsvcPlatform(cc=cc, x64=x64_flag, ver0=ver0) + return MsvcPlatform(cc=cc, x64=x64_flag) def _get_vcver0(): # try to get the compiler which served to compile python @@ -28,17 +28,13 @@ return vsver return None -def Windows(cc=None, ver0=None): - #if ver0 is None: - # ver0 = _get_vcver0() - return _get_compiler_type(cc, False, ver0=ver0) +def Windows(cc=None): + return _get_compiler_type(cc, False) def Windows_x64(cc=None, ver0=None): raise Exception("Win64 is not supported. 
You must either build for Win32" " or contribute the missing support in PyPy.") - if ver0 is None: - ver0 = _get_vcver0() - return _get_compiler_type(cc, True, ver0=ver0) + return _get_compiler_type(cc, True) def _find_vcvarsall(version, x64flag): import rpython.tool.setuptools_msvc as msvc @@ -50,7 +46,7 @@ return msvc.msvc14_get_vc_env(arch) else: return msvc.msvc9_query_vcvarsall(version / 10.0, arch) - + def _get_msvc_env(vsver, x64flag): vcdict = None toolsdir = None @@ -74,7 +70,7 @@ # even msdn does not know which to run # see https://msdn.microsoft.com/en-us/library/1700bbwd(v=vs.90).aspx # which names both - vcvars = os.path.join(toolsdir, 'vcvars32.bat') + vcvars = os.path.join(toolsdir, 'vcvars32.bat') import subprocess try: @@ -83,12 +79,14 @@ stderr=subprocess.PIPE) stdout, stderr = popen.communicate() - if popen.wait() != 0: + if popen.wait() != 0 or stdout[:5].lower() == 'error': + log.msg('Running "%s" errored: \n\nstdout:\n%s\n\nstderr:\n%s' % ( + vcvars, stdout.split()[0], stderr)) return None - if stdout[:5].lower() == 'error': - log.msg('Running "%s" errored: %s' %(vcvars, stdout.split()[0])) - return None - except: + else: + log.msg('Running "%s" succeeded' %(vcvars,)) + except Exception as e: + log.msg('Running "%s" failed: "%s"', (vcvars, str(e))) return None stdout = stdout.replace("\r\n", "\n") @@ -102,6 +100,8 @@ for key, value in vcdict.items(): if key.upper() in ['PATH', 'INCLUDE', 'LIB']: env[key.upper()] = value + if 'PATH' not in env: + log.msg('Did not find "PATH" in stdout\n%s' %(stdout)) if not _find_executable('mt.exe', env['PATH']): # For some reason the sdk bin path is missing? 
# put it together from some other env variables that happened to exist @@ -113,7 +113,7 @@ log.msg('Could not find mt.exe on path=%s' % env['PATH']) log.msg('Running vsver %s set this env' % vsver) for key, value in vcdict.items(): - log.msg('%s=%s' %(key, value)) + log.msg('%s=%s' %(key, value)) log.msg("Updated environment with vsver %d, using x64 %s" % (vsver, x64flag,)) return env @@ -122,7 +122,7 @@ if ver0 in vcvers: vcvers.insert(0, ver0) errs = [] - for vsver in vcvers: + for vsver in vcvers: env = _get_msvc_env(vsver, x64flag) if env is not None: return env, vsver @@ -189,8 +189,13 @@ self.cc = cc # detect version of current compiler - returncode, stdout, stderr = _run_subprocess(self.cc, [], + try: + returncode, stdout, stderr = _run_subprocess(self.cc, [], env=self.c_environ) + except EnvironmentError: + log.msg('Could not run %s using PATH=\n%s' %(self.cc, + '\n'.join(self.c_environ['PATH'].split(';')))) + raise r = re.search(r'Microsoft.+C/C\+\+.+\s([0-9]+)\.([0-9]+).*', stderr) if r is not None: self.version = int(''.join(r.groups())) / 10 - 60 @@ -276,23 +281,21 @@ if not standalone: args = self._args_for_shared(args) - if self.version >= 80: - # Tell the linker to generate a manifest file - temp_manifest = exe_name.dirpath().join( - exe_name.purebasename + '.manifest') - args += ["/MANIFEST", "/MANIFESTFILE:%s" % (temp_manifest,)] + # Tell the linker to generate a manifest file + temp_manifest = exe_name.dirpath().join( + exe_name.purebasename + '.manifest') + args += ["/MANIFEST", "/MANIFESTFILE:%s" % (temp_manifest,)] self._execute_c_compiler(self.link, args, exe_name) - if self.version >= 80: - # Now, embed the manifest into the program - if standalone: - mfid = 1 - else: - mfid = 2 - out_arg = '-outputresource:%s;%s' % (exe_name, mfid) - args = ['-nologo', '-manifest', str(temp_manifest), out_arg] - self._execute_c_compiler('mt.exe', args, exe_name) + # Now, embed the manifest into the program + if standalone: + mfid = 1 + else: + mfid = 2 + 
out_arg = '-outputresource:%s;%s' % (exe_name, mfid) + args = ['-nologo', '-manifest', str(temp_manifest), out_arg] + self._execute_c_compiler('mt.exe', args, exe_name) return exe_name @@ -396,7 +399,7 @@ if len(headers_to_precompile)>0: if shared: - no_precompile_cfiles += [m.makefile_dir / 'main.c', + no_precompile_cfiles += [m.makefile_dir / 'main.c', m.makefile_dir / 'wmain.c'] stdafx_h = path.join('stdafx.h') txt = '#ifndef PYPY_STDAFX_H\n' From pypy.commits at gmail.com Wed Jan 16 17:44:42 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 16 Jan 2019 14:44:42 -0800 (PST) Subject: [pypy-commit] pypy py3.5: merge heads Message-ID: <5c3fb3da.1c69fb81.76f1f.9d91@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r95659:cd77d2fb6cb8 Date: 2019-01-17 00:42 +0200 http://bitbucket.org/pypy/pypy/changeset/cd77d2fb6cb8/ Log: merge heads diff --git a/extra_tests/test_bufferedreader.py b/extra_tests/test_bufferedreader.py --- a/extra_tests/test_bufferedreader.py +++ b/extra_tests/test_bufferedreader.py @@ -88,7 +88,7 @@ assert self.stream.readline(80) == expected @pytest.mark.parametrize('StreamCls', [Stream, StreamCFFI]) - at settings(max_examples=50) + at settings(max_examples=50, deadline=None) @given(params=data_and_sizes(), chunk_size=st.integers(MIN_READ_SIZE, 8192)) def test_stateful(params, chunk_size, StreamCls): data, sizes = params diff --git a/extra_tests/test_datetime.py b/extra_tests/test_datetime.py --- a/extra_tests/test_datetime.py +++ b/extra_tests/test_datetime.py @@ -33,7 +33,9 @@ (timedelta_safe(1, 2, 3), "timedelta_safe(1, 2, 3)"), ]) def test_repr(obj, expected): - assert repr(obj) == expected + # XXX: there's a discrepancy between datetime.py and CPython's _datetime + # for the repr() of Python-defined subclasses of datetime classes. 
+ assert repr(obj).endswith(expected) @pytest.mark.parametrize("obj", [ datetime.date.today(), diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py --- a/lib_pypy/_ctypes/primitive.py +++ b/lib_pypy/_ctypes/primitive.py @@ -410,6 +410,6 @@ id(self)) def __bool__(self): - return self._buffer[0] not in (0, '\x00') + return self._buffer[0] not in (0, b'\x00') from _ctypes.function import CFuncPtr diff --git a/lib_pypy/_gdbm.py b/lib_pypy/_gdbm.py --- a/lib_pypy/_gdbm.py +++ b/lib_pypy/_gdbm.py @@ -74,12 +74,11 @@ self.__check_closed() key = _checkstr(key) return lib.pygdbm_exists(self.__ll_dbm, key, len(key)) - has_key = __contains__ def get(self, key, default=None): with _lock: self.__check_closed() - key = _checkstr(key) + key = _checkstr(key) drec = lib.pygdbm_fetch(self.__ll_dbm, key, len(key)) if not drec.dptr: return default diff --git a/pypy/module/__pypy__/test/test_builders.py b/pypy/module/__pypy__/test/test_builders.py --- a/pypy/module/__pypy__/test/test_builders.py +++ b/pypy/module/__pypy__/test/test_builders.py @@ -4,33 +4,32 @@ def test_simple(self): from __pypy__.builders import StringBuilder b = StringBuilder() - b.append("abc") - b.append("123") - b.append("1") + b.append(u"abc") + b.append(u"123") + b.append(u"1") s = b.build() - assert s == "abc1231" - assert type(s) is unicode + assert s == u"abc1231" assert b.build() == s - b.append("123") - assert b.build() == s + "123" + b.append(u"123") + assert b.build() == s + u"123" def test_preallocate(self): from __pypy__.builders import StringBuilder b = StringBuilder(10) - b.append("abc") - b.append("123") + b.append(u"abc") + b.append(u"123") s = b.build() - assert s == "abc123" + assert s == u"abc123" def test_append_slice(self): from __pypy__.builders import StringBuilder b = StringBuilder() - b.append_slice("abcdefgh", 2, 5) - raises(ValueError, b.append_slice, "1", 2, 1) + b.append_slice(u"abcdefgh", 2, 5) + raises(ValueError, b.append_slice, u"1", 2, 1) s = b.build() - 
assert s == "cde" - b.append_slice("abc", 1, 2) - assert b.build() == "cdeb" + assert s == u"cde" + b.append_slice(u"abc", 1, 2) + assert b.build() == u"cdeb" def test_stringbuilder(self): from __pypy__.builders import BytesBuilder diff --git a/pypy/module/pypyjit/test_pypy_c/test_misc.py b/pypy/module/pypyjit/test_pypy_c/test_misc.py --- a/pypy/module/pypyjit/test_pypy_c/test_misc.py +++ b/pypy/module/pypyjit/test_pypy_c/test_misc.py @@ -191,8 +191,6 @@ assert log.result == 1000 * 999 / 2 loop, = log.loops_by_filename(self.filepath) assert loop.match(self.RANGE_ITER_STEP_1) - i94 = int_lt(0, i9) - guard_true(i94, descr=...) def test_chain_of_guards(self): src = """ From pypy.commits at gmail.com Wed Jan 16 17:44:44 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 16 Jan 2019 14:44:44 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: merge unicode-utf8 into branch Message-ID: <5c3fb3dc.1c69fb81.f04e7.4434@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95660:a041db80ea09 Date: 2019-01-17 00:43 +0200 http://bitbucket.org/pypy/pypy/changeset/a041db80ea09/ Log: merge unicode-utf8 into branch diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -1,4 +1,4 @@ -from rpython.rlib.rutf8 import get_utf8_length +from rpython.rlib.rutf8 import get_utf8_length, next_codepoint_pos from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.typedef import ( @@ -11,8 +11,16 @@ class UnicodeIO(object): def __init__(self, data=None, pos=0): if data is None: - data = [] - self.data = data + data = '' + self.data = [] + self.pos = 0 + # break the data into unicode codepoints + _pos = 0 + while _pos < pos: + _pos = next_codepoint_pos(data, _pos) + if _pos >= len(data): + break + self.write(data[_pos:]) self.pos = pos def resize(self, newlength): @@ -90,12 +98,14 @@ return result def write(self, string): - length 
= len(string) + length = get_utf8_length(string) if self.pos + length > len(self.data): self.resize(self.pos + length) - + pos = 0 for i in range(length): - self.data[self.pos + i] = string[i] + nextpos = next_codepoint_pos(string, pos) + self.data[self.pos + i] = string[pos:nextpos] + pos = nextpos self.pos += length def seek(self, pos): @@ -192,7 +202,7 @@ if pos < 0: raise oefmt(space.w_ValueError, "position value cannot be negative") - self.buf = UnicodeIO(list(initval), pos) + self.buf = UnicodeIO(initval, pos) if not space.is_w(w_dict, space.w_None): if not space.isinstance_w(w_dict, space.w_dict): raise oefmt( diff --git a/pypy/module/_io/test/test_stringio.py b/pypy/module/_io/test/test_stringio.py --- a/pypy/module/_io/test/test_stringio.py +++ b/pypy/module/_io/test/test_stringio.py @@ -42,6 +42,17 @@ assert buf[5:] == sio.read(900) assert "" == sio.read() + def test_read_binary(self): + # data is from a test_imghdr test for a GIF file + import io + buf_in = (u'\x47\x49\x46\x38\x39\x61\x10\x00\x10\x00\xf6\x64\x00\xeb' + u'\xbb\x18\xeb\xbe\x21\xf3\xc1\x1a\xfa\xc7\x19\xfd\xcb\x1b' + u'\xff\xcc\x1c\xeb') + assert len(buf_in) == 32 + sio = io.StringIO(buf_in) + buf_out = sio.read(32) + assert buf_in == buf_out + def test_readline(self): import io sio = io.StringIO('123\n456') From pypy.commits at gmail.com Wed Jan 16 17:44:46 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 16 Jan 2019 14:44:46 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: merge py3.5 into branch Message-ID: <5c3fb3de.1c69fb81.b0673.23d8@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95661:f751ef782eeb Date: 2019-01-17 00:43 +0200 http://bitbucket.org/pypy/pypy/changeset/f751ef782eeb/ Log: merge py3.5 into branch diff --git a/extra_tests/test_bufferedreader.py b/extra_tests/test_bufferedreader.py --- a/extra_tests/test_bufferedreader.py +++ b/extra_tests/test_bufferedreader.py @@ -88,7 +88,7 @@ assert self.stream.readline(80) == expected 
@pytest.mark.parametrize('StreamCls', [Stream, StreamCFFI]) - at settings(max_examples=50) + at settings(max_examples=50, deadline=None) @given(params=data_and_sizes(), chunk_size=st.integers(MIN_READ_SIZE, 8192)) def test_stateful(params, chunk_size, StreamCls): data, sizes = params diff --git a/extra_tests/test_datetime.py b/extra_tests/test_datetime.py --- a/extra_tests/test_datetime.py +++ b/extra_tests/test_datetime.py @@ -33,7 +33,9 @@ (timedelta_safe(1, 2, 3), "timedelta_safe(1, 2, 3)"), ]) def test_repr(obj, expected): - assert repr(obj) == expected + # XXX: there's a discrepancy between datetime.py and CPython's _datetime + # for the repr() of Python-defined subclasses of datetime classes. + assert repr(obj).endswith(expected) @pytest.mark.parametrize("obj", [ datetime.date.today(), diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py --- a/lib_pypy/_ctypes/primitive.py +++ b/lib_pypy/_ctypes/primitive.py @@ -410,6 +410,6 @@ id(self)) def __bool__(self): - return self._buffer[0] not in (0, '\x00') + return self._buffer[0] not in (0, b'\x00') from _ctypes.function import CFuncPtr diff --git a/lib_pypy/_gdbm.py b/lib_pypy/_gdbm.py --- a/lib_pypy/_gdbm.py +++ b/lib_pypy/_gdbm.py @@ -74,12 +74,11 @@ self.__check_closed() key = _checkstr(key) return lib.pygdbm_exists(self.__ll_dbm, key, len(key)) - has_key = __contains__ def get(self, key, default=None): with _lock: self.__check_closed() - key = _checkstr(key) + key = _checkstr(key) drec = lib.pygdbm_fetch(self.__ll_dbm, key, len(key)) if not drec.dptr: return default diff --git a/pypy/conftest.py b/pypy/conftest.py --- a/pypy/conftest.py +++ b/pypy/conftest.py @@ -14,6 +14,14 @@ rsyncdirs = ['.', '../lib-python', '../lib_pypy', '../demo'] rsyncignore = ['_cache'] +try: + from hypothesis import settings +except ImportError: + pass +else: + settings.register_profile('default', deadline=None) + settings.load_profile('default') + # PyPy's command line extra options (these are added # to 
py.test's standard options) # diff --git a/pypy/module/__pypy__/test/test_builders.py b/pypy/module/__pypy__/test/test_builders.py --- a/pypy/module/__pypy__/test/test_builders.py +++ b/pypy/module/__pypy__/test/test_builders.py @@ -4,33 +4,32 @@ def test_simple(self): from __pypy__.builders import StringBuilder b = StringBuilder() - b.append("abc") - b.append("123") - b.append("1") + b.append(u"abc") + b.append(u"123") + b.append(u"1") s = b.build() - assert s == "abc1231" - assert type(s) is str + assert s == u"abc1231" assert b.build() == s - b.append("123") - assert b.build() == s + "123" + b.append(u"123") + assert b.build() == s + u"123" def test_preallocate(self): from __pypy__.builders import StringBuilder b = StringBuilder(10) - b.append("abc") - b.append("123") + b.append(u"abc") + b.append(u"123") s = b.build() - assert s == "abc123" + assert s == u"abc123" def test_append_slice(self): from __pypy__.builders import StringBuilder b = StringBuilder() - b.append_slice("abcdefgh", 2, 5) - raises(ValueError, b.append_slice, "1", 2, 1) + b.append_slice(u"abcdefgh", 2, 5) + raises(ValueError, b.append_slice, u"1", 2, 1) s = b.build() - assert s == "cde" - b.append_slice("abc", 1, 2) - assert b.build() == "cdeb" + assert s == u"cde" + b.append_slice(u"abc", 1, 2) + assert b.build() == u"cdeb" def test_stringbuilder(self): from __pypy__.builders import BytesBuilder diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -9,14 +9,14 @@ import rpython rpydir = str(py.path.local(rpython.__file__).join('..')) -def _get_compiler_type(cc, x64_flag, ver0=None): +def _get_compiler_type(cc, x64_flag): if not cc: cc = os.environ.get('CC','') if not cc: - return MsvcPlatform(x64=x64_flag, ver0=ver0) + return MsvcPlatform(x64=x64_flag) elif cc.startswith('mingw') or cc == 'gcc': return MingwPlatform(cc) - return MsvcPlatform(cc=cc, x64=x64_flag, 
ver0=ver0) + return MsvcPlatform(cc=cc, x64=x64_flag) def _get_vcver0(): # try to get the compiler which served to compile python @@ -28,17 +28,13 @@ return vsver return None -def Windows(cc=None, ver0=None): - #if ver0 is None: - # ver0 = _get_vcver0() - return _get_compiler_type(cc, False, ver0=ver0) +def Windows(cc=None): + return _get_compiler_type(cc, False) def Windows_x64(cc=None, ver0=None): raise Exception("Win64 is not supported. You must either build for Win32" " or contribute the missing support in PyPy.") - if ver0 is None: - ver0 = _get_vcver0() - return _get_compiler_type(cc, True, ver0=ver0) + return _get_compiler_type(cc, True) def _find_vcvarsall(version, x64flag): import rpython.tool.setuptools_msvc as msvc @@ -50,7 +46,7 @@ return msvc.msvc14_get_vc_env(arch) else: return msvc.msvc9_query_vcvarsall(version / 10.0, arch) - + def _get_msvc_env(vsver, x64flag): vcdict = None toolsdir = None @@ -74,7 +70,7 @@ # even msdn does not know which to run # see https://msdn.microsoft.com/en-us/library/1700bbwd(v=vs.90).aspx # which names both - vcvars = os.path.join(toolsdir, 'vcvars32.bat') + vcvars = os.path.join(toolsdir, 'vcvars32.bat') import subprocess try: @@ -83,12 +79,14 @@ stderr=subprocess.PIPE) stdout, stderr = popen.communicate() - if popen.wait() != 0: + if popen.wait() != 0 or stdout[:5].lower() == 'error': + log.msg('Running "%s" errored: \n\nstdout:\n%s\n\nstderr:\n%s' % ( + vcvars, stdout.split()[0], stderr)) return None - if stdout[:5].lower() == 'error': - log.msg('Running "%s" errored: %s' %(vcvars, stdout.split()[0])) - return None - except: + else: + log.msg('Running "%s" succeeded' %(vcvars,)) + except Exception as e: + log.msg('Running "%s" failed: "%s"', (vcvars, str(e))) return None stdout = stdout.replace("\r\n", "\n") @@ -102,6 +100,8 @@ for key, value in vcdict.items(): if key.upper() in ['PATH', 'INCLUDE', 'LIB']: env[key.upper()] = value + if 'PATH' not in env: + log.msg('Did not find "PATH" in stdout\n%s' %(stdout)) if not 
_find_executable('mt.exe', env['PATH']): # For some reason the sdk bin path is missing? # put it together from some other env variables that happened to exist @@ -113,7 +113,7 @@ log.msg('Could not find mt.exe on path=%s' % env['PATH']) log.msg('Running vsver %s set this env' % vsver) for key, value in vcdict.items(): - log.msg('%s=%s' %(key, value)) + log.msg('%s=%s' %(key, value)) log.msg("Updated environment with vsver %d, using x64 %s" % (vsver, x64flag,)) return env @@ -122,7 +122,7 @@ if ver0 in vcvers: vcvers.insert(0, ver0) errs = [] - for vsver in vcvers: + for vsver in vcvers: env = _get_msvc_env(vsver, x64flag) if env is not None: return env, vsver @@ -189,8 +189,13 @@ self.cc = cc # detect version of current compiler - returncode, stdout, stderr = _run_subprocess(self.cc, [], + try: + returncode, stdout, stderr = _run_subprocess(self.cc, [], env=self.c_environ) + except EnvironmentError: + log.msg('Could not run %s using PATH=\n%s' %(self.cc, + '\n'.join(self.c_environ['PATH'].split(';')))) + raise r = re.search(r'Microsoft.+C/C\+\+.+\s([0-9]+)\.([0-9]+).*', stderr) if r is not None: self.version = int(''.join(r.groups())) / 10 - 60 @@ -276,23 +281,21 @@ if not standalone: args = self._args_for_shared(args) - if self.version >= 80: - # Tell the linker to generate a manifest file - temp_manifest = exe_name.dirpath().join( - exe_name.purebasename + '.manifest') - args += ["/MANIFEST", "/MANIFESTFILE:%s" % (temp_manifest,)] + # Tell the linker to generate a manifest file + temp_manifest = exe_name.dirpath().join( + exe_name.purebasename + '.manifest') + args += ["/MANIFEST", "/MANIFESTFILE:%s" % (temp_manifest,)] self._execute_c_compiler(self.link, args, exe_name) - if self.version >= 80: - # Now, embed the manifest into the program - if standalone: - mfid = 1 - else: - mfid = 2 - out_arg = '-outputresource:%s;%s' % (exe_name, mfid) - args = ['-nologo', '-manifest', str(temp_manifest), out_arg] - self._execute_c_compiler('mt.exe', args, exe_name) + # Now, 
embed the manifest into the program + if standalone: + mfid = 1 + else: + mfid = 2 + out_arg = '-outputresource:%s;%s' % (exe_name, mfid) + args = ['-nologo', '-manifest', str(temp_manifest), out_arg] + self._execute_c_compiler('mt.exe', args, exe_name) return exe_name @@ -396,7 +399,7 @@ if len(headers_to_precompile)>0: if shared: - no_precompile_cfiles += [m.makefile_dir / 'main.c', + no_precompile_cfiles += [m.makefile_dir / 'main.c', m.makefile_dir / 'wmain.c'] stdafx_h = path.join('stdafx.h') txt = '#ifndef PYPY_STDAFX_H\n' From pypy.commits at gmail.com Wed Jan 16 17:49:02 2019 From: pypy.commits at gmail.com (stevie_92) Date: Wed, 16 Jan 2019 14:49:02 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Implemented first version of CPython-style cycle detection Message-ID: <5c3fb4de.1c69fb81.c6e72.ede6@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95662:fdd107ae6257 Date: 2019-01-16 18:26 +0100 http://bitbucket.org/pypy/pypy/changeset/fdd107ae6257/ Log: Implemented first version of CPython-style cycle detection Fixed dot tests diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -3031,6 +3031,8 @@ self.rrc_dealloc_pending = self.AddressStack() self.rrc_tp_traverse = tp_traverse self.rrc_pyobj_list = self._pygchdr(pyobj_list) + self.rrc_pyobj_old_list = lltype.malloc( + self.PYOBJ_GC_HDR, flavor='raw', immortal=True) self.rrc_gc_as_pyobj = gc_as_pyobj self.rrc_pyobj_as_gc = pyobj_as_gc self.rrc_enabled = True @@ -3203,14 +3205,13 @@ _rrc_free._always_inline_ = True def rrc_major_collection_trace(self): + self._rrc_collect_rawrefcount_roots() + self._rrc_mark_rawrefcount() self.rrc_p_list_old.foreach(self._rrc_major_trace, None) def _rrc_major_trace(self, pyobject, ignore): - from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY - from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT - # - rc = 
self._pyobj(pyobject).c_ob_refcnt - if rc == REFCNT_FROM_PYPY or rc == REFCNT_FROM_PYPY_LIGHT: + rc = self.rrc_pyobj_as_gc(self._pyobj(pyobject)).c_gc_refs + if rc == 0: pass # the corresponding object may die else: # force the corresponding object to be alive @@ -3220,6 +3221,9 @@ self.visit_all_objects() def rrc_major_collection_free(self): + from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY + from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT + # ll_assert(self.rrc_p_dict_nurs.length() == 0, "p_dict_nurs not empty 2") length_estimate = self.rrc_p_dict.length() self.rrc_p_dict.delete() @@ -3238,6 +3242,20 @@ no_o_dict) self.rrc_o_list_old.delete() self.rrc_o_list_old = new_o_list + # free all dead refcounted objects, in unreachable cycles + pygchdr = self.rrc_pyobj_old_list.c_gc_next + while pygchdr <> self.rrc_pyobj_old_list: + assert pygchdr.c_gc_refs == 0 + pyobj = self.rrc_gc_as_pyobj(pygchdr) + if pyobj.c_ob_refcnt >= REFCNT_FROM_PYPY_LIGHT: + lltype.free(pyobj, flavor='raw') + elif pyobj.c_ob_refcnt >= REFCNT_FROM_PYPY: + pyobject = llmemory.cast_ptr_to_adr(pyobj) + pyobj.c_ob_refcnt = 1 + self.rrc_dealloc_pending.append(pyobject) + else: + lltype.free(pyobj, flavor='raw') + pygchdr = pygchdr.c_gc_next def _rrc_major_free(self, pyobject, surviving_list, surviving_dict): # The pyobject survives if the corresponding obj survives. @@ -3251,7 +3269,110 @@ if surviving_dict: surviving_dict.insertclean(obj, pyobject) else: - self._rrc_free(pyobject) + # The pyobject is freed later, if it is in old list, so + # just unlink here. + self._pyobj(pyobject).c_ob_pypy_link = 0 + + def _rrc_collect_rawrefcount_roots(self): + from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY + from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT + # + # Initialize the cyclic refcount with the real refcount. 
+ pygchdr = self.rrc_pyobj_list.c_gc_next + while pygchdr <> self.rrc_pyobj_list: + pygchdr.c_gc_refs = self.rrc_gc_as_pyobj(pygchdr).c_ob_refcnt + if pygchdr.c_gc_refs >= REFCNT_FROM_PYPY_LIGHT: + pygchdr.c_gc_refs -= REFCNT_FROM_PYPY_LIGHT + elif pygchdr.c_gc_refs >= REFCNT_FROM_PYPY: + pygchdr.c_gc_refs -= REFCNT_FROM_PYPY + pygchdr = pygchdr.c_gc_next + + # For every object in this set, if it is marked, add 1 as a real + # refcount + self.rrc_p_list_old.foreach(self._rrc_obj_fix_refcnt, None) + + # Subtract all internal refcounts from the cyclic refcount + # of rawrefcounted objects + pygchdr = self.rrc_pyobj_list.c_gc_next + while pygchdr <> self.rrc_pyobj_list: + pyobj = self.rrc_gc_as_pyobj(pygchdr) + self._rrc_visit_pyobj = self._rrc_subtract_internal_refcnt + self._rrc_traverse(pyobj) + pygchdr = pygchdr.c_gc_next + + # now all rawrefcounted roots or live border objects have a + # refcount > 0 + + def _rrc_subtract_internal_refcnt(self, pyobj): + pygchdr = self.rrc_pyobj_as_gc(pyobj) + pygchdr.c_gc_refs -= 1 + + def _rrc_obj_fix_refcnt(self, pyobject, ignore): + intobj = self._pyobj(pyobject).c_ob_pypy_link + obj = llmemory.cast_int_to_adr(intobj) + gchdr = self.rrc_pyobj_as_gc(self._pyobj(pyobject)) + if self.header(obj).tid & (GCFLAG_VISITED | GCFLAG_NO_HEAP_PTRS): + gchdr.c_gc_refs += 1 + + def _rrc_mark_rawrefcount(self): + if self.rrc_pyobj_list.c_gc_next == self.rrc_pyobj_list: + self.rrc_pyobj_old_list.c_gc_next = self.rrc_pyobj_old_list + self.rrc_pyobj_old_list.c_gc_prev = self.rrc_pyobj_old_list + return + # as long as new objects with cyclic a refcount > 0 or alive border + # objects are found, increment the refcount of all referenced objects + # of those newly found objects + self.rrc_pyobj_old_list.c_gc_next = self.rrc_pyobj_list.c_gc_next + self.rrc_pyobj_old_list.c_gc_prev = self.rrc_pyobj_list.c_gc_prev + self.rrc_pyobj_old_list.c_gc_next.c_gc_prev = self.rrc_pyobj_old_list + self.rrc_pyobj_old_list.c_gc_prev.c_gc_next = 
self.rrc_pyobj_old_list + self.rrc_pyobj_list.c_gc_next = self.rrc_pyobj_list + self.rrc_pyobj_list.c_gc_prev = self.rrc_pyobj_list + found_alive = True + # + while found_alive: + found_alive = False + gchdr = self.rrc_pyobj_old_list.c_gc_next + while gchdr <> self.rrc_pyobj_old_list: + next_old = gchdr.c_gc_next + alive = gchdr.c_gc_refs > 0 + pyobj = self.rrc_gc_as_pyobj(gchdr) + obj = None + if pyobj.c_ob_pypy_link <> 0: + intobj = pyobj.c_ob_pypy_link + obj = llmemory.cast_int_to_adr(intobj) + if not alive and self.header(obj).tid & ( + GCFLAG_VISITED | GCFLAG_NO_HEAP_PTRS): + # add fake refcount, to mark it as live + gchdr.c_gc_refs += 1 + alive = True + if alive: + # remove from old list + next = gchdr.c_gc_next + next.c_gc_prev = gchdr.c_gc_prev + gchdr.c_gc_prev.c_gc_next = next + # add to new list + next = self.rrc_pyobj_list.c_gc_next + self.rrc_pyobj_list.c_gc_next = gchdr + gchdr.c_gc_prev = self.rrc_pyobj_list + gchdr.c_gc_next = next + next.c_gc_prev = gchdr + # increment refcounts + self._rrc_visit_pyobj = self._rrc_increment_refcnt + self._rrc_traverse(pyobj) + # mark recursively, if it is a pypyobj + if not obj is None: + self.objects_to_trace.append(obj) + self.visit_all_objects() + found_alive = True + gchdr = next_old + # + # now all rawrefcounted objects, which are alive, have a cyclic + # refcount > 0 or are marked + + def _rrc_increment_refcnt(self, pyobj): + pygchdr = self.rrc_pyobj_as_gc(pyobj) + pygchdr.c_gc_refs += 1 def _rrc_visit(pyobj, self_ptr): from rpython.rtyper.annlowlevel import cast_adr_to_nongc_instance @@ -3261,12 +3382,11 @@ self._rrc_visit_pyobj(pyobj) return rffi.cast(rffi.INT_real, 0) - def _rrc_traverse(self, pyobject): + def _rrc_traverse(self, pyobj): from rpython.rlib.objectmodel import we_are_translated from rpython.rtyper.annlowlevel import (cast_nongc_instance_to_adr, llhelper) # - pyobj = self._pyobj(pyobject) if we_are_translated(): callback_ptr = llhelper(self.RAWREFCOUNT_VISIT, IncrementalMiniMarkGC._rrc_visit) 
diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py --- a/rpython/memory/gc/test/test_rawrefcount.py +++ b/rpython/memory/gc/test/test_rawrefcount.py @@ -31,7 +31,7 @@ def rawrefcount_tp_traverse(obj, callback, args): refs = self.pyobj_refs[self.pyobjs.index(obj)] for ref in refs: - callback(self.gc, ref) + callback(ref) def rawrefcount_gc_as_pyobj(gc): return self.pyobjs[self.gcobjs.index(gc)] @@ -439,8 +439,7 @@ info = NodeInfo(type, alive, ext_refcnt) if type == "C": r, raddr, check_alive = self._rawrefcount_pyobj() - if ext_refcnt > 0: - r.c_ob_refcnt = ext_refcnt + r.c_ob_refcnt += ext_refcnt nodes[name] = CPythonNode(r, raddr, check_alive, info) elif type == "P": p, pref, check_alive = \ @@ -452,8 +451,7 @@ p, pref, r, raddr, check_alive =\ self._rawrefcount_pair(42 + i, rooted=rooted, create_old=True) - if ext_refcnt > 0: - r.c_ob_refcnt = ext_refcnt + r.c_ob_refcnt += ext_refcnt nodes[name] = BorderNode(p, pref, r, raddr, check_alive, info) i += 1 @@ -483,15 +481,23 @@ dests_by_source[source].append(dest.r) for source in dests_by_source: dests_target = dests_by_source[source] - def append(self, pyobj): + def append(pyobj): dests_target.remove(pyobj) self.gc._rrc_visit_pyobj = append - self.gc._rrc_traverse(source.raddr) + self.gc._rrc_traverse(source.r) assert len(dests_target) == 0 # do collection self.gc.collect() + # simply free all pending deallocations, we don't care about the + # side effects + next_dead = self.gc.rawrefcount_next_dead() + while next_dead <> llmemory.NULL: + pyobj = llmemory.cast_adr_to_ptr(next_dead, self.gc.PYOBJ_HDR_PTR) + lltype.free(pyobj, flavor='raw') + next_dead = self.gc.rawrefcount_next_dead() + # check livelihood of objects, according to graph for name in nodes: n = nodes[name] From pypy.commits at gmail.com Wed Jan 16 17:49:03 2019 From: pypy.commits at gmail.com (stevie_92) Date: Wed, 16 Jan 2019 14:49:03 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Fixed 
translation issues Message-ID: <5c3fb4df.1c69fb81.d2997.9052@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95663:1fdae7eeae56 Date: 2019-01-16 23:38 +0100 http://bitbucket.org/pypy/pypy/changeset/1fdae7eeae56/ Log: Fixed translation issues diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -3296,17 +3296,12 @@ pygchdr = self.rrc_pyobj_list.c_gc_next while pygchdr <> self.rrc_pyobj_list: pyobj = self.rrc_gc_as_pyobj(pygchdr) - self._rrc_visit_pyobj = self._rrc_subtract_internal_refcnt - self._rrc_traverse(pyobj) + self._rrc_traverse(pyobj, -1) pygchdr = pygchdr.c_gc_next # now all rawrefcounted roots or live border objects have a # refcount > 0 - def _rrc_subtract_internal_refcnt(self, pyobj): - pygchdr = self.rrc_pyobj_as_gc(pyobj) - pygchdr.c_gc_refs -= 1 - def _rrc_obj_fix_refcnt(self, pyobject, ignore): intobj = self._pyobj(pyobject).c_ob_pypy_link obj = llmemory.cast_int_to_adr(intobj) @@ -3358,10 +3353,11 @@ gchdr.c_gc_next = next next.c_gc_prev = gchdr # increment refcounts - self._rrc_visit_pyobj = self._rrc_increment_refcnt - self._rrc_traverse(pyobj) + self._rrc_traverse(pyobj, 1) # mark recursively, if it is a pypyobj - if not obj is None: + if pyobj.c_ob_pypy_link <> 0: + intobj = pyobj.c_ob_pypy_link + obj = llmemory.cast_int_to_adr(intobj) self.objects_to_trace.append(obj) self.visit_all_objects() found_alive = True @@ -3370,31 +3366,31 @@ # now all rawrefcounted objects, which are alive, have a cyclic # refcount > 0 or are marked - def _rrc_increment_refcnt(self, pyobj): - pygchdr = self.rrc_pyobj_as_gc(pyobj) - pygchdr.c_gc_refs += 1 - def _rrc_visit(pyobj, self_ptr): from rpython.rtyper.annlowlevel import cast_adr_to_nongc_instance # self_adr = rffi.cast(llmemory.Address, self_ptr) self = cast_adr_to_nongc_instance(IncrementalMiniMarkGC, self_adr) - self._rrc_visit_pyobj(pyobj) + self._rrc_visit_action(pyobj, None) 
return rffi.cast(rffi.INT_real, 0) - def _rrc_traverse(self, pyobj): + def _rrc_visit_action(self, pyobj, ignore): + pygchdr = self.rrc_pyobj_as_gc(pyobj) + pygchdr.c_gc_refs += self.rrc_refcnt_add + + def _rrc_traverse(self, pyobj, refcnt_add): from rpython.rlib.objectmodel import we_are_translated from rpython.rtyper.annlowlevel import (cast_nongc_instance_to_adr, llhelper) # + self.rrc_refcnt_add = refcnt_add if we_are_translated(): callback_ptr = llhelper(self.RAWREFCOUNT_VISIT, IncrementalMiniMarkGC._rrc_visit) self_ptr = rffi.cast(rffi.VOIDP, cast_nongc_instance_to_adr(self)) + self.rrc_tp_traverse(pyobj, callback_ptr, self_ptr) else: - callback_ptr = self._rrc_visit_pyobj - self_ptr = None - self.rrc_tp_traverse(pyobj, callback_ptr, self_ptr) + self.rrc_tp_traverse(pyobj, self._rrc_visit_action, None) def _rrc_gc_list_init(self, pygclist): pygclist.c_gc_next = pygclist diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py --- a/rpython/memory/gc/test/test_rawrefcount.py +++ b/rpython/memory/gc/test/test_rawrefcount.py @@ -31,7 +31,7 @@ def rawrefcount_tp_traverse(obj, callback, args): refs = self.pyobj_refs[self.pyobjs.index(obj)] for ref in refs: - callback(ref) + callback(ref, args) def rawrefcount_gc_as_pyobj(gc): return self.pyobjs[self.gcobjs.index(gc)] @@ -481,10 +481,9 @@ dests_by_source[source].append(dest.r) for source in dests_by_source: dests_target = dests_by_source[source] - def append(pyobj): + def append(pyobj, ignore): dests_target.remove(pyobj) - self.gc._rrc_visit_pyobj = append - self.gc._rrc_traverse(source.r) + self.gc.rrc_tp_traverse(source.r, append, None) assert len(dests_target) == 0 # do collection From pypy.commits at gmail.com Thu Jan 17 01:47:01 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 16 Jan 2019 22:47:01 -0800 (PST) Subject: [pypy-commit] buildbot default: also upgrade pip, setuptools when installing virtualenv Message-ID: 
<5c4024e5.1c69fb81.514b7.2e8f@mx.google.com> Author: Matti Picus Branch: Changeset: r1065:224046cb3bb6 Date: 2019-01-17 08:46 +0200 http://bitbucket.org/pypy/buildbot/changeset/224046cb3bb6/ Log: also upgrade pip, setuptools when installing virtualenv diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -499,7 +499,7 @@ factory.addStep(ShellCmd( description="Install recent virtualenv", command=prefix + [target, '-mpip', 'install', '--upgrade', - virt_package], + 'pip', 'setuptools', virt_package], workdir='venv', flunkOnFailure=True)) factory.addStep(ShellCmd( From pypy.commits at gmail.com Thu Jan 17 01:58:43 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 16 Jan 2019 22:58:43 -0800 (PST) Subject: [pypy-commit] extradoc extradoc: my flights are confirmed Message-ID: <5c4027a3.1c69fb81.8c71a.6274@mx.google.com> Author: Matti Picus Branch: extradoc Changeset: r5939:737a1c075595 Date: 2019-01-17 08:58 +0200 http://bitbucket.org/pypy/extradoc/changeset/737a1c075595/ Log: my flights are confirmed diff --git a/sprintinfo/ddorf2019/people.txt b/sprintinfo/ddorf2019/people.txt --- a/sprintinfo/ddorf2019/people.txt +++ b/sprintinfo/ddorf2019/people.txt @@ -9,7 +9,7 @@ Name Arrive/Depart Accomodation ============================ ============== =========================== Carl Friedrich Bolz-Tereick always there private -Matti Picus Feb 4? - 9? airbnb +Matti Picus Feb 4 - 9 airbnb Manuel Feb 3 - 7? share a room? 
Antonio Cuni Feb 3 - 9 airbnb Andrew Lawrence Feb 3 - 9 backpackers Düsseldorf From pypy.commits at gmail.com Fri Jan 18 06:32:19 2019 From: pypy.commits at gmail.com (mattip) Date: Fri, 18 Jan 2019 03:32:19 -0800 (PST) Subject: [pypy-commit] pypy py3.5: merge default into branch Message-ID: <5c41b943.1c69fb81.7dee6.1af2@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r95665:ef10a3c9698b Date: 2019-01-18 12:47 +0200 http://bitbucket.org/pypy/pypy/changeset/ef10a3c9698b/ Log: merge default into branch diff --git a/pypy/conftest.py b/pypy/conftest.py --- a/pypy/conftest.py +++ b/pypy/conftest.py @@ -15,11 +15,15 @@ rsyncignore = ['_cache'] try: - from hypothesis import settings + from hypothesis import settings, __version__ except ImportError: pass else: - settings.register_profile('default', deadline=None) + if __version__[:2] < '3.6': + s = settings(deadline=None) + settings.register_profile('default', s) + else: + settings.register_profile('default', deadline=None) settings.load_profile('default') # PyPy's command line extra options (these are added diff --git a/rpython/conftest.py b/rpython/conftest.py --- a/rpython/conftest.py +++ b/rpython/conftest.py @@ -5,6 +5,18 @@ option = None +try: + from hypothesis import settings, __version__ +except ImportError: + pass +else: + if __version__[:2] < '3.6': + s = settings(deadline=None) + settings.register_profile('default', s) + else: + settings.register_profile('default', deadline=None) + settings.load_profile('default') + def braindead_deindent(self): """monkeypatch that wont end up doing stupid in the python tokenizer""" text = '\n'.join(self.lines) From pypy.commits at gmail.com Fri Jan 18 06:32:21 2019 From: pypy.commits at gmail.com (mattip) Date: Fri, 18 Jan 2019 03:32:21 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge default into branch Message-ID: <5c41b945.1c69fb81.f4993.9ea4@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95666:3d5d0ea3c275 Date: 2019-01-18 12:48 
+0200 http://bitbucket.org/pypy/pypy/changeset/3d5d0ea3c275/ Log: merge default into branch diff --git a/pypy/conftest.py b/pypy/conftest.py --- a/pypy/conftest.py +++ b/pypy/conftest.py @@ -5,6 +5,18 @@ rsyncdirs = ['.', '../lib-python', '../lib_pypy', '../demo'] rsyncignore = ['_cache'] +try: + from hypothesis import settings, __version__ +except ImportError: + pass +else: + if __version__[:2] < '3.6': + s = settings(deadline=None) + settings.register_profile('default', s) + else: + settings.register_profile('default', deadline=None) + settings.load_profile('default') + # PyPy's command line extra options (these are added # to py.test's standard options) # diff --git a/rpython/conftest.py b/rpython/conftest.py --- a/rpython/conftest.py +++ b/rpython/conftest.py @@ -5,6 +5,18 @@ option = None +try: + from hypothesis import settings, __version__ +except ImportError: + pass +else: + if __version__[:2] < '3.6': + s = settings(deadline=None) + settings.register_profile('default', s) + else: + settings.register_profile('default', deadline=None) + settings.load_profile('default') + def braindead_deindent(self): """monkeypatch that wont end up doing stupid in the python tokenizer""" text = '\n'.join(self.lines) diff --git a/rpython/rlib/rwinreg.py b/rpython/rlib/rwinreg.py --- a/rpython/rlib/rwinreg.py +++ b/rpython/rlib/rwinreg.py @@ -47,7 +47,7 @@ HKEY = rwin32.HANDLE PHKEY = rffi.CArrayPtr(HKEY) REGSAM = rwin32.DWORD -suffix = 'W' + def get_traits(suffix): RegSetValue = external( 'RegSetValue' + suffix, diff --git a/rpython/translator/platform/test/test_makefile.py b/rpython/translator/platform/test/test_makefile.py --- a/rpython/translator/platform/test/test_makefile.py +++ b/rpython/translator/platform/test/test_makefile.py @@ -85,7 +85,8 @@ txt = '#include \n' for i in range(ncfiles): txt += "int func%03d();\n" % i - txt += "\nint main(int argc, char * argv[])\n" + txt += "\n__declspec(dllexport) int\n" + txt += "pypy_main_startup(int argc, char * argv[])\n" txt += 
"{\n int i=0;\n" for i in range(ncfiles): txt += " i += func%03d();\n" % i @@ -119,7 +120,7 @@ clean = ('clean', '', 'rm -f $(OBJECTS) $(TARGET) ') get_time = time.time #write a non-precompiled header makefile - mk = self.platform.gen_makefile(cfiles, eci, path=tmpdir) + mk = self.platform.gen_makefile(cfiles, eci, path=tmpdir, shared=True) mk.rule(*clean) mk.write() t0 = get_time() @@ -128,7 +129,7 @@ t_normal = t1 - t0 self.platform.execute_makefile(mk, extra_opts=['clean']) # Write a super-duper makefile with precompiled headers - mk = self.platform.gen_makefile(cfiles, eci, path=tmpdir, + mk = self.platform.gen_makefile(cfiles, eci, path=tmpdir, shared=True, headers_to_precompile=cfiles_precompiled_headers,) mk.rule(*clean) mk.write() diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -9,14 +9,14 @@ import rpython rpydir = str(py.path.local(rpython.__file__).join('..')) -def _get_compiler_type(cc, x64_flag, ver0=None): +def _get_compiler_type(cc, x64_flag): if not cc: cc = os.environ.get('CC','') if not cc: - return MsvcPlatform(x64=x64_flag, ver0=ver0) + return MsvcPlatform(x64=x64_flag) elif cc.startswith('mingw') or cc == 'gcc': return MingwPlatform(cc) - return MsvcPlatform(cc=cc, x64=x64_flag, ver0=ver0) + return MsvcPlatform(cc=cc, x64=x64_flag) def _get_vcver0(): # try to get the compiler which served to compile python @@ -28,17 +28,13 @@ return vsver return None -def Windows(cc=None, ver0=None): - if ver0 is None: - ver0 = _get_vcver0() - return _get_compiler_type(cc, False, ver0=ver0) +def Windows(cc=None): + return _get_compiler_type(cc, False) def Windows_x64(cc=None, ver0=None): raise Exception("Win64 is not supported. 
You must either build for Win32" " or contribute the missing support in PyPy.") - if ver0 is None: - ver0 = _get_vcver0() - return _get_compiler_type(cc, True, ver0=ver0) + return _get_compiler_type(cc, True) def _find_vcvarsall(version, x64flag): import rpython.tool.setuptools_msvc as msvc @@ -46,19 +42,16 @@ arch = 'x64' else: arch = 'x86' - if version == 140: + if version >= 140: return msvc.msvc14_get_vc_env(arch) else: return msvc.msvc9_query_vcvarsall(version / 10.0, arch) - + def _get_msvc_env(vsver, x64flag): vcdict = None toolsdir = None try: - if vsver < 140: - toolsdir = os.environ['VS%sCOMNTOOLS' % vsver] - else: - raise KeyError('always use registry values') + toolsdir = os.environ['VS%sCOMNTOOLS' % vsver] except KeyError: # use setuptools from python3 to find tools try: @@ -76,8 +69,8 @@ if not os.path.exists(vcvars): # even msdn does not know which to run # see https://msdn.microsoft.com/en-us/library/1700bbwd(v=vs.90).aspx - # wich names both - vcvars = os.path.join(toolsdir, 'vcvars32.bat') + # which names both + vcvars = os.path.join(toolsdir, 'vcvars32.bat') import subprocess try: @@ -86,12 +79,14 @@ stderr=subprocess.PIPE) stdout, stderr = popen.communicate() - if popen.wait() != 0: + if popen.wait() != 0 or stdout[:5].lower() == 'error': + log.msg('Running "%s" errored: \n\nstdout:\n%s\n\nstderr:\n%s' % ( + vcvars, stdout.split()[0], stderr)) return None - if stdout[:5].lower() == 'error': - log.msg('Running "%s" errored: %s' %(vcvars, stdout.split()[0])) - return None - except: + else: + log.msg('Running "%s" succeeded' %(vcvars,)) + except Exception as e: + log.msg('Running "%s" failed: "%s"', (vcvars, str(e))) return None stdout = stdout.replace("\r\n", "\n") @@ -105,15 +100,29 @@ for key, value in vcdict.items(): if key.upper() in ['PATH', 'INCLUDE', 'LIB']: env[key.upper()] = value + if 'PATH' not in env: + log.msg('Did not find "PATH" in stdout\n%s' %(stdout)) + if not _find_executable('mt.exe', env['PATH']): + # For some reason the sdk 
bin path is missing? + # put it together from some other env variables that happened to exist + # on the buildbot where this occurred + if 'WindowsSDKVersion' in vcdict and 'WindowsSdkDir' in vcdict: + binpath = vcdict['WindowsSdkDir'] + '\\bin\\' + vcdict['WindowsSDKVersion'] + 'x86' + env['PATH'] += ';' + binpath + if not _find_executable('mt.exe', env['PATH']): + log.msg('Could not find mt.exe on path=%s' % env['PATH']) + log.msg('Running vsver %s set this env' % vsver) + for key, value in vcdict.items(): + log.msg('%s=%s' %(key, value)) log.msg("Updated environment with vsver %d, using x64 %s" % (vsver, x64flag,)) return env def find_msvc_env(x64flag=False, ver0=None): - vcvers = [140, 90, 100] + vcvers = [140, 141, 150, 90, 100] if ver0 in vcvers: vcvers.insert(0, ver0) errs = [] - for vsver in vcvers: + for vsver in vcvers: env = _get_msvc_env(vsver, x64flag) if env is not None: return env, vsver @@ -168,6 +177,9 @@ patch_os_env(self.externals) self.c_environ = os.environ.copy() if cc is None: + # prefer compiler used to build host. 
Python2 only + if ver0 is None: + ver0 = _get_vcver0() msvc_compiler_environ, self.vsver = find_msvc_env(x64, ver0=ver0) Platform.__init__(self, 'cl.exe') if msvc_compiler_environ: @@ -180,8 +192,13 @@ self.cc = cc # detect version of current compiler - returncode, stdout, stderr = _run_subprocess(self.cc, [], + try: + returncode, stdout, stderr = _run_subprocess(self.cc, [], env=self.c_environ) + except EnvironmentError: + log.msg('Could not run %s using PATH=\n%s' %(self.cc, + '\n'.join(self.c_environ['PATH'].split(';')))) + raise r = re.search(r'Microsoft.+C/C\+\+.+\s([0-9]+)\.([0-9]+).*', stderr) if r is not None: self.version = int(''.join(r.groups())) / 10 - 60 @@ -267,23 +284,21 @@ if not standalone: args = self._args_for_shared(args) - if self.version >= 80: - # Tell the linker to generate a manifest file - temp_manifest = exe_name.dirpath().join( - exe_name.purebasename + '.manifest') - args += ["/MANIFEST", "/MANIFESTFILE:%s" % (temp_manifest,)] + # Tell the linker to generate a manifest file + temp_manifest = exe_name.dirpath().join( + exe_name.purebasename + '.manifest') + args += ["/MANIFEST", "/MANIFESTFILE:%s" % (temp_manifest,)] self._execute_c_compiler(self.link, args, exe_name) - if self.version >= 80: - # Now, embed the manifest into the program - if standalone: - mfid = 1 - else: - mfid = 2 - out_arg = '-outputresource:%s;%s' % (exe_name, mfid) - args = ['-nologo', '-manifest', str(temp_manifest), out_arg] - self._execute_c_compiler('mt.exe', args, exe_name) + # Now, embed the manifest into the program + if standalone: + mfid = 1 + else: + mfid = 2 + out_arg = '-outputresource:%s;%s' % (exe_name, mfid) + args = ['-nologo', '-manifest', str(temp_manifest), out_arg] + self._execute_c_compiler('mt.exe', args, exe_name) return exe_name @@ -387,7 +402,8 @@ if len(headers_to_precompile)>0: if shared: - no_precompile_cfiles += ['main.c', 'wmain.c'] + no_precompile_cfiles += [m.makefile_dir / 'main.c', + m.makefile_dir / 'wmain.c'] stdafx_h = 
path.join('stdafx.h') txt = '#ifndef PYPY_STDAFX_H\n' txt += '#define PYPY_STDAFX_H\n' From pypy.commits at gmail.com Fri Jan 18 06:32:23 2019 From: pypy.commits at gmail.com (mattip) Date: Fri, 18 Jan 2019 03:32:23 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: add passing test Message-ID: <5c41b947.1c69fb81.7bfae.d00b@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95667:6124fb988a60 Date: 2019-01-18 12:49 +0200 http://bitbucket.org/pypy/pypy/changeset/6124fb988a60/ Log: add passing test diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py --- a/pypy/module/_sre/test/test_app_sre.py +++ b/pypy/module/_sre/test/test_app_sre.py @@ -150,6 +150,11 @@ import re, _weakref _weakref.ref(re.compile(r"")) + def test_match_compat(self): + import re + res = re.match(r'(a)|(b)', 'b').start(1) + assert res == -1 + class AppTestSreMatch: spaceconfig = dict(usemodules=('array', )) From pypy.commits at gmail.com Fri Jan 18 06:32:25 2019 From: pypy.commits at gmail.com (mattip) Date: Fri, 18 Jan 2019 03:32:25 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: check for negative index Message-ID: <5c41b949.1c69fb81.75f4.9ed7@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95668:e0d70d85d65c Date: 2019-01-18 13:30 +0200 http://bitbucket.org/pypy/pypy/changeset/e0d70d85d65c/ Log: check for negative index diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -570,6 +570,8 @@ logarithmic in the length of the string, plus some constant that is not tiny either. 
""" + if bytepos < 0: + return bytepos index_min = 0 index_max = len(storage) - 1 while index_min < index_max: From pypy.commits at gmail.com Fri Jan 18 06:32:27 2019 From: pypy.commits at gmail.com (mattip) Date: Fri, 18 Jan 2019 03:32:27 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: merge unicode-utf8 into branch Message-ID: <5c41b94b.1c69fb81.1e16d.da33@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95669:83df732f6df1 Date: 2019-01-18 13:31 +0200 http://bitbucket.org/pypy/pypy/changeset/83df732f6df1/ Log: merge unicode-utf8 into branch diff --git a/pypy/conftest.py b/pypy/conftest.py --- a/pypy/conftest.py +++ b/pypy/conftest.py @@ -15,11 +15,15 @@ rsyncignore = ['_cache'] try: - from hypothesis import settings + from hypothesis import settings, __version__ except ImportError: pass else: - settings.register_profile('default', deadline=None) + if __version__[:2] < '3.6': + s = settings(deadline=None) + settings.register_profile('default', s) + else: + settings.register_profile('default', deadline=None) settings.load_profile('default') # PyPy's command line extra options (these are added diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py --- a/pypy/module/_sre/test/test_app_sre.py +++ b/pypy/module/_sre/test/test_app_sre.py @@ -123,6 +123,11 @@ import re, _weakref _weakref.ref(re.compile(r"")) + def test_match_compat(self): + import re + res = re.match(r'(a)|(b)', 'b').start(1) + assert res == -1 + def test_pattern_check(self): import _sre raises(TypeError, _sre.compile, {}, 0, []) diff --git a/rpython/conftest.py b/rpython/conftest.py --- a/rpython/conftest.py +++ b/rpython/conftest.py @@ -5,6 +5,18 @@ option = None +try: + from hypothesis import settings, __version__ +except ImportError: + pass +else: + if __version__[:2] < '3.6': + s = settings(deadline=None) + settings.register_profile('default', s) + else: + settings.register_profile('default', deadline=None) + 
settings.load_profile('default') + def braindead_deindent(self): """monkeypatch that wont end up doing stupid in the python tokenizer""" text = '\n'.join(self.lines) diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -572,6 +572,8 @@ logarithmic in the length of the string, plus some constant that is not tiny either. """ + if bytepos < 0: + return bytepos index_min = 0 index_max = len(storage) - 1 while index_min < index_max: From pypy.commits at gmail.com Fri Jan 18 14:29:54 2019 From: pypy.commits at gmail.com (rlamy) Date: Fri, 18 Jan 2019 11:29:54 -0800 (PST) Subject: [pypy-commit] pypy default: Use OrderedDict as the reference to make hypothesis happy Message-ID: <5c422932.1c69fb81.5c675.a7c9@mx.google.com> Author: Ronan Lamy Branch: Changeset: r95670:0873ec79aa36 Date: 2019-01-18 19:29 +0000 http://bitbucket.org/pypy/pypy/changeset/0873ec79aa36/ Log: Use OrderedDict as the reference to make hypothesis happy diff --git a/rpython/rtyper/test/test_rdict.py b/rpython/rtyper/test/test_rdict.py --- a/rpython/rtyper/test/test_rdict.py +++ b/rpython/rtyper/test/test_rdict.py @@ -1,6 +1,7 @@ import sys from contextlib import contextmanager import signal +from collections import OrderedDict from rpython.translator.translator import TranslationContext from rpython.annotator.model import ( @@ -1196,7 +1197,7 @@ DictValue(None, s_value)) dictrepr.setup() self.l_dict = self.newdict(dictrepr) - self.reference = self.new_reference() + self.reference = OrderedDict() self.ll_key = r_key.convert_const self.ll_value = r_value.convert_const self.removed_keys = [] @@ -1323,7 +1324,6 @@ class DictSpace(MappingSpace): MappingRepr = rdict.DictRepr - new_reference = dict ll_getitem = staticmethod(rdict.ll_dict_getitem) ll_setitem = staticmethod(rdict.ll_dict_setitem) ll_delitem = staticmethod(rdict.ll_dict_delitem) diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py --- 
a/rpython/rtyper/test/test_rordereddict.py +++ b/rpython/rtyper/test/test_rordereddict.py @@ -422,7 +422,6 @@ class ODictSpace(MappingSpace): MappingRepr = rodct.OrderedDictRepr - new_reference = OrderedDict moved_around = False ll_getitem = staticmethod(rodct.ll_dict_getitem) ll_setitem = staticmethod(rodct.ll_dict_setitem) From pypy.commits at gmail.com Fri Jan 18 15:08:39 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 18 Jan 2019 12:08:39 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Fixed some minor issues and added TODOs for CPython style Message-ID: <5c423247.1c69fb81.4b17.1844@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95671:faf091537a23 Date: 2019-01-17 18:21 +0100 http://bitbucket.org/pypy/pypy/changeset/faf091537a23/ Log: Fixed some minor issues and added TODOs for CPython style cycle detection diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -3031,8 +3031,6 @@ self.rrc_dealloc_pending = self.AddressStack() self.rrc_tp_traverse = tp_traverse self.rrc_pyobj_list = self._pygchdr(pyobj_list) - self.rrc_pyobj_old_list = lltype.malloc( - self.PYOBJ_GC_HDR, flavor='raw', immortal=True) self.rrc_gc_as_pyobj = gc_as_pyobj self.rrc_pyobj_as_gc = pyobj_as_gc self.rrc_enabled = True @@ -3171,7 +3169,7 @@ else: self._rrc_free(pyobject) - def _rrc_free(self, pyobject): + def _rrc_free(self, pyobject, major=False): from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT # @@ -3179,6 +3177,11 @@ if rc >= REFCNT_FROM_PYPY_LIGHT: rc -= REFCNT_FROM_PYPY_LIGHT if rc == 0: + if major: # remove from old list + pygchdr = self.rrc_pyobj_as_gc(self._pyobj(pyobject)) + next = pygchdr.c_gc_next + next.c_gc_prev = pygchdr.c_gc_prev + pygchdr.c_gc_prev.c_gc_next = next lltype.free(self._pyobj(pyobject), flavor='raw') else: # can only occur if LIGHT is used in 
create_link_pyobj() @@ -3191,16 +3194,19 @@ rc -= REFCNT_FROM_PYPY self._pyobj(pyobject).c_ob_pypy_link = 0 if rc == 0: - self.rrc_dealloc_pending.append(pyobject) - # an object with refcnt == 0 cannot stay around waiting - # for its deallocator to be called. Some code (lxml) - # expects that tp_dealloc is called immediately when - # the refcnt drops to 0. If it isn't, we get some - # uncleared raw pointer that can still be used to access - # the object; but (PyObject *)raw_pointer is then bogus - # because after a Py_INCREF()/Py_DECREF() on it, its - # tp_dealloc is also called! - rc = 1 + if not major: # we do it later in major collections + self.rrc_dealloc_pending.append(pyobject) + # an object with refcnt == 0 cannot stay around waiting + # for its deallocator to be called. Some code (lxml) + # expects that tp_dealloc is called immediately when + # the refcnt drops to 0. If it isn't, we get some + # uncleared raw pointer that can still be used to access + # the object; but (PyObject *)raw_pointer is then bogus + # because after a Py_INCREF()/Py_DECREF() on it, its + # tp_dealloc is also called! 
+ rc = 1 + else: + rc = REFCNT_FROM_PYPY self._pyobj(pyobject).c_ob_refcnt = rc _rrc_free._always_inline_ = True @@ -3209,6 +3215,28 @@ self._rrc_mark_rawrefcount() self.rrc_p_list_old.foreach(self._rrc_major_trace, None) + # TODO: for all unreachable objects, which are marked potentially + # TODO: uncollectable, move them to the set of uncollectable objs + + # TODO: for all unreachable objects with tp_del (legacy finalizer), + # TODO: except for border objects with a refcount of + # TODO: REFCNT_FROM_PYPY (equals zero at this point): + # TODO: * mark reachable pypy objects + # TODO: * move reachable cpython objects back to pyobj_list + # TODO: * mark all reachable objects as potentially uncollectable + + # TODO: handle weakrefs for unreachable objects + + # TODO: call tp_finalize for unreachable objects + # TODO: (could resurrect objects, so we have to do it now) + # TODO: (set finalizer flag before calling and check if + # TODO: finalizer was not called before) + + # TODO: for all objects in unreachable, check if they + # TODO: are still unreachable. if not, abort and move all + # TODO: unreachable back to pyobj_list and mark all reachable + # TODO: pypy objects + def _rrc_major_trace(self, pyobject, ignore): rc = self.rrc_pyobj_as_gc(self._pyobj(pyobject)).c_gc_refs if rc == 0: @@ -3222,7 +3250,6 @@ def rrc_major_collection_free(self): from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY - from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT # ll_assert(self.rrc_p_dict_nurs.length() == 0, "p_dict_nurs not empty 2") length_estimate = self.rrc_p_dict.length() @@ -3242,20 +3269,27 @@ no_o_dict) self.rrc_o_list_old.delete() self.rrc_o_list_old = new_o_list - # free all dead refcounted objects, in unreachable cycles + + # TODO: === DO THIS LATER OUTSIDE OF GC === + # TODO: (like dealloc_pending.append, but only pass reference to + # TODO: unreachable object list, not for each object. 
free + # TODO: this list afterwards) + # TODO: call tp_clear (wrapped between inc- and decref) instead of + # TODO: free, to break cycles for unreachable objects + + # TODO: === REMOVE THE CODE BELOW === + # TODO: this exists only to please the current tests, but fails + # TODO: if the pending deallocations are executed properly pygchdr = self.rrc_pyobj_old_list.c_gc_next while pygchdr <> self.rrc_pyobj_old_list: assert pygchdr.c_gc_refs == 0 pyobj = self.rrc_gc_as_pyobj(pygchdr) - if pyobj.c_ob_refcnt >= REFCNT_FROM_PYPY_LIGHT: - lltype.free(pyobj, flavor='raw') - elif pyobj.c_ob_refcnt >= REFCNT_FROM_PYPY: - pyobject = llmemory.cast_ptr_to_adr(pyobj) - pyobj.c_ob_refcnt = 1 - self.rrc_dealloc_pending.append(pyobject) - else: - lltype.free(pyobj, flavor='raw') + if pyobj.c_ob_refcnt == REFCNT_FROM_PYPY: + pyobj.c_ob_refcnt = 0 + pyobj.c_ob_refcnt += 1 + self.rrc_dealloc_pending.append(llmemory.cast_ptr_to_adr(pyobj)) pygchdr = pygchdr.c_gc_next + lltype.free(self.rrc_pyobj_old_list, flavor='raw') def _rrc_major_free(self, pyobject, surviving_list, surviving_dict): # The pyobject survives if the corresponding obj survives. @@ -3269,9 +3303,7 @@ if surviving_dict: surviving_dict.insertclean(obj, pyobject) else: - # The pyobject is freed later, if it is in old list, so - # just unlink here. 
- self._pyobj(pyobject).c_ob_pypy_link = 0 + self._rrc_free(pyobject, True) def _rrc_collect_rawrefcount_roots(self): from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY @@ -3305,11 +3337,13 @@ def _rrc_obj_fix_refcnt(self, pyobject, ignore): intobj = self._pyobj(pyobject).c_ob_pypy_link obj = llmemory.cast_int_to_adr(intobj) + # TODO: only if Py_TPFLAGS_HAVE_GC is set gchdr = self.rrc_pyobj_as_gc(self._pyobj(pyobject)) if self.header(obj).tid & (GCFLAG_VISITED | GCFLAG_NO_HEAP_PTRS): gchdr.c_gc_refs += 1 def _rrc_mark_rawrefcount(self): + self.rrc_pyobj_old_list = lltype.malloc(self.PYOBJ_GC_HDR, flavor='raw') if self.rrc_pyobj_list.c_gc_next == self.rrc_pyobj_list: self.rrc_pyobj_old_list.c_gc_next = self.rrc_pyobj_old_list self.rrc_pyobj_old_list.c_gc_prev = self.rrc_pyobj_old_list @@ -3332,7 +3366,6 @@ next_old = gchdr.c_gc_next alive = gchdr.c_gc_refs > 0 pyobj = self.rrc_gc_as_pyobj(gchdr) - obj = None if pyobj.c_ob_pypy_link <> 0: intobj = pyobj.c_ob_pypy_link obj = llmemory.cast_int_to_adr(intobj) @@ -3375,6 +3408,7 @@ return rffi.cast(rffi.INT_real, 0) def _rrc_visit_action(self, pyobj, ignore): + # TODO: only if Py_TPFLAGS_HAVE_GC is set pygchdr = self.rrc_pyobj_as_gc(pyobj) pygchdr.c_gc_refs += self.rrc_refcnt_add diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py --- a/rpython/memory/gc/test/test_rawrefcount.py +++ b/rpython/memory/gc/test/test_rawrefcount.py @@ -490,7 +490,7 @@ self.gc.collect() # simply free all pending deallocations, we don't care about the - # side effects + # side effects for now... 
next_dead = self.gc.rawrefcount_next_dead() while next_dead <> llmemory.NULL: pyobj = llmemory.cast_adr_to_ptr(next_dead, self.gc.PYOBJ_HDR_PTR) From pypy.commits at gmail.com Fri Jan 18 15:08:41 2019 From: pypy.commits at gmail.com (stevie_92) Date: Fri, 18 Jan 2019 12:08:41 -0800 (PST) Subject: [pypy-commit] pypy cpyext-gc-cycle: Adapted tests in gc/rawrefcount to new cycle deletion Message-ID: <5c423249.1c69fb81.f3c41.053f@mx.google.com> Author: Stefan Beyer Branch: cpyext-gc-cycle Changeset: r95672:6e15b053de37 Date: 2019-01-18 21:06 +0100 http://bitbucket.org/pypy/pypy/changeset/6e15b053de37/ Log: Adapted tests in gc/rawrefcount to new cycle deletion diff --git a/pypy/module/cpyext/state.py b/pypy/module/cpyext/state.py --- a/pypy/module/cpyext/state.py +++ b/pypy/module/cpyext/state.py @@ -214,13 +214,27 @@ def _rawrefcount_perform(space): - from pypy.module.cpyext.pyobject import PyObject, decref + from pypy.module.cpyext.pyobject import PyObject, incref, decref while True: py_obj = rawrefcount.next_dead(PyObject) if not py_obj: break decref(space, py_obj) + while True: + py_obj = rawrefcount.cyclic_garbage_head(PyObject) + if not py_obj: + break + + pyobj = rffi.cast(PyObject, py_obj) + if pyobj.c_ob_type and pyobj.c_ob_type.c_tp_clear: + incref(space, py_obj) + pyobj.c_ob_type.c_tp_clear(pyobj) + decref(space, py_obj) + + if py_obj == rawrefcount.cyclic_garbage_head(PyObject): + rawrefcount.cyclic_garbage_remove() + class PyObjDeallocAction(executioncontext.AsyncAction): """An action that invokes _Py_Dealloc() on the dying PyObjects. 
""" diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -3031,6 +3031,12 @@ self.rrc_dealloc_pending = self.AddressStack() self.rrc_tp_traverse = tp_traverse self.rrc_pyobj_list = self._pygchdr(pyobj_list) + self.rrc_pyobj_old_list = \ + lltype.malloc(self.PYOBJ_GC_HDR, flavor='raw', immortal=True) + self.rrc_pyobj_garbage_list = \ + lltype.malloc(self.PYOBJ_GC_HDR, flavor='raw', immortal=True) + self.rrc_pyobj_garbage_list.c_gc_next = self.rrc_pyobj_garbage_list + self.rrc_pyobj_garbage_list.c_gc_prev = self.rrc_pyobj_garbage_list self.rrc_gc_as_pyobj = gc_as_pyobj self.rrc_pyobj_as_gc = pyobj_as_gc self.rrc_enabled = True @@ -3096,6 +3102,26 @@ return self.rrc_dealloc_pending.pop() return llmemory.NULL + def rawrefcount_cyclic_garbage_head(self): + if self.rrc_pyobj_garbage_list.c_gc_next <> \ + self.rrc_pyobj_garbage_list: + return llmemory.cast_ptr_to_adr( + self.rrc_gc_as_pyobj(self.rrc_pyobj_garbage_list.c_gc_next)) + else: + return llmemory.NULL + + def rawrefcount_cyclic_garbage_remove(self): + gchdr = self.rrc_pyobj_garbage_list.c_gc_next + # remove from old list + next = gchdr.c_gc_next + next.c_gc_prev = gchdr.c_gc_prev + gchdr.c_gc_prev.c_gc_next = next + # add to new list, may die later + next = self.rrc_pyobj_list.c_gc_next + self.rrc_pyobj_list.c_gc_next = gchdr + gchdr.c_gc_prev = self.rrc_pyobj_list + gchdr.c_gc_next = next + next.c_gc_prev = gchdr def rrc_invoke_callback(self): if self.rrc_enabled and self.rrc_dealloc_pending.non_empty(): @@ -3194,19 +3220,16 @@ rc -= REFCNT_FROM_PYPY self._pyobj(pyobject).c_ob_pypy_link = 0 if rc == 0: - if not major: # we do it later in major collections - self.rrc_dealloc_pending.append(pyobject) - # an object with refcnt == 0 cannot stay around waiting - # for its deallocator to be called. Some code (lxml) - # expects that tp_dealloc is called immediately when - # the refcnt drops to 0. 
If it isn't, we get some - # uncleared raw pointer that can still be used to access - # the object; but (PyObject *)raw_pointer is then bogus - # because after a Py_INCREF()/Py_DECREF() on it, its - # tp_dealloc is also called! - rc = 1 - else: - rc = REFCNT_FROM_PYPY + self.rrc_dealloc_pending.append(pyobject) + # an object with refcnt == 0 cannot stay around waiting + # for its deallocator to be called. Some code (lxml) + # expects that tp_dealloc is called immediately when + # the refcnt drops to 0. If it isn't, we get some + # uncleared raw pointer that can still be used to access + # the object; but (PyObject *)raw_pointer is then bogus + # because after a Py_INCREF()/Py_DECREF() on it, its + # tp_dealloc is also called! + rc = 1 self._pyobj(pyobject).c_ob_refcnt = rc _rrc_free._always_inline_ = True @@ -3225,7 +3248,9 @@ # TODO: * move reachable cpython objects back to pyobj_list # TODO: * mark all reachable objects as potentially uncollectable - # TODO: handle weakrefs for unreachable objects + # TODO: handle weakrefs for unreachable objects and create + # TODO: a list of callbacks, which has to be called after the + # TODO: the GC runs # TODO: call tp_finalize for unreachable objects # TODO: (could resurrect objects, so we have to do it now) @@ -3249,8 +3274,6 @@ self.visit_all_objects() def rrc_major_collection_free(self): - from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY - # ll_assert(self.rrc_p_dict_nurs.length() == 0, "p_dict_nurs not empty 2") length_estimate = self.rrc_p_dict.length() self.rrc_p_dict.delete() @@ -3270,26 +3293,17 @@ self.rrc_o_list_old.delete() self.rrc_o_list_old = new_o_list - # TODO: === DO THIS LATER OUTSIDE OF GC === - # TODO: (like dealloc_pending.append, but only pass reference to - # TODO: unreachable object list, not for each object. 
free - # TODO: this list afterwards) - # TODO: call tp_clear (wrapped between inc- and decref) instead of - # TODO: free, to break cycles for unreachable objects - - # TODO: === REMOVE THE CODE BELOW === - # TODO: this exists only to please the current tests, but fails - # TODO: if the pending deallocations are executed properly - pygchdr = self.rrc_pyobj_old_list.c_gc_next - while pygchdr <> self.rrc_pyobj_old_list: - assert pygchdr.c_gc_refs == 0 - pyobj = self.rrc_gc_as_pyobj(pygchdr) - if pyobj.c_ob_refcnt == REFCNT_FROM_PYPY: - pyobj.c_ob_refcnt = 0 - pyobj.c_ob_refcnt += 1 - self.rrc_dealloc_pending.append(llmemory.cast_ptr_to_adr(pyobj)) - pygchdr = pygchdr.c_gc_next - lltype.free(self.rrc_pyobj_old_list, flavor='raw') + # merge old_list into garbage_list and clear old_list + if self.rrc_pyobj_old_list.c_gc_next <> self.rrc_pyobj_old_list: + next = self.rrc_pyobj_garbage_list.c_gc_next + next_old = self.rrc_pyobj_old_list.c_gc_next + prev_old = self.rrc_pyobj_old_list.c_gc_prev + self.rrc_pyobj_garbage_list.c_gc_next = next_old + next_old.c_gc_prev = self.rrc_pyobj_garbage_list + prev_old.c_gc_next = next + next.c_gc_prev = prev_old + self.rrc_pyobj_old_list.c_gc_next = self.rrc_pyobj_old_list + self.rrc_pyobj_old_list.c_gc_prev = self.rrc_pyobj_old_list def _rrc_major_free(self, pyobject, surviving_list, surviving_dict): # The pyobject survives if the corresponding obj survives. 
@@ -3343,7 +3357,6 @@ gchdr.c_gc_refs += 1 def _rrc_mark_rawrefcount(self): - self.rrc_pyobj_old_list = lltype.malloc(self.PYOBJ_GC_HDR, flavor='raw') if self.rrc_pyobj_list.c_gc_next == self.rrc_pyobj_list: self.rrc_pyobj_old_list.c_gc_next = self.rrc_pyobj_old_list self.rrc_pyobj_old_list.c_gc_prev = self.rrc_pyobj_old_list diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py --- a/rpython/memory/gc/test/test_rawrefcount.py +++ b/rpython/memory/gc/test/test_rawrefcount.py @@ -461,7 +461,8 @@ dest = nodes[e.get_destination()] if source.info.type == "C" or dest.info.type == "C": self._rawrefcount_addref(source.r, dest.r) - dest.info.ext_refcnt += 1 + if source.info.alive: + dest.info.ext_refcnt += 1 elif source.info.type == "P" or dest.info.type == "P": if llmemory.cast_ptr_to_adr(source.p.next) == llmemory.NULL: source.p.next = dest.p @@ -489,14 +490,47 @@ # do collection self.gc.collect() - # simply free all pending deallocations, we don't care about the - # side effects for now... 
+ def decref_children(pyobj): + self.gc.rrc_tp_traverse(pyobj, decref, None) + def decref(pyobj, ignore): + pyobj.c_ob_refcnt -= 1 + if pyobj.c_ob_refcnt == 0: + gchdr = self.gc.rrc_pyobj_as_gc(pyobj) + next = gchdr.c_gc_next + next.c_gc_prev = gchdr.c_gc_prev + gchdr.c_gc_prev.c_gc_next = next + decref_children(pyobj) + self.pyobjs[self.pyobjs.index(pyobj)] = \ + lltype.nullptr(PYOBJ_HDR_PTR.TO) + lltype.free(pyobj, flavor='raw') + next_dead = self.gc.rawrefcount_next_dead() - while next_dead <> llmemory.NULL: + while next_dead <> llmemory.NULL: pyobj = llmemory.cast_adr_to_ptr(next_dead, self.gc.PYOBJ_HDR_PTR) - lltype.free(pyobj, flavor='raw') + decref(pyobj, None) next_dead = self.gc.rawrefcount_next_dead() + # free cyclic structures + next_dead = self.gc.rawrefcount_cyclic_garbage_head() + while next_dead <> llmemory.NULL: + pyobj = llmemory.cast_adr_to_ptr(next_dead, self.gc.PYOBJ_HDR_PTR) + pyobj.c_ob_refcnt += 1 + + def free(pyobj_to, pyobj_from): + refs = self.pyobj_refs[self.pyobjs.index(pyobj_from)] + refs.remove(pyobj_to) + decref(pyobj_to, None) + self.gc.rrc_tp_traverse(pyobj, free, pyobj) + + decref(pyobj, None) + + curr = llmemory.cast_adr_to_int(next_dead) + next_dead = self.gc.rawrefcount_cyclic_garbage_head() + + if llmemory.cast_adr_to_int(next_dead) == curr: + self.gc.rawrefcount_cyclic_garbage_remove() + next_dead = self.gc.rawrefcount_cyclic_garbage_head() + # check livelihood of objects, according to graph for name in nodes: n = nodes[name] diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -501,6 +501,12 @@ self.rawrefcount_next_dead_ptr = getfn( GCClass.rawrefcount_next_dead, [s_gc], SomeAddress(), inline = True) + self.rawrefcount_cyclic_garbage_head_ptr = getfn( + GCClass.rawrefcount_cyclic_garbage_head, [s_gc], SomeAddress(), + inline = True) + self.rawrefcount_cyclic_garbage_remove_ptr = getfn( + 
GCClass.rawrefcount_cyclic_garbage_remove, [s_gc], + annmodel.s_None, inline = True) if GCClass.can_usually_pin_objects: self.pin_ptr = getfn(GCClass.pin, @@ -1369,6 +1375,16 @@ [self.rawrefcount_next_dead_ptr, self.c_const_gc], resultvar=hop.spaceop.result) + def gct_gc_rawrefcount_cyclic_garbage_head(self, hop): + assert hop.spaceop.result.concretetype == llmemory.Address + hop.genop("direct_call", + [self.rawrefcount_cyclic_garbage_head_ptr, self.c_const_gc], + resultvar=hop.spaceop.result) + + def gct_gc_rawrefcount_cyclic_garbage_remove(self, hop): + hop.genop("direct_call", + [self.rawrefcount_cyclic_garbage_remove_ptr, self.c_const_gc]) + def _set_into_gc_array_part(self, op): if op.opname == 'setarrayitem': return op.args[1] diff --git a/rpython/rlib/rawrefcount.py b/rpython/rlib/rawrefcount.py --- a/rpython/rlib/rawrefcount.py +++ b/rpython/rlib/rawrefcount.py @@ -131,6 +131,16 @@ return ob @not_rpython +def cyclic_garbage_head(OB_PTR_TYPE): + # TODO + return lltype.nullptr(OB_PTR_TYPE.TO) + + at not_rpython +def cyclic_garbage_remove(): + # TODO + pass + + at not_rpython def _collect(track_allocation=True): """for tests only. Emulates a GC collection. 
Will invoke dealloc_trigger_callback() once if there are objects @@ -335,20 +345,32 @@ return _spec_p(hop, v_p) class Entry(ExtRegistryEntry): - _about_ = next_dead + _about_ = (next_dead, cyclic_garbage_head) def compute_result_annotation(self, s_OB_PTR_TYPE): - from rpython.annotator import model as annmodel from rpython.rtyper.llannotation import lltype_to_annotation assert s_OB_PTR_TYPE.is_constant() return lltype_to_annotation(s_OB_PTR_TYPE.const) def specialize_call(self, hop): + if self.instance is next_dead: + name = 'gc_rawrefcount_next_dead' + elif self.instance is cyclic_garbage_head: + name = 'gc_rawrefcount_cyclic_garbage_head' hop.exception_cannot_occur() - v_ob = hop.genop('gc_rawrefcount_next_dead', [], - resulttype = llmemory.Address) + v_ob = hop.genop(name, [], resulttype = llmemory.Address) return _spec_ob(hop, v_ob) +class Entry(ExtRegistryEntry): + _about_ = cyclic_garbage_remove + + def compute_result_annotation(self): + pass + + def specialize_call(self, hop): + hop.exception_cannot_occur() + hop.genop('gc_rawrefcount_cyclic_garbage_remove', []) + src_dir = py.path.local(__file__).dirpath() / 'src' boehm_eci = ExternalCompilationInfo( post_include_bits = [(src_dir / 'boehm-rawrefcount.h').read()], diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py --- a/rpython/rtyper/llinterp.py +++ b/rpython/rtyper/llinterp.py @@ -972,6 +972,11 @@ def op_gc_rawrefcount_next_dead(self, *args): raise NotImplementedError("gc_rawrefcount_next_dead") + def op_gc_rawrefcount_cyclic_garbage_head(self, *args): + raise NotImplementedError("gc_rawrefcount_cyclic_garbage_head") + def op_gc_rawrefcount_cyclic_garbage_remove(self, *args): + raise NotImplementedError("gc_rawrefcount_cyclic_garbage_remove") + def op_do_malloc_fixedsize(self): raise NotImplementedError("do_malloc_fixedsize") def op_do_malloc_fixedsize_clear(self): diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py --- 
a/rpython/rtyper/lltypesystem/lloperation.py +++ b/rpython/rtyper/lltypesystem/lloperation.py @@ -169,6 +169,7 @@ # This list corresponds to the operations implemented by the LLInterpreter. # ***** Run test_lloperation after changes. ***** +# ***** Run test_lloperation after changes. ***** LL_OPERATIONS = { 'direct_call': LLOp(canraise=(Exception,)), @@ -490,13 +491,15 @@ 'gc_fq_register' : LLOp(), 'gc_ignore_finalizer' : LLOp(canrun=True), - 'gc_rawrefcount_init': LLOp(), - 'gc_rawrefcount_create_link_pypy': LLOp(), - 'gc_rawrefcount_create_link_pyobj': LLOp(), - 'gc_rawrefcount_mark_deallocating': LLOp(), - 'gc_rawrefcount_from_obj': LLOp(sideeffects=False), - 'gc_rawrefcount_to_obj': LLOp(sideeffects=False), - 'gc_rawrefcount_next_dead': LLOp(), + 'gc_rawrefcount_init': LLOp(), + 'gc_rawrefcount_create_link_pypy': LLOp(), + 'gc_rawrefcount_create_link_pyobj': LLOp(), + 'gc_rawrefcount_mark_deallocating': LLOp(), + 'gc_rawrefcount_from_obj': LLOp(sideeffects=False), + 'gc_rawrefcount_to_obj': LLOp(sideeffects=False), + 'gc_rawrefcount_next_dead': LLOp(), + 'gc_rawrefcount_cyclic_garbage_head': LLOp(sideeffects=False), + 'gc_rawrefcount_cyclic_garbage_remove': LLOp(), 'gc_move_out_of_nursery': LLOp(), @@ -582,7 +585,6 @@ # __________ instrumentation _________ 'instrument_count': LLOp(), } -# ***** Run test_lloperation after changes. 
***** # ____________________________________________________________ # Post-processing From pypy.commits at gmail.com Sat Jan 19 12:06:32 2019 From: pypy.commits at gmail.com (arigo) Date: Sat, 19 Jan 2019 09:06:32 -0800 (PST) Subject: [pypy-commit] pypy default: fix the test to match 432d816c6d7b Message-ID: <5c435918.1c69fb81.a5f71.35db@mx.google.com> Author: Armin Rigo Branch: Changeset: r95673:de85e0ef8bdc Date: 2019-01-19 18:06 +0100 http://bitbucket.org/pypy/pypy/changeset/de85e0ef8bdc/ Log: fix the test to match 432d816c6d7b diff --git a/rpython/memory/gc/test/test_direct.py b/rpython/memory/gc/test/test_direct.py --- a/rpython/memory/gc/test/test_direct.py +++ b/rpython/memory/gc/test/test_direct.py @@ -774,7 +774,7 @@ def test_collect_0(self, debuglog): self.gc.collect(1) # start a major debuglog.reset() - self.gc.collect(0) # do ONLY a minor + self.gc.collect(-1) # do ONLY a minor assert debuglog.summary() == {'gc-minor': 1} def test_enable_disable(self, debuglog): From pypy.commits at gmail.com Sat Jan 19 15:12:54 2019 From: pypy.commits at gmail.com (mattip) Date: Sat, 19 Jan 2019 12:12:54 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fill_char can be a utf8 multibyte codepoint Message-ID: <5c4384c6.1c69fb81.6c559.311b@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95674:25ff5465e2c1 Date: 2019-01-19 22:12 +0200 http://bitbucket.org/pypy/pypy/changeset/25ff5465e2c1/ Log: fill_char can be a utf8 multibyte codepoint diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py --- a/pypy/objspace/std/newformat.py +++ b/pypy/objspace/std/newformat.py @@ -740,7 +740,7 @@ to_remainder, upper, grouped_digits=None): out = self._builder() if spec.n_lpadding: - out.append_multiple_char(fill_char[0], spec.n_lpadding) + out.append_multiple_char(fill_char, spec.n_lpadding) if spec.n_sign: sign = self._lit(spec.sign) out.append(sign) @@ -750,7 +750,7 @@ pref = self._upcase_string(pref) out.append(pref) if spec.n_spadding: 
- out.append_multiple_char(fill_char[0], spec.n_spadding) + out.append_multiple_char(fill_char, spec.n_spadding) if spec.n_digits != 0: if self._loc_thousands: if grouped_digits is not None: @@ -770,7 +770,7 @@ if spec.n_remainder: out.append(num[to_remainder:]) if spec.n_rpadding: - out.append_multiple_char(fill_char[0], spec.n_rpadding) + out.append_multiple_char(fill_char, spec.n_rpadding) #if complex, need to call twice - just retun the buffer return out.build() diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1133,6 +1133,8 @@ def test_format_repeat(self): assert format(u"abc", u"z<5") == u"abczz" assert format(u"abc", u"\u2007<5") == u"abc\u2007\u2007" + #CPython2 raises UnicodeEncodeError + assert format(123, u"\u2007<5") == u"123\u2007\u2007" def test_formatting_char(self): for num in range(0x80,0x100): From pypy.commits at gmail.com Sat Jan 19 15:14:59 2019 From: pypy.commits at gmail.com (mattip) Date: Sat, 19 Jan 2019 12:14:59 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: merge unicode-utf8 into branch Message-ID: <5c438543.1c69fb81.a8b5e.47f1@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95675:c01df448bea6 Date: 2019-01-19 22:14 +0200 http://bitbucket.org/pypy/pypy/changeset/c01df448bea6/ Log: merge unicode-utf8 into branch diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py --- a/pypy/objspace/std/newformat.py +++ b/pypy/objspace/std/newformat.py @@ -751,7 +751,7 @@ to_remainder, upper, grouped_digits=None): out = self._builder() if spec.n_lpadding: - out.append_multiple_char(fill_char[0], spec.n_lpadding) + out.append_multiple_char(fill_char, spec.n_lpadding) if spec.n_sign: sign = self._lit(spec.sign) out.append(sign) @@ -761,7 +761,7 @@ pref = self._upcase_string(pref) out.append(pref) if spec.n_spadding: - 
out.append_multiple_char(fill_char[0], spec.n_spadding) + out.append_multiple_char(fill_char, spec.n_spadding) if spec.n_digits != 0: if self._loc_thousands: if grouped_digits is not None: @@ -781,7 +781,7 @@ if spec.n_remainder: out.append(num[to_remainder:]) if spec.n_rpadding: - out.append_multiple_char(fill_char[0], spec.n_rpadding) + out.append_multiple_char(fill_char, spec.n_rpadding) #if complex, need to call twice - just retun the buffer return out.build() diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1130,6 +1130,8 @@ def test_format_repeat(self): assert format(u"abc", u"z<5") == u"abczz" assert format(u"abc", u"\u2007<5") == u"abc\u2007\u2007" + #CPython2 raises UnicodeEncodeError + assert format(123, u"\u2007<5") == u"123\u2007\u2007" def test_formatting_unicode__repr__(self): # Printable character From pypy.commits at gmail.com Sat Jan 19 17:58:28 2019 From: pypy.commits at gmail.com (arigo) Date: Sat, 19 Jan 2019 14:58:28 -0800 (PST) Subject: [pypy-commit] pypy default: Support for revdb translation Message-ID: <5c43ab94.1c69fb81.e252b.53d0@mx.google.com> Author: Armin Rigo Branch: Changeset: r95676:30d8fded3b4f Date: 2019-01-19 23:36 +0100 http://bitbucket.org/pypy/pypy/changeset/30d8fded3b4f/ Log: Support for revdb translation diff --git a/pypy/module/gc/__init__.py b/pypy/module/gc/__init__.py --- a/pypy/module/gc/__init__.py +++ b/pypy/module/gc/__init__.py @@ -4,7 +4,6 @@ class Module(MixedModule): interpleveldefs = { 'collect': 'interp_gc.collect', - 'collect_step': 'interp_gc.collect_step', 'enable': 'interp_gc.enable', 'disable': 'interp_gc.disable', 'isenabled': 'interp_gc.isenabled', @@ -23,6 +22,7 @@ 'get_stats': 'app_referents.get_stats', }) self.interpleveldefs.update({ + 'collect_step': 'interp_gc.collect_step', 'get_rpy_roots': 'referents.get_rpy_roots', 'get_rpy_referents': 
'referents.get_rpy_referents', 'get_rpy_memory_usage': 'referents.get_rpy_memory_usage', diff --git a/rpython/translator/c/src/mem.h b/rpython/translator/c/src/mem.h --- a/rpython/translator/c/src/mem.h +++ b/rpython/translator/c/src/mem.h @@ -112,6 +112,8 @@ #define OP_GC__DISABLE_FINALIZERS(r) boehm_gc_finalizer_lock++ #define OP_GC__ENABLE_FINALIZERS(r) (boehm_gc_finalizer_lock--, \ boehm_gc_finalizer_notifier()) +#define OP_GC__DISABLE(r) /* nothing */ +#define OP_GC__ENABLE(r) /* nothing */ #define OP_BOEHM_FQ_REGISTER(tagindex, obj, r) \ boehm_fq_register(boehm_fq_queues + tagindex, obj) @@ -127,6 +129,8 @@ #define OP_BOEHM_DISAPPEARING_LINK(link, obj, r) /* nothing */ #define OP_GC__DISABLE_FINALIZERS(r) /* nothing */ #define OP_GC__ENABLE_FINALIZERS(r) /* nothing */ +#define OP_GC__DISABLE(r) /* nothing */ +#define OP_GC__ENABLE(r) /* nothing */ #define GC_REGISTER_FINALIZER(a, b, c, d, e) /* nothing */ #define GC_gcollect() /* nothing */ #define GC_set_max_heap_size(a) /* nothing */ From pypy.commits at gmail.com Sun Jan 20 05:59:49 2019 From: pypy.commits at gmail.com (mjacob) Date: Sun, 20 Jan 2019 02:59:49 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5c4454a5.1c69fb81.fdee4.3628@mx.google.com> Author: Manuel Jacob Branch: py3.5 Changeset: r95677:344c0186ad69 Date: 2019-01-20 11:58 +0100 http://bitbucket.org/pypy/pypy/changeset/344c0186ad69/ Log: hg merge default diff --git a/pypy/module/gc/__init__.py b/pypy/module/gc/__init__.py --- a/pypy/module/gc/__init__.py +++ b/pypy/module/gc/__init__.py @@ -4,7 +4,6 @@ class Module(MixedModule): interpleveldefs = { 'collect': 'interp_gc.collect', - 'collect_step': 'interp_gc.collect_step', 'enable': 'interp_gc.enable', 'disable': 'interp_gc.disable', 'isenabled': 'interp_gc.isenabled', @@ -23,6 +22,7 @@ 'get_stats': 'app_referents.get_stats', }) self.interpleveldefs.update({ + 'collect_step': 'interp_gc.collect_step', 'get_rpy_roots': 'referents.get_rpy_roots', 
'get_rpy_referents': 'referents.get_rpy_referents', 'get_rpy_memory_usage': 'referents.get_rpy_memory_usage', diff --git a/rpython/memory/gc/test/test_direct.py b/rpython/memory/gc/test/test_direct.py --- a/rpython/memory/gc/test/test_direct.py +++ b/rpython/memory/gc/test/test_direct.py @@ -774,7 +774,7 @@ def test_collect_0(self, debuglog): self.gc.collect(1) # start a major debuglog.reset() - self.gc.collect(0) # do ONLY a minor + self.gc.collect(-1) # do ONLY a minor assert debuglog.summary() == {'gc-minor': 1} def test_enable_disable(self, debuglog): diff --git a/rpython/rtyper/test/test_rdict.py b/rpython/rtyper/test/test_rdict.py --- a/rpython/rtyper/test/test_rdict.py +++ b/rpython/rtyper/test/test_rdict.py @@ -1,6 +1,7 @@ import sys from contextlib import contextmanager import signal +from collections import OrderedDict from rpython.translator.translator import TranslationContext from rpython.annotator.model import ( @@ -1196,7 +1197,7 @@ DictValue(None, s_value)) dictrepr.setup() self.l_dict = self.newdict(dictrepr) - self.reference = self.new_reference() + self.reference = OrderedDict() self.ll_key = r_key.convert_const self.ll_value = r_value.convert_const self.removed_keys = [] @@ -1323,7 +1324,6 @@ class DictSpace(MappingSpace): MappingRepr = rdict.DictRepr - new_reference = dict ll_getitem = staticmethod(rdict.ll_dict_getitem) ll_setitem = staticmethod(rdict.ll_dict_setitem) ll_delitem = staticmethod(rdict.ll_dict_delitem) diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py --- a/rpython/rtyper/test/test_rordereddict.py +++ b/rpython/rtyper/test/test_rordereddict.py @@ -422,7 +422,6 @@ class ODictSpace(MappingSpace): MappingRepr = rodct.OrderedDictRepr - new_reference = OrderedDict moved_around = False ll_getitem = staticmethod(rodct.ll_dict_getitem) ll_setitem = staticmethod(rodct.ll_dict_setitem) diff --git a/rpython/translator/c/src/mem.h b/rpython/translator/c/src/mem.h --- 
a/rpython/translator/c/src/mem.h +++ b/rpython/translator/c/src/mem.h @@ -112,6 +112,8 @@ #define OP_GC__DISABLE_FINALIZERS(r) boehm_gc_finalizer_lock++ #define OP_GC__ENABLE_FINALIZERS(r) (boehm_gc_finalizer_lock--, \ boehm_gc_finalizer_notifier()) +#define OP_GC__DISABLE(r) /* nothing */ +#define OP_GC__ENABLE(r) /* nothing */ #define OP_BOEHM_FQ_REGISTER(tagindex, obj, r) \ boehm_fq_register(boehm_fq_queues + tagindex, obj) @@ -127,6 +129,8 @@ #define OP_BOEHM_DISAPPEARING_LINK(link, obj, r) /* nothing */ #define OP_GC__DISABLE_FINALIZERS(r) /* nothing */ #define OP_GC__ENABLE_FINALIZERS(r) /* nothing */ +#define OP_GC__DISABLE(r) /* nothing */ +#define OP_GC__ENABLE(r) /* nothing */ #define GC_REGISTER_FINALIZER(a, b, c, d, e) /* nothing */ #define GC_gcollect() /* nothing */ #define GC_set_max_heap_size(a) /* nothing */ From pypy.commits at gmail.com Sun Jan 20 12:11:36 2019 From: pypy.commits at gmail.com (mattip) Date: Sun, 20 Jan 2019 09:11:36 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: test, fix for capitalize returning more than one codepoint Message-ID: <5c44abc8.1c69fb81.200d0.4c21@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95678:488e3a462f8d Date: 2019-01-20 11:35 +0200 http://bitbucket.org/pypy/pypy/changeset/488e3a462f8d/ Log: test, fix for capitalize returning more than one codepoint diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -407,6 +407,8 @@ # check with Ll chars with no upper - nothing changes here assert ('\u019b\u1d00\u1d86\u0221\u1fb7'.capitalize() == '\u019b\u1d00\u1d86\u0221\u1fb7') + # cpython issue 17252 for i_dot + assert u'h\u0130'.capitalize() == u'H\u0069\u0307' def test_changed_in_unicodedata_version_8(self): assert u'\u025C'.upper() == u'\uA7AB' diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py 
--- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -855,8 +855,9 @@ for c in codes: builder.append_code(c) for ch in it: - ch = unicodedb.tolower(ch) - builder.append_code(ch) + ch = unicodedb.tolower_full(ch) + for ch1 in ch: + builder.append_code(ch1) return self.from_utf8builder(builder) @unwrap_spec(width=int, w_fillchar=WrappedDefault(u' ')) From pypy.commits at gmail.com Sun Jan 20 12:11:38 2019 From: pypy.commits at gmail.com (mattip) Date: Sun, 20 Jan 2019 09:11:38 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: cpython prefers "invalid continuation byte" over "unxepected end of data" Message-ID: <5c44abca.1c69fb81.f604d.0f11@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95679:98c9d750a001 Date: 2019-01-20 12:05 +0200 http://bitbucket.org/pypy/pypy/changeset/98c9d750a001/ Log: cpython prefers "invalid continuation byte" over "unxepected end of data" diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -395,8 +395,12 @@ if not final: pos -= 1 break - r, pos, rettype = errorhandler(errors, "utf8", "unexpected end of data", - s, pos - 1, pos) + if (pos) < end and rutf8._invalid_byte_2_of_3(ordch1, + ord(s[pos]), allow_surrogates): + msg = "invalid continuation byte" + else: + msg = "unexpected end of data" + r, pos, rettype = errorhandler(errors, "utf8", msg, s, pos - 1, pos) res.append(r) suppressing = True continue @@ -429,8 +433,13 @@ if not final: pos -= 1 break - r, pos, rettype = errorhandler(errors, "utf8", "unexpected end of data", - s, pos - 1, pos) + if pos < end and rutf8._invalid_byte_2_of_4(ordch1, s[pos]): + msg = "invalid continuation byte" + elif pos + 1 < end and rutf8._invalid_byte_3_of_4(ord(s[pos + 1])): + msg = "invalid continuation byte" + else: + msg = "unexpected end of data" + r, pos, rettype = errorhandler(errors, "utf8", msg, s, pos - 1, pos) 
res.append(r) suppressing = True continue diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -544,6 +544,10 @@ assert codecs.decode(b'abc') == 'abc' exc = raises(UnicodeDecodeError, codecs.decode, b'\xff', 'ascii') + exc = raises(UnicodeDecodeError, codecs.decode, b'\xe0\x00', 'utf-8') + print(dir(exc.value)) + assert 'invalid continuation byte' in exc.value.reason + def test_bad_errorhandler_return(self): import codecs def baddecodereturn1(exc): From pypy.commits at gmail.com Sun Jan 20 12:11:39 2019 From: pypy.commits at gmail.com (mattip) Date: Sun, 20 Jan 2019 09:11:39 -0800 (PST) Subject: [pypy-commit] pypy default: whoops Message-ID: <5c44abcb.1c69fb81.f9b4d.170c@mx.google.com> Author: Matti Picus Branch: Changeset: r95680:eb58462b4225 Date: 2019-01-20 19:09 +0200 http://bitbucket.org/pypy/pypy/changeset/eb58462b4225/ Log: whoops diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -86,7 +86,7 @@ else: log.msg('Running "%s" succeeded' %(vcvars,)) except Exception as e: - log.msg('Running "%s" failed: "%s"', (vcvars, str(e))) + log.msg('Running "%s" failed: "%s"' % (vcvars, str(e))) return None stdout = stdout.replace("\r\n", "\n") From pypy.commits at gmail.com Sun Jan 20 12:11:41 2019 From: pypy.commits at gmail.com (mattip) Date: Sun, 20 Jan 2019 09:11:41 -0800 (PST) Subject: [pypy-commit] pypy py3.5: merge default into branch Message-ID: <5c44abcd.1c69fb81.bf977.9a73@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r95681:49aade4a2c18 Date: 2019-01-20 19:10 +0200 http://bitbucket.org/pypy/pypy/changeset/49aade4a2c18/ Log: merge default into branch diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- 
a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -86,7 +86,7 @@ else: log.msg('Running "%s" succeeded' %(vcvars,)) except Exception as e: - log.msg('Running "%s" failed: "%s"', (vcvars, str(e))) + log.msg('Running "%s" failed: "%s"' % (vcvars, str(e))) return None stdout = stdout.replace("\r\n", "\n") From pypy.commits at gmail.com Mon Jan 21 01:17:30 2019 From: pypy.commits at gmail.com (mattip) Date: Sun, 20 Jan 2019 22:17:30 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: fix 98c9d750a001 Message-ID: <5c4563fa.1c69fb81.c605f.6af1@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95682:53455a64a497 Date: 2019-01-21 08:08 +0200 http://bitbucket.org/pypy/pypy/changeset/53455a64a497/ Log: fix 98c9d750a001 diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -433,7 +433,7 @@ if not final: pos -= 1 break - if pos < end and rutf8._invalid_byte_2_of_4(ordch1, s[pos]): + if pos < end and rutf8._invalid_byte_2_of_4(ordch1, ord(s[pos])): msg = "invalid continuation byte" elif pos + 1 < end and rutf8._invalid_byte_3_of_4(ord(s[pos + 1])): msg = "invalid continuation byte" From pypy.commits at gmail.com Mon Jan 21 01:17:31 2019 From: pypy.commits at gmail.com (mattip) Date: Sun, 20 Jan 2019 22:17:31 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: make doc test pass Message-ID: <5c4563fb.1c69fb81.e252b.e9be@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95683:87ee2e64d118 Date: 2019-01-21 08:16 +0200 http://bitbucket.org/pypy/pypy/changeset/87ee2e64d118/ Log: make doc test pass diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-head.rst --- a/pypy/doc/whatsnew-pypy3-head.rst +++ b/pypy/doc/whatsnew-pypy3-head.rst @@ -5,15 +5,6 @@ .. this is the revision after release-pypy3.5-v6.0 .. startrev: 580e3e26cd32 -.. 
branch: hroncok/fix-multiprocessing-regression-on-newer--1524656522151 +.. branch: unicode-utf8 -Fix multiprocessing regression on newer glibcs - -.. branch: py3.5-user-site-impl - -Use implementation-specific site directories in sysconfig like in Python2 - -.. branch: py3.5-reverse-debugger - -The reverse-debugger branch has been merged. For more information, see -https://bitbucket.org/pypy/revdb +Use utf-8 internally to represent unicode strings From pypy.commits at gmail.com Mon Jan 21 07:21:22 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 21 Jan 2019 04:21:22 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: unicodeobject.text_w must be valid unicode Message-ID: <5c45b942.1c69fb81.99bf3.f028@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95684:0be26dc39a59 Date: 2019-01-21 14:00 +0200 http://bitbucket.org/pypy/pypy/changeset/0be26dc39a59/ Log: unicodeobject.text_w must be valid unicode Can we speed this up by storing a _valid state when creating the object? 
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -87,7 +87,8 @@ return space.newint(uid) def text_w(self, space): - return self._utf8 + eh = unicodehelper.decode_error_handler(space) + return unicodehelper.utf8_encode_utf_8(self._utf8, 'utf-8', eh) def utf8_w(self, space): return self._utf8 From pypy.commits at gmail.com Mon Jan 21 07:21:24 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 21 Jan 2019 04:21:24 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: fix definitely broken code Message-ID: <5c45b944.1c69fb81.76f1f.0261@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95685:58a8827b62dc Date: 2019-01-21 14:05 +0200 http://bitbucket.org/pypy/pypy/changeset/58a8827b62dc/ Log: fix definitely broken code diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -114,9 +114,8 @@ elif _MACOSX: utf8 = space.utf8_w(w_uni) errorhandler=state.encode_error_handler, - bytes = unicodehelper.utf8_encode_utf_8( - utf8, 'surrogateescape', - allow_surrogates=False) + bytes = utf8_encode_utf_8(utf8, 'surrogateescape', errorhandler, + allow_surrogates=False) elif space.sys.filesystemencoding is None or state.codec_need_encodings: # bootstrap check: if the filesystemencoding isn't initialized # or the filesystem codec is implemented in Python we cannot From pypy.commits at gmail.com Mon Jan 21 10:08:29 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 21 Jan 2019 07:08:29 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: Backed out changeset: 0be26dc39a59 Message-ID: <5c45e06d.1c69fb81.dcc79.76fe@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95686:92187a9964e4 Date: 2019-01-21 16:27 +0200 http://bitbucket.org/pypy/pypy/changeset/92187a9964e4/ Log: Backed out changeset: 
0be26dc39a59 diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -87,8 +87,7 @@ return space.newint(uid) def text_w(self, space): - eh = unicodehelper.decode_error_handler(space) - return unicodehelper.utf8_encode_utf_8(self._utf8, 'utf-8', eh) + return self._utf8 def utf8_w(self, space): return self._utf8 From pypy.commits at gmail.com Mon Jan 21 10:08:31 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 21 Jan 2019 07:08:31 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: test, fix encoding port from unicode to bytes Message-ID: <5c45e06f.1c69fb81.dcde5.bc9b@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95687:15a9ac1c0012 Date: 2019-01-21 17:07 +0200 http://bitbucket.org/pypy/pypy/changeset/15a9ac1c0012/ Log: test, fix encoding port from unicode to bytes diff --git a/pypy/module/_socket/interp_func.py b/pypy/module/_socket/interp_func.py --- a/pypy/module/_socket/interp_func.py +++ b/pypy/module/_socket/interp_func.py @@ -311,7 +311,7 @@ elif space.isinstance_w(w_port, space.w_bytes): port = space.bytes_w(w_port) elif space.isinstance_w(w_port, space.w_unicode): - port = space.text_w(w_port) + port = space.bytes_w(space.encode_unicode_object(w_port, 'utf-8', 'strict')) else: raise oefmt(space.w_TypeError, "getaddrinfo() argument 2 must be integer or string") diff --git a/pypy/module/_socket/test/test_sock_app.py b/pypy/module/_socket/test/test_sock_app.py --- a/pypy/module/_socket/test/test_sock_app.py +++ b/pypy/module/_socket/test/test_sock_app.py @@ -209,6 +209,17 @@ w_l = space.appexec([w_socket, space.newbytes(host), space.wrap('smtp')], "(_socket, host, port): return _socket.getaddrinfo(host, port)") assert space.unwrap(w_l) == socket.getaddrinfo(host, 'smtp') + w_l = space.appexec([w_socket, space.newbytes(host), space.wrap(u'\uD800')], ''' + + (_socket, host, port): + try: + info = 
_socket.getaddrinfo(host, port) + except Exception as e: + return e.reason == 'surrogates not allowed' + return -1 + ''') + assert space.unwrap(w_l) == True + def test_unknown_addr_as_object(): from pypy.module._socket.interp_socket import addr_as_object @@ -729,7 +740,7 @@ def setup_class(cls): if not hasattr(os, 'getpid'): pytest.skip("AF_NETLINK needs os.getpid()") - + if cls.runappdirect: import _socket w_ok = hasattr(_socket, 'AF_NETLINK') From pypy.commits at gmail.com Mon Jan 21 10:16:05 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 21 Jan 2019 07:16:05 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: fix for mac OS Message-ID: <5c45e235.1c69fb81.50a0a.265c@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95688:a2e41be99943 Date: 2019-01-21 17:15 +0200 http://bitbucket.org/pypy/pypy/changeset/a2e41be99943/ Log: fix for mac OS diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -82,9 +82,8 @@ errorhandler=errorhandler, force_ignore=False) elif _MACOSX: bytes = space.bytes_w(w_string) - utf8 = str_decode_utf8( - bytes, 'surrogateescape', final=True, - allow_surrogates=False)[0] + utf8 = str_decode_utf8(bytes, 'surrogateescape', True, errorhandler, + allow_surrogates=False)[0] uni = space.realunicode_w(utf8) elif space.sys.filesystemencoding is None or state.codec_need_encodings: # bootstrap check: if the filesystemencoding isn't initialized From pypy.commits at gmail.com Mon Jan 21 10:44:25 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 21 Jan 2019 07:44:25 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: improve TODO Message-ID: <5c45e8d9.1c69fb81.996d9.f09e@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95689:8830c8411301 Date: 2019-01-21 17:43 +0200 http://bitbucket.org/pypy/pypy/changeset/8830c8411301/ Log: improve TODO diff --git a/TODO b/TODO --- a/TODO +++ 
b/TODO @@ -1,15 +1,23 @@ -* find a better way to run "find" without creating the index storage, - if one is not already readily available +* find a better way to run "find" without creating the index storage, if one + is not already readily available * write the correct jit_elidable in _get_index_storage * improve performance of splitlines * fix _pypyjson to not use a wrapped dict when decoding an object * make sure we review all the places that call ord(unichr) to check for ValueErrors * Find a more elegant way to define MAXUNICODE in rpython/rlib/runicode.py -* rewrite unicodeobject.unicode_to_decimal_w to only use utf8 encoded bytes -* revisit why runicode import str_decode_utf_8_impl needed instead of runicode import str_decode_utf_8 -* revisit all places where we do utf8.decode('utf-8'), they should work directly with utf8 +* revisit why runicode import str_decode_utf_8_impl needed instead of runicode + import str_decode_utf_8 +* revisit all places where we do utf8.decode('utf-8'), they should work + directly with utf8 - rutf8.utf8_encode_mbcs - unicodehelper.fsencode + - unicodehelper.unicode_to_decimal_w * remove asserts from _WIN32 paths in rlib.rposix.re{name,place} -* convert all realunicode_w to unicode_w after we flush out all old uses of unicode_w -* benchmark +* convert all realunicode_w to unicode_w after we flush out all old uses of + unicode_w +* benchmark more (looks good so far) +* Review all uses of W_Unicode.text_w, right now it is exactly W_Unicode.utf8_w. + It shoud only return valid utf8 (see 0be26dc39a59 which broke translation on + win32 and failed tests on linux64). 
Then we can use it in places like + _socket.interp_func.getaddrinfo instead of space.encode_unicode_object(w_port, + 'utf-8', 'strict') From pypy.commits at gmail.com Tue Jan 22 05:37:42 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 22 Jan 2019 02:37:42 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: calculate padding from unicode length, not utf8 length Message-ID: <5c46f276.1c69fb81.c8d34.319c@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95690:0e658f7c342f Date: 2019-01-22 08:05 +0200 http://bitbucket.org/pypy/pypy/changeset/0e658f7c342f/ Log: calculate padding from unicode length, not utf8 length diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -348,12 +348,12 @@ return if prec >= 0 and prec < length: length = prec # ignore the end of the string if too long + padding = self.width - length if do_unicode: # XXX could use W_UnicodeObject.descr_getslice, but that would # require a refactor to use the w_val, not r length = rutf8._pos_at_index(r, length) result = self.result - padding = self.width - length if padding < 0: padding = 0 assert padding >= 0 diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1129,6 +1129,14 @@ return u'\u1234' '%s' % X() + def test_formatting_unicode__str__4(self): + # from lib-python/3/test/test_tokenize + fmt = "%(token)-13.13r %(start)s" + vals = {"token" : u"Örter", "start": "(1, 0)"} + expected = u"'Örter' (1, 0)" + s = fmt % vals + assert s == expected, "\ns = '%s'\nexpected= '%s'" %(s, expected) + def test_format_repeat(self): assert format(u"abc", u"z<5") == u"abczz" assert format(u"abc", u"\u2007<5") == u"abc\u2007\u2007" From pypy.commits at gmail.com Tue Jan 22 05:37:44 2019 From: pypy.commits at gmail.com (mattip) 
Date: Tue, 22 Jan 2019 02:37:44 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: test, fix capitalize with sigma in final position Message-ID: <5c46f278.1c69fb81.864eb.7fe5@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95691:1142ef536c49 Date: 2019-01-22 11:03 +0200 http://bitbucket.org/pypy/pypy/changeset/1142ef536c49/ Log: test, fix capitalize with sigma in final position diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -387,6 +387,7 @@ assert u'\ud800'.upper() == u'\ud800' def test_capitalize(self): + assert u'A\u0345\u03a3'.capitalize() == u'A\u0345\u03c2' assert u"brown fox".capitalize() == u"Brown fox" assert u' hello '.capitalize() == u' hello ' assert u'Hello '.capitalize() == u'Hello ' diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -856,8 +856,12 @@ builder.append_code(c) for ch in it: ch = unicodedb.tolower_full(ch) - for ch1 in ch: - builder.append_code(ch1) + if it.done(): + # Special case lower-sigma + if ch[-1] == 0x03c3: + ch[-1] = 0x03c2 + for c in ch: + builder.append_code(c) return self.from_utf8builder(builder) @unwrap_spec(width=int, w_fillchar=WrappedDefault(u' ')) From pypy.commits at gmail.com Tue Jan 22 05:37:46 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 22 Jan 2019 02:37:46 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: test, fix for suppressing multiple error output bytes Message-ID: <5c46f27a.1c69fb81.f1eb.26a6@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95692:d529e654cfb5 Date: 2019-01-22 12:36 +0200 http://bitbucket.org/pypy/pypy/changeset/d529e654cfb5/ Log: test, fix for suppressing multiple error output bytes diff --git a/pypy/interpreter/unicodehelper.py 
b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -398,9 +398,9 @@ msg = "invalid continuation byte" else: msg = "unexpected end of data" + suppressing = True r, pos, rettype = errorhandler(errors, "utf8", msg, s, pos - 1, pos) res.append(r) - suppressing = True continue ordch2 = ord(s[pos]) ordch3 = ord(s[pos + 1]) @@ -437,9 +437,9 @@ msg = "invalid continuation byte" else: msg = "unexpected end of data" + suppressing = True r, pos, rettype = errorhandler(errors, "utf8", msg, s, pos - 1, pos) res.append(r) - suppressing = True continue ordch2 = ord(s[pos]) ordch3 = ord(s[pos + 1]) diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -272,6 +272,73 @@ assert ((b'aaaa' + bseq + b'bbbb').decode('utf-8', 'ignore') == u'aaaabbbb') + def test_invalid_cb_for_3bytes_seq(self): + """ + Test that an 'invalid continuation byte' error is raised when the + continuation byte(s) of a 3-bytes sequence are invalid. When + errors='replace', if the first continuation byte is valid, the first + two bytes (start byte + 1st cb) are replaced by a single U+FFFD and the + third byte is handled separately, otherwise only the start byte is + replaced with a U+FFFD and the other continuation bytes are handled + separately. + E.g. in the sequence , E1 is the start byte of a 3-bytes + sequence, 80 is a valid continuation byte, but 41 is not a valid cb + because it's the ASCII letter 'A'. + Note: when the start byte is E0 or ED, the valid ranges for the first + continuation byte are limited to A0..BF and 80..9F respectively. + Python 2 used to consider all the bytes in range 80..BF valid when the + start byte was ED. This is fixed in Python 3. 
+ """ + FFFD = '\ufffd' + FFFDx2 = FFFD * 2 + sequences = [ + ('E0 00', FFFD+'\x00'), ('E0 7F', FFFD+'\x7f'), ('E0 80', FFFDx2), + ('E0 9F', FFFDx2), ('E0 C0', FFFDx2), ('E0 FF', FFFDx2), + ('E0 A0 00', FFFD+'\x00'), ('E0 A0 7F', FFFD+'\x7f'), + ('E0 A0 C0', FFFDx2), ('E0 A0 FF', FFFDx2), + ('E0 BF 00', FFFD+'\x00'), ('E0 BF 7F', FFFD+'\x7f'), + ('E0 BF C0', FFFDx2), ('E0 BF FF', FFFDx2), ('E1 00', FFFD+'\x00'), + ('E1 7F', FFFD+'\x7f'), ('E1 C0', FFFDx2), ('E1 FF', FFFDx2), + ('E1 80 00', FFFD+'\x00'), ('E1 80 7F', FFFD+'\x7f'), + ('E1 80 C0', FFFDx2), ('E1 80 FF', FFFDx2), + ('E1 BF 00', FFFD+'\x00'), ('E1 BF 7F', FFFD+'\x7f'), + ('E1 BF C0', FFFDx2), ('E1 BF FF', FFFDx2), ('EC 00', FFFD+'\x00'), + ('EC 7F', FFFD+'\x7f'), ('EC C0', FFFDx2), ('EC FF', FFFDx2), + ('EC 80 00', FFFD+'\x00'), ('EC 80 7F', FFFD+'\x7f'), + ('EC 80 C0', FFFDx2), ('EC 80 FF', FFFDx2), + ('EC BF 00', FFFD+'\x00'), ('EC BF 7F', FFFD+'\x7f'), + ('EC BF C0', FFFDx2), ('EC BF FF', FFFDx2), ('ED 00', FFFD+'\x00'), + ('ED 7F', FFFD+'\x7f'), + ('ED A0', FFFDx2), ('ED BF', FFFDx2), # see note ^ + ('ED C0', FFFDx2), ('ED FF', FFFDx2), ('ED 80 00', FFFD+'\x00'), + ('ED 80 7F', FFFD+'\x7f'), ('ED 80 C0', FFFDx2), + ('ED 80 FF', FFFDx2), ('ED 9F 00', FFFD+'\x00'), + ('ED 9F 7F', FFFD+'\x7f'), ('ED 9F C0', FFFDx2), + ('ED 9F FF', FFFDx2), ('EE 00', FFFD+'\x00'), + ('EE 7F', FFFD+'\x7f'), ('EE C0', FFFDx2), ('EE FF', FFFDx2), + ('EE 80 00', FFFD+'\x00'), ('EE 80 7F', FFFD+'\x7f'), + ('EE 80 C0', FFFDx2), ('EE 80 FF', FFFDx2), + ('EE BF 00', FFFD+'\x00'), ('EE BF 7F', FFFD+'\x7f'), + ('EE BF C0', FFFDx2), ('EE BF FF', FFFDx2), ('EF 00', FFFD+'\x00'), + ('EF 7F', FFFD+'\x7f'), ('EF C0', FFFDx2), ('EF FF', FFFDx2), + ('EF 80 00', FFFD+'\x00'), ('EF 80 7F', FFFD+'\x7f'), + ('EF 80 C0', FFFDx2), ('EF 80 FF', FFFDx2), + ('EF BF 00', FFFD+'\x00'), ('EF BF 7F', FFFD+'\x7f'), + ('EF BF C0', FFFDx2), ('EF BF FF', FFFDx2), + ] + err = 'invalid continuation byte' + for s, res in sequences: + seq = bytes(int(c, 16) 
for c in s.split()) + print(seq, [hex(ord(c)) for c in res]) + exc = raises(UnicodeDecodeError, seq.decode, 'utf-8') + assert err in str(exc.value) + assert seq.decode('utf-8', 'replace') == res + assert ((b'aaaa' + seq + b'bbbb').decode('utf-8', 'replace') == + 'aaaa' + res + 'bbbb') + res = res.replace('\ufffd', '') + assert seq.decode('utf-8', 'ignore') == res + assert((b'aaaa' + seq + b'bbbb').decode('utf-8', 'ignore') == + 'aaaa' + res + 'bbbb') class AppTestPartialEvaluation: spaceconfig = dict(usemodules=['array',]) From pypy.commits at gmail.com Tue Jan 22 05:56:25 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 22 Jan 2019 02:56:25 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: try not special-casing mac OS Message-ID: <5c46f6d9.1c69fb81.a17e2.0d3b@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95693:3f8c26309cd2 Date: 2019-01-22 12:55 +0200 http://bitbucket.org/pypy/pypy/changeset/3f8c26309cd2/ Log: try not special-casing mac OS diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -80,11 +80,11 @@ slen = len(bytes) uni, size = runicode.str_decode_mbcs(bytes, slen, 'strict', final=True, errorhandler=errorhandler, force_ignore=False) - elif _MACOSX: + elif 0 and _MACOSX: bytes = space.bytes_w(w_string) utf8 = str_decode_utf8(bytes, 'surrogateescape', True, errorhandler, allow_surrogates=False)[0] - uni = space.realunicode_w(utf8) + uni = utf8.decode('utf-8') elif space.sys.filesystemencoding is None or state.codec_need_encodings: # bootstrap check: if the filesystemencoding isn't initialized # or the filesystem codec is implemented in Python we cannot @@ -110,7 +110,7 @@ errorhandler=state.encode_error_handler utf8 = space.utf8_w(w_uni) bytes = utf8_encode_mbcs(utf8, 'strict', errorhandler) - elif _MACOSX: + elif 0 and _MACOSX: utf8 = space.utf8_w(w_uni) errorhandler=state.encode_error_handler, bytes = 
utf8_encode_utf_8(utf8, 'surrogateescape', errorhandler, From pypy.commits at gmail.com Tue Jan 22 06:54:44 2019 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 22 Jan 2019 03:54:44 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: change UnicodeDictStrategy to store wrapped unicode objects as keys. this lifts Message-ID: <5c470484.1c69fb81.3fc1e.6635@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r95694:b6331207f8b9 Date: 2019-01-21 13:11 +0100 http://bitbucket.org/pypy/pypy/changeset/b6331207f8b9/ Log: change UnicodeDictStrategy to store wrapped unicode objects as keys. this lifts the restriction that they are ascii only. the faster hash and eq dispatching should still be a big win compared to going through the space, despite the keys being wrapped. diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -639,7 +639,7 @@ if type(w_key) is self.space.StringObjectCls: self.switch_to_bytes_strategy(w_dict) return - elif type(w_key) is self.space.UnicodeObjectCls and w_key.is_ascii(): + elif type(w_key) is self.space.UnicodeObjectCls: self.switch_to_unicode_strategy(w_dict) return w_type = self.space.type(w_key) @@ -1193,6 +1193,11 @@ create_iterator_classes(BytesDictStrategy) +def unicode_eq(w_uni1, w_uni2): + return w_uni1.eq_w(w_uni2) + +def unicode_hash(w_uni): + return w_uni.hash_w() class UnicodeDictStrategy(AbstractTypedStrategy, DictStrategy): erase, unerase = rerased.new_erasing_pair("unicode") @@ -1200,18 +1205,18 @@ unerase = staticmethod(unerase) def wrap(self, unwrapped): - return self.space.newutf8(unwrapped, len(unwrapped)) + return unwrapped def unwrap(self, wrapped): - return self.space.utf8_w(wrapped) + return wrapped def is_correct_type(self, w_obj): space = self.space - return type(w_obj) is space.UnicodeObjectCls and w_obj.is_ascii() + return type(w_obj) is space.UnicodeObjectCls def 
get_empty_storage(self): - res = {} - mark_dict_non_null(res) + res = r_dict(unicode_eq, unicode_hash, + force_non_null=True) return self.erase(res) def _never_equal_to(self, w_lookup_type): @@ -1235,14 +1240,14 @@ ## assert key is not None ## return self.unerase(w_dict.dstorage).get(key, None) - def listview_utf8(self, w_dict): - return self.unerase(w_dict.dstorage).keys() + ## def listview_utf8(self, w_dict): + ## return self.unerase(w_dict.dstorage).keys() ## def w_keys(self, w_dict): ## return self.space.newlist_bytes(self.listview_bytes(w_dict)) def wrapkey(space, key): - return space.newutf8(key, len(key)) + return key ## @jit.look_inside_iff(lambda self, w_dict: ## w_dict_unrolling_heuristic(w_dict)) diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py --- a/pypy/objspace/std/test/test_dictmultiobject.py +++ b/pypy/objspace/std/test/test_dictmultiobject.py @@ -1,3 +1,4 @@ +# -*- encoding: utf-8 -*- import sys import py @@ -141,6 +142,7 @@ w_d.initialize_content([(wb("a"), w(1)), (wb("b"), w(2))]) assert self.space.listview_bytes(w_d) == ["a", "b"] + @py.test.mark.skip("possible re-enable later?") def test_listview_unicode_dict(self): w = self.space.wrap w_d = self.space.newdict() @@ -1151,8 +1153,11 @@ assert d.keys() == [u"a"] assert type(d.keys()[0]) is unicode + d = {} + d[u"ä"] = 1 + assert "UnicodeDictStrategy" in self.get_strategy(d) + def test_empty_to_int(self): - import sys d = {} d[1] = "hi" assert "IntDictStrategy" in self.get_strategy(d) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -224,10 +224,19 @@ def descr_str(self, space): return encode_object(space, self, 'ascii', 'strict') - def descr_hash(self, space): + def hash_w(self): + # shortcut for UnicodeDictStrategy x = compute_hash(self._utf8) x -= (x == -1) # convert -1 to -2 without creating a bridge - return 
space.newint(x) + return x + + def descr_hash(self, space): + return space.newint(self.hash_w()) + + def eq_w(self, w_other): + # shortcut for UnicodeDictStrategy + assert isinstance(w_other, W_UnicodeObject) + return self._utf8 == w_other._utf8 def descr_eq(self, space, w_other): try: From pypy.commits at gmail.com Tue Jan 22 06:54:46 2019 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 22 Jan 2019 03:54:46 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: make json decoding efficient again Message-ID: <5c470486.1c69fb81.73b0b.8c3a@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r95695:df16e768b3bb Date: 2019-01-22 12:49 +0100 http://bitbucket.org/pypy/pypy/changeset/df16e768b3bb/ Log: make json decoding efficient again diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -248,8 +248,7 @@ self.pos = i+1 return self.space.newdict() - # XXX this should be improved to use an unwrapped dict - w_dict = self.space.newdict() + d = self._create_empty_dict() while True: # parse a key: value w_name = self.decode_key(i) @@ -261,13 +260,13 @@ i = self.skip_whitespace(i) # w_value = self.decode_any(i) - self.space.setitem(w_dict, w_name, w_value) + d[w_name] = w_value i = self.skip_whitespace(self.pos) ch = self.ll_chars[i] i += 1 if ch == '}': self.pos = i - return w_dict + return self._create_dict(d) elif ch == ',': pass elif ch == '\0': @@ -306,6 +305,15 @@ return self.space.newutf8(self.getslice(start, end), end - start) + def _create_dict(self, d): + from pypy.objspace.std.dictmultiobject import from_unicode_key_dict + return from_unicode_key_dict(self.space, d) + + def _create_empty_dict(self): + from pypy.objspace.std.dictmultiobject import create_empty_unicode_key_dict + return create_empty_unicode_key_dict(self.space) + + def decode_string_escaped(self, start): i = self.pos builder = 
StringBuilder((i - start) * 2) # just an estimate diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -41,6 +41,16 @@ w_dct.length() <= UNROLL_CUTOFF) +# for json decoder +def create_empty_unicode_key_dict(space): + return r_dict(unicode_eq, unicode_hash, + force_non_null=True) + +def from_unicode_key_dict(space, d): + strategy = space.fromcache(UnicodeDictStrategy) + return W_DictObject(space, strategy, strategy.erase(d)) + + class W_DictMultiObject(W_Root): """ Abstract base class that does not store a strategy. """ __slots__ = ['space', 'dstorage'] @@ -1215,8 +1225,7 @@ return type(w_obj) is space.UnicodeObjectCls def get_empty_storage(self): - res = r_dict(unicode_eq, unicode_hash, - force_non_null=True) + res = create_empty_unicode_key_dict(self.space) return self.erase(res) def _never_equal_to(self, w_lookup_type): From pypy.commits at gmail.com Tue Jan 22 06:57:31 2019 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 22 Jan 2019 03:57:31 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: done by df16e768b3bb Message-ID: <5c47052b.1c69fb81.8bd3b.c1e3@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r95696:4bd5dbda0076 Date: 2019-01-22 12:56 +0100 http://bitbucket.org/pypy/pypy/changeset/4bd5dbda0076/ Log: done by df16e768b3bb diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -2,6 +2,5 @@ if one is not already readily available * write the correct jit_elidable in _get_index_storage * improve performance of splitlines -* fix _pypyjson to not use a wrapped dict when decoding an object * make sure we review all the places that call ord(unichr) to check for ValueErrors * Find a more elegant way to define MAXUNICODE in rpython/rlib/runicode.py From pypy.commits at gmail.com Tue Jan 22 12:18:00 2019 From: pypy.commits at gmail.com (rlamy) Date: Tue, 22 Jan 2019 09:18:00 -0800 (PST) Subject: 
[pypy-commit] pypy default: Reduce indirection in generating hypothesis tests Message-ID: <5c475048.1c69fb81.9c516.bdf2@mx.google.com> Author: Ronan Lamy Branch: Changeset: r95697:eec4eefc8618 Date: 2019-01-22 17:17 +0000 http://bitbucket.org/pypy/pypy/changeset/eec4eefc8618/ Log: Reduce indirection in generating hypothesis tests diff --git a/rpython/jit/metainterp/test/test_zvector.py b/rpython/jit/metainterp/test/test_zvector.py --- a/rpython/jit/metainterp/test/test_zvector.py +++ b/rpython/jit/metainterp/test/test_zvector.py @@ -90,47 +90,39 @@ # FLOAT UNARY - def _vector_float_unary(self, func, type, data): + @pytest.mark.parametrize('func', + [lambda v: abs(v), lambda v: -v], + ids=['abs', 'neg']) + @given(la=st.lists(st.floats(), min_size=10, max_size=150)) + def test_vector_float_unary(self, func, la): func = always_inline(func) + tp = rffi.DOUBLE - size = rffi.sizeof(type) - myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True) + size = rffi.sizeof(tp) + myjitdriver = JitDriver(greens=[], reds='auto', vectorize=True) + def f(bytecount, va, vc): i = 0 while i < bytecount: myjitdriver.jit_merge_point() - a = raw_storage_getitem(type,va,i) + a = raw_storage_getitem(tp, va, i) c = func(a) - raw_storage_setitem(vc, i, rffi.cast(type,c)) + raw_storage_setitem(vc, i, rffi.cast(tp, c)) i += size - la = data.draw(st.lists(st.floats(), min_size=10, max_size=150)) l = len(la) - rawstorage = RawStorage() - va = rawstorage.new(la, type) - vc = rawstorage.new(None, type, size=l) - self.meta_interp(f, [l*size, va, vc], vec=True) + va = rawstorage.new(la, tp) + vc = rawstorage.new(None, tp, size=l) + self.meta_interp(f, [l * size, va, vc], vec=True) for i in range(l): - c = raw_storage_getitem(type,vc,i*size) + c = raw_storage_getitem(tp, vc, i * size) r = func(la[i]) assert isclose(r, c) rawstorage.clear() - def vec_int_unary(test_func, unary_func, type): - return pytest.mark.parametrize('func,type', [ - (unary_func, type) - 
])(given(data=st.data())(test_func)) - - vec_float_unary = functools.partial(vec_int_unary, _vector_float_unary) - - test_vec_float_abs = \ - vec_float_unary(lambda v: abs(v), rffi.DOUBLE) - test_vec_float_neg = \ - vec_float_unary(lambda v: -v, rffi.DOUBLE) - # FLOAT BINARY def _vector_simple_float(self, func, type, data): @@ -376,38 +368,37 @@ res = self.meta_interp(f, [count], vec=True) assert res == f(count) == breaks - def _vec_reduce(self, strat, func, type, data): - func = always_inline(func) + def vec_reduce(strat, arith_func, tp): + @pytest.mark.parametrize('func, tp', [ + (arith_func, tp) + ]) + @given(la=st.lists(strat, min_size=11, max_size=150)) + def _vec_reduce(self, func, tp, la): + func = always_inline(func) - size = rffi.sizeof(type) - myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True) - def f(accum, bytecount, v): - i = 0 - while i < bytecount: - myjitdriver.jit_merge_point() - e = raw_storage_getitem(type,v,i) - accum = func(accum,e) - i += size - return accum + size = rffi.sizeof(tp) + myjitdriver = JitDriver(greens=[], reds='auto', vectorize=True) - la = data.draw(st.lists(strat, min_size=10, max_size=150)) - #la = [1.0] * 10 - l = len(la) + def f(accum, bytecount, v): + i = 0 + while i < bytecount: + myjitdriver.jit_merge_point() + e = raw_storage_getitem(tp, v, i) + accum = func(accum, e) + i += size + return accum - accum = data.draw(strat) - rawstorage = RawStorage() - va = rawstorage.new(la, type) - res = self.meta_interp(f, [accum, l*size, va], vec=True) + accum = la[0] + la = la[1:] + l = len(la) + rawstorage = RawStorage() + va = rawstorage.new(la, tp) + res = self.meta_interp(f, [accum, l * size, va], vec=True) - assert isclose(rffi.cast(type, res), f(accum, l*size, va)) + assert isclose(rffi.cast(tp, res), f(accum, l * size, va)) - rawstorage.clear() - - def vec_reduce(test_func, strat, arith_func, type): - return pytest.mark.parametrize('strat,func,type', [ - (strat, arith_func, type) - 
])(given(data=st.data())(test_func)) - vec_reduce = functools.partial(vec_reduce, _vec_reduce) + rawstorage.clear() + return _vec_reduce test_vec_int_sum = vec_reduce(st.integers(min_value=-2**(64-1), max_value=2**(64-1)-1), lambda a,b: lltype.intmask(lltype.intmask(a)+lltype.intmask(b)), lltype.Signed) From pypy.commits at gmail.com Tue Jan 22 12:31:24 2019 From: pypy.commits at gmail.com (rlamy) Date: Tue, 22 Jan 2019 09:31:24 -0800 (PST) Subject: [pypy-commit] pypy default: Delete confusing unused method Message-ID: <5c47536c.1c69fb81.64e9b.b134@mx.google.com> Author: Ronan Lamy Branch: Changeset: r95698:33fe32bc9404 Date: 2019-01-22 17:30 +0000 http://bitbucket.org/pypy/pypy/changeset/33fe32bc9404/ Log: Delete confusing unused method diff --git a/rpython/jit/metainterp/test/test_zvector.py b/rpython/jit/metainterp/test/test_zvector.py --- a/rpython/jit/metainterp/test/test_zvector.py +++ b/rpython/jit/metainterp/test/test_zvector.py @@ -81,13 +81,6 @@ if not self.supports_vector_ext(): py.test.skip("this cpu %s has no implemented vector backend" % CPU) - def meta_interp(self, f, args, policy=None, vec=True, vec_all=False): - return ll_meta_interp(f, args, enable_opts=self.enable_opts, - policy=policy, - CPUClass=self.CPUClass, - type_system=self.type_system, - vec=vec, vec_all=vec_all) - # FLOAT UNARY @pytest.mark.parametrize('func', From pypy.commits at gmail.com Wed Jan 23 09:46:12 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 23 Jan 2019 06:46:12 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: finish f287dec62c4e for swapcase, capitalize Message-ID: <5c487e34.1c69fb81.8529a.a4b1@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95699:a7867a23009b Date: 2019-01-22 23:17 +0200 http://bitbucket.org/pypy/pypy/changeset/a7867a23009b/ Log: finish f287dec62c4e for swapcase, capitalize diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- 
a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -939,6 +939,9 @@ def test_swapcase(self): assert '\xe4\xc4\xdf'.swapcase() == '\xc4\xe4SS' + # sigma-little becomes sigma-little-final + assert u'A\u0345\u03a3'.swapcase() == u'a\u0399\u03c2' + # but not if the previous codepoint is 0-width assert u'\u0345\u03a3'.swapcase() == u'\u0399\u03c3' def test_call_special_methods(self): diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -375,15 +375,20 @@ def descr_swapcase(self, space): value = self._utf8 builder = rutf8.Utf8StringBuilder(len(value)) + i = 0 for ch in rutf8.Utf8StringIterator(value): if unicodedb.isupper(ch): - codes = unicodedb.tolower_full(ch) + if ch == 0x3a3: + codes = [self._handle_capital_sigma(value, i),] + else: + codes = unicodedb.tolower_full(ch) elif unicodedb.islower(ch): codes = unicodedb.toupper_full(ch) else: codes = [ch,] for c in codes: builder.append_code(c) + i += 1 return self.from_utf8builder(builder) def descr_title(self, space): @@ -847,21 +852,23 @@ if self._len() == 0: return self._empty() - builder = rutf8.Utf8StringBuilder(len(self._utf8)) - it = rutf8.Utf8StringIterator(self._utf8) + value = self._utf8 + builder = rutf8.Utf8StringBuilder(len(value)) + it = rutf8.Utf8StringIterator(value) uchar = it.next() codes = unicodedb.toupper_full(uchar) # can sometimes give more than one, like for omega-with-Ypogegrammeni, 8179 for c in codes: builder.append_code(c) + i = 1 for ch in it: - ch = unicodedb.tolower_full(ch) - if it.done(): - # Special case lower-sigma - if ch[-1] == 0x03c3: - ch[-1] = 0x03c2 - for c in ch: + if ch == 0x3a3: + codes = [self._handle_capital_sigma(value, i),] + else: + codes = unicodedb.tolower_full(ch) + for c in codes: builder.append_code(c) + i += 1 return self.from_utf8builder(builder) @unwrap_spec(width=int, w_fillchar=WrappedDefault(u' ')) 
From pypy.commits at gmail.com Wed Jan 23 09:46:14 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 23 Jan 2019 06:46:14 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: test, refactor logic around invalid continuation byte Message-ID: <5c487e36.1c69fb81.8c71a.3e15@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95700:ee5c96ad9ed6 Date: 2019-01-23 11:30 +0200 http://bitbucket.org/pypy/pypy/changeset/ee5c96ad9ed6/ Log: test, refactor logic around invalid continuation byte diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -328,7 +328,7 @@ res = rutf8.utf8_encode_mbcs(s, errors, errorhandler, force_replace=False) return res - + def str_decode_mbcs(s, errors, final, errorhandler, force_ignore=True): slen = len(s) res, size = runicode.str_decode_mbcs(s, slen, errors, final=final, @@ -345,21 +345,18 @@ res = StringBuilder(slen) pos = 0 end = len(s) - suppressing = False # we are in a chain of "bad" unicode, only emit one fix while pos < end: ordch1 = ord(s[pos]) # fast path for ASCII if ordch1 <= 0x7F: pos += 1 res.append(chr(ordch1)) - suppressing = False continue if ordch1 <= 0xC1: r, pos, rettype = errorhandler(errors, "utf8", "invalid start byte", s, pos, pos + 1) - if not suppressing: - res.append(r) + res.append(r) continue pos += 1 @@ -371,16 +368,14 @@ break r, pos, rettype = errorhandler(errors, "utf8", "unexpected end of data", s, pos - 1, pos) - if not suppressing: - res.append(r) + res.append(r) continue ordch2 = ord(s[pos]) if rutf8._invalid_byte_2_of_2(ordch2): r, pos, rettype = errorhandler(errors, "utf8", "invalid continuation byte", s, pos - 1, pos) - if not suppressing: - res.append(r) + res.append(r) continue # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz pos += 1 @@ -396,10 +391,13 @@ if (pos) < end and rutf8._invalid_byte_2_of_3(ordch1, ord(s[pos]), allow_surrogates): msg = "invalid 
continuation byte" + r, pos, rettype = errorhandler(errors, "utf8", msg, s, + pos - 1, pos) else: msg = "unexpected end of data" - suppressing = True - r, pos, rettype = errorhandler(errors, "utf8", msg, s, pos - 1, pos) + r, pos, rettype = errorhandler(errors, "utf8", msg, s, + pos - 1, pos) + pos = end res.append(r) continue ordch2 = ord(s[pos]) @@ -408,14 +406,12 @@ if rutf8._invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates): r, pos, rettype = errorhandler(errors, "utf8", "invalid continuation byte", s, pos - 1, pos) - if not suppressing: - res.append(r) + res.append(r) continue elif rutf8._invalid_byte_3_of_3(ordch3): r, pos, rettype = errorhandler(errors, "utf8", "invalid continuation byte", s, pos - 1, pos + 1) - if not suppressing: - res.append(r) + res.append(r) continue pos += 2 @@ -423,7 +419,6 @@ res.append(chr(ordch1)) res.append(chr(ordch2)) res.append(chr(ordch3)) - suppressing = False continue if ordch1 <= 0xF4: @@ -433,23 +428,27 @@ break if pos < end and rutf8._invalid_byte_2_of_4(ordch1, ord(s[pos])): msg = "invalid continuation byte" + r, pos, rettype = errorhandler(errors, "utf8", msg, s, + pos - 1, pos) elif pos + 1 < end and rutf8._invalid_byte_3_of_4(ord(s[pos + 1])): msg = "invalid continuation byte" + pos += 1 + r, pos, rettype = errorhandler(errors, "utf8", msg, s, + pos - 2, pos) else: msg = "unexpected end of data" - suppressing = True - r, pos, rettype = errorhandler(errors, "utf8", msg, s, pos - 1, pos) + r, pos, rettype = errorhandler(errors, "utf8", msg, s, + pos - 1, pos) + pos = end res.append(r) continue ordch2 = ord(s[pos]) ordch3 = ord(s[pos + 1]) ordch4 = ord(s[pos + 2]) - if rutf8._invalid_byte_2_of_4(ordch1, ordch2): r, pos, rettype = errorhandler(errors, "utf8", "invalid continuation byte", s, pos - 1, pos) - if not suppressing: - res.append(r) + res.append(r) continue elif rutf8._invalid_byte_3_of_4(ordch3): r, pos, rettype = errorhandler(errors, "utf8", "invalid continuation byte", @@ -459,8 +458,7 @@ elif 
rutf8._invalid_byte_4_of_4(ordch4): r, pos, rettype = errorhandler(errors, "utf8", "invalid continuation byte", s, pos - 1, pos + 2) - if not suppressing: - res.append(r) + res.append(r) continue pos += 3 @@ -469,13 +467,11 @@ res.append(chr(ordch2)) res.append(chr(ordch3)) res.append(chr(ordch4)) - suppressing = False continue r, pos, rettype = errorhandler(errors, "utf8", "invalid start byte", s, pos - 1, pos) - if not suppressing: - res.append(r) + res.append(r) r = res.build() return r, rutf8.check_utf8(r, True), pos diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -230,7 +230,7 @@ (b'\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64', '\x61\uFFFD\uFFFD\uFFFD\x62\uFFFD\x63\uFFFD\uFFFD\x64'), ] - for n, (seq, res) in enumerate(sequences): + for (seq, res) in sequences: raises(UnicodeDecodeError, seq.decode, 'utf-8', 'strict') uni = seq.decode('utf-8', 'replace') assert uni == res @@ -329,7 +329,6 @@ err = 'invalid continuation byte' for s, res in sequences: seq = bytes(int(c, 16) for c in s.split()) - print(seq, [hex(ord(c)) for c in res]) exc = raises(UnicodeDecodeError, seq.decode, 'utf-8') assert err in str(exc.value) assert seq.decode('utf-8', 'replace') == res @@ -340,6 +339,95 @@ assert((b'aaaa' + seq + b'bbbb').decode('utf-8', 'ignore') == 'aaaa' + res + 'bbbb') + def test_invalid_cb_for_4bytes_seq(self): + """ + Test that an 'invalid continuation byte' error is raised when the + continuation byte(s) of a 4-bytes sequence are invalid. When + errors='replace',the start byte and all the following valid + continuation bytes are replaced with a single U+FFFD, and all the bytes + starting from the first invalid continuation bytes (included) are + handled separately. + E.g. 
in the sequence <E1 80 41>, E1 is the start byte of a 3-bytes + sequence, 80 is a valid continuation byte, but 41 is not a valid cb + because it's the ASCII letter 'A'. + Note: when the start byte is E0 or ED, the valid ranges for the first + continuation byte are limited to A0..BF and 80..9F respectively. + However, when the start byte is ED, Python 2 considers all the bytes + in range 80..BF valid. This is fixed in Python 3. + """ + FFFD = '\ufffd' + FFFDx2 = FFFD * 2 + sequences = [ + ('F0 00', FFFD+'\x00'), ('F0 7F', FFFD+'\x7f'), ('F0 80', FFFDx2), + ('F0 8F', FFFDx2), ('F0 C0', FFFDx2), ('F0 FF', FFFDx2), + ('F0 90 00', FFFD+'\x00'), ('F0 90 7F', FFFD+'\x7f'), + ('F0 90 C0', FFFDx2), ('F0 90 FF', FFFDx2), + ('F0 BF 00', FFFD+'\x00'), ('F0 BF 7F', FFFD+'\x7f'), + ('F0 BF C0', FFFDx2), ('F0 BF FF', FFFDx2), + ('F0 90 80 00', FFFD+'\x00'), ('F0 90 80 7F', FFFD+'\x7f'), + ('F0 90 80 C0', FFFDx2), ('F0 90 80 FF', FFFDx2), + ('F0 90 BF 00', FFFD+'\x00'), ('F0 90 BF 7F', FFFD+'\x7f'), + ('F0 90 BF C0', FFFDx2), ('F0 90 BF FF', FFFDx2), + ('F0 BF 80 00', FFFD+'\x00'), ('F0 BF 80 7F', FFFD+'\x7f'), + ('F0 BF 80 C0', FFFDx2), ('F0 BF 80 FF', FFFDx2), + ('F0 BF BF 00', FFFD+'\x00'), ('F0 BF BF 7F', FFFD+'\x7f'), + ('F0 BF BF C0', FFFDx2), ('F0 BF BF FF', FFFDx2), + ('F1 00', FFFD+'\x00'), ('F1 7F', FFFD+'\x7f'), ('F1 C0', FFFDx2), + ('F1 FF', FFFDx2), ('F1 80 00', FFFD+'\x00'), + ('F1 80 7F', FFFD+'\x7f'), ('F1 80 C0', FFFDx2), + ('F1 80 FF', FFFDx2), ('F1 BF 00', FFFD+'\x00'), + ('F1 BF 7F', FFFD+'\x7f'), ('F1 BF C0', FFFDx2), + ('F1 BF FF', FFFDx2), ('F1 80 80 00', FFFD+'\x00'), + ('F1 80 80 7F', FFFD+'\x7f'), ('F1 80 80 C0', FFFDx2), + ('F1 80 80 FF', FFFDx2), ('F1 80 BF 00', FFFD+'\x00'), + ('F1 80 BF 7F', FFFD+'\x7f'), ('F1 80 BF C0', FFFDx2), + ('F1 80 BF FF', FFFDx2), ('F1 BF 80 00', FFFD+'\x00'), + ('F1 BF 80 7F', FFFD+'\x7f'), ('F1 BF 80 C0', FFFDx2), + ('F1 BF 80 FF', FFFDx2), ('F1 BF BF 00', FFFD+'\x00'), + ('F1 BF BF 7F', FFFD+'\x7f'), ('F1 BF BF C0', FFFDx2), + ('F1
BF BF FF', FFFDx2), ('F3 00', FFFD+'\x00'), + ('F3 7F', FFFD+'\x7f'), ('F3 C0', FFFDx2), ('F3 FF', FFFDx2), + ('F3 80 00', FFFD+'\x00'), ('F3 80 7F', FFFD+'\x7f'), + ('F3 80 C0', FFFDx2), ('F3 80 FF', FFFDx2), + ('F3 BF 00', FFFD+'\x00'), ('F3 BF 7F', FFFD+'\x7f'), + ('F3 BF C0', FFFDx2), ('F3 BF FF', FFFDx2), + ('F3 80 80 00', FFFD+'\x00'), ('F3 80 80 7F', FFFD+'\x7f'), + ('F3 80 80 C0', FFFDx2), ('F3 80 80 FF', FFFDx2), + ('F3 80 BF 00', FFFD+'\x00'), ('F3 80 BF 7F', FFFD+'\x7f'), + ('F3 80 BF C0', FFFDx2), ('F3 80 BF FF', FFFDx2), + ('F3 BF 80 00', FFFD+'\x00'), ('F3 BF 80 7F', FFFD+'\x7f'), + ('F3 BF 80 C0', FFFDx2), ('F3 BF 80 FF', FFFDx2), + ('F3 BF BF 00', FFFD+'\x00'), ('F3 BF BF 7F', FFFD+'\x7f'), + ('F3 BF BF C0', FFFDx2), ('F3 BF BF FF', FFFDx2), + ('F4 00', FFFD+'\x00'), ('F4 7F', FFFD+'\x7f'), ('F4 90', FFFDx2), + ('F4 BF', FFFDx2), ('F4 C0', FFFDx2), ('F4 FF', FFFDx2), + ('F4 80 00', FFFD+'\x00'), ('F4 80 7F', FFFD+'\x7f'), + ('F4 80 C0', FFFDx2), ('F4 80 FF', FFFDx2), + ('F4 8F 00', FFFD+'\x00'), ('F4 8F 7F', FFFD+'\x7f'), + ('F4 8F C0', FFFDx2), ('F4 8F FF', FFFDx2), + ('F4 80 80 00', FFFD+'\x00'), ('F4 80 80 7F', FFFD+'\x7f'), + ('F4 80 80 C0', FFFDx2), ('F4 80 80 FF', FFFDx2), + ('F4 80 BF 00', FFFD+'\x00'), ('F4 80 BF 7F', FFFD+'\x7f'), + ('F4 80 BF C0', FFFDx2), ('F4 80 BF FF', FFFDx2), + ('F4 8F 80 00', FFFD+'\x00'), ('F4 8F 80 7F', FFFD+'\x7f'), + ('F4 8F 80 C0', FFFDx2), ('F4 8F 80 FF', FFFDx2), + ('F4 8F BF 00', FFFD+'\x00'), ('F4 8F BF 7F', FFFD+'\x7f'), + ('F4 8F BF C0', FFFDx2), ('F4 8F BF FF', FFFDx2) + ] + err = 'invalid continuation byte' + for s, res in sequences: + seq = bytes(int(c, 16) for c in s.split()) + exc = raises(UnicodeDecodeError, seq.decode, 'utf-8') + assert err in str(exc.value) + assert seq.decode('utf-8', 'replace') == res + assert ((b'aaaa' + seq + b'bbbb').decode('utf-8', 'replace') == + 'aaaa' + res + 'bbbb') + res = res.replace('\ufffd', '') + assert seq.decode('utf-8', 'ignore') == res + assert((b'aaaa' + seq + 
b'bbbb').decode('utf-8', 'ignore') == + 'aaaa' + res + 'bbbb') + + class AppTestPartialEvaluation: spaceconfig = dict(usemodules=['array',]) @@ -612,7 +700,6 @@ exc = raises(UnicodeDecodeError, codecs.decode, b'\xff', 'ascii') exc = raises(UnicodeDecodeError, codecs.decode, b'\xe0\x00', 'utf-8') - print(dir(exc.value)) assert 'invalid continuation byte' in exc.value.reason def test_bad_errorhandler_return(self): From pypy.commits at gmail.com Wed Jan 23 09:46:16 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 23 Jan 2019 06:46:16 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: match startswith, endswith cpython behaviour for needle='', start>0, end==0 Message-ID: <5c487e38.1c69fb81.73b0b.17d4@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95701:d7b0f65f80a2 Date: 2019-01-23 16:28 +0200 http://bitbucket.org/pypy/pypy/changeset/d7b0f65f80a2/ Log: match startswith, endswith cpython behaviour for needle='',start>0,end==0 diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -156,7 +156,8 @@ assert w_res is space.newbool(expected) expected = u.startswith(v, start, start + len1) - if expected and start > len(u): + if ((expected and start > len(u)) or + (start > 0 and start + len1 ==0)): expected = False # python2 vs. python3 w_res = space.call_method(w_u, 'startswith', w_v, space.newint(start), @@ -171,7 +172,8 @@ assert w_res is space.newbool(expected) expected = u.endswith(v, start, start + len1) - if expected and start > len(u): + if ((expected and start > len(u)) or + (start > 0 and start + len1 ==0)): expected = False # python2 vs. 
python3 w_res = space.call_method(w_u, 'endswith', w_v, space.newint(start), @@ -626,6 +628,8 @@ assert ''.endswith('a') is False assert 'x'.endswith('xx') is False assert 'y'.endswith('xx') is False + assert 'x'.endswith('', 1, 0) is False + def test_endswith_more(self): assert 'abc'.endswith('ab', 0, 2) is True diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -636,6 +636,8 @@ def descr_startswith(self, space, w_prefix, w_start=None, w_end=None): start, end = self._unwrap_and_compute_idx_params(space, w_start, w_end) value = self._utf8 + if start > 0 and not space.is_none(w_end) and space.int_w(w_end) == 0: + return space.w_False if space.isinstance_w(w_prefix, space.w_tuple): return self._startswith_tuple(space, value, w_prefix, start, end) try: @@ -657,6 +659,9 @@ def descr_endswith(self, space, w_suffix, w_start=None, w_end=None): start, end = self._unwrap_and_compute_idx_params(space, w_start, w_end) value = self._utf8 + # match cpython behaviour + if start > 0 and not space.is_none(w_end) and space.int_w(w_end) == 0: + return space.w_False if space.isinstance_w(w_suffix, space.w_tuple): return self._endswith_tuple(space, value, w_suffix, start, end) try: From pypy.commits at gmail.com Wed Jan 23 09:46:18 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 23 Jan 2019 06:46:18 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: test, fix for pickling StringIO with pos != 0 Message-ID: <5c487e3a.1c69fb81.3c5dd.b5bc@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95702:8e00f53ea94c Date: 2019-01-23 16:28 +0200 http://bitbucket.org/pypy/pypy/changeset/8e00f53ea94c/ Log: test, fix for pickling StringIO with pos != 0 diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -134,8 +134,11 @@ if 
space.is_w(w_newline, space.w_None): newline = None + elif space.isinstance_w(w_newline, space.w_unicode): + newline = space.utf8_w(w_newline) else: - newline = space.utf8_w(w_newline) + raise oefmt(space.w_TypeError, + "newline must be str or None, not %T", w_newline) if (newline is not None and newline != "" and newline != "\n" and newline != "\r" and newline != "\r\n"): @@ -202,7 +205,8 @@ if pos < 0: raise oefmt(space.w_ValueError, "position value cannot be negative") - self.buf = UnicodeIO(initval, pos) + self.buf = UnicodeIO(initval) + self.buf.seek(pos) if not space.is_w(w_dict, space.w_None): if not space.isinstance_w(w_dict, space.w_dict): raise oefmt( diff --git a/pypy/module/_io/test/test_stringio.py b/pypy/module/_io/test/test_stringio.py --- a/pypy/module/_io/test/test_stringio.py +++ b/pypy/module/_io/test/test_stringio.py @@ -248,6 +248,8 @@ assert sio.newlines == ("\n", "\r\n") sio.write("c\rd") assert sio.newlines == ("\r", "\n", "\r\n") + exc = raises(TypeError, io.StringIO, newline=b'\n') + assert 'bytes' in str(exc.value) def test_iterator(self): import io @@ -305,3 +307,18 @@ raises(TypeError, sio.__setstate__, 0) sio.close() raises(ValueError, sio.__setstate__, ("closed", "", 0, None)) + + def test_roundtrip_state(self): + import io + s = '12345678' + sio1 = io.StringIO(s) + sio1.foo = 42 + sio1.seek(2) + assert sio1.getvalue() == s + state = sio1.__getstate__() + sio2 = io.StringIO() + sio2.__setstate__(state) + assert sio2.getvalue() == s + assert sio2.foo == 42 + assert sio2.tell() == 2 + From pypy.commits at gmail.com Wed Jan 23 09:46:20 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 23 Jan 2019 06:46:20 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge default into unicode-utf8 Message-ID: <5c487e3c.1c69fb81.7d44d.fb1b@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95703:f68e17e4136e Date: 2019-01-23 16:30 +0200 http://bitbucket.org/pypy/pypy/changeset/f68e17e4136e/ Log: merge default into 
unicode-utf8 diff --git a/pypy/module/gc/__init__.py b/pypy/module/gc/__init__.py --- a/pypy/module/gc/__init__.py +++ b/pypy/module/gc/__init__.py @@ -4,7 +4,6 @@ class Module(MixedModule): interpleveldefs = { 'collect': 'interp_gc.collect', - 'collect_step': 'interp_gc.collect_step', 'enable': 'interp_gc.enable', 'disable': 'interp_gc.disable', 'isenabled': 'interp_gc.isenabled', @@ -23,6 +22,7 @@ 'get_stats': 'app_referents.get_stats', }) self.interpleveldefs.update({ + 'collect_step': 'interp_gc.collect_step', 'get_rpy_roots': 'referents.get_rpy_roots', 'get_rpy_referents': 'referents.get_rpy_referents', 'get_rpy_memory_usage': 'referents.get_rpy_memory_usage', diff --git a/rpython/jit/metainterp/test/test_zvector.py b/rpython/jit/metainterp/test/test_zvector.py --- a/rpython/jit/metainterp/test/test_zvector.py +++ b/rpython/jit/metainterp/test/test_zvector.py @@ -81,56 +81,41 @@ if not self.supports_vector_ext(): py.test.skip("this cpu %s has no implemented vector backend" % CPU) - def meta_interp(self, f, args, policy=None, vec=True, vec_all=False): - return ll_meta_interp(f, args, enable_opts=self.enable_opts, - policy=policy, - CPUClass=self.CPUClass, - type_system=self.type_system, - vec=vec, vec_all=vec_all) - # FLOAT UNARY - def _vector_float_unary(self, func, type, data): + @pytest.mark.parametrize('func', + [lambda v: abs(v), lambda v: -v], + ids=['abs', 'neg']) + @given(la=st.lists(st.floats(), min_size=10, max_size=150)) + def test_vector_float_unary(self, func, la): func = always_inline(func) + tp = rffi.DOUBLE - size = rffi.sizeof(type) - myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True) + size = rffi.sizeof(tp) + myjitdriver = JitDriver(greens=[], reds='auto', vectorize=True) + def f(bytecount, va, vc): i = 0 while i < bytecount: myjitdriver.jit_merge_point() - a = raw_storage_getitem(type,va,i) + a = raw_storage_getitem(tp, va, i) c = func(a) - raw_storage_setitem(vc, i, rffi.cast(type,c)) + raw_storage_setitem(vc, i, 
rffi.cast(tp, c)) i += size - la = data.draw(st.lists(st.floats(), min_size=10, max_size=150)) l = len(la) - rawstorage = RawStorage() - va = rawstorage.new(la, type) - vc = rawstorage.new(None, type, size=l) - self.meta_interp(f, [l*size, va, vc], vec=True) + va = rawstorage.new(la, tp) + vc = rawstorage.new(None, tp, size=l) + self.meta_interp(f, [l * size, va, vc], vec=True) for i in range(l): - c = raw_storage_getitem(type,vc,i*size) + c = raw_storage_getitem(tp, vc, i * size) r = func(la[i]) assert isclose(r, c) rawstorage.clear() - def vec_int_unary(test_func, unary_func, type): - return pytest.mark.parametrize('func,type', [ - (unary_func, type) - ])(given(data=st.data())(test_func)) - - vec_float_unary = functools.partial(vec_int_unary, _vector_float_unary) - - test_vec_float_abs = \ - vec_float_unary(lambda v: abs(v), rffi.DOUBLE) - test_vec_float_neg = \ - vec_float_unary(lambda v: -v, rffi.DOUBLE) - # FLOAT BINARY def _vector_simple_float(self, func, type, data): @@ -376,38 +361,37 @@ res = self.meta_interp(f, [count], vec=True) assert res == f(count) == breaks - def _vec_reduce(self, strat, func, type, data): - func = always_inline(func) + def vec_reduce(strat, arith_func, tp): + @pytest.mark.parametrize('func, tp', [ + (arith_func, tp) + ]) + @given(la=st.lists(strat, min_size=11, max_size=150)) + def _vec_reduce(self, func, tp, la): + func = always_inline(func) - size = rffi.sizeof(type) - myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True) - def f(accum, bytecount, v): - i = 0 - while i < bytecount: - myjitdriver.jit_merge_point() - e = raw_storage_getitem(type,v,i) - accum = func(accum,e) - i += size - return accum + size = rffi.sizeof(tp) + myjitdriver = JitDriver(greens=[], reds='auto', vectorize=True) - la = data.draw(st.lists(strat, min_size=10, max_size=150)) - #la = [1.0] * 10 - l = len(la) + def f(accum, bytecount, v): + i = 0 + while i < bytecount: + myjitdriver.jit_merge_point() + e = raw_storage_getitem(tp, v, i) + accum = 
func(accum, e) + i += size + return accum - accum = data.draw(strat) - rawstorage = RawStorage() - va = rawstorage.new(la, type) - res = self.meta_interp(f, [accum, l*size, va], vec=True) + accum = la[0] + la = la[1:] + l = len(la) + rawstorage = RawStorage() + va = rawstorage.new(la, tp) + res = self.meta_interp(f, [accum, l * size, va], vec=True) - assert isclose(rffi.cast(type, res), f(accum, l*size, va)) + assert isclose(rffi.cast(tp, res), f(accum, l * size, va)) - rawstorage.clear() - - def vec_reduce(test_func, strat, arith_func, type): - return pytest.mark.parametrize('strat,func,type', [ - (strat, arith_func, type) - ])(given(data=st.data())(test_func)) - vec_reduce = functools.partial(vec_reduce, _vec_reduce) + rawstorage.clear() + return _vec_reduce test_vec_int_sum = vec_reduce(st.integers(min_value=-2**(64-1), max_value=2**(64-1)-1), lambda a,b: lltype.intmask(lltype.intmask(a)+lltype.intmask(b)), lltype.Signed) diff --git a/rpython/memory/gc/test/test_direct.py b/rpython/memory/gc/test/test_direct.py --- a/rpython/memory/gc/test/test_direct.py +++ b/rpython/memory/gc/test/test_direct.py @@ -774,7 +774,7 @@ def test_collect_0(self, debuglog): self.gc.collect(1) # start a major debuglog.reset() - self.gc.collect(0) # do ONLY a minor + self.gc.collect(-1) # do ONLY a minor assert debuglog.summary() == {'gc-minor': 1} def test_enable_disable(self, debuglog): diff --git a/rpython/rtyper/test/test_rdict.py b/rpython/rtyper/test/test_rdict.py --- a/rpython/rtyper/test/test_rdict.py +++ b/rpython/rtyper/test/test_rdict.py @@ -1,6 +1,7 @@ import sys from contextlib import contextmanager import signal +from collections import OrderedDict from rpython.translator.translator import TranslationContext from rpython.annotator.model import ( @@ -1196,7 +1197,7 @@ DictValue(None, s_value)) dictrepr.setup() self.l_dict = self.newdict(dictrepr) - self.reference = self.new_reference() + self.reference = OrderedDict() self.ll_key = r_key.convert_const self.ll_value = 
r_value.convert_const self.removed_keys = [] @@ -1323,7 +1324,6 @@ class DictSpace(MappingSpace): MappingRepr = rdict.DictRepr - new_reference = dict ll_getitem = staticmethod(rdict.ll_dict_getitem) ll_setitem = staticmethod(rdict.ll_dict_setitem) ll_delitem = staticmethod(rdict.ll_dict_delitem) diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py --- a/rpython/rtyper/test/test_rordereddict.py +++ b/rpython/rtyper/test/test_rordereddict.py @@ -422,7 +422,6 @@ class ODictSpace(MappingSpace): MappingRepr = rodct.OrderedDictRepr - new_reference = OrderedDict moved_around = False ll_getitem = staticmethod(rodct.ll_dict_getitem) ll_setitem = staticmethod(rodct.ll_dict_setitem) diff --git a/rpython/translator/c/src/mem.h b/rpython/translator/c/src/mem.h --- a/rpython/translator/c/src/mem.h +++ b/rpython/translator/c/src/mem.h @@ -112,6 +112,8 @@ #define OP_GC__DISABLE_FINALIZERS(r) boehm_gc_finalizer_lock++ #define OP_GC__ENABLE_FINALIZERS(r) (boehm_gc_finalizer_lock--, \ boehm_gc_finalizer_notifier()) +#define OP_GC__DISABLE(r) /* nothing */ +#define OP_GC__ENABLE(r) /* nothing */ #define OP_BOEHM_FQ_REGISTER(tagindex, obj, r) \ boehm_fq_register(boehm_fq_queues + tagindex, obj) @@ -127,6 +129,8 @@ #define OP_BOEHM_DISAPPEARING_LINK(link, obj, r) /* nothing */ #define OP_GC__DISABLE_FINALIZERS(r) /* nothing */ #define OP_GC__ENABLE_FINALIZERS(r) /* nothing */ +#define OP_GC__DISABLE(r) /* nothing */ +#define OP_GC__ENABLE(r) /* nothing */ #define GC_REGISTER_FINALIZER(a, b, c, d, e) /* nothing */ #define GC_gcollect() /* nothing */ #define GC_set_max_heap_size(a) /* nothing */ diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -86,7 +86,7 @@ else: log.msg('Running "%s" succeeded' %(vcvars,)) except Exception as e: - log.msg('Running "%s" failed: "%s"', (vcvars, str(e))) + log.msg('Running "%s" 
failed: "%s"' % (vcvars, str(e))) return None stdout = stdout.replace("\r\n", "\n") From pypy.commits at gmail.com Wed Jan 23 09:46:22 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 23 Jan 2019 06:46:22 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: merge py3.5 into unicode-utf8-py3 Message-ID: <5c487e3e.1c69fb81.40e28.ac3e@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95704:5a0b1049fa98 Date: 2019-01-23 16:31 +0200 http://bitbucket.org/pypy/pypy/changeset/5a0b1049fa98/ Log: merge py3.5 into unicode-utf8-py3 diff --git a/pypy/module/gc/__init__.py b/pypy/module/gc/__init__.py --- a/pypy/module/gc/__init__.py +++ b/pypy/module/gc/__init__.py @@ -4,7 +4,6 @@ class Module(MixedModule): interpleveldefs = { 'collect': 'interp_gc.collect', - 'collect_step': 'interp_gc.collect_step', 'enable': 'interp_gc.enable', 'disable': 'interp_gc.disable', 'isenabled': 'interp_gc.isenabled', @@ -23,6 +22,7 @@ 'get_stats': 'app_referents.get_stats', }) self.interpleveldefs.update({ + 'collect_step': 'interp_gc.collect_step', 'get_rpy_roots': 'referents.get_rpy_roots', 'get_rpy_referents': 'referents.get_rpy_referents', 'get_rpy_memory_usage': 'referents.get_rpy_memory_usage', diff --git a/rpython/memory/gc/test/test_direct.py b/rpython/memory/gc/test/test_direct.py --- a/rpython/memory/gc/test/test_direct.py +++ b/rpython/memory/gc/test/test_direct.py @@ -774,7 +774,7 @@ def test_collect_0(self, debuglog): self.gc.collect(1) # start a major debuglog.reset() - self.gc.collect(0) # do ONLY a minor + self.gc.collect(-1) # do ONLY a minor assert debuglog.summary() == {'gc-minor': 1} def test_enable_disable(self, debuglog): diff --git a/rpython/rtyper/test/test_rdict.py b/rpython/rtyper/test/test_rdict.py --- a/rpython/rtyper/test/test_rdict.py +++ b/rpython/rtyper/test/test_rdict.py @@ -1,6 +1,7 @@ import sys from contextlib import contextmanager import signal +from collections import OrderedDict from rpython.translator.translator import 
TranslationContext from rpython.annotator.model import ( @@ -1196,7 +1197,7 @@ DictValue(None, s_value)) dictrepr.setup() self.l_dict = self.newdict(dictrepr) - self.reference = self.new_reference() + self.reference = OrderedDict() self.ll_key = r_key.convert_const self.ll_value = r_value.convert_const self.removed_keys = [] @@ -1323,7 +1324,6 @@ class DictSpace(MappingSpace): MappingRepr = rdict.DictRepr - new_reference = dict ll_getitem = staticmethod(rdict.ll_dict_getitem) ll_setitem = staticmethod(rdict.ll_dict_setitem) ll_delitem = staticmethod(rdict.ll_dict_delitem) diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py --- a/rpython/rtyper/test/test_rordereddict.py +++ b/rpython/rtyper/test/test_rordereddict.py @@ -422,7 +422,6 @@ class ODictSpace(MappingSpace): MappingRepr = rodct.OrderedDictRepr - new_reference = OrderedDict moved_around = False ll_getitem = staticmethod(rodct.ll_dict_getitem) ll_setitem = staticmethod(rodct.ll_dict_setitem) diff --git a/rpython/translator/c/src/mem.h b/rpython/translator/c/src/mem.h --- a/rpython/translator/c/src/mem.h +++ b/rpython/translator/c/src/mem.h @@ -112,6 +112,8 @@ #define OP_GC__DISABLE_FINALIZERS(r) boehm_gc_finalizer_lock++ #define OP_GC__ENABLE_FINALIZERS(r) (boehm_gc_finalizer_lock--, \ boehm_gc_finalizer_notifier()) +#define OP_GC__DISABLE(r) /* nothing */ +#define OP_GC__ENABLE(r) /* nothing */ #define OP_BOEHM_FQ_REGISTER(tagindex, obj, r) \ boehm_fq_register(boehm_fq_queues + tagindex, obj) @@ -127,6 +129,8 @@ #define OP_BOEHM_DISAPPEARING_LINK(link, obj, r) /* nothing */ #define OP_GC__DISABLE_FINALIZERS(r) /* nothing */ #define OP_GC__ENABLE_FINALIZERS(r) /* nothing */ +#define OP_GC__DISABLE(r) /* nothing */ +#define OP_GC__ENABLE(r) /* nothing */ #define GC_REGISTER_FINALIZER(a, b, c, d, e) /* nothing */ #define GC_gcollect() /* nothing */ #define GC_set_max_heap_size(a) /* nothing */ diff --git a/rpython/translator/platform/windows.py 
b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -86,7 +86,7 @@ else: log.msg('Running "%s" succeeded' %(vcvars,)) except Exception as e: - log.msg('Running "%s" failed: "%s"', (vcvars, str(e))) + log.msg('Running "%s" failed: "%s"' % (vcvars, str(e))) return None stdout = stdout.replace("\r\n", "\n") From pypy.commits at gmail.com Wed Jan 23 12:30:00 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 23 Jan 2019 09:30:00 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: backport relevant parts of 8e00f53ea94c Message-ID: <5c48a498.1c69fb81.9319b.be4b@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95705:9853efedd970 Date: 2019-01-23 19:29 +0200 http://bitbucket.org/pypy/pypy/changeset/9853efedd970/ Log: backport relevant parts of 8e00f53ea94c diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -196,7 +196,8 @@ if pos < 0: raise oefmt(space.w_ValueError, "position value cannot be negative") - self.buf = UnicodeIO(initval, pos) + self.buf = UnicodeIO(initval) + self.buf.seek(pos) if not space.is_w(w_dict, space.w_None): if not space.isinstance_w(w_dict, space.w_dict): raise oefmt( diff --git a/pypy/module/_io/test/test_stringio.py b/pypy/module/_io/test/test_stringio.py --- a/pypy/module/_io/test/test_stringio.py +++ b/pypy/module/_io/test/test_stringio.py @@ -307,3 +307,17 @@ raises(TypeError, sio.__setstate__, 0) sio.close() raises(ValueError, sio.__setstate__, (u"closed", u"", 0, None)) + + def test_roundtrip_state(self): + import io + s = u'12345678' + sio1 = io.StringIO(s) + sio1.foo = 42 + sio1.seek(2) + assert sio1.getvalue() == s + state = sio1.__getstate__() + sio2 = io.StringIO() + sio2.__setstate__(state) + assert sio2.getvalue() == s + assert sio2.foo == 42 + assert sio2.tell() == 2 From pypy.commits at gmail.com Thu Jan 24 
12:25:26 2019 From: pypy.commits at gmail.com (antocuni) Date: Thu, 24 Jan 2019 09:25:26 -0800 (PST) Subject: [pypy-commit] pypy release-pypy2.7-7.x: start the release branch for pypy 7.0, and bump the version number Message-ID: <5c49f506.1c69fb81.e81c0.7be1@mx.google.com> Author: Antonio Cuni Branch: release-pypy2.7-7.x Changeset: r95710:d47849ba8135 Date: 2019-01-24 18:20 +0100 http://bitbucket.org/pypy/pypy/changeset/d47849ba8135/ Log: start the release branch for pypy 7.0, and bump the version number diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py --- a/pypy/doc/conf.py +++ b/pypy/doc/conf.py @@ -65,10 +65,15 @@ # |version| and |release|, also used in various other places throughout the # built documents. # + +# Make sure to keep this in sync with: +# module/sys/version.py +# module/cpyext/include/patchlevel.h +# # The short X.Y version. -version = '6.0' +version = '7.0' # The full version, including alpha/beta/rc tags. -release = '6.0.0' +release = '7.0.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/pypy/module/cpyext/include/patchlevel.h b/pypy/module/cpyext/include/patchlevel.h --- a/pypy/module/cpyext/include/patchlevel.h +++ b/pypy/module/cpyext/include/patchlevel.h @@ -28,9 +28,12 @@ /* Version as a string */ #define PY_VERSION "2.7.13" -/* PyPy version as a string */ -#define PYPY_VERSION "6.1.0-alpha0" -#define PYPY_VERSION_NUM 0x06010000 +/* PyPy version as a string: make sure to keep this in sync with: + * module/sys/version.py + * doc/conf.py + */ +#define PYPY_VERSION "7.0.0" +#define PYPY_VERSION_NUM 0x07000000 /* Defined to mean a PyPy where cpyext holds more regular references to PyObjects, e.g. 
staying alive as long as the internal PyPy object diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py --- a/pypy/module/sys/version.py +++ b/pypy/module/sys/version.py @@ -10,7 +10,10 @@ #XXX # sync CPYTHON_VERSION with patchlevel.h, package.py CPYTHON_API_VERSION = 1013 #XXX # sync with include/modsupport.h -PYPY_VERSION = (6, 1, 0, "alpha", 0) #XXX # sync patchlevel.h +# make sure to keep PYPY_VERSION in sync with: +# module/cpyext/include/patchlevel.h +# doc/conf.py +PYPY_VERSION = (7, 0, 0, "final", 0) import pypy From pypy.commits at gmail.com Thu Jan 24 12:25:28 2019 From: pypy.commits at gmail.com (antocuni) Date: Thu, 24 Jan 2019 09:25:28 -0800 (PST) Subject: [pypy-commit] pypy default: merge release-pypy2.7-7.x into default, to incorporate the version bump Message-ID: <5c49f508.1c69fb81.4df90.2aa4@mx.google.com> Author: Antonio Cuni Branch: Changeset: r95711:c4dc91f2e037 Date: 2019-01-24 18:21 +0100 http://bitbucket.org/pypy/pypy/changeset/c4dc91f2e037/ Log: merge release-pypy2.7-7.x into default, to incorporate the version bump diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py --- a/pypy/doc/conf.py +++ b/pypy/doc/conf.py @@ -65,10 +65,15 @@ # |version| and |release|, also used in various other places throughout the # built documents. # + +# Make sure to keep this in sync with: +# module/sys/version.py +# module/cpyext/include/patchlevel.h +# # The short X.Y version. -version = '6.0' +version = '7.0' # The full version, including alpha/beta/rc tags. -release = '6.0.0' +release = '7.0.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/pypy/module/cpyext/include/patchlevel.h b/pypy/module/cpyext/include/patchlevel.h --- a/pypy/module/cpyext/include/patchlevel.h +++ b/pypy/module/cpyext/include/patchlevel.h @@ -28,9 +28,12 @@ /* Version as a string */ #define PY_VERSION "2.7.13" -/* PyPy version as a string */ -#define PYPY_VERSION "6.1.0-alpha0" -#define PYPY_VERSION_NUM 0x06010000 +/* PyPy version as a string: make sure to keep this in sync with: + * module/sys/version.py + * doc/conf.py + */ +#define PYPY_VERSION "7.0.0" +#define PYPY_VERSION_NUM 0x07000000 /* Defined to mean a PyPy where cpyext holds more regular references to PyObjects, e.g. staying alive as long as the internal PyPy object diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py --- a/pypy/module/sys/version.py +++ b/pypy/module/sys/version.py @@ -10,7 +10,10 @@ #XXX # sync CPYTHON_VERSION with patchlevel.h, package.py CPYTHON_API_VERSION = 1013 #XXX # sync with include/modsupport.h -PYPY_VERSION = (6, 1, 0, "alpha", 0) #XXX # sync patchlevel.h +# make sure to keep PYPY_VERSION in sync with: +# module/cpyext/include/patchlevel.h +# doc/conf.py +PYPY_VERSION = (7, 0, 0, "final", 0) import pypy From pypy.commits at gmail.com Thu Jan 24 12:25:30 2019 From: pypy.commits at gmail.com (antocuni) Date: Thu, 24 Jan 2019 09:25:30 -0800 (PST) Subject: [pypy-commit] pypy default: bump again the version number for the development version Message-ID: <5c49f50a.1c69fb81.4bd74.4dd5@mx.google.com> Author: Antonio Cuni Branch: Changeset: r95712:f3cf624ab14c Date: 2019-01-24 18:24 +0100 http://bitbucket.org/pypy/pypy/changeset/f3cf624ab14c/ Log: bump again the version number for the development version diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py --- a/pypy/doc/conf.py +++ b/pypy/doc/conf.py @@ -71,9 +71,9 @@ # module/cpyext/include/patchlevel.h # # The short X.Y version. -version = '7.0' +version = '7.1' # The full version, including alpha/beta/rc tags. 
-release = '7.0.0' +release = '7.1.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/pypy/module/cpyext/include/patchlevel.h b/pypy/module/cpyext/include/patchlevel.h --- a/pypy/module/cpyext/include/patchlevel.h +++ b/pypy/module/cpyext/include/patchlevel.h @@ -32,9 +32,8 @@ * module/sys/version.py * doc/conf.py */ -#define PYPY_VERSION "7.0.0" -#define PYPY_VERSION_NUM 0x07000000 - +#define PYPY_VERSION "7.1.0" +#define PYPY_VERSION_NUM 0x07010000 /* Defined to mean a PyPy where cpyext holds more regular references to PyObjects, e.g. staying alive as long as the internal PyPy object stays alive. */ diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py --- a/pypy/module/sys/version.py +++ b/pypy/module/sys/version.py @@ -13,7 +13,7 @@ # make sure to keep PYPY_VERSION in sync with: # module/cpyext/include/patchlevel.h # doc/conf.py -PYPY_VERSION = (7, 0, 0, "final", 0) +PYPY_VERSION = (7, 1, 0, "alpha0", 0) import pypy From pypy.commits at gmail.com Thu Jan 24 12:33:53 2019 From: pypy.commits at gmail.com (antocuni) Date: Thu, 24 Jan 2019 09:33:53 -0800 (PST) Subject: [pypy-commit] pypy release-pypy2.7-7.x: hg merge default, but keep the 7.0.0 version number Message-ID: <5c49f701.1c69fb81.d7951.541e@mx.google.com> Author: Antonio Cuni Branch: release-pypy2.7-7.x Changeset: r95713:7986159ef4d8 Date: 2019-01-24 18:32 +0100 http://bitbucket.org/pypy/pypy/changeset/7986159ef4d8/ Log: hg merge default, but keep the 7.0.0 version number From pypy.commits at gmail.com Thu Jan 24 13:52:58 2019 From: pypy.commits at gmail.com (rlamy) Date: Thu, 24 Jan 2019 10:52:58 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Merged in asottile/pypy-1/asottile/remove-unused-and-deprecated-import-of-i-1548354433308 (pull request #634) Message-ID: <5c4a098a.1c69fb81.7dee6.54e0@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r95715:af613daf3779 Date: 2019-01-24 18:52 +0000 
http://bitbucket.org/pypy/pypy/changeset/af613daf3779/ Log: Merged in asottile/pypy-1/asottile/remove-unused-and-deprecated- import-of-i-1548354433308 (pull request #634) Remove unused and deprecated import of `imp` diff --git a/lib-python/3/distutils/sysconfig_pypy.py b/lib-python/3/distutils/sysconfig_pypy.py --- a/lib-python/3/distutils/sysconfig_pypy.py +++ b/lib-python/3/distutils/sysconfig_pypy.py @@ -10,7 +10,7 @@ import sys import os -import imp, _imp +import _imp from distutils.errors import DistutilsPlatformError From pypy.commits at gmail.com Thu Jan 24 13:53:08 2019 From: pypy.commits at gmail.com (asottile) Date: Thu, 24 Jan 2019 10:53:08 -0800 (PST) Subject: [pypy-commit] pypy asottile/remove-unused-and-deprecated-import-of-i-1548354433308: Remove unused and deprecated import of `imp` Message-ID: <5c4a0994.1c69fb81.1b880.f7bd@mx.google.com> Author: Anthony Sottile Branch: asottile/remove-unused-and-deprecated-import-of-i-1548354433308 Changeset: r95714:16ae9390565a Date: 2019-01-24 18:31 +0000 http://bitbucket.org/pypy/pypy/changeset/16ae9390565a/ Log: Remove unused and deprecated import of `imp` diff --git a/lib-python/3/distutils/sysconfig_pypy.py b/lib-python/3/distutils/sysconfig_pypy.py --- a/lib-python/3/distutils/sysconfig_pypy.py +++ b/lib-python/3/distutils/sysconfig_pypy.py @@ -10,7 +10,7 @@ import sys import os -import imp, _imp +import _imp from distutils.errors import DistutilsPlatformError From pypy.commits at gmail.com Fri Jan 25 09:00:11 2019 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 25 Jan 2019 06:00:11 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: fix remaining failing tests Message-ID: <5c4b166b.1c69fb81.5b5ff.00a0@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8-py3 Changeset: r95722:b37c4fff34a0 Date: 2019-01-25 14:55 +0100 http://bitbucket.org/pypy/pypy/changeset/b37c4fff34a0/ Log: fix remaining failing tests diff --git a/pypy/objspace/std/mapdict.py b/pypy/objspace/std/mapdict.py --- 
a/pypy/objspace/std/mapdict.py +++ b/pypy/objspace/std/mapdict.py @@ -433,8 +433,8 @@ def materialize_str_dict(self, space, obj, str_dict): new_obj = self.back.materialize_str_dict(space, obj, str_dict) if self.index == DICT: - uni_name = str_decode_utf8(self.name, "string", True, None)[0] - str_dict[uni_name] = obj._mapdict_read_storage(self.storageindex) + w_key = space.newtext(self.name) + str_dict[w_key] = obj._mapdict_read_storage(self.storageindex) else: self._copy_attr(obj, new_obj) return new_obj diff --git a/pypy/objspace/std/test/test_kwargsdict.py b/pypy/objspace/std/test/test_kwargsdict.py --- a/pypy/objspace/std/test/test_kwargsdict.py +++ b/pypy/objspace/std/test/test_kwargsdict.py @@ -82,7 +82,7 @@ for i in range(100): assert d.setitem_str("ה%s" % i, 4) is None assert d.get_strategy() is not strategy - assert "ObjectDictStrategy" == d.get_strategy().__class__.__name__ + assert "UnicodeDictStrategy" == d.get_strategy().__class__.__name__ def test_keys_doesnt_wrap(): space = FakeSpace() From pypy.commits at gmail.com Fri Jan 25 09:58:18 2019 From: pypy.commits at gmail.com (antocuni) Date: Fri, 25 Jan 2019 06:58:18 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default; this bump the version to 7.1-alpha0 Message-ID: <5c4b240a.1c69fb81.ea429.c364@mx.google.com> Author: Antonio Cuni Branch: py3.5 Changeset: r95723:4b2995821717 Date: 2019-01-25 15:52 +0100 http://bitbucket.org/pypy/pypy/changeset/4b2995821717/ Log: hg merge default; this bump the version to 7.1-alpha0 diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py --- a/pypy/doc/conf.py +++ b/pypy/doc/conf.py @@ -65,10 +65,15 @@ # |version| and |release|, also used in various other places throughout the # built documents. # + +# Make sure to keep this in sync with: +# module/sys/version.py +# module/cpyext/include/patchlevel.h +# # The short X.Y version. -version = '6.0' +version = '7.1' # The full version, including alpha/beta/rc tags. 
-release = '6.0.0' +release = '7.1.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/pypy/module/cpyext/include/patchlevel.h b/pypy/module/cpyext/include/patchlevel.h --- a/pypy/module/cpyext/include/patchlevel.h +++ b/pypy/module/cpyext/include/patchlevel.h @@ -28,10 +28,12 @@ /* Version as a string */ #define PY_VERSION "3.5.3" -/* PyPy version as a string */ -#define PYPY_VERSION "6.1.0-alpha0" -#define PYPY_VERSION_NUM 0x06010000 - +/* PyPy version as a string: make sure to keep this in sync with: + * module/sys/version.py + * doc/conf.py + */ +#define PYPY_VERSION "7.1.0" +#define PYPY_VERSION_NUM 0x07010000 /* Defined to mean a PyPy where cpyext holds more regular references to PyObjects, e.g. staying alive as long as the internal PyPy object stays alive. */ diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py --- a/pypy/module/sys/version.py +++ b/pypy/module/sys/version.py @@ -10,7 +10,10 @@ #XXX # sync CPYTHON_VERSION with patchlevel.h, package.py CPYTHON_API_VERSION = 1013 #XXX # sync with include/modsupport.h -PYPY_VERSION = (6, 1, 0, "alpha", 0) #XXX # sync patchlevel.h +# make sure to keep PYPY_VERSION in sync with: +# module/cpyext/include/patchlevel.h +# doc/conf.py +PYPY_VERSION = (7, 1, 0, "alpha0", 0) import pypy diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -991,7 +991,9 @@ items = d.items() d.clear() d[key] = value - d.update(items) + # r_dict.update does not support list of tuples, do it manually + for key, value in items: + d[key] = value @specialize.call_location() def move_to_end(d, key, last=True): diff --git a/rpython/rlib/test/test_objectmodel.py b/rpython/rlib/test/test_objectmodel.py --- a/rpython/rlib/test/test_objectmodel.py +++ b/rpython/rlib/test/test_objectmodel.py @@ -708,6 +708,15 @@ move_to_end(d, 'key1', last=False) assert d.items() == 
[('key1', 'val1'), ('key2', 'val2'), ('key3', 'val3')] +def test_r_dict_move_to_end(): + d = r_dict(strange_key_eq, strange_key_hash) + d['1key'] = 'val1' + d['2key'] = 'val2' + d['3key'] = 'val3' + # does not crash, we can't check that it actually moves to end on CPython + move_to_end(d, '1key') + move_to_end(d, '1key', last=False) + def test_import_from_mixin(): class M: # old-style def f(self): From pypy.commits at gmail.com Fri Jan 25 09:58:20 2019 From: pypy.commits at gmail.com (antocuni) Date: Fri, 25 Jan 2019 06:58:20 -0800 (PST) Subject: [pypy-commit] pypy release-pypy3.5-7.x: create the release branch for py3.5 7.0, and correct the version number Message-ID: <5c4b240c.1c69fb81.2c485.1167@mx.google.com> Author: Antonio Cuni Branch: release-pypy3.5-7.x Changeset: r95724:ece1f9ddcb91 Date: 2019-01-25 15:56 +0100 http://bitbucket.org/pypy/pypy/changeset/ece1f9ddcb91/ Log: create the release branch for py3.5 7.0, and correct the version number diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py --- a/pypy/doc/conf.py +++ b/pypy/doc/conf.py @@ -71,9 +71,9 @@ # module/cpyext/include/patchlevel.h # # The short X.Y version. -version = '7.1' +version = '7.0' # The full version, including alpha/beta/rc tags. -release = '7.1.0' +release = '7.0.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/pypy/module/cpyext/include/patchlevel.h b/pypy/module/cpyext/include/patchlevel.h --- a/pypy/module/cpyext/include/patchlevel.h +++ b/pypy/module/cpyext/include/patchlevel.h @@ -32,8 +32,9 @@ * module/sys/version.py * doc/conf.py */ -#define PYPY_VERSION "7.1.0" -#define PYPY_VERSION_NUM 0x07010000 +#define PYPY_VERSION "7.0.0" +#define PYPY_VERSION_NUM 0x07000000 + /* Defined to mean a PyPy where cpyext holds more regular references to PyObjects, e.g. staying alive as long as the internal PyPy object stays alive. 
*/ diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py --- a/pypy/module/sys/version.py +++ b/pypy/module/sys/version.py @@ -13,7 +13,7 @@ # make sure to keep PYPY_VERSION in sync with: # module/cpyext/include/patchlevel.h # doc/conf.py -PYPY_VERSION = (7, 1, 0, "alpha0", 0) +PYPY_VERSION = (7, 0, 0, "final", 0) import pypy From pypy.commits at gmail.com Fri Jan 25 09:58:22 2019 From: pypy.commits at gmail.com (antocuni) Date: Fri, 25 Jan 2019 06:58:22 -0800 (PST) Subject: [pypy-commit] pypy py3.5: merge release-pypy3.5-7.x into the development branch, and make sure to keep the correct development version Message-ID: <5c4b240e.1c69fb81.cc6e4.9548@mx.google.com> Author: Antonio Cuni Branch: py3.5 Changeset: r95725:b8fff341cb4d Date: 2019-01-25 15:57 +0100 http://bitbucket.org/pypy/pypy/changeset/b8fff341cb4d/ Log: merge release-pypy3.5-7.x into the development branch, and make sure to keep the correct development version From pypy.commits at gmail.com Fri Jan 25 10:25:56 2019 From: pypy.commits at gmail.com (antocuni) Date: Fri, 25 Jan 2019 07:25:56 -0800 (PST) Subject: [pypy-commit] pypy default: document the steps I did to create the release branches Message-ID: <5c4b2a84.1c69fb81.dcde5.9929@mx.google.com> Author: Antonio Cuni Branch: Changeset: r95726:c42e8df4ef8c Date: 2019-01-25 16:25 +0100 http://bitbucket.org/pypy/pypy/changeset/c42e8df4ef8c/ Log: document the steps I did to create the release branches diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst --- a/pypy/doc/how-to-release.rst +++ b/pypy/doc/how-to-release.rst @@ -16,9 +16,6 @@ How to Create a PyPy Release ++++++++++++++++++++++++++++ -Overview --------- - As a meta rule setting up issues in the tracker for items here may help not forgetting things. A set of todo files may also work. 
@@ -28,17 +25,54 @@ Release Steps -------------- +++++++++++++++ -* If needed, make a release branch -* Bump the - pypy version number in module/sys/version.py and in - module/cpyext/include/patchlevel.h and in doc/conf.py. The branch - will capture the revision number of this change for the release. +Make the release branch +------------------------ - Some of the next updates may be done before or after branching; make - sure things are ported back to the trunk and to the branch as - necessary. +This is needed only in case you are doing a new major version; if not, you can +probably reuse the existing release branch. + +We want to be able to freely merge default into the branch and vice-versa; +thus we need to do a complicate dance to avoid to patch the version number +when we do a merge:: + + $ hg up -r default + $ # edit the version to e.g. 7.0.0-final + $ hg ci + $ hg branch release-pypy2.7-7.x && hg ci + $ hg up -r default + $ # edit the version to 7.1.0-alpha0 + $ hg ci + $ hg up -r release-pypy2.7-7.x + $ hg merge default + $ # edit the version to AGAIN 7.0.0-final + $ hg ci + +Then, we need to do the same for the 3.x branch:: + + $ hg up -r py3.5 + $ hg merge default # this brings the version fo 7.1.0-alpha0 + $ hg branch release-pypy3.5-7.x + $ # edit the version to 7.0.0-final + $ hg ci + $ hg up -r py3.5 + $ hg merge release-pypy3.5-7.x + $ # edit the version to 7.1.0-alpha0 + $ hg ci + +To change the version, you need to edit three files: + + - ``module/sys/version.py`` + + - ``module/cpyext/include/patchlevel.h`` + + - ``doc/conf.py`` + + +Other steps +----------- + * Make sure the RPython builds on the buildbot pass with no failures From pypy.commits at gmail.com Fri Jan 25 10:44:51 2019 From: pypy.commits at gmail.com (antocuni) Date: Fri, 25 Jan 2019 07:44:51 -0800 (PST) Subject: [pypy-commit] pypy default: fix the version Message-ID: <5c4b2ef3.1c69fb81.a00f3.ea9d@mx.google.com> Author: Antonio Cuni Branch: Changeset: r95727:a5bef2990aeb Date: 
2019-01-25 16:36 +0100 http://bitbucket.org/pypy/pypy/changeset/a5bef2990aeb/ Log: fix the version diff --git a/pypy/module/cpyext/include/patchlevel.h b/pypy/module/cpyext/include/patchlevel.h --- a/pypy/module/cpyext/include/patchlevel.h +++ b/pypy/module/cpyext/include/patchlevel.h @@ -32,7 +32,7 @@ * module/sys/version.py * doc/conf.py */ -#define PYPY_VERSION "7.1.0" +#define PYPY_VERSION "7.1.0-alpha0" #define PYPY_VERSION_NUM 0x07010000 /* Defined to mean a PyPy where cpyext holds more regular references to PyObjects, e.g. staying alive as long as the internal PyPy object diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py --- a/pypy/module/sys/version.py +++ b/pypy/module/sys/version.py @@ -13,7 +13,7 @@ # make sure to keep PYPY_VERSION in sync with: # module/cpyext/include/patchlevel.h # doc/conf.py -PYPY_VERSION = (7, 1, 0, "alpha0", 0) +PYPY_VERSION = (7, 1, 0, "alpha", 0) import pypy From pypy.commits at gmail.com Fri Jan 25 10:44:53 2019 From: pypy.commits at gmail.com (antocuni) Date: Fri, 25 Jan 2019 07:44:53 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5c4b2ef5.1c69fb81.d7041.abd2@mx.google.com> Author: Antonio Cuni Branch: py3.5 Changeset: r95728:75ae721dc112 Date: 2019-01-25 16:44 +0100 http://bitbucket.org/pypy/pypy/changeset/75ae721dc112/ Log: hg merge default diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst --- a/pypy/doc/how-to-release.rst +++ b/pypy/doc/how-to-release.rst @@ -16,9 +16,6 @@ How to Create a PyPy Release ++++++++++++++++++++++++++++ -Overview --------- - As a meta rule setting up issues in the tracker for items here may help not forgetting things. A set of todo files may also work. @@ -28,17 +25,54 @@ Release Steps -------------- +++++++++++++++ -* If needed, make a release branch -* Bump the - pypy version number in module/sys/version.py and in - module/cpyext/include/patchlevel.h and in doc/conf.py. 
The branch - will capture the revision number of this change for the release. +Make the release branch +------------------------ - Some of the next updates may be done before or after branching; make - sure things are ported back to the trunk and to the branch as - necessary. +This is needed only in case you are doing a new major version; if not, you can +probably reuse the existing release branch. + +We want to be able to freely merge default into the branch and vice-versa; +thus we need to do a complicate dance to avoid to patch the version number +when we do a merge:: + + $ hg up -r default + $ # edit the version to e.g. 7.0.0-final + $ hg ci + $ hg branch release-pypy2.7-7.x && hg ci + $ hg up -r default + $ # edit the version to 7.1.0-alpha0 + $ hg ci + $ hg up -r release-pypy2.7-7.x + $ hg merge default + $ # edit the version to AGAIN 7.0.0-final + $ hg ci + +Then, we need to do the same for the 3.x branch:: + + $ hg up -r py3.5 + $ hg merge default # this brings the version fo 7.1.0-alpha0 + $ hg branch release-pypy3.5-7.x + $ # edit the version to 7.0.0-final + $ hg ci + $ hg up -r py3.5 + $ hg merge release-pypy3.5-7.x + $ # edit the version to 7.1.0-alpha0 + $ hg ci + +To change the version, you need to edit three files: + + - ``module/sys/version.py`` + + - ``module/cpyext/include/patchlevel.h`` + + - ``doc/conf.py`` + + +Other steps +----------- + * Make sure the RPython builds on the buildbot pass with no failures diff --git a/pypy/module/cpyext/include/patchlevel.h b/pypy/module/cpyext/include/patchlevel.h --- a/pypy/module/cpyext/include/patchlevel.h +++ b/pypy/module/cpyext/include/patchlevel.h @@ -32,7 +32,7 @@ * module/sys/version.py * doc/conf.py */ -#define PYPY_VERSION "7.1.0" +#define PYPY_VERSION "7.1.0-alpha0" #define PYPY_VERSION_NUM 0x07010000 /* Defined to mean a PyPy where cpyext holds more regular references to PyObjects, e.g. 
staying alive as long as the internal PyPy object diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py --- a/pypy/module/sys/version.py +++ b/pypy/module/sys/version.py @@ -13,7 +13,7 @@ # make sure to keep PYPY_VERSION in sync with: # module/cpyext/include/patchlevel.h # doc/conf.py -PYPY_VERSION = (7, 1, 0, "alpha0", 0) +PYPY_VERSION = (7, 1, 0, "alpha", 0) import pypy From pypy.commits at gmail.com Fri Jan 25 10:45:37 2019 From: pypy.commits at gmail.com (mjacob) Date: Fri, 25 Jan 2019 07:45:37 -0800 (PST) Subject: [pypy-commit] extradoc extradoc: Fix date. Message-ID: <5c4b2f21.1c69fb81.c5a3d.efc5@mx.google.com> Author: Manuel Jacob Branch: extradoc Changeset: r5941:be3b514769de Date: 2019-01-25 16:45 +0100 http://bitbucket.org/pypy/extradoc/changeset/be3b514769de/ Log: Fix date. diff --git a/sprintinfo/ddorf2019/people.txt b/sprintinfo/ddorf2019/people.txt --- a/sprintinfo/ddorf2019/people.txt +++ b/sprintinfo/ddorf2019/people.txt @@ -10,7 +10,7 @@ ============================ ============== =========================== Carl Friedrich Bolz-Tereick always there private Matti Picus Feb 4 - 9 airbnb -Manuel Feb 3 - 7? share a room? +Manuel Jacob Feb 3 - 7 share a room? Antonio Cuni Feb 3 - 9 airbnb Andrew Lawrence Feb 3 - 9 backpackers Düsseldorf Alexander Schremmer Feb 4 - 8 Essen, guest room available From pypy.commits at gmail.com Fri Jan 25 20:51:09 2019 From: pypy.commits at gmail.com (rlamy) Date: Fri, 25 Jan 2019 17:51:09 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Avoid importing from the stdlib at translation time. Message-ID: <5c4bbd0d.1c69fb81.8fc15.d84e@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r95729:eb0bd5f16264 Date: 2019-01-26 01:50 +0000 http://bitbucket.org/pypy/pypy/changeset/eb0bd5f16264/ Log: Avoid importing from the stdlib at translation time. Makes dict_items, dict_keys, and dict_values instantiable and subclassable. 
diff --git a/lib_pypy/_pypy_collections.py b/lib_pypy/_pypy_collections.py --- a/lib_pypy/_pypy_collections.py +++ b/lib_pypy/_pypy_collections.py @@ -1,6 +1,5 @@ from __pypy__ import reversed_dict, move_to_end, objects_in_repr from _operator import eq as _eq -import _collections_abc class OrderedDict(dict): @@ -29,7 +28,33 @@ raise TypeError('expected at most 1 arguments, got %d' % len(args)) self.__update(*args, **kwds) - update = __update = _collections_abc.MutableMapping.update + def update(*args, **kwds): + ''' D.update([E, ]**F) -> None. Update D from mapping/iterable E and F. + If E present and has a .keys() method, does: for k in E: D[k] = E[k] + If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v + In either case, this is followed by: for k, v in F.items(): D[k] = v + ''' + if not args: + raise TypeError("descriptor 'update' of 'OrderedDict' object " + "needs an argument") + self, *args = args + if len(args) > 1: + raise TypeError('update expected at most 1 arguments, got %d' % + len(args)) + if args: + other = args[0] + if hasattr(other, 'items'): + for key, value in other.items(): + self[key] = value + elif hasattr(other, "keys"): + for key in other.keys(): + self[key] = other[key] + else: + for key, value in other: + self[key] = value + for key, value in kwds.items(): + self[key] = value + __update = update def __reversed__(self): return reversed_dict(self) @@ -106,17 +131,20 @@ "D.values() -> an object providing a view on D's values" return _OrderedDictValuesView(self) +dict_keys = type({}.keys()) +dict_values = type({}.values()) +dict_items = type({}.items()) -class _OrderedDictKeysView(_collections_abc.KeysView): +class _OrderedDictKeysView(dict_keys): def __reversed__(self): yield from reversed_dict(self._mapping) -class _OrderedDictItemsView(_collections_abc.ItemsView): +class _OrderedDictItemsView(dict_items): def __reversed__(self): for key in reversed_dict(self._mapping): yield (key, self._mapping[key]) -class 
_OrderedDictValuesView(_collections_abc.ValuesView): +class _OrderedDictValuesView(dict_values): def __reversed__(self): for key in reversed_dict(self._mapping): yield self._mapping[key] diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -1538,6 +1538,12 @@ descr_or, descr_ror = _as_set_op('or', 'update') descr_xor, descr_rxor = _as_set_op('xor', 'symmetric_difference_update') +def new_dict_items(space, w_type, w_dict): + w_dict = space.interp_w(W_DictMultiObject, w_dict) + w_obj = space.allocate_instance(W_DictViewItemsObject, w_type) + W_DictViewObject.__init__(w_obj, space, w_dict) + return w_obj + class W_DictViewItemsObject(W_DictViewObject, SetLikeDictView): def descr_iter(self, space): return W_DictMultiIterItemsObject(space, self.w_dict.iteritems()) @@ -1557,18 +1563,32 @@ return space.w_False return space.newbool(space.eq_w(w_value, w_found)) +def new_dict_keys(space, w_type, w_dict): + w_dict = space.interp_w(W_DictMultiObject, w_dict) + w_obj = space.allocate_instance(W_DictViewKeysObject, w_type) + W_DictViewObject.__init__(w_obj, space, w_dict) + return w_obj + class W_DictViewKeysObject(W_DictViewObject, SetLikeDictView): def descr_iter(self, space): return W_DictMultiIterKeysObject(space, self.w_dict.iterkeys()) + def descr_contains(self, space, w_key): return self.w_dict.descr_contains(space, w_key) +def new_dict_values(space, w_type, w_dict): + w_dict = space.interp_w(W_DictMultiObject, w_dict) + w_obj = space.allocate_instance(W_DictViewValuesObject, w_type) + W_DictViewObject.__init__(w_obj, space, w_dict) + return w_obj + class W_DictViewValuesObject(W_DictViewObject): def descr_iter(self, space): return W_DictMultiIterValuesObject(space, self.w_dict.itervalues()) W_DictViewItemsObject.typedef = TypeDef( "dict_items", + __new__ = interp2app(new_dict_items), __repr__ = interp2app(W_DictViewItemsObject.descr_repr), __len__ 
= interp2app(W_DictViewItemsObject.descr_len), __iter__ = interp2app(W_DictViewItemsObject.descr_iter), @@ -1594,6 +1614,7 @@ W_DictViewKeysObject.typedef = TypeDef( "dict_keys", + __new__ = interp2app(new_dict_keys), __repr__ = interp2app(W_DictViewKeysObject.descr_repr), __len__ = interp2app(W_DictViewKeysObject.descr_len), __iter__ = interp2app(W_DictViewKeysObject.descr_iter), @@ -1619,6 +1640,7 @@ W_DictViewValuesObject.typedef = TypeDef( "dict_values", + __new__ = interp2app(new_dict_values), __repr__ = interp2app(W_DictViewValuesObject.descr_repr), __len__ = interp2app(W_DictViewValuesObject.descr_len), __iter__ = interp2app(W_DictViewValuesObject.descr_iter), From pypy.commits at gmail.com Sat Jan 26 16:26:21 2019 From: pypy.commits at gmail.com (rlamy) Date: Sat, 26 Jan 2019 13:26:21 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix reversed() on OrderedDict views Message-ID: <5c4cd07d.1c69fb81.cc6e4.5a92@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r95730:4674aa2581e3 Date: 2019-01-26 20:12 +0000 http://bitbucket.org/pypy/pypy/changeset/4674aa2581e3/ Log: Fix reversed() on OrderedDict views diff --git a/lib_pypy/_pypy_collections.py b/lib_pypy/_pypy_collections.py --- a/lib_pypy/_pypy_collections.py +++ b/lib_pypy/_pypy_collections.py @@ -137,14 +137,14 @@ class _OrderedDictKeysView(dict_keys): def __reversed__(self): - yield from reversed_dict(self._mapping) + yield from reversed_dict(self._dict) class _OrderedDictItemsView(dict_items): def __reversed__(self): - for key in reversed_dict(self._mapping): - yield (key, self._mapping[key]) + for key in reversed_dict(self._dict): + yield (key, self._dict[key]) class _OrderedDictValuesView(dict_values): def __reversed__(self): - for key in reversed_dict(self._mapping): - yield self._mapping[key] + for key in reversed_dict(self._dict): + yield self._dict[key] diff --git a/pypy/module/_collections/test/test_ordereddict.py b/pypy/module/_collections/test/test_ordereddict.py --- 
a/pypy/module/_collections/test/test_ordereddict.py +++ b/pypy/module/_collections/test/test_ordereddict.py @@ -22,3 +22,17 @@ assert d['x'] == 42 d.update({'y': 2}) assert d['y'] == 42 + + def test_reversed(self): + import sys + from _collections import OrderedDict + + pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)] + od = OrderedDict(pairs) + if '__pypy__' in sys.builtin_module_names: + # dict ordering is wrong when testing interpreted on top of CPython + pairs = list(dict(od).items()) + assert list(reversed(od)) == [t[0] for t in reversed(pairs)] + assert list(reversed(od.keys())) == [t[0] for t in reversed(pairs)] + assert list(reversed(od.values())) == [t[1] for t in reversed(pairs)] + assert list(reversed(od.items())) == list(reversed(pairs)) diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -11,7 +11,7 @@ WrappedDefault, applevel, interp2app, unwrap_spec) from pypy.interpreter.mixedmodule import MixedModule from pypy.interpreter.signature import Signature -from pypy.interpreter.typedef import TypeDef +from pypy.interpreter.typedef import TypeDef, interp_attrproperty_w from pypy.interpreter.unicodehelper import decode_utf8 from pypy.objspace.std.util import negate @@ -1610,6 +1610,7 @@ __xor__ = interp2app(W_DictViewItemsObject.descr_xor), __rxor__ = interp2app(W_DictViewItemsObject.descr_rxor), isdisjoint = interp2app(W_DictViewItemsObject.descr_isdisjoint), + _dict = interp_attrproperty_w('w_dict', cls=W_DictViewItemsObject), ) W_DictViewKeysObject.typedef = TypeDef( @@ -1636,6 +1637,7 @@ __xor__ = interp2app(W_DictViewKeysObject.descr_xor), __rxor__ = interp2app(W_DictViewKeysObject.descr_rxor), isdisjoint = interp2app(W_DictViewKeysObject.descr_isdisjoint), + _dict = interp_attrproperty_w('w_dict', cls=W_DictViewKeysObject), ) W_DictViewValuesObject.typedef = TypeDef( @@ -1644,4 +1646,5 @@ __repr__ = 
interp2app(W_DictViewValuesObject.descr_repr), __len__ = interp2app(W_DictViewValuesObject.descr_len), __iter__ = interp2app(W_DictViewValuesObject.descr_iter), + _dict = interp_attrproperty_w('w_dict', cls=W_DictViewValuesObject), ) From pypy.commits at gmail.com Sat Jan 26 16:26:23 2019 From: pypy.commits at gmail.com (rlamy) Date: Sat, 26 Jan 2019 13:26:23 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Disable tests requiring dict view classes to not be callable, see eb0bd5f16264 Message-ID: <5c4cd07f.1c69fb81.5d337.52f1@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r95731:ab129900b464 Date: 2019-01-26 21:25 +0000 http://bitbucket.org/pypy/pypy/changeset/ab129900b464/ Log: Disable tests requiring dict view classes to not be callable, see eb0bd5f16264 diff --git a/lib-python/3/test/test_dictviews.py b/lib-python/3/test/test_dictviews.py --- a/lib-python/3/test/test_dictviews.py +++ b/lib-python/3/test/test_dictviews.py @@ -1,9 +1,11 @@ +from test import support import copy import pickle import unittest class DictSetTest(unittest.TestCase): + @support.cpython_only def test_constructors_not_callable(self): kt = type({}.keys()) self.assertRaises(TypeError, kt, {}) diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py --- a/pypy/objspace/std/test/test_dictmultiobject.py +++ b/pypy/objspace/std/test/test_dictmultiobject.py @@ -787,17 +787,6 @@ assert len(d.items()) == 2 assert len(d.values()) == 2 - def test_constructors_not_callable(self): - kt = type({}.keys()) - raises(TypeError, kt, {}) - raises(TypeError, kt) - it = type({}.items()) - raises(TypeError, it, {}) - raises(TypeError, it) - vt = type({}.values()) - raises(TypeError, vt, {}) - raises(TypeError, vt) - def test_dict_keys(self): d = {1: 10, "a": "ABC"} keys = d.keys() From pypy.commits at gmail.com Sat Jan 26 17:23:28 2019 From: pypy.commits at gmail.com (rlamy) Date: Sat, 26 Jan 2019 14:23:28 -0800 (PST) Subject: [pypy-commit] pypy 
py3.5: Get open() from builtin module _io, instead of stdlib io Message-ID: <5c4cdde0.1c69fb81.4f303.6ee5@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r95732:bb4e8dfe6ac8 Date: 2019-01-26 22:22 +0000 http://bitbucket.org/pypy/pypy/changeset/bb4e8dfe6ac8/ Log: Get open() from builtin module _io, instead of stdlib io diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py --- a/pypy/interpreter/pyopcode.py +++ b/pypy/interpreter/pyopcode.py @@ -1049,6 +1049,8 @@ w_locals = d.w_locals if w_locals is None: # CPython does this w_locals = space.w_None + if not we_are_translated() and modulename == 'io': + import pdb; pdb.set_trace() w_modulename = space.newtext(modulename) w_globals = self.get_w_globals() if w_flag is None: diff --git a/pypy/module/__builtin__/state.py b/pypy/module/__builtin__/state.py --- a/pypy/module/__builtin__/state.py +++ b/pypy/module/__builtin__/state.py @@ -2,8 +2,8 @@ class State: def __init__(self, space): self.w_open = space.appexec([], """(): - import io - return io.open""") - + import _io + return _io.open""") + def get(space): return space.fromcache(State) From pypy.commits at gmail.com Sat Jan 26 22:47:05 2019 From: pypy.commits at gmail.com (rlamy) Date: Sat, 26 Jan 2019 19:47:05 -0800 (PST) Subject: [pypy-commit] pypy py3.5: kill the one app_main test that requires it to be importable at interp-level and simplify code Message-ID: <5c4d29b9.1c69fb81.668ce.f128@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r95734:cc45a45ea346 Date: 2019-01-27 03:46 +0000 http://bitbucket.org/pypy/pypy/changeset/cc45a45ea346/ Log: kill the one app_main test that requires it to be importable at interp-level and simplify code diff --git a/pypy/interpreter/app_main.py b/pypy/interpreter/app_main.py --- a/pypy/interpreter/app_main.py +++ b/pypy/interpreter/app_main.py @@ -3,7 +3,6 @@ # See test/test_app_main. 
# Missing vs CPython: -b, -d, -x -from __future__ import print_function, unicode_literals USAGE1 = __doc__ = """\ Options and arguments (and corresponding environment variables): -B : don't write .py[co] files on import; also PYTHONDONTWRITEBYTECODE=x @@ -334,7 +333,7 @@ del encerr def create_stdio(fd, writing, name, encoding, errors, unbuffered): - import io + import _io # stdin is always opened in buffered mode, first because it # shouldn't make a difference in common use cases, second because # TextIOWrapper depends on the presence of a read1() method which @@ -342,7 +341,7 @@ buffering = 0 if unbuffered and writing else -1 mode = 'w' if writing else 'r' try: - buf = io.open(fd, mode + 'b', buffering, closefd=False) + buf = _io.open(fd, mode + 'b', buffering, closefd=False) except OSError as e: if e.errno != errno.EBADF: raise @@ -352,7 +351,7 @@ raw.name = name # translate \r\n to \n for sys.stdin on Windows newline = None if sys.platform == 'win32' and not writing else '\n' - stream = io.TextIOWrapper(buf, encoding, errors, newline=newline, + stream = _io.TextIOWrapper(buf, encoding, errors, newline=newline, line_buffering=unbuffered or raw.isatty()) stream.mode = mode return stream @@ -549,12 +548,6 @@ return options -# this indirection is needed to be able to import this module on python2, else -# we have a SyntaxError: unqualified exec in a nested function - at hidden_applevel -def exec_(src, dic): - exec(src, dic) - @hidden_applevel def run_command_line(interactive, inspect, @@ -663,7 +656,7 @@ else: if not isolated: sys.path.insert(0, '') - success = run_toplevel(exec_, bytes, mainmodule.__dict__) + success = run_toplevel(exec, bytes, mainmodule.__dict__) elif run_module != 0: # handle the "-m" command # '' on sys.path is required also here @@ -703,7 +696,7 @@ python_startup, 'exec', PyCF_ACCEPT_NULL_BYTES) - exec_(co_python_startup, mainmodule.__dict__) + exec(co_python_startup, mainmodule.__dict__) mainmodule.__file__ = python_startup 
mainmodule.__cached__ = None run_toplevel(run_it) @@ -721,7 +714,7 @@ def run_it(): co_stdin = compile(sys.stdin.read(), '', 'exec', PyCF_ACCEPT_NULL_BYTES) - exec_(co_stdin, mainmodule.__dict__) + exec(co_stdin, mainmodule.__dict__) mainmodule.__file__ = '' mainmodule.__cached__ = None success = run_toplevel(run_it) @@ -763,7 +756,7 @@ co = marshal.load(f) if type(co) is not type((lambda:0).__code__): raise RuntimeError("Bad code object in .pyc file") - exec_(co, namespace) + exec(co, namespace) args = (execfile, filename, mainmodule.__dict__) else: filename = sys.argv[0] @@ -791,7 +784,7 @@ code = f.read() co = compile(code, filename, 'exec', PyCF_ACCEPT_NULL_BYTES) - exec_(co, namespace) + exec(co, namespace) args = (execfile, filename, mainmodule.__dict__) success = run_toplevel(*args) diff --git a/pypy/interpreter/test/test_app_main.py b/pypy/interpreter/test/test_app_main.py --- a/pypy/interpreter/test/test_app_main.py +++ b/pypy/interpreter/test/test_app_main.py @@ -1043,36 +1043,6 @@ assert data.startswith("15\\u20ac ('strict', 'backslashreplace')") -class TestAppMain: - def test_print_info(self): - from pypy.interpreter import app_main - import sys, cStringIO - prev_so = sys.stdout - prev_ti = getattr(sys, 'pypy_translation_info', 'missing') - sys.pypy_translation_info = { - 'translation.foo': True, - 'translation.bar': 42, - 'translation.egg.something': None, - 'objspace.x': 'hello', - } - try: - sys.stdout = f = cStringIO.StringIO() - py.test.raises(SystemExit, app_main.print_info) - finally: - sys.stdout = prev_so - if prev_ti == 'missing': - del sys.pypy_translation_info - else: - sys.pypy_translation_info = prev_ti - assert f.getvalue() == ("[objspace]\n" - " x = 'hello'\n" - "[translation]\n" - " bar = 42\n" - " [egg]\n" - " something = None\n" - " foo = True\n") - - @py.test.mark.skipif('config.getoption("runappdirect")') class AppTestAppMain: def setup_class(self): From pypy.commits at gmail.com Sun Jan 27 03:27:45 2019 From: pypy.commits at 
gmail.com (arigo) Date: Sun, 27 Jan 2019 00:27:45 -0800 (PST) Subject: [pypy-commit] cffi default: tweak docs Message-ID: <5c4d6b81.1c69fb81.5c675.b6fc@mx.google.com> Author: Armin Rigo Branch: Changeset: r3183:378b0cdeadb5 Date: 2019-01-27 09:27 +0100 http://bitbucket.org/cffi/cffi/changeset/378b0cdeadb5/ Log: tweak docs diff --git a/doc/source/ref.rst b/doc/source/ref.rst --- a/doc/source/ref.rst +++ b/doc/source/ref.rst @@ -608,8 +608,9 @@ **ffi.release(cdata)**: release the resources held by a cdata object from ``ffi.new()``, ``ffi.gc()``, ``ffi.from_buffer()`` or ``ffi.new_allocator()()``. The cdata object must not be used afterwards. -The regular destructor of the cdata object releases the same resources, -but this allows the operation to occur at a known time. +The normal Python destructor of the cdata object releases the same resources, +but this allows the releasing to occur at a known time, as opposed as at an +unspecified point in the future. *New in version 1.12.* ``ffi.release(cdata)`` is equivalent to ``cdata.__exit__()``, which means that diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst --- a/doc/source/whatsnew.rst +++ b/doc/source/whatsnew.rst @@ -23,14 +23,13 @@ * CPython 2.x: ``ffi.dlopen()`` failed with non-ascii file names on Posix -* ``ffi.from_buffer()`` takes two new arguments: an optional *first* argument - gives the array type of the result; and the keyword argument - ``require_writable`` can ask the object passed in to raise an exception if - it is read-only. +* ``ffi.from_buffer()`` takes a new optional *first* argument that gives + the array type of the result. It also takes an optional keyword argument + ``require_writable`` to refuse read-only Python buffers. * ``ffi.new()``, ``ffi.gc()`` or ``ffi.from_buffer()`` cdata objects can now be released at known times, either by using the ``with`` - keyword or be calling the new ``ffi.release()``. + keyword or by calling the new ``ffi.release()``. 
v1.11.5 From pypy.commits at gmail.com Mon Jan 28 12:51:36 2019 From: pypy.commits at gmail.com (antocuni) Date: Mon, 28 Jan 2019 09:51:36 -0800 (PST) Subject: [pypy-commit] pypy default: update contributor.rst Message-ID: <5c4f4128.1c69fb81.b928a.0923@mx.google.com> Author: Antonio Cuni Branch: Changeset: r95735:d4cd9570a582 Date: 2019-01-28 17:27 +0100 http://bitbucket.org/pypy/pypy/changeset/d4cd9570a582/ Log: update contributor.rst diff --git a/pypy/doc/contributor.rst b/pypy/doc/contributor.rst --- a/pypy/doc/contributor.rst +++ b/pypy/doc/contributor.rst @@ -7,16 +7,16 @@ Armin Rigo Maciej Fijalkowski Carl Friedrich Bolz-Tereick + Antonio Cuni Amaury Forgeot d'Arc - Antonio Cuni Matti Picus Samuele Pedroni Ronan Lamy Alex Gaynor Philip Jenvey + Richard Plangger Brian Kearns - Richard Plangger - Michael Hudson + Michael Hudson-Doyle Manuel Jacob David Schneider Holger Krekel @@ -26,8 +26,8 @@ Anders Chrigstrom Wim Lavrijsen Eric van Riet Paap + Remi Meier Richard Emslie - Remi Meier Alexander Schremmer Dan Villiom Podlaski Christiansen Lukas Diekmann @@ -37,10 +37,10 @@ Niklaus Haldimann Camillo Bruni Laura Creighton - Romain Guillebert Toon Verwaest Leonardo Santagada Seo Sanghyeon + Romain Guillebert Ronny Pfannschmidt Justin Peel Raffael Tfirst @@ -81,12 +81,12 @@ Squeaky Edd Barrett Timo Paulssen + Laurence Tratt Marius Gedminas Nicolas Truessel Alexandre Fayolle Simon Burton Martin Matusiak - Laurence Tratt Wenzhu Man Konstantin Lopuhin John Witulski @@ -101,8 +101,9 @@ Jean-Philippe St. 
Pierre Guido van Rossum Pavel Vinogradov + Stefan Beyer + William Leslie Paweł Piotr Przeradowski - William Leslie marky1991 Ilya Osadchiy Tobias Oberstein @@ -111,10 +112,10 @@ Taavi Burns Adrian Kuhn tav + Stian Andreassen Georg Brandl Joannah Nanjekye Bert Freudenberg - Stian Andreassen Wanja Saatkamp Mike Blume Gerald Klix @@ -130,6 +131,7 @@ Vasily Kuznetsov Preston Timmons David Ripton + Pieter Zieschang Dusty Phillips Lukas Renggli Guenter Jantzen @@ -143,6 +145,7 @@ Andrew Durdin Ben Young Michael Schneider + Yusuke Tsutsumi Nicholas Riley Jason Chu Igor Trindade Oliveira @@ -154,7 +157,6 @@ Mariano Anaya anatoly techtonik Karl Bartel - Stefan Beyer Gabriel Lavoie Jared Grubb Alecsandru Patrascu @@ -165,7 +167,6 @@ Victor Stinner Andrews Medina Aaron Iles - p_zieschang at yahoo.de Toby Watson Daniel Patrick Stuart Williams @@ -177,6 +178,7 @@ Mikael Schönenberg Stanislaw Halik Mihnea Saracin + Matt Jackson Berkin Ilbeyi Gasper Zejn Faye Zhao @@ -184,12 +186,14 @@ Anders Qvist Corbin Simpson Chirag Jadwani + Pauli Virtanen Jonathan David Riehl Beatrice During Alex Perry Robert Zaremba Alan McIntyre Alexander Sedov + David C Ellis Vaibhav Sood Reuben Cummings Attila Gobi @@ -209,7 +213,6 @@ Arjun Naik Aaron Gallagher Alexis Daboville - Pieter Zieschang Karl Ramm Lukas Vacek Omer Katz @@ -237,12 +240,15 @@ Catalin Gabriel Manciu Jacob Oscarson Ryan Gonzalez + Antoine Dupre Kristjan Valur Jonsson Lucio Torre Richard Lancaster Dan Buch Lene Wagner Tomo Cocoa + Miro Hrončok + Anthony Sottile David Lievens Neil Blakey-Milner Henrik Vendelbo @@ -257,10 +263,12 @@ Bobby Impollonia Roberto De Ioris Jeong YunWon + andrewjlawrence Christopher Armstrong Aaron Tubbs Vasantha Ganesh K Jason Michalski + Radu Ciorba Markus Holtermann Andrew Thompson Yusei Tahara @@ -268,28 +276,26 @@ Fabio Niephaus Akira Li Gustavo Niemeyer - Rafał Gałczyński + Nate Bragg Lucas Stadler roberto at goyle + Carl Bordum Hansen Matt Bogosian Yury V. 
Zaytsev florinpapa Anders Sigfridsson - Matt Jackson Nikolay Zinov rafalgalczynski at gmail.com Joshua Gilbert Anna Katrina Dominguez Kim Jin Su Amber Brown - Miro Hrončok - Anthony Sottile - Nate Bragg + Andrew Stepanov + Rafał Gałczyński Ben Darnell Juan Francisco Cantero Hurtado Godefroid Chappelle Julian Berman - Michael Hudson-Doyle Stephan Busemann Dan Colish timo @@ -299,6 +305,7 @@ halgari Jim Baker Chris Lambacher + John Aldis coolbutuseless at gmail.com Mike Bayer Rodrigo Araújo @@ -307,6 +314,7 @@ OlivierBlanvillain Jonas Pfannschmidt Zearin + Johan Forsberg Andrey Churin Dan Crosta reubano at gmail.com @@ -316,8 +324,9 @@ Steve Papanik Eli Stevens Boglarka Vezer - gabrielg + gabrielg at ec2-54-146-239-158.compute-1.amazonaws.com PavloKapyshin + Hervé Beraud Tomer Chachamu Christopher Groskopf Asmo Soinio @@ -331,8 +340,8 @@ Michael Chermside Anna Ravencroft remarkablerocket - Pauli Virtanen Petre Vijiac + hgattic Berker Peksag Christian Muirhead soareschen @@ -351,12 +360,13 @@ Zooko Wilcox-O Hearn James Lan jiaaro + Evgenii Gorinov Markus Unterwaditzer Kristoffer Kleine Graham Markall Dan Loewenherz werat - Andrew Stepanov + Filip Salomonsson Niclas Olofsson Chris Pressey Tobias Diaz From pypy.commits at gmail.com Mon Jan 28 12:51:38 2019 From: pypy.commits at gmail.com (antocuni) Date: Mon, 28 Jan 2019 09:51:38 -0800 (PST) Subject: [pypy-commit] pypy default: start to write a release announcement Message-ID: <5c4f412a.1c69fb81.f5de5.0e6f@mx.google.com> Author: Antonio Cuni Branch: Changeset: r95736:8019bdc4d877 Date: 2019-01-28 18:50 +0100 http://bitbucket.org/pypy/pypy/changeset/8019bdc4d877/ Log: start to write a release announcement diff --git a/pypy/doc/release-v7.0.0.rst b/pypy/doc/release-v7.0.0.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/release-v7.0.0.rst @@ -0,0 +1,69 @@ +====================================================== +PyPy v7.0.0: triple release of 2.7, 3.5 and 3.6-alpha 
+====================================================== + +The PyPy team is proud to release the version 7.0.0 of PyPy, which includes +three different interpreters: + + - PyPy2.7, which is an interpreter supporting the syntax and the features of + Python 2.7 + + - PyPy3.5, which supports Python 3.5 + + - PyPy3.6-alpha: this is the first official release of PyPy to support 3.6 + features, although it is still considered alpha quality. + +All the interpreters are based on much the same codebase, thus the triple +release. + +XXX write release highlights + +You can download the v6.0 releases here: + + http://pypy.org/download.html + +We would like to thank our donors for the continued support of the PyPy +project. If PyPy is not quite good enough for your needs, we are available for +direct consulting work. + +We would also like to thank our contributors and encourage new people to join +the project. PyPy has many layers and we need help with all of them: `PyPy`_ +and `RPython`_ documentation improvements, tweaking popular `modules`_ to run +on pypy, or general `help`_ with making RPython's JIT even better. + +.. _`PyPy`: index.html +.. _`RPython`: https://rpython.readthedocs.org +.. _`modules`: project-ideas.html#make-more-python-modules-pypy-friendly +.. _`help`: project-ideas.html + + +What is PyPy? +============= + +PyPy is a very compliant Python interpreter, almost a drop-in replacement for +CPython 2.7, 3.5 and 3.6. It's fast (`PyPy and CPython 2.7.x`_ performance +comparison) due to its integrated tracing JIT compiler. + +We also welcome developers of other `dynamic languages`_ to see what RPython +can do for them. 
+ +The PyPy release supports: + + * **x86** machines on most common operating systems + (Linux 32/64 bits, Mac OS X 64 bits, Windows 32 bits, OpenBSD, FreeBSD) + + * big- and little-endian variants of **PPC64** running Linux, + + * **s390x** running Linux + +Unfortunately at the moment of writing our ARM buildbots are out of service, +so for now we are **not** releasing any binary for the ARM architecture. + +.. _`PyPy and CPython 2.7.x`: http://speed.pypy.org +.. _`dynamic languages`: http://rpython.readthedocs.io/en/latest/examples.html + + +Changelog +========= + +XXX write me From pypy.commits at gmail.com Mon Jan 28 14:00:51 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 28 Jan 2019 11:00:51 -0800 (PST) Subject: [pypy-commit] pypy default: first draft of release note Message-ID: <5c4f5163.1c69fb81.e21ff.2e99@mx.google.com> Author: Matti Picus Branch: Changeset: r95738:07c721ed19aa Date: 2019-01-28 16:20 +0200 http://bitbucket.org/pypy/pypy/changeset/07c721ed19aa/ Log: first draft of release note diff --git a/pypy/doc/release-v7.0.0.rst b/pypy/doc/release-v7.0.0.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/release-v7.0.0.rst @@ -0,0 +1,141 @@ +=========================================== +PyPy2.7, PyPy3.5. PyPy3.6alpha v7.0 release +=========================================== + +The PyPy team is proud to release PyPy2.7 (supporting Python 2.7 syntax), +PyPy3.5 (supporting Python 3.5 syntax with f-strings from 3.6), and our first +alpha release of PyPy3.6 for 64-bit linux only (supporting Python 3.6 syntax). +The releases are based on much the same codebase. + +This release is a feature release following our previous 6.0 release in April +2018. Our C-API compatibility layer ``cpyext`` is more mature, as more projects +use PyPy3.5 in their CI testing. Since these changes affect the included python +development header files, all c-extension modules must be recompiled for this +version. 
+ +Until we can work with downstream providers to distribute builds with PyPy, we +have made packages for some common packages `available as wheels`_. + +The GC now has `hooks`_ to gain more insights into its performance, and it is +now possible to manually manage the GC by using a combination of +``gc.disable`` and ``gc.collect_step``. See the `GC blog post`_. + + +We updated the `cffi`_ module included in PyPy to version 1.12, and the +`cppyy`_ backend to 1.4. Please use these to wrap your C and C++ code, +respectively, for a JIT friendly experience. + +As always, this release is 100% compatible with the previous one and fixed +several issues and bugs raised by the growing community of PyPy users. +We strongly recommend updating. + +The PyPy3.6 release and the Windows PyPy3.5 release are still not production +quality so your mileage may vary. There are open issues with incomplete +compatibility and c-extension support. + +The utf8 branch that changes internal representation of unicode to utf8 did not +make it into the release, so there is still more goodness coming. + +You can download the v7.0 releases here: + + http://pypy.org/download.html + +We would like to thank our donors for the continued support of the PyPy +project. If PyPy is not quite good enough for your needs, we are available for +direct consulting work. + +We would also like to thank our contributors and encourage new people to join +the project. PyPy has many layers and we need help with all of them: `PyPy`_ +and `RPython`_ documentation improvements, tweaking popular `modules`_ to run +on pypy, or general `help`_ with making RPython's JIT even better. + +.. _`PyPy`: index.html +.. _`RPython`: https://rpython.readthedocs.org +.. _`help`: project-ideas.html +.. _`cffi`: http://cffi.readthedocs.io +.. _`cppyy`: https://cppyy.readthedocs.io +.. _`available as wheels`: https://github.com/antocuni/pypy-wheels +.. 
_`GC blog post`: https://morepypy.blogspot.com/2019/01/pypy-for-low-latency-systems.html + +What is PyPy? +============= + +PyPy is a very compliant Python interpreter, almost a drop-in replacement for +CPython 2.7, 3.5 and 3.6. It's fast (`PyPy and CPython 2.7.x`_ performance comparison) +due to its integrated tracing JIT compiler. + +We also welcome developers of other `dynamic languages`_ to see what RPython +can do for them. + +The PyPy release supports: + + * **x86** machines on most common operating systems + (Linux 32/64 bits, Mac OS X 64 bits, Windows 32 bits, OpenBSD, FreeBSD) + + * newer **ARM** hardware (ARMv6 or ARMv7, with VFPv3) running Linux, + + * big- and little-endian variants of **PPC64** running Linux, + + * **s390x** running Linux + +.. _`PyPy and CPython 2.7.x`: http://speed.pypy.org +.. _`dynamic languages`: http://rpython.readthedocs.io/en/latest/examples.html + +Changelog +========= + +If not specified, the changes are shared across versions + +* Support ``__set_name__``, ``__init_subclass__`` (Py3.6) +* Support ``cppyy`` in Py3.5 and Py3.6 +* Use implementation-specific site directories in ``sysconfig`` (Py3.5, Py3.6) +* Adding detection of gcc to ``sysconfig`` (Py3.5, Py3.6) +* Fix multiprocessing regression on newer glibcs +* Make sure 'blocking-ness' of socket is set along with default timeout +* Include ``crypt.h`` for ``crypt()`` on Linux +* Improve and re-organize the contributing_ documentation +* Make the ``__module__`` attribute writable, fixing an incompatibility with + NumPy 1.16 +* Implement ``Py_ReprEnter``, ``Py_ReprLeave(), ``PyMarshal_ReadObjectFromString``, + ``PyMarshal_WriteObjectToString``, ``PyObject_DelItemString``, + ``PyMapping_DelItem``, ``PyMapping_DelItemString``, ``PyEval_GetFrame``, + ``PyOS_InputHook``, ``PyErr_FormatFromCause`` (Py3.6), +* Implement new wordcode instruction encoding (Py3.6) +* Log additional gc-minor and gc-collect-step info in the PYPYLOG +* Set ``reverse-debugger`` active by default. 
For more information, see + https://bitbucket.org/pypy/revdb +* Support underscores in numerical literals like ``'4_2'`` (Py3.6) +* Pre-emptively raise MemoryError if the size of dequeue in ``_collections.deque`` + is too large (Py3.5) +* Fix multithreading issues in calls to ``os.setenv`` +* Add missing defines and typedefs for numpy and pandas on MSVC +* Add CPython macros like ``Py_NAN`` to header files +* Rename the ``MethodType`` to ``instancemethod``, like CPython +* Better support for `async with` in generators (Py3.5, Py3.6) +* Improve the performance of ``pow(a, b, c)`` if ``c`` is a large integer +* Now ``vmprof`` works on FreeBSD +* Support GNU Hurd, fixes for FreeBSD +* Add deprecation warning if type of result of ``__float__`` is float inherited + class (Py3.6) +* Fix async generator bug when yielding a ``StopIteration`` (Py3.6) +* Speed up ``max(list-of-int)`` from non-jitted code +* Fix Windows ``os.listdir()`` for some cases (see CPython #32539) +* Add ``select.PIPE_BUF`` +* Use ``subprocess`` to avoid shell injection in ``shutil`` module +* Rename ``_Py_ZeroStruct`` to ``_Py_FalseStruct`` (Py3.5, Py3.6) +* Remove some cpyext names for Py3.5, Py3.6 +* Enable use of unicode file names in ``dlopen`` +* Backport CPython fix for ``thread.RLock`` +* Make GC hooks measure time in seconds (as opposed to an opaque unit) +* Refactor and reorganize tests in ``test_lib_pypy`` +* Check error values in ``socket.setblocking`` (Py3.6) +* Add support for FsPath to os.unlink() (Py3.6) +* Fix freezing builtin modules at translation +* Tweak ``W_UnicodeDictionaryStrategy`` which speeds up dictionaries with only + unicode keys + +We also refactored many parts of the JIT bridge optimizations, as well as cpyext +internals, and together with new contributors fixed issues, added new +documentation, and cleaned up the codebase. + +.. 
_contributing: http://doc.pypy.org/en/latest/contributing.html From pypy.commits at gmail.com Mon Jan 28 14:00:54 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 28 Jan 2019 11:00:54 -0800 (PST) Subject: [pypy-commit] pypy py3.6: merge py3.5 into branch Message-ID: <5c4f5166.1c69fb81.5bf2d.fad5@mx.google.com> Author: Matti Picus Branch: py3.6 Changeset: r95739:614f05464dbb Date: 2019-01-28 16:22 +0200 http://bitbucket.org/pypy/pypy/changeset/614f05464dbb/ Log: merge py3.5 into branch diff too long, truncating to 2000 out of 2143 lines diff --git a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py --- a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py +++ b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py @@ -325,19 +325,32 @@ a = array.array('H', [10000, 20000, 30000]) c = ffi.from_buffer(a) assert ffi.typeof(c) is ffi.typeof("char[]") + assert len(c) == 6 ffi.cast("unsigned short *", c)[1] += 500 assert list(a) == [10000, 20500, 30000] - assert c == ffi.from_buffer(a, True) + assert c == ffi.from_buffer("char[]", a, True) assert c == ffi.from_buffer(a, require_writable=True) # + c = ffi.from_buffer("unsigned short[]", a) + assert len(c) == 3 + assert c[1] == 20500 + # p = ffi.from_buffer(b"abcd") assert p[2] == b"c" # - assert p == ffi.from_buffer(b"abcd", False) - py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) + assert p == ffi.from_buffer(b"abcd", require_writable=False) + py.test.raises((TypeError, BufferError), ffi.from_buffer, + "char[]", b"abcd", True) py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", require_writable=True) + def test_release(self): + ffi = FFI() + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or segfault... 
+ ffi.release(p) # no effect + def test_memmove(self): ffi = FFI() p = ffi.new("short[]", [-1234, -2345, -3456, -4567, -5678]) diff --git a/extra_tests/cffi_tests/cffi1/test_ffi_obj.py b/extra_tests/cffi_tests/cffi1/test_ffi_obj.py --- a/extra_tests/cffi_tests/cffi1/test_ffi_obj.py +++ b/extra_tests/cffi_tests/cffi1/test_ffi_obj.py @@ -239,19 +239,31 @@ def test_ffi_from_buffer(): import array ffi = _cffi1_backend.FFI() - a = array.array('H', [10000, 20000, 30000]) + a = array.array('H', [10000, 20000, 30000, 40000]) c = ffi.from_buffer(a) assert ffi.typeof(c) is ffi.typeof("char[]") + assert len(c) == 8 ffi.cast("unsigned short *", c)[1] += 500 - assert list(a) == [10000, 20500, 30000] - assert c == ffi.from_buffer(a, True) + assert list(a) == [10000, 20500, 30000, 40000] + py.test.raises(TypeError, ffi.from_buffer, a, True) + assert c == ffi.from_buffer("char[]", a, True) assert c == ffi.from_buffer(a, require_writable=True) # + c = ffi.from_buffer("unsigned short[]", a) + assert len(c) == 4 + assert c[1] == 20500 + # + c = ffi.from_buffer("unsigned short[2][2]", a) + assert len(c) == 2 + assert len(c[0]) == 2 + assert c[0][1] == 20500 + # p = ffi.from_buffer(b"abcd") assert p[2] == b"c" # - assert p == ffi.from_buffer(b"abcd", False) - py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) + assert p == ffi.from_buffer(b"abcd", require_writable=False) + py.test.raises((TypeError, BufferError), ffi.from_buffer, + "char[]", b"abcd", True) py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", require_writable=True) diff --git a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py --- a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py +++ b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py @@ -1457,6 +1457,35 @@ import gc; gc.collect(); gc.collect(); gc.collect() assert seen == [3] + def test_release(self): + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or 
segfault... + ffi.release(p) # no effect + + def test_release_new_allocator(self): + seen = [] + def myalloc(size): + seen.append(size) + return ffi.new("char[]", b"X" * size) + def myfree(raw): + seen.append(raw) + alloc2 = ffi.new_allocator(alloc=myalloc, free=myfree) + p = alloc2("int[]", 15) + assert seen == [15 * 4] + ffi.release(p) + assert seen == [15 * 4, p] + ffi.release(p) # no effect + assert seen == [15 * 4, p] + # + del seen[:] + p = alloc2("struct ab *") + assert seen == [2 * 4] + ffi.release(p) + assert seen == [2 * 4, p] + ffi.release(p) # no effect + assert seen == [2 * 4, p] + def test_CData_CType(self): assert isinstance(ffi.cast("int", 0), ffi.CData) assert isinstance(ffi.new("int *"), ffi.CData) @@ -1647,24 +1676,6 @@ py.test.raises(TypeError, len, q.a) py.test.raises(TypeError, list, q.a) - def test_from_buffer(self): - import array - a = array.array('H', [10000, 20000, 30000]) - c = ffi.from_buffer(a) - assert ffi.typeof(c) is ffi.typeof("char[]") - ffi.cast("unsigned short *", c)[1] += 500 - assert list(a) == [10000, 20500, 30000] - assert c == ffi.from_buffer(a, True) - assert c == ffi.from_buffer(a, require_writable=True) - # - p = ffi.from_buffer(b"abcd") - assert p[2] == b"c" - # - assert p == ffi.from_buffer(b"abcd", False) - py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) - py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", - require_writable=True) - def test_all_primitives(self): assert set(PRIMITIVE_TO_INDEX) == set([ "char", diff --git a/extra_tests/test_bufferedreader.py b/extra_tests/test_bufferedreader.py --- a/extra_tests/test_bufferedreader.py +++ b/extra_tests/test_bufferedreader.py @@ -88,7 +88,7 @@ assert self.stream.readline(80) == expected @pytest.mark.parametrize('StreamCls', [Stream, StreamCFFI]) - at settings(max_examples=50) + at settings(max_examples=50, deadline=None) @given(params=data_and_sizes(), chunk_size=st.integers(MIN_READ_SIZE, 8192)) def test_stateful(params, 
chunk_size, StreamCls): data, sizes = params diff --git a/extra_tests/test_datetime.py b/extra_tests/test_datetime.py --- a/extra_tests/test_datetime.py +++ b/extra_tests/test_datetime.py @@ -33,7 +33,9 @@ (timedelta_safe(1, 2, 3), "timedelta_safe(1, 2, 3)"), ]) def test_repr(obj, expected): - assert repr(obj) == expected + # XXX: there's a discrepancy between datetime.py and CPython's _datetime + # for the repr() of Python-defined subclasses of datetime classes. + assert repr(obj).endswith(expected) @pytest.mark.parametrize("obj", [ datetime.date.today(), diff --git a/lib-python/3/distutils/sysconfig_pypy.py b/lib-python/3/distutils/sysconfig_pypy.py --- a/lib-python/3/distutils/sysconfig_pypy.py +++ b/lib-python/3/distutils/sysconfig_pypy.py @@ -10,7 +10,7 @@ import sys import os -import imp, _imp +import _imp from distutils.errors import DistutilsPlatformError diff --git a/lib-python/3/test/test_dictviews.py b/lib-python/3/test/test_dictviews.py --- a/lib-python/3/test/test_dictviews.py +++ b/lib-python/3/test/test_dictviews.py @@ -1,3 +1,4 @@ +from test import support import collections import copy import pickle @@ -5,6 +6,7 @@ class DictSetTest(unittest.TestCase): + @support.cpython_only def test_constructors_not_callable(self): kt = type({}.keys()) self.assertRaises(TypeError, kt, {}) diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py --- a/lib_pypy/_ctypes/primitive.py +++ b/lib_pypy/_ctypes/primitive.py @@ -410,6 +410,6 @@ id(self)) def __bool__(self): - return self._buffer[0] not in (0, '\x00') + return self._buffer[0] not in (0, b'\x00') from _ctypes.function import CFuncPtr diff --git a/lib_pypy/_gdbm.py b/lib_pypy/_gdbm.py --- a/lib_pypy/_gdbm.py +++ b/lib_pypy/_gdbm.py @@ -74,12 +74,11 @@ self.__check_closed() key = _checkstr(key) return lib.pygdbm_exists(self.__ll_dbm, key, len(key)) - has_key = __contains__ def get(self, key, default=None): with _lock: self.__check_closed() - key = _checkstr(key) + key = _checkstr(key) drec = 
lib.pygdbm_fetch(self.__ll_dbm, key, len(key)) if not drec.dptr: return default diff --git a/lib_pypy/_pypy_collections.py b/lib_pypy/_pypy_collections.py --- a/lib_pypy/_pypy_collections.py +++ b/lib_pypy/_pypy_collections.py @@ -1,6 +1,5 @@ from __pypy__ import reversed_dict, move_to_end, objects_in_repr from _operator import eq as _eq -import _collections_abc class OrderedDict(dict): @@ -29,7 +28,33 @@ raise TypeError('expected at most 1 arguments, got %d' % len(args)) self.__update(*args, **kwds) - update = __update = _collections_abc.MutableMapping.update + def update(*args, **kwds): + ''' D.update([E, ]**F) -> None. Update D from mapping/iterable E and F. + If E present and has a .keys() method, does: for k in E: D[k] = E[k] + If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v + In either case, this is followed by: for k, v in F.items(): D[k] = v + ''' + if not args: + raise TypeError("descriptor 'update' of 'OrderedDict' object " + "needs an argument") + self, *args = args + if len(args) > 1: + raise TypeError('update expected at most 1 arguments, got %d' % + len(args)) + if args: + other = args[0] + if hasattr(other, 'items'): + for key, value in other.items(): + self[key] = value + elif hasattr(other, "keys"): + for key in other.keys(): + self[key] = other[key] + else: + for key, value in other: + self[key] = value + for key, value in kwds.items(): + self[key] = value + __update = update def __reversed__(self): return reversed_dict(self) @@ -106,17 +131,20 @@ "D.values() -> an object providing a view on D's values" return _OrderedDictValuesView(self) +dict_keys = type({}.keys()) +dict_values = type({}.values()) +dict_items = type({}.items()) -class _OrderedDictKeysView(_collections_abc.KeysView): +class _OrderedDictKeysView(dict_keys): def __reversed__(self): - yield from reversed_dict(self._mapping) + yield from reversed_dict(self._dict) -class _OrderedDictItemsView(_collections_abc.ItemsView): +class _OrderedDictItemsView(dict_items): 
def __reversed__(self): - for key in reversed_dict(self._mapping): - yield (key, self._mapping[key]) + for key in reversed_dict(self._dict): + yield (key, self._dict[key]) -class _OrderedDictValuesView(_collections_abc.ValuesView): +class _OrderedDictValuesView(dict_values): def __reversed__(self): - for key in reversed_dict(self._mapping): - yield self._mapping[key] + for key in reversed_dict(self._dict): + yield self._dict[key] diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -16,6 +16,8 @@ # Python 3.x basestring = str +_unspecified = object() + class FFI(object): @@ -341,15 +343,22 @@ # """ # note that 'buffer' is a type, set on this instance by __init__ - def from_buffer(self, python_buffer, require_writable=False): - """Return a that points to the data of the + def from_buffer(self, cdecl, python_buffer=_unspecified, + require_writable=False): + """Return a cdata of the given type pointing to the data of the given Python object, which must support the buffer interface. Note that this is not meant to be used on the built-in types str or unicode (you can build 'char[]' arrays explicitly) but only on objects containing large quantities of raw data in some other format, like 'array.array' or numpy arrays. + + The first argument is optional and default to 'char[]'. 
""" - return self._backend.from_buffer(self.BCharA, python_buffer, + if python_buffer is _unspecified: + cdecl, python_buffer = self.BCharA, cdecl + elif isinstance(cdecl, basestring): + cdecl = self._typeof(cdecl) + return self._backend.from_buffer(cdecl, python_buffer, require_writable) def memmove(self, dest, src, n): @@ -530,6 +539,9 @@ def from_handle(self, x): return self._backend.from_handle(x) + def release(self, x): + self._backend.release(x) + def set_unicode(self, enabled_flag): """Windows: if 'enabled_flag' is True, enable the UNICODE and _UNICODE defines in C, and declare the types like TCHAR and LPTCSTR diff --git a/lib_pypy/cffi/cparser.py b/lib_pypy/cffi/cparser.py --- a/lib_pypy/cffi/cparser.py +++ b/lib_pypy/cffi/cparser.py @@ -16,6 +16,13 @@ except ImportError: lock = None +def _workaround_for_static_import_finders(): + # Issue #392: packaging tools like cx_Freeze can not find these + # because pycparser uses exec dynamic import. This is an obscure + # workaround. This function is never called. + import pycparser.yacctab + import pycparser.lextab + CDEF_SOURCE_STRING = "" _r_comment = re.compile(r"/\*.*?\*/|//([^\n\\]|\\.)*?$", re.DOTALL | re.MULTILINE) diff --git a/pypy/conftest.py b/pypy/conftest.py --- a/pypy/conftest.py +++ b/pypy/conftest.py @@ -14,6 +14,18 @@ rsyncdirs = ['.', '../lib-python', '../lib_pypy', '../demo'] rsyncignore = ['_cache'] +try: + from hypothesis import settings, __version__ +except ImportError: + pass +else: + if __version__[:2] < '3.6': + s = settings(deadline=None) + settings.register_profile('default', s) + else: + settings.register_profile('default', deadline=None) + settings.load_profile('default') + # PyPy's command line extra options (these are added # to py.test's standard options) # diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py --- a/pypy/doc/conf.py +++ b/pypy/doc/conf.py @@ -65,10 +65,15 @@ # |version| and |release|, also used in various other places throughout the # built documents. 
# + +# Make sure to keep this in sync with: +# module/sys/version.py +# module/cpyext/include/patchlevel.h +# # The short X.Y version. -version = '6.0' +version = '7.1' # The full version, including alpha/beta/rc tags. -release = '6.0.0' +release = '7.1.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst --- a/pypy/doc/how-to-release.rst +++ b/pypy/doc/how-to-release.rst @@ -16,9 +16,6 @@ How to Create a PyPy Release ++++++++++++++++++++++++++++ -Overview --------- - As a meta rule setting up issues in the tracker for items here may help not forgetting things. A set of todo files may also work. @@ -28,17 +25,54 @@ Release Steps -------------- +++++++++++++++ -* If needed, make a release branch -* Bump the - pypy version number in module/sys/version.py and in - module/cpyext/include/patchlevel.h and in doc/conf.py. The branch - will capture the revision number of this change for the release. +Make the release branch +------------------------ - Some of the next updates may be done before or after branching; make - sure things are ported back to the trunk and to the branch as - necessary. +This is needed only in case you are doing a new major version; if not, you can +probably reuse the existing release branch. + +We want to be able to freely merge default into the branch and vice-versa; +thus we need to do a complicate dance to avoid to patch the version number +when we do a merge:: + + $ hg up -r default + $ # edit the version to e.g. 
7.0.0-final + $ hg ci + $ hg branch release-pypy2.7-7.x && hg ci + $ hg up -r default + $ # edit the version to 7.1.0-alpha0 + $ hg ci + $ hg up -r release-pypy2.7-7.x + $ hg merge default + $ # edit the version to AGAIN 7.0.0-final + $ hg ci + +Then, we need to do the same for the 3.x branch:: + + $ hg up -r py3.5 + $ hg merge default # this brings the version fo 7.1.0-alpha0 + $ hg branch release-pypy3.5-7.x + $ # edit the version to 7.0.0-final + $ hg ci + $ hg up -r py3.5 + $ hg merge release-pypy3.5-7.x + $ # edit the version to 7.1.0-alpha0 + $ hg ci + +To change the version, you need to edit three files: + + - ``module/sys/version.py`` + + - ``module/cpyext/include/patchlevel.h`` + + - ``doc/conf.py`` + + +Other steps +----------- + * Make sure the RPython builds on the buildbot pass with no failures diff --git a/pypy/interpreter/app_main.py b/pypy/interpreter/app_main.py --- a/pypy/interpreter/app_main.py +++ b/pypy/interpreter/app_main.py @@ -3,7 +3,6 @@ # See test/test_app_main. 
# Missing vs CPython: -b, -d, -x -from __future__ import print_function, unicode_literals USAGE1 = __doc__ = """\ Options and arguments (and corresponding environment variables): -B : don't write .py[co] files on import; also PYTHONDONTWRITEBYTECODE=x @@ -334,7 +333,7 @@ del encerr def create_stdio(fd, writing, name, encoding, errors, unbuffered): - import io + import _io # stdin is always opened in buffered mode, first because it # shouldn't make a difference in common use cases, second because # TextIOWrapper depends on the presence of a read1() method which @@ -342,7 +341,7 @@ buffering = 0 if unbuffered and writing else -1 mode = 'w' if writing else 'r' try: - buf = io.open(fd, mode + 'b', buffering, closefd=False) + buf = _io.open(fd, mode + 'b', buffering, closefd=False) except OSError as e: if e.errno != errno.EBADF: raise @@ -352,7 +351,7 @@ raw.name = name # translate \r\n to \n for sys.stdin on Windows newline = None if sys.platform == 'win32' and not writing else '\n' - stream = io.TextIOWrapper(buf, encoding, errors, newline=newline, + stream = _io.TextIOWrapper(buf, encoding, errors, newline=newline, line_buffering=unbuffered or raw.isatty()) stream.mode = mode return stream @@ -549,12 +548,6 @@ return options -# this indirection is needed to be able to import this module on python2, else -# we have a SyntaxError: unqualified exec in a nested function - at hidden_applevel -def exec_(src, dic): - exec(src, dic) - @hidden_applevel def run_command_line(interactive, inspect, @@ -664,7 +657,7 @@ else: if not isolated: sys.path.insert(0, '') - success = run_toplevel(exec_, bytes, mainmodule.__dict__) + success = run_toplevel(exec, bytes, mainmodule.__dict__) elif run_module != 0: # handle the "-m" command # '' on sys.path is required also here @@ -704,7 +697,7 @@ python_startup, 'exec', PyCF_ACCEPT_NULL_BYTES) - exec_(co_python_startup, mainmodule.__dict__) + exec(co_python_startup, mainmodule.__dict__) mainmodule.__file__ = python_startup 
mainmodule.__cached__ = None run_toplevel(run_it) @@ -722,7 +715,7 @@ def run_it(): co_stdin = compile(sys.stdin.read(), '', 'exec', PyCF_ACCEPT_NULL_BYTES) - exec_(co_stdin, mainmodule.__dict__) + exec(co_stdin, mainmodule.__dict__) mainmodule.__file__ = '' mainmodule.__cached__ = None success = run_toplevel(run_it) @@ -764,7 +757,7 @@ co = marshal.load(f) if type(co) is not type((lambda:0).__code__): raise RuntimeError("Bad code object in .pyc file") - exec_(co, namespace) + exec(co, namespace) args = (execfile, filename, mainmodule.__dict__) else: filename = sys.argv[0] @@ -792,7 +785,7 @@ code = f.read() co = compile(code, filename, 'exec', PyCF_ACCEPT_NULL_BYTES) - exec_(co, namespace) + exec(co, namespace) args = (execfile, filename, mainmodule.__dict__) success = run_toplevel(*args) diff --git a/pypy/interpreter/test/test_app_main.py b/pypy/interpreter/test/test_app_main.py --- a/pypy/interpreter/test/test_app_main.py +++ b/pypy/interpreter/test/test_app_main.py @@ -1043,36 +1043,6 @@ assert data.startswith("15\\u20ac ('strict', 'backslashreplace')") -class TestAppMain: - def test_print_info(self): - from pypy.interpreter import app_main - import sys, cStringIO - prev_so = sys.stdout - prev_ti = getattr(sys, 'pypy_translation_info', 'missing') - sys.pypy_translation_info = { - 'translation.foo': True, - 'translation.bar': 42, - 'translation.egg.something': None, - 'objspace.x': 'hello', - } - try: - sys.stdout = f = cStringIO.StringIO() - py.test.raises(SystemExit, app_main.print_info) - finally: - sys.stdout = prev_so - if prev_ti == 'missing': - del sys.pypy_translation_info - else: - sys.pypy_translation_info = prev_ti - assert f.getvalue() == ("[objspace]\n" - " x = 'hello'\n" - "[translation]\n" - " bar = 42\n" - " [egg]\n" - " something = None\n" - " foo = True\n") - - @py.test.mark.skipif('config.getoption("runappdirect")') class AppTestAppMain: def setup_class(self): diff --git a/pypy/module/__builtin__/state.py b/pypy/module/__builtin__/state.py --- 
a/pypy/module/__builtin__/state.py +++ b/pypy/module/__builtin__/state.py @@ -2,8 +2,8 @@ class State: def __init__(self, space): self.w_open = space.appexec([], """(): - import io - return io.open""") - + import _io + return _io.open""") + def get(space): return space.fromcache(State) diff --git a/pypy/module/__pypy__/test/test_builders.py b/pypy/module/__pypy__/test/test_builders.py --- a/pypy/module/__pypy__/test/test_builders.py +++ b/pypy/module/__pypy__/test/test_builders.py @@ -4,32 +4,32 @@ def test_simple(self): from __pypy__.builders import StringBuilder b = StringBuilder() - b.append("abc") - b.append("123") - b.append("1") + b.append(u"abc") + b.append(u"123") + b.append(u"1") s = b.build() - assert s == "abc1231" + assert s == u"abc1231" assert b.build() == s - b.append("123") - assert b.build() == s + "123" + b.append(u"123") + assert b.build() == s + u"123" def test_preallocate(self): from __pypy__.builders import StringBuilder b = StringBuilder(10) - b.append("abc") - b.append("123") + b.append(u"abc") + b.append(u"123") s = b.build() - assert s == "abc123" + assert s == u"abc123" def test_append_slice(self): from __pypy__.builders import StringBuilder b = StringBuilder() - b.append_slice("abcdefgh", 2, 5) - raises(ValueError, b.append_slice, "1", 2, 1) + b.append_slice(u"abcdefgh", 2, 5) + raises(ValueError, b.append_slice, u"1", 2, 1) s = b.build() - assert s == "cde" - b.append_slice("abc", 1, 2) - assert b.build() == "cdeb" + assert s == u"cde" + b.append_slice(u"abc", 1, 2) + assert b.build() == u"cdeb" def test_stringbuilder(self): from __pypy__.builders import BytesBuilder diff --git a/pypy/module/_cffi_backend/__init__.py b/pypy/module/_cffi_backend/__init__.py --- a/pypy/module/_cffi_backend/__init__.py +++ b/pypy/module/_cffi_backend/__init__.py @@ -52,6 +52,7 @@ 'unpack': 'func.unpack', 'buffer': 'cbuffer.MiniBuffer', 'memmove': 'func.memmove', + 'release': 'func.release', 'get_errno': 'cerrno.get_errno', 'set_errno': 'cerrno.set_errno', 
diff --git a/pypy/module/_cffi_backend/cdataobj.py b/pypy/module/_cffi_backend/cdataobj.py --- a/pypy/module/_cffi_backend/cdataobj.py +++ b/pypy/module/_cffi_backend/cdataobj.py @@ -476,6 +476,19 @@ def get_structobj(self): return None + def enter_exit(self, exit_now): + raise oefmt(self.space.w_ValueError, + "only 'cdata' object from ffi.new(), ffi.gc(), ffi.from_buffer() " + "or ffi.new_allocator()() can be used with the 'with' keyword or " + "ffi.release()") + + def descr_enter(self): + self.enter_exit(False) + return self + + def descr_exit(self, args_w): + self.enter_exit(True) + class W_CDataMem(W_CData): """This is used only by the results of cffi.cast('int', x) @@ -528,14 +541,33 @@ def get_structobj(self): return self + def enter_exit(self, exit_now): + from pypy.module._cffi_backend.ctypeptr import W_CTypePtrOrArray + if not isinstance(self.ctype, W_CTypePtrOrArray): + W_CData.enter_exit(self, exit_now) + elif exit_now: + self._do_exit() + + def _do_exit(self): + raise NotImplementedError + class W_CDataNewStd(W_CDataNewOwning): """Subclass using the standard allocator, lltype.malloc()/lltype.free()""" - _attrs_ = [] + _attrs_ = ['explicitly_freed'] + explicitly_freed = False @rgc.must_be_light_finalizer def __del__(self): - lltype.free(self._ptr, flavor='raw') + if not self.explicitly_freed: + lltype.free(self._ptr, flavor='raw') + + def _do_exit(self): + if not self.explicitly_freed: + rgc.add_memory_pressure(-self._sizeof(), self) + self.explicitly_freed = True + rgc.may_ignore_finalizer(self) + lltype.free(self._ptr, flavor='raw') class W_CDataNewNonStd(W_CDataNewOwning): @@ -543,7 +575,16 @@ _attrs_ = ['w_raw_cdata', 'w_free'] def _finalize_(self): - self.space.call_function(self.w_free, self.w_raw_cdata) + if self.w_free is not None: + self.space.call_function(self.w_free, self.w_raw_cdata) + + def _do_exit(self): + w_free = self.w_free + if w_free is not None: + rgc.add_memory_pressure(-self._sizeof(), self) + self.w_free = None + 
self.may_unregister_rpython_finalizer(self.space) + self.space.call_function(w_free, self.w_raw_cdata) class W_CDataPtrToStructOrUnion(W_CData): @@ -573,6 +614,12 @@ else: return None + def enter_exit(self, exit_now): + if exit_now: + structobj = self.structobj + if isinstance(structobj, W_CDataNewOwning): + structobj._do_exit() + class W_CDataSliced(W_CData): """Subclass with an explicit length, for slices.""" @@ -611,21 +658,28 @@ class W_CDataFromBuffer(W_CData): _attrs_ = ['buf', 'length', 'w_keepalive'] - _immutable_fields_ = ['buf', 'length', 'w_keepalive'] + _immutable_fields_ = ['buf', 'length'] - def __init__(self, space, cdata, ctype, buf, w_object): + def __init__(self, space, cdata, length, ctype, buf, w_object): W_CData.__init__(self, space, cdata, ctype) self.buf = buf - self.length = buf.getlength() + self.length = length self.w_keepalive = w_object def get_array_length(self): return self.length def _repr_extra(self): - w_repr = self.space.repr(self.w_keepalive) - return "buffer len %d from '%s' object" % ( - self.length, self.space.type(self.w_keepalive).name) + if self.w_keepalive is not None: + name = self.space.type(self.w_keepalive).name + else: + name = "(released)" + return "buffer len %d from '%s' object" % (self.length, name) + + def enter_exit(self, exit_now): + # for now, limited effect on PyPy + if exit_now: + self.w_keepalive = None class W_CDataGCP(W_CData): @@ -640,6 +694,9 @@ self.register_finalizer(space) def _finalize_(self): + self.invoke_finalizer() + + def invoke_finalizer(self): w_destructor = self.w_destructor if w_destructor is not None: self.w_destructor = None @@ -649,6 +706,11 @@ self.w_destructor = None self.may_unregister_rpython_finalizer(self.space) + def enter_exit(self, exit_now): + if exit_now: + self.may_unregister_rpython_finalizer(self.space) + self.invoke_finalizer() + W_CData.typedef = TypeDef( '_cffi_backend.CData', @@ -678,5 +740,7 @@ __iter__ = interp2app(W_CData.iter), __weakref__ = 
make_weakref_descr(W_CData), __dir__ = interp2app(W_CData.dir), + __enter__ = interp2app(W_CData.descr_enter), + __exit__ = interp2app(W_CData.descr_exit), ) W_CData.typedef.acceptable_as_base_class = False diff --git a/pypy/module/_cffi_backend/ctypearray.py b/pypy/module/_cffi_backend/ctypearray.py --- a/pypy/module/_cffi_backend/ctypearray.py +++ b/pypy/module/_cffi_backend/ctypearray.py @@ -25,7 +25,7 @@ assert isinstance(ctptr, W_CTypePointer) W_CTypePtrOrArray.__init__(self, space, arraysize, extra, 0, ctptr.ctitem) - self.length = length + self.length = length # -1 if no length is given, e.g. 'int[]' self.ctptr = ctptr def _alignof(self): @@ -86,7 +86,7 @@ def _check_subscript_index(self, w_cdata, i): space = self.space if i < 0: - raise oefmt(space.w_IndexError, "negative index not supported") + raise oefmt(space.w_IndexError, "negative index") if i >= w_cdata.get_array_length(): raise oefmt(space.w_IndexError, "index too large for cdata '%s' (expected %d < %d)", @@ -96,7 +96,7 @@ def _check_slice_index(self, w_cdata, start, stop): space = self.space if start < 0: - raise oefmt(space.w_IndexError, "negative index not supported") + raise oefmt(space.w_IndexError, "negative index") if stop > w_cdata.get_array_length(): raise oefmt(space.w_IndexError, "index too large (expected %d <= %d)", diff --git a/pypy/module/_cffi_backend/ffi_obj.py b/pypy/module/_cffi_backend/ffi_obj.py --- a/pypy/module/_cffi_backend/ffi_obj.py +++ b/pypy/module/_cffi_backend/ffi_obj.py @@ -328,7 +328,8 @@ @unwrap_spec(require_writable=int) - def descr_from_buffer(self, w_python_buffer, require_writable=0): + def descr_from_buffer(self, w_cdecl, w_python_buffer=None, + require_writable=0): """\ Return a that points to the data of the given Python object, which must support the buffer interface. 
Note that this is @@ -337,9 +338,13 @@ containing large quantities of raw data in some other format, like 'array.array' or numpy arrays.""" # - w_ctchara = newtype._new_chara_type(self.space) - return func._from_buffer(self.space, w_ctchara, w_python_buffer, - require_writable) + if w_python_buffer is None: + w_python_buffer = w_cdecl + w_ctype = newtype._new_chara_type(self.space) + else: + w_ctype = self.ffi_type(w_cdecl, ACCEPT_STRING | ACCEPT_CTYPE) + return func.from_buffer(self.space, w_ctype, w_python_buffer, + require_writable) @unwrap_spec(w_arg=W_CData) @@ -703,6 +708,16 @@ pass return w_res + @unwrap_spec(w_cdata=W_CData) + def descr_release(self, w_cdata): + """\ +Release now the resources held by a 'cdata' object from ffi.new(), +ffi.gc() or ffi.from_buffer(). The cdata object must not be used +afterwards. + +'ffi.release(cdata)' is equivalent to 'cdata.__exit__()'.""" + w_cdata.enter_exit(True) + class W_InitOnceLock(W_Root): def __init__(self, space): @@ -777,6 +792,7 @@ new_allocator = interp2app(W_FFIObject.descr_new_allocator), new_handle = interp2app(W_FFIObject.descr_new_handle), offsetof = interp2app(W_FFIObject.descr_offsetof), + release = interp2app(W_FFIObject.descr_release), sizeof = interp2app(W_FFIObject.descr_sizeof), string = interp2app(W_FFIObject.descr_string), typeof = interp2app(W_FFIObject.descr_typeof), diff --git a/pypy/module/_cffi_backend/func.py b/pypy/module/_cffi_backend/func.py --- a/pypy/module/_cffi_backend/func.py +++ b/pypy/module/_cffi_backend/func.py @@ -112,16 +112,10 @@ @unwrap_spec(w_ctype=ctypeobj.W_CType, require_writable=int) def from_buffer(space, w_ctype, w_x, require_writable=0): - from pypy.module._cffi_backend import ctypearray, ctypeprim - # - if (not isinstance(w_ctype, ctypearray.W_CTypeArray) or - not isinstance(w_ctype.ctptr.ctitem, ctypeprim.W_CTypePrimitiveChar)): - raise oefmt(space.w_TypeError, - "needs 'char[]', got '%s'", w_ctype.name) - # - return _from_buffer(space, w_ctype, w_x, 
require_writable) - -def _from_buffer(space, w_ctype, w_x, require_writable): + from pypy.module._cffi_backend import ctypearray + if not isinstance(w_ctype, ctypearray.W_CTypeArray): + raise oefmt(space.w_TypeError, "expected an array ctype, got '%s'", + w_ctype.name) if space.isinstance_w(w_x, space.w_unicode): raise oefmt(space.w_TypeError, "from_buffer() cannot return the address of a unicode object") @@ -140,7 +134,37 @@ "buffer interface but cannot be rendered as a plain " "raw address on PyPy", w_x) # - return cdataobj.W_CDataFromBuffer(space, _cdata, w_ctype, buf, w_x) + buffersize = buf.getlength() + arraylength = w_ctype.length + if arraylength >= 0: + # it's an array with a fixed length; make sure that the + # buffer contains enough bytes. + if buffersize < w_ctype.size: + raise oefmt(space.w_ValueError, + "buffer is too small (%d bytes) for '%s' (%d bytes)", + buffersize, w_ctype.name, w_ctype.size) + else: + # it's an open 'array[]' + itemsize = w_ctype.ctitem.size + if itemsize == 1: + # fast path, performance only + arraylength = buffersize + elif itemsize > 0: + # give it as many items as fit the buffer. Ignore a + # partial last element. + arraylength = buffersize / itemsize + else: + # it's an array 'empty[]'. Unsupported obscure case: + # the problem is that setting the length of the result + # to anything large (like SSIZE_T_MAX) is dangerous, + # because if someone tries to loop over it, it will + # turn effectively into an infinite loop. 
+ raise oefmt(space.w_ZeroDivisionError, + "from_buffer('%s', ..): the actual length of the array " + "cannot be computed", w_ctype.name) + # + return cdataobj.W_CDataFromBuffer(space, _cdata, arraylength, + w_ctype, buf, w_x) # ____________________________________________________________ @@ -264,3 +288,7 @@ @unwrap_spec(w_cdata=cdataobj.W_CData, size=int) def gcp(space, w_cdata, w_destructor, size=0): return w_cdata.with_gc(w_destructor, size) + + at unwrap_spec(w_cdata=cdataobj.W_CData) +def release(space, w_cdata): + w_cdata.enter_exit(True) diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -3742,6 +3742,64 @@ p1[0] = b"g" assert ba == b"goo" +def test_from_buffer_types(): + BInt = new_primitive_type("int") + BIntP = new_pointer_type(BInt) + BIntA = new_array_type(BIntP, None) + lst = [-12345678, 87654321, 489148] + bytestring = buffer(newp(BIntA, lst))[:] + b'XYZ' + # + p1 = from_buffer(BIntA, bytestring) # int[] + assert typeof(p1) is BIntA + assert len(p1) == 3 + assert p1[0] == lst[0] + assert p1[1] == lst[1] + assert p1[2] == lst[2] + py.test.raises(IndexError, "p1[3]") + py.test.raises(IndexError, "p1[-1]") + # + py.test.raises(TypeError, from_buffer, BInt, bytestring) + py.test.raises(TypeError, from_buffer, BIntP, bytestring) + # + BIntA2 = new_array_type(BIntP, 2) + p2 = from_buffer(BIntA2, bytestring) # int[2] + assert typeof(p2) is BIntA2 + assert len(p2) == 2 + assert p2[0] == lst[0] + assert p2[1] == lst[1] + py.test.raises(IndexError, "p2[2]") + py.test.raises(IndexError, "p2[-1]") + assert p2 == p1 + # + BIntA4 = new_array_type(BIntP, 4) # int[4]: too big + py.test.raises(ValueError, from_buffer, BIntA4, bytestring) + # + BStruct = new_struct_type("foo") + complete_struct_or_union(BStruct, [('a1', BInt, -1), + ('a2', BInt, -1)]) + BStructP = new_pointer_type(BStruct) + 
BStructA = new_array_type(BStructP, None) + p1 = from_buffer(BStructA, bytestring) # struct[] + assert len(p1) == 1 + assert typeof(p1) is BStructA + assert p1[0].a1 == lst[0] + assert p1[0].a2 == lst[1] + py.test.raises(IndexError, "p1[1]") + # + BEmptyStruct = new_struct_type("empty") + complete_struct_or_union(BEmptyStruct, [], Ellipsis, 0) + assert sizeof(BEmptyStruct) == 0 + BEmptyStructP = new_pointer_type(BEmptyStruct) + BEmptyStructA = new_array_type(BEmptyStructP, None) + py.test.raises(ZeroDivisionError, from_buffer, # empty[] + BEmptyStructA, bytestring) + # + BEmptyStructA5 = new_array_type(BEmptyStructP, 5) + p1 = from_buffer(BEmptyStructA5, bytestring) # struct empty[5] + assert typeof(p1) is BEmptyStructA5 + assert len(p1) == 5 + assert cast(BIntP, p1) == from_buffer(BIntA, bytestring) + def test_memmove(): Short = new_primitive_type("short") ShortA = new_array_type(new_pointer_type(Short), None) @@ -4074,3 +4132,114 @@ assert_eq(cast(t5, 7.0), cast(t3, 7)) assert_lt(cast(t5, 3.1), 3.101) assert_gt(cast(t5, 3.1), 3) + +def test_explicit_release_new(): + # release() on a ffi.new() object has no effect on CPython, but + # really releases memory on PyPy. We can't test that effect + # though, because a released cdata is not marked. + BIntP = new_pointer_type(new_primitive_type("int")) + p = newp(BIntP) + p[0] = 42 + py.test.raises(IndexError, "p[1]") + release(p) + # here, reading p[0] might give garbage or segfault... + release(p) # no effect + # + BStruct = new_struct_type("struct foo") + BStructP = new_pointer_type(BStruct) + complete_struct_or_union(BStruct, [('p', BIntP, -1)]) + pstruct = newp(BStructP) + assert pstruct.p == cast(BIntP, 0) + release(pstruct) + # here, reading pstruct.p might give garbage or segfault... 
+ release(pstruct) # no effect + +def test_explicit_release_new_contextmgr(): + BIntP = new_pointer_type(new_primitive_type("int")) + with newp(BIntP) as p: + p[0] = 42 + assert p[0] == 42 + # here, reading p[0] might give garbage or segfault... + release(p) # no effect + +def test_explicit_release_badtype(): + BIntP = new_pointer_type(new_primitive_type("int")) + p = cast(BIntP, 12345) + py.test.raises(ValueError, release, p) + py.test.raises(ValueError, release, p) + BStruct = new_struct_type("struct foo") + BStructP = new_pointer_type(BStruct) + complete_struct_or_union(BStruct, [('p', BIntP, -1)]) + pstruct = newp(BStructP) + py.test.raises(ValueError, release, pstruct[0]) + +def test_explicit_release_badtype_contextmgr(): + BIntP = new_pointer_type(new_primitive_type("int")) + p = cast(BIntP, 12345) + py.test.raises(ValueError, "with p: pass") + py.test.raises(ValueError, "with p: pass") + +def test_explicit_release_gc(): + BIntP = new_pointer_type(new_primitive_type("int")) + seen = [] + intp1 = newp(BIntP, 12345) + p1 = cast(BIntP, intp1) + p = gcp(p1, seen.append) + assert seen == [] + release(p) + assert seen == [p1] + assert p1[0] == 12345 + assert p[0] == 12345 # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_gc_contextmgr(): + BIntP = new_pointer_type(new_primitive_type("int")) + seen = [] + intp1 = newp(BIntP, 12345) + p1 = cast(BIntP, intp1) + p = gcp(p1, seen.append) + with p: + assert p[0] == 12345 + assert seen == [] + assert seen == [p1] + assert p1[0] == 12345 + assert p[0] == 12345 # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_from_buffer(): + a = bytearray(b"xyz") + BChar = new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + p = from_buffer(BCharA, a) + assert p[2] == b"z" + release(p) + assert p[2] == b"z" # true so far, but might change to raise RuntimeError + release(p) # 
no effect + +def test_explicit_release_from_buffer_contextmgr(): + a = bytearray(b"xyz") + BChar = new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + p = from_buffer(BCharA, a) + with p: + assert p[2] == b"z" + assert p[2] == b"z" # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_bytearray_on_cpython(): + if '__pypy__' in sys.builtin_module_names: + py.test.skip("pypy's bytearray are never locked") + a = bytearray(b"xyz") + BChar = new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + a += b't' * 10 + p = from_buffer(BCharA, a) + py.test.raises(BufferError, "a += b'u' * 100") + release(p) + a += b'v' * 100 + release(p) # no effect + a += b'w' * 1000 + assert a == bytearray(b"xyz" + b't' * 10 + b'v' * 100 + b'w' * 1000) diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py --- a/pypy/module/_cffi_backend/test/test_ffi_obj.py +++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py @@ -282,19 +282,31 @@ import _cffi_backend as _cffi1_backend import array ffi = _cffi1_backend.FFI() - a = array.array('H', [10000, 20000, 30000]) + a = array.array('H', [10000, 20000, 30000, 40000]) c = ffi.from_buffer(a) assert ffi.typeof(c) is ffi.typeof("char[]") + assert len(c) == 8 ffi.cast("unsigned short *", c)[1] += 500 - assert list(a) == [10000, 20500, 30000] - assert c == ffi.from_buffer(a, True) + assert list(a) == [10000, 20500, 30000, 40000] + raises(TypeError, ffi.from_buffer, a, True) + assert c == ffi.from_buffer("char[]", a, True) assert c == ffi.from_buffer(a, require_writable=True) # + c = ffi.from_buffer("unsigned short[]", a) + assert len(c) == 4 + assert c[1] == 20500 + # + c = ffi.from_buffer("unsigned short[2][2]", a) + assert len(c) == 2 + assert len(c[0]) == 2 + assert c[0][1] == 20500 + # p = ffi.from_buffer(b"abcd") assert p[2] == b"c" # - assert p == 
ffi.from_buffer(b"abcd", False) - raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) + assert p == ffi.from_buffer(b"abcd", require_writable=False) + raises((TypeError, BufferError), ffi.from_buffer, + "char[]", b"abcd", True) raises((TypeError, BufferError), ffi.from_buffer, b"abcd", require_writable=True) diff --git a/pypy/module/_cffi_backend/test/test_recompiler.py b/pypy/module/_cffi_backend/test/test_recompiler.py --- a/pypy/module/_cffi_backend/test/test_recompiler.py +++ b/pypy/module/_cffi_backend/test/test_recompiler.py @@ -2108,3 +2108,36 @@ else: assert lib.__loader__ is None assert lib.__spec__ is None + + def test_release(self): + ffi, lib = self.prepare("", "test_release", "") + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or segfault... + ffi.release(p) # no effect + + def test_release_new_allocator(self): + ffi, lib = self.prepare("struct ab { int a, b; };", + "test_release_new_allocator", + "struct ab { int a, b; };") + seen = [] + def myalloc(size): + seen.append(size) + return ffi.new("char[]", b"X" * size) + def myfree(raw): + seen.append(raw) + alloc2 = ffi.new_allocator(alloc=myalloc, free=myfree) + p = alloc2("int[]", 15) + assert seen == [15 * 4] + ffi.release(p) + assert seen == [15 * 4, p] + ffi.release(p) # no effect + assert seen == [15 * 4, p] + # + del seen[:] + p = alloc2("struct ab *") + assert seen == [2 * 4] + ffi.release(p) + assert seen == [2 * 4, p] + ffi.release(p) # no effect + assert seen == [2 * 4, p] diff --git a/pypy/module/_collections/test/test_ordereddict.py b/pypy/module/_collections/test/test_ordereddict.py --- a/pypy/module/_collections/test/test_ordereddict.py +++ b/pypy/module/_collections/test/test_ordereddict.py @@ -22,3 +22,17 @@ assert d['x'] == 42 d.update({'y': 2}) assert d['y'] == 42 + + def test_reversed(self): + import sys + from _collections import OrderedDict + + pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)] + od = 
OrderedDict(pairs) + if '__pypy__' in sys.builtin_module_names: + # dict ordering is wrong when testing interpreted on top of CPython + pairs = list(dict(od).items()) + assert list(reversed(od)) == [t[0] for t in reversed(pairs)] + assert list(reversed(od.keys())) == [t[0] for t in reversed(pairs)] + assert list(reversed(od.values())) == [t[1] for t in reversed(pairs)] + assert list(reversed(od.items())) == list(reversed(pairs)) diff --git a/pypy/module/_cppyy/pythonify.py b/pypy/module/_cppyy/pythonify.py --- a/pypy/module/_cppyy/pythonify.py +++ b/pypy/module/_cppyy/pythonify.py @@ -1,8 +1,10 @@ # NOT_RPYTHON # do not load _cppyy here, see _post_import_startup() -import types import sys +class _C: + def _m(self): pass +MethodType = type(_C()._m) # Metaclasses are needed to store C++ static data members as properties and to # provide Python language features such as a customized __dir__ for namespaces @@ -238,7 +240,7 @@ # prepare dictionary for python-side C++ class representation def dispatch(self, m_name, signature): cppol = decl.__dispatch__(m_name, signature) - return types.MethodType(cppol, self, type(self)) + return MethodType(cppol, self, type(self)) d_class = {"__cppdecl__" : decl, "__new__" : make_new(decl), "__module__" : make_module_name(scope), diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -170,60 +170,48 @@ return False return space.isinstance_w(self.w_pattern, space.w_unicode) - def getstring(self, w_string): - """Accepts a string-like object (str, bytes, bytearray, buffer...) - and returns a tuple (len, rpython_unicode, rpython_str, rpython_buf), - where only one of the rpython_xxx is non-None. 
- """ - unicodestr = None - string = None - buf = None - space = self.space - if space.isinstance_w(w_string, space.w_unicode): - unicodestr = space.unicode_w(w_string) - length = len(unicodestr) - elif space.isinstance_w(w_string, space.w_bytes): - string = space.bytes_w(w_string) - length = len(string) - else: - buf = space.readbuf_w(w_string) - length = buf.getlength() - assert length >= 0 - return (length, unicodestr, string, buf) - - def make_ctx(self, w_string, pos=0, endpos=sys.maxint, flags=0): + def make_ctx(self, w_string, pos=0, endpos=sys.maxint): """Make a StrMatchContext, BufMatchContext or a UnicodeMatchContext for searching in the given w_string object.""" space = self.space - length, unicodestr, string, buf = self.getstring(w_string) if pos < 0: pos = 0 - elif pos > length: - pos = length if endpos < pos: endpos = pos - elif endpos > length: - endpos = length - flags = self.flags | flags - # - if unicodestr is not None: + if space.isinstance_w(w_string, space.w_unicode): if self.is_known_bytes(): raise oefmt(space.w_TypeError, "can't use a bytes pattern on a string-like " "object") - return rsre_core.UnicodeMatchContext(unicodestr, - pos, endpos, flags) + unicodestr = space.unicode_w(w_string) + length = len(unicodestr) + if pos > length: + pos = length + if endpos > length: + endpos = length + return rsre_core.UnicodeMatchContext( + unicodestr, pos, endpos, self.flags) + elif self.is_known_unicode(): + raise oefmt(space.w_TypeError, + "can't use a string pattern on a bytes-like " + "object") + elif space.isinstance_w(w_string, space.w_bytes): + string = space.bytes_w(w_string) + length = len(string) + if pos > length: + pos = length + if endpos > length: + endpos = length + return rsre_core.StrMatchContext(string, pos, endpos, self.flags) else: - if self.is_known_unicode(): - raise oefmt(space.w_TypeError, - "can't use a string pattern on a bytes-like " - "object") - if string is not None: - return rsre_core.StrMatchContext(string, - pos, endpos, 
flags) - else: - return rsre_core.BufMatchContext(buf, - pos, endpos, flags) + buf = space.readbuf_w(w_string) + size = buf.getlength() + assert size >= 0 + if pos > size: + pos = size + if endpos > size: + endpos = size + return rsre_core.BufMatchContext(buf, pos, endpos, self.flags) def getmatch(self, ctx, found): if found: @@ -336,20 +324,23 @@ # w_string are both string or both unicode objects, and if w_ptemplate # is a literal use_builder = False + is_buffer = False filter_as_unicode = filter_as_string = None if space.is_true(space.callable(w_ptemplate)): w_filter = w_ptemplate filter_is_callable = True else: - length, filter_as_unicode, filter_as_string, buf = ( - self.getstring(w_ptemplate)) - if filter_as_unicode is not None: + if space.isinstance_w(w_ptemplate, space.w_unicode): + filter_as_unicode = space.unicode_w(w_ptemplate) literal = u'\\' not in filter_as_unicode use_builder = ( space.isinstance_w(w_string, space.w_unicode) and literal) else: - if buf is not None: - filter_as_string = buf.as_str() + if space.isinstance_w(w_ptemplate, space.w_bytes): + filter_as_string = space.bytes_w(w_ptemplate) + else: + filter_as_string = space.readbuf_w(w_ptemplate).as_str() + is_buffer = True literal = '\\' not in filter_as_string use_builder = ( space.isinstance_w(w_string, space.w_bytes) and literal) @@ -360,7 +351,7 @@ # not a literal; hand it over to the template compiler # FIX for a CPython 3.5 bug: if w_ptemplate is a buffer # (e.g. a bytearray), convert it to a byte string here. 
- if buf is not None: + if is_buffer: w_ptemplate = space.newbytes(filter_as_string) w_re = import_re(space) w_filter = space.call_method(w_re, '_subx', diff --git a/pypy/module/cpyext/include/patchlevel.h b/pypy/module/cpyext/include/patchlevel.h --- a/pypy/module/cpyext/include/patchlevel.h +++ b/pypy/module/cpyext/include/patchlevel.h @@ -28,10 +28,12 @@ /* Version as a string */ #define PY_VERSION "3.6.1" -/* PyPy version as a string */ -#define PYPY_VERSION "6.1.0-alpha0" -#define PYPY_VERSION_NUM 0x06010000 - +/* PyPy version as a string: make sure to keep this in sync with: + * module/sys/version.py + * doc/conf.py + */ +#define PYPY_VERSION "7.1.0-alpha0" +#define PYPY_VERSION_NUM 0x07010000 /* Defined to mean a PyPy where cpyext holds more regular references to PyObjects, e.g. staying alive as long as the internal PyPy object stays alive. */ diff --git a/pypy/module/gc/__init__.py b/pypy/module/gc/__init__.py --- a/pypy/module/gc/__init__.py +++ b/pypy/module/gc/__init__.py @@ -4,7 +4,6 @@ class Module(MixedModule): interpleveldefs = { 'collect': 'interp_gc.collect', - 'collect_step': 'interp_gc.collect_step', 'enable': 'interp_gc.enable', 'disable': 'interp_gc.disable', 'isenabled': 'interp_gc.isenabled', @@ -23,6 +22,7 @@ 'get_stats': 'app_referents.get_stats', }) self.interpleveldefs.update({ + 'collect_step': 'interp_gc.collect_step', 'get_rpy_roots': 'referents.get_rpy_roots', 'get_rpy_referents': 'referents.get_rpy_referents', 'get_rpy_memory_usage': 'referents.get_rpy_memory_usage', diff --git a/pypy/module/pypyjit/test_pypy_c/test_containers.py b/pypy/module/pypyjit/test_pypy_c/test_containers.py --- a/pypy/module/pypyjit/test_pypy_c/test_containers.py +++ b/pypy/module/pypyjit/test_pypy_c/test_containers.py @@ -20,7 +20,7 @@ assert log.result % 1000 == 0 loop, = log.loops_by_filename(self.filepath) ops = loop.ops_by_id('look') - assert log.opnames(ops) == [] + assert log.opnames(ops) == ['guard_nonnull_class'] def test_identitydict(self): def 
fn(n): diff --git a/pypy/module/pypyjit/test_pypy_c/test_ffi.py b/pypy/module/pypyjit/test_pypy_c/test_ffi.py --- a/pypy/module/pypyjit/test_pypy_c/test_ffi.py +++ b/pypy/module/pypyjit/test_pypy_c/test_ffi.py @@ -407,6 +407,7 @@ i138 = call_i(ConstClass(_ll_1_raw_malloc_varsize_zero__Signed), 6, descr=...) check_memory_error(i138) setfield_gc(p132, i138, descr=...) + setfield_gc(p132, 0, descr=...) setfield_gc(p132, ConstPtr(ptr139), descr=...) setfield_gc(p132, -1, descr=...) setfield_gc(p0, p133, descr=...) diff --git a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py --- a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py +++ b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py @@ -244,6 +244,8 @@ f80 = raw_load_f(i67, i79, descr=) i81 = int_add(i71, 1) --TICK-- + i92 = int_le(i33, _) + guard_true(i92, descr=...) jump(..., descr=...) """) @@ -283,6 +285,8 @@ f86 = float_add(f74, f85) i87 = int_add(i76, 1) --TICK-- + i98 = int_le(i36, _) + guard_true(i98, descr=...) jump(..., descr=...) """) @@ -390,6 +394,8 @@ assert log.result == [0.] * N loop, = log.loops_by_filename(self.filepath) assert loop.match(""" + i4 = int_lt(i91, 0) + guard_false(i4, descr=...) i92 = int_ge(i91, i37) guard_false(i92, descr=...) i93 = int_add(i91, 1) diff --git a/pypy/module/pypyjit/test_pypy_c/test_misc.py b/pypy/module/pypyjit/test_pypy_c/test_misc.py --- a/pypy/module/pypyjit/test_pypy_c/test_misc.py +++ b/pypy/module/pypyjit/test_pypy_c/test_misc.py @@ -113,6 +113,7 @@ i12 = int_is_true(i4) guard_true(i12, descr=...) guard_not_invalidated(descr=...) + guard_nonnull_class(p10, ConstClass(W_IntObject), descr=...) i10p = getfield_gc_i(p10, descr=...) i10 = int_mul_ovf(2, i10p) guard_no_overflow(descr=...) @@ -146,12 +147,16 @@ RANGE_ITER_STEP_1 = """ guard_not_invalidated? # W_IntRangeStepOneIterator.next() + i80 = int_lt(i11, 0) + guard_false(i80, descr=...) i16 = int_lt(i11, i12) guard_true(i16, descr=...) 
i20 = int_add(i11, 1) setfield_gc(p4, i20, descr=<.* .*W_IntRangeIterator.inst_current .*>) guard_not_invalidated? i21 = force_token() + i89 = int_lt(0, i9) + guard_true(i89, descr=...) i88 = int_sub(i9, 1) # Compared with pypy2, we get these two operations extra. diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py --- a/pypy/module/sys/version.py +++ b/pypy/module/sys/version.py @@ -10,7 +10,10 @@ #XXX # sync CPYTHON_VERSION with patchlevel.h, package.py CPYTHON_API_VERSION = 1013 #XXX # sync with include/modsupport.h -PYPY_VERSION = (6, 1, 0, "alpha", 0) #XXX # sync patchlevel.h +# make sure to keep PYPY_VERSION in sync with: +# module/cpyext/include/patchlevel.h +# doc/conf.py +PYPY_VERSION = (7, 1, 0, "alpha", 0) import pypy diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -11,7 +11,7 @@ WrappedDefault, applevel, interp2app, unwrap_spec) from pypy.interpreter.mixedmodule import MixedModule from pypy.interpreter.signature import Signature -from pypy.interpreter.typedef import TypeDef +from pypy.interpreter.typedef import TypeDef, interp_attrproperty_w from pypy.interpreter.unicodehelper import decode_utf8 from pypy.objspace.std.util import negate @@ -1538,6 +1538,12 @@ descr_or, descr_ror = _as_set_op('or', 'update') descr_xor, descr_rxor = _as_set_op('xor', 'symmetric_difference_update') +def new_dict_items(space, w_type, w_dict): + w_dict = space.interp_w(W_DictMultiObject, w_dict) + w_obj = space.allocate_instance(W_DictViewItemsObject, w_type) + W_DictViewObject.__init__(w_obj, space, w_dict) + return w_obj + class W_DictViewItemsObject(W_DictViewObject, SetLikeDictView): def descr_iter(self, space): return W_DictMultiIterItemsObject(space, self.w_dict.iteritems()) @@ -1557,18 +1563,32 @@ return space.w_False return space.newbool(space.eq_w(w_value, w_found)) +def new_dict_keys(space, w_type, w_dict): + w_dict = 
space.interp_w(W_DictMultiObject, w_dict) + w_obj = space.allocate_instance(W_DictViewKeysObject, w_type) + W_DictViewObject.__init__(w_obj, space, w_dict) + return w_obj + class W_DictViewKeysObject(W_DictViewObject, SetLikeDictView): def descr_iter(self, space): return W_DictMultiIterKeysObject(space, self.w_dict.iterkeys()) + def descr_contains(self, space, w_key): return self.w_dict.descr_contains(space, w_key) +def new_dict_values(space, w_type, w_dict): + w_dict = space.interp_w(W_DictMultiObject, w_dict) + w_obj = space.allocate_instance(W_DictViewValuesObject, w_type) + W_DictViewObject.__init__(w_obj, space, w_dict) + return w_obj + class W_DictViewValuesObject(W_DictViewObject): def descr_iter(self, space): return W_DictMultiIterValuesObject(space, self.w_dict.itervalues()) W_DictViewItemsObject.typedef = TypeDef( "dict_items", + __new__ = interp2app(new_dict_items), __repr__ = interp2app(W_DictViewItemsObject.descr_repr), __len__ = interp2app(W_DictViewItemsObject.descr_len), __iter__ = interp2app(W_DictViewItemsObject.descr_iter), @@ -1590,10 +1610,12 @@ __xor__ = interp2app(W_DictViewItemsObject.descr_xor), __rxor__ = interp2app(W_DictViewItemsObject.descr_rxor), isdisjoint = interp2app(W_DictViewItemsObject.descr_isdisjoint), + _dict = interp_attrproperty_w('w_dict', cls=W_DictViewItemsObject), ) W_DictViewKeysObject.typedef = TypeDef( "dict_keys", + __new__ = interp2app(new_dict_keys), __repr__ = interp2app(W_DictViewKeysObject.descr_repr), __len__ = interp2app(W_DictViewKeysObject.descr_len), __iter__ = interp2app(W_DictViewKeysObject.descr_iter), @@ -1615,11 +1637,14 @@ __xor__ = interp2app(W_DictViewKeysObject.descr_xor), __rxor__ = interp2app(W_DictViewKeysObject.descr_rxor), isdisjoint = interp2app(W_DictViewKeysObject.descr_isdisjoint), + _dict = interp_attrproperty_w('w_dict', cls=W_DictViewKeysObject), ) W_DictViewValuesObject.typedef = TypeDef( "dict_values", + __new__ = interp2app(new_dict_values), __repr__ = 
interp2app(W_DictViewValuesObject.descr_repr), __len__ = interp2app(W_DictViewValuesObject.descr_len), __iter__ = interp2app(W_DictViewValuesObject.descr_iter), + _dict = interp_attrproperty_w('w_dict', cls=W_DictViewValuesObject), ) diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py --- a/pypy/objspace/std/test/test_dictmultiobject.py +++ b/pypy/objspace/std/test/test_dictmultiobject.py @@ -787,17 +787,6 @@ assert len(d.items()) == 2 assert len(d.values()) == 2 - def test_constructors_not_callable(self): - kt = type({}.keys()) - raises(TypeError, kt, {}) - raises(TypeError, kt) - it = type({}.items()) - raises(TypeError, it, {}) - raises(TypeError, it) - vt = type({}.values()) - raises(TypeError, vt, {}) - raises(TypeError, vt) - def test_dict_keys(self): d = {1: 10, "a": "ABC"} keys = d.keys() @@ -1177,7 +1166,6 @@ assert type(list(d.keys())[0]) is str def test_empty_to_int(self): - skip('IntDictStrategy is disabled for now, re-enable it!') import sys d = {} d[1] = "hi" diff --git a/pypy/testrunner_cfg.py b/pypy/testrunner_cfg.py --- a/pypy/testrunner_cfg.py +++ b/pypy/testrunner_cfg.py @@ -6,6 +6,8 @@ 'memory/test', 'jit/metainterp', 'jit/backend/arm', 'jit/backend/x86', 'jit/backend/zarch', 'module/cpyext/test', + # python3 slowness ... 
+ 'module/_cffi_backend/test', 'module/__pypy__/test', ] def collect_one_testdir(testdirs, reldir, tests): diff --git a/rpython/conftest.py b/rpython/conftest.py --- a/rpython/conftest.py +++ b/rpython/conftest.py @@ -5,6 +5,18 @@ option = None +try: + from hypothesis import settings, __version__ +except ImportError: + pass +else: + if __version__[:2] < '3.6': + s = settings(deadline=None) + settings.register_profile('default', s) + else: + settings.register_profile('default', deadline=None) + settings.load_profile('default') + def braindead_deindent(self): """monkeypatch that wont end up doing stupid in the python tokenizer""" text = '\n'.join(self.lines) diff --git a/rpython/jit/metainterp/test/test_zvector.py b/rpython/jit/metainterp/test/test_zvector.py --- a/rpython/jit/metainterp/test/test_zvector.py +++ b/rpython/jit/metainterp/test/test_zvector.py @@ -81,56 +81,41 @@ if not self.supports_vector_ext(): py.test.skip("this cpu %s has no implemented vector backend" % CPU) - def meta_interp(self, f, args, policy=None, vec=True, vec_all=False): - return ll_meta_interp(f, args, enable_opts=self.enable_opts, - policy=policy, - CPUClass=self.CPUClass, - type_system=self.type_system, - vec=vec, vec_all=vec_all) - # FLOAT UNARY - def _vector_float_unary(self, func, type, data): + @pytest.mark.parametrize('func', + [lambda v: abs(v), lambda v: -v], + ids=['abs', 'neg']) + @given(la=st.lists(st.floats(), min_size=10, max_size=150)) + def test_vector_float_unary(self, func, la): func = always_inline(func) + tp = rffi.DOUBLE - size = rffi.sizeof(type) - myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True) + size = rffi.sizeof(tp) + myjitdriver = JitDriver(greens=[], reds='auto', vectorize=True) + def f(bytecount, va, vc): i = 0 while i < bytecount: myjitdriver.jit_merge_point() - a = raw_storage_getitem(type,va,i) + a = raw_storage_getitem(tp, va, i) c = func(a) - raw_storage_setitem(vc, i, rffi.cast(type,c)) + raw_storage_setitem(vc, i, rffi.cast(tp, c)) i 
+= size - la = data.draw(st.lists(st.floats(), min_size=10, max_size=150)) l = len(la) - rawstorage = RawStorage() - va = rawstorage.new(la, type) - vc = rawstorage.new(None, type, size=l) - self.meta_interp(f, [l*size, va, vc], vec=True) + va = rawstorage.new(la, tp) + vc = rawstorage.new(None, tp, size=l) + self.meta_interp(f, [l * size, va, vc], vec=True) for i in range(l): - c = raw_storage_getitem(type,vc,i*size) + c = raw_storage_getitem(tp, vc, i * size) r = func(la[i]) assert isclose(r, c) rawstorage.clear() - def vec_int_unary(test_func, unary_func, type): - return pytest.mark.parametrize('func,type', [ - (unary_func, type) - ])(given(data=st.data())(test_func)) - - vec_float_unary = functools.partial(vec_int_unary, _vector_float_unary) - - test_vec_float_abs = \ - vec_float_unary(lambda v: abs(v), rffi.DOUBLE) - test_vec_float_neg = \ - vec_float_unary(lambda v: -v, rffi.DOUBLE) - # FLOAT BINARY def _vector_simple_float(self, func, type, data): @@ -376,38 +361,37 @@ res = self.meta_interp(f, [count], vec=True) assert res == f(count) == breaks - def _vec_reduce(self, strat, func, type, data): - func = always_inline(func) + def vec_reduce(strat, arith_func, tp): + @pytest.mark.parametrize('func, tp', [ + (arith_func, tp) + ]) + @given(la=st.lists(strat, min_size=11, max_size=150)) + def _vec_reduce(self, func, tp, la): + func = always_inline(func) - size = rffi.sizeof(type) - myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True) - def f(accum, bytecount, v): - i = 0 - while i < bytecount: - myjitdriver.jit_merge_point() - e = raw_storage_getitem(type,v,i) - accum = func(accum,e) - i += size - return accum + size = rffi.sizeof(tp) + myjitdriver = JitDriver(greens=[], reds='auto', vectorize=True) - la = data.draw(st.lists(strat, min_size=10, max_size=150)) - #la = [1.0] * 10 - l = len(la) + def f(accum, bytecount, v): + i = 0 + while i < bytecount: + myjitdriver.jit_merge_point() + e = raw_storage_getitem(tp, v, i) + accum = func(accum, e) + i 
+= size + return accum - accum = data.draw(strat) - rawstorage = RawStorage() - va = rawstorage.new(la, type) - res = self.meta_interp(f, [accum, l*size, va], vec=True) + accum = la[0] + la = la[1:] + l = len(la) + rawstorage = RawStorage() + va = rawstorage.new(la, tp) + res = self.meta_interp(f, [accum, l * size, va], vec=True) - assert isclose(rffi.cast(type, res), f(accum, l*size, va)) + assert isclose(rffi.cast(tp, res), f(accum, l * size, va)) - rawstorage.clear() - - def vec_reduce(test_func, strat, arith_func, type): - return pytest.mark.parametrize('strat,func,type', [ - (strat, arith_func, type) - ])(given(data=st.data())(test_func)) - vec_reduce = functools.partial(vec_reduce, _vec_reduce) + rawstorage.clear() + return _vec_reduce test_vec_int_sum = vec_reduce(st.integers(min_value=-2**(64-1), max_value=2**(64-1)-1), lambda a,b: lltype.intmask(lltype.intmask(a)+lltype.intmask(b)), lltype.Signed) diff --git a/rpython/memory/gc/test/test_direct.py b/rpython/memory/gc/test/test_direct.py --- a/rpython/memory/gc/test/test_direct.py +++ b/rpython/memory/gc/test/test_direct.py @@ -774,7 +774,7 @@ def test_collect_0(self, debuglog): self.gc.collect(1) # start a major debuglog.reset() - self.gc.collect(0) # do ONLY a minor + self.gc.collect(-1) # do ONLY a minor assert debuglog.summary() == {'gc-minor': 1} def test_enable_disable(self, debuglog): diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -991,7 +991,9 @@ items = d.items() d.clear() d[key] = value - d.update(items) + # r_dict.update does not support list of tuples, do it manually + for key, value in items: + d[key] = value @specialize.call_location() def move_to_end(d, key, last=True): diff --git a/rpython/rlib/test/test_objectmodel.py b/rpython/rlib/test/test_objectmodel.py --- a/rpython/rlib/test/test_objectmodel.py +++ b/rpython/rlib/test/test_objectmodel.py @@ -708,6 +708,15 @@ move_to_end(d, 'key1', 
last=False) assert d.items() == [('key1', 'val1'), ('key2', 'val2'), ('key3', 'val3')] +def test_r_dict_move_to_end(): + d = r_dict(strange_key_eq, strange_key_hash) + d['1key'] = 'val1' + d['2key'] = 'val2' + d['3key'] = 'val3' + # does not crash, we can't check that it actually moves to end on CPython + move_to_end(d, '1key') + move_to_end(d, '1key', last=False) + def test_import_from_mixin(): class M: # old-style def f(self): diff --git a/rpython/rtyper/test/test_rdict.py b/rpython/rtyper/test/test_rdict.py --- a/rpython/rtyper/test/test_rdict.py +++ b/rpython/rtyper/test/test_rdict.py @@ -1,6 +1,7 @@ import sys from contextlib import contextmanager import signal +from collections import OrderedDict from rpython.translator.translator import TranslationContext from rpython.annotator.model import ( @@ -1196,7 +1197,7 @@ DictValue(None, s_value)) dictrepr.setup() self.l_dict = self.newdict(dictrepr) - self.reference = self.new_reference() + self.reference = OrderedDict() self.ll_key = r_key.convert_const self.ll_value = r_value.convert_const self.removed_keys = [] @@ -1323,7 +1324,6 @@ class DictSpace(MappingSpace): MappingRepr = rdict.DictRepr - new_reference = dict ll_getitem = staticmethod(rdict.ll_dict_getitem) ll_setitem = staticmethod(rdict.ll_dict_setitem) ll_delitem = staticmethod(rdict.ll_dict_delitem) diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py --- a/rpython/rtyper/test/test_rordereddict.py +++ b/rpython/rtyper/test/test_rordereddict.py @@ -422,7 +422,6 @@ class ODictSpace(MappingSpace): MappingRepr = rodct.OrderedDictRepr - new_reference = OrderedDict moved_around = False ll_getitem = staticmethod(rodct.ll_dict_getitem) ll_setitem = staticmethod(rodct.ll_dict_setitem) diff --git a/rpython/translator/c/src/mem.h b/rpython/translator/c/src/mem.h --- a/rpython/translator/c/src/mem.h +++ b/rpython/translator/c/src/mem.h @@ -112,6 +112,8 @@ #define OP_GC__DISABLE_FINALIZERS(r) boehm_gc_finalizer_lock++ 
#define OP_GC__ENABLE_FINALIZERS(r) (boehm_gc_finalizer_lock--, \ boehm_gc_finalizer_notifier()) +#define OP_GC__DISABLE(r) /* nothing */ +#define OP_GC__ENABLE(r) /* nothing */ #define OP_BOEHM_FQ_REGISTER(tagindex, obj, r) \ boehm_fq_register(boehm_fq_queues + tagindex, obj) @@ -127,6 +129,8 @@ #define OP_BOEHM_DISAPPEARING_LINK(link, obj, r) /* nothing */ #define OP_GC__DISABLE_FINALIZERS(r) /* nothing */ #define OP_GC__ENABLE_FINALIZERS(r) /* nothing */ +#define OP_GC__DISABLE(r) /* nothing */ +#define OP_GC__ENABLE(r) /* nothing */ #define GC_REGISTER_FINALIZER(a, b, c, d, e) /* nothing */ #define GC_gcollect() /* nothing */ #define GC_set_max_heap_size(a) /* nothing */ diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -9,14 +9,14 @@ import rpython rpydir = str(py.path.local(rpython.__file__).join('..')) -def _get_compiler_type(cc, x64_flag, ver0=None): +def _get_compiler_type(cc, x64_flag): if not cc: cc = os.environ.get('CC','') if not cc: - return MsvcPlatform(x64=x64_flag, ver0=ver0) + return MsvcPlatform(x64=x64_flag) elif cc.startswith('mingw') or cc == 'gcc': return MingwPlatform(cc) - return MsvcPlatform(cc=cc, x64=x64_flag, ver0=ver0) + return MsvcPlatform(cc=cc, x64=x64_flag) def _get_vcver0(): # try to get the compiler which served to compile python @@ -28,17 +28,13 @@ return vsver return None From pypy.commits at gmail.com Mon Jan 28 14:00:56 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 28 Jan 2019 11:00:56 -0800 (PST) Subject: [pypy-commit] pypy release-pypy3.6-7.x: version -> 7.0.0 alpha Message-ID: <5c4f5168.1c69fb81.3234d.23ba@mx.google.com> Author: Matti Picus Branch: release-pypy3.6-7.x Changeset: r95740:a51d929d674b Date: 2019-01-28 16:26 +0200 http://bitbucket.org/pypy/pypy/changeset/a51d929d674b/ Log: version -> 7.0.0 alpha diff --git a/pypy/module/cpyext/include/patchlevel.h 
b/pypy/module/cpyext/include/patchlevel.h --- a/pypy/module/cpyext/include/patchlevel.h +++ b/pypy/module/cpyext/include/patchlevel.h @@ -32,8 +32,8 @@ * module/sys/version.py * doc/conf.py */ -#define PYPY_VERSION "7.1.0-alpha0" -#define PYPY_VERSION_NUM 0x07010000 +#define PYPY_VERSION "7.0.0-alpha0" +#define PYPY_VERSION_NUM 0x07000000 /* Defined to mean a PyPy where cpyext holds more regular references to PyObjects, e.g. staying alive as long as the internal PyPy object stays alive. */ diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py --- a/pypy/module/sys/version.py +++ b/pypy/module/sys/version.py @@ -13,7 +13,7 @@ # make sure to keep PYPY_VERSION in sync with: # module/cpyext/include/patchlevel.h # doc/conf.py -PYPY_VERSION = (7, 1, 0, "alpha", 0) +PYPY_VERSION = (7, 0, 0, "alpha", 0) import pypy From pypy.commits at gmail.com Mon Jan 28 14:00:57 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 28 Jan 2019 11:00:57 -0800 (PST) Subject: [pypy-commit] pypy default: merge heads Message-ID: <5c4f5169.1c69fb81.508fc.0d89@mx.google.com> Author: Matti Picus Branch: Changeset: r95741:018346b46386 Date: 2019-01-28 20:56 +0200 http://bitbucket.org/pypy/pypy/changeset/018346b46386/ Log: merge heads diff --git a/pypy/doc/contributor.rst b/pypy/doc/contributor.rst --- a/pypy/doc/contributor.rst +++ b/pypy/doc/contributor.rst @@ -7,16 +7,16 @@ Armin Rigo Maciej Fijalkowski Carl Friedrich Bolz-Tereick + Antonio Cuni Amaury Forgeot d'Arc - Antonio Cuni Matti Picus Samuele Pedroni Ronan Lamy Alex Gaynor Philip Jenvey + Richard Plangger Brian Kearns - Richard Plangger - Michael Hudson + Michael Hudson-Doyle Manuel Jacob David Schneider Holger Krekel @@ -26,8 +26,8 @@ Anders Chrigstrom Wim Lavrijsen Eric van Riet Paap + Remi Meier Richard Emslie - Remi Meier Alexander Schremmer Dan Villiom Podlaski Christiansen Lukas Diekmann @@ -37,10 +37,10 @@ Niklaus Haldimann Camillo Bruni Laura Creighton - Romain Guillebert Toon Verwaest Leonardo Santagada Seo 
Sanghyeon + Romain Guillebert Ronny Pfannschmidt Justin Peel Raffael Tfirst @@ -81,12 +81,12 @@ Squeaky Edd Barrett Timo Paulssen + Laurence Tratt Marius Gedminas Nicolas Truessel Alexandre Fayolle Simon Burton Martin Matusiak - Laurence Tratt Wenzhu Man Konstantin Lopuhin John Witulski @@ -101,8 +101,9 @@ Jean-Philippe St. Pierre Guido van Rossum Pavel Vinogradov + Stefan Beyer + William Leslie Paweł Piotr Przeradowski - William Leslie marky1991 Ilya Osadchiy Tobias Oberstein @@ -111,10 +112,10 @@ Taavi Burns Adrian Kuhn tav + Stian Andreassen Georg Brandl Joannah Nanjekye Bert Freudenberg - Stian Andreassen Wanja Saatkamp Mike Blume Gerald Klix @@ -130,6 +131,7 @@ Vasily Kuznetsov Preston Timmons David Ripton + Pieter Zieschang Dusty Phillips Lukas Renggli Guenter Jantzen @@ -143,6 +145,7 @@ Andrew Durdin Ben Young Michael Schneider + Yusuke Tsutsumi Nicholas Riley Jason Chu Igor Trindade Oliveira @@ -154,7 +157,6 @@ Mariano Anaya anatoly techtonik Karl Bartel - Stefan Beyer Gabriel Lavoie Jared Grubb Alecsandru Patrascu @@ -165,7 +167,6 @@ Victor Stinner Andrews Medina Aaron Iles - p_zieschang at yahoo.de Toby Watson Daniel Patrick Stuart Williams @@ -177,6 +178,7 @@ Mikael Schönenberg Stanislaw Halik Mihnea Saracin + Matt Jackson Berkin Ilbeyi Gasper Zejn Faye Zhao @@ -184,12 +186,14 @@ Anders Qvist Corbin Simpson Chirag Jadwani + Pauli Virtanen Jonathan David Riehl Beatrice During Alex Perry Robert Zaremba Alan McIntyre Alexander Sedov + David C Ellis Vaibhav Sood Reuben Cummings Attila Gobi @@ -209,7 +213,6 @@ Arjun Naik Aaron Gallagher Alexis Daboville - Pieter Zieschang Karl Ramm Lukas Vacek Omer Katz @@ -237,12 +240,15 @@ Catalin Gabriel Manciu Jacob Oscarson Ryan Gonzalez + Antoine Dupre Kristjan Valur Jonsson Lucio Torre Richard Lancaster Dan Buch Lene Wagner Tomo Cocoa + Miro Hrončok + Anthony Sottile David Lievens Neil Blakey-Milner Henrik Vendelbo @@ -257,10 +263,12 @@ Bobby Impollonia Roberto De Ioris Jeong YunWon + andrewjlawrence Christopher 
Armstrong Aaron Tubbs Vasantha Ganesh K Jason Michalski + Radu Ciorba Markus Holtermann Andrew Thompson Yusei Tahara @@ -268,28 +276,26 @@ Fabio Niephaus Akira Li Gustavo Niemeyer - Rafał Gałczyński + Nate Bragg Lucas Stadler roberto at goyle + Carl Bordum Hansen Matt Bogosian Yury V. Zaytsev florinpapa Anders Sigfridsson - Matt Jackson Nikolay Zinov rafalgalczynski at gmail.com Joshua Gilbert Anna Katrina Dominguez Kim Jin Su Amber Brown - Miro Hrončok - Anthony Sottile - Nate Bragg + Andrew Stepanov + Rafał Gałczyński Ben Darnell Juan Francisco Cantero Hurtado Godefroid Chappelle Julian Berman - Michael Hudson-Doyle Stephan Busemann Dan Colish timo @@ -299,6 +305,7 @@ halgari Jim Baker Chris Lambacher + John Aldis coolbutuseless at gmail.com Mike Bayer Rodrigo Araújo @@ -307,6 +314,7 @@ OlivierBlanvillain Jonas Pfannschmidt Zearin + Johan Forsberg Andrey Churin Dan Crosta reubano at gmail.com @@ -316,8 +324,9 @@ Steve Papanik Eli Stevens Boglarka Vezer - gabrielg + gabrielg at ec2-54-146-239-158.compute-1.amazonaws.com PavloKapyshin + Hervé Beraud Tomer Chachamu Christopher Groskopf Asmo Soinio @@ -331,8 +340,8 @@ Michael Chermside Anna Ravencroft remarkablerocket - Pauli Virtanen Petre Vijiac + hgattic Berker Peksag Christian Muirhead soareschen @@ -351,12 +360,13 @@ Zooko Wilcox-O Hearn James Lan jiaaro + Evgenii Gorinov Markus Unterwaditzer Kristoffer Kleine Graham Markall Dan Loewenherz werat - Andrew Stepanov + Filip Salomonsson Niclas Olofsson Chris Pressey Tobias Diaz diff --git a/pypy/doc/release-v7.0.0.rst b/pypy/doc/release-v7.0.0.rst --- a/pypy/doc/release-v7.0.0.rst +++ b/pypy/doc/release-v7.0.0.rst @@ -1,17 +1,20 @@ -=========================================== -PyPy2.7, PyPy3.5. 
PyPy3.6alpha v7.0 release -=========================================== +====================================================== +PyPy v7.0.0: triple release of 2.7, 3.5 and 3.6-alpha +====================================================== -The PyPy team is proud to release PyPy2.7 (supporting Python 2.7 syntax), -PyPy3.5 (supporting Python 3.5 syntax with f-strings from 3.6), and our first -alpha release of PyPy3.6 for 64-bit linux only (supporting Python 3.6 syntax). -The releases are based on much the same codebase. +The PyPy team is proud to release the version 7.0.0 of PyPy, which includes +three different interpreters: -This release is a feature release following our previous 6.0 release in April -2018. Our C-API compatibility layer ``cpyext`` is more mature, as more projects -use PyPy3.5 in their CI testing. Since these changes affect the included python -development header files, all c-extension modules must be recompiled for this -version. + - PyPy2.7, which is an interpreter supporting the syntax and the features of + Python 2.7 + + - PyPy3.5, which supports Python 3.5 + + - PyPy3.6-alpha: this is the first official release of PyPy to support 3.6 + features, although it is still considered alpha quality. + +All the interpreters are based on much the same codebase, thus the triple +release. Until we can work with downstream providers to distribute builds with PyPy, we have made packages for some common packages `available as wheels`_. @@ -35,8 +38,7 @@ The utf8 branch that changes internal representation of unicode to utf8 did not make it into the release, so there is still more goodness coming. - -You can download the v7.0 releases here: +You can download the v6.0 releases here: http://pypy.org/download.html @@ -57,12 +59,13 @@ .. _`available as wheels`: https://github.com/antocuni/pypy-wheels .. _`GC blog post`: https://morepypy.blogspot.com/2019/01/pypy-for-low-latency-systems.html + What is PyPy? 
============= PyPy is a very compliant Python interpreter, almost a drop-in replacement for -CPython 2.7, 3.5 and 3.6. It's fast (`PyPy and CPython 2.7.x`_ performance comparison) -due to its integrated tracing JIT compiler. +CPython 2.7, 3.5 and 3.6. It's fast (`PyPy and CPython 2.7.x`_ performance +comparison) due to its integrated tracing JIT compiler. We also welcome developers of other `dynamic languages`_ to see what RPython can do for them. @@ -72,15 +75,17 @@ * **x86** machines on most common operating systems (Linux 32/64 bits, Mac OS X 64 bits, Windows 32 bits, OpenBSD, FreeBSD) - * newer **ARM** hardware (ARMv6 or ARMv7, with VFPv3) running Linux, - * big- and little-endian variants of **PPC64** running Linux, * **s390x** running Linux +Unfortunately at the moment of writing our ARM buildbots are out of service, +so for now we are **not** releasing any binary for the ARM architecture. + .. _`PyPy and CPython 2.7.x`: http://speed.pypy.org .. _`dynamic languages`: http://rpython.readthedocs.io/en/latest/examples.html + Changelog ========= From pypy.commits at gmail.com Mon Jan 28 14:00:59 2019 From: pypy.commits at gmail.com (mattip) Date: Mon, 28 Jan 2019 11:00:59 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8-py3: update whatsnew Message-ID: <5c4f516b.1c69fb81.b5938.246f@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95742:2368e6fcba43 Date: 2019-01-23 23:40 +0200 http://bitbucket.org/pypy/pypy/changeset/2368e6fcba43/ Log: update whatsnew diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-head.rst --- a/pypy/doc/whatsnew-pypy3-head.rst +++ b/pypy/doc/whatsnew-pypy3-head.rst @@ -8,3 +8,7 @@ .. branch: unicode-utf8 Use utf-8 internally to represent unicode strings + +.. 
branch: unicode-utf8-py3 + +Use utf-8 internally to represent unicode strings From pypy.commits at gmail.com Mon Jan 28 14:03:27 2019 From: pypy.commits at gmail.com (rlamy) Date: Mon, 28 Jan 2019 11:03:27 -0800 (PST) Subject: [pypy-commit] pypy py3.6: hg merge py3.5 Message-ID: <5c4f51ff.1c69fb81.f04e7.c911@mx.google.com> Author: Ronan Lamy Branch: py3.6 Changeset: r95743:c5adb85f6de1 Date: 2019-01-28 19:02 +0000 http://bitbucket.org/pypy/pypy/changeset/c5adb85f6de1/ Log: hg merge py3.5 diff --git a/lib_pypy/_collections.py b/lib_pypy/_collections.py --- a/lib_pypy/_collections.py +++ b/lib_pypy/_collections.py @@ -390,7 +390,7 @@ class defaultdict(dict): __slots__ = ["default_factory"] - + def __init__(self, *args, **kwds): if len(args) > 0: default_factory = args[0] @@ -401,10 +401,10 @@ default_factory = None self.default_factory = default_factory super(defaultdict, self).__init__(*args, **kwds) - + def __missing__(self, key): # from defaultdict docs - if self.default_factory is None: + if self.default_factory is None: raise KeyError(key) self[key] = value = self.default_factory() return value @@ -420,7 +420,7 @@ def copy(self): return type(self)(self.default_factory, self) - + def __copy__(self): return self.copy() @@ -438,9 +438,3 @@ """ return (type(self), (self.default_factory,), None, None, iter(self.items())) - - -try: - from _pypy_collections import OrderedDict -except ImportError: - pass diff --git a/pypy/module/_collections/__init__.py b/pypy/module/_collections/__init__.py --- a/pypy/module/_collections/__init__.py +++ b/pypy/module/_collections/__init__.py @@ -8,6 +8,7 @@ appleveldefs = { 'defaultdict': 'app_defaultdict.defaultdict', + 'OrderedDict': 'app_odict.OrderedDict', } interpleveldefs = { @@ -25,15 +26,3 @@ space = self.space space.getattr(self, space.newtext('defaultdict')) # force importing space.delattr(self, space.newtext('__missing__')) - - def startup(self, space): - # OrderedDict is normally present, but in some cases the line - # 
"from __pypy__ import reversed_dict, move_to_end" from - # _pypy_collections.py raises - space.appexec([self], """(mod): - try: - from _pypy_collections import OrderedDict - mod.OrderedDict = OrderedDict - except ImportError: - pass - """) diff --git a/lib_pypy/_pypy_collections.py b/pypy/module/_collections/app_odict.py rename from lib_pypy/_pypy_collections.py rename to pypy/module/_collections/app_odict.py From pypy.commits at gmail.com Mon Jan 28 17:44:42 2019 From: pypy.commits at gmail.com (arigo) Date: Mon, 28 Jan 2019 14:44:42 -0800 (PST) Subject: [pypy-commit] cffi default: Issue #362 Message-ID: <5c4f85da.1c69fb81.5b5ff.7e04@mx.google.com> Author: Armin Rigo Branch: Changeset: r3184:73a16cc62771 Date: 2019-01-28 23:33 +0100 http://bitbucket.org/cffi/cffi/changeset/73a16cc62771/ Log: Issue #362 Add "thread canary" objects which are deallocated if the PyThreadState is explicitly deallocated by CPython. If the thread shuts down first, then instead the canary is inserted in a zombie list. In that case, we clear and delete properly the PyThreadState at the next occasion. diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c --- a/c/_cffi_backend.c +++ b/c/_cffi_backend.c @@ -7671,7 +7671,7 @@ init_cffi_tls(); if (PyErr_Occurred()) INITERROR; - init_cffi_tls_delete(); + init_cffi_tls_zombie(); if (PyErr_Occurred()) INITERROR; diff --git a/c/misc_thread_common.h b/c/misc_thread_common.h --- a/c/misc_thread_common.h +++ b/c/misc_thread_common.h @@ -5,10 +5,10 @@ struct cffi_tls_s { - /* The locally-made thread state. This is only non-null in case - we build the thread state here. It remains null if this thread - had already a thread state provided by CPython. */ - PyThreadState *local_thread_state; + /* The current thread's ThreadCanaryObj. This is only non-null in + case cffi builds the thread state here. It remains null if this + thread had already a thread state provided by CPython. 
*/ + struct thread_canary_s *local_thread_canary; #ifndef USE__THREAD /* The saved errno. If the C compiler supports '__thread', then @@ -26,81 +26,245 @@ or misc_win32.h */ -/* issue #362: Py_Finalize() will free any threadstate around, so in - * that case we must not call PyThreadState_Delete() any more on them - * from cffi_thread_shutdown(). The following mess is to give a - * thread-safe way to know that Py_Finalize() started. +/* We try to keep the PyThreadState around in a thread not started by + * Python but where cffi callbacks occur. If we didn't do that, then + * the standard logic in PyGILState_Ensure() and PyGILState_Release() + * would create a new PyThreadState and completely free it for every + * single call. For some applications, this is a huge slow-down. + * + * As shown by issue #362, it is quite messy to do. The current + * solution is to keep the PyThreadState alive by incrementing its + * 'gilstate_counter'. We detect thread shut-down, and we put the + * PyThreadState inside a list of zombies (we can't free it + * immediately because we don't have the GIL at that point in time). + * We also detect other pieces of code (notably Py_Finalize()) which + * clear and free PyThreadStates under our feet, using ThreadCanaryObj. */ -#define TLS_DEL_LOCK() PyThread_acquire_lock(cffi_tls_delete_lock, WAIT_LOCK) -#define TLS_DEL_UNLOCK() PyThread_release_lock(cffi_tls_delete_lock) -static PyThread_type_lock cffi_tls_delete_lock = NULL; -static int cffi_tls_delete; -static PyObject *old_exitfunc; -static PyObject *cffi_tls_shutdown(PyObject *self, PyObject *args) +#define TLS_ZOM_LOCK() PyThread_acquire_lock(cffi_zombie_lock, WAIT_LOCK) +#define TLS_ZOM_UNLOCK() PyThread_release_lock(cffi_zombie_lock) +static PyThread_type_lock cffi_zombie_lock = NULL; + + +/* A 'canary' object is created in a thread when there is a callback + invoked, and that thread has no PyThreadState so far. 
It is an + object of reference count equal to 1, which is stored in the + PyThreadState->dict. Two things can occur then: + + 1. The PyThreadState can be forcefully cleared by Py_Finalize(). + Then thread_canary_dealloc() is called, and we have to cancel + the hacks we did to keep the PyThreadState alive. + + 2. The thread finishes. In that case, we put the canary in a list + of zombies, and at some convenient time later when we have the + GIL, we free all PyThreadStates in the zombie list. + + Some more fun comes from the fact that thread_canary_dealloc() can + be called at a point where the canary is in the zombie list already. + Also, the various pieces are freed at specific points in time, and + we must make sure not to access already-freed structures: + + - the struct cffi_tls_s is valid until the thread shuts down, and + then it is freed by cffi_thread_shutdown(). + + - the canary is a normal Python object, but we have a borrowed + reference to it from cffi_tls_s.local_thread_canary. + */ + +typedef struct thread_canary_s { + PyObject_HEAD + struct thread_canary_s *zombie_prev, *zombie_next; + PyThreadState *tstate; + struct cffi_tls_s *tls; +} ThreadCanaryObj; + +static PyTypeObject ThreadCanary_Type; /* forward */ +static ThreadCanaryObj cffi_zombie_head; + +static void +_thread_canary_detach_with_lock(ThreadCanaryObj *ob) { - /* the lock here will wait until any parallel cffi_thread_shutdown() - is done. Future cffi_thread_shutdown() won't touch their - PyThreadState any more, which are all supposed to be freed anyway - very soon after the present cffi_tls_shutdown() function is called. + /* must be called with both the GIL and TLS_ZOM_LOCK. 
*/ + ThreadCanaryObj *p, *n; + p = ob->zombie_prev; + n = ob->zombie_next; + p->zombie_next = n; + n->zombie_prev = p; + ob->zombie_prev = NULL; + ob->zombie_next = NULL; +} + +static void +thread_canary_dealloc(ThreadCanaryObj *ob) +{ + /* this ThreadCanaryObj is being freed: if it is in the zombie + chained list, remove it. Thread-safety: 'zombie_next' amd + 'local_thread_canary' accesses need to be protected with + the TLS_ZOM_LOCK. */ - PyObject *ofn; - - TLS_DEL_LOCK(); - cffi_tls_delete = 0; /* Py_Finalize() called */ - TLS_DEL_UNLOCK(); - - ofn = old_exitfunc; - if (ofn == NULL) - { - Py_INCREF(Py_None); - return Py_None; + TLS_ZOM_LOCK(); + if (ob->zombie_next != NULL) { + //fprintf(stderr, "thread_canary_dealloc(%p): ZOMBIE\n", ob); + _thread_canary_detach_with_lock(ob); } else - { - old_exitfunc = NULL; - return PyObject_CallFunction(ofn, ""); + //fprintf(stderr, "thread_canary_dealloc(%p): not a zombie\n", ob); + + if (ob->tls != NULL) { + //fprintf(stderr, "thread_canary_dealloc(%p): was local_thread_canary\n", ob); + assert(ob->tls->local_thread_canary == ob); + ob->tls->local_thread_canary = NULL; } + TLS_ZOM_UNLOCK(); + + PyObject_Del((PyObject *)ob); } -static void init_cffi_tls_delete(void) +static void +thread_canary_make_zombie(ThreadCanaryObj *ob) { - static PyMethodDef mdef = { - "cffi_tls_shutdown", cffi_tls_shutdown, METH_NOARGS, - }; - PyObject *shutdown_fn; + /* This must be called without the GIL, but with the TLS_ZOM_LOCK. + It must be called at most once for a given ThreadCanaryObj. 
*/ + ThreadCanaryObj *last; - cffi_tls_delete_lock = PyThread_allocate_lock(); - if (cffi_tls_delete_lock == NULL) - { - PyErr_SetString(PyExc_SystemError, - "can't allocate cffi_tls_delete_lock"); - return; + //fprintf(stderr, "thread_canary_make_zombie(%p)\n", ob); + if (ob->zombie_next) + Py_FatalError("cffi: ThreadCanaryObj is already a zombie"); + last = cffi_zombie_head.zombie_prev; + ob->zombie_next = &cffi_zombie_head; + ob->zombie_prev = last; + last->zombie_next = ob; + cffi_zombie_head.zombie_prev = ob; +} + +static void +thread_canary_free_zombies(void) +{ + /* This must be called with the GIL. */ + if (cffi_zombie_head.zombie_next == &cffi_zombie_head) + return; /* fast path */ + + while (1) { + ThreadCanaryObj *ob; + PyThreadState *tstate = NULL; + + TLS_ZOM_LOCK(); + ob = cffi_zombie_head.zombie_next; + if (ob != &cffi_zombie_head) { + tstate = ob->tstate; + //fprintf(stderr, "thread_canary_free_zombie(%p) tstate=%p\n", ob, tstate); + _thread_canary_detach_with_lock(ob); + if (tstate == NULL) + Py_FatalError("cffi: invalid ThreadCanaryObj->tstate"); + } + TLS_ZOM_UNLOCK(); + + if (tstate == NULL) + break; + PyThreadState_Clear(tstate); /* calls thread_canary_dealloc on 'ob', + but now ob->zombie_next == NULL. */ + PyThreadState_Delete(tstate); + //fprintf(stderr, "thread_canary_free_zombie: cleared and deleted tstate=%p\n", tstate); } + //fprintf(stderr, "thread_canary_free_zombie: end\n"); +} - shutdown_fn = PyCFunction_New(&mdef, NULL); - if (shutdown_fn == NULL) - return; +static void +thread_canary_register(PyThreadState *tstate) +{ + /* called with the GIL; 'tstate' is the current PyThreadState. 
*/ + ThreadCanaryObj *canary; + PyObject *tdict; + struct cffi_tls_s *tls; + int err; - old_exitfunc = PySys_GetObject("exitfunc"); - if (PySys_SetObject("exitfunc", shutdown_fn) == 0) - cffi_tls_delete = 1; /* all ready */ - Py_DECREF(shutdown_fn); + /* first free the zombies, if any */ + thread_canary_free_zombies(); + + tls = get_cffi_tls(); + if (tls == NULL) + goto ignore_error; + + tdict = PyThreadState_GetDict(); + if (tdict == NULL) + goto ignore_error; + + canary = PyObject_New(ThreadCanaryObj, &ThreadCanary_Type); + //fprintf(stderr, "thread_canary_register(%p): tstate=%p tls=%p\n", canary, tstate, tls); + if (canary == NULL) + goto ignore_error; + canary->zombie_prev = NULL; + canary->zombie_next = NULL; + canary->tstate = tstate; + canary->tls = tls; + + err = PyDict_SetItemString(tdict, "cffi.thread.canary", (PyObject *)canary); + Py_DECREF(canary); + if (err < 0) + goto ignore_error; + + /* thread-safety: we have the GIL here, and 'tstate' is the one that + corresponds to our own thread. We are allocating a new 'canary' + and setting it up for our own thread, both in 'tdict' (which owns + the reference) and in 'tls->local_thread_canary' (which doesn't). 
*/ + assert(Py_REFCNT(canary) == 1); + tls->local_thread_canary = canary; + tstate->gilstate_counter++; + /* ^^^ this means 'tstate' will never be automatically freed by + PyGILState_Release() */ + return; + + ignore_error: + PyErr_Clear(); +} + +static PyTypeObject ThreadCanary_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_cffi_backend.thread_canary", + sizeof(ThreadCanaryObj), + 0, + (destructor)thread_canary_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ +}; + +static void init_cffi_tls_zombie(void) +{ + cffi_zombie_head.zombie_next = &cffi_zombie_head; + cffi_zombie_head.zombie_prev = &cffi_zombie_head; + cffi_zombie_lock = PyThread_allocate_lock(); + if (cffi_zombie_lock == NULL) + PyErr_SetString(PyExc_SystemError, "can't allocate cffi_zombie_lock"); } static void cffi_thread_shutdown(void *p) { + /* this function is called from misc_thread_posix or misc_win32 + when a thread is about to end. */ struct cffi_tls_s *tls = (struct cffi_tls_s *)p; - if (tls->local_thread_state != NULL) { - /* - * issue #362: see comments above - */ - TLS_DEL_LOCK(); - if (cffi_tls_delete) - PyThreadState_Delete(tls->local_thread_state); - TLS_DEL_UNLOCK(); + /* thread-safety: this field 'local_thread_canary' can be reset + to NULL in parallel, protected by TLS_ZOM_LOCK. */ + TLS_ZOM_LOCK(); + if (tls->local_thread_canary != NULL) { + tls->local_thread_canary->tls = NULL; + thread_canary_make_zombie(tls->local_thread_canary); } + TLS_ZOM_UNLOCK(); + //fprintf(stderr, "thread_shutdown(%p)\n", tls); free(tls); } @@ -168,7 +332,6 @@ PyGILState_Ensure(). 
*/ PyGILState_STATE result; - struct cffi_tls_s *tls; PyThreadState *ts = PyGILState_GetThisThreadState(); if (ts != NULL) { @@ -193,13 +356,9 @@ assert(ts == get_current_ts()); assert(ts->gilstate_counter >= 1); - /* Save the now-current thread state inside our 'local_thread_state' - field, to be removed at thread shutdown */ - tls = get_cffi_tls(); - if (tls != NULL) { - tls->local_thread_state = ts; - ts->gilstate_counter++; - } + /* Use the ThreadCanary mechanism to keep 'ts' alive until the + thread really shuts down */ + thread_canary_register(ts); return result; } From pypy.commits at gmail.com Mon Jan 28 18:00:10 2019 From: pypy.commits at gmail.com (arigo) Date: Mon, 28 Jan 2019 15:00:10 -0800 (PST) Subject: [pypy-commit] cffi default: Windows compilation fix Message-ID: <5c4f897a.1c69fb81.45d38.9d73@mx.google.com> Author: Armin Rigo Branch: Changeset: r3185:e851dbe5757a Date: 2019-01-29 00:00 +0100 http://bitbucket.org/cffi/cffi/changeset/e851dbe5757a/ Log: Windows compilation fix diff --git a/c/ffi_obj.c b/c/ffi_obj.c --- a/c/ffi_obj.c +++ b/c/ffi_obj.c @@ -700,22 +700,22 @@ static PyObject *ffi_from_buffer(FFIObject *self, PyObject *args, PyObject *kwds) { - PyObject *cdecl, *python_buf = NULL; + PyObject *cdecl1, *python_buf = NULL; CTypeDescrObject *ct; int require_writable = 0; static char *keywords[] = {"cdecl", "python_buffer", "require_writable", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oi:from_buffer", keywords, - &cdecl, &python_buf, &require_writable)) + &cdecl1, &python_buf, &require_writable)) return NULL; if (python_buf == NULL) { - python_buf = cdecl; + python_buf = cdecl1; ct = g_ct_chararray; } else { - ct = _ffi_type(self, cdecl, ACCEPT_STRING|ACCEPT_CTYPE); + ct = _ffi_type(self, cdecl1, ACCEPT_STRING|ACCEPT_CTYPE); if (ct == NULL) return NULL; } From pypy.commits at gmail.com Mon Jan 28 18:14:04 2019 From: pypy.commits at gmail.com (arigo) Date: Mon, 28 Jan 2019 15:14:04 -0800 (PST) Subject: [pypy-commit] cffi default: 
Backed out changeset 7a76a3815340 Message-ID: <5c4f8cbc.1c69fb81.8a849.46e1@mx.google.com> Author: Armin Rigo Branch: Changeset: r3186:e2f85d257915 Date: 2019-01-29 00:12 +0100 http://bitbucket.org/cffi/cffi/changeset/e2f85d257915/ Log: Backed out changeset 7a76a3815340 On Windows, there is no lround() or (as far as I can find) any math function returning an integer. diff --git a/testing/cffi0/test_function.py b/testing/cffi0/test_function.py --- a/testing/cffi0/test_function.py +++ b/testing/cffi0/test_function.py @@ -45,14 +45,15 @@ assert x != math.sin(1.23) # rounding effects assert abs(x - math.sin(1.23)) < 1E-6 - def test_lround_no_return_value(self): + def test_getenv_no_return_value(self): # check that 'void'-returning functions work too ffi = FFI(backend=self.Backend()) ffi.cdef(""" - void lround(double x); + void getenv(char *); """) - m = ffi.dlopen(lib_m) - x = m.lround(1.23) + needs_dlopen_none() + m = ffi.dlopen(None) + x = m.getenv("FOO") assert x is None def test_dlopen_filename(self): From pypy.commits at gmail.com Mon Jan 28 18:14:05 2019 From: pypy.commits at gmail.com (arigo) Date: Mon, 28 Jan 2019 15:14:05 -0800 (PST) Subject: [pypy-commit] cffi default: py3 fix Message-ID: <5c4f8cbd.1c69fb81.d2997.a172@mx.google.com> Author: Armin Rigo Branch: Changeset: r3187:d100f3412c88 Date: 2019-01-29 00:14 +0100 http://bitbucket.org/cffi/cffi/changeset/d100f3412c88/ Log: py3 fix diff --git a/testing/cffi0/test_function.py b/testing/cffi0/test_function.py --- a/testing/cffi0/test_function.py +++ b/testing/cffi0/test_function.py @@ -53,7 +53,7 @@ """) needs_dlopen_none() m = ffi.dlopen(None) - x = m.getenv("FOO") + x = m.getenv(b"FOO") assert x is None def test_dlopen_filename(self): From pypy.commits at gmail.com Tue Jan 29 05:25:01 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 29 Jan 2019 02:25:01 -0800 (PST) Subject: [pypy-commit] pypy default: windows: complain if failed to find compiler, suggest fix Message-ID: 
<5c5029fd.1c69fb81.4958.e70d@mx.google.com> Author: Matti Picus Branch: Changeset: r95744:a0ab50629937 Date: 2019-01-29 12:23 +0200 http://bitbucket.org/pypy/pypy/changeset/a0ab50629937/ Log: windows: complain if failed to find compiler, suggest fix diff --git a/rpython/tool/setuptools_msvc.py b/rpython/tool/setuptools_msvc.py --- a/rpython/tool/setuptools_msvc.py +++ b/rpython/tool/setuptools_msvc.py @@ -27,7 +27,6 @@ import platform import itertools import distutils.errors -from pkg_resources.extern.packaging.version import LegacyVersion from setuptools.extern.six.moves import filterfalse @@ -201,6 +200,7 @@ """ if "numpy.distutils" in sys.modules: import numpy as np + from pkg_resources.extern.packaging.version import LegacyVersion if LegacyVersion(np.__version__) < LegacyVersion('1.11.2'): return np.distutils.ccompiler.gen_lib_options(*args, **kwargs) return get_unpatched(msvc14_gen_lib_options)(*args, **kwargs) diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -56,7 +56,12 @@ # use setuptools from python3 to find tools try: vcdict = _find_vcvarsall(vsver, x64flag) + except ImportError as e: + if 'setuptools' in str(e): + log.error('is setuptools installed (perhaps try %s -mensurepip)?' 
% sys.executable) + log.error('looking for compiler %s raised exception "%s' % (vsver, str(e))) except Exception as e: + log.error('looking for compiler %s raised exception "%s' % (vsver, str(e))) return None else: if x64flag: From pypy.commits at gmail.com Tue Jan 29 05:25:03 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 29 Jan 2019 02:25:03 -0800 (PST) Subject: [pypy-commit] pypy py3.5: merge default into branch Message-ID: <5c5029ff.1c69fb81.8fa5f.51ff@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r95745:9d2fa7c63b7c Date: 2019-01-29 12:24 +0200 http://bitbucket.org/pypy/pypy/changeset/9d2fa7c63b7c/ Log: merge default into branch diff --git a/pypy/doc/contributor.rst b/pypy/doc/contributor.rst --- a/pypy/doc/contributor.rst +++ b/pypy/doc/contributor.rst @@ -7,16 +7,16 @@ Armin Rigo Maciej Fijalkowski Carl Friedrich Bolz-Tereick + Antonio Cuni Amaury Forgeot d'Arc - Antonio Cuni Matti Picus Samuele Pedroni Ronan Lamy Alex Gaynor Philip Jenvey + Richard Plangger Brian Kearns - Richard Plangger - Michael Hudson + Michael Hudson-Doyle Manuel Jacob David Schneider Holger Krekel @@ -26,8 +26,8 @@ Anders Chrigstrom Wim Lavrijsen Eric van Riet Paap + Remi Meier Richard Emslie - Remi Meier Alexander Schremmer Dan Villiom Podlaski Christiansen Lukas Diekmann @@ -37,10 +37,10 @@ Niklaus Haldimann Camillo Bruni Laura Creighton - Romain Guillebert Toon Verwaest Leonardo Santagada Seo Sanghyeon + Romain Guillebert Ronny Pfannschmidt Justin Peel Raffael Tfirst @@ -81,12 +81,12 @@ Squeaky Edd Barrett Timo Paulssen + Laurence Tratt Marius Gedminas Nicolas Truessel Alexandre Fayolle Simon Burton Martin Matusiak - Laurence Tratt Wenzhu Man Konstantin Lopuhin John Witulski @@ -101,8 +101,9 @@ Jean-Philippe St. 
Pierre Guido van Rossum Pavel Vinogradov + Stefan Beyer + William Leslie Paweł Piotr Przeradowski - William Leslie marky1991 Ilya Osadchiy Tobias Oberstein @@ -111,10 +112,10 @@ Taavi Burns Adrian Kuhn tav + Stian Andreassen Georg Brandl Joannah Nanjekye Bert Freudenberg - Stian Andreassen Wanja Saatkamp Mike Blume Gerald Klix @@ -130,6 +131,7 @@ Vasily Kuznetsov Preston Timmons David Ripton + Pieter Zieschang Dusty Phillips Lukas Renggli Guenter Jantzen @@ -143,6 +145,7 @@ Andrew Durdin Ben Young Michael Schneider + Yusuke Tsutsumi Nicholas Riley Jason Chu Igor Trindade Oliveira @@ -154,7 +157,6 @@ Mariano Anaya anatoly techtonik Karl Bartel - Stefan Beyer Gabriel Lavoie Jared Grubb Alecsandru Patrascu @@ -165,7 +167,6 @@ Victor Stinner Andrews Medina Aaron Iles - p_zieschang at yahoo.de Toby Watson Daniel Patrick Stuart Williams @@ -177,6 +178,7 @@ Mikael Schönenberg Stanislaw Halik Mihnea Saracin + Matt Jackson Berkin Ilbeyi Gasper Zejn Faye Zhao @@ -184,12 +186,14 @@ Anders Qvist Corbin Simpson Chirag Jadwani + Pauli Virtanen Jonathan David Riehl Beatrice During Alex Perry Robert Zaremba Alan McIntyre Alexander Sedov + David C Ellis Vaibhav Sood Reuben Cummings Attila Gobi @@ -209,7 +213,6 @@ Arjun Naik Aaron Gallagher Alexis Daboville - Pieter Zieschang Karl Ramm Lukas Vacek Omer Katz @@ -237,12 +240,15 @@ Catalin Gabriel Manciu Jacob Oscarson Ryan Gonzalez + Antoine Dupre Kristjan Valur Jonsson Lucio Torre Richard Lancaster Dan Buch Lene Wagner Tomo Cocoa + Miro Hrončok + Anthony Sottile David Lievens Neil Blakey-Milner Henrik Vendelbo @@ -257,10 +263,12 @@ Bobby Impollonia Roberto De Ioris Jeong YunWon + andrewjlawrence Christopher Armstrong Aaron Tubbs Vasantha Ganesh K Jason Michalski + Radu Ciorba Markus Holtermann Andrew Thompson Yusei Tahara @@ -268,28 +276,26 @@ Fabio Niephaus Akira Li Gustavo Niemeyer - Rafał Gałczyński + Nate Bragg Lucas Stadler roberto at goyle + Carl Bordum Hansen Matt Bogosian Yury V. 
Zaytsev florinpapa Anders Sigfridsson - Matt Jackson Nikolay Zinov rafalgalczynski at gmail.com Joshua Gilbert Anna Katrina Dominguez Kim Jin Su Amber Brown - Miro Hrončok - Anthony Sottile - Nate Bragg + Andrew Stepanov + Rafał Gałczyński Ben Darnell Juan Francisco Cantero Hurtado Godefroid Chappelle Julian Berman - Michael Hudson-Doyle Stephan Busemann Dan Colish timo @@ -299,6 +305,7 @@ halgari Jim Baker Chris Lambacher + John Aldis coolbutuseless at gmail.com Mike Bayer Rodrigo Araújo @@ -307,6 +314,7 @@ OlivierBlanvillain Jonas Pfannschmidt Zearin + Johan Forsberg Andrey Churin Dan Crosta reubano at gmail.com @@ -316,8 +324,9 @@ Steve Papanik Eli Stevens Boglarka Vezer - gabrielg + gabrielg at ec2-54-146-239-158.compute-1.amazonaws.com PavloKapyshin + Hervé Beraud Tomer Chachamu Christopher Groskopf Asmo Soinio @@ -331,8 +340,8 @@ Michael Chermside Anna Ravencroft remarkablerocket - Pauli Virtanen Petre Vijiac + hgattic Berker Peksag Christian Muirhead soareschen @@ -351,12 +360,13 @@ Zooko Wilcox-O Hearn James Lan jiaaro + Evgenii Gorinov Markus Unterwaditzer Kristoffer Kleine Graham Markall Dan Loewenherz werat - Andrew Stepanov + Filip Salomonsson Niclas Olofsson Chris Pressey Tobias Diaz diff --git a/pypy/doc/release-v7.0.0.rst b/pypy/doc/release-v7.0.0.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/release-v7.0.0.rst @@ -0,0 +1,146 @@ +====================================================== +PyPy v7.0.0: triple release of 2.7, 3.5 and 3.6-alpha +====================================================== + +The PyPy team is proud to release the version 7.0.0 of PyPy, which includes +three different interpreters: + + - PyPy2.7, which is an interpreter supporting the syntax and the features of + Python 2.7 + + - PyPy3.5, which supports Python 3.5 + + - PyPy3.6-alpha: this is the first official release of PyPy to support 3.6 + features, although it is still considered alpha quality. 
+ +All the interpreters are based on much the same codebase, thus the triple +release. + +Until we can work with downstream providers to distribute builds with PyPy, we +have made packages for some common packages `available as wheels`_. + +The GC now has `hooks`_ to gain more insights into its performance, and it is +now possible to manually manage the GC by using a combination of +``gc.disable`` and ``gc.collect_step``. See the `GC blog post`_. + + +We updated the `cffi`_ module included in PyPy to version 1.12, and the +`cppyy`_ backend to 1.4. Please use these to wrap your C and C++ code, +respectively, for a JIT friendly experience. + +As always, this release is 100% compatible with the previous one and fixed +several issues and bugs raised by the growing community of PyPy users. +We strongly recommend updating. + +The PyPy3.6 release and the Windows PyPy3.5 release are still not production +quality so your mileage may vary. There are open issues with incomplete +compatibility and c-extension support. + +The utf8 branch that changes internal representation of unicode to utf8 did not +make it into the release, so there is still more goodness coming. +You can download the v6.0 releases here: + + http://pypy.org/download.html + +We would like to thank our donors for the continued support of the PyPy +project. If PyPy is not quite good enough for your needs, we are available for +direct consulting work. + +We would also like to thank our contributors and encourage new people to join +the project. PyPy has many layers and we need help with all of them: `PyPy`_ +and `RPython`_ documentation improvements, tweaking popular `modules`_ to run +on pypy, or general `help`_ with making RPython's JIT even better. + +.. _`PyPy`: index.html +.. _`RPython`: https://rpython.readthedocs.org +.. _`help`: project-ideas.html +.. _`cffi`: http://cffi.readthedocs.io +.. _`cppyy`: https://cppyy.readthedocs.io +.. _`available as wheels`: https://github.com/antocuni/pypy-wheels +.. 
_`GC blog post`: https://morepypy.blogspot.com/2019/01/pypy-for-low-latency-systems.html + + +What is PyPy? +============= + +PyPy is a very compliant Python interpreter, almost a drop-in replacement for +CPython 2.7, 3.5 and 3.6. It's fast (`PyPy and CPython 2.7.x`_ performance +comparison) due to its integrated tracing JIT compiler. + +We also welcome developers of other `dynamic languages`_ to see what RPython +can do for them. + +The PyPy release supports: + + * **x86** machines on most common operating systems + (Linux 32/64 bits, Mac OS X 64 bits, Windows 32 bits, OpenBSD, FreeBSD) + + * big- and little-endian variants of **PPC64** running Linux, + + * **s390x** running Linux + +Unfortunately at the moment of writing our ARM buildbots are out of service, +so for now we are **not** releasing any binary for the ARM architecture. + +.. _`PyPy and CPython 2.7.x`: http://speed.pypy.org +.. _`dynamic languages`: http://rpython.readthedocs.io/en/latest/examples.html + + +Changelog +========= + +If not specified, the changes are shared across versions + +* Support ``__set_name__``, ``__init_subclass__`` (Py3.6) +* Support ``cppyy`` in Py3.5 and Py3.6 +* Use implementation-specific site directories in ``sysconfig`` (Py3.5, Py3.6) +* Adding detection of gcc to ``sysconfig`` (Py3.5, Py3.6) +* Fix multiprocessing regression on newer glibcs +* Make sure 'blocking-ness' of socket is set along with default timeout +* Include ``crypt.h`` for ``crypt()`` on Linux +* Improve and re-organize the contributing_ documentation +* Make the ``__module__`` attribute writable, fixing an incompatibility with + NumPy 1.16 +* Implement ``Py_ReprEnter``, ``Py_ReprLeave(), ``PyMarshal_ReadObjectFromString``, + ``PyMarshal_WriteObjectToString``, ``PyObject_DelItemString``, + ``PyMapping_DelItem``, ``PyMapping_DelItemString``, ``PyEval_GetFrame``, + ``PyOS_InputHook``, ``PyErr_FormatFromCause`` (Py3.6), +* Implement new wordcode instruction encoding (Py3.6) +* Log additional gc-minor and 
gc-collect-step info in the PYPYLOG +* Set ``reverse-debugger`` active by default. For more information, see + https://bitbucket.org/pypy/revdb +* Support underscores in numerical literals like ``'4_2'`` (Py3.6) +* Pre-emptively raise MemoryError if the size of dequeue in ``_collections.deque`` + is too large (Py3.5) +* Fix multithreading issues in calls to ``os.setenv`` +* Add missing defines and typedefs for numpy and pandas on MSVC +* Add CPython macros like ``Py_NAN`` to header files +* Rename the ``MethodType`` to ``instancemethod``, like CPython +* Better support for `async with` in generators (Py3.5, Py3.6) +* Improve the performance of ``pow(a, b, c)`` if ``c`` is a large integer +* Now ``vmprof`` works on FreeBSD +* Support GNU Hurd, fixes for FreeBSD +* Add deprecation warning if type of result of ``__float__`` is float inherited + class (Py3.6) +* Fix async generator bug when yielding a ``StopIteration`` (Py3.6) +* Speed up ``max(list-of-int)`` from non-jitted code +* Fix Windows ``os.listdir()`` for some cases (see CPython #32539) +* Add ``select.PIPE_BUF`` +* Use ``subprocess`` to avoid shell injection in ``shutil`` module +* Rename ``_Py_ZeroStruct`` to ``_Py_FalseStruct`` (Py3.5, Py3.6) +* Remove some cpyext names for Py3.5, Py3.6 +* Enable use of unicode file names in ``dlopen`` +* Backport CPython fix for ``thread.RLock`` +* Make GC hooks measure time in seconds (as opposed to an opaque unit) +* Refactor and reorganize tests in ``test_lib_pypy`` +* Check error values in ``socket.setblocking`` (Py3.6) +* Add support for FsPath to os.unlink() (Py3.6) +* Fix freezing builtin modules at translation +* Tweak ``W_UnicodeDictionaryStrategy`` which speeds up dictionaries with only + unicode keys + +We also refactored many parts of the JIT bridge optimizations, as well as cpyext +internals, and together with new contributors fixed issues, added new +documentation, and cleaned up the codebase. + +.. 
_contributing: http://doc.pypy.org/en/latest/contributing.html diff --git a/rpython/tool/setuptools_msvc.py b/rpython/tool/setuptools_msvc.py --- a/rpython/tool/setuptools_msvc.py +++ b/rpython/tool/setuptools_msvc.py @@ -27,7 +27,6 @@ import platform import itertools import distutils.errors -from pkg_resources.extern.packaging.version import LegacyVersion from setuptools.extern.six.moves import filterfalse @@ -201,6 +200,7 @@ """ if "numpy.distutils" in sys.modules: import numpy as np + from pkg_resources.extern.packaging.version import LegacyVersion if LegacyVersion(np.__version__) < LegacyVersion('1.11.2'): return np.distutils.ccompiler.gen_lib_options(*args, **kwargs) return get_unpatched(msvc14_gen_lib_options)(*args, **kwargs) diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -56,7 +56,12 @@ # use setuptools from python3 to find tools try: vcdict = _find_vcvarsall(vsver, x64flag) + except ImportError as e: + if 'setuptools' in str(e): + log.error('is setuptools installed (perhaps try %s -mensurepip)?' 
% sys.executable) + log.error('looking for compiler %s raised exception "%s' % (vsver, str(e))) except Exception as e: + log.error('looking for compiler %s raised exception "%s' % (vsver, str(e))) return None else: if x64flag: From pypy.commits at gmail.com Wed Jan 30 02:27:50 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 29 Jan 2019 23:27:50 -0800 (PST) Subject: [pypy-commit] pypy default: copy contributors to LICENSE, index and copy whatsnew (backporting the 3.5 one) Message-ID: <5c5151f6.1c69fb81.ebc89.10b5@mx.google.com> Author: Matti Picus Branch: Changeset: r95746:481c69f7d81f Date: 2019-01-30 09:21 +0200 http://bitbucket.org/pypy/pypy/changeset/481c69f7d81f/ Log: copy contributors to LICENSE, index and copy whatsnew (backporting the 3.5 one) diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -40,16 +40,16 @@ Armin Rigo Maciej Fijalkowski Carl Friedrich Bolz-Tereick + Antonio Cuni Amaury Forgeot d'Arc - Antonio Cuni Matti Picus Samuele Pedroni Ronan Lamy Alex Gaynor Philip Jenvey + Richard Plangger Brian Kearns - Richard Plangger - Michael Hudson + Michael Hudson-Doyle Manuel Jacob David Schneider Holger Krekel @@ -59,8 +59,8 @@ Anders Chrigstrom Wim Lavrijsen Eric van Riet Paap + Remi Meier Richard Emslie - Remi Meier Alexander Schremmer Dan Villiom Podlaski Christiansen Lukas Diekmann @@ -70,10 +70,10 @@ Niklaus Haldimann Camillo Bruni Laura Creighton - Romain Guillebert Toon Verwaest Leonardo Santagada Seo Sanghyeon + Romain Guillebert Ronny Pfannschmidt Justin Peel Raffael Tfirst @@ -114,12 +114,12 @@ Squeaky Edd Barrett Timo Paulssen + Laurence Tratt Marius Gedminas Nicolas Truessel Alexandre Fayolle Simon Burton Martin Matusiak - Laurence Tratt Wenzhu Man Konstantin Lopuhin John Witulski @@ -134,8 +134,9 @@ Jean-Philippe St. 
Pierre Guido van Rossum Pavel Vinogradov + Stefan Beyer + William Leslie Paweł Piotr Przeradowski - William Leslie marky1991 Ilya Osadchiy Tobias Oberstein @@ -144,10 +145,10 @@ Taavi Burns Adrian Kuhn tav + Stian Andreassen Georg Brandl Joannah Nanjekye Bert Freudenberg - Stian Andreassen Wanja Saatkamp Mike Blume Gerald Klix @@ -163,6 +164,7 @@ Vasily Kuznetsov Preston Timmons David Ripton + Pieter Zieschang Dusty Phillips Lukas Renggli Guenter Jantzen @@ -176,6 +178,7 @@ Andrew Durdin Ben Young Michael Schneider + Yusuke Tsutsumi Nicholas Riley Jason Chu Igor Trindade Oliveira @@ -187,7 +190,6 @@ Mariano Anaya anatoly techtonik Karl Bartel - Stefan Beyer Gabriel Lavoie Jared Grubb Alecsandru Patrascu @@ -198,7 +200,6 @@ Victor Stinner Andrews Medina Aaron Iles - p_zieschang at yahoo.de Toby Watson Daniel Patrick Stuart Williams @@ -210,6 +211,7 @@ Mikael Schönenberg Stanislaw Halik Mihnea Saracin + Matt Jackson Berkin Ilbeyi Gasper Zejn Faye Zhao @@ -217,12 +219,14 @@ Anders Qvist Corbin Simpson Chirag Jadwani + Pauli Virtanen Jonathan David Riehl Beatrice During Alex Perry Robert Zaremba Alan McIntyre Alexander Sedov + David C Ellis Vaibhav Sood Reuben Cummings Attila Gobi @@ -242,7 +246,6 @@ Arjun Naik Aaron Gallagher Alexis Daboville - Pieter Zieschang Karl Ramm Lukas Vacek Omer Katz @@ -270,12 +273,15 @@ Catalin Gabriel Manciu Jacob Oscarson Ryan Gonzalez + Antoine Dupre Kristjan Valur Jonsson Lucio Torre Richard Lancaster Dan Buch Lene Wagner Tomo Cocoa + Miro Hrončok + Anthony Sottile David Lievens Neil Blakey-Milner Henrik Vendelbo @@ -290,10 +296,12 @@ Bobby Impollonia Roberto De Ioris Jeong YunWon + andrewjlawrence Christopher Armstrong Aaron Tubbs Vasantha Ganesh K Jason Michalski + Radu Ciorba Markus Holtermann Andrew Thompson Yusei Tahara @@ -301,28 +309,26 @@ Fabio Niephaus Akira Li Gustavo Niemeyer - Rafał Gałczyński + Nate Bragg Lucas Stadler roberto at goyle + Carl Bordum Hansen Matt Bogosian Yury V. 
Zaytsev florinpapa Anders Sigfridsson - Matt Jackson Nikolay Zinov rafalgalczynski at gmail.com Joshua Gilbert Anna Katrina Dominguez Kim Jin Su Amber Brown - Miro Hrončok - Anthony Sottile - Nate Bragg + Andrew Stepanov + Rafał Gałczyński Ben Darnell Juan Francisco Cantero Hurtado Godefroid Chappelle Julian Berman - Michael Hudson-Doyle Stephan Busemann Dan Colish timo @@ -332,6 +338,7 @@ halgari Jim Baker Chris Lambacher + John Aldis coolbutuseless at gmail.com Mike Bayer Rodrigo Araújo @@ -340,6 +347,7 @@ OlivierBlanvillain Jonas Pfannschmidt Zearin + Johan Forsberg Andrey Churin Dan Crosta reubano at gmail.com @@ -349,8 +357,9 @@ Steve Papanik Eli Stevens Boglarka Vezer - gabrielg + gabrielg at ec2-54-146-239-158.compute-1.amazonaws.com PavloKapyshin + Hervé Beraud Tomer Chachamu Christopher Groskopf Asmo Soinio @@ -364,7 +373,6 @@ Michael Chermside Anna Ravencroft remarkablerocket - Pauli Virtanen Petre Vijiac Berker Peksag Christian Muirhead @@ -384,12 +392,13 @@ Zooko Wilcox-O Hearn James Lan jiaaro + Evgenii Gorinov Markus Unterwaditzer Kristoffer Kleine Graham Markall Dan Loewenherz werat - Andrew Stepanov + Filip Salomonsson Niclas Olofsson Chris Pressey Tobias Diaz diff --git a/pypy/doc/contributor.rst b/pypy/doc/contributor.rst --- a/pypy/doc/contributor.rst +++ b/pypy/doc/contributor.rst @@ -341,7 +341,6 @@ Anna Ravencroft remarkablerocket Petre Vijiac - hgattic Berker Peksag Christian Muirhead soareschen diff --git a/pypy/doc/index-of-release-notes.rst b/pypy/doc/index-of-release-notes.rst --- a/pypy/doc/index-of-release-notes.rst +++ b/pypy/doc/index-of-release-notes.rst @@ -6,6 +6,7 @@ .. toctree:: + release-v7.0.0.rst release-v6.0.0.rst release-v5.10.1.rst release-v5.10.0.rst diff --git a/pypy/doc/index-of-whatsnew.rst b/pypy/doc/index-of-whatsnew.rst --- a/pypy/doc/index-of-whatsnew.rst +++ b/pypy/doc/index-of-whatsnew.rst @@ -7,6 +7,7 @@ .. 
toctree:: whatsnew-head.rst + whatsnew-pypy2-7.0.0.rst whatsnew-pypy2-6.0.0.rst whatsnew-pypy2-5.10.0.rst whatsnew-pypy2-5.10.0.rst @@ -41,6 +42,7 @@ .. toctree:: whatsnew-pypy3-head.rst + whatsnew-pypy3-7.0.0.rst whatsnew-pypy3-5.9.0.rst whatsnew-pypy3-5.8.0.rst whatsnew-pypy3-5.7.0.rst diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-pypy2-7.0.0.rst rename from pypy/doc/whatsnew-head.rst rename to pypy/doc/whatsnew-pypy2-7.0.0.rst diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-head.rst --- a/pypy/doc/whatsnew-pypy3-head.rst +++ b/pypy/doc/whatsnew-pypy3-head.rst @@ -1,7 +1,19 @@ -========================= -What's new in PyPy3 5.9+ -========================= - -.. this is the revision after release-pypy3.5-5.9 -.. startrev: be41e3ac0a29 - +======================== +What's new in PyPy3 6.0+ +======================== + +.. this is the revision after release-pypy3.5-v6.0 +.. startrev: 580e3e26cd32 + +.. branch: hroncok/fix-multiprocessing-regression-on-newer--1524656522151 + +Fix multiprocessing regression on newer glibcs + +.. branch: py3.5-user-site-impl + +Use implementation-specific site directories in sysconfig like in Python2 + +.. branch: py3.5-reverse-debugger + +The reverse-debugger branch has been merged. 
For more information, see +https://bitbucket.org/pypy/revdb From pypy.commits at gmail.com Wed Jan 30 02:27:52 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 29 Jan 2019 23:27:52 -0800 (PST) Subject: [pypy-commit] pypy default: move whatsnew-pypy3-head Message-ID: <5c5151f8.1c69fb81.44675.1d0b@mx.google.com> Author: Matti Picus Branch: Changeset: r95747:20364eed7800 Date: 2019-01-30 09:24 +0200 http://bitbucket.org/pypy/pypy/changeset/20364eed7800/ Log: move whatsnew-pypy3-head diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-7.0.0.rst rename from pypy/doc/whatsnew-pypy3-head.rst rename to pypy/doc/whatsnew-pypy3-7.0.0.rst From pypy.commits at gmail.com Wed Jan 30 02:27:54 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 29 Jan 2019 23:27:54 -0800 (PST) Subject: [pypy-commit] pypy default: restart whatsnew-head for py3.5, py2 Message-ID: <5c5151fa.1c69fb81.efa15.1bbd@mx.google.com> Author: Matti Picus Branch: Changeset: r95748:7364fb8c4c67 Date: 2019-01-30 09:24 +0200 http://bitbucket.org/pypy/pypy/changeset/7364fb8c4c67/ Log: restart whatsnew-head for py3.5, py2 diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/whatsnew-head.rst @@ -0,0 +1,7 @@ +========================== +What's new in PyPy2.7 7.0+ +========================== + +.. this is a revision shortly after release-pypy-7.0.0 +.. startrev: 481c69f7d81f + diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-head.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/whatsnew-pypy3-head.rst @@ -0,0 +1,7 @@ +======================== +What's new in PyPy3 7.0+ +======================== + +.. this is the revision after release-pypy3.5-v7.0 +.. 
startrev: 9d2fa7c63b7c + From pypy.commits at gmail.com Wed Jan 30 02:27:56 2019 From: pypy.commits at gmail.com (mattip) Date: Tue, 29 Jan 2019 23:27:56 -0800 (PST) Subject: [pypy-commit] pypy py3.5: merge default into branch Message-ID: <5c5151fc.1c69fb81.4e144.3f66@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r95749:665e6fb2cccb Date: 2019-01-30 09:26 +0200 http://bitbucket.org/pypy/pypy/changeset/665e6fb2cccb/ Log: merge default into branch diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -40,16 +40,16 @@ Armin Rigo Maciej Fijalkowski Carl Friedrich Bolz-Tereick + Antonio Cuni Amaury Forgeot d'Arc - Antonio Cuni Matti Picus Samuele Pedroni Ronan Lamy Alex Gaynor Philip Jenvey + Richard Plangger Brian Kearns - Richard Plangger - Michael Hudson + Michael Hudson-Doyle Manuel Jacob David Schneider Holger Krekel @@ -59,8 +59,8 @@ Anders Chrigstrom Wim Lavrijsen Eric van Riet Paap + Remi Meier Richard Emslie - Remi Meier Alexander Schremmer Dan Villiom Podlaski Christiansen Lukas Diekmann @@ -70,10 +70,10 @@ Niklaus Haldimann Camillo Bruni Laura Creighton - Romain Guillebert Toon Verwaest Leonardo Santagada Seo Sanghyeon + Romain Guillebert Ronny Pfannschmidt Justin Peel Raffael Tfirst @@ -114,12 +114,12 @@ Squeaky Edd Barrett Timo Paulssen + Laurence Tratt Marius Gedminas Nicolas Truessel Alexandre Fayolle Simon Burton Martin Matusiak - Laurence Tratt Wenzhu Man Konstantin Lopuhin John Witulski @@ -134,8 +134,9 @@ Jean-Philippe St. 
Pierre Guido van Rossum Pavel Vinogradov + Stefan Beyer + William Leslie Paweł Piotr Przeradowski - William Leslie marky1991 Ilya Osadchiy Tobias Oberstein @@ -144,10 +145,10 @@ Taavi Burns Adrian Kuhn tav + Stian Andreassen Georg Brandl Joannah Nanjekye Bert Freudenberg - Stian Andreassen Wanja Saatkamp Mike Blume Gerald Klix @@ -163,6 +164,7 @@ Vasily Kuznetsov Preston Timmons David Ripton + Pieter Zieschang Dusty Phillips Lukas Renggli Guenter Jantzen @@ -176,6 +178,7 @@ Andrew Durdin Ben Young Michael Schneider + Yusuke Tsutsumi Nicholas Riley Jason Chu Igor Trindade Oliveira @@ -187,7 +190,6 @@ Mariano Anaya anatoly techtonik Karl Bartel - Stefan Beyer Gabriel Lavoie Jared Grubb Alecsandru Patrascu @@ -198,7 +200,6 @@ Victor Stinner Andrews Medina Aaron Iles - p_zieschang at yahoo.de Toby Watson Daniel Patrick Stuart Williams @@ -210,6 +211,7 @@ Mikael Schönenberg Stanislaw Halik Mihnea Saracin + Matt Jackson Berkin Ilbeyi Gasper Zejn Faye Zhao @@ -217,12 +219,14 @@ Anders Qvist Corbin Simpson Chirag Jadwani + Pauli Virtanen Jonathan David Riehl Beatrice During Alex Perry Robert Zaremba Alan McIntyre Alexander Sedov + David C Ellis Vaibhav Sood Reuben Cummings Attila Gobi @@ -242,7 +246,6 @@ Arjun Naik Aaron Gallagher Alexis Daboville - Pieter Zieschang Karl Ramm Lukas Vacek Omer Katz @@ -270,12 +273,15 @@ Catalin Gabriel Manciu Jacob Oscarson Ryan Gonzalez + Antoine Dupre Kristjan Valur Jonsson Lucio Torre Richard Lancaster Dan Buch Lene Wagner Tomo Cocoa + Miro Hrončok + Anthony Sottile David Lievens Neil Blakey-Milner Henrik Vendelbo @@ -290,10 +296,12 @@ Bobby Impollonia Roberto De Ioris Jeong YunWon + andrewjlawrence Christopher Armstrong Aaron Tubbs Vasantha Ganesh K Jason Michalski + Radu Ciorba Markus Holtermann Andrew Thompson Yusei Tahara @@ -301,28 +309,26 @@ Fabio Niephaus Akira Li Gustavo Niemeyer - Rafał Gałczyński + Nate Bragg Lucas Stadler roberto at goyle + Carl Bordum Hansen Matt Bogosian Yury V. 
Zaytsev florinpapa Anders Sigfridsson - Matt Jackson Nikolay Zinov rafalgalczynski at gmail.com Joshua Gilbert Anna Katrina Dominguez Kim Jin Su Amber Brown - Miro Hrončok - Anthony Sottile - Nate Bragg + Andrew Stepanov + Rafał Gałczyński Ben Darnell Juan Francisco Cantero Hurtado Godefroid Chappelle Julian Berman - Michael Hudson-Doyle Stephan Busemann Dan Colish timo @@ -332,6 +338,7 @@ halgari Jim Baker Chris Lambacher + John Aldis coolbutuseless at gmail.com Mike Bayer Rodrigo Araújo @@ -340,6 +347,7 @@ OlivierBlanvillain Jonas Pfannschmidt Zearin + Johan Forsberg Andrey Churin Dan Crosta reubano at gmail.com @@ -349,8 +357,9 @@ Steve Papanik Eli Stevens Boglarka Vezer - gabrielg + gabrielg at ec2-54-146-239-158.compute-1.amazonaws.com PavloKapyshin + Hervé Beraud Tomer Chachamu Christopher Groskopf Asmo Soinio @@ -364,7 +373,6 @@ Michael Chermside Anna Ravencroft remarkablerocket - Pauli Virtanen Petre Vijiac Berker Peksag Christian Muirhead @@ -384,12 +392,13 @@ Zooko Wilcox-O Hearn James Lan jiaaro + Evgenii Gorinov Markus Unterwaditzer Kristoffer Kleine Graham Markall Dan Loewenherz werat - Andrew Stepanov + Filip Salomonsson Niclas Olofsson Chris Pressey Tobias Diaz diff --git a/pypy/doc/contributor.rst b/pypy/doc/contributor.rst --- a/pypy/doc/contributor.rst +++ b/pypy/doc/contributor.rst @@ -341,7 +341,6 @@ Anna Ravencroft remarkablerocket Petre Vijiac - hgattic Berker Peksag Christian Muirhead soareschen diff --git a/pypy/doc/index-of-release-notes.rst b/pypy/doc/index-of-release-notes.rst --- a/pypy/doc/index-of-release-notes.rst +++ b/pypy/doc/index-of-release-notes.rst @@ -6,6 +6,7 @@ .. toctree:: + release-v7.0.0.rst release-v6.0.0.rst release-v5.10.1.rst release-v5.10.0.rst diff --git a/pypy/doc/index-of-whatsnew.rst b/pypy/doc/index-of-whatsnew.rst --- a/pypy/doc/index-of-whatsnew.rst +++ b/pypy/doc/index-of-whatsnew.rst @@ -7,6 +7,7 @@ .. 
toctree:: whatsnew-head.rst + whatsnew-pypy2-7.0.0.rst whatsnew-pypy2-6.0.0.rst whatsnew-pypy2-5.10.0.rst whatsnew-pypy2-5.10.0.rst @@ -41,6 +42,7 @@ .. toctree:: whatsnew-pypy3-head.rst + whatsnew-pypy3-7.0.0.rst whatsnew-pypy3-5.9.0.rst whatsnew-pypy3-5.8.0.rst whatsnew-pypy3-5.7.0.rst diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -1,69 +1,7 @@ ========================== -What's new in PyPy2.7 6.0+ +What's new in PyPy2.7 7.0+ ========================== -.. this is a revision shortly after release-pypy-6.0.0 -.. startrev: e50e11af23f1 +.. this is a revision shortly after release-pypy-7.0.0 +.. startrev: 481c69f7d81f -.. branch: cppyy-packaging - -Main items: vastly better template resolution and improved performance. In -detail: upgrade to backend 1.4, improved handling of templated methods and -functions (in particular automatic deduction of types), improved pythonization -interface, range of compatibility fixes for Python3, free functions now take -fast libffi path when possible, moves for strings (incl. from Python str), -easier/faster handling of std::vector by numpy, improved and faster object -identity preservation - -.. branch: socket_default_timeout_blockingness - -Make sure 'blocking-ness' of socket is set along with default timeout - -.. branch: crypt_h - -Include crypt.h for crypt() on Linux - -.. branch: gc-more-logging - -Log additional gc-minor and gc-collect-step info in the PYPYLOG - -.. branch: reverse-debugger - -The reverse-debugger branch has been merged. For more information, see -https://bitbucket.org/pypy/revdb - - -.. branch: pyparser-improvements-3 - -Small refactorings in the Python parser. - -.. branch: fix-readme-typo - -.. branch: avoid_shell_injection_in_shutil - -Backport CPython fix for possible shell injection issue in `distutils.spawn`, -https://bugs.python.org/issue34540 - -.. 
branch: cffi_dlopen_unicode - -Enable use of unicode file names in `dlopen` - -.. branch: rlock-in-rpython - -Backport CPython fix for `thread.RLock` - - -.. branch: expose-gc-time - -Make GC hooks measure time in seconds (as opposed to an opaque unit). - -.. branch: cleanup-test_lib_pypy - -Update most test_lib_pypy/ tests and move them to extra_tests/. - -.. branch: gc-disable - -Make it possible to manually manage the GC by using a combination of -gc.disable() and gc.collect_step(). Make sure to write a proper release -announcement in which we explain that existing programs could leak memory if -they run for too much time between a gc.disable()/gc.enable() diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-pypy2-7.0.0.rst copy from pypy/doc/whatsnew-head.rst copy to pypy/doc/whatsnew-pypy2-7.0.0.rst diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-7.0.0.rst copy from pypy/doc/whatsnew-pypy3-head.rst copy to pypy/doc/whatsnew-pypy3-7.0.0.rst diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-head.rst --- a/pypy/doc/whatsnew-pypy3-head.rst +++ b/pypy/doc/whatsnew-pypy3-head.rst @@ -1,19 +1,7 @@ -======================== -What's new in PyPy3 6.0+ -======================== - -.. this is the revision after release-pypy3.5-v6.0 -.. startrev: 580e3e26cd32 - -.. branch: hroncok/fix-multiprocessing-regression-on-newer--1524656522151 - -Fix multiprocessing regression on newer glibcs - -.. branch: py3.5-user-site-impl - -Use implementation-specific site directories in sysconfig like in Python2 - -.. branch: py3.5-reverse-debugger - -The reverse-debugger branch has been merged. For more information, see -https://bitbucket.org/pypy/revdb +======================== +What's new in PyPy3 7.0+ +======================== + +.. this is the revision after release-pypy3.5-v7.0 +.. 
startrev: 9d2fa7c63b7c + From pypy.commits at gmail.com Wed Jan 30 15:30:46 2019 From: pypy.commits at gmail.com (arigo) Date: Wed, 30 Jan 2019 12:30:46 -0800 (PST) Subject: [pypy-commit] pypy default: Change to test the readthedocs integration hooks Message-ID: <5c520976.1c69fb81.a25de.a0d4@mx.google.com> Author: Armin Rigo Branch: Changeset: r95750:0d9f689ae8c7 Date: 2019-01-30 21:30 +0100 http://bitbucket.org/pypy/pypy/changeset/0d9f689ae8c7/ Log: Change to test the readthedocs integration hooks diff --git a/pypy/doc/interpreter.rst b/pypy/doc/interpreter.rst --- a/pypy/doc/interpreter.rst +++ b/pypy/doc/interpreter.rst @@ -156,7 +156,7 @@ environment found in `Frames`. Frames and Functions have references to a code object. Here is a list of Code attributes: -* ``co_flags`` flags if this code object has nested scopes/generators +* ``co_flags`` flags if this code object has nested scopes/generators/etc. * ``co_stacksize`` the maximum depth the stack can reach while executing the code * ``co_code`` the actual bytecode string From pypy.commits at gmail.com Wed Jan 30 16:38:52 2019 From: pypy.commits at gmail.com (arigo) Date: Wed, 30 Jan 2019 13:38:52 -0800 (PST) Subject: [pypy-commit] pypy default: Expand like about revdb Message-ID: <5c52196c.1c69fb81.ca4fb.b99f@mx.google.com> Author: Armin Rigo Branch: Changeset: r95751:1cabe8ae73e8 Date: 2019-01-30 22:34 +0100 http://bitbucket.org/pypy/pypy/changeset/1cabe8ae73e8/ Log: Expand like about revdb diff --git a/pypy/doc/release-v7.0.0.rst b/pypy/doc/release-v7.0.0.rst --- a/pypy/doc/release-v7.0.0.rst +++ b/pypy/doc/release-v7.0.0.rst @@ -107,7 +107,10 @@ ``PyOS_InputHook``, ``PyErr_FormatFromCause`` (Py3.6), * Implement new wordcode instruction encoding (Py3.6) * Log additional gc-minor and gc-collect-step info in the PYPYLOG -* Set ``reverse-debugger`` active by default. For more information, see +* The ``reverse-debugger`` (revdb) branch has been merged to the default + branch, so it should always be up-to-date. 
You still need a special pypy + build, but you can compile it from the same source as the one we distribute + for the v7.0.0 release. For more information, see https://bitbucket.org/pypy/revdb * Support underscores in numerical literals like ``'4_2'`` (Py3.6) * Pre-emptively raise MemoryError if the size of dequeue in ``_collections.deque`` From pypy.commits at gmail.com Wed Jan 30 16:40:17 2019 From: pypy.commits at gmail.com (arigo) Date: Wed, 30 Jan 2019 13:40:17 -0800 (PST) Subject: [pypy-commit] cffi default: Update whatsnew Message-ID: <5c5219c1.1c69fb81.c0aec.7d06@mx.google.com> Author: Armin Rigo Branch: Changeset: r3188:eaf42ddaca57 Date: 2019-01-30 22:40 +0100 http://bitbucket.org/cffi/cffi/changeset/eaf42ddaca57/ Log: Update whatsnew diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst --- a/doc/source/whatsnew.rst +++ b/doc/source/whatsnew.rst @@ -14,7 +14,7 @@ to ``pack=1`` (saying e.g. that fields like ``int`` should be aligned to 1 byte instead of 4). -* Windows, CPython 3.x: link cffi modules with ``python3.dll`` +* Windows, CPython 3.x: cffi modules are linked with ``python3.dll`` again. This makes them independant on the exact CPython version, like they are on other platforms. **It requires virtualenv 16.0.0.** @@ -31,6 +31,13 @@ can now be released at known times, either by using the ``with`` keyword or by calling the new ``ffi.release()``. +* CPython: if a thread is started from C and then runs Python code (with + callbacks or with the embedding solution), then previous versions of + cffi would contain possible crashes and/or memory leaks. Hopefully, + this has been fixed (see `issue #362`_). + +.. 
_`issue #362`: https://bitbucket.org/cffi/cffi/issues/362/ + v1.11.5 ======= From pypy.commits at gmail.com Wed Jan 30 17:52:48 2019 From: pypy.commits at gmail.com (arigo) Date: Wed, 30 Jan 2019 14:52:48 -0800 (PST) Subject: [pypy-commit] pypy default: Tentative fix for cpyext with revdb Message-ID: <5c522ac0.1c69fb81.4e144.938f@mx.google.com> Author: Armin Rigo Branch: Changeset: r95753:45e0519a5fe6 Date: 2019-01-30 23:52 +0100 http://bitbucket.org/pypy/pypy/changeset/45e0519a5fe6/ Log: Tentative fix for cpyext with revdb diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -507,6 +507,7 @@ header = DEFAULT_HEADER if func.__name__ in FUNCTIONS_BY_HEADER[header]: raise ValueError("%s already registered" % func.__name__) + func._revdb_c_only_ = True # hack for revdb api_function = COnlyApiFunction(argtypes, restype, func) FUNCTIONS_BY_HEADER[header][func.__name__] = api_function return api_function diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py --- a/rpython/rlib/rgc.py +++ b/rpython/rlib/rgc.py @@ -67,7 +67,7 @@ """Returns True if we have a "split GC address space", i.e. if we are translating with an option that doesn't support taking raw addresses inside GC objects and "hacking" at them. 
This is - notably the case with --reversedb.""" + notably the case with --revdb.""" return False # for test purposes we allow objects to be pinned and use diff --git a/rpython/translator/revdb/gencsupp.py b/rpython/translator/revdb/gencsupp.py --- a/rpython/translator/revdb/gencsupp.py +++ b/rpython/translator/revdb/gencsupp.py @@ -51,6 +51,10 @@ ## return False def prepare_function(funcgen): + if getattr(getattr(funcgen.graph, 'func', None), '_revdb_c_only_', False): + extra_enter_text = 'RPY_REVDB_C_ONLY_ENTER' + extra_return_text = 'RPY_REVDB_C_ONLY_LEAVE' + return extra_enter_text, extra_return_text stack_bottom = False for block in funcgen.graph.iterblocks(): for op in block.operations: diff --git a/rpython/translator/revdb/src-revdb/revdb.c b/rpython/translator/revdb/src-revdb/revdb.c --- a/rpython/translator/revdb/src-revdb/revdb.c +++ b/rpython/translator/revdb/src-revdb/revdb.c @@ -253,7 +253,10 @@ "(use REVDB=logfile)\n", (int)getpid()); } - rpy_revdb.buf_p = rpy_rev_buffer + sizeof(int16_t); + if (rpy_rev_fileno >= 0) + rpy_revdb.buf_p = rpy_rev_buffer + sizeof(int16_t); + else + rpy_revdb.buf_p = NULL; rpy_revdb.buf_limit = rpy_rev_buffer + sizeof(rpy_rev_buffer) - 32; rpy_revdb.unique_id_seen = 1; @@ -269,17 +272,23 @@ ssize_t full_size; assert(rpy_revdb.lock); + if (rpy_revdb.buf_p == NULL) + return; + assert(rpy_rev_fileno >= 0); + /* write the current buffer content to the OS */ full_size = rpy_revdb.buf_p - rpy_rev_buffer; rpy_revdb.buf_p = rpy_rev_buffer + sizeof(int16_t); - if (rpy_rev_fileno >= 0) - write_all(rpy_rev_buffer, full_size); + write_all(rpy_rev_buffer, full_size); } static ssize_t current_packet_size(void) { /* must be called with the lock held */ - return rpy_revdb.buf_p - (rpy_rev_buffer + sizeof(int16_t)); + if (rpy_revdb.buf_p != NULL) + return rpy_revdb.buf_p - (rpy_rev_buffer + sizeof(int16_t)); + else + return 0; } RPY_EXTERN @@ -327,6 +336,11 @@ rpy_reverse_db_flush(); assert(current_packet_size() == 0); + if (rpy_rev_fileno 
< 0) + return; + /* should not be here from the middle of a @c_only function */ + assert(rpy_revdb.buf_p != NULL); + *(int16_t *)p = async_code; memcpy(rpy_revdb.buf_p, &content, sizeof(uint64_t)); rpy_revdb.buf_p += sizeof(uint64_t); @@ -472,6 +486,9 @@ if (rpy_rev_fileno < 0) return 1; + /* should not be here from the middle of a @c_only function */ + assert(rpy_revdb.buf_p != NULL); + base_offset = lseek(rpy_rev_fileno, 0, SEEK_CUR); if (base_offset < 0) { perror("lseek"); @@ -488,6 +505,9 @@ if (rpy_rev_fileno < 0) return; + /* should not be here from the middle of a @c_only function */ + assert(rpy_revdb.buf_p != NULL); + base_offset = lseek(rpy_rev_fileno, 0, SEEK_CUR); if (base_offset < 0) { perror("lseek"); @@ -1033,9 +1053,9 @@ " echo 0 | sudo tee /proc/sys/kernel/randomize_va_space\n" "\n" "It has been reported that on Linux kernel 4.12.4-1-ARCH,\n" - "ASLR cannot be disabled at all for libpypy-c.so. For now\n" - "there is no good solution. Either you downgrade the\n" - "kernel, or you translate with --no-shared (and you loose\n" + "ASLR cannot be disabled at all for libpypy-c.so. It works\n" + "again in kernel 4.19 (and maybe sooner). 
Either change\n" + "kernels, or translate with --no-shared (but then you loose\n" "PyPy's cpyext ability).\n" "\n", argv[0]); exit(1); diff --git a/rpython/translator/revdb/src-revdb/revdb_include.h b/rpython/translator/revdb/src-revdb/revdb_include.h --- a/rpython/translator/revdb/src-revdb/revdb_include.h +++ b/rpython/translator/revdb/src-revdb/revdb_include.h @@ -16,7 +16,8 @@ #endif bool_t watch_enabled; int lock; - char *buf_p, *buf_limit, *buf_readend; + char *buf_p; /* NULL during recording if recording is actually disabled */ + char *buf_limit, *buf_readend; uint64_t stop_point_seen, stop_point_break; uint64_t unique_id_seen, unique_id_break; } rpy_revdb_t; @@ -85,9 +86,13 @@ { \ decl_e = variable; \ _RPY_REVDB_PRINT("[ wr ]", _e); \ - memcpy(rpy_revdb.buf_p, &_e, sizeof(_e)); \ - if ((rpy_revdb.buf_p += sizeof(_e)) > rpy_revdb.buf_limit) \ - rpy_reverse_db_flush(); \ + char *_dst = rpy_revdb.buf_p; \ + if (_dst) { \ + memcpy(_dst, &_e, sizeof(_e)); \ + if ((rpy_revdb.buf_p = _dst + sizeof(_e)) \ + > rpy_revdb.buf_limit) \ + rpy_reverse_db_flush(); \ + } \ } #define _RPY_REVDB_EMIT_REPLAY(decl_e, variable) \ @@ -179,6 +184,13 @@ rpy_reverse_db_bad_acquire_gil("release"); \ } +#define RPY_REVDB_C_ONLY_ENTER \ + char *saved_bufp = rpy_revdb.buf_p; \ + rpy_revdb.buf_p = NULL; + +#define RPY_REVDB_C_ONLY_LEAVE \ + rpy_revdb.buf_p = saved_bufp; + #define RPY_REVDB_CALLBACKLOC(locnum) \ rpy_reverse_db_callback_loc(locnum) From pypy.commits at gmail.com Wed Jan 30 18:42:42 2019 From: pypy.commits at gmail.com (arigo) Date: Wed, 30 Jan 2019 15:42:42 -0800 (PST) Subject: [pypy-commit] pypy default: Fix for revdb (how to test??) Message-ID: <5c523672.1c69fb81.95c81.146b@mx.google.com> Author: Armin Rigo Branch: Changeset: r95754:04dfff1c783a Date: 2019-01-31 00:42 +0100 http://bitbucket.org/pypy/pypy/changeset/04dfff1c783a/ Log: Fix for revdb (how to test??) 
diff --git a/rpython/rlib/src/boehm-rawrefcount.c b/rpython/rlib/src/boehm-rawrefcount.c --- a/rpython/rlib/src/boehm-rawrefcount.c +++ b/rpython/rlib/src/boehm-rawrefcount.c @@ -191,6 +191,7 @@ #endif assert(result->ob_refcnt == REFCNT_FROM_PYPY); result->ob_refcnt = 1; + result->ob_pypy_link = 0; p->pyobj = NULL; *pp = p->next_in_bucket; p->next_in_bucket = hash_free_list; From pypy.commits at gmail.com Thu Jan 31 01:39:19 2019 From: pypy.commits at gmail.com (mattip) Date: Wed, 30 Jan 2019 22:39:19 -0800 (PST) Subject: [pypy-commit] pypy default: extend description of fix in release notes Message-ID: <5c529817.1c69fb81.a3c56.1fe1@mx.google.com> Author: Matti Picus Branch: Changeset: r95755:e4986da8a6c5 Date: 2019-01-31 08:38 +0200 http://bitbucket.org/pypy/pypy/changeset/e4986da8a6c5/ Log: extend description of fix in release notes diff --git a/pypy/doc/release-v7.0.0.rst b/pypy/doc/release-v7.0.0.rst --- a/pypy/doc/release-v7.0.0.rst +++ b/pypy/doc/release-v7.0.0.rst @@ -129,7 +129,8 @@ * Speed up ``max(list-of-int)`` from non-jitted code * Fix Windows ``os.listdir()`` for some cases (see CPython #32539) * Add ``select.PIPE_BUF`` -* Use ``subprocess`` to avoid shell injection in ``shutil`` module +* Use ``subprocess`` to avoid shell injection in ``shutil`` module - backport + of https://bugs.python.org/issue34540 * Rename ``_Py_ZeroStruct`` to ``_Py_FalseStruct`` (Py3.5, Py3.6) * Remove some cpyext names for Py3.5, Py3.6 * Enable use of unicode file names in ``dlopen`` From pypy.commits at gmail.com Thu Jan 31 04:26:37 2019 From: pypy.commits at gmail.com (arigo) Date: Thu, 31 Jan 2019 01:26:37 -0800 (PST) Subject: [pypy-commit] cffi default: Merged in vyskocilm/cffi (pull request #80) Message-ID: <5c52bf4d.1c69fb81.a818c.a396@mx.google.com> Author: Armin Rigo Branch: Changeset: r3204:b21780fe49f6 Date: 2019-01-31 09:26 +0000 http://bitbucket.org/cffi/cffi/changeset/b21780fe49f6/ Log: Merged in vyskocilm/cffi (pull request #80) Passing of proper 
CFLAGS/CXXFLAGS/LDFLAGS is hard and error prone diff --git a/cffi/api.py b/cffi/api.py --- a/cffi/api.py +++ b/cffi/api.py @@ -2,6 +2,7 @@ from .lock import allocate_lock from .error import CDefError from . import model +from . import pkgconfig try: callable @@ -640,6 +641,9 @@ if os.sep in module_name or (os.altsep and os.altsep in module_name): raise ValueError("'module_name' must not contain '/': use a dotted " "name to make a 'package.module' location") + if "pkgconfig" in kwds: + pkgconfig.merge_flags(kwds, pkgconfig.flags(kwds["pkgconfig"])) + del kwds["pkgconfig"] self._assigned_source = (str(module_name), source, source_extension, kwds) diff --git a/cffi/error.py b/cffi/error.py --- a/cffi/error.py +++ b/cffi/error.py @@ -21,3 +21,10 @@ """ An error raised when incomplete structures are passed into cdef, but no verification has been done """ + +class PkgConfigError(Exception): + """ An error raised for all pkg-config related errors + except version mismatch""" + +class PkgConfigModuleVersionNotFound(Exception): + """ An error raised when requested version was not found""" diff --git a/cffi/pkgconfig.py b/cffi/pkgconfig.py new file mode 100644 --- /dev/null +++ b/cffi/pkgconfig.py @@ -0,0 +1,105 @@ +# pkg-config, https://www.freedesktop.org/wiki/Software/pkg-config/ integration for cffi +import subprocess +import sys +import re + +from .error import PkgConfigModuleVersionNotFound +from .error import PkgConfigError + +def merge_flags(cfg1, cfg2): + """Merge values from cffi config flags cfg2 to cf1 + + Example: + merge_flags({"libraries": ["one"]}, {"libraries": "two"}) + {"libraries}" : ["one", "two"]} + """ + for key, value in cfg2.items(): + if not key in cfg1: + cfg1 [key] = value + else: + cfg1 [key].extend(value) + return cfg1 + + +def call(libname, flag): + """Calls pkg-config and returing the output if found + """ + a = ["pkg-config", "--print-errors"] + a.append(flag) + a.append(libname) + pc = None + try: + pc = subprocess.Popen(a, 
stdout=subprocess.PIPE, stderr=subprocess.PIPE) + except FileNotFoundError: + pass + if pc is None: + raise PkgConfigError("pkg-config was not found on this system") + + bout, berr = pc.communicate() + if berr is not None: + err = berr.decode(sys.getfilesystemencoding()) + if re.search("Requested '.*' but version of ", err, re.MULTILINE) is not None: + raise PkgConfigModuleVersionNotFound(err) + else: + PkgConfigError(err) + return bout + + +def flags(libs): + r"""Return compiler line flags for FFI.set_source based on pkg-config output + + Usage + ... + ffibuilder.set_source("_foo", pkgconfig = ["libfoo", "libbar >= 1.8.3"]) + + If pkg-config is installed on build machine, then arguments include_dirs, + library_dirs, libraries, define_macros, extra_compile_args and + extra_link_args are extended with an output of pkg-config for libfoo and + libbar. + + Raises + * PkgConfigModuleVersionNotFound if requested version does not match + * PkgConfigError for all other errors + """ + + subprocess.check_output(["pkg-config", "--version"]) + + # make API great again! 
+ if isinstance(libs, (str, bytes)): + libs = (libs, ) + + # drop starting -I -L -l from cflags + def dropILl(string): + def _dropILl(string): + if string.startswith("-I") or string.startswith("-L") or string.startswith("-l"): + return string [2:] + return [_dropILl(x) for x in string.split()] + + # convert -Dfoo=bar to list of tuples [("foo", "bar")] expected by cffi + def macros(string): + def _macros(string): + return tuple(string [2:].split("=", 2)) + return [_macros(x) for x in string.split() if x.startswith("-D")] + + def drop_macros(string): + return [x for x in string.split() if not x.startswith("-D")] + + # return kwargs for given libname + def kwargs(libname): + fse = sys.getfilesystemencoding() + return { + "include_dirs" : dropILl(call(libname, "--cflags-only-I").decode(fse)), + "library_dirs" : dropILl(call(libname, "--libs-only-L").decode(fse)), + "libraries" : dropILl(call(libname, "--libs-only-l").decode(fse)), + "define_macros" : macros(call(libname, "--cflags-only-other").decode('ascii')), + "extra_compile_args" : drop_macros(call(libname, "--cflags-only-other").decode('ascii')), + "extra_link_args" : call(libname, "--libs-only-other").decode('ascii').split() + } + + # merge all arguments together + ret = {} + for libname in libs: + foo = kwargs(libname) + merge_flags(ret, foo) + + return ret diff --git a/testing/cffi1/test_pkgconfig.py b/testing/cffi1/test_pkgconfig.py new file mode 100644 --- /dev/null +++ b/testing/cffi1/test_pkgconfig.py @@ -0,0 +1,43 @@ +import sys +import subprocess +import py +import cffi.pkgconfig as pkgconfig + +def mock_call(libname, flag): + assert libname=="python-3.6", "mocked pc function supports python-3.6 input ONLY" + + flags = { + "--cflags-only-I": b"-I/usr/include/python3.6m\n", + "--libs-only-L": b"-L/usr/lib64\n", + "--libs-only-l": b"-lpython3.6\n", + "--cflags-only-other": b"-DCFFI_TEST=1 -O42\n", + "--libs-only-other": b"-lm\n", + } + return flags[flag] + +pkgconfig.call = mock_call + + +def 
test_merge_flags(): + + d1 = {"ham": [1, 2, 3], "spam" : ["a", "b", "c"], "foo" : []} + d2 = {"spam" : ["spam", "spam", "spam"], "bar" : ["b", "a", "z"]} + + pkgconfig.merge_flags(d1, d2) + assert d1 == { + "ham": [1, 2, 3], + "spam" : ["a", "b", "c", "spam", "spam", "spam"], + "bar" : ["b", "a", "z"], + "foo" : []} + + +def test_pkgconfig(): + flags = pkgconfig.flags("python-3.6") + assert flags == { + 'include_dirs': [u'/usr/include/python3.6m'], + 'library_dirs': [u'/usr/lib64'], + 'libraries': [u'python3.6'], + 'define_macros': [(u'CFFI_TEST', u'1')], + 'extra_compile_args': [u'-O42'], + 'extra_link_args': [u'-lm'] + } From pypy.commits at gmail.com Thu Jan 31 04:26:40 2019 From: pypy.commits at gmail.com (vyskocilm) Date: Thu, 31 Jan 2019 01:26:40 -0800 (PST) Subject: [pypy-commit] cffi default: Passing of proper CFLAGS/CXXFLAGS/LDFLAGS is hard and error prone Message-ID: <5c52bf50.1c69fb81.ba29d.dcb8@mx.google.com> Author: Michal Vyskocil Branch: Changeset: r3189:4e8e9cab26fd Date: 2017-05-22 23:49 +0200 http://bitbucket.org/cffi/cffi/changeset/4e8e9cab26fd/ Log: Passing of proper CFLAGS/CXXFLAGS/LDFLAGS is hard and error prone Add pkg-config wrapper, which is the cross-platform tool telling exactly this. diff --git a/cffi/api.py b/cffi/api.py --- a/cffi/api.py +++ b/cffi/api.py @@ -2,6 +2,7 @@ from .lock import allocate_lock from .error import CDefError from . 
import model +from .pkgconfig import pkgconfig_installed, merge_dicts, pkgconfig_kwargs try: callable @@ -611,6 +612,18 @@ if os.sep in module_name or (os.altsep and os.altsep in module_name): raise ValueError("'module_name' must not contain '/': use a dotted " "name to make a 'package.module' location") + if "pkgconfig" in kwds: + if pkgconfig_installed (): + try: + del kwds ["libraries"] + except KeyError: + pass + merge_dicts (kwds, pkgconfig_kwargs (kwds ["pkgconfig"])) + try: + del kwds ["pkgconfig"] + except KeyError: + pass + print (kwds) self._assigned_source = (str(module_name), source, source_extension, kwds) diff --git a/cffi/pkgconfig.py b/cffi/pkgconfig.py new file mode 100644 --- /dev/null +++ b/cffi/pkgconfig.py @@ -0,0 +1,65 @@ +# pkg-config, https://www.freedesktop.org/wiki/Software/pkg-config/ integration for cffi +import subprocess + +def pkgconfig_installed (): + try: + subprocess.check_output (["pkg-config", "--version"]) + return True + except subprocess.CalledProcessError: + return False + +def merge_dicts (d1, d2): + for key, value in d2.items (): + if not key in d1: + d1 [key] = value + else: + d1 [key].extend (value) + return d1 + +def pkgconfig_kwargs (libs): + """If pkg-config is available, then return kwargs for set_source based on pkg-config output + + It setup include_dirs, library_dirs, libraries and define_macros + """ + + # make API great again! 
+ if isinstance (libs, (str, bytes)): + libs = (libs, ) + + # drop starting -I -L -l from cflags + def dropILl (string): + def _dropILl (string): + if string.startswith ("-I") or string.startswith ("-L") or string.startswith ("-l"): + return string [2:] + return [_dropILl (x) for x in string.split ()] + + # convert -Dfoo=bar to list of tuples [("foo", "bar")] expected by cffi + def macros (string): + def _macros (string): + return tuple (string [2:].split ('=', 2)) + return [_macros (x) for x in string.split () if x.startswith ("-D")] + + # pkg-config call + def pc (libname, *args): + a = ["pkg-config", "--print-errors"] + a.extend (args) + a.append (libname) + return subprocess.check_output (a) + + # return kwargs for given libname + def kwargs (libname): + return { + "include_dirs" : dropILl (pc (libname, "--cflags-only-I")), + "library_dirs" : dropILl (pc (libname, "--libs-only-L")), + "libraries" : dropILl (pc (libname, "--libs-only-l")), + "define_macros" : macros (pc (libname, "--cflags")), + } + + # merge all arguments together + ret = {} + for libname in libs: + foo = kwargs (libname) + merge_dicts (ret, foo) + + return ret + From pypy.commits at gmail.com Thu Jan 31 04:26:42 2019 From: pypy.commits at gmail.com (vyskocilm) Date: Thu, 31 Jan 2019 01:26:42 -0800 (PST) Subject: [pypy-commit] cffi default: Improve documentation of pkgconfig_kwargs Message-ID: <5c52bf52.1c69fb81.f3586.ba31@mx.google.com> Author: Michal Vyskocil Branch: Changeset: r3190:47a70ef16876 Date: 2017-05-25 09:14 +0200 http://bitbucket.org/cffi/cffi/changeset/47a70ef16876/ Log: Improve documentation of pkgconfig_kwargs diff --git a/cffi/pkgconfig.py b/cffi/pkgconfig.py --- a/cffi/pkgconfig.py +++ b/cffi/pkgconfig.py @@ -2,6 +2,7 @@ import subprocess def pkgconfig_installed (): + """Check if pkg=config is installed or not""" try: subprocess.check_output (["pkg-config", "--version"]) return True @@ -9,6 +10,7 @@ return False def merge_dicts (d1, d2): + """Helper function to merge two 
dicts with lists""" for key, value in d2.items (): if not key in d1: d1 [key] = value @@ -17,9 +19,16 @@ return d1 def pkgconfig_kwargs (libs): - """If pkg-config is available, then return kwargs for set_source based on pkg-config output - - It setup include_dirs, library_dirs, libraries and define_macros + r"""Return kwargs for FFI.set_source based on pkg-config output + + Usage + ... + ffibuilder.set_source ("_foo", libraries = ["foo", "bar"], pkgconfig = ["libfoo", "libbar"]) + + If pkg-config is installed on build machine, then arguments include_dirs, + library_dirs and define_macros are extended with an output of pkg-config + [command] libfoo and pkgconfig [command] libbar. Argument libraries is + replaced by an output of pkgconfig --libs-only-l calls. """ # make API great again! From pypy.commits at gmail.com Thu Jan 31 04:26:44 2019 From: pypy.commits at gmail.com (vyskocilm) Date: Thu, 31 Jan 2019 01:26:44 -0800 (PST) Subject: [pypy-commit] cffi default: can't link with libraries expects -pthreads or similar flags Message-ID: <5c52bf54.1c69fb81.dfb6b.1967@mx.google.com> Author: Michal Vyskocil Branch: Changeset: r3191:c195b130abef Date: 2017-05-25 09:25 +0200 http://bitbucket.org/cffi/cffi/changeset/c195b130abef/ Log: can't link with libraries expects -pthreads or similar flags read extra_compile_args and extra_link_args fom pkg-config too diff --git a/cffi/pkgconfig.py b/cffi/pkgconfig.py --- a/cffi/pkgconfig.py +++ b/cffi/pkgconfig.py @@ -48,6 +48,9 @@ return tuple (string [2:].split ('=', 2)) return [_macros (x) for x in string.split () if x.startswith ("-D")] + def drop_macros (string): + return [x for x in string.split () if not x.startswith ("-D")] + # pkg-config call def pc (libname, *args): a = ["pkg-config", "--print-errors"] @@ -61,7 +64,9 @@ "include_dirs" : dropILl (pc (libname, "--cflags-only-I")), "library_dirs" : dropILl (pc (libname, "--libs-only-L")), "libraries" : dropILl (pc (libname, "--libs-only-l")), - "define_macros" : macros (pc 
(libname, "--cflags")), + "define_macros" : macros (pc (libname, "--cflags-only-other")), + "extra_compile_args" : drop_macros (pc (libname, "--cflags-only-other")), + "extra_link_args" : pc (libname, "--libs-only-other").split () } # merge all arguments together From pypy.commits at gmail.com Thu Jan 31 04:26:45 2019 From: pypy.commits at gmail.com (vyskocilm) Date: Thu, 31 Jan 2019 01:26:45 -0800 (PST) Subject: [pypy-commit] cffi default: drop debug print Message-ID: <5c52bf55.1c69fb81.e35e5.aba8@mx.google.com> Author: Michal Vyskocil Branch: Changeset: r3192:e23bbddde57b Date: 2017-05-25 09:26 +0200 http://bitbucket.org/cffi/cffi/changeset/e23bbddde57b/ Log: drop debug print diff --git a/cffi/api.py b/cffi/api.py --- a/cffi/api.py +++ b/cffi/api.py @@ -623,7 +623,6 @@ del kwds ["pkgconfig"] except KeyError: pass - print (kwds) self._assigned_source = (str(module_name), source, source_extension, kwds) From pypy.commits at gmail.com Thu Jan 31 04:26:47 2019 From: pypy.commits at gmail.com (vyskocilm) Date: Thu, 31 Jan 2019 01:26:47 -0800 (PST) Subject: [pypy-commit] cffi default: add test for pkg-config integration Message-ID: <5c52bf57.1c69fb81.8a370.0df2@mx.google.com> Author: Michal Vyskocil Branch: Changeset: r3193:95044035f313 Date: 2017-05-25 10:00 +0200 http://bitbucket.org/cffi/cffi/changeset/95044035f313/ Log: add test for pkg-config integration diff --git a/testing/cffi1/test_pkgconfig.py b/testing/cffi1/test_pkgconfig.py new file mode 100644 --- /dev/null +++ b/testing/cffi1/test_pkgconfig.py @@ -0,0 +1,30 @@ +import sys +import subprocess +import py +from cffi.pkgconfig import pkgconfig_installed, merge_dicts, pkgconfig_kwargs + +def test_merge_dicts (): + + d1 = {"ham": [1, 2, 3], "spam" : ["a", "b", "c"], "foo" : []} + d2 = {"spam" : ["spam", "spam", "spam"], "bar" : ["b", "a", "z"]} + + merge_dicts (d1, d2) + assert d1 == { + "ham": [1, 2, 3], + "spam" : ["a", "b", "c", "spam", "spam", "spam"], + "bar" : ["b", "a", "z"], + "foo" : []} + +def 
test_pkgconfig (): + if not pkgconfig_installed: + py.test.skip ("pkg-config is not installed on the system") + + version = sys.version_info.major + kwargs = {} + try: + kwargs = pkgconfig_kwargs ("python%s" % version) + except subprocess.CalledProcessError as e: + py.test.skip ("No python%s pkg-config file installed" % version) + + assert any ("python" in lib for lib in kwargs ["libraries"]) == True + assert any ("python" in dir for dir in kwargs ["include_dirs"]) == True From pypy.commits at gmail.com Thu Jan 31 04:26:48 2019 From: pypy.commits at gmail.com (vyskocilm) Date: Thu, 31 Jan 2019 01:26:48 -0800 (PST) Subject: [pypy-commit] cffi default: code not compatible with python3 Message-ID: <5c52bf58.1c69fb81.cf38f.a3ef@mx.google.com> Author: Michal Vyskocil Branch: Changeset: r3194:b72192c28e05 Date: 2017-05-25 10:02 +0200 http://bitbucket.org/cffi/cffi/changeset/b72192c28e05/ Log: code not compatible with python3 use bytes instead of strings diff --git a/cffi/pkgconfig.py b/cffi/pkgconfig.py --- a/cffi/pkgconfig.py +++ b/cffi/pkgconfig.py @@ -38,7 +38,7 @@ # drop starting -I -L -l from cflags def dropILl (string): def _dropILl (string): - if string.startswith ("-I") or string.startswith ("-L") or string.startswith ("-l"): + if string.startswith (b"-I") or string.startswith (b"-L") or string.startswith (b"-l"): return string [2:] return [_dropILl (x) for x in string.split ()] diff --git a/testing/cffi1/test_pkgconfig.py b/testing/cffi1/test_pkgconfig.py --- a/testing/cffi1/test_pkgconfig.py +++ b/testing/cffi1/test_pkgconfig.py @@ -26,5 +26,5 @@ except subprocess.CalledProcessError as e: py.test.skip ("No python%s pkg-config file installed" % version) - assert any ("python" in lib for lib in kwargs ["libraries"]) == True - assert any ("python" in dir for dir in kwargs ["include_dirs"]) == True + assert any (b"python" in lib for lib in kwargs ["libraries"]) == True + assert any (b"python" in dir for dir in kwargs ["include_dirs"]) == True From pypy.commits at 
gmail.com Thu Jan 31 04:26:50 2019 From: pypy.commits at gmail.com (vyskocilm) Date: Thu, 31 Jan 2019 01:26:50 -0800 (PST) Subject: [pypy-commit] cffi default: Increase testing coverage and refactor method names Message-ID: <5c52bf5a.1c69fb81.1f9bb.4e9b@mx.google.com> Author: Michal Vyskocil Branch: Changeset: r3195:09ffc07bbde2 Date: 2019-01-08 08:32 +0100 http://bitbucket.org/cffi/cffi/changeset/09ffc07bbde2/ Log: Increase testing coverage and refactor method names Making `pkgconfig.call` function accessible, tests can monkey patch it and provide mock. This improves testing, however raised a need to give functions better names than `pkgconfig.pkgconfig_kwargs` or `pkgconfig.pc`. diff --git a/cffi/api.py b/cffi/api.py --- a/cffi/api.py +++ b/cffi/api.py @@ -2,7 +2,7 @@ from .lock import allocate_lock from .error import CDefError from . import model -from .pkgconfig import pkgconfig_installed, merge_dicts, pkgconfig_kwargs +from . import pkgconfig try: callable @@ -612,17 +612,12 @@ if os.sep in module_name or (os.altsep and os.altsep in module_name): raise ValueError("'module_name' must not contain '/': use a dotted " "name to make a 'package.module' location") - if "pkgconfig" in kwds: - if pkgconfig_installed (): - try: - del kwds ["libraries"] - except KeyError: - pass - merge_dicts (kwds, pkgconfig_kwargs (kwds ["pkgconfig"])) - try: - del kwds ["pkgconfig"] - except KeyError: - pass + if "pkgconfig" in kwds and pkgconfig.is_installed(): + if "libraries" in kwds: + del kwds["libraries"] # real library names are going to be + # provided by pkg-config + pkgconfig.merge_flags(kwds, pkgconfig.kwargs(kwds["pkgconfig"])) + del kwds["pkgconfig"] self._assigned_source = (str(module_name), source, source_extension, kwds) diff --git a/cffi/pkgconfig.py b/cffi/pkgconfig.py --- a/cffi/pkgconfig.py +++ b/cffi/pkgconfig.py @@ -1,79 +1,86 @@ # pkg-config, https://www.freedesktop.org/wiki/Software/pkg-config/ integration for cffi import subprocess -def pkgconfig_installed (): 
- """Check if pkg=config is installed or not""" +def is_installed(): + """Check if pkg-config is installed or not""" try: - subprocess.check_output (["pkg-config", "--version"]) + subprocess.check_output(["pkg-config", "--version"]) return True except subprocess.CalledProcessError: return False -def merge_dicts (d1, d2): - """Helper function to merge two dicts with lists""" - for key, value in d2.items (): - if not key in d1: - d1 [key] = value + +def merge_flags(cfg1, cfg2): + """Merge values from cffi config flags cfg2 to cf1 + + Example: + merge_flags({"libraries": ["one"]}, {"libraries": "two"}) + {"libraries}" : ["one", "two"]} + """ + for key, value in cfg2.items(): + if not key in cfg1: + cfg1 [key] = value else: - d1 [key].extend (value) - return d1 + cfg1 [key].extend(value) + return cfg1 -def pkgconfig_kwargs (libs): - r"""Return kwargs for FFI.set_source based on pkg-config output + +def call(libname, flag): + """Calls pkg-config and returing the output""" + a = ["pkg-config", "--print-errors"] + a.append(flag) + a.append(libname) + return subprocess.check_output(a) + + +def flags(libs): + r"""Return compiler line flags for FFI.set_source based on pkg-config output Usage ... - ffibuilder.set_source ("_foo", libraries = ["foo", "bar"], pkgconfig = ["libfoo", "libbar"]) + ffibuilder.set_source("_foo", libraries = ["foo", "bar"], pkgconfig = ["libfoo", "libbar"]) - If pkg-config is installed on build machine, then arguments include_dirs, - library_dirs and define_macros are extended with an output of pkg-config - [command] libfoo and pkgconfig [command] libbar. Argument libraries is - replaced by an output of pkgconfig --libs-only-l calls. + If `pkg-config` is installed on build machine, then arguments + `include_dirs`, `library_dirs`, `libraries`, `define_macros`, + `extra_compile_args` and `extra_link_args` are extended with an output of + `pkg-config` for `libfoo` and `libbar`. """ # make API great again! 
- if isinstance (libs, (str, bytes)): + if isinstance(libs, (str, bytes)): libs = (libs, ) # drop starting -I -L -l from cflags - def dropILl (string): - def _dropILl (string): - if string.startswith (b"-I") or string.startswith (b"-L") or string.startswith (b"-l"): + def dropILl(string): + def _dropILl(string): + if string.startswith(b"-I") or string.startswith(b"-L") or string.startswith(b"-l"): return string [2:] - return [_dropILl (x) for x in string.split ()] + return [_dropILl(x) for x in string.split()] # convert -Dfoo=bar to list of tuples [("foo", "bar")] expected by cffi - def macros (string): - def _macros (string): - return tuple (string [2:].split ('=', 2)) - return [_macros (x) for x in string.split () if x.startswith ("-D")] + def macros(string): + def _macros(string): + return tuple(string [2:].split(b"=", 2)) + return [_macros(x) for x in string.split() if x.startswith(b"-D")] - def drop_macros (string): - return [x for x in string.split () if not x.startswith ("-D")] - - # pkg-config call - def pc (libname, *args): - a = ["pkg-config", "--print-errors"] - a.extend (args) - a.append (libname) - return subprocess.check_output (a) + def drop_macros(string): + return [x for x in string.split() if not x.startswith(b"-D")] # return kwargs for given libname - def kwargs (libname): + def kwargs(libname): return { - "include_dirs" : dropILl (pc (libname, "--cflags-only-I")), - "library_dirs" : dropILl (pc (libname, "--libs-only-L")), - "libraries" : dropILl (pc (libname, "--libs-only-l")), - "define_macros" : macros (pc (libname, "--cflags-only-other")), - "extra_compile_args" : drop_macros (pc (libname, "--cflags-only-other")), - "extra_link_args" : pc (libname, "--libs-only-other").split () + "include_dirs" : dropILl(call(libname, "--cflags-only-I")), + "library_dirs" : dropILl(call(libname, "--libs-only-L")), + "libraries" : dropILl(call(libname, "--libs-only-l")), + "define_macros" : macros(call(libname, "--cflags-only-other")), + "extra_compile_args" 
: drop_macros(call(libname, "--cflags-only-other")), + "extra_link_args" : call(libname, "--libs-only-other").split() } # merge all arguments together ret = {} for libname in libs: - foo = kwargs (libname) - merge_dicts (ret, foo) + foo = kwargs(libname) + merge_flags(ret, foo) return ret - diff --git a/testing/cffi1/test_pkgconfig.py b/testing/cffi1/test_pkgconfig.py --- a/testing/cffi1/test_pkgconfig.py +++ b/testing/cffi1/test_pkgconfig.py @@ -1,30 +1,43 @@ import sys import subprocess import py -from cffi.pkgconfig import pkgconfig_installed, merge_dicts, pkgconfig_kwargs +import cffi.pkgconfig as pkgconfig -def test_merge_dicts (): +def mock_call(libname, flag): + assert libname=="python-3.6", "mocked pc function supports python-3.6 input ONLY" + + flags = { + "--cflags-only-I": b"-I/usr/include/python3.6m\n", + "--libs-only-L": b"-L/usr/lib64\n", + "--libs-only-l": b"-lpython3.6\n", + "--cflags-only-other": b"-DCFFI_TEST=1 -O42\n", + "--libs-only-other": b"-lm\n", + } + return flags[flag] + +pkgconfig.call = mock_call + + +def test_merge_flags(): d1 = {"ham": [1, 2, 3], "spam" : ["a", "b", "c"], "foo" : []} d2 = {"spam" : ["spam", "spam", "spam"], "bar" : ["b", "a", "z"]} - merge_dicts (d1, d2) + pkgconfig.merge_flags(d1, d2) assert d1 == { "ham": [1, 2, 3], "spam" : ["a", "b", "c", "spam", "spam", "spam"], "bar" : ["b", "a", "z"], "foo" : []} -def test_pkgconfig (): - if not pkgconfig_installed: - py.test.skip ("pkg-config is not installed on the system") - version = sys.version_info.major - kwargs = {} - try: - kwargs = pkgconfig_kwargs ("python%s" % version) - except subprocess.CalledProcessError as e: - py.test.skip ("No python%s pkg-config file installed" % version) - - assert any (b"python" in lib for lib in kwargs ["libraries"]) == True - assert any (b"python" in dir for dir in kwargs ["include_dirs"]) == True +def test_pkgconfig(): + kwargs = pkgconfig.flags("python-3.6") + assert kwargs == { + 'include_dirs': [b'/usr/include/python3.6m'], + 
'library_dirs': [b'/usr/lib64'], + 'libraries': [b'python3.6'], + 'define_macros': [(b'CFFI_TEST', b'1')], + 'extra_compile_args': [b'-O42'], + 'extra_link_args': [b'-lm'] + } From pypy.commits at gmail.com Thu Jan 31 04:26:53 2019 From: pypy.commits at gmail.com (vyskocilm) Date: Thu, 31 Jan 2019 01:26:53 -0800 (PST) Subject: [pypy-commit] cffi default: merge with latest tip Message-ID: <5c52bf5d.1c69fb81.9913.8f55@mx.google.com> Author: Michal Vyskocil Branch: Changeset: r3196:48ca9a578dac Date: 2019-01-08 10:29 +0100 http://bitbucket.org/cffi/cffi/changeset/48ca9a578dac/ Log: merge with latest tip diff too long, truncating to 2000 out of 8938 lines diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -12,3 +12,8 @@ 0000000000000000000000000000000000000000 release-0.2 ca6e81df7f1ea58d891129ad016a8888c08f238b release-0.1 0000000000000000000000000000000000000000 release-0.1 +ada126bd7d1e96cc76303c1fca64a556912549d8 v1.11.1 +5f9690f5832b0292056df45f72314d69c191f75a v1.11.2 +1aafccb9255dbb36f8e785b65624e39628cee63a v1.11.3 +e08abd4703fef26f036e82255f4070277a9e03bd v1.11.4 +48416163071ed48300c3ae4358cc7fd841912413 v1.11.5 diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c --- a/c/_cffi_backend.c +++ b/c/_cffi_backend.c @@ -2,7 +2,7 @@ #include #include "structmember.h" -#define CFFI_VERSION "1.10.0" +#define CFFI_VERSION "1.12.0" #ifdef MS_WIN32 #include @@ -60,7 +60,38 @@ # endif #endif -#include "malloc_closure.h" + +/* Define the following macro ONLY if you trust libffi's version of + * ffi_closure_alloc() more than the code in malloc_closure.h. + * IMPORTANT: DO NOT ENABLE THIS ON LINUX, unless you understand exactly + * why I recommend against it and decide that you trust it more than my + * analysis below. + * + * There are two versions of this code: one inside libffi itself, and + * one inside malloc_closure.h here. Both should be fine as long as the + * Linux distribution does _not_ enable extra security features. 
If it + * does, then the code in malloc_closure.h will cleanly crash because + * there is no reasonable way to obtain a read-write-execute memory + * page. On the other hand, the code in libffi will appear to + * work---but will actually randomly crash after a fork() if the child + * does not immediately call exec(). This second crash is of the kind + * that can be turned into an attack vector by a motivated attacker. + * So, _enabling_ extra security features _opens_ an attack vector. + * That sounds like a horribly bad idea to me, and is the reason for why + * I prefer CFFI crashing cleanly. + * + * Currently, we use libffi's ffi_closure_alloc() only on NetBSD. It is + * known that on the NetBSD kernel, a different strategy is used which + * should not be open to the fork() bug. + */ +#ifdef __NetBSD__ +# define CFFI_TRUST_LIBFFI +#endif + +#ifndef CFFI_TRUST_LIBFFI +# include "malloc_closure.h" +#endif + #if PY_MAJOR_VERSION >= 3 # define STR_OR_BYTES "bytes" @@ -70,7 +101,11 @@ # define PyText_FromFormat PyUnicode_FromFormat # define PyText_AsUTF8 _PyUnicode_AsString /* PyUnicode_AsUTF8 in Py3.3 */ # define PyText_AS_UTF8 _PyUnicode_AsString -# define PyText_GetSize PyUnicode_GetSize +# if PY_VERSION_HEX >= 0x03030000 +# define PyText_GetSize PyUnicode_GetLength +# else +# define PyText_GetSize PyUnicode_GetSize +# endif # define PyText_FromString PyUnicode_FromString # define PyText_FromStringAndSize PyUnicode_FromStringAndSize # define PyText_InternInPlace PyUnicode_InternInPlace @@ -116,36 +151,39 @@ /************************************************************/ /* base type flag: exactly one of the following: */ -#define CT_PRIMITIVE_SIGNED 1 /* signed integer */ -#define CT_PRIMITIVE_UNSIGNED 2 /* unsigned integer */ -#define CT_PRIMITIVE_CHAR 4 /* char, wchar_t */ -#define CT_PRIMITIVE_FLOAT 8 /* float, double, long double */ -#define CT_POINTER 16 /* pointer, excluding ptr-to-func */ -#define CT_ARRAY 32 /* array */ -#define CT_STRUCT 64 /* struct */ 
-#define CT_UNION 128 /* union */ -#define CT_FUNCTIONPTR 256 /* pointer to function */ -#define CT_VOID 512 /* void */ +#define CT_PRIMITIVE_SIGNED 0x001 /* signed integer */ +#define CT_PRIMITIVE_UNSIGNED 0x002 /* unsigned integer */ +#define CT_PRIMITIVE_CHAR 0x004 /* char, wchar_t, charN_t */ +#define CT_PRIMITIVE_FLOAT 0x008 /* float, double, long double */ +#define CT_POINTER 0x010 /* pointer, excluding ptr-to-func */ +#define CT_ARRAY 0x020 /* array */ +#define CT_STRUCT 0x040 /* struct */ +#define CT_UNION 0x080 /* union */ +#define CT_FUNCTIONPTR 0x100 /* pointer to function */ +#define CT_VOID 0x200 /* void */ +#define CT_PRIMITIVE_COMPLEX 0x400 /* float _Complex, double _Complex */ /* other flags that may also be set in addition to the base flag: */ -#define CT_IS_VOIDCHAR_PTR 1024 -#define CT_PRIMITIVE_FITS_LONG 2048 -#define CT_IS_OPAQUE 4096 -#define CT_IS_ENUM 8192 -#define CT_IS_PTR_TO_OWNED 16384 /* only owned if CDataOwning_Type */ -#define CT_CUSTOM_FIELD_POS 32768 -#define CT_IS_LONGDOUBLE 65536 -#define CT_IS_BOOL 131072 -#define CT_IS_FILE 262144 -#define CT_IS_VOID_PTR 524288 -#define CT_WITH_VAR_ARRAY 1048576 -#define CT_IS_UNSIZED_CHAR_A 2097152 -#define CT_LAZY_FIELD_LIST 4194304 -#define CT_WITH_PACKED_CHANGE 8388608 +#define CT_IS_VOIDCHAR_PTR 0x00001000 +#define CT_PRIMITIVE_FITS_LONG 0x00002000 +#define CT_IS_OPAQUE 0x00004000 +#define CT_IS_ENUM 0x00008000 +#define CT_IS_PTR_TO_OWNED 0x00010000 /* only owned if CDataOwning_Type */ +#define CT_CUSTOM_FIELD_POS 0x00020000 +#define CT_IS_LONGDOUBLE 0x00040000 +#define CT_IS_BOOL 0x00080000 +#define CT_IS_FILE 0x00100000 +#define CT_IS_VOID_PTR 0x00200000 +#define CT_WITH_VAR_ARRAY 0x00400000 +/* unused 0x00800000 */ +#define CT_LAZY_FIELD_LIST 0x01000000 +#define CT_WITH_PACKED_CHANGE 0x02000000 +#define CT_IS_SIGNED_WCHAR 0x04000000 #define CT_PRIMITIVE_ANY (CT_PRIMITIVE_SIGNED | \ CT_PRIMITIVE_UNSIGNED | \ CT_PRIMITIVE_CHAR | \ - CT_PRIMITIVE_FLOAT) + CT_PRIMITIVE_FLOAT | \ + 
CT_PRIMITIVE_COMPLEX) typedef struct _ctypedescr { PyObject_VAR_HEAD @@ -256,6 +294,11 @@ } CDataObject_gcp; typedef struct { + CDataObject head; + ffi_closure *closure; +} CDataObject_closure; + +typedef struct { ffi_cif cif; /* the following information is used when doing the call: - a buffer of size 'exchange_size' is malloced @@ -283,10 +326,14 @@ # include "file_emulator.h" #endif -#ifdef HAVE_WCHAR_H +#ifdef PyUnicode_KIND /* Python >= 3.3 */ +# include "wchar_helper_3.h" +#else # include "wchar_helper.h" #endif +#include "../cffi/_cffi_errors.h" + typedef struct _cffi_allocator_s { PyObject *ca_alloc, *ca_free; int ca_dont_clear; @@ -845,11 +892,21 @@ return 0; } +#ifdef __GNUC__ +/* This is a workaround for what I think is a GCC bug on several + platforms. See issue #378. */ +__attribute__((noinline)) +#endif +void _cffi_memcpy(char *target, const void *src, size_t size) +{ + memcpy(target, src, size); +} + #define _write_raw_data(type) \ do { \ if (size == sizeof(type)) { \ type r = (type)source; \ - memcpy(target, &r, sizeof(type)); \ + _cffi_memcpy(target, &r, sizeof(type)); \ return; \ } \ } while(0) @@ -883,6 +940,26 @@ return 0; } +static Py_complex +read_raw_complex_data(char *target, int size) +{ + Py_complex r = {0.0, 0.0}; + if (size == 2*sizeof(float)) { + float real_part, imag_part; + memcpy(&real_part, target + 0, sizeof(float)); + memcpy(&imag_part, target + sizeof(float), sizeof(float)); + r.real = real_part; + r.imag = imag_part; + return r; + } + if (size == 2*sizeof(double)) { + memcpy(&r, target, 2*sizeof(double)); + return r; + } + Py_FatalError("read_raw_complex_data: bad complex size"); + return r; +} + static void write_raw_float_data(char *target, double source, int size) { @@ -898,6 +975,25 @@ _write_raw_data(long double); } +#define _write_raw_complex_data(type) \ + do { \ + if (size == 2*sizeof(type)) { \ + type r = (type)source.real; \ + type i = (type)source.imag; \ + _cffi_memcpy(target, &r, sizeof(type)); \ + 
_cffi_memcpy(target+sizeof(type), &i, sizeof(type)); \ + return; \ + } \ + } while(0) + +static void +write_raw_complex_data(char *target, Py_complex source, int size) +{ + _write_raw_complex_data(float); + _write_raw_complex_data(double); + Py_FatalError("write_raw_complex_data: bad complex size"); +} + static PyObject * new_simple_cdata(char *data, CTypeDescrObject *ct) { @@ -1008,12 +1104,18 @@ } else if (ct->ct_flags & CT_PRIMITIVE_CHAR) { /*READ(data, ct->ct_size)*/ - if (ct->ct_size == sizeof(char)) + switch (ct->ct_size) { + case sizeof(char): return PyBytes_FromStringAndSize(data, 1); -#ifdef HAVE_WCHAR_H - else - return _my_PyUnicode_FromWideChar((wchar_t *)data, 1); -#endif + case 2: + return _my_PyUnicode_FromChar16((cffi_char16_t *)data, 1); + case 4: + return _my_PyUnicode_FromChar32((cffi_char32_t *)data, 1); + } + } + else if (ct->ct_flags & CT_PRIMITIVE_COMPLEX) { + Py_complex value = read_raw_complex_data(data, ct->ct_size); + return PyComplex_FromCComplex(value); } PyErr_Format(PyExc_SystemError, @@ -1088,39 +1190,65 @@ return -1; } -#ifdef HAVE_WCHAR_H -static wchar_t _convert_to_wchar_t(PyObject *init) -{ +static cffi_char16_t _convert_to_char16_t(PyObject *init) +{ + char err_got[80]; + err_got[0] = 0; + if (PyUnicode_Check(init)) { - wchar_t ordinal; - if (_my_PyUnicode_AsSingleWideChar(init, &ordinal) == 0) + cffi_char16_t ordinal; + if (_my_PyUnicode_AsSingleChar16(init, &ordinal, err_got) == 0) return ordinal; } if (CData_Check(init) && (((CDataObject *)init)->c_type->ct_flags & CT_PRIMITIVE_CHAR) && - (((CDataObject *)init)->c_type->ct_size == sizeof(wchar_t))) { + (((CDataObject *)init)->c_type->ct_size == 2)) { char *data = ((CDataObject *)init)->c_data; - /*READ(data, sizeof(wchar_t))*/ - return *(wchar_t *)data; + /*READ(data, 2)*/ + return *(cffi_char16_t *)data; } PyErr_Format(PyExc_TypeError, - "initializer for ctype 'wchar_t' must be a unicode string " - "of length 1, not %.200s", Py_TYPE(init)->tp_name); - return (wchar_t)-1; -} 
-#endif - -static int _convert_error(PyObject *init, const char *ct_name, + "initializer for ctype 'char16_t' must be a unicode string " + "of length 1, not %.200s", + err_got[0] == 0 ? Py_TYPE(init)->tp_name : err_got); + return (cffi_char16_t)-1; +} + +static cffi_char32_t _convert_to_char32_t(PyObject *init) +{ + char err_got[80]; + err_got[0] = 0; + + if (PyUnicode_Check(init)) { + cffi_char32_t ordinal; + if (_my_PyUnicode_AsSingleChar32(init, &ordinal, err_got) == 0) + return ordinal; + } + if (CData_Check(init) && + (((CDataObject *)init)->c_type->ct_flags & CT_PRIMITIVE_CHAR) && + (((CDataObject *)init)->c_type->ct_size == 4)) { + char *data = ((CDataObject *)init)->c_data; + /*READ(data, 4)*/ + return *(cffi_char32_t *)data; + } + PyErr_Format(PyExc_TypeError, + "initializer for ctype 'char32_t' must be a unicode string " + "of length 1, not %.200s", + err_got[0] == 0 ? Py_TYPE(init)->tp_name : err_got); + return (cffi_char32_t)-1; +} + +static int _convert_error(PyObject *init, CTypeDescrObject *ct, const char *expected) { if (CData_Check(init)) { - const char *ct_name_2 = ((CDataObject *)init)->c_type->ct_name; - if (strcmp(ct_name, ct_name_2) != 0) + CTypeDescrObject *ct2 = ((CDataObject *)init)->c_type; + if (strcmp(ct->ct_name, ct2->ct_name) != 0) PyErr_Format(PyExc_TypeError, "initializer for ctype '%s' must be a %s, " "not cdata '%s'", - ct_name, expected, ct_name_2); - else { + ct->ct_name, expected, ct2->ct_name); + else if (ct != ct2) { /* in case we'd give the error message "initializer for ctype 'A' must be a pointer to same type, not cdata 'B'", but with A=B, then give instead a different error @@ -1129,14 +1257,21 @@ "initializer for ctype '%s' appears indeed to be '%s'," " but the types are different (check that you are not" " e.g. 
mixing up different ffi instances)", - ct_name, ct_name_2); + ct->ct_name, ct2->ct_name); + } + else + { + PyErr_Format(PyExc_SystemError, + "initializer for ctype '%s' is correct, but we get " + "an internal mismatch--please report a bug", + ct->ct_name); } } else PyErr_Format(PyExc_TypeError, "initializer for ctype '%s' must be a %s, " "not %.200s", - ct_name, expected, Py_TYPE(init)->tp_name); + ct->ct_name, expected, Py_TYPE(init)->tp_name); return -1; } @@ -1146,7 +1281,7 @@ convert_from_object_bitfield(char *data, CFieldObject *cf, PyObject *init); static Py_ssize_t -get_new_array_length(PyObject **pvalue) +get_new_array_length(CTypeDescrObject *ctitem, PyObject **pvalue) { PyObject *value = *pvalue; @@ -1159,13 +1294,24 @@ } else if (PyUnicode_Check(value)) { /* from a unicode, we add the null terminator */ - return _my_PyUnicode_SizeAsWideChar(value) + 1; + int length; + if (ctitem->ct_size == 2) + length = _my_PyUnicode_SizeAsChar16(value); + else + length = _my_PyUnicode_SizeAsChar32(value); + return length + 1; } else { Py_ssize_t explicitlength; explicitlength = PyNumber_AsSsize_t(value, PyExc_OverflowError); if (explicitlength < 0) { - if (!PyErr_Occurred()) + if (PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) + PyErr_Format(PyExc_TypeError, + "expected new array length or list/tuple/str, " + "not %.200s", Py_TYPE(value)->tp_name); + } + else PyErr_SetString(PyExc_ValueError, "negative array length"); return -1; } @@ -1190,7 +1336,8 @@ { /* a special case for var-sized C99 arrays */ if ((cf->cf_type->ct_flags & CT_ARRAY) && cf->cf_type->ct_size < 0) { - Py_ssize_t varsizelength = get_new_array_length(&value); + Py_ssize_t varsizelength = get_new_array_length( + cf->cf_type->ct_itemdescr, &value); if (varsizelength < 0) return -1; if (optvarsize != NULL) { @@ -1238,6 +1385,15 @@ return 0; } +static Py_ssize_t +get_array_length(CDataObject *cd) +{ + if (cd->c_type->ct_length < 0) + return ((CDataObject_own_length *)cd)->length; + else 
+ return cd->c_type->ct_length; +} + static int convert_array_from_object(char *data, CTypeDescrObject *ct, PyObject *init) { @@ -1291,14 +1447,18 @@ memcpy(data, srcdata, n); return 0; } -#ifdef HAVE_WCHAR_H else { Py_ssize_t n; if (!PyUnicode_Check(init)) { expected = "unicode or list or tuple"; goto cannot_convert; } - n = _my_PyUnicode_SizeAsWideChar(init); + + if (ctitem->ct_size == 4) + n = _my_PyUnicode_SizeAsChar32(init); + else + n = _my_PyUnicode_SizeAsChar16(init); + if (ct->ct_length >= 0 && n > ct->ct_length) { PyErr_Format(PyExc_IndexError, "initializer unicode is too long for '%s' " @@ -1307,10 +1467,11 @@ } if (n != ct->ct_length) n++; - _my_PyUnicode_AsWideChar(init, (wchar_t *)data, n); - return 0; - } -#endif + if (ctitem->ct_size == 4) + return _my_PyUnicode_AsChar32(init, (cffi_char32_t *)data, n); + else + return _my_PyUnicode_AsChar16(init, (cffi_char16_t *)data, n); + } } else { expected = "list or tuple"; @@ -1318,13 +1479,24 @@ } cannot_convert: - return _convert_error(init, ct->ct_name, expected); + if ((ct->ct_flags & CT_ARRAY) && CData_Check(init)) + { + CDataObject *cd = (CDataObject *)init; + if (cd->c_type == ct) + { + Py_ssize_t n = get_array_length(cd); + memcpy(data, cd->c_data, n * ctitem->ct_size); + return 0; + } + } + return _convert_error(init, ct, expected); } static int convert_struct_from_object(char *data, CTypeDescrObject *ct, PyObject *init, Py_ssize_t *optvarsize) { + /* does not accept 'init' being already a CData */ const char *expected; if (force_lazy_struct(ct) <= 0) { @@ -1371,7 +1543,7 @@ } expected = optvarsize == NULL ? "list or tuple or dict or struct-cdata" : "list or tuple or dict"; - return _convert_error(init, ct->ct_name, expected); + return _convert_error(init, ct, expected); } #ifdef __GNUC__ @@ -1421,7 +1593,8 @@ /* for backward compatibility, accept "char *" as either source of target. This is not what C does, though, so emit a warning that will eventually turn into an - error. */ + error. 
The warning is turned off if both types are + pointers to single bytes. */ char *msg = (ct->ct_flags & CT_IS_VOIDCHAR_PTR ? "implicit cast to 'char *' from a different pointer type: " "will be forbidden in the future (check that the types " @@ -1431,7 +1604,12 @@ "will be forbidden in the future (check that the types " "are as you expect; use an explicit ffi.cast() if they " "are correct)"); - if (PyErr_WarnEx(PyExc_UserWarning, msg, 1)) + if ((ct->ct_flags & ctinit->ct_flags & CT_POINTER) && + ct->ct_itemdescr->ct_size == 1 && + ctinit->ct_itemdescr->ct_size == 1) { + /* no warning */ + } + else if (PyErr_WarnEx(PyExc_UserWarning, msg, 1)) return -1; } else { @@ -1492,22 +1670,29 @@ return 0; } if (ct->ct_flags & CT_PRIMITIVE_CHAR) { - if (ct->ct_size == sizeof(char)) { + switch (ct->ct_size) { + case sizeof(char): { int res = _convert_to_char(init); if (res < 0) return -1; data[0] = res; return 0; } -#ifdef HAVE_WCHAR_H - else { - wchar_t res = _convert_to_wchar_t(init); - if (res == (wchar_t)-1 && PyErr_Occurred()) + case 2: { + cffi_char16_t res = _convert_to_char16_t(init); + if (res == (cffi_char16_t)-1 && PyErr_Occurred()) return -1; - *(wchar_t *)data = res; + *(cffi_char16_t *)data = res; return 0; } -#endif + case 4: { + cffi_char32_t res = _convert_to_char32_t(init); + if (res == (cffi_char32_t)-1 && PyErr_Occurred()) + return -1; + *(cffi_char32_t *)data = res; + return 0; + } + } } if (ct->ct_flags & (CT_STRUCT|CT_UNION)) { @@ -1519,6 +1704,13 @@ } return convert_struct_from_object(data, ct, init, NULL); } + if (ct->ct_flags & CT_PRIMITIVE_COMPLEX) { + Py_complex value = PyComplex_AsCComplex(init); + if (PyErr_Occurred()) + return -1; + write_raw_complex_data(data, value, ct->ct_size); + return 0; + } PyErr_Format(PyExc_SystemError, "convert_from_object: '%s'", ct->ct_name); return -1; @@ -1527,7 +1719,7 @@ return _convert_overflow(init, ct->ct_name); cannot_convert: - return _convert_error(init, ct->ct_name, expected); + return _convert_error(init, 
ct, expected); } static int @@ -1587,15 +1779,6 @@ return 0; } -static Py_ssize_t -get_array_length(CDataObject *cd) -{ - if (cd->c_type->ct_length < 0) - return ((CDataObject_own_length *)cd)->length; - else - return cd->c_type->ct_length; -} - static int get_alignment(CTypeDescrObject *ct) { @@ -1678,12 +1861,16 @@ Py_DECREF(x); } else if (cd->c_type->ct_flags & CT_FUNCTIONPTR) { /* a callback */ - ffi_closure *closure = (ffi_closure *)cd->c_data; + ffi_closure *closure = ((CDataObject_closure *)cd)->closure; PyObject *args = (PyObject *)(closure->user_data); Py_XDECREF(args); +#ifdef CFFI_TRUST_LIBFFI + ffi_closure_free(closure); +#else cffi_closure_free(closure); - } - else if (cd->c_type->ct_flags & CT_IS_UNSIZED_CHAR_A) { /* from_buffer */ +#endif + } + else if (cd->c_type->ct_flags & CT_ARRAY) { /* from_buffer */ Py_buffer *view = ((CDataObject_owngc_frombuf *)cd)->bufferview; PyBuffer_Release(view); PyObject_Free(view); @@ -1698,11 +1885,11 @@ Py_VISIT(x); } else if (cd->c_type->ct_flags & CT_FUNCTIONPTR) { /* a callback */ - ffi_closure *closure = (ffi_closure *)cd->c_data; + ffi_closure *closure = ((CDataObject_closure *)cd)->closure; PyObject *args = (PyObject *)(closure->user_data); Py_VISIT(args); } - else if (cd->c_type->ct_flags & CT_IS_UNSIZED_CHAR_A) { /* from_buffer */ + else if (cd->c_type->ct_flags & CT_ARRAY) { /* from_buffer */ Py_buffer *view = ((CDataObject_owngc_frombuf *)cd)->bufferview; Py_VISIT(view->obj); } @@ -1719,12 +1906,12 @@ Py_DECREF(x); } else if (cd->c_type->ct_flags & CT_FUNCTIONPTR) { /* a callback */ - ffi_closure *closure = (ffi_closure *)cd->c_data; + ffi_closure *closure = ((CDataObject_closure *)cd)->closure; PyObject *args = (PyObject *)(closure->user_data); closure->user_data = NULL; Py_XDECREF(args); } - else if (cd->c_type->ct_flags & CT_IS_UNSIZED_CHAR_A) { /* from_buffer */ + else if (cd->c_type->ct_flags & CT_ARRAY) { /* from_buffer */ Py_buffer *view = ((CDataObject_owngc_frombuf *)cd)->bufferview; 
PyBuffer_Release(view); } @@ -1732,7 +1919,8 @@ } /* forward */ -static void _my_PyErr_WriteUnraisable(char *objdescr, PyObject *obj, +static void _my_PyErr_WriteUnraisable(PyObject *t, PyObject *v, PyObject *tb, + char *objdescr, PyObject *obj, char *extra_error_line); @@ -1752,8 +1940,15 @@ Py_DECREF(result); } else { - _my_PyErr_WriteUnraisable("From callback for ffi.gc ", + PyObject *t, *v, *tb; + PyErr_Fetch(&t, &v, &tb); + /* Don't use error capture here, because it is very much + * like errors at __del__(), and these ones are not captured + * either */ + /* ecap = _cffi_start_error_capture(); */ + _my_PyErr_WriteUnraisable(t, v, tb, "From callback for ffi.gc ", origobj, NULL); + /* _cffi_stop_error_capture(ecap); */ } Py_DECREF(destructor); @@ -1763,7 +1958,6 @@ Py_XDECREF(origobj); } -#ifdef Py_TPFLAGS_HAVE_FINALIZE /* CPython >= 3.4 */ static void cdatagcp_finalize(CDataObject_gcp *cd) { PyObject *destructor = cd->destructor; @@ -1772,7 +1966,6 @@ cd->origobj = NULL; gcp_finalize(destructor, origobj); } -#endif static void cdatagcp_dealloc(CDataObject_gcp *cd) { @@ -1925,13 +2118,14 @@ return _cdata_repr2(cd, "handle to", x); } else if (cd->c_type->ct_flags & CT_FUNCTIONPTR) { /* a callback */ - PyObject *args = (PyObject *)((ffi_closure *)cd->c_data)->user_data; + ffi_closure *closure = ((CDataObject_closure *)cd)->closure; + PyObject *args = (PyObject *)closure->user_data; if (args == NULL) return cdata_repr(cd); else return _cdata_repr2(cd, "calling", PyTuple_GET_ITEM(args, 1)); } - else if (cd->c_type->ct_flags & CT_IS_UNSIZED_CHAR_A) { /* from_buffer */ + else if (cd->c_type->ct_flags & CT_ARRAY) { /* from_buffer */ Py_buffer *view = ((CDataObject_owngc_frombuf *)cd)->bufferview; Py_ssize_t buflen = get_array_length(cd); return PyText_FromFormat( @@ -1956,6 +2150,11 @@ return read_raw_longdouble_data(cd->c_data) != 0.0; return read_raw_float_data(cd->c_data, cd->c_type->ct_size) != 0.0; } + if (cd->c_type->ct_flags & CT_PRIMITIVE_COMPLEX) { + 
Py_complex value = read_raw_complex_data(cd->c_data, + cd->c_type->ct_size); + return value.real != 0.0 || value.imag != 0.0; + } } return cd->c_data != NULL; } @@ -1976,12 +2175,19 @@ } else if (cd->c_type->ct_flags & CT_PRIMITIVE_CHAR) { /*READ(cd->c_data, cd->c_type->ct_size)*/ - if (cd->c_type->ct_size == sizeof(char)) + switch (cd->c_type->ct_size) { + case sizeof(char): return PyInt_FromLong((unsigned char)cd->c_data[0]); -#ifdef HAVE_WCHAR_H - else - return PyInt_FromLong((long)*(wchar_t *)cd->c_data); -#endif + case 2: + return PyInt_FromLong((long)*(cffi_char16_t *)cd->c_data); + case 4: + if (cd->c_type->ct_flags & CT_IS_SIGNED_WCHAR) + return PyInt_FromLong((long)*(int32_t *)cd->c_data); + else if (sizeof(long) > 4) + return PyInt_FromLong(*(uint32_t *)cd->c_data); + else + return PyLong_FromUnsignedLong(*(uint32_t *)cd->c_data); + } } else if (cd->c_type->ct_flags & CT_PRIMITIVE_FLOAT) { PyObject *o = cdata_float(cd); @@ -2163,7 +2369,7 @@ else if (cd->c_type->ct_flags & CT_ARRAY) { if (i < 0) { PyErr_SetString(PyExc_IndexError, - "negative index not supported"); + "negative index"); return NULL; } if (i >= get_array_length(cd)) { @@ -2216,7 +2422,7 @@ if (ct->ct_flags & CT_ARRAY) { if (start < 0) { PyErr_SetString(PyExc_IndexError, - "negative index not supported"); + "negative index"); return NULL; } if (stop > get_array_length(cd)) { @@ -2507,7 +2713,7 @@ static void _cdata_attr_errmsg(char *errmsg, CDataObject *cd, PyObject *attr) { - char *text; + const char *text; if (!PyErr_ExceptionMatches(PyExc_AttributeError)) return; PyErr_Clear(); @@ -2673,7 +2879,11 @@ } else if (PyUnicode_Check(init)) { /* from a unicode, we add the null terminator */ - length = _my_PyUnicode_SizeAsWideChar(init) + 1; + if (ctitem->ct_size == 2) + length = _my_PyUnicode_SizeAsChar16(init); + else + length = _my_PyUnicode_SizeAsChar32(init); + length += 1; } else if ((ctitem->ct_flags & CT_IS_FILE) && PyFile_Check(init)) { *output_data = (char *)PyFile_AsFile(init); @@ 
-2904,6 +3114,93 @@ } } +static PyObject *cdata_complex(PyObject *cd_, PyObject *noarg) +{ + CDataObject *cd = (CDataObject *)cd_; + + if (cd->c_type->ct_flags & CT_PRIMITIVE_COMPLEX) { + Py_complex value = read_raw_complex_data(cd->c_data, cd->c_type->ct_size); + PyObject *op = PyComplex_FromCComplex(value); + return op; + } + /* or cannot be directly converted by + calling complex(), just like cannot be directly + converted by calling float() */ + + PyErr_Format(PyExc_TypeError, "complex() not supported on cdata '%s'", + cd->c_type->ct_name); + return NULL; +} + +static int explicit_release_case(PyObject *cd) +{ + CTypeDescrObject *ct = ((CDataObject *)cd)->c_type; + if (Py_TYPE(cd) == &CDataOwning_Type) { + if ((ct->ct_flags & (CT_POINTER | CT_ARRAY)) != 0) /* ffi.new() */ + return 0; + } + else if (Py_TYPE(cd) == &CDataOwningGC_Type) { + if (ct->ct_flags & CT_ARRAY) /* ffi.from_buffer() */ + return 1; + } + else if (Py_TYPE(cd) == &CDataGCP_Type) { + return 2; /* ffi.gc() */ + } + PyErr_SetString(PyExc_ValueError, + "only 'cdata' object from ffi.new(), ffi.gc(), ffi.from_buffer() " + "or ffi.new_allocator()() can be used with the 'with' keyword or " + "ffi.release()"); + return -1; +} + +static PyObject *cdata_enter(PyObject *cd, PyObject *noarg) +{ + if (explicit_release_case(cd) < 0) /* only to check the ctype */ + return NULL; + Py_INCREF(cd); + return cd; +} + +static PyObject *cdata_exit(PyObject *cd, PyObject *args) +{ + /* 'args' ignored */ + CTypeDescrObject *ct; + Py_buffer *view; + switch (explicit_release_case(cd)) + { + case 0: /* ffi.new() */ + /* no effect on CPython: raw memory is allocated with the + same malloc() as the object itself, so it can't be + released independently. If we use a custom allocator, + then it's implemented with ffi.gc(). 
*/ + ct = ((CDataObject *)cd)->c_type; + if (ct->ct_flags & CT_IS_PTR_TO_OWNED) { + PyObject *x = ((CDataObject_own_structptr *)cd)->structobj; + if (Py_TYPE(x) == &CDataGCP_Type) { + /* this is a special case for + ffi.new_allocator()("struct-or-union") */ + cdatagcp_finalize((CDataObject_gcp *)x); + } + } + break; + + case 1: /* ffi.from_buffer() */ + view = ((CDataObject_owngc_frombuf *)cd)->bufferview; + PyBuffer_Release(view); + break; + + case 2: /* ffi.gc() or ffi.new_allocator()("not-struct-nor-union") */ + /* call the destructor immediately */ + cdatagcp_finalize((CDataObject_gcp *)cd); + break; + + default: + return NULL; + } + Py_INCREF(Py_None); + return Py_None; +} + static PyObject *cdata_iter(CDataObject *); static PyNumberMethods CData_as_number = { @@ -2953,8 +3250,11 @@ }; static PyMethodDef cdata_methods[] = { - {"__dir__", cdata_dir, METH_NOARGS}, - {NULL, NULL} /* sentinel */ + {"__dir__", cdata_dir, METH_NOARGS}, + {"__complex__", cdata_complex, METH_NOARGS}, + {"__enter__", cdata_enter, METH_NOARGS}, + {"__exit__", cdata_exit, METH_VARARGS}, + {NULL, NULL} /* sentinel */ }; static PyTypeObject CData_Type = { @@ -3010,24 +3310,24 @@ 0, /* tp_setattr */ 0, /* tp_compare */ (reprfunc)cdataowning_repr, /* tp_repr */ - 0, /* tp_as_number */ + 0, /* inherited */ /* tp_as_number */ 0, /* tp_as_sequence */ &CDataOwn_as_mapping, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ + 0, /* inherited */ /* tp_hash */ + 0, /* inherited */ /* tp_call */ 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ + 0, /* inherited */ /* tp_getattro */ + 0, /* inherited */ /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES, /* tp_flags */ 0, /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ + 0, /* inherited */ /* tp_richcompare */ + 0, /* inherited */ /* tp_weaklistoffset */ + 0, /* inherited */ /* tp_iter */ 0, /* tp_iternext */ - 0, /* 
tp_methods */ + 0, /* inherited */ /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ &CData_Type, /* tp_base */ @@ -3052,25 +3352,25 @@ 0, /* tp_setattr */ 0, /* tp_compare */ (reprfunc)cdataowninggc_repr, /* tp_repr */ - 0, /* tp_as_number */ + 0, /* inherited */ /* tp_as_number */ 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ + 0, /* inherited */ /* tp_as_mapping */ + 0, /* inherited */ /* tp_hash */ + 0, /* inherited */ /* tp_call */ 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ + 0, /* inherited */ /* tp_getattro */ + 0, /* inherited */ /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES /* tp_flags */ | Py_TPFLAGS_HAVE_GC, 0, /* tp_doc */ (traverseproc)cdataowninggc_traverse, /* tp_traverse */ (inquiry)cdataowninggc_clear, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ + 0, /* inherited */ /* tp_richcompare */ + 0, /* inherited */ /* tp_weaklistoffset */ + 0, /* inherited */ /* tp_iter */ 0, /* tp_iternext */ - 0, /* tp_methods */ + 0, /* inherited */ /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ &CDataOwning_Type, /* tp_base */ @@ -3094,15 +3394,15 @@ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ + 0, /* inherited */ /* tp_repr */ + 0, /* inherited */ /* tp_as_number */ 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ + 0, /* inherited */ /* tp_as_mapping */ + 0, /* inherited */ /* tp_hash */ + 0, /* inherited */ /* tp_call */ 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ + 0, /* inherited */ /* tp_getattro */ + 0, /* inherited */ /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES /* tp_flags */ #ifdef Py_TPFLAGS_HAVE_FINALIZE @@ -3112,11 +3412,11 @@ 0, /* tp_doc */ (traverseproc)cdatagcp_traverse, /* tp_traverse */ 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* 
tp_weaklistoffset */ - 0, /* tp_iter */ + 0, /* inherited */ /* tp_richcompare */ + 0, /* inherited */ /* tp_weaklistoffset */ + 0, /* inherited */ /* tp_iter */ 0, /* tp_iternext */ - 0, /* tp_methods */ + 0, /* inherited */ /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ &CData_Type, /* tp_base */ @@ -3128,7 +3428,7 @@ 0, /* tp_init */ 0, /* tp_alloc */ 0, /* tp_new */ - 0, /* tp_free */ + 0, /* inherited */ /* tp_free */ 0, /* tp_is_gc */ 0, /* tp_bases */ 0, /* tp_mro */ @@ -3228,6 +3528,8 @@ CTypeDescrObject *ct, int dont_clear) { + /* note: objects with &CDataOwning_Type are always allocated with + either a plain malloc() or calloc(), and freed with free(). */ CDataObject *cd; if (dont_clear) cd = malloc(size); @@ -3258,6 +3560,7 @@ if (ct->ct_flags & CT_WITH_VAR_ARRAY) { PyErr_SetString(PyExc_TypeError, "return type is a struct/union with a varsize array member"); + return NULL; } cd = allocate_owning_object(dataoffset + datasize, ct, /*dont_clear=*/1); if (cd == NULL) @@ -3380,7 +3683,7 @@ dataoffset = offsetof(CDataObject_own_nolength, alignment); datasize = ct->ct_size; if (datasize < 0) { - explicitlength = get_new_array_length(&init); + explicitlength = get_new_array_length(ct->ct_itemdescr, &init); if (explicitlength < 0) return NULL; ctitem = ct->ct_itemdescr; @@ -3550,18 +3853,24 @@ value = (unsigned char)PyString_AS_STRING(ob)[0]; } #endif + else if (PyUnicode_Check(ob)) { + char err_buf[80]; + cffi_char32_t ordinal; + if (_my_PyUnicode_AsSingleChar32(ob, &ordinal, err_buf) < 0) { + PyErr_Format(PyExc_TypeError, + "cannot cast %s to ctype '%s'", err_buf, ct->ct_name); + return NULL; + } + /* the types char16_t and char32_t are both unsigned. However, + wchar_t might be signed. In theory it does not matter, + because 'ordinal' comes from a regular Python unicode. 
*/ #ifdef HAVE_WCHAR_H - else if (PyUnicode_Check(ob)) { - wchar_t ordinal; - if (_my_PyUnicode_AsSingleWideChar(ob, &ordinal) < 0) { - PyErr_Format(PyExc_TypeError, - "cannot cast unicode string of length %zd to ctype '%s'", - PyUnicode_GET_SIZE(ob), ct->ct_name); - return NULL; - } - value = (long)ordinal; - } + if (ct->ct_flags & CT_IS_SIGNED_WCHAR) + value = (wchar_t)ordinal; + else #endif + value = ordinal; + } else if (PyBytes_Check(ob)) { int res = _convert_to_char(ob); if (res < 0) @@ -3587,6 +3896,35 @@ return cd; } +/* returns -1 if cannot cast, 0 if we don't get a value, 1 if we do */ +static int check_bytes_for_float_compatible(PyObject *io, double *out_value) +{ + if (PyBytes_Check(io)) { + if (PyBytes_GET_SIZE(io) != 1) + goto error; + *out_value = (unsigned char)PyBytes_AS_STRING(io)[0]; + return 1; + } + else if (PyUnicode_Check(io)) { + char ignored[80]; + cffi_char32_t ordinal; + if (_my_PyUnicode_AsSingleChar32(io, &ordinal, ignored) < 0) + goto error; + /* the signness of the 32-bit version of wide chars should not + * matter here, because 'ordinal' comes from a normal Python + * unicode string */ + *out_value = ordinal; + return 1; + } + *out_value = 0; /* silence a gcc warning if this function is inlined */ + return 0; + + error: + Py_DECREF(io); + *out_value = 0; /* silence a gcc warning if this function is inlined */ + return -1; +} + static PyObject *do_cast(CTypeDescrObject *ct, PyObject *ob) { CDataObject *cd; @@ -3628,6 +3966,7 @@ /* cast to a float */ double value; PyObject *io; + int res; if (CData_Check(ob)) { CDataObject *cdsrc = (CDataObject *)ob; @@ -3643,37 +3982,23 @@ Py_INCREF(io); } - if (PyBytes_Check(io)) { - if (PyBytes_GET_SIZE(io) != 1) { - Py_DECREF(io); - goto cannot_cast; - } - value = (unsigned char)PyBytes_AS_STRING(io)[0]; - } -#if HAVE_WCHAR_H - else if (PyUnicode_Check(io)) { - wchar_t ordinal; - if (_my_PyUnicode_AsSingleWideChar(io, &ordinal) < 0) { - Py_DECREF(io); - goto cannot_cast; - } - value = 
(long)ordinal; - } -#endif - else if ((ct->ct_flags & CT_IS_LONGDOUBLE) && + res = check_bytes_for_float_compatible(io, &value); + if (res == -1) + goto cannot_cast; + if (res == 0) { + if ((ct->ct_flags & CT_IS_LONGDOUBLE) && CData_Check(io) && (((CDataObject *)io)->c_type->ct_flags & CT_IS_LONGDOUBLE)) { - long double lvalue; - char *data = ((CDataObject *)io)->c_data; - /*READ(data, sizeof(long double)*/ - lvalue = read_raw_longdouble_data(data); - Py_DECREF(io); - cd = _new_casted_primitive(ct); - if (cd != NULL) - write_raw_longdouble_data(cd->c_data, lvalue); - return (PyObject *)cd; - } - else { + long double lvalue; + char *data = ((CDataObject *)io)->c_data; + /*READ(data, sizeof(long double)*/ + lvalue = read_raw_longdouble_data(data); + Py_DECREF(io); + cd = _new_casted_primitive(ct); + if (cd != NULL) + write_raw_longdouble_data(cd->c_data, lvalue); + return (PyObject *)cd; + } value = PyFloat_AsDouble(io); } Py_DECREF(io); @@ -3689,6 +4014,45 @@ } return (PyObject *)cd; } + else if (ct->ct_flags & CT_PRIMITIVE_COMPLEX) { + /* cast to a complex */ + Py_complex value; + PyObject *io; + int res; + + if (CData_Check(ob)) { + CDataObject *cdsrc = (CDataObject *)ob; + + if (!(cdsrc->c_type->ct_flags & CT_PRIMITIVE_ANY)) + goto cannot_cast; + io = convert_to_object(cdsrc->c_data, cdsrc->c_type); + if (io == NULL) + return NULL; + } + else { + io = ob; + Py_INCREF(io); + } + + res = check_bytes_for_float_compatible(io, &value.real); + if (res == -1) + goto cannot_cast; + if (res == 1) { + // got it from string + value.imag = 0.0; + } else { + value = PyComplex_AsCComplex(io); + } + Py_DECREF(io); + if (PyErr_Occurred()) { + return NULL; + } + cd = _new_casted_primitive(ct); + if (cd != NULL) { + write_raw_complex_data(cd->c_data, value, ct->ct_size); + } + return (PyObject *)cd; + } else { PyErr_Format(PyExc_TypeError, "cannot cast to ctype '%s'", ct->ct_name); @@ -3726,7 +4090,8 @@ static void dl_dealloc(DynLibObject *dlobj) { - dlclose(dlobj->dl_handle); + 
if (dlobj->dl_handle != NULL) + dlclose(dlobj->dl_handle); free(dlobj->dl_name); PyObject_Del(dlobj); } @@ -3736,6 +4101,17 @@ return PyText_FromFormat("", dlobj->dl_name); } +static int dl_check_closed(DynLibObject *dlobj) +{ + if (dlobj->dl_handle == NULL) + { + PyErr_Format(PyExc_ValueError, "library '%s' has already been closed", + dlobj->dl_name); + return -1; + } + return 0; +} + static PyObject *dl_load_function(DynLibObject *dlobj, PyObject *args) { CTypeDescrObject *ct; @@ -3746,6 +4122,9 @@ &CTypeDescr_Type, &ct, &funcname)) return NULL; + if (dl_check_closed(dlobj) < 0) + return NULL; + if (!(ct->ct_flags & (CT_FUNCTIONPTR | CT_POINTER | CT_ARRAY))) { PyErr_Format(PyExc_TypeError, "function or pointer or array cdata expected, got '%s'", @@ -3778,6 +4157,9 @@ &CTypeDescr_Type, &ct, &varname)) return NULL; + if (dl_check_closed(dlobj) < 0) + return NULL; + dlerror(); /* clear error condition */ data = dlsym(dlobj->dl_handle, varname); if (data == NULL) { @@ -3803,6 +4185,9 @@ &CTypeDescr_Type, &ct, &varname, &value)) return NULL; + if (dl_check_closed(dlobj) < 0) + return NULL; + dlerror(); /* clear error condition */ data = dlsym(dlobj->dl_handle, varname); if (data == NULL) { @@ -3818,10 +4203,22 @@ return Py_None; } +static PyObject *dl_close_lib(DynLibObject *dlobj, PyObject *no_args) +{ + if (dlobj->dl_handle != NULL) + { + dlclose(dlobj->dl_handle); + dlobj->dl_handle = NULL; + } + Py_INCREF(Py_None); + return Py_None; +} + static PyMethodDef dl_methods[] = { {"load_function", (PyCFunction)dl_load_function, METH_VARARGS}, {"read_variable", (PyCFunction)dl_read_variable, METH_VARARGS}, {"write_variable", (PyCFunction)dl_write_variable, METH_VARARGS}, + {"close_lib", (PyCFunction)dl_close_lib, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; @@ -3857,44 +4254,103 @@ dl_methods, /* tp_methods */ }; -static PyObject *b_load_library(PyObject *self, PyObject *args) -{ - char *filename_or_null, *printable_filename; +static void *b_do_dlopen(PyObject *args, 
const char **p_printable_filename, + PyObject **p_temp) +{ + /* Logic to call the correct version of dlopen(). Returns NULL in case of error. + Otherwise, '*p_printable_filename' will point to a printable char version of + the filename (maybe utf-8-encoded). '*p_temp' will be set either to NULL or + to a temporary object that must be freed after looking at printable_filename. + */ void *handle; - DynLibObject *dlobj; + char *filename_or_null; int flags = 0; - + *p_temp = NULL; + if (PyTuple_GET_SIZE(args) == 0 || PyTuple_GET_ITEM(args, 0) == Py_None) { PyObject *dummy; if (!PyArg_ParseTuple(args, "|Oi:load_library", &dummy, &flags)) return NULL; filename_or_null = NULL; - } - else if (!PyArg_ParseTuple(args, "et|i:load_library", - Py_FileSystemDefaultEncoding, &filename_or_null, - &flags)) - return NULL; - + *p_printable_filename = ""; + } + else + { + PyObject *s = PyTuple_GET_ITEM(args, 0); +#ifdef MS_WIN32 + Py_UNICODE *filenameW; + if (PyArg_ParseTuple(args, "u|i:load_library", &filenameW, &flags)) + { +#if PY_MAJOR_VERSION < 3 + s = PyUnicode_AsUTF8String(s); + if (s == NULL) + return NULL; + *p_temp = s; +#endif + *p_printable_filename = PyText_AsUTF8(s); + if (*p_printable_filename == NULL) + return NULL; + + handle = dlopenW(filenameW); + goto got_handle; + } + PyErr_Clear(); +#endif + if (!PyArg_ParseTuple(args, "et|i:load_library", + Py_FileSystemDefaultEncoding, &filename_or_null, &flags)) + return NULL; +#if PY_MAJOR_VERSION < 3 + if (PyUnicode_Check(s)) + { + s = PyUnicode_AsUTF8String(s); + if (s == NULL) + return NULL; + *p_temp = s; + } +#endif + *p_printable_filename = PyText_AsUTF8(s); + if (*p_printable_filename == NULL) + return NULL; + } if ((flags & (RTLD_NOW | RTLD_LAZY)) == 0) flags |= RTLD_NOW; - printable_filename = filename_or_null ? 
filename_or_null : ""; handle = dlopen(filename_or_null, flags); + +#ifdef MS_WIN32 + got_handle: +#endif if (handle == NULL) { const char *error = dlerror(); - PyErr_Format(PyExc_OSError, "cannot load library %s: %s", - printable_filename, error); + PyErr_Format(PyExc_OSError, "cannot load library '%s': %s", + *p_printable_filename, error); return NULL; } + return handle; +} + +static PyObject *b_load_library(PyObject *self, PyObject *args) +{ + const char *printable_filename; + PyObject *temp; + void *handle; + DynLibObject *dlobj = NULL; + + handle = b_do_dlopen(args, &printable_filename, &temp); + if (handle == NULL) + goto error; dlobj = PyObject_New(DynLibObject, &dl_type); if (dlobj == NULL) { dlclose(handle); - return NULL; + goto error; } dlobj->dl_handle = handle; dlobj->dl_name = strdup(printable_filename); + + error: + Py_XDECREF(temp); return (PyObject *)dlobj; } @@ -3946,7 +4402,10 @@ assert(x->ct_unique_key == NULL); x->ct_unique_key = key; /* the key will be freed in ctypedescr_dealloc() */ - Py_DECREF(x); /* the 'value' in unique_cache doesn't count as 1 */ + /* the 'value' in unique_cache doesn't count as 1, but don't use + Py_DECREF(x) here because it will confuse debug builds into thinking + there was an extra DECREF in total. */ + ((PyObject *)x)->ob_refcnt--; return (PyObject *)x; error: @@ -3954,6 +4413,11 @@ return NULL; } +/* according to the C standard, these types should be equivalent to the + _Complex types for the purposes of storage (not arguments in calls!) 
*/ +typedef float cffi_float_complex_t[2]; +typedef double cffi_double_complex_t[2]; + static PyObject *new_primitive_type(const char *name) { #define ENUM_PRIMITIVE_TYPES \ @@ -3971,7 +4435,11 @@ EPTYPE(f, float, CT_PRIMITIVE_FLOAT ) \ EPTYPE(d, double, CT_PRIMITIVE_FLOAT ) \ EPTYPE(ld, long double, CT_PRIMITIVE_FLOAT | CT_IS_LONGDOUBLE ) \ + EPTYPE2(fc, "float _Complex", cffi_float_complex_t, CT_PRIMITIVE_COMPLEX ) \ + EPTYPE2(dc, "double _Complex", cffi_double_complex_t, CT_PRIMITIVE_COMPLEX ) \ ENUM_PRIMITIVE_TYPES_WCHAR \ + EPTYPE2(c16, "char16_t", cffi_char16_t, CT_PRIMITIVE_CHAR ) \ + EPTYPE2(c32, "char32_t", cffi_char32_t, CT_PRIMITIVE_CHAR ) \ EPTYPE(b, _Bool, CT_PRIMITIVE_UNSIGNED | CT_IS_BOOL ) \ /* the following types are not primitive in the C sense */ \ EPTYPE(i8, int8_t, CT_PRIMITIVE_SIGNED) \ @@ -4008,7 +4476,8 @@ #ifdef HAVE_WCHAR_H # define ENUM_PRIMITIVE_TYPES_WCHAR \ - EPTYPE(wc, wchar_t, CT_PRIMITIVE_CHAR ) + EPTYPE(wc, wchar_t, CT_PRIMITIVE_CHAR | \ + (((wchar_t)-1) > 0 ? 0 : CT_IS_SIGNED_WCHAR)) #else # define ENUM_PRIMITIVE_TYPES_WCHAR /* nothing */ #endif @@ -4081,6 +4550,13 @@ else goto bad_ffi_type; } + else if (ptypes->flags & CT_PRIMITIVE_COMPLEX) { + /* As of March 2017, still no libffi support for complex. + It fails silently if we try to use ffi_type_complex_float + or ffi_type_complex_double. Better not use it at all. 
+ */ + ffitype = NULL; + } else { switch (ptypes->size) { case 1: ffitype = &ffi_type_uint8; break; @@ -4215,9 +4691,6 @@ sprintf(extra_text, "[]"); length = -1; arraysize = -1; - if ((ctitem->ct_flags & CT_PRIMITIVE_CHAR) && - ctitem->ct_size == sizeof(char)) - flags |= CT_IS_UNSIZED_CHAR_A; } else { sprintf(extra_text, "[%llu]", (unsigned PY_LONG_LONG)length); @@ -4344,6 +4817,12 @@ #define SF_PACKED 0x08 #define SF_STD_FIELD_POS 0x80 +#ifdef MS_WIN32 +# define SF_DEFAULT_PACKING 8 +#else +# define SF_DEFAULT_PACKING 0x40000000 /* a huge power of two */ +#endif + static int complete_sflags(int sflags) { /* add one of the SF_xxx_BITFIELDS flags if none is specified */ @@ -4403,14 +4882,22 @@ CFieldObject **previous; int prev_bitfield_size, prev_bitfield_free; int sflags = 0, fflags; - - if (!PyArg_ParseTuple(args, "O!O!|Onii:complete_struct_or_union", + int pack = 0; + + if (!PyArg_ParseTuple(args, "O!O!|Oniii:complete_struct_or_union", &CTypeDescr_Type, &ct, &PyList_Type, &fields, - &ignored, &totalsize, &totalalignment, &sflags)) + &ignored, &totalsize, &totalalignment, &sflags, + &pack)) return NULL; sflags = complete_sflags(sflags); + if (sflags & SF_PACKED) + pack = 1; + else if (pack <= 0) + pack = SF_DEFAULT_PACKING; + else + sflags |= SF_PACKED; if ((ct->ct_flags & (CT_STRUCT|CT_IS_OPAQUE)) == (CT_STRUCT|CT_IS_OPAQUE)) { @@ -4471,9 +4958,9 @@ /* update the total alignment requirement, but skip it if the field is an anonymous bitfield or if SF_PACKED */ falignorg = get_alignment(ftype); - falign = (sflags & SF_PACKED) ? 1 : falignorg; - if (falign < 0) + if (falignorg < 0) goto error; + falign = (pack < falignorg) ? 
pack : falignorg; do_align = 1; if (!(sflags & SF_GCC_ARM_BITFIELDS) && fbitsize >= 0) { @@ -4520,7 +5007,6 @@ if (PyText_GetSize(fname) == 0 && ftype->ct_flags & (CT_STRUCT|CT_UNION)) { /* a nested anonymous struct or union */ - /* note: it seems we only get here with ffi.verify() */ CFieldObject *cfsrc = (CFieldObject *)ftype->ct_extra; for (; cfsrc != NULL; cfsrc = cfsrc->cf_next) { /* broken complexity in the call to get_field_name(), @@ -4773,7 +5259,7 @@ { const char *place = is_result_type ? "return value" : "argument"; - if (ct->ct_flags & CT_PRIMITIVE_ANY) { + if (ct->ct_flags & (CT_PRIMITIVE_ANY & ~CT_PRIMITIVE_COMPLEX)) { return (ffi_type *)ct->ct_extra; } else if (ct->ct_flags & (CT_POINTER|CT_FUNCTIONPTR)) { @@ -4899,9 +5385,16 @@ return NULL; } else { + char *extra = ""; + if (ct->ct_flags & CT_PRIMITIVE_COMPLEX) + extra = " (the support for complex types inside libffi " + "is mostly missing at this point, so CFFI only " + "supports complex types as arguments or return " + "value in API-mode functions)"; + PyErr_Format(PyExc_NotImplementedError, - "ctype '%s' (size %zd) not supported as %s", - ct->ct_name, ct->ct_size, place); + "ctype '%s' (size %zd) not supported as %s%s", + ct->ct_name, ct->ct_size, place, extra); return NULL; } } @@ -5269,9 +5762,14 @@ return 0; } else if (ctype->ct_flags & (CT_PRIMITIVE_CHAR | CT_PRIMITIVE_SIGNED | - CT_PRIMITIVE_UNSIGNED)) { + CT_PRIMITIVE_UNSIGNED | + CT_POINTER | CT_FUNCTIONPTR)) { /* zero extension: fill the '*result' with zeros, and (on big- - endian machines) correct the 'result' pointer to write to */ + endian machines) correct the 'result' pointer to write to. + We also do that for pointers, even though we're normally not + in this branch because ctype->ct_size == sizeof(ffi_arg) for + pointers---except on some architectures like x32 (issue #372). 
+ */ memset(result, 0, sizeof(ffi_arg)); #ifdef WORDS_BIGENDIAN result += (sizeof(ffi_arg) - ctype->ct_size); @@ -5282,12 +5780,12 @@ return convert_from_object(result, ctype, pyobj); } -static void _my_PyErr_WriteUnraisable(char *objdescr, PyObject *obj, +static void _my_PyErr_WriteUnraisable(PyObject *t, PyObject *v, PyObject *tb, + char *objdescr, PyObject *obj, char *extra_error_line) { /* like PyErr_WriteUnraisable(), but write a full traceback */ - PyObject *f, *t, *v, *tb; - PyErr_Fetch(&t, &v, &tb); + PyObject *f; #if PY_MAJOR_VERSION >= 3 /* jump through hoops to ensure the tb is attached to v, on Python 3 */ PyErr_NormalizeException(&t, &v, &tb); @@ -5377,8 +5875,12 @@ } onerror_cb = PyTuple_GET_ITEM(cb_args, 3); if (onerror_cb == Py_None) { - _my_PyErr_WriteUnraisable("From cffi callback ", py_ob, + PyObject *ecap, *t, *v, *tb; + PyErr_Fetch(&t, &v, &tb); + ecap = _cffi_start_error_capture(); + _my_PyErr_WriteUnraisable(t, v, tb, "From cffi callback ", py_ob, extra_error_line); + _cffi_stop_error_capture(ecap); } else { PyObject *exc1, *val1, *tb1, *res1, *exc2, *val2, *tb2; @@ -5402,14 +5904,17 @@ } else { /* double exception! print a double-traceback... 
*/ + PyObject *ecap; PyErr_Fetch(&exc2, &val2, &tb2); - PyErr_Restore(exc1, val1, tb1); - _my_PyErr_WriteUnraisable("From cffi callback ", py_ob, + ecap = _cffi_start_error_capture(); + _my_PyErr_WriteUnraisable(exc1, val1, tb1, + "From cffi callback ", py_ob, extra_error_line); - PyErr_Restore(exc2, val2, tb2); extra_error_line = ("\nDuring the call to 'onerror', " "another exception occurred:\n\n"); - _my_PyErr_WriteUnraisable(NULL, NULL, extra_error_line); + _my_PyErr_WriteUnraisable(exc2, val2, tb2, + NULL, NULL, extra_error_line); + _cffi_stop_error_capture(ecap); } } goto done; @@ -5488,11 +5993,12 @@ static PyObject *b_callback(PyObject *self, PyObject *args) { CTypeDescrObject *ct; - CDataObject *cd; + CDataObject_closure *cd; PyObject *ob, *error_ob = Py_None, *onerror_ob = Py_None; PyObject *infotuple; cif_description_t *cif_descr; ffi_closure *closure; + void *closure_exec; if (!PyArg_ParseTuple(args, "O!O|OO:callback", &CTypeDescr_Type, &ct, &ob, &error_ob, &onerror_ob)) @@ -5502,15 +6008,24 @@ if (infotuple == NULL) return NULL; +#ifdef CFFI_TRUST_LIBFFI + closure = ffi_closure_alloc(sizeof(ffi_closure), &closure_exec); +#else closure = cffi_closure_alloc(); - - cd = PyObject_GC_New(CDataObject, &CDataOwningGC_Type); + closure_exec = closure; +#endif + if (closure == NULL) { + Py_DECREF(infotuple); + return NULL; + } + cd = PyObject_GC_New(CDataObject_closure, &CDataOwningGC_Type); if (cd == NULL) goto error; Py_INCREF(ct); - cd->c_type = ct; - cd->c_data = (char *)closure; - cd->c_weakreflist = NULL; + cd->head.c_type = ct; + cd->head.c_data = (char *)closure_exec; + cd->head.c_weakreflist = NULL; + cd->closure = closure; PyObject_GC_Track(cd); cif_descr = (cif_description_t *)ct->ct_extra; @@ -5520,8 +6035,13 @@ "return type or with '...'", ct->ct_name); goto error; } +#ifdef CFFI_TRUST_LIBFFI + if (ffi_prep_closure_loc(closure, &cif_descr->cif, + invoke_callback, infotuple, closure_exec) != FFI_OK) { +#else if (ffi_prep_closure(closure, 
&cif_descr->cif, invoke_callback, infotuple) != FFI_OK) { +#endif PyErr_SetString(PyExc_SystemError, "libffi failed to build this callback"); goto error; @@ -5544,8 +6064,13 @@ error: closure->user_data = NULL; - if (cd == NULL) + if (cd == NULL) { +#ifdef CFFI_TRUST_LIBFFI + ffi_closure_free(closure); +#else cffi_closure_free(closure); +#endif + } else Py_DECREF(cd); Py_XDECREF(infotuple); @@ -5596,7 +6121,7 @@ if (!PyText_Check(tmpkey)) { #if PY_MAJOR_VERSION < 3 if (PyUnicode_Check(tmpkey)) { - char *text = PyText_AsUTF8(tmpkey); + const char *text = PyText_AsUTF8(tmpkey); if (text == NULL) goto error; Py_DECREF(tmpkey); @@ -5888,27 +6413,46 @@ } return PyBytes_FromStringAndSize(start, length); } -#ifdef HAVE_WCHAR_H else if (cd->c_type->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR) { - const wchar_t *start = (wchar_t *)cd->c_data; - assert(cd->c_type->ct_itemdescr->ct_size == sizeof(wchar_t)); - if (length < 0) { - /*READ(start, sizeof(wchar_t))*/ - length = 0; - while (start[length]) - length++; - /*READ(start, sizeof(wchar_t) * length)*/ + switch (cd->c_type->ct_itemdescr->ct_size) { + case 2: { + const cffi_char16_t *start = (cffi_char16_t *)cd->c_data; + if (length < 0) { + /*READ(start, 2)*/ + length = 0; + while (start[length]) + length++; + /*READ(start, 2 * length)*/ + } + else { + /*READ(start, 2 * length)*/ + maxlen = length; + length = 0; + while (length < maxlen && start[length]) + length++; + } + return _my_PyUnicode_FromChar16(start, length); } - else { - /*READ(start, sizeof(wchar_t) * length)*/ - maxlen = length; - length = 0; - while (length < maxlen && start[length]) - length++; + case 4: { + const cffi_char32_t *start = (cffi_char32_t *)cd->c_data; + if (length < 0) { + /*READ(start, 4)*/ + length = 0; + while (start[length]) + length++; + /*READ(start, 4 * length)*/ + } + else { + /*READ(start, 4 * length)*/ + maxlen = length; + length = 0; + while (length < maxlen && start[length]) + length++; + } + return _my_PyUnicode_FromChar32(start, 
length); } - return _my_PyUnicode_FromWideChar(start, length); - } -#endif + } + } } else if (cd->c_type->ct_flags & CT_IS_ENUM) { return convert_cdata_to_enum_string(cd, 0); @@ -5922,12 +6466,14 @@ /*READ(cd->c_data, cd->c_type->ct_size)*/ if (cd->c_type->ct_size == sizeof(char)) return PyBytes_FromStringAndSize(cd->c_data, 1); -#ifdef HAVE_WCHAR_H else if (cd->c_type->ct_flags & CT_PRIMITIVE_CHAR) { - assert(cd->c_type->ct_size == sizeof(wchar_t)); - return _my_PyUnicode_FromWideChar((wchar_t *)cd->c_data, 1); - } -#endif + switch (cd->c_type->ct_size) { + case 2: + return _my_PyUnicode_FromChar16((cffi_char16_t *)cd->c_data, 1); + case 4: + return _my_PyUnicode_FromChar32((cffi_char32_t *)cd->c_data, 1); + } + } } PyErr_Format(PyExc_TypeError, "string(): unexpected cdata '%s' argument", cd->c_type->ct_name); @@ -5972,12 +6518,14 @@ /* byte- and unicode strings */ ctitem = cd->c_type->ct_itemdescr; if (ctitem->ct_flags & CT_PRIMITIVE_CHAR) { - if (ctitem->ct_size == sizeof(char)) + switch (ctitem->ct_size) { + case sizeof(char): return PyBytes_FromStringAndSize(cd->c_data, length); -#ifdef HAVE_WCHAR_H - else if (ctitem->ct_size == sizeof(wchar_t)) - return _my_PyUnicode_FromWideChar((wchar_t *)cd->c_data, length); -#endif + case 2: + return _my_PyUnicode_FromChar16((cffi_char16_t *)cd->c_data,length); + case 4: + return _my_PyUnicode_FromChar32((cffi_char32_t *)cd->c_data,length); + } } /* else, the result is a list. 
This implementation should be @@ -5993,6 +6541,7 @@ src = cd->c_data; itemsize = ctitem->ct_size; if (itemsize < 0) { + Py_DECREF(result); PyErr_Format(PyExc_ValueError, "'%s' points to items of unknown size", cd->c_type->ct_name); return NULL; @@ -6261,10 +6810,18 @@ return 0; } -static PyObject *direct_from_buffer(CTypeDescrObject *ct, PyObject *x) +static PyObject *direct_from_buffer(CTypeDescrObject *ct, PyObject *x, + int require_writable) { CDataObject *cd; Py_buffer *view; + Py_ssize_t arraylength; + + if (!(ct->ct_flags & CT_ARRAY)) { + PyErr_Format(PyExc_TypeError, "expected an array ctype, got '%s'", + ct->ct_name); + return NULL; + } /* PyPy 5.7 can obtain buffers for string (python 2) or bytes (python 3). from_buffer(u"foo") is disallowed. @@ -6281,9 +6838,44 @@ PyErr_NoMemory(); return NULL; } - if (_my_PyObject_GetContiguousBuffer(x, view, 0) < 0) + if (_my_PyObject_GetContiguousBuffer(x, view, require_writable) < 0) goto error1; + if (ct->ct_length >= 0) { + /* it's an array with a fixed length; make sure that the + buffer contains enough bytes. */ + if (view->len < ct->ct_size) { + PyErr_Format(PyExc_ValueError, + "buffer is too small (%zd bytes) for '%s' (%zd bytes)", + view->len, ct->ct_name, ct->ct_size); + goto error2; + } + arraylength = ct->ct_length; + } + else { + /* it's an open 'array[]' */ + if (ct->ct_itemdescr->ct_size == 1) { + /* fast path, performance only */ + arraylength = view->len; + } + else if (ct->ct_itemdescr->ct_size > 0) { + /* give it as many items as fit the buffer. Ignore a + partial last element. */ + arraylength = view->len / ct->ct_itemdescr->ct_size; + } + else { + /* it's an array 'empty[]'. Unsupported obscure case: + the problem is that setting the length of the result + to anything large (like SSIZE_T_MAX) is dangerous, + because if someone tries to loop over it, it will + turn effectively into an infinite loop. 
*/ + PyErr_Format(PyExc_ZeroDivisionError, + "from_buffer('%s', ..): the actual length of the array " + "cannot be computed", ct->ct_name); + goto error2; + } + } + cd = (CDataObject *)PyObject_GC_New(CDataObject_owngc_frombuf, &CDataOwningGC_Type); if (cd == NULL) @@ -6293,7 +6885,7 @@ cd->c_type = ct; cd->c_data = view->buf; cd->c_weakreflist = NULL; - ((CDataObject_owngc_frombuf *)cd)->length = view->len; + ((CDataObject_owngc_frombuf *)cd)->length = arraylength; ((CDataObject_owngc_frombuf *)cd)->bufferview = view; PyObject_GC_Track(cd); return (PyObject *)cd; @@ -6309,15 +6901,13 @@ { CTypeDescrObject *ct; PyObject *x; - - if (!PyArg_ParseTuple(args, "O!O", &CTypeDescr_Type, &ct, &x)) + int require_writable = 0; + + if (!PyArg_ParseTuple(args, "O!O|i", &CTypeDescr_Type, &ct, &x, + &require_writable)) return NULL; - if (!(ct->ct_flags & CT_IS_UNSIZED_CHAR_A)) { - PyErr_Format(PyExc_TypeError, "needs 'char[]', got '%s'", ct->ct_name); - return NULL; - } - return direct_from_buffer(ct, x); + return direct_from_buffer(ct, x, require_writable); } static int _fetch_as_buffer(PyObject *x, Py_buffer *view, int writable_only) @@ -6384,10 +6974,12 @@ CDataObject *cd; CDataObject *origobj; PyObject *destructor; - static char *keywords[] = {"cdata", "destructor", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O:gc", keywords, - &CData_Type, &origobj, &destructor)) + Py_ssize_t ignored; /* for pypy */ + static char *keywords[] = {"cdata", "destructor", "size", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O|n:gc", keywords, + &CData_Type, &origobj, &destructor, + &ignored)) return NULL; if (destructor == Py_None) { @@ -6405,6 +6997,15 @@ return (PyObject *)cd; } +static PyObject *b_release(PyObject *self, PyObject *arg) +{ + if (!CData_Check(arg)) { + PyErr_SetString(PyExc_TypeError, "expected a 'cdata' object"); + return NULL; + } + return cdata_exit(arg, NULL); +} + /************************************************************/ static char 
_testfunc0(char a, char b) @@ -6579,6 +7180,20 @@ return -42; } +#if 0 /* libffi doesn't properly support complexes currently */ + /* also, MSVC might not support _Complex... */ + /* if this is enabled one day, remember to also add _Complex + * arguments in addition to return values. */ +static float _Complex _testfunc24(float a, float b) +{ + return a + I*2.0*b; +} +static double _Complex _testfunc25(double a, double b) +{ + return a + I*2.0*b; +} +#endif + static PyObject *b__testfunc(PyObject *self, PyObject *args) { /* for testing only */ From pypy.commits at gmail.com Thu Jan 31 04:26:55 2019 From: pypy.commits at gmail.com (vyskocilm) Date: Thu, 31 Jan 2019 01:26:55 -0800 (PST) Subject: [pypy-commit] cffi default: Real test of a pkgconfig integration Message-ID: <5c52bf5f.1c69fb81.f3586.ba39@mx.google.com> Author: Michal Vyskocil Branch: Changeset: r3197:e0251d298a40 Date: 2019-01-08 11:08 +0100 http://bitbucket.org/cffi/cffi/changeset/e0251d298a40/ Log: Real test of a pkgconfig integration Fix encoding errors Given testing Python program ``` from cffi import FFI ffibuilder = FFI() ffibuilder.cdef( "char* zsys_hostname();" ) ffibuilder.set_source( "_czmq", "#include ", pkgconfig=["libczmq"] ) if __name__ == "__main__": ffibuilder.compile(verbose=True) ``` We can run ffibuilder from source dir of czmq ``` PKG_CONFIG_PATH=`pwd`/src python3 t.py generating ./_czmq.c ... gcc -pthread -shared -flto -fuse-linker-plugin -ffat-lto-objects -flto-partition=none ./_czmq.o -L/usr/local/lib64 -L/usr/lib64 -lczmq -lzmq -lpython3.6m -o ./_czmq.cpython-36m-x86_64-linux-gnu.so ``` ``` python3 t.py generating ./_czmq.c ... gcc -pthread -shared -flto -fuse-linker-plugin -ffat-lto-objects -flto-partition=none ./_czmq.o -L/usr/lib64 -lczmq -lpython3.6m -o ./_czmq.cpython-36m-x86_64 -linux-gnu.so ``` Note that in the first case `/usr/local` has been added to the compiler path as provided by local pkg-config file. 
diff --git a/cffi/api.py b/cffi/api.py --- a/cffi/api.py +++ b/cffi/api.py @@ -645,7 +645,7 @@ if "libraries" in kwds: del kwds["libraries"] # real library names are going to be # provided by pkg-config - pkgconfig.merge_flags(kwds, pkgconfig.kwargs(kwds["pkgconfig"])) + pkgconfig.merge_flags(kwds, pkgconfig.flags(kwds["pkgconfig"])) del kwds["pkgconfig"] self._assigned_source = (str(module_name), source, source_extension, kwds) diff --git a/cffi/pkgconfig.py b/cffi/pkgconfig.py --- a/cffi/pkgconfig.py +++ b/cffi/pkgconfig.py @@ -1,5 +1,6 @@ # pkg-config, https://www.freedesktop.org/wiki/Software/pkg-config/ integration for cffi import subprocess +import sys def is_installed(): """Check if pkg-config is installed or not""" @@ -53,28 +54,29 @@ # drop starting -I -L -l from cflags def dropILl(string): def _dropILl(string): - if string.startswith(b"-I") or string.startswith(b"-L") or string.startswith(b"-l"): + if string.startswith(u"-I") or string.startswith(u"-L") or string.startswith(u"-l"): return string [2:] return [_dropILl(x) for x in string.split()] # convert -Dfoo=bar to list of tuples [("foo", "bar")] expected by cffi def macros(string): def _macros(string): - return tuple(string [2:].split(b"=", 2)) - return [_macros(x) for x in string.split() if x.startswith(b"-D")] + return tuple(string [2:].split(u"=", 2)) + return [_macros(x) for x in string.split() if x.startswith(u"-D")] def drop_macros(string): - return [x for x in string.split() if not x.startswith(b"-D")] + return [x for x in string.split() if not x.startswith(u"-D")] # return kwargs for given libname def kwargs(libname): + fse = sys.getfilesystemencoding() return { - "include_dirs" : dropILl(call(libname, "--cflags-only-I")), - "library_dirs" : dropILl(call(libname, "--libs-only-L")), - "libraries" : dropILl(call(libname, "--libs-only-l")), - "define_macros" : macros(call(libname, "--cflags-only-other")), - "extra_compile_args" : drop_macros(call(libname, "--cflags-only-other")), - 
"extra_link_args" : call(libname, "--libs-only-other").split() + "include_dirs" : dropILl(call(libname, "--cflags-only-I").decode(fse)), + "library_dirs" : dropILl(call(libname, "--libs-only-L").decode(fse)), + "libraries" : dropILl(call(libname, "--libs-only-l").decode('ascii')), + "define_macros" : macros(call(libname, "--cflags-only-other").decode('ascii')), + "extra_compile_args" : drop_macros(call(libname, "--cflags-only-other").decode('ascii')), + "extra_link_args" : call(libname, "--libs-only-other").decode('ascii').split() } # merge all arguments together diff --git a/testing/cffi1/test_pkgconfig.py b/testing/cffi1/test_pkgconfig.py --- a/testing/cffi1/test_pkgconfig.py +++ b/testing/cffi1/test_pkgconfig.py @@ -34,10 +34,10 @@ def test_pkgconfig(): kwargs = pkgconfig.flags("python-3.6") assert kwargs == { - 'include_dirs': [b'/usr/include/python3.6m'], - 'library_dirs': [b'/usr/lib64'], - 'libraries': [b'python3.6'], - 'define_macros': [(b'CFFI_TEST', b'1')], - 'extra_compile_args': [b'-O42'], - 'extra_link_args': [b'-lm'] + 'include_dirs': [u'/usr/include/python3.6m'], + 'library_dirs': [u'/usr/lib64'], + 'libraries': [u'python3.6'], + 'define_macros': [(u'CFFI_TEST', u'1')], + 'extra_compile_args': [u'-O42'], + 'extra_link_args': [u'-lm'] } From pypy.commits at gmail.com Thu Jan 31 04:26:56 2019 From: pypy.commits at gmail.com (vyskocilm) Date: Thu, 31 Jan 2019 01:26:56 -0800 (PST) Subject: [pypy-commit] cffi default: fix encoding of `--libs-only-l` it's about file system names Message-ID: <5c52bf60.1c69fb81.ebec2.fd2e@mx.google.com> Author: Michal Vyskocil Branch: Changeset: r3198:428261c7ae2c Date: 2019-01-08 19:58 +0100 http://bitbucket.org/cffi/cffi/changeset/428261c7ae2c/ Log: fix encoding of `--libs-only-l` it's about file system names diff --git a/cffi/pkgconfig.py b/cffi/pkgconfig.py --- a/cffi/pkgconfig.py +++ b/cffi/pkgconfig.py @@ -73,7 +73,7 @@ return { "include_dirs" : dropILl(call(libname, "--cflags-only-I").decode(fse)), "library_dirs" : 
dropILl(call(libname, "--libs-only-L").decode(fse)), - "libraries" : dropILl(call(libname, "--libs-only-l").decode('ascii')), + "libraries" : dropILl(call(libname, "--libs-only-l").decode(fse)), "define_macros" : macros(call(libname, "--cflags-only-other").decode('ascii')), "extra_compile_args" : drop_macros(call(libname, "--cflags-only-other").decode('ascii')), "extra_link_args" : call(libname, "--libs-only-other").decode('ascii').split() From pypy.commits at gmail.com Thu Jan 31 04:26:58 2019 From: pypy.commits at gmail.com (vyskocilm) Date: Thu, 31 Jan 2019 01:26:58 -0800 (PST) Subject: [pypy-commit] cffi default: Exception based flow Message-ID: <5c52bf62.1c69fb81.6d8d.76b8@mx.google.com> Author: Michal Vyskocil Branch: Changeset: r3199:58d9217d4315 Date: 2019-01-09 08:39 +0100 http://bitbucket.org/cffi/cffi/changeset/58d9217d4315/ Log: Exception based flow Now the with non pkg-config backup would be ``` module_name = "_czmq" source = "#include " try: print(f"### pkg-config path") ffibuilder.set_source( module_name, source, pkgconfig=["libczmq"] ) except Exception as e: print(f"Exception e: {e}") ffibuilder.set_source( module_name, source, libraries=["czmq"] ) ``` diff --git a/cffi/api.py b/cffi/api.py --- a/cffi/api.py +++ b/cffi/api.py @@ -641,10 +641,7 @@ if os.sep in module_name or (os.altsep and os.altsep in module_name): raise ValueError("'module_name' must not contain '/': use a dotted " "name to make a 'package.module' location") - if "pkgconfig" in kwds and pkgconfig.is_installed(): - if "libraries" in kwds: - del kwds["libraries"] # real library names are going to be - # provided by pkg-config + if "pkgconfig" in kwds: pkgconfig.merge_flags(kwds, pkgconfig.flags(kwds["pkgconfig"])) del kwds["pkgconfig"] self._assigned_source = (str(module_name), source, diff --git a/cffi/pkgconfig.py b/cffi/pkgconfig.py --- a/cffi/pkgconfig.py +++ b/cffi/pkgconfig.py @@ -2,14 +2,6 @@ import subprocess import sys -def is_installed(): - """Check if pkg-config is 
installed or not""" - try: - subprocess.check_output(["pkg-config", "--version"]) - return True - except subprocess.CalledProcessError: - return False - def merge_flags(cfg1, cfg2): """Merge values from cffi config flags cfg2 to cf1 @@ -39,14 +31,19 @@ Usage ... - ffibuilder.set_source("_foo", libraries = ["foo", "bar"], pkgconfig = ["libfoo", "libbar"]) + ffibuilder.set_source("_foo", pkgconfig = ["libfoo", "libbar"]) - If `pkg-config` is installed on build machine, then arguments - `include_dirs`, `library_dirs`, `libraries`, `define_macros`, - `extra_compile_args` and `extra_link_args` are extended with an output of - `pkg-config` for `libfoo` and `libbar`. + If pkg-config is installed on build machine, then arguments include_dirs, + library_dirs, libraries, define_macros, extra_compile_args and + extra_link_args are extended with an output of pkg-config for libfoo and + libbar. + + Raise `FileNotFoundError` if pkg-config is not installed or + `subprocess.CalledProcessError` if pkg-config fails. """ + subprocess.check_output(["pkg-config", "--version"]) + # make API great again! 
if isinstance(libs, (str, bytes)): libs = (libs, ) diff --git a/testing/cffi1/test_pkgconfig.py b/testing/cffi1/test_pkgconfig.py --- a/testing/cffi1/test_pkgconfig.py +++ b/testing/cffi1/test_pkgconfig.py @@ -32,8 +32,8 @@ def test_pkgconfig(): - kwargs = pkgconfig.flags("python-3.6") - assert kwargs == { + flags = pkgconfig.flags("python-3.6") + assert flags == { 'include_dirs': [u'/usr/include/python3.6m'], 'library_dirs': [u'/usr/lib64'], 'libraries': [u'python3.6'], From pypy.commits at gmail.com Thu Jan 31 04:27:00 2019 From: pypy.commits at gmail.com (vyskocilm) Date: Thu, 31 Jan 2019 01:27:00 -0800 (PST) Subject: [pypy-commit] cffi default: Document version pinning for pkgconfig Message-ID: <5c52bf64.1c69fb81.2982a.172a@mx.google.com> Author: Michal Vyskocil Branch: Changeset: r3200:59a7c949248b Date: 2019-01-09 08:46 +0100 http://bitbucket.org/cffi/cffi/changeset/59a7c949248b/ Log: Document version pinning for pkgconfig diff --git a/cffi/pkgconfig.py b/cffi/pkgconfig.py --- a/cffi/pkgconfig.py +++ b/cffi/pkgconfig.py @@ -31,7 +31,7 @@ Usage ... 
- ffibuilder.set_source("_foo", pkgconfig = ["libfoo", "libbar"]) + ffibuilder.set_source("_foo", pkgconfig = ["libfoo", "libbar >= 1.8.3"]) If pkg-config is installed on build machine, then arguments include_dirs, library_dirs, libraries, define_macros, extra_compile_args and From pypy.commits at gmail.com Thu Jan 31 04:27:01 2019 From: pypy.commits at gmail.com (vyskocilm) Date: Thu, 31 Jan 2019 01:27:01 -0800 (PST) Subject: [pypy-commit] cffi default: Define and raise specific hierarchy of exceptions Message-ID: <5c52bf65.1c69fb81.efa15.60da@mx.google.com> Author: Michal Vyskocil Branch: Changeset: r3201:93d6970fc206 Date: 2019-01-10 13:28 +0100 http://bitbucket.org/cffi/cffi/changeset/93d6970fc206/ Log: Define and raise specific hierarchy of exceptions * PkgConfigNotFound - not installed * PkgConfigError - base pkg-config errors * PkgConfigModuleNotFound - pc file for module was not found * PkgConfigModuleVersionNotFound - requested version was not found Boilerplate now looks ``` from cffi.error import PkgConfigNotFound, PkgConfigError ... 
try: # try pkg-config way ffibuilder.set_source(..., pkgconfig=["libczmq >= 4.0.0"]) except PkgConfigNotFound as e: # if pkg-config is not installed, try backup ffibuilder.set_source(..., libraries=["czmq", "zmq", "uuid", "pgm"]) except PkgConfigError as e: # here we catch both PkgConfigModuleNotFound and PkgConfigModuleVersionNotFound # and raise it again - simply to show they are raised raise e from None ``` diff --git a/cffi/error.py b/cffi/error.py --- a/cffi/error.py +++ b/cffi/error.py @@ -21,3 +21,15 @@ """ An error raised when incomplete structures are passed into cdef, but no verification has been done """ + +class PkgConfigNotFound(Exception): + """ An error raised when pkgconfig was not found""" + +class PkgConfigError(Exception): + """ Generic super class for pkg-config related errors""" + +class PkgConfigModuleNotFound(PkgConfigError): + """ Module or it's pkg-config file was not found on a system""" + +class PkgConfigModuleVersionNotFound(PkgConfigError): + """ Requested version of module was not found""" diff --git a/cffi/pkgconfig.py b/cffi/pkgconfig.py --- a/cffi/pkgconfig.py +++ b/cffi/pkgconfig.py @@ -1,7 +1,11 @@ # pkg-config, https://www.freedesktop.org/wiki/Software/pkg-config/ integration for cffi import subprocess import sys +import re +from .error import PkgConfigNotFound +from .error import PkgConfigModuleVersionNotFound +from .error import PkgConfigError def merge_flags(cfg1, cfg2): """Merge values from cffi config flags cfg2 to cf1 @@ -19,11 +23,29 @@ def call(libname, flag): - """Calls pkg-config and returing the output""" + """Calls pkg-config and returing the output if found + """ a = ["pkg-config", "--print-errors"] a.append(flag) a.append(libname) - return subprocess.check_output(a) + pc = None + try: + pc = subprocess.Popen(a, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + except FileNotFoundError: + pass + if pc is None: + raise PkgConfigNotFound("pkg-config was not found on this system") + + bout, berr = pc.communicate() + if 
berr is not None: + err = berr.decode(sys.getfilesystemencoding()) + if re.search("Package .* was not found in the pkg-config search path", err, re.MULTILINE) is not None: + raise PkgConfigNotFoundError(err) + elif re.search("Requested '.*' but version of ", err, re.MULTILINE): + raise PkgConfigModuleVersionNotFound(err) + else: + PkgConfigError(err) + return bout def flags(libs): @@ -38,8 +60,11 @@ extra_link_args are extended with an output of pkg-config for libfoo and libbar. - Raise `FileNotFoundError` if pkg-config is not installed or - `subprocess.CalledProcessError` if pkg-config fails. + Raises + * PkgConfigNotFound if pkg-config is not installed + * PkgConfigModuleNotFound if requested module not found + * PkgConfigModuleVersionNotFound if requested version does not match + * PkgConfigError for all other errors """ subprocess.check_output(["pkg-config", "--version"]) From pypy.commits at gmail.com Thu Jan 31 04:27:03 2019 From: pypy.commits at gmail.com (vyskocilm) Date: Thu, 31 Jan 2019 01:27:03 -0800 (PST) Subject: [pypy-commit] cffi default: P: cffi must be compatible with 3.2 Message-ID: <5c52bf67.1c69fb81.f3c00.0928@mx.google.com> Author: Michal Vyskocil Branch: Changeset: r3202:d2065033e33b Date: 2019-01-10 13:30 +0100 http://bitbucket.org/cffi/cffi/changeset/d2065033e33b/ Log: P: cffi must be compatible with 3.2 S: can't use u prefixes, as it is syntax error there diff --git a/cffi/pkgconfig.py b/cffi/pkgconfig.py --- a/cffi/pkgconfig.py +++ b/cffi/pkgconfig.py @@ -62,7 +62,7 @@ Raises * PkgConfigNotFound if pkg-config is not installed - * PkgConfigModuleNotFound if requested module not found + * PkgConfigModuleNotFoun if requested module not found * PkgConfigModuleVersionNotFound if requested version does not match * PkgConfigError for all other errors """ @@ -76,18 +76,18 @@ # drop starting -I -L -l from cflags def dropILl(string): def _dropILl(string): - if string.startswith(u"-I") or string.startswith(u"-L") or string.startswith(u"-l"): + if 
string.startswith("-I") or string.startswith("-L") or string.startswith("-l"): return string [2:] return [_dropILl(x) for x in string.split()] # convert -Dfoo=bar to list of tuples [("foo", "bar")] expected by cffi def macros(string): def _macros(string): - return tuple(string [2:].split(u"=", 2)) - return [_macros(x) for x in string.split() if x.startswith(u"-D")] + return tuple(string [2:].split("=", 2)) + return [_macros(x) for x in string.split() if x.startswith("-D")] def drop_macros(string): - return [x for x in string.split() if not x.startswith(u"-D")] + return [x for x in string.split() if not x.startswith("-D")] # return kwargs for given libname def kwargs(libname): From pypy.commits at gmail.com Thu Jan 31 04:27:05 2019 From: pypy.commits at gmail.com (vyskocilm) Date: Thu, 31 Jan 2019 01:27:05 -0800 (PST) Subject: [pypy-commit] cffi default: Reduce number of exceptions Message-ID: <5c52bf69.1c69fb81.412c.1fbc@mx.google.com> Author: Michal Vyskocil Branch: Changeset: r3203:703ab36069d0 Date: 2019-01-11 22:38 +0100 http://bitbucket.org/cffi/cffi/changeset/703ab36069d0/ Log: Reduce number of exceptions diff --git a/cffi/error.py b/cffi/error.py --- a/cffi/error.py +++ b/cffi/error.py @@ -22,14 +22,9 @@ cdef, but no verification has been done """ -class PkgConfigNotFound(Exception): - """ An error raised when pkgconfig was not found""" +class PkgConfigError(Exception): + """ An error raised for all pkg-config related errors + except version mismatch""" -class PkgConfigError(Exception): - """ Generic super class for pkg-config related errors""" - -class PkgConfigModuleNotFound(PkgConfigError): - """ Module or it's pkg-config file was not found on a system""" - -class PkgConfigModuleVersionNotFound(PkgConfigError): - """ Requested version of module was not found""" +class PkgConfigModuleVersionNotFound(Exception): + """ An error raised when requested version was not found""" diff --git a/cffi/pkgconfig.py b/cffi/pkgconfig.py --- a/cffi/pkgconfig.py +++ 
b/cffi/pkgconfig.py @@ -3,7 +3,6 @@ import sys import re -from .error import PkgConfigNotFound from .error import PkgConfigModuleVersionNotFound from .error import PkgConfigError @@ -34,14 +33,12 @@ except FileNotFoundError: pass if pc is None: - raise PkgConfigNotFound("pkg-config was not found on this system") + raise PkgConfigError("pkg-config was not found on this system") bout, berr = pc.communicate() if berr is not None: err = berr.decode(sys.getfilesystemencoding()) - if re.search("Package .* was not found in the pkg-config search path", err, re.MULTILINE) is not None: - raise PkgConfigNotFoundError(err) - elif re.search("Requested '.*' but version of ", err, re.MULTILINE): + if re.search("Requested '.*' but version of ", err, re.MULTILINE) is not None: raise PkgConfigModuleVersionNotFound(err) else: PkgConfigError(err) @@ -61,8 +58,6 @@ libbar. Raises - * PkgConfigNotFound if pkg-config is not installed - * PkgConfigModuleNotFoun if requested module not found * PkgConfigModuleVersionNotFound if requested version does not match * PkgConfigError for all other errors """ From pypy.commits at gmail.com Thu Jan 31 06:14:39 2019 From: pypy.commits at gmail.com (arigo) Date: Thu, 31 Jan 2019 03:14:39 -0800 (PST) Subject: [pypy-commit] cffi default: Tweaks to the pkgconfig support Message-ID: <5c52d89f.1c69fb81.18d25.2ed5@mx.google.com> Author: Armin Rigo Branch: Changeset: r3205:bafc95c0591a Date: 2019-01-31 12:14 +0100 http://bitbucket.org/cffi/cffi/changeset/bafc95c0591a/ Log: Tweaks to the pkgconfig support diff --git a/cffi/__init__.py b/cffi/__init__.py --- a/cffi/__init__.py +++ b/cffi/__init__.py @@ -3,6 +3,7 @@ from .api import FFI from .error import CDefError, FFIError, VerificationError, VerificationMissing +from .error import PkgConfigError __version__ = "1.12.0" __version_info__ = (1, 12, 0) diff --git a/cffi/api.py b/cffi/api.py --- a/cffi/api.py +++ b/cffi/api.py @@ -2,7 +2,6 @@ from .lock import allocate_lock from .error import CDefError from . 
import model -from . import pkgconfig try: callable @@ -642,8 +641,11 @@ raise ValueError("'module_name' must not contain '/': use a dotted " "name to make a 'package.module' location") if "pkgconfig" in kwds: - pkgconfig.merge_flags(kwds, pkgconfig.flags(kwds["pkgconfig"])) - del kwds["pkgconfig"] + from . import pkgconfig + libs = kwds.pop("pkgconfig") + if not isinstance(libs, (list, tuple)): + libs = [libs] + pkgconfig.merge_flags(kwds, pkgconfig.flags_from_pkgconfig(libs)) self._assigned_source = (str(module_name), source, source_extension, kwds) diff --git a/cffi/error.py b/cffi/error.py --- a/cffi/error.py +++ b/cffi/error.py @@ -1,8 +1,9 @@ class FFIError(Exception): - pass + __module__ = 'cffi' class CDefError(Exception): + __module__ = 'cffi' def __str__(self): try: current_decl = self.args[1] @@ -16,15 +17,15 @@ class VerificationError(Exception): """ An error raised when verification fails """ + __module__ = 'cffi' class VerificationMissing(Exception): """ An error raised when incomplete structures are passed into cdef, but no verification has been done """ + __module__ = 'cffi' class PkgConfigError(Exception): - """ An error raised for all pkg-config related errors - except version mismatch""" - -class PkgConfigModuleVersionNotFound(Exception): - """ An error raised when requested version was not found""" + """ An error raised for missing modules in pkg-config + """ + __module__ = 'cffi' diff --git a/cffi/pkgconfig.py b/cffi/pkgconfig.py --- a/cffi/pkgconfig.py +++ b/cffi/pkgconfig.py @@ -1,51 +1,63 @@ # pkg-config, https://www.freedesktop.org/wiki/Software/pkg-config/ integration for cffi -import subprocess -import sys -import re +import sys, os, subprocess -from .error import PkgConfigModuleVersionNotFound from .error import PkgConfigError + def merge_flags(cfg1, cfg2): """Merge values from cffi config flags cfg2 to cf1 Example: - merge_flags({"libraries": ["one"]}, {"libraries": "two"}) - {"libraries}" : ["one", "two"]} + merge_flags({"libraries": 
["one"]}, {"libraries": ["two"]}) + {"libraries": ["one", "two"]} """ for key, value in cfg2.items(): - if not key in cfg1: - cfg1 [key] = value + if key not in cfg1: + cfg1[key] = value else: - cfg1 [key].extend(value) + if not isinstance(cfg1[key], list): + raise TypeError("cfg1[%r] should be a list of strings" % (key,)) + if not isinstance(value, list): + raise TypeError("cfg2[%r] should be a list of strings" % (key,)) + cfg1[key].extend(value) return cfg1 -def call(libname, flag): - """Calls pkg-config and returing the output if found +def call(libname, flag, encoding=sys.getfilesystemencoding()): + """Calls pkg-config and returns the output if found """ a = ["pkg-config", "--print-errors"] a.append(flag) a.append(libname) - pc = None try: pc = subprocess.Popen(a, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - except FileNotFoundError: - pass - if pc is None: - raise PkgConfigError("pkg-config was not found on this system") - + except EnvironmentError as e: + raise PkgConfigError("cannot run pkg-config: %s" % (str(e).strip(),)) + bout, berr = pc.communicate() - if berr is not None: - err = berr.decode(sys.getfilesystemencoding()) - if re.search("Requested '.*' but version of ", err, re.MULTILINE) is not None: - raise PkgConfigModuleVersionNotFound(err) - else: - PkgConfigError(err) + if pc.returncode != 0: + try: + berr = berr.decode(encoding) + except Exception: + pass + raise PkgConfigError(berr.strip()) + + if sys.version_info >= (3,) and not isinstance(bout, str): # Python 3.x + try: + bout = bout.decode(encoding) + except UnicodeDecodeError: + raise PkgConfigError("pkg-config %s %s returned bytes that cannot " + "be decoded with encoding %r:\n%r" % + (flag, libname, encoding, bout)) + + if os.altsep != '\\' and '\\' in bout: + raise PkgConfigError("pkg-config %s %s returned an unsupported " + "backslash-escaped output:\n%r" % + (flag, libname, bout)) return bout -def flags(libs): +def flags_from_pkgconfig(libs): r"""Return compiler line flags for 
FFI.set_source based on pkg-config output Usage @@ -57,49 +69,53 @@ extra_link_args are extended with an output of pkg-config for libfoo and libbar. - Raises - * PkgConfigModuleVersionNotFound if requested version does not match - * PkgConfigError for all other errors + Raises PkgConfigError in case the pkg-config call fails. """ - subprocess.check_output(["pkg-config", "--version"]) + def get_include_dirs(string): + return [x[2:] for x in string.split() if x.startswith("-I")] - # make API great again! - if isinstance(libs, (str, bytes)): - libs = (libs, ) - - # drop starting -I -L -l from cflags - def dropILl(string): - def _dropILl(string): - if string.startswith("-I") or string.startswith("-L") or string.startswith("-l"): - return string [2:] - return [_dropILl(x) for x in string.split()] + def get_library_dirs(string): + return [x[2:] for x in string.split() if x.startswith("-L")] - # convert -Dfoo=bar to list of tuples [("foo", "bar")] expected by cffi - def macros(string): - def _macros(string): - return tuple(string [2:].split("=", 2)) - return [_macros(x) for x in string.split() if x.startswith("-D")] + def get_libraries(string): + return [x[2:] for x in string.split() if x.startswith("-l")] - def drop_macros(string): - return [x for x in string.split() if not x.startswith("-D")] + # convert -Dfoo=bar to list of tuples [("foo", "bar")] expected by distutils + def get_macros(string): + def _macro(x): + x = x[2:] # drop "-D" + if '=' in x: + return tuple(x.split("=", 1)) # "-Dfoo=bar" => ("foo", "bar") + else: + return (x, None) # "-Dfoo" => ("foo", None) + return [_macro(x) for x in string.split() if x.startswith("-D")] + + def get_other_cflags(string): + return [x for x in string.split() if not x.startswith("-I") and + not x.startswith("-D")] + + def get_other_libs(string): + return [x for x in string.split() if not x.startswith("-L") and + not x.startswith("-l")] # return kwargs for given libname def kwargs(libname): fse = sys.getfilesystemencoding() + 
all_cflags = call(libname, "--cflags") + all_libs = call(libname, "--libs") return { - "include_dirs" : dropILl(call(libname, "--cflags-only-I").decode(fse)), - "library_dirs" : dropILl(call(libname, "--libs-only-L").decode(fse)), - "libraries" : dropILl(call(libname, "--libs-only-l").decode(fse)), - "define_macros" : macros(call(libname, "--cflags-only-other").decode('ascii')), - "extra_compile_args" : drop_macros(call(libname, "--cflags-only-other").decode('ascii')), - "extra_link_args" : call(libname, "--libs-only-other").decode('ascii').split() - } + "include_dirs": get_include_dirs(all_cflags), + "library_dirs": get_library_dirs(all_libs), + "libraries": get_libraries(all_libs), + "define_macros": get_macros(all_cflags), + "extra_compile_args": get_other_cflags(all_cflags), + "extra_link_args": get_other_libs(all_libs), + } # merge all arguments together ret = {} for libname in libs: - foo = kwargs(libname) - merge_flags(ret, foo) - + lib_flags = kwargs(libname) + merge_flags(ret, lib_flags) return ret diff --git a/testing/cffi1/test_pkgconfig.py b/testing/cffi1/test_pkgconfig.py --- a/testing/cffi1/test_pkgconfig.py +++ b/testing/cffi1/test_pkgconfig.py @@ -2,24 +2,19 @@ import subprocess import py import cffi.pkgconfig as pkgconfig +from cffi import PkgConfigError + def mock_call(libname, flag): - assert libname=="python-3.6", "mocked pc function supports python-3.6 input ONLY" - + assert libname=="foobarbaz" flags = { - "--cflags-only-I": b"-I/usr/include/python3.6m\n", - "--libs-only-L": b"-L/usr/lib64\n", - "--libs-only-l": b"-lpython3.6\n", - "--cflags-only-other": b"-DCFFI_TEST=1 -O42\n", - "--libs-only-other": b"-lm\n", + "--cflags": "-I/usr/include/python3.6m -DABCD -DCFFI_TEST=1 -O42\n", + "--libs": "-L/usr/lib64 -lpython3.6 -shared\n", } return flags[flag] -pkgconfig.call = mock_call - def test_merge_flags(): - d1 = {"ham": [1, 2, 3], "spam" : ["a", "b", "c"], "foo" : []} d2 = {"spam" : ["spam", "spam", "spam"], "bar" : ["b", "a", "z"]} @@ -32,12 
+27,68 @@ def test_pkgconfig(): - flags = pkgconfig.flags("python-3.6") + assert pkgconfig.flags_from_pkgconfig([]) == {} + + saved = pkgconfig.call + try: + pkgconfig.call = mock_call + flags = pkgconfig.flags_from_pkgconfig(["foobarbaz"]) + finally: + pkgconfig.call = saved assert flags == { - 'include_dirs': [u'/usr/include/python3.6m'], - 'library_dirs': [u'/usr/lib64'], - 'libraries': [u'python3.6'], - 'define_macros': [(u'CFFI_TEST', u'1')], - 'extra_compile_args': [u'-O42'], - 'extra_link_args': [u'-lm'] + 'include_dirs': ['/usr/include/python3.6m'], + 'library_dirs': ['/usr/lib64'], + 'libraries': ['python3.6'], + 'define_macros': [('ABCD', None), ('CFFI_TEST', '1')], + 'extra_compile_args': ['-O42'], + 'extra_link_args': ['-shared'] } + +class mock_subprocess: + PIPE = Ellipsis + class Popen: + def __init__(self, cmd, stdout, stderr): + if mock_subprocess.RESULT is None: + raise OSError("oops can't run") + assert cmd == ['pkg-config', '--print-errors', '--cflags', 'libfoo'] + def communicate(self): + bout, berr, rc = mock_subprocess.RESULT + self.returncode = rc + return bout, berr + +def test_call(): + saved = pkgconfig.subprocess + try: + pkgconfig.subprocess = mock_subprocess + + mock_subprocess.RESULT = None + e = py.test.raises(PkgConfigError, pkgconfig.call, "libfoo", "--cflags") + assert str(e.value) == "cannot run pkg-config: oops can't run" + + mock_subprocess.RESULT = b"", "Foo error!\n", 1 + e = py.test.raises(PkgConfigError, pkgconfig.call, "libfoo", "--cflags") + assert str(e.value) == "Foo error!" 
+ + mock_subprocess.RESULT = b"abc\\def\n", "", 0 + e = py.test.raises(PkgConfigError, pkgconfig.call, "libfoo", "--cflags") + assert str(e.value).startswith("pkg-config --cflags libfoo returned an " + "unsupported backslash-escaped output:") + + mock_subprocess.RESULT = b"abc def\n", "", 0 + result = pkgconfig.call("libfoo", "--cflags") + assert result == "abc def\n" + + mock_subprocess.RESULT = b"abc def\n", "", 0 + result = pkgconfig.call("libfoo", "--cflags") + assert result == "abc def\n" + + if sys.version_info >= (3,): + mock_subprocess.RESULT = b"\xff\n", "", 0 + e = py.test.raises(PkgConfigError, pkgconfig.call, + "libfoo", "--cflags", encoding="utf-8") + assert str(e.value) == ( + "pkg-config --cflags libfoo returned bytes that cannot be " + "decoded with encoding 'utf-8':\nb'\\xff\\n'") + + finally: + pkgconfig.subprocess = saved From pypy.commits at gmail.com Thu Jan 31 06:44:06 2019 From: pypy.commits at gmail.com (arigo) Date: Thu, 31 Jan 2019 03:44:06 -0800 (PST) Subject: [pypy-commit] cffi default: Change the API and document it. Message-ID: <5c52df86.1c69fb81.2ee5d.f06e@mx.google.com> Author: Armin Rigo Branch: Changeset: r3206:cfa9cfaf529e Date: 2019-01-31 12:44 +0100 http://bitbucket.org/cffi/cffi/changeset/cfa9cfaf529e/ Log: Change the API and document it. diff --git a/cffi/api.py b/cffi/api.py --- a/cffi/api.py +++ b/cffi/api.py @@ -640,15 +640,19 @@ if os.sep in module_name or (os.altsep and os.altsep in module_name): raise ValueError("'module_name' must not contain '/': use a dotted " "name to make a 'package.module' location") - if "pkgconfig" in kwds: - from . import pkgconfig - libs = kwds.pop("pkgconfig") - if not isinstance(libs, (list, tuple)): - libs = [libs] - pkgconfig.merge_flags(kwds, pkgconfig.flags_from_pkgconfig(libs)) self._assigned_source = (str(module_name), source, source_extension, kwds) + def set_source_pkgconfig(self, module_name, pkgconfig_libs, source, + source_extension='.c', **kwds): + from . 
import pkgconfig + if not isinstance(pkgconfig_libs, list): + raise TypeError("the pkgconfig_libs argument must be a list " + "of package names") + kwds2 = pkgconfig.flags_from_pkgconfig(pkgconfig_libs) + pkgconfig.merge_flags(kwds, kwds2) + self.set_source(module_name, source, source_extension, **kwds) + def distutils_extension(self, tmpdir='build', verbose=True): from distutils.dir_util import mkpath from .recompiler import recompile diff --git a/doc/source/cdef.rst b/doc/source/cdef.rst --- a/doc/source/cdef.rst +++ b/doc/source/cdef.rst @@ -431,6 +431,23 @@ } ''', source_extension='.cpp') +.. _pkgconfig: + +**ffibuilder.set_source_pkgconfig(module_name, pkgconfig_libs, +c_header_source, [\*\*keywords...])**: + +*New in version 1.12.* This is equivalent to ``set_source()`` but it +first calls the system utility ``pkg-config`` with the package names +given in the list ``pkgconfig_libs``. It collects the information +obtained in this way and adds it to the explicitly-provided +``**keywords`` (if any). This should probably not be used on Windows. + +If the ``pkg-config`` program is not installed or does not know about +the requested library, the call fails with ``cffi.PkgConfigError``. If +necessary, you can catch this error and try to call ``set_source()`` +directly (or, equivalently, call ``set_source_pkgconfig()`` with an +empty list as the ``pkgconfig_libs`` argument). + Letting the C compiler fill the gaps ------------------------------------ diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst --- a/doc/source/whatsnew.rst +++ b/doc/source/whatsnew.rst @@ -36,7 +36,10 @@ cffi would contain possible crashes and/or memory leaks. Hopefully, this has been fixed (see `issue #362`_). +* `Direct support for pkg-config`__. + .. _`issue #362`: https://bitbucket.org/cffi/cffi/issues/362/ +.. 
__: cdef.html#pkgconfig v1.11.5 From pypy.commits at gmail.com Thu Jan 31 06:47:52 2019 From: pypy.commits at gmail.com (arigo) Date: Thu, 31 Jan 2019 03:47:52 -0800 (PST) Subject: [pypy-commit] cffi default: tweak docs Message-ID: <5c52e068.1c69fb81.412c.4e9d@mx.google.com> Author: Armin Rigo Branch: Changeset: r3207:32960b92c175 Date: 2019-01-31 12:47 +0100 http://bitbucket.org/cffi/cffi/changeset/32960b92c175/ Log: tweak docs diff --git a/doc/source/cdef.rst b/doc/source/cdef.rst --- a/doc/source/cdef.rst +++ b/doc/source/cdef.rst @@ -445,8 +445,9 @@ If the ``pkg-config`` program is not installed or does not know about the requested library, the call fails with ``cffi.PkgConfigError``. If necessary, you can catch this error and try to call ``set_source()`` -directly (or, equivalently, call ``set_source_pkgconfig()`` with an -empty list as the ``pkgconfig_libs`` argument). +directly. (Ideally, you should also do that if the ``ffibuilder`` +instance has no method ``set_source_pkgconfig()``, to support older +versions of cffi.) 
Letting the C compiler fill the gaps From pypy.commits at gmail.com Thu Jan 31 11:12:10 2019 From: pypy.commits at gmail.com (rlamy) Date: Thu, 31 Jan 2019 08:12:10 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5c531e5a.1c69fb81.8a370.8c93@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r95756:e01a180ee74a Date: 2019-01-31 16:11 +0000 http://bitbucket.org/pypy/pypy/changeset/e01a180ee74a/ Log: hg merge default diff --git a/extra_tests/cffi_tests/cffi0/test_function.py b/extra_tests/cffi_tests/cffi0/test_function.py --- a/extra_tests/cffi_tests/cffi0/test_function.py +++ b/extra_tests/cffi_tests/cffi0/test_function.py @@ -46,14 +46,15 @@ assert x != math.sin(1.23) # rounding effects assert abs(x - math.sin(1.23)) < 1E-6 - def test_lround_no_return_value(self): + def test_getenv_no_return_value(self): # check that 'void'-returning functions work too ffi = FFI(backend=self.Backend()) ffi.cdef(""" - void lround(double x); + void getenv(char *); """) - m = ffi.dlopen(lib_m) - x = m.lround(1.23) + needs_dlopen_none() + m = ffi.dlopen(None) + x = m.getenv(b"FOO") assert x is None def test_dlopen_filename(self): diff --git a/pypy/doc/interpreter.rst b/pypy/doc/interpreter.rst --- a/pypy/doc/interpreter.rst +++ b/pypy/doc/interpreter.rst @@ -156,7 +156,7 @@ environment found in `Frames`. Frames and Functions have references to a code object. Here is a list of Code attributes: -* ``co_flags`` flags if this code object has nested scopes/generators +* ``co_flags`` flags if this code object has nested scopes/generators/etc. 
* ``co_stacksize`` the maximum depth the stack can reach while executing the code * ``co_code`` the actual bytecode string diff --git a/pypy/doc/release-v7.0.0.rst b/pypy/doc/release-v7.0.0.rst --- a/pypy/doc/release-v7.0.0.rst +++ b/pypy/doc/release-v7.0.0.rst @@ -107,7 +107,10 @@ ``PyOS_InputHook``, ``PyErr_FormatFromCause`` (Py3.6), * Implement new wordcode instruction encoding (Py3.6) * Log additional gc-minor and gc-collect-step info in the PYPYLOG -* Set ``reverse-debugger`` active by default. For more information, see +* The ``reverse-debugger`` (revdb) branch has been merged to the default + branch, so it should always be up-to-date. You still need a special pypy + build, but you can compile it from the same source as the one we distribute + for the v7.0.0 release. For more information, see https://bitbucket.org/pypy/revdb * Support underscores in numerical literals like ``'4_2'`` (Py3.6) * Pre-emptively raise MemoryError if the size of dequeue in ``_collections.deque`` @@ -126,7 +129,8 @@ * Speed up ``max(list-of-int)`` from non-jitted code * Fix Windows ``os.listdir()`` for some cases (see CPython #32539) * Add ``select.PIPE_BUF`` -* Use ``subprocess`` to avoid shell injection in ``shutil`` module +* Use ``subprocess`` to avoid shell injection in ``shutil`` module - backport + of https://bugs.python.org/issue34540 * Rename ``_Py_ZeroStruct`` to ``_Py_FalseStruct`` (Py3.5, Py3.6) * Remove some cpyext names for Py3.5, Py3.6 * Enable use of unicode file names in ``dlopen`` diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -512,6 +512,7 @@ header = DEFAULT_HEADER if func.__name__ in FUNCTIONS_BY_HEADER[header]: raise ValueError("%s already registered" % func.__name__) + func._revdb_c_only_ = True # hack for revdb api_function = COnlyApiFunction(argtypes, restype, func) FUNCTIONS_BY_HEADER[header][func.__name__] = api_function return api_function diff --git 
a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py --- a/rpython/rlib/rgc.py +++ b/rpython/rlib/rgc.py @@ -67,7 +67,7 @@ """Returns True if we have a "split GC address space", i.e. if we are translating with an option that doesn't support taking raw addresses inside GC objects and "hacking" at them. This is - notably the case with --reversedb.""" + notably the case with --revdb.""" return False # for test purposes we allow objects to be pinned and use diff --git a/rpython/rlib/src/boehm-rawrefcount.c b/rpython/rlib/src/boehm-rawrefcount.c --- a/rpython/rlib/src/boehm-rawrefcount.c +++ b/rpython/rlib/src/boehm-rawrefcount.c @@ -191,6 +191,7 @@ #endif assert(result->ob_refcnt == REFCNT_FROM_PYPY); result->ob_refcnt = 1; + result->ob_pypy_link = 0; p->pyobj = NULL; *pp = p->next_in_bucket; p->next_in_bucket = hash_free_list; diff --git a/rpython/translator/revdb/gencsupp.py b/rpython/translator/revdb/gencsupp.py --- a/rpython/translator/revdb/gencsupp.py +++ b/rpython/translator/revdb/gencsupp.py @@ -51,6 +51,10 @@ ## return False def prepare_function(funcgen): + if getattr(getattr(funcgen.graph, 'func', None), '_revdb_c_only_', False): + extra_enter_text = 'RPY_REVDB_C_ONLY_ENTER' + extra_return_text = 'RPY_REVDB_C_ONLY_LEAVE' + return extra_enter_text, extra_return_text stack_bottom = False for block in funcgen.graph.iterblocks(): for op in block.operations: diff --git a/rpython/translator/revdb/src-revdb/revdb.c b/rpython/translator/revdb/src-revdb/revdb.c --- a/rpython/translator/revdb/src-revdb/revdb.c +++ b/rpython/translator/revdb/src-revdb/revdb.c @@ -253,7 +253,10 @@ "(use REVDB=logfile)\n", (int)getpid()); } - rpy_revdb.buf_p = rpy_rev_buffer + sizeof(int16_t); + if (rpy_rev_fileno >= 0) + rpy_revdb.buf_p = rpy_rev_buffer + sizeof(int16_t); + else + rpy_revdb.buf_p = NULL; rpy_revdb.buf_limit = rpy_rev_buffer + sizeof(rpy_rev_buffer) - 32; rpy_revdb.unique_id_seen = 1; @@ -269,17 +272,23 @@ ssize_t full_size; assert(rpy_revdb.lock); + if (rpy_revdb.buf_p == NULL) + 
return; + assert(rpy_rev_fileno >= 0); + /* write the current buffer content to the OS */ full_size = rpy_revdb.buf_p - rpy_rev_buffer; rpy_revdb.buf_p = rpy_rev_buffer + sizeof(int16_t); - if (rpy_rev_fileno >= 0) - write_all(rpy_rev_buffer, full_size); + write_all(rpy_rev_buffer, full_size); } static ssize_t current_packet_size(void) { /* must be called with the lock held */ - return rpy_revdb.buf_p - (rpy_rev_buffer + sizeof(int16_t)); + if (rpy_revdb.buf_p != NULL) + return rpy_revdb.buf_p - (rpy_rev_buffer + sizeof(int16_t)); + else + return 0; } RPY_EXTERN @@ -327,6 +336,11 @@ rpy_reverse_db_flush(); assert(current_packet_size() == 0); + if (rpy_rev_fileno < 0) + return; + /* should not be here from the middle of a @c_only function */ + assert(rpy_revdb.buf_p != NULL); + *(int16_t *)p = async_code; memcpy(rpy_revdb.buf_p, &content, sizeof(uint64_t)); rpy_revdb.buf_p += sizeof(uint64_t); @@ -472,6 +486,9 @@ if (rpy_rev_fileno < 0) return 1; + /* should not be here from the middle of a @c_only function */ + assert(rpy_revdb.buf_p != NULL); + base_offset = lseek(rpy_rev_fileno, 0, SEEK_CUR); if (base_offset < 0) { perror("lseek"); @@ -488,6 +505,9 @@ if (rpy_rev_fileno < 0) return; + /* should not be here from the middle of a @c_only function */ + assert(rpy_revdb.buf_p != NULL); + base_offset = lseek(rpy_rev_fileno, 0, SEEK_CUR); if (base_offset < 0) { perror("lseek"); @@ -1033,9 +1053,9 @@ " echo 0 | sudo tee /proc/sys/kernel/randomize_va_space\n" "\n" "It has been reported that on Linux kernel 4.12.4-1-ARCH,\n" - "ASLR cannot be disabled at all for libpypy-c.so. For now\n" - "there is no good solution. Either you downgrade the\n" - "kernel, or you translate with --no-shared (and you loose\n" + "ASLR cannot be disabled at all for libpypy-c.so. It works\n" + "again in kernel 4.19 (and maybe sooner). 
Either change\n" + "kernels, or translate with --no-shared (but then you loose\n" "PyPy's cpyext ability).\n" "\n", argv[0]); exit(1); diff --git a/rpython/translator/revdb/src-revdb/revdb_include.h b/rpython/translator/revdb/src-revdb/revdb_include.h --- a/rpython/translator/revdb/src-revdb/revdb_include.h +++ b/rpython/translator/revdb/src-revdb/revdb_include.h @@ -16,7 +16,8 @@ #endif bool_t watch_enabled; int lock; - char *buf_p, *buf_limit, *buf_readend; + char *buf_p; /* NULL during recording if recording is actually disabled */ + char *buf_limit, *buf_readend; uint64_t stop_point_seen, stop_point_break; uint64_t unique_id_seen, unique_id_break; } rpy_revdb_t; @@ -85,9 +86,13 @@ { \ decl_e = variable; \ _RPY_REVDB_PRINT("[ wr ]", _e); \ - memcpy(rpy_revdb.buf_p, &_e, sizeof(_e)); \ - if ((rpy_revdb.buf_p += sizeof(_e)) > rpy_revdb.buf_limit) \ - rpy_reverse_db_flush(); \ + char *_dst = rpy_revdb.buf_p; \ + if (_dst) { \ + memcpy(_dst, &_e, sizeof(_e)); \ + if ((rpy_revdb.buf_p = _dst + sizeof(_e)) \ + > rpy_revdb.buf_limit) \ + rpy_reverse_db_flush(); \ + } \ } #define _RPY_REVDB_EMIT_REPLAY(decl_e, variable) \ @@ -179,6 +184,13 @@ rpy_reverse_db_bad_acquire_gil("release"); \ } +#define RPY_REVDB_C_ONLY_ENTER \ + char *saved_bufp = rpy_revdb.buf_p; \ + rpy_revdb.buf_p = NULL; + +#define RPY_REVDB_C_ONLY_LEAVE \ + rpy_revdb.buf_p = saved_bufp; + #define RPY_REVDB_CALLBACKLOC(locnum) \ rpy_reverse_db_callback_loc(locnum) From pypy.commits at gmail.com Thu Jan 31 11:39:44 2019 From: pypy.commits at gmail.com (antocuni) Date: Thu, 31 Jan 2019 08:39:44 -0800 (PST) Subject: [pypy-commit] pypy default: reword this paraphraph, as gc hooks were already in 6.0 Message-ID: <5c5324d0.1c69fb81.4632b.2e01@mx.google.com> Author: Antonio Cuni Branch: Changeset: r95757:24e4b884491f Date: 2019-01-31 17:38 +0100 http://bitbucket.org/pypy/pypy/changeset/24e4b884491f/ Log: reword this paraphraph, as gc hooks were already in 6.0 diff --git a/pypy/doc/release-v7.0.0.rst 
b/pypy/doc/release-v7.0.0.rst --- a/pypy/doc/release-v7.0.0.rst +++ b/pypy/doc/release-v7.0.0.rst @@ -19,9 +19,10 @@ Until we can work with downstream providers to distribute builds with PyPy, we have made packages for some common packages `available as wheels`_. -The GC now has `hooks`_ to gain more insights into its performance, and it is -now possible to manually manage the GC by using a combination of -``gc.disable`` and ``gc.collect_step``. See the `GC blog post`_. +The GC `hooks`_ , which can be used to gain more insights into its +performance, has been improved and it is now possible to manually manage the +GC by using a combination of ``gc.disable`` and ``gc.collect_step``. See the +`GC blog post`_. We updated the `cffi`_ module included in PyPy to version 1.12, and the From pypy.commits at gmail.com Thu Jan 31 11:42:53 2019 From: pypy.commits at gmail.com (rlamy) Date: Thu, 31 Jan 2019 08:42:53 -0800 (PST) Subject: [pypy-commit] pypy py3tests: hg merge py3.5 Message-ID: <5c53258d.1c69fb81.335cf.a595@mx.google.com> Author: Ronan Lamy Branch: py3tests Changeset: r95758:9e5517b71594 Date: 2019-01-31 16:13 +0000 http://bitbucket.org/pypy/pypy/changeset/9e5517b71594/ Log: hg merge py3.5 diff too long, truncating to 2000 out of 15743 lines diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -30,7 +30,7 @@ DEALINGS IN THE SOFTWARE. 
-PyPy Copyright holders 2003-2018 +PyPy Copyright holders 2003-2019 -------------------------------- Except when otherwise stated (look for LICENSE files or information at @@ -40,16 +40,16 @@ Armin Rigo Maciej Fijalkowski Carl Friedrich Bolz-Tereick + Antonio Cuni Amaury Forgeot d'Arc - Antonio Cuni Matti Picus Samuele Pedroni Ronan Lamy Alex Gaynor Philip Jenvey + Richard Plangger Brian Kearns - Richard Plangger - Michael Hudson + Michael Hudson-Doyle Manuel Jacob David Schneider Holger Krekel @@ -59,8 +59,8 @@ Anders Chrigstrom Wim Lavrijsen Eric van Riet Paap + Remi Meier Richard Emslie - Remi Meier Alexander Schremmer Dan Villiom Podlaski Christiansen Lukas Diekmann @@ -70,10 +70,10 @@ Niklaus Haldimann Camillo Bruni Laura Creighton - Romain Guillebert Toon Verwaest Leonardo Santagada Seo Sanghyeon + Romain Guillebert Ronny Pfannschmidt Justin Peel Raffael Tfirst @@ -114,12 +114,12 @@ Squeaky Edd Barrett Timo Paulssen + Laurence Tratt Marius Gedminas Nicolas Truessel Alexandre Fayolle Simon Burton Martin Matusiak - Laurence Tratt Wenzhu Man Konstantin Lopuhin John Witulski @@ -134,8 +134,9 @@ Jean-Philippe St. 
Pierre Guido van Rossum Pavel Vinogradov + Stefan Beyer + William Leslie Paweł Piotr Przeradowski - William Leslie marky1991 Ilya Osadchiy Tobias Oberstein @@ -144,10 +145,10 @@ Taavi Burns Adrian Kuhn tav + Stian Andreassen Georg Brandl Joannah Nanjekye Bert Freudenberg - Stian Andreassen Wanja Saatkamp Mike Blume Gerald Klix @@ -163,6 +164,7 @@ Vasily Kuznetsov Preston Timmons David Ripton + Pieter Zieschang Dusty Phillips Lukas Renggli Guenter Jantzen @@ -176,6 +178,7 @@ Andrew Durdin Ben Young Michael Schneider + Yusuke Tsutsumi Nicholas Riley Jason Chu Igor Trindade Oliveira @@ -187,7 +190,6 @@ Mariano Anaya anatoly techtonik Karl Bartel - Stefan Beyer Gabriel Lavoie Jared Grubb Alecsandru Patrascu @@ -198,7 +200,6 @@ Victor Stinner Andrews Medina Aaron Iles - p_zieschang at yahoo.de Toby Watson Daniel Patrick Stuart Williams @@ -210,6 +211,7 @@ Mikael Schönenberg Stanislaw Halik Mihnea Saracin + Matt Jackson Berkin Ilbeyi Gasper Zejn Faye Zhao @@ -217,12 +219,14 @@ Anders Qvist Corbin Simpson Chirag Jadwani + Pauli Virtanen Jonathan David Riehl Beatrice During Alex Perry Robert Zaremba Alan McIntyre Alexander Sedov + David C Ellis Vaibhav Sood Reuben Cummings Attila Gobi @@ -242,7 +246,6 @@ Arjun Naik Aaron Gallagher Alexis Daboville - Pieter Zieschang Karl Ramm Lukas Vacek Omer Katz @@ -270,12 +273,15 @@ Catalin Gabriel Manciu Jacob Oscarson Ryan Gonzalez + Antoine Dupre Kristjan Valur Jonsson Lucio Torre Richard Lancaster Dan Buch Lene Wagner Tomo Cocoa + Miro Hrončok + Anthony Sottile David Lievens Neil Blakey-Milner Henrik Vendelbo @@ -290,10 +296,12 @@ Bobby Impollonia Roberto De Ioris Jeong YunWon + andrewjlawrence Christopher Armstrong Aaron Tubbs Vasantha Ganesh K Jason Michalski + Radu Ciorba Markus Holtermann Andrew Thompson Yusei Tahara @@ -301,28 +309,26 @@ Fabio Niephaus Akira Li Gustavo Niemeyer - Rafał Gałczyński + Nate Bragg Lucas Stadler roberto at goyle + Carl Bordum Hansen Matt Bogosian Yury V. 
Zaytsev florinpapa Anders Sigfridsson - Matt Jackson Nikolay Zinov rafalgalczynski at gmail.com Joshua Gilbert Anna Katrina Dominguez Kim Jin Su Amber Brown - Miro Hrončok - Anthony Sottile - Nate Bragg + Andrew Stepanov + Rafał Gałczyński Ben Darnell Juan Francisco Cantero Hurtado Godefroid Chappelle Julian Berman - Michael Hudson-Doyle Stephan Busemann Dan Colish timo @@ -332,6 +338,7 @@ halgari Jim Baker Chris Lambacher + John Aldis coolbutuseless at gmail.com Mike Bayer Rodrigo Araújo @@ -340,6 +347,7 @@ OlivierBlanvillain Jonas Pfannschmidt Zearin + Johan Forsberg Andrey Churin Dan Crosta reubano at gmail.com @@ -349,8 +357,9 @@ Steve Papanik Eli Stevens Boglarka Vezer - gabrielg + gabrielg at ec2-54-146-239-158.compute-1.amazonaws.com PavloKapyshin + Hervé Beraud Tomer Chachamu Christopher Groskopf Asmo Soinio @@ -364,7 +373,6 @@ Michael Chermside Anna Ravencroft remarkablerocket - Pauli Virtanen Petre Vijiac Berker Peksag Christian Muirhead @@ -384,12 +392,13 @@ Zooko Wilcox-O Hearn James Lan jiaaro + Evgenii Gorinov Markus Unterwaditzer Kristoffer Kleine Graham Markall Dan Loewenherz werat - Andrew Stepanov + Filip Salomonsson Niclas Olofsson Chris Pressey Tobias Diaz diff --git a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py --- a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py +++ b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py @@ -325,8 +325,31 @@ a = array.array('H', [10000, 20000, 30000]) c = ffi.from_buffer(a) assert ffi.typeof(c) is ffi.typeof("char[]") + assert len(c) == 6 ffi.cast("unsigned short *", c)[1] += 500 assert list(a) == [10000, 20500, 30000] + assert c == ffi.from_buffer("char[]", a, True) + assert c == ffi.from_buffer(a, require_writable=True) + # + c = ffi.from_buffer("unsigned short[]", a) + assert len(c) == 3 + assert c[1] == 20500 + # + p = ffi.from_buffer(b"abcd") + assert p[2] == b"c" + # + assert p == ffi.from_buffer(b"abcd", require_writable=False) + 
py.test.raises((TypeError, BufferError), ffi.from_buffer, + "char[]", b"abcd", True) + py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", + require_writable=True) + + def test_release(self): + ffi = FFI() + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or segfault... + ffi.release(p) # no effect def test_memmove(self): ffi = FFI() diff --git a/extra_tests/cffi_tests/cffi0/test_function.py b/extra_tests/cffi_tests/cffi0/test_function.py --- a/extra_tests/cffi_tests/cffi0/test_function.py +++ b/extra_tests/cffi_tests/cffi0/test_function.py @@ -46,14 +46,15 @@ assert x != math.sin(1.23) # rounding effects assert abs(x - math.sin(1.23)) < 1E-6 - def test_lround_no_return_value(self): + def test_getenv_no_return_value(self): # check that 'void'-returning functions work too ffi = FFI(backend=self.Backend()) ffi.cdef(""" - void lround(double x); + void getenv(char *); """) - m = ffi.dlopen(lib_m) - x = m.lround(1.23) + needs_dlopen_none() + m = ffi.dlopen(None) + x = m.getenv(b"FOO") assert x is None def test_dlopen_filename(self): diff --git a/extra_tests/cffi_tests/cffi1/test_ffi_obj.py b/extra_tests/cffi_tests/cffi1/test_ffi_obj.py --- a/extra_tests/cffi_tests/cffi1/test_ffi_obj.py +++ b/extra_tests/cffi_tests/cffi1/test_ffi_obj.py @@ -239,11 +239,33 @@ def test_ffi_from_buffer(): import array ffi = _cffi1_backend.FFI() - a = array.array('H', [10000, 20000, 30000]) + a = array.array('H', [10000, 20000, 30000, 40000]) c = ffi.from_buffer(a) assert ffi.typeof(c) is ffi.typeof("char[]") + assert len(c) == 8 ffi.cast("unsigned short *", c)[1] += 500 - assert list(a) == [10000, 20500, 30000] + assert list(a) == [10000, 20500, 30000, 40000] + py.test.raises(TypeError, ffi.from_buffer, a, True) + assert c == ffi.from_buffer("char[]", a, True) + assert c == ffi.from_buffer(a, require_writable=True) + # + c = ffi.from_buffer("unsigned short[]", a) + assert len(c) == 4 + assert c[1] == 20500 + # + c = 
ffi.from_buffer("unsigned short[2][2]", a) + assert len(c) == 2 + assert len(c[0]) == 2 + assert c[0][1] == 20500 + # + p = ffi.from_buffer(b"abcd") + assert p[2] == b"c" + # + assert p == ffi.from_buffer(b"abcd", require_writable=False) + py.test.raises((TypeError, BufferError), ffi.from_buffer, + "char[]", b"abcd", True) + py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", + require_writable=True) def test_memmove(): ffi = _cffi1_backend.FFI() diff --git a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py --- a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py +++ b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py @@ -1457,6 +1457,35 @@ import gc; gc.collect(); gc.collect(); gc.collect() assert seen == [3] + def test_release(self): + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or segfault... + ffi.release(p) # no effect + + def test_release_new_allocator(self): + seen = [] + def myalloc(size): + seen.append(size) + return ffi.new("char[]", b"X" * size) + def myfree(raw): + seen.append(raw) + alloc2 = ffi.new_allocator(alloc=myalloc, free=myfree) + p = alloc2("int[]", 15) + assert seen == [15 * 4] + ffi.release(p) + assert seen == [15 * 4, p] + ffi.release(p) # no effect + assert seen == [15 * 4, p] + # + del seen[:] + p = alloc2("struct ab *") + assert seen == [2 * 4] + ffi.release(p) + assert seen == [2 * 4, p] + ffi.release(p) # no effect + assert seen == [2 * 4, p] + def test_CData_CType(self): assert isinstance(ffi.cast("int", 0), ffi.CData) assert isinstance(ffi.new("int *"), ffi.CData) @@ -1647,14 +1676,6 @@ py.test.raises(TypeError, len, q.a) py.test.raises(TypeError, list, q.a) - def test_from_buffer(self): - import array - a = array.array('H', [10000, 20000, 30000]) - c = ffi.from_buffer(a) - assert ffi.typeof(c) is ffi.typeof("char[]") - ffi.cast("unsigned short *", c)[1] += 500 - assert list(a) == [10000, 20500, 30000] - def test_all_primitives(self): 
assert set(PRIMITIVE_TO_INDEX) == set([ "char", diff --git a/extra_tests/cffi_tests/cffi1/test_parse_c_type.py b/extra_tests/cffi_tests/cffi1/test_parse_c_type.py --- a/extra_tests/cffi_tests/cffi1/test_parse_c_type.py +++ b/extra_tests/cffi_tests/cffi1/test_parse_c_type.py @@ -4,7 +4,12 @@ from cffi import cffi_opcode if '__pypy__' in sys.builtin_module_names: - py.test.skip("not available on pypy", allow_module_level=True) + try: + # pytest >= 4.0 + py.test.skip("not available on pypy", allow_module_level=True) + except TypeError: + # older pytest + py.test.skip("not available on pypy") cffi_dir = os.path.dirname(cffi_opcode.__file__) diff --git a/extra_tests/cffi_tests/cffi1/test_recompiler.py b/extra_tests/cffi_tests/cffi1/test_recompiler.py --- a/extra_tests/cffi_tests/cffi1/test_recompiler.py +++ b/extra_tests/cffi_tests/cffi1/test_recompiler.py @@ -5,7 +5,7 @@ from cffi import recompiler from extra_tests.cffi_tests.udir import udir from extra_tests.cffi_tests.support import u, long -from extra_tests.cffi_tests.support import FdWriteCapture, StdErrCapture +from extra_tests.cffi_tests.support import FdWriteCapture, StdErrCapture, _verify try: import importlib @@ -36,7 +36,7 @@ # add '-Werror' to the existing 'extra_compile_args' flags kwds['extra_compile_args'] = (kwds.get('extra_compile_args', []) + ['-Werror']) - return recompiler._verify(ffi, module_name, source, *args, **kwds) + return _verify(ffi, module_name, source, *args, **kwds) def test_set_source_no_slashes(): ffi = FFI() @@ -1539,15 +1539,18 @@ assert (pt.x, pt.y) == (99*500*999, -99*500*999) def test_extern_python_1(): + import warnings ffi = FFI() - ffi.cdef(""" + with warnings.catch_warnings(record=True) as log: + ffi.cdef(""" extern "Python" { int bar(int, int); void baz(int, int); int bok(void); void boz(void); } - """) + """) + assert len(log) == 0, "got a warning: %r" % (log,) lib = verify(ffi, 'test_extern_python_1', """ static void baz(int, int); /* forward */ """) diff --git 
a/extra_tests/cffi_tests/cffi1/test_verify1.py b/extra_tests/cffi_tests/cffi1/test_verify1.py --- a/extra_tests/cffi_tests/cffi1/test_verify1.py +++ b/extra_tests/cffi_tests/cffi1/test_verify1.py @@ -4,6 +4,7 @@ from cffi import CDefError from cffi import recompiler from extra_tests.cffi_tests.support import * +from extra_tests.cffi_tests.support import _verify import _cffi_backend lib_m = ['m'] @@ -38,9 +39,8 @@ except AttributeError: pass self.set_source(module_name, preamble) - return recompiler._verify(self, module_name, preamble, *args, - extra_compile_args=self._extra_compile_args, - **kwds) + return _verify(self, module_name, preamble, *args, + extra_compile_args=self._extra_compile_args, **kwds) class FFI_warnings_not_error(FFI): _extra_compile_args = [] diff --git a/extra_tests/cffi_tests/support.py b/extra_tests/cffi_tests/support.py --- a/extra_tests/cffi_tests/support.py +++ b/extra_tests/cffi_tests/support.py @@ -62,3 +62,28 @@ def getvalue(self): return self._value + +def _verify(ffi, module_name, preamble, *args, **kwds): + import imp + from cffi.recompiler import recompile + from .udir import udir + assert module_name not in sys.modules, "module name conflict: %r" % ( + module_name,) + kwds.setdefault('tmpdir', str(udir)) + outputfilename = recompile(ffi, module_name, preamble, *args, **kwds) + module = imp.load_dynamic(module_name, outputfilename) + # + # hack hack hack: copy all *bound methods* from module.ffi back to the + # ffi instance. Then calls like ffi.new() will invoke module.ffi.new(). 
+ for name in dir(module.ffi): + if not name.startswith('_'): + attr = getattr(module.ffi, name) + if attr is not getattr(ffi, name, object()): + setattr(ffi, name, attr) + def typeof_disabled(*args, **kwds): + raise NotImplementedError + ffi._typeof = typeof_disabled + for name in dir(ffi): + if not name.startswith('_') and not hasattr(module.ffi, name): + setattr(ffi, name, NotImplemented) + return module.lib diff --git a/pypy/module/test_lib_pypy/ctypes_tests/__init__.py b/extra_tests/ctypes_tests/__init__.py rename from pypy/module/test_lib_pypy/ctypes_tests/__init__.py rename to extra_tests/ctypes_tests/__init__.py diff --git a/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c b/extra_tests/ctypes_tests/_ctypes_test.c rename from pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c rename to extra_tests/ctypes_tests/_ctypes_test.c diff --git a/pypy/module/test_lib_pypy/ctypes_tests/conftest.py b/extra_tests/ctypes_tests/conftest.py rename from pypy/module/test_lib_pypy/ctypes_tests/conftest.py rename to extra_tests/ctypes_tests/conftest.py --- a/pypy/module/test_lib_pypy/ctypes_tests/conftest.py +++ b/extra_tests/ctypes_tests/conftest.py @@ -3,10 +3,6 @@ import sys import os -def pytest_ignore_collect(path): - if '__pypy__' not in sys.builtin_module_names: - return True - # XXX: copied from pypy/tool/cpyext/extbuild.py if os.name != 'nt': so_ext = 'so' @@ -85,8 +81,7 @@ return outputfilename # end copy -def compile_so_file(): - udir = pytest.ensuretemp('_ctypes_test') +def compile_so_file(udir): cfile = py.path.local(__file__).dirpath().join("_ctypes_test.c") if sys.platform == 'win32': @@ -96,8 +91,12 @@ return c_compile([cfile], str(udir / '_ctypes_test'), libraries=libraries) -# we need to run after the "tmpdir" plugin which installs pytest.ensuretemp - at pytest.mark.trylast -def pytest_configure(config): - global sofile - sofile = compile_so_file() + at pytest.fixture(scope='session') +def sofile(tmpdir_factory): + udir = 
tmpdir_factory.mktemp('_ctypes_test') + return str(compile_so_file(udir)) + + at pytest.fixture +def dll(sofile): + from ctypes import CDLL + return CDLL(str(sofile)) diff --git a/pypy/module/test_lib_pypy/ctypes_tests/support.py b/extra_tests/ctypes_tests/support.py rename from pypy/module/test_lib_pypy/ctypes_tests/support.py rename to extra_tests/ctypes_tests/support.py diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_anon.py b/extra_tests/ctypes_tests/test_anon.py rename from pypy/module/test_lib_pypy/ctypes_tests/test_anon.py rename to extra_tests/ctypes_tests/test_anon.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_anon.py +++ b/extra_tests/ctypes_tests/test_anon.py @@ -1,86 +1,55 @@ import pytest from ctypes import * -from .support import BaseCTypesTestChecker -class TestAnon(BaseCTypesTestChecker): + at pytest.mark.pypy_only +def test_nested(): + class ANON_S(Structure): + _fields_ = [("a", c_int)] - def test_anon(self): - class ANON(Union): - _fields_ = [("a", c_int), - ("b", c_int)] + class ANON_U(Union): + _fields_ = [("_", ANON_S), + ("b", c_int)] + _anonymous_ = ["_"] - class Y(Structure): - _fields_ = [("x", c_int), - ("_", ANON), - ("y", c_int)] - _anonymous_ = ["_"] + class Y(Structure): + _fields_ = [("x", c_int), + ("_", ANON_U), + ("y", c_int)] + _anonymous_ = ["_"] - assert Y.a.offset == sizeof(c_int) - assert Y.b.offset == sizeof(c_int) + assert Y.x.offset == 0 + assert Y.a.offset == sizeof(c_int) + assert Y.b.offset == sizeof(c_int) + assert Y._.offset == sizeof(c_int) + assert Y.y.offset == sizeof(c_int) * 2 - assert ANON.a.offset == 0 - assert ANON.b.offset == 0 + assert Y._names_ == ['x', 'a', 'b', 'y'] - def test_anon_nonseq(self): - # TypeError: _anonymous_ must be a sequence - with pytest.raises(TypeError): - type(Structure)( - "Name", (Structure,), {"_fields_": [], "_anonymous_": 42}) +def test_anonymous_fields_on_instance(): + # this is about the *instance-level* access of anonymous fields, + # which you'd guess is the 
most common, but used not to work + # (issue #2230) - def test_anon_nonmember(self): - # AttributeError: type object 'Name' has no attribute 'x' - with pytest.raises(AttributeError): - type(Structure)( - "Name", (Structure,), {"_fields_": [], "_anonymous_": ["x"]}) + class B(Structure): + _fields_ = [("x", c_int), ("y", c_int), ("z", c_int)] + class A(Structure): + _anonymous_ = ["b"] + _fields_ = [("b", B)] - def test_nested(self): - class ANON_S(Structure): - _fields_ = [("a", c_int)] + a = A() + a.x = 5 + assert a.x == 5 + assert a.b.x == 5 + a.b.x += 1 + assert a.x == 6 - class ANON_U(Union): - _fields_ = [("_", ANON_S), - ("b", c_int)] - _anonymous_ = ["_"] + class C(Structure): + _anonymous_ = ["a"] + _fields_ = [("v", c_int), ("a", A)] - class Y(Structure): - _fields_ = [("x", c_int), - ("_", ANON_U), - ("y", c_int)] - _anonymous_ = ["_"] - - assert Y.x.offset == 0 - assert Y.a.offset == sizeof(c_int) - assert Y.b.offset == sizeof(c_int) - assert Y._.offset == sizeof(c_int) - assert Y.y.offset == sizeof(c_int) * 2 - - assert Y._names_ == ['x', 'a', 'b', 'y'] - - def test_anonymous_fields_on_instance(self): - # this is about the *instance-level* access of anonymous fields, - # which you'd guess is the most common, but used not to work - # (issue #2230) - - class B(Structure): - _fields_ = [("x", c_int), ("y", c_int), ("z", c_int)] - class A(Structure): - _anonymous_ = ["b"] - _fields_ = [("b", B)] - - a = A() - a.x = 5 - assert a.x == 5 - assert a.b.x == 5 - a.b.x += 1 - assert a.x == 6 - - class C(Structure): - _anonymous_ = ["a"] - _fields_ = [("v", c_int), ("a", A)] - - c = C() - c.v = 3 - c.y = -8 - assert c.v == 3 - assert c.y == c.a.y == c.a.b.y == -8 - assert not hasattr(c, 'b') + c = C() + c.v = 3 + c.y = -8 + assert c.v == 3 + assert c.y == c.a.y == c.a.b.y == -8 + assert not hasattr(c, 'b') diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_array.py b/extra_tests/ctypes_tests/test_array.py rename from 
pypy/module/test_lib_pypy/ctypes_tests/test_array.py rename to extra_tests/ctypes_tests/test_array.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_array.py +++ b/extra_tests/ctypes_tests/test_array.py @@ -1,177 +1,64 @@ import pytest from ctypes import * -from .support import BaseCTypesTestChecker -formats = "bBhHiIlLqQfd" +def test_slice(): + values = list(range(5)) + numarray = c_int * 5 -formats = c_byte, c_ubyte, c_short, c_ushort, c_int, c_uint, \ - c_long, c_ulonglong, c_float, c_double + na = numarray(*(c_int(x) for x in values)) -class TestArray(BaseCTypesTestChecker): - def test_simple(self): - # create classes holding simple numeric types, and check - # various properties. + assert list(na[0:0]) == [] + assert list(na[:]) == values + assert list(na[:10]) == values - init = range(15, 25) +def test_init_again(): + sz = (c_char * 3)() + addr1 = addressof(sz) + sz.__init__(*b"foo") + addr2 = addressof(sz) + assert addr1 == addr2 - for fmt in formats: - alen = len(init) - int_array = ARRAY(fmt, alen) +def test_array_of_structures(): + class X(Structure): + _fields_ = [('x', c_int), ('y', c_int)] - ia = int_array(*init) - # length of instance ok? - assert len(ia) == alen + Y = X * 2 + y = Y() + x = X() + x.y = 3 + y[1] = x + assert y[1].y == 3 - # slot values ok? - values = [ia[i] for i in range(len(init))] - assert values == init +def test_output_simple(): + A = c_char * 10 + TP = POINTER(A) + x = TP(A()) + assert x[0] != b'' - # change the items - from operator import setitem - new_values = range(42, 42+alen) - [setitem(ia, n, new_values[n]) for n in range(alen)] - values = [ia[i] for i in range(len(init))] - assert values == new_values + A = c_wchar * 10 + TP = POINTER(A) + x = TP(A()) + assert x[0] != b'' - # are the items initialized to 0? 
- ia = int_array() - values = [ia[i] for i in range(len(init))] - assert values == [0] * len(init) +def test_output_simple_array(): + A = c_char * 10 + AA = A * 10 + aa = AA() + assert aa[0] != b'' - # Too many in itializers should be caught - with pytest.raises(IndexError): - int_array(*range(alen*2)) +def test_output_complex_test(): + class Car(Structure): + _fields_ = [("brand", c_char * 10), + ("speed", c_float), + ("owner", c_char * 10)] - CharArray = ARRAY(c_char, 3) + assert isinstance(Car(b"abcdefghi", 42.0, b"12345").brand, bytes) + assert Car(b"abcdefghi", 42.0, b"12345").brand == b"abcdefghi" + assert Car(b"abcdefghio", 42.0, b"12345").brand == b"abcdefghio" + with pytest.raises(ValueError): + Car(b"abcdefghiop", 42.0, b"12345") - ca = CharArray("a", "b", "c") - - # Should this work? It doesn't: - # CharArray("abc") - with pytest.raises(TypeError): - CharArray("abc") - - assert ca[0] == "a" - assert ca[1] == "b" - assert ca[2] == "c" - assert ca[-3] == "a" - assert ca[-2] == "b" - assert ca[-1] == "c" - - assert len(ca) == 3 - - # slicing is now supported, but not extended slicing (3-argument)! 
- from operator import getslice, delitem - with pytest.raises(TypeError): - getslice(ca, 0, 1, -1) - - # cannot delete items - with pytest.raises(TypeError): - delitem(ca, 0) - - def test_numeric_arrays(self): - - alen = 5 - - numarray = ARRAY(c_int, alen) - - na = numarray() - values = [na[i] for i in range(alen)] - assert values == [0] * alen - - na = numarray(*[c_int()] * alen) - values = [na[i] for i in range(alen)] - assert values == [0]*alen - - na = numarray(1, 2, 3, 4, 5) - values = [i for i in na] - assert values == [1, 2, 3, 4, 5] - - na = numarray(*map(c_int, (1, 2, 3, 4, 5))) - values = [i for i in na] - assert values == [1, 2, 3, 4, 5] - - def test_slice(self): - values = range(5) - numarray = c_int * 5 - - na = numarray(*(c_int(x) for x in values)) - - assert list(na[0:0]) == [] - assert list(na[:]) == values - assert list(na[:10]) == values - - def test_classcache(self): - assert not ARRAY(c_int, 3) is ARRAY(c_int, 4) - assert ARRAY(c_int, 3) is ARRAY(c_int, 3) - - def test_from_address(self): - # Failed with 0.9.8, reported by JUrner - p = create_string_buffer("foo") - sz = (c_char * 3).from_address(addressof(p)) - assert sz[:] == "foo" - assert sz.value == "foo" - - def test_init_again(self): - sz = (c_char * 3)() - addr1 = addressof(sz) - sz.__init__(*"foo") - addr2 = addressof(sz) - assert addr1 == addr2 - - try: - create_unicode_buffer - except NameError: - pass - else: - def test_from_addressW(self): - p = create_unicode_buffer("foo") - sz = (c_wchar * 3).from_address(addressof(p)) - assert sz[:] == "foo" - assert sz.value == "foo" - -class TestSophisticatedThings(BaseCTypesTestChecker): - def test_array_of_structures(self): - class X(Structure): - _fields_ = [('x', c_int), ('y', c_int)] - - Y = X * 2 - y = Y() - x = X() - x.y = 3 - y[1] = x - assert y[1].y == 3 - - def test_output_simple(self): - A = c_char * 10 - TP = POINTER(A) - x = TP(A()) - assert x[0] != '' - - A = c_wchar * 10 - TP = POINTER(A) - x = TP(A()) - assert x[0] != '' - - def 
test_output_simple_array(self): - A = c_char * 10 - AA = A * 10 - aa = AA() - assert aa[0] != '' - - def test_output_complex_test(self): - class Car(Structure): - _fields_ = [("brand", c_char * 10), - ("speed", c_float), - ("owner", c_char * 10)] - - assert isinstance(Car("abcdefghi", 42.0, "12345").brand, bytes) - assert Car("abcdefghi", 42.0, "12345").brand == "abcdefghi" - assert Car("abcdefghio", 42.0, "12345").brand == "abcdefghio" - with pytest.raises(ValueError): - Car("abcdefghiop", 42.0, "12345") - - A = Car._fields_[2][1] - TP = POINTER(A) - x = TP(A()) - assert x[0] != '' + A = Car._fields_[2][1] + TP = POINTER(A) + x = TP(A()) + assert x[0] != b'' diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_base.py b/extra_tests/ctypes_tests/test_base.py rename from pypy/module/test_lib_pypy/ctypes_tests/test_base.py rename to extra_tests/ctypes_tests/test_base.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_base.py +++ b/extra_tests/ctypes_tests/test_base.py @@ -1,26 +1,24 @@ -from .support import WhiteBoxTests - +import pytest from ctypes import * -# WhiteBoxTests +pytestmark = pytest.mark.pypy_only -class TestCTypesBase(WhiteBoxTests): - def test_pointer(self): - p = pointer(pointer(c_int(2))) - x = p[0] - assert x._base is p +def test_pointer(): + p = pointer(pointer(c_int(2))) + x = p[0] + assert x._base is p - def test_structure(self): - class X(Structure): - _fields_ = [('x', POINTER(c_int)), - ('y', POINTER(c_int))] +def test_structure(): + class X(Structure): + _fields_ = [('x', POINTER(c_int)), + ('y', POINTER(c_int))] - x = X() - assert x.y._base is x - assert x.y._index == 1 + x = X() + assert x.y._base is x + assert x.y._index == 1 - def test_array(self): - X = POINTER(c_int) * 24 - x = X() - assert x[16]._base is x - assert x[16]._index == 16 +def test_array(): + X = POINTER(c_int) * 24 + x = X() + assert x[16]._base is x + assert x[16]._index == 16 diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_bitfields.py 
b/extra_tests/ctypes_tests/test_bitfields.py rename from pypy/module/test_lib_pypy/ctypes_tests/test_bitfields.py rename to extra_tests/ctypes_tests/test_bitfields.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_bitfields.py +++ b/extra_tests/ctypes_tests/test_bitfields.py @@ -1,249 +1,19 @@ import pytest from ctypes import * -from .support import BaseCTypesTestChecker -import os -import ctypes -signed_int_types = (c_byte, c_short, c_int, c_long, c_longlong) -unsigned_int_types = (c_ubyte, c_ushort, c_uint, c_ulong, c_ulonglong) -int_types = unsigned_int_types + signed_int_types +def test_set_fields_attr(): + class A(Structure): + pass + A._fields_ = [("a", c_byte), ("b", c_ubyte)] +def test_set_fields_attr_bitfields(): + class A(Structure): + pass + A._fields_ = [("a", POINTER(A)), ("b", c_ubyte, 4)] -def setup_module(mod): - import conftest - _ctypes_test = str(conftest.sofile) - func = CDLL(_ctypes_test).unpack_bitfields - func.argtypes = POINTER(BITS), c_char - mod.func = func - - -class BITS(Structure): - _fields_ = [("A", c_int, 1), - ("B", c_int, 2), - ("C", c_int, 3), - ("D", c_int, 4), - ("E", c_int, 5), - ("F", c_int, 6), - ("G", c_int, 7), - ("H", c_int, 8), - ("I", c_int, 9), - - ("M", c_short, 1), - ("N", c_short, 2), - ("O", c_short, 3), - ("P", c_short, 4), - ("Q", c_short, 5), - ("R", c_short, 6), - ("S", c_short, 7)] - - -class TestC: - def test_ints(self): - for i in range(512): - for name in "ABCDEFGHI": - b = BITS() - setattr(b, name, i) - assert (name, i, getattr(b, name)) == (name, i, func(byref(b), name)) - - def test_shorts(self): - for i in range(256): - for name in "MNOPQRS": - b = BITS() - setattr(b, name, i) - assert (name, i, getattr(b, name)) == (name, i, func(byref(b), name)) - - -class TestBitField: - def test_longlong(self): - class X(Structure): - _fields_ = [("a", c_longlong, 1), - ("b", c_longlong, 62), - ("c", c_longlong, 1)] - - assert sizeof(X) == sizeof(c_longlong) - x = X() - x.a, x.b, x.c = -1, 7, -1 - assert (x.a, 
x.b, x.c) == (-1, 7, -1) - - x = X() - x.a, x.b, x.c = -1, -7, -1 - assert (x.a, x.b, x.c) == (-1, -7, -1) - - def test_ulonglong(self): - class X(Structure): - _fields_ = [("a", c_ulonglong, 1), - ("b", c_ulonglong, 62), - ("c", c_ulonglong, 1)] - - assert sizeof(X) == sizeof(c_longlong) - x = X() - assert (x.a, x.b, x.c) == (0, 0, 0) - x.a, x.b, x.c = 7, 2305843009213693953, 7 - assert (x.a, x.b, x.c) == (1, 2305843009213693953, 1) - - def test_signed(self): - for c_typ in signed_int_types: - class X(Structure): - _fields_ = [("dummy", c_typ), - ("a", c_typ, 3), - ("b", c_typ, 3), - ("c", c_typ, 1)] - assert sizeof(X) == sizeof(c_typ)*2 - - x = X() - assert (c_typ, x.a, x.b, x.c) == (c_typ, 0, 0, 0) - x.a = -1 - assert (c_typ, x.a, x.b, x.c) == (c_typ, -1, 0, 0) - x.a, x.b = 0, -1 - assert (c_typ, x.a, x.b, x.c) == (c_typ, 0, -1, 0) - - def test_unsigned(self): - for c_typ in unsigned_int_types: - class X(Structure): - _fields_ = [("a", c_typ, 3), - ("b", c_typ, 3), - ("c", c_typ, 1)] - assert sizeof(X) == sizeof(c_typ) - - x = X() - assert (c_typ, x.a, x.b, x.c) == (c_typ, 0, 0, 0) - x.a = -1 - assert (c_typ, x.a, x.b, x.c) == (c_typ, 7, 0, 0) - x.a, x.b = 0, -1 - assert (c_typ, x.a, x.b, x.c) == (c_typ, 0, 7, 0) - - def fail_fields(self, *fields): - return self.get_except(type(Structure), "X", (), - {"_fields_": fields}) - - def test_nonint_types(self): - # bit fields are not allowed on non-integer types. 
- result = self.fail_fields(("a", c_char_p, 1)) - assert result == (TypeError, 'bit fields not allowed for type c_char_p') - - result = self.fail_fields(("a", c_void_p, 1)) - assert result == (TypeError, 'bit fields not allowed for type c_void_p') - - if c_int != c_long: - result = self.fail_fields(("a", POINTER(c_int), 1)) - assert result == (TypeError, 'bit fields not allowed for type LP_c_int') - - result = self.fail_fields(("a", c_char, 1)) - assert result == (TypeError, 'bit fields not allowed for type c_char') - - try: - c_wchar - except NameError: - pass - else: - result = self.fail_fields(("a", c_wchar, 1)) - assert result == (TypeError, 'bit fields not allowed for type c_wchar') - - class Dummy(Structure): - _fields_ = [] - - result = self.fail_fields(("a", Dummy, 1)) - assert result == (TypeError, 'bit fields not allowed for type Dummy') - - def test_single_bitfield_size(self): - for c_typ in int_types: - result = self.fail_fields(("a", c_typ, -1)) - assert result == (ValueError, 'number of bits invalid for bit field') - - result = self.fail_fields(("a", c_typ, 0)) - assert result == (ValueError, 'number of bits invalid for bit field') - - class X(Structure): - _fields_ = [("a", c_typ, 1)] - assert sizeof(X) == sizeof(c_typ) - - class X(Structure): - _fields_ = [("a", c_typ, sizeof(c_typ)*8)] - assert sizeof(X) == sizeof(c_typ) - - result = self.fail_fields(("a", c_typ, sizeof(c_typ)*8 + 1)) - assert result == (ValueError, 'number of bits invalid for bit field') - - def test_multi_bitfields_size(self): - class X(Structure): - _fields_ = [("a", c_short, 1), - ("b", c_short, 14), - ("c", c_short, 1)] - assert sizeof(X) == sizeof(c_short) - - class X(Structure): - _fields_ = [("a", c_short, 1), - ("a1", c_short), - ("b", c_short, 14), - ("c", c_short, 1)] - assert sizeof(X) == sizeof(c_short)*3 - assert X.a.offset == 0 - assert X.a1.offset == sizeof(c_short) - assert X.b.offset == sizeof(c_short)*2 - assert X.c.offset == sizeof(c_short)*2 - - class 
X(Structure): - _fields_ = [("a", c_short, 3), - ("b", c_short, 14), - ("c", c_short, 14)] - assert sizeof(X) == sizeof(c_short)*3 - assert X.a.offset == sizeof(c_short)*0 - assert X.b.offset == sizeof(c_short)*1 - assert X.c.offset == sizeof(c_short)*2 - - def get_except(self, func, *args, **kw): - try: - func(*args, **kw) - except Exception as detail: - import traceback - traceback.print_exc() - return detail.__class__, str(detail) - - def test_mixed_1(self): - class X(Structure): - _fields_ = [("a", c_byte, 4), - ("b", c_int, 4)] - if os.name in ("nt", "ce"): - assert sizeof(X) == sizeof(c_int)*2 - else: - assert sizeof(X) == sizeof(c_int) - - def test_mixed_2(self): - class X(Structure): - _fields_ = [("a", c_byte, 4), - ("b", c_int, 32)] - assert sizeof(X) == sizeof(c_int)*2 - - def test_mixed_3(self): - class X(Structure): - _fields_ = [("a", c_byte, 4), - ("b", c_ubyte, 4)] - assert sizeof(X) == sizeof(c_byte) - - def test_anon_bitfields(self): - # anonymous bit-fields gave a strange error message - class X(Structure): - _fields_ = [("a", c_byte, 4), - ("b", c_ubyte, 4)] - class Y(Structure): - _anonymous_ = ["_"] - _fields_ = [("_", X)] - - def test_set_fields_attr(self): - class A(Structure): - pass - A._fields_ = [("a", c_byte), - ("b", c_ubyte)] - - def test_set_fields_attr_bitfields(self): - class A(Structure): - pass - A._fields_ = [("a", POINTER(A)), - ("b", c_ubyte, 4)] - - def test_set_fields_cycle_fails(self): - class A(Structure): - pass - with pytest.raises(AttributeError): - A._fields_ = [("a", A)] +def test_set_fields_cycle_fails(): + class A(Structure): + pass + with pytest.raises(AttributeError): + A._fields_ = [("a", A)] diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py b/extra_tests/ctypes_tests/test_buffers.py rename from pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py rename to extra_tests/ctypes_tests/test_buffers.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py +++ 
b/extra_tests/ctypes_tests/test_buffers.py @@ -1,71 +1,33 @@ from ctypes import * -from .support import BaseCTypesTestChecker -class TestStringBuffer(BaseCTypesTestChecker): +def test_buffer(): + b = create_string_buffer(32) + assert len(b) == 32 + assert sizeof(b) == 32 * sizeof(c_char) + assert type(b[0]) is bytes - def test_buffer(self): - b = create_string_buffer(32) - assert len(b) == 32 - assert sizeof(b) == 32 * sizeof(c_char) - assert type(b[0]) is str + b = create_string_buffer(b"abc") + assert len(b) == 4 # trailing nul char + assert sizeof(b) == 4 * sizeof(c_char) + assert type(b[0]) is bytes + assert b[0] == b"a" + assert b[:] == b"abc\0" - b = create_string_buffer(b"abc") - assert len(b) == 4 # trailing nul char - assert sizeof(b) == 4 * sizeof(c_char) - assert type(b[0]) is bytes - assert b[0] == b"a" - assert b[:] == b"abc\0" +def test_from_buffer(): + b1 = bytearray(b"abcde") + b = (c_char * 5).from_buffer(b1) + assert b[2] == b"c" + # + b1 = bytearray(b"abcd") + b = c_int.from_buffer(b1) + assert b.value in (1684234849, # little endian + 1633837924) # big endian - def test_string_conversion(self): - b = create_string_buffer(u"abc") - assert len(b) == 4 # trailing nul char - assert sizeof(b) == 4 * sizeof(c_char) - assert type(b[0]) is str - assert b[0] == "a" - assert b[:] == "abc\0" - - def test_from_buffer(self): - b1 = bytearray("abcde") - b = (c_char * 5).from_buffer(b1) - assert b[2] == "c" - # - b1 = bytearray("abcd") - b = c_int.from_buffer(b1) - assert b.value in (1684234849, # little endian - 1633837924) # big endian - - def test_from_buffer_keepalive(self): - # Issue #2878 - b1 = bytearray("ab") - array = (c_uint16 * 32)() - array[6] = c_uint16.from_buffer(b1) - # this is also what we get on CPython. I don't think it makes - # sense because the array contains just a copy of the number. 
- assert array._objects == {'6': b1} - - try: - c_wchar - except NameError: - pass - else: - def test_unicode_buffer(self): - b = create_unicode_buffer(32) - assert len(b) == 32 - assert sizeof(b) == 32 * sizeof(c_wchar) - assert type(b[0]) is unicode - - b = create_unicode_buffer(u"abc") - assert len(b) == 4 # trailing nul char - assert sizeof(b) == 4 * sizeof(c_wchar) - assert type(b[0]) is unicode - assert b[0] == u"a" - assert b[:] == "abc\0" - - def test_unicode_conversion(self): - b = create_unicode_buffer(b"abc") - assert len(b) == 4 # trailing nul char - assert sizeof(b) == 4 * sizeof(c_wchar) - assert type(b[0]) is unicode - assert b[0] == u"a" - assert b[:] == "abc\0" - +def test_from_buffer_keepalive(): + # Issue #2878 + b1 = bytearray(b"ab") + array = (c_uint16 * 32)() + array[6] = c_uint16.from_buffer(b1) + # this is also what we get on CPython. I don't think it makes + # sense because the array contains just a copy of the number. + assert array._objects == {'6': b1} diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_callback_traceback.py b/extra_tests/ctypes_tests/test_callback_traceback.py rename from pypy/module/test_lib_pypy/ctypes_tests/test_callback_traceback.py rename to extra_tests/ctypes_tests/test_callback_traceback.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_callback_traceback.py +++ b/extra_tests/ctypes_tests/test_callback_traceback.py @@ -1,80 +1,35 @@ # derived from test_random_things.py -import py +import pytest + from ctypes import * -import sys -def callback_func(arg): - 42 / arg - raise ValueError(arg) +_rawffi = pytest.importorskip('_rawffi') -class TestCallbackTraceback: - # When an exception is raised in a ctypes callback function, the C - # code prints a traceback. +# +# This test makes sure the exception types *and* the exception +# value is printed correctly. 
+ + at pytest.mark.skipif("sys.flags.inspect") +def test_SystemExit(monkeypatch, capsys): + """ + When an exception is raised in a ctypes callback function, the C + code prints a traceback. When SystemExit is raised, the interpreter + normally exits immediately. + """ + def callback_func(arg): + raise SystemExit(42) + def custom_exit(value): + raise Exception("<<>>" % (value,)) + monkeypatch.setattr(_rawffi, 'exit', custom_exit) + cb = CFUNCTYPE(c_int, c_int)(callback_func) + cb2 = cast(cast(cb, c_void_p), CFUNCTYPE(c_int, c_int)) + out, err = capsys.readouterr() + assert not err + cb2(0) + out, err = capsys.readouterr() + assert err.splitlines()[-1] == "Exception: <<>>" # - # This test makes sure the exception types *and* the exception - # value is printed correctly. - # - # Changed in 0.9.3: No longer is '(in callback)' prepended to the - # error message - instead a additional frame for the C code is - # created, then a full traceback printed. When SystemExit is - # raised in a callback function, the interpreter exits. 
- - def capture_stderr(self, func, *args, **kw): - # helper - call function 'func', and return the captured stderr - import StringIO - old_stderr = sys.stderr - logger = sys.stderr = StringIO.StringIO() - try: - func(*args, **kw) - finally: - sys.stderr = old_stderr - return logger.getvalue() - - def test_ValueError(self): - cb = CFUNCTYPE(c_int, c_int)(callback_func) - out = self.capture_stderr(cb, 42) - assert out.splitlines()[-1] == ( - "ValueError: 42") - - def test_IntegerDivisionError(self): - cb = CFUNCTYPE(c_int, c_int)(callback_func) - out = self.capture_stderr(cb, 0) - assert out.splitlines()[-1][:19] == ( - "ZeroDivisionError: ") - - def test_FloatDivisionError(self): - cb = CFUNCTYPE(c_int, c_double)(callback_func) - out = self.capture_stderr(cb, 0.0) - assert out.splitlines()[-1][:19] == ( - "ZeroDivisionError: ") - - def test_TypeErrorDivisionError(self): - cb = CFUNCTYPE(c_int, c_char_p)(callback_func) - out = self.capture_stderr(cb, "spam") - assert out.splitlines()[-1].startswith( - "TypeError: " - "unsupported operand type(s) for") - - def test_SystemExit(self): - import _rawffi - if sys.flags.inspect: - skip("requires sys.flags.inspect == 0") - def callback_func(arg): - raise SystemExit(42) - def custom_exit(value): - raise Exception("<<>>" % (value,)) - original_exit = _rawffi.exit - try: - _rawffi.exit = custom_exit - # - cb = CFUNCTYPE(c_int, c_int)(callback_func) - cb2 = cast(cast(cb, c_void_p), CFUNCTYPE(c_int, c_int)) - out = self.capture_stderr(cb2, 0) - assert out.splitlines()[-1] == "Exception: <<>>" - # - cb = CFUNCTYPE(c_int, c_int)(callback_func) - out = self.capture_stderr(cb, 0) - assert out.splitlines()[-1] == "Exception: <<>>" - # - finally: - _rawffi.exit = original_exit + cb = CFUNCTYPE(c_int, c_int)(callback_func) + cb(0) + out, err = capsys.readouterr() + assert err.splitlines()[-1] == "Exception: <<>>" diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py b/extra_tests/ctypes_tests/test_callbacks.py rename 
from pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py rename to extra_tests/ctypes_tests/test_callbacks.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py +++ b/extra_tests/ctypes_tests/test_callbacks.py @@ -1,283 +1,199 @@ +import pytest + +import math from ctypes import * -import pytest from .support import BaseCTypesTestChecker -class TestCallbacks(BaseCTypesTestChecker): - functype = CFUNCTYPE - -## def tearDown(self): -## import gc -## gc.collect() - - def callback(self, *args): - self.got_args = args - return args[-1] - - def check_type(self, typ, arg): - unwrapped_types = { - c_float: (float,), - c_double: (float,), - c_char: (str,), - c_char_p: (str,), - c_uint: (int, long), - c_ulong: (int, long), - } - - PROTO = self.functype.im_func(typ, typ) - cfunc = PROTO(self.callback) - result = cfunc(arg) - if typ == c_float: - assert abs(result - arg) < 0.000001 - else: - assert self.got_args == (arg,) - assert result == arg - - result2 = cfunc(typ(arg)) - assert type(result2) in unwrapped_types.get(typ, (int, long)) - - PROTO = self.functype.im_func(typ, c_byte, typ) - result = PROTO(self.callback)(-3, arg) - if typ == c_float: - assert abs(result - arg) < 0.000001 - else: - assert self.got_args == (-3, arg) - assert result == arg - - ################ - - def test_byte(self): - self.check_type(c_byte, 42) - self.check_type(c_byte, -42) - - def test_ubyte(self): - self.check_type(c_ubyte, 42) - - def test_short(self): - self.check_type(c_short, 42) - self.check_type(c_short, -42) - - def test_ushort(self): - self.check_type(c_ushort, 42) - - def test_int(self): - self.check_type(c_int, 42) - self.check_type(c_int, -42) - - def test_uint(self): - self.check_type(c_uint, 42) - - def test_long(self): - self.check_type(c_long, 42) - self.check_type(c_long, -42) - - def test_ulong(self): - self.check_type(c_ulong, 42) - - def test_longlong(self): - self.check_type(c_longlong, 42) - self.check_type(c_longlong, -42) - - def test_ulonglong(self): - 
self.check_type(c_ulonglong, 42) - - def test_float(self): - # only almost equal: double -> float -> double - import math - self.check_type(c_float, math.e) - self.check_type(c_float, -math.e) - - def test_double(self): - self.check_type(c_double, 3.14) - self.check_type(c_double, -3.14) - - def test_char(self): - self.check_type(c_char, "x") - self.check_type(c_char, "a") - - # disabled: would now (correctly) raise a RuntimeWarning about - # a memory leak. A callback function cannot return a non-integral - # C type without causing a memory leak. -## def test_char_p(self): -## self.check_type(c_char_p, "abc") -## self.check_type(c_char_p, "def") - - - @pytest.mark.xfail( - reason="we are less strict about callback return type sanity") - def test_unsupported_restype_1(self): - # Only "fundamental" result types are supported for callback - # functions, the type must have a non-NULL stgdict->setfunc. - # POINTER(c_double), for example, is not supported. - - prototype = self.functype.im_func(POINTER(c_double)) - # The type is checked when the prototype is called - with pytest.raises(TypeError): - prototype(lambda: None) - +functypes = [CFUNCTYPE] try: - WINFUNCTYPE + functypes.append(WINFUNCTYPE) except NameError: pass -else: - class TestStdcallCallbacks(TestCallbacks): - functype = WINFUNCTYPE -################################################################ -class TestSampleCallbacks(BaseCTypesTestChecker): +def callback(*args): + callback.got_args = args + return args[-1] - def test_integrate(self): - # Derived from some then non-working code, posted by David Foster - import conftest - _ctypes_test = str(conftest.sofile) - dll = CDLL(_ctypes_test) +unwrapped_types = { + c_float: (float,), + c_double: (float,), + c_char: (bytes,), + c_char_p: (bytes,), + c_uint: (int,), + c_ulong: (int,), + } - # The function prototype called by 'integrate': double func(double); - CALLBACK = CFUNCTYPE(c_double, c_double) + at pytest.mark.parametrize("typ, arg", [ + (c_byte, 42), + 
(c_byte, -42), + (c_ubyte, 42), + (c_short, 42), + (c_short, -42), + (c_ushort, 42), + (c_int, 42), + (c_int, -42), + (c_uint, 42), + (c_long, 42), + (c_long, -42), + (c_ulong, 42), + (c_longlong, 42), + (c_longlong, -42), + (c_ulonglong, 42), + (c_float, math.e), # only almost equal: double -> float -> double + (c_float, -math.e), + (c_double, 3.14), + (c_double, -3.14), + (c_char, b"x"), + (c_char, b"a"), +]) + at pytest.mark.parametrize('functype', functypes) +def test_types(typ, arg, functype): + PROTO = functype(typ, typ) + cfunc = PROTO(callback) + result = cfunc(arg) + if typ == c_float: + assert abs(result - arg) < 0.000001 + else: + assert callback.got_args == (arg,) + assert result == arg - # The integrate function itself, exposed from the _ctypes_test dll - integrate = dll.integrate - integrate.argtypes = (c_double, c_double, CALLBACK, c_long) - integrate.restype = c_double + result2 = cfunc(typ(arg)) + assert type(result2) in unwrapped_types.get(typ, (int,)) - def func(x): - print 'calculating x**2 of',x - return x**2 + PROTO = functype(typ, c_byte, typ) + result = PROTO(callback)(-3, arg) + if typ == c_float: + assert abs(result - arg) < 0.000001 + else: + assert callback.got_args == (-3, arg) + assert result == arg - result = integrate(0.0, 1.0, CALLBACK(func), 10) - diff = abs(result - 1./3.) + at pytest.mark.parametrize('functype', functypes) +def test_unsupported_restype_1(functype): + # Only "fundamental" result types are supported for callback + # functions, the type must have a non-NULL stgdict->setfunc. + # POINTER(c_double), for example, is not supported. 
- assert diff < 0.01, "%s not less than 0.01" % diff + prototype = functype(POINTER(c_double)) + # The type is checked when the prototype is called + with pytest.raises(TypeError): + prototype(lambda: None) -################################################################ -class TestMoreCallbacks(BaseCTypesTestChecker): +def test_callback_with_struct_argument(): + class RECT(Structure): + _fields_ = [("left", c_int), ("top", c_int), + ("right", c_int), ("bottom", c_int)] - def test_callback_with_struct_argument(self): - class RECT(Structure): - _fields_ = [("left", c_int), ("top", c_int), - ("right", c_int), ("bottom", c_int)] + proto = CFUNCTYPE(c_int, RECT) - proto = CFUNCTYPE(c_int, RECT) - def callback(point): - point.left *= -1 - return point.left+point.top+point.right+point.bottom + def callback(point): + point.left *= -1 + return point.left + point.top + point.right + point.bottom - cbp = proto(callback) + cbp = proto(callback) + rect = RECT(-1000, 100, 10, 1) + res = cbp(rect) + assert res == 1111 + assert rect.left == -1000 # must not have been changed! - rect = RECT(-1000,100,10,1) +def test_callback_from_c_with_struct_argument(dll): + class RECT(Structure): + _fields_ = [("left", c_long), ("top", c_long), + ("right", c_long), ("bottom", c_long)] - res = cbp(rect) + proto = CFUNCTYPE(c_int, RECT) - assert res == 1111 - assert rect.left == -1000 # must not have been changed! 
+ def callback(point): + return point.left + point.top + point.right + point.bottom - def test_callback_from_c_with_struct_argument(self): - import conftest - _ctypes_test = str(conftest.sofile) - dll = CDLL(_ctypes_test) + cbp = proto(callback) + rect = RECT(1000, 100, 10, 1) - class RECT(Structure): - _fields_ = [("left", c_long), ("top", c_long), - ("right", c_long), ("bottom", c_long)] + call_callback_with_rect = dll.call_callback_with_rect + call_callback_with_rect.restype = c_int + call_callback_with_rect.argtypes = [proto, RECT] + res = call_callback_with_rect(cbp, rect) + assert res == 1111 - proto = CFUNCTYPE(c_int, RECT) - def callback(point): - return point.left+point.top+point.right+point.bottom +def test_callback_unsupported_return_struct(): + class RECT(Structure): + _fields_ = [("left", c_int), ("top", c_int), + ("right", c_int), ("bottom", c_int)] - cbp = proto(callback) - rect = RECT(1000,100,10,1) + proto = CFUNCTYPE(RECT, c_int) + with pytest.raises(TypeError): + proto(lambda r: 0) - call_callback_with_rect = dll.call_callback_with_rect - call_callback_with_rect.restype = c_int - call_callback_with_rect.argtypes = [proto, RECT] - res = call_callback_with_rect(cbp, rect) - assert res == 1111 - def test_callback_unsupported_return_struct(self): - class RECT(Structure): - _fields_ = [("left", c_int), ("top", c_int), - ("right", c_int), ("bottom", c_int)] +def test_qsort(dll): + PI = POINTER(c_int) + A = c_int*5 + a = A() + for i in range(5): + a[i] = 5-i - proto = CFUNCTYPE(RECT, c_int) - with pytest.raises(TypeError): - proto(lambda r: 0) + assert a[0] == 5 # sanity + def comp(a, b): + a = a.contents.value + b = b.contents.value + if a < b: + return -1 + elif a > b: + return 1 + else: + return 0 + qs = dll.my_qsort + qs.restype = None + CMP = CFUNCTYPE(c_int, PI, PI) + qs.argtypes = (PI, c_size_t, c_size_t, CMP) - def test_qsort(self): - import conftest - _ctypes_test = str(conftest.sofile) - dll = CDLL(_ctypes_test) + qs(cast(a, PI), 5, 
sizeof(c_int), CMP(comp)) - PI = POINTER(c_int) - A = c_int*5 - a = A() - for i in range(5): - a[i] = 5-i + res = list(a) - assert a[0] == 5 # sanity + assert res == [1,2,3,4,5] - def comp(a, b): - a = a.contents.value - b = b.contents.value - return cmp(a,b) - qs = dll.my_qsort - qs.restype = None - CMP = CFUNCTYPE(c_int, PI, PI) - qs.argtypes = (PI, c_size_t, c_size_t, CMP) +def test_pyobject_as_opaque(dll): + def callback(arg): + return arg() - qs(cast(a, PI), 5, sizeof(c_int), CMP(comp)) + CTP = CFUNCTYPE(c_int, py_object) + cfunc = dll._testfunc_callback_opaque + cfunc.argtypes = [CTP, py_object] + cfunc.restype = c_int + res = cfunc(CTP(callback), lambda : 3) + assert res == 3 - res = list(a) +def test_callback_void(capsys, dll): + def callback(): + pass - assert res == [1,2,3,4,5] + CTP = CFUNCTYPE(None) + cfunc = dll._testfunc_callback_void + cfunc.argtypes = [CTP] + cfunc.restype = int + cfunc(CTP(callback)) + out, err = capsys.readouterr() + assert (out, err) == ("", "") - def test_pyobject_as_opaque(self): - import conftest - _ctypes_test = str(conftest.sofile) - dll = CDLL(_ctypes_test) - def callback(arg): - return arg() +def test_callback_pyobject(): + def callback(obj): + return obj - CTP = CFUNCTYPE(c_int, py_object) - cfunc = dll._testfunc_callback_opaque - cfunc.argtypes = [CTP, py_object] - cfunc.restype = c_int - res = cfunc(CTP(callback), lambda : 3) - assert res == 3 + FUNC = CFUNCTYPE(py_object, py_object) + cfunc = FUNC(callback) + param = c_int(42) + assert cfunc(param) is param - def test_callback_void(self, capsys): - import conftest - _ctypes_test = str(conftest.sofile) - dll = CDLL(_ctypes_test) - - def callback(): - pass - - CTP = CFUNCTYPE(None) - cfunc = dll._testfunc_callback_void - cfunc.argtypes = [CTP] - cfunc.restype = int - cfunc(CTP(callback)) - out, err = capsys.readouterr() - assert (out, err) == ("", "") - - - def test_callback_pyobject(self): - def callback(obj): - return obj - - FUNC = CFUNCTYPE(py_object, py_object) - 
cfunc = FUNC(callback) - param = c_int(42) - assert cfunc(param) is param - - def test_raise_argumenterror(self): - def callback(x): - pass - FUNC = CFUNCTYPE(None, c_void_p) - cfunc = FUNC(callback) - param = c_uint(42) - with pytest.raises(ArgumentError): - cfunc(param) +def test_raise_argumenterror(): + def callback(x): + pass + FUNC = CFUNCTYPE(None, c_void_p) + cfunc = FUNC(callback) + param = c_uint(42) + with pytest.raises(ArgumentError): + cfunc(param) diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_cast.py b/extra_tests/ctypes_tests/test_cast.py rename from pypy/module/test_lib_pypy/ctypes_tests/test_cast.py rename to extra_tests/ctypes_tests/test_cast.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_cast.py +++ b/extra_tests/ctypes_tests/test_cast.py @@ -1,106 +1,30 @@ +import pytest + from ctypes import * -import sys, py -from .support import BaseCTypesTestChecker -def setup_module(mod): - import conftest - mod.lib = CDLL(str(conftest.sofile)) +def test_cast_functype(dll): + # make sure we can cast function type + my_sqrt = dll.my_sqrt + saved_objects = my_sqrt._objects.copy() + sqrt = cast(cast(my_sqrt, c_void_p), CFUNCTYPE(c_double, c_double)) + assert sqrt(4.0) == 2.0 + assert not cast(0, CFUNCTYPE(c_int)) + # + assert sqrt._objects is my_sqrt._objects # on CPython too + my_sqrt._objects.clear() + my_sqrt._objects.update(saved_objects) -class TestCast(BaseCTypesTestChecker): +def test_cast_argumenterror(): + param = c_uint(42) + with pytest.raises(ArgumentError): + cast(param, c_void_p) - def test_array2pointer(self): - array = (c_int * 3)(42, 17, 2) - - # casting an array to a pointer works. 
- ptr = cast(array, POINTER(c_int)) - assert [ptr[i] for i in range(3)] == [42, 17, 2] - - if 2*sizeof(c_short) == sizeof(c_int): - ptr = cast(array, POINTER(c_short)) - if sys.byteorder == "little": - assert [ptr[i] for i in range(6)] == ( - [42, 0, 17, 0, 2, 0]) - else: - assert [ptr[i] for i in range(6)] == ( - [0, 42, 0, 17, 0, 2]) - - def test_address2pointer(self): - array = (c_int * 3)(42, 17, 2) - - address = addressof(array) - ptr = cast(c_void_p(address), POINTER(c_int)) - assert [ptr[i] for i in range(3)] == [42, 17, 2] - - ptr = cast(address, POINTER(c_int)) - assert [ptr[i] for i in range(3)] == [42, 17, 2] - - def test_p2a_objects(self): - py.test.skip("we make copies of strings") - array = (c_char_p * 5)() From pypy.commits at gmail.com Thu Jan 31 11:44:23 2019 From: pypy.commits at gmail.com (rlamy) Date: Thu, 31 Jan 2019 08:44:23 -0800 (PST) Subject: [pypy-commit] pypy apptest-file: hg merge default Message-ID: <5c5325e7.1c69fb81.1e754.3017@mx.google.com> Author: Ronan Lamy Branch: apptest-file Changeset: r95759:d25403a13858 Date: 2019-01-31 16:10 +0000 http://bitbucket.org/pypy/pypy/changeset/d25403a13858/ Log: hg merge default diff too long, truncating to 2000 out of 15681 lines diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -30,7 +30,7 @@ DEALINGS IN THE SOFTWARE. 
-PyPy Copyright holders 2003-2018 +PyPy Copyright holders 2003-2019 -------------------------------- Except when otherwise stated (look for LICENSE files or information at @@ -40,16 +40,16 @@ Armin Rigo Maciej Fijalkowski Carl Friedrich Bolz-Tereick + Antonio Cuni Amaury Forgeot d'Arc - Antonio Cuni Matti Picus Samuele Pedroni Ronan Lamy Alex Gaynor Philip Jenvey + Richard Plangger Brian Kearns - Richard Plangger - Michael Hudson + Michael Hudson-Doyle Manuel Jacob David Schneider Holger Krekel @@ -59,8 +59,8 @@ Anders Chrigstrom Wim Lavrijsen Eric van Riet Paap + Remi Meier Richard Emslie - Remi Meier Alexander Schremmer Dan Villiom Podlaski Christiansen Lukas Diekmann @@ -70,10 +70,10 @@ Niklaus Haldimann Camillo Bruni Laura Creighton - Romain Guillebert Toon Verwaest Leonardo Santagada Seo Sanghyeon + Romain Guillebert Ronny Pfannschmidt Justin Peel Raffael Tfirst @@ -114,12 +114,12 @@ Squeaky Edd Barrett Timo Paulssen + Laurence Tratt Marius Gedminas Nicolas Truessel Alexandre Fayolle Simon Burton Martin Matusiak - Laurence Tratt Wenzhu Man Konstantin Lopuhin John Witulski @@ -134,8 +134,9 @@ Jean-Philippe St. 
Pierre Guido van Rossum Pavel Vinogradov + Stefan Beyer + William Leslie Paweł Piotr Przeradowski - William Leslie marky1991 Ilya Osadchiy Tobias Oberstein @@ -144,10 +145,10 @@ Taavi Burns Adrian Kuhn tav + Stian Andreassen Georg Brandl Joannah Nanjekye Bert Freudenberg - Stian Andreassen Wanja Saatkamp Mike Blume Gerald Klix @@ -163,6 +164,7 @@ Vasily Kuznetsov Preston Timmons David Ripton + Pieter Zieschang Dusty Phillips Lukas Renggli Guenter Jantzen @@ -176,6 +178,7 @@ Andrew Durdin Ben Young Michael Schneider + Yusuke Tsutsumi Nicholas Riley Jason Chu Igor Trindade Oliveira @@ -187,7 +190,6 @@ Mariano Anaya anatoly techtonik Karl Bartel - Stefan Beyer Gabriel Lavoie Jared Grubb Alecsandru Patrascu @@ -198,7 +200,6 @@ Victor Stinner Andrews Medina Aaron Iles - p_zieschang at yahoo.de Toby Watson Daniel Patrick Stuart Williams @@ -210,6 +211,7 @@ Mikael Schönenberg Stanislaw Halik Mihnea Saracin + Matt Jackson Berkin Ilbeyi Gasper Zejn Faye Zhao @@ -217,12 +219,14 @@ Anders Qvist Corbin Simpson Chirag Jadwani + Pauli Virtanen Jonathan David Riehl Beatrice During Alex Perry Robert Zaremba Alan McIntyre Alexander Sedov + David C Ellis Vaibhav Sood Reuben Cummings Attila Gobi @@ -242,7 +246,6 @@ Arjun Naik Aaron Gallagher Alexis Daboville - Pieter Zieschang Karl Ramm Lukas Vacek Omer Katz @@ -270,12 +273,15 @@ Catalin Gabriel Manciu Jacob Oscarson Ryan Gonzalez + Antoine Dupre Kristjan Valur Jonsson Lucio Torre Richard Lancaster Dan Buch Lene Wagner Tomo Cocoa + Miro Hrončok + Anthony Sottile David Lievens Neil Blakey-Milner Henrik Vendelbo @@ -290,10 +296,12 @@ Bobby Impollonia Roberto De Ioris Jeong YunWon + andrewjlawrence Christopher Armstrong Aaron Tubbs Vasantha Ganesh K Jason Michalski + Radu Ciorba Markus Holtermann Andrew Thompson Yusei Tahara @@ -301,28 +309,26 @@ Fabio Niephaus Akira Li Gustavo Niemeyer - Rafał Gałczyński + Nate Bragg Lucas Stadler roberto at goyle + Carl Bordum Hansen Matt Bogosian Yury V. 
Zaytsev florinpapa Anders Sigfridsson - Matt Jackson Nikolay Zinov rafalgalczynski at gmail.com Joshua Gilbert Anna Katrina Dominguez Kim Jin Su Amber Brown - Miro Hrončok - Anthony Sottile - Nate Bragg + Andrew Stepanov + Rafał Gałczyński Ben Darnell Juan Francisco Cantero Hurtado Godefroid Chappelle Julian Berman - Michael Hudson-Doyle Stephan Busemann Dan Colish timo @@ -332,6 +338,7 @@ halgari Jim Baker Chris Lambacher + John Aldis coolbutuseless at gmail.com Mike Bayer Rodrigo Araújo @@ -340,6 +347,7 @@ OlivierBlanvillain Jonas Pfannschmidt Zearin + Johan Forsberg Andrey Churin Dan Crosta reubano at gmail.com @@ -349,8 +357,9 @@ Steve Papanik Eli Stevens Boglarka Vezer - gabrielg + gabrielg at ec2-54-146-239-158.compute-1.amazonaws.com PavloKapyshin + Hervé Beraud Tomer Chachamu Christopher Groskopf Asmo Soinio @@ -364,7 +373,6 @@ Michael Chermside Anna Ravencroft remarkablerocket - Pauli Virtanen Petre Vijiac Berker Peksag Christian Muirhead @@ -384,12 +392,13 @@ Zooko Wilcox-O Hearn James Lan jiaaro + Evgenii Gorinov Markus Unterwaditzer Kristoffer Kleine Graham Markall Dan Loewenherz werat - Andrew Stepanov + Filip Salomonsson Niclas Olofsson Chris Pressey Tobias Diaz diff --git a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py --- a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py +++ b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py @@ -325,8 +325,31 @@ a = array.array('H', [10000, 20000, 30000]) c = ffi.from_buffer(a) assert ffi.typeof(c) is ffi.typeof("char[]") + assert len(c) == 6 ffi.cast("unsigned short *", c)[1] += 500 assert list(a) == [10000, 20500, 30000] + assert c == ffi.from_buffer("char[]", a, True) + assert c == ffi.from_buffer(a, require_writable=True) + # + c = ffi.from_buffer("unsigned short[]", a) + assert len(c) == 3 + assert c[1] == 20500 + # + p = ffi.from_buffer(b"abcd") + assert p[2] == b"c" + # + assert p == ffi.from_buffer(b"abcd", require_writable=False) + 
py.test.raises((TypeError, BufferError), ffi.from_buffer, + "char[]", b"abcd", True) + py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", + require_writable=True) + + def test_release(self): + ffi = FFI() + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or segfault... + ffi.release(p) # no effect def test_memmove(self): ffi = FFI() diff --git a/extra_tests/cffi_tests/cffi0/test_function.py b/extra_tests/cffi_tests/cffi0/test_function.py --- a/extra_tests/cffi_tests/cffi0/test_function.py +++ b/extra_tests/cffi_tests/cffi0/test_function.py @@ -46,14 +46,15 @@ assert x != math.sin(1.23) # rounding effects assert abs(x - math.sin(1.23)) < 1E-6 - def test_lround_no_return_value(self): + def test_getenv_no_return_value(self): # check that 'void'-returning functions work too ffi = FFI(backend=self.Backend()) ffi.cdef(""" - void lround(double x); + void getenv(char *); """) - m = ffi.dlopen(lib_m) - x = m.lround(1.23) + needs_dlopen_none() + m = ffi.dlopen(None) + x = m.getenv(b"FOO") assert x is None def test_dlopen_filename(self): diff --git a/extra_tests/cffi_tests/cffi1/test_ffi_obj.py b/extra_tests/cffi_tests/cffi1/test_ffi_obj.py --- a/extra_tests/cffi_tests/cffi1/test_ffi_obj.py +++ b/extra_tests/cffi_tests/cffi1/test_ffi_obj.py @@ -239,11 +239,33 @@ def test_ffi_from_buffer(): import array ffi = _cffi1_backend.FFI() - a = array.array('H', [10000, 20000, 30000]) + a = array.array('H', [10000, 20000, 30000, 40000]) c = ffi.from_buffer(a) assert ffi.typeof(c) is ffi.typeof("char[]") + assert len(c) == 8 ffi.cast("unsigned short *", c)[1] += 500 - assert list(a) == [10000, 20500, 30000] + assert list(a) == [10000, 20500, 30000, 40000] + py.test.raises(TypeError, ffi.from_buffer, a, True) + assert c == ffi.from_buffer("char[]", a, True) + assert c == ffi.from_buffer(a, require_writable=True) + # + c = ffi.from_buffer("unsigned short[]", a) + assert len(c) == 4 + assert c[1] == 20500 + # + c = 
ffi.from_buffer("unsigned short[2][2]", a) + assert len(c) == 2 + assert len(c[0]) == 2 + assert c[0][1] == 20500 + # + p = ffi.from_buffer(b"abcd") + assert p[2] == b"c" + # + assert p == ffi.from_buffer(b"abcd", require_writable=False) + py.test.raises((TypeError, BufferError), ffi.from_buffer, + "char[]", b"abcd", True) + py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", + require_writable=True) def test_memmove(): ffi = _cffi1_backend.FFI() diff --git a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py --- a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py +++ b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py @@ -1457,6 +1457,35 @@ import gc; gc.collect(); gc.collect(); gc.collect() assert seen == [3] + def test_release(self): + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or segfault... + ffi.release(p) # no effect + + def test_release_new_allocator(self): + seen = [] + def myalloc(size): + seen.append(size) + return ffi.new("char[]", b"X" * size) + def myfree(raw): + seen.append(raw) + alloc2 = ffi.new_allocator(alloc=myalloc, free=myfree) + p = alloc2("int[]", 15) + assert seen == [15 * 4] + ffi.release(p) + assert seen == [15 * 4, p] + ffi.release(p) # no effect + assert seen == [15 * 4, p] + # + del seen[:] + p = alloc2("struct ab *") + assert seen == [2 * 4] + ffi.release(p) + assert seen == [2 * 4, p] + ffi.release(p) # no effect + assert seen == [2 * 4, p] + def test_CData_CType(self): assert isinstance(ffi.cast("int", 0), ffi.CData) assert isinstance(ffi.new("int *"), ffi.CData) @@ -1647,14 +1676,6 @@ py.test.raises(TypeError, len, q.a) py.test.raises(TypeError, list, q.a) - def test_from_buffer(self): - import array - a = array.array('H', [10000, 20000, 30000]) - c = ffi.from_buffer(a) - assert ffi.typeof(c) is ffi.typeof("char[]") - ffi.cast("unsigned short *", c)[1] += 500 - assert list(a) == [10000, 20500, 30000] - def test_all_primitives(self): 
assert set(PRIMITIVE_TO_INDEX) == set([ "char", diff --git a/extra_tests/cffi_tests/cffi1/test_parse_c_type.py b/extra_tests/cffi_tests/cffi1/test_parse_c_type.py --- a/extra_tests/cffi_tests/cffi1/test_parse_c_type.py +++ b/extra_tests/cffi_tests/cffi1/test_parse_c_type.py @@ -4,7 +4,12 @@ from cffi import cffi_opcode if '__pypy__' in sys.builtin_module_names: - py.test.skip("not available on pypy", allow_module_level=True) + try: + # pytest >= 4.0 + py.test.skip("not available on pypy", allow_module_level=True) + except TypeError: + # older pytest + py.test.skip("not available on pypy") cffi_dir = os.path.dirname(cffi_opcode.__file__) diff --git a/extra_tests/cffi_tests/cffi1/test_recompiler.py b/extra_tests/cffi_tests/cffi1/test_recompiler.py --- a/extra_tests/cffi_tests/cffi1/test_recompiler.py +++ b/extra_tests/cffi_tests/cffi1/test_recompiler.py @@ -5,7 +5,7 @@ from cffi import recompiler from extra_tests.cffi_tests.udir import udir from extra_tests.cffi_tests.support import u, long -from extra_tests.cffi_tests.support import FdWriteCapture, StdErrCapture +from extra_tests.cffi_tests.support import FdWriteCapture, StdErrCapture, _verify try: import importlib @@ -36,7 +36,7 @@ # add '-Werror' to the existing 'extra_compile_args' flags kwds['extra_compile_args'] = (kwds.get('extra_compile_args', []) + ['-Werror']) - return recompiler._verify(ffi, module_name, source, *args, **kwds) + return _verify(ffi, module_name, source, *args, **kwds) def test_set_source_no_slashes(): ffi = FFI() @@ -1539,15 +1539,18 @@ assert (pt.x, pt.y) == (99*500*999, -99*500*999) def test_extern_python_1(): + import warnings ffi = FFI() - ffi.cdef(""" + with warnings.catch_warnings(record=True) as log: + ffi.cdef(""" extern "Python" { int bar(int, int); void baz(int, int); int bok(void); void boz(void); } - """) + """) + assert len(log) == 0, "got a warning: %r" % (log,) lib = verify(ffi, 'test_extern_python_1', """ static void baz(int, int); /* forward */ """) diff --git 
a/extra_tests/cffi_tests/cffi1/test_verify1.py b/extra_tests/cffi_tests/cffi1/test_verify1.py --- a/extra_tests/cffi_tests/cffi1/test_verify1.py +++ b/extra_tests/cffi_tests/cffi1/test_verify1.py @@ -4,6 +4,7 @@ from cffi import CDefError from cffi import recompiler from extra_tests.cffi_tests.support import * +from extra_tests.cffi_tests.support import _verify import _cffi_backend lib_m = ['m'] @@ -38,9 +39,8 @@ except AttributeError: pass self.set_source(module_name, preamble) - return recompiler._verify(self, module_name, preamble, *args, - extra_compile_args=self._extra_compile_args, - **kwds) + return _verify(self, module_name, preamble, *args, + extra_compile_args=self._extra_compile_args, **kwds) class FFI_warnings_not_error(FFI): _extra_compile_args = [] diff --git a/extra_tests/cffi_tests/support.py b/extra_tests/cffi_tests/support.py --- a/extra_tests/cffi_tests/support.py +++ b/extra_tests/cffi_tests/support.py @@ -62,3 +62,28 @@ def getvalue(self): return self._value + +def _verify(ffi, module_name, preamble, *args, **kwds): + import imp + from cffi.recompiler import recompile + from .udir import udir + assert module_name not in sys.modules, "module name conflict: %r" % ( + module_name,) + kwds.setdefault('tmpdir', str(udir)) + outputfilename = recompile(ffi, module_name, preamble, *args, **kwds) + module = imp.load_dynamic(module_name, outputfilename) + # + # hack hack hack: copy all *bound methods* from module.ffi back to the + # ffi instance. Then calls like ffi.new() will invoke module.ffi.new(). 
+ for name in dir(module.ffi): + if not name.startswith('_'): + attr = getattr(module.ffi, name) + if attr is not getattr(ffi, name, object()): + setattr(ffi, name, attr) + def typeof_disabled(*args, **kwds): + raise NotImplementedError + ffi._typeof = typeof_disabled + for name in dir(ffi): + if not name.startswith('_') and not hasattr(module.ffi, name): + setattr(ffi, name, NotImplemented) + return module.lib diff --git a/pypy/module/test_lib_pypy/ctypes_tests/__init__.py b/extra_tests/ctypes_tests/__init__.py rename from pypy/module/test_lib_pypy/ctypes_tests/__init__.py rename to extra_tests/ctypes_tests/__init__.py diff --git a/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c b/extra_tests/ctypes_tests/_ctypes_test.c rename from pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c rename to extra_tests/ctypes_tests/_ctypes_test.c diff --git a/pypy/module/test_lib_pypy/ctypes_tests/conftest.py b/extra_tests/ctypes_tests/conftest.py rename from pypy/module/test_lib_pypy/ctypes_tests/conftest.py rename to extra_tests/ctypes_tests/conftest.py --- a/pypy/module/test_lib_pypy/ctypes_tests/conftest.py +++ b/extra_tests/ctypes_tests/conftest.py @@ -3,10 +3,6 @@ import sys import os -def pytest_ignore_collect(path): - if '__pypy__' not in sys.builtin_module_names: - return True - # XXX: copied from pypy/tool/cpyext/extbuild.py if os.name != 'nt': so_ext = 'so' @@ -85,8 +81,7 @@ return outputfilename # end copy -def compile_so_file(): - udir = pytest.ensuretemp('_ctypes_test') +def compile_so_file(udir): cfile = py.path.local(__file__).dirpath().join("_ctypes_test.c") if sys.platform == 'win32': @@ -96,8 +91,12 @@ return c_compile([cfile], str(udir / '_ctypes_test'), libraries=libraries) -# we need to run after the "tmpdir" plugin which installs pytest.ensuretemp - at pytest.mark.trylast -def pytest_configure(config): - global sofile - sofile = compile_so_file() + at pytest.fixture(scope='session') +def sofile(tmpdir_factory): + udir = 
tmpdir_factory.mktemp('_ctypes_test') + return str(compile_so_file(udir)) + + at pytest.fixture +def dll(sofile): + from ctypes import CDLL + return CDLL(str(sofile)) diff --git a/pypy/module/test_lib_pypy/ctypes_tests/support.py b/extra_tests/ctypes_tests/support.py rename from pypy/module/test_lib_pypy/ctypes_tests/support.py rename to extra_tests/ctypes_tests/support.py diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_anon.py b/extra_tests/ctypes_tests/test_anon.py rename from pypy/module/test_lib_pypy/ctypes_tests/test_anon.py rename to extra_tests/ctypes_tests/test_anon.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_anon.py +++ b/extra_tests/ctypes_tests/test_anon.py @@ -1,86 +1,55 @@ import pytest from ctypes import * -from .support import BaseCTypesTestChecker -class TestAnon(BaseCTypesTestChecker): + at pytest.mark.pypy_only +def test_nested(): + class ANON_S(Structure): + _fields_ = [("a", c_int)] - def test_anon(self): - class ANON(Union): - _fields_ = [("a", c_int), - ("b", c_int)] + class ANON_U(Union): + _fields_ = [("_", ANON_S), + ("b", c_int)] + _anonymous_ = ["_"] - class Y(Structure): - _fields_ = [("x", c_int), - ("_", ANON), - ("y", c_int)] - _anonymous_ = ["_"] + class Y(Structure): + _fields_ = [("x", c_int), + ("_", ANON_U), + ("y", c_int)] + _anonymous_ = ["_"] - assert Y.a.offset == sizeof(c_int) - assert Y.b.offset == sizeof(c_int) + assert Y.x.offset == 0 + assert Y.a.offset == sizeof(c_int) + assert Y.b.offset == sizeof(c_int) + assert Y._.offset == sizeof(c_int) + assert Y.y.offset == sizeof(c_int) * 2 - assert ANON.a.offset == 0 - assert ANON.b.offset == 0 + assert Y._names_ == ['x', 'a', 'b', 'y'] - def test_anon_nonseq(self): - # TypeError: _anonymous_ must be a sequence - with pytest.raises(TypeError): - type(Structure)( - "Name", (Structure,), {"_fields_": [], "_anonymous_": 42}) +def test_anonymous_fields_on_instance(): + # this is about the *instance-level* access of anonymous fields, + # which you'd guess is the 
most common, but used not to work + # (issue #2230) - def test_anon_nonmember(self): - # AttributeError: type object 'Name' has no attribute 'x' - with pytest.raises(AttributeError): - type(Structure)( - "Name", (Structure,), {"_fields_": [], "_anonymous_": ["x"]}) + class B(Structure): + _fields_ = [("x", c_int), ("y", c_int), ("z", c_int)] + class A(Structure): + _anonymous_ = ["b"] + _fields_ = [("b", B)] - def test_nested(self): - class ANON_S(Structure): - _fields_ = [("a", c_int)] + a = A() + a.x = 5 + assert a.x == 5 + assert a.b.x == 5 + a.b.x += 1 + assert a.x == 6 - class ANON_U(Union): - _fields_ = [("_", ANON_S), - ("b", c_int)] - _anonymous_ = ["_"] + class C(Structure): + _anonymous_ = ["a"] + _fields_ = [("v", c_int), ("a", A)] - class Y(Structure): - _fields_ = [("x", c_int), - ("_", ANON_U), - ("y", c_int)] - _anonymous_ = ["_"] - - assert Y.x.offset == 0 - assert Y.a.offset == sizeof(c_int) - assert Y.b.offset == sizeof(c_int) - assert Y._.offset == sizeof(c_int) - assert Y.y.offset == sizeof(c_int) * 2 - - assert Y._names_ == ['x', 'a', 'b', 'y'] - - def test_anonymous_fields_on_instance(self): - # this is about the *instance-level* access of anonymous fields, - # which you'd guess is the most common, but used not to work - # (issue #2230) - - class B(Structure): - _fields_ = [("x", c_int), ("y", c_int), ("z", c_int)] - class A(Structure): - _anonymous_ = ["b"] - _fields_ = [("b", B)] - - a = A() - a.x = 5 - assert a.x == 5 - assert a.b.x == 5 - a.b.x += 1 - assert a.x == 6 - - class C(Structure): - _anonymous_ = ["a"] - _fields_ = [("v", c_int), ("a", A)] - - c = C() - c.v = 3 - c.y = -8 - assert c.v == 3 - assert c.y == c.a.y == c.a.b.y == -8 - assert not hasattr(c, 'b') + c = C() + c.v = 3 + c.y = -8 + assert c.v == 3 + assert c.y == c.a.y == c.a.b.y == -8 + assert not hasattr(c, 'b') diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_array.py b/extra_tests/ctypes_tests/test_array.py rename from 
pypy/module/test_lib_pypy/ctypes_tests/test_array.py rename to extra_tests/ctypes_tests/test_array.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_array.py +++ b/extra_tests/ctypes_tests/test_array.py @@ -1,177 +1,64 @@ import pytest from ctypes import * -from .support import BaseCTypesTestChecker -formats = "bBhHiIlLqQfd" +def test_slice(): + values = list(range(5)) + numarray = c_int * 5 -formats = c_byte, c_ubyte, c_short, c_ushort, c_int, c_uint, \ - c_long, c_ulonglong, c_float, c_double + na = numarray(*(c_int(x) for x in values)) -class TestArray(BaseCTypesTestChecker): - def test_simple(self): - # create classes holding simple numeric types, and check - # various properties. + assert list(na[0:0]) == [] + assert list(na[:]) == values + assert list(na[:10]) == values - init = range(15, 25) +def test_init_again(): + sz = (c_char * 3)() + addr1 = addressof(sz) + sz.__init__(*b"foo") + addr2 = addressof(sz) + assert addr1 == addr2 - for fmt in formats: - alen = len(init) - int_array = ARRAY(fmt, alen) +def test_array_of_structures(): + class X(Structure): + _fields_ = [('x', c_int), ('y', c_int)] - ia = int_array(*init) - # length of instance ok? - assert len(ia) == alen + Y = X * 2 + y = Y() + x = X() + x.y = 3 + y[1] = x + assert y[1].y == 3 - # slot values ok? - values = [ia[i] for i in range(len(init))] - assert values == init +def test_output_simple(): + A = c_char * 10 + TP = POINTER(A) + x = TP(A()) + assert x[0] != b'' - # change the items - from operator import setitem - new_values = range(42, 42+alen) - [setitem(ia, n, new_values[n]) for n in range(alen)] - values = [ia[i] for i in range(len(init))] - assert values == new_values + A = c_wchar * 10 + TP = POINTER(A) + x = TP(A()) + assert x[0] != b'' - # are the items initialized to 0? 
- ia = int_array() - values = [ia[i] for i in range(len(init))] - assert values == [0] * len(init) +def test_output_simple_array(): + A = c_char * 10 + AA = A * 10 + aa = AA() + assert aa[0] != b'' - # Too many in itializers should be caught - with pytest.raises(IndexError): - int_array(*range(alen*2)) +def test_output_complex_test(): + class Car(Structure): + _fields_ = [("brand", c_char * 10), + ("speed", c_float), + ("owner", c_char * 10)] - CharArray = ARRAY(c_char, 3) + assert isinstance(Car(b"abcdefghi", 42.0, b"12345").brand, bytes) + assert Car(b"abcdefghi", 42.0, b"12345").brand == b"abcdefghi" + assert Car(b"abcdefghio", 42.0, b"12345").brand == b"abcdefghio" + with pytest.raises(ValueError): + Car(b"abcdefghiop", 42.0, b"12345") - ca = CharArray("a", "b", "c") - - # Should this work? It doesn't: - # CharArray("abc") - with pytest.raises(TypeError): - CharArray("abc") - - assert ca[0] == "a" - assert ca[1] == "b" - assert ca[2] == "c" - assert ca[-3] == "a" - assert ca[-2] == "b" - assert ca[-1] == "c" - - assert len(ca) == 3 - - # slicing is now supported, but not extended slicing (3-argument)! 
- from operator import getslice, delitem - with pytest.raises(TypeError): - getslice(ca, 0, 1, -1) - - # cannot delete items - with pytest.raises(TypeError): - delitem(ca, 0) - - def test_numeric_arrays(self): - - alen = 5 - - numarray = ARRAY(c_int, alen) - - na = numarray() - values = [na[i] for i in range(alen)] - assert values == [0] * alen - - na = numarray(*[c_int()] * alen) - values = [na[i] for i in range(alen)] - assert values == [0]*alen - - na = numarray(1, 2, 3, 4, 5) - values = [i for i in na] - assert values == [1, 2, 3, 4, 5] - - na = numarray(*map(c_int, (1, 2, 3, 4, 5))) - values = [i for i in na] - assert values == [1, 2, 3, 4, 5] - - def test_slice(self): - values = range(5) - numarray = c_int * 5 - - na = numarray(*(c_int(x) for x in values)) - - assert list(na[0:0]) == [] - assert list(na[:]) == values - assert list(na[:10]) == values - - def test_classcache(self): - assert not ARRAY(c_int, 3) is ARRAY(c_int, 4) - assert ARRAY(c_int, 3) is ARRAY(c_int, 3) - - def test_from_address(self): - # Failed with 0.9.8, reported by JUrner - p = create_string_buffer("foo") - sz = (c_char * 3).from_address(addressof(p)) - assert sz[:] == "foo" - assert sz.value == "foo" - - def test_init_again(self): - sz = (c_char * 3)() - addr1 = addressof(sz) - sz.__init__(*"foo") - addr2 = addressof(sz) - assert addr1 == addr2 - - try: - create_unicode_buffer - except NameError: - pass - else: - def test_from_addressW(self): - p = create_unicode_buffer("foo") - sz = (c_wchar * 3).from_address(addressof(p)) - assert sz[:] == "foo" - assert sz.value == "foo" - -class TestSophisticatedThings(BaseCTypesTestChecker): - def test_array_of_structures(self): - class X(Structure): - _fields_ = [('x', c_int), ('y', c_int)] - - Y = X * 2 - y = Y() - x = X() - x.y = 3 - y[1] = x - assert y[1].y == 3 - - def test_output_simple(self): - A = c_char * 10 - TP = POINTER(A) - x = TP(A()) - assert x[0] != '' - - A = c_wchar * 10 - TP = POINTER(A) - x = TP(A()) - assert x[0] != '' - - def 
test_output_simple_array(self): - A = c_char * 10 - AA = A * 10 - aa = AA() - assert aa[0] != '' - - def test_output_complex_test(self): - class Car(Structure): - _fields_ = [("brand", c_char * 10), - ("speed", c_float), - ("owner", c_char * 10)] - - assert isinstance(Car("abcdefghi", 42.0, "12345").brand, bytes) - assert Car("abcdefghi", 42.0, "12345").brand == "abcdefghi" - assert Car("abcdefghio", 42.0, "12345").brand == "abcdefghio" - with pytest.raises(ValueError): - Car("abcdefghiop", 42.0, "12345") - - A = Car._fields_[2][1] - TP = POINTER(A) - x = TP(A()) - assert x[0] != '' + A = Car._fields_[2][1] + TP = POINTER(A) + x = TP(A()) + assert x[0] != b'' diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_base.py b/extra_tests/ctypes_tests/test_base.py rename from pypy/module/test_lib_pypy/ctypes_tests/test_base.py rename to extra_tests/ctypes_tests/test_base.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_base.py +++ b/extra_tests/ctypes_tests/test_base.py @@ -1,26 +1,24 @@ -from .support import WhiteBoxTests - +import pytest from ctypes import * -# WhiteBoxTests +pytestmark = pytest.mark.pypy_only -class TestCTypesBase(WhiteBoxTests): - def test_pointer(self): - p = pointer(pointer(c_int(2))) - x = p[0] - assert x._base is p +def test_pointer(): + p = pointer(pointer(c_int(2))) + x = p[0] + assert x._base is p - def test_structure(self): - class X(Structure): - _fields_ = [('x', POINTER(c_int)), - ('y', POINTER(c_int))] +def test_structure(): + class X(Structure): + _fields_ = [('x', POINTER(c_int)), + ('y', POINTER(c_int))] - x = X() - assert x.y._base is x - assert x.y._index == 1 + x = X() + assert x.y._base is x + assert x.y._index == 1 - def test_array(self): - X = POINTER(c_int) * 24 - x = X() - assert x[16]._base is x - assert x[16]._index == 16 +def test_array(): + X = POINTER(c_int) * 24 + x = X() + assert x[16]._base is x + assert x[16]._index == 16 diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_bitfields.py 
b/extra_tests/ctypes_tests/test_bitfields.py rename from pypy/module/test_lib_pypy/ctypes_tests/test_bitfields.py rename to extra_tests/ctypes_tests/test_bitfields.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_bitfields.py +++ b/extra_tests/ctypes_tests/test_bitfields.py @@ -1,249 +1,19 @@ import pytest from ctypes import * -from .support import BaseCTypesTestChecker -import os -import ctypes -signed_int_types = (c_byte, c_short, c_int, c_long, c_longlong) -unsigned_int_types = (c_ubyte, c_ushort, c_uint, c_ulong, c_ulonglong) -int_types = unsigned_int_types + signed_int_types +def test_set_fields_attr(): + class A(Structure): + pass + A._fields_ = [("a", c_byte), ("b", c_ubyte)] +def test_set_fields_attr_bitfields(): + class A(Structure): + pass + A._fields_ = [("a", POINTER(A)), ("b", c_ubyte, 4)] -def setup_module(mod): - import conftest - _ctypes_test = str(conftest.sofile) - func = CDLL(_ctypes_test).unpack_bitfields - func.argtypes = POINTER(BITS), c_char - mod.func = func - - -class BITS(Structure): - _fields_ = [("A", c_int, 1), - ("B", c_int, 2), - ("C", c_int, 3), - ("D", c_int, 4), - ("E", c_int, 5), - ("F", c_int, 6), - ("G", c_int, 7), - ("H", c_int, 8), - ("I", c_int, 9), - - ("M", c_short, 1), - ("N", c_short, 2), - ("O", c_short, 3), - ("P", c_short, 4), - ("Q", c_short, 5), - ("R", c_short, 6), - ("S", c_short, 7)] - - -class TestC: - def test_ints(self): - for i in range(512): - for name in "ABCDEFGHI": - b = BITS() - setattr(b, name, i) - assert (name, i, getattr(b, name)) == (name, i, func(byref(b), name)) - - def test_shorts(self): - for i in range(256): - for name in "MNOPQRS": - b = BITS() - setattr(b, name, i) - assert (name, i, getattr(b, name)) == (name, i, func(byref(b), name)) - - -class TestBitField: - def test_longlong(self): - class X(Structure): - _fields_ = [("a", c_longlong, 1), - ("b", c_longlong, 62), - ("c", c_longlong, 1)] - - assert sizeof(X) == sizeof(c_longlong) - x = X() - x.a, x.b, x.c = -1, 7, -1 - assert (x.a, 
x.b, x.c) == (-1, 7, -1) - - x = X() - x.a, x.b, x.c = -1, -7, -1 - assert (x.a, x.b, x.c) == (-1, -7, -1) - - def test_ulonglong(self): - class X(Structure): - _fields_ = [("a", c_ulonglong, 1), - ("b", c_ulonglong, 62), - ("c", c_ulonglong, 1)] - - assert sizeof(X) == sizeof(c_longlong) - x = X() - assert (x.a, x.b, x.c) == (0, 0, 0) - x.a, x.b, x.c = 7, 2305843009213693953, 7 - assert (x.a, x.b, x.c) == (1, 2305843009213693953, 1) - - def test_signed(self): - for c_typ in signed_int_types: - class X(Structure): - _fields_ = [("dummy", c_typ), - ("a", c_typ, 3), - ("b", c_typ, 3), - ("c", c_typ, 1)] - assert sizeof(X) == sizeof(c_typ)*2 - - x = X() - assert (c_typ, x.a, x.b, x.c) == (c_typ, 0, 0, 0) - x.a = -1 - assert (c_typ, x.a, x.b, x.c) == (c_typ, -1, 0, 0) - x.a, x.b = 0, -1 - assert (c_typ, x.a, x.b, x.c) == (c_typ, 0, -1, 0) - - def test_unsigned(self): - for c_typ in unsigned_int_types: - class X(Structure): - _fields_ = [("a", c_typ, 3), - ("b", c_typ, 3), - ("c", c_typ, 1)] - assert sizeof(X) == sizeof(c_typ) - - x = X() - assert (c_typ, x.a, x.b, x.c) == (c_typ, 0, 0, 0) - x.a = -1 - assert (c_typ, x.a, x.b, x.c) == (c_typ, 7, 0, 0) - x.a, x.b = 0, -1 - assert (c_typ, x.a, x.b, x.c) == (c_typ, 0, 7, 0) - - def fail_fields(self, *fields): - return self.get_except(type(Structure), "X", (), - {"_fields_": fields}) - - def test_nonint_types(self): - # bit fields are not allowed on non-integer types. 
- result = self.fail_fields(("a", c_char_p, 1)) - assert result == (TypeError, 'bit fields not allowed for type c_char_p') - - result = self.fail_fields(("a", c_void_p, 1)) - assert result == (TypeError, 'bit fields not allowed for type c_void_p') - - if c_int != c_long: - result = self.fail_fields(("a", POINTER(c_int), 1)) - assert result == (TypeError, 'bit fields not allowed for type LP_c_int') - - result = self.fail_fields(("a", c_char, 1)) - assert result == (TypeError, 'bit fields not allowed for type c_char') - - try: - c_wchar - except NameError: - pass - else: - result = self.fail_fields(("a", c_wchar, 1)) - assert result == (TypeError, 'bit fields not allowed for type c_wchar') - - class Dummy(Structure): - _fields_ = [] - - result = self.fail_fields(("a", Dummy, 1)) - assert result == (TypeError, 'bit fields not allowed for type Dummy') - - def test_single_bitfield_size(self): - for c_typ in int_types: - result = self.fail_fields(("a", c_typ, -1)) - assert result == (ValueError, 'number of bits invalid for bit field') - - result = self.fail_fields(("a", c_typ, 0)) - assert result == (ValueError, 'number of bits invalid for bit field') - - class X(Structure): - _fields_ = [("a", c_typ, 1)] - assert sizeof(X) == sizeof(c_typ) - - class X(Structure): - _fields_ = [("a", c_typ, sizeof(c_typ)*8)] - assert sizeof(X) == sizeof(c_typ) - - result = self.fail_fields(("a", c_typ, sizeof(c_typ)*8 + 1)) - assert result == (ValueError, 'number of bits invalid for bit field') - - def test_multi_bitfields_size(self): - class X(Structure): - _fields_ = [("a", c_short, 1), - ("b", c_short, 14), - ("c", c_short, 1)] - assert sizeof(X) == sizeof(c_short) - - class X(Structure): - _fields_ = [("a", c_short, 1), - ("a1", c_short), - ("b", c_short, 14), - ("c", c_short, 1)] - assert sizeof(X) == sizeof(c_short)*3 - assert X.a.offset == 0 - assert X.a1.offset == sizeof(c_short) - assert X.b.offset == sizeof(c_short)*2 - assert X.c.offset == sizeof(c_short)*2 - - class 
X(Structure): - _fields_ = [("a", c_short, 3), - ("b", c_short, 14), - ("c", c_short, 14)] - assert sizeof(X) == sizeof(c_short)*3 - assert X.a.offset == sizeof(c_short)*0 - assert X.b.offset == sizeof(c_short)*1 - assert X.c.offset == sizeof(c_short)*2 - - def get_except(self, func, *args, **kw): - try: - func(*args, **kw) - except Exception as detail: - import traceback - traceback.print_exc() - return detail.__class__, str(detail) - - def test_mixed_1(self): - class X(Structure): - _fields_ = [("a", c_byte, 4), - ("b", c_int, 4)] - if os.name in ("nt", "ce"): - assert sizeof(X) == sizeof(c_int)*2 - else: - assert sizeof(X) == sizeof(c_int) - - def test_mixed_2(self): - class X(Structure): - _fields_ = [("a", c_byte, 4), - ("b", c_int, 32)] - assert sizeof(X) == sizeof(c_int)*2 - - def test_mixed_3(self): - class X(Structure): - _fields_ = [("a", c_byte, 4), - ("b", c_ubyte, 4)] - assert sizeof(X) == sizeof(c_byte) - - def test_anon_bitfields(self): - # anonymous bit-fields gave a strange error message - class X(Structure): - _fields_ = [("a", c_byte, 4), - ("b", c_ubyte, 4)] - class Y(Structure): - _anonymous_ = ["_"] - _fields_ = [("_", X)] - - def test_set_fields_attr(self): - class A(Structure): - pass - A._fields_ = [("a", c_byte), - ("b", c_ubyte)] - - def test_set_fields_attr_bitfields(self): - class A(Structure): - pass - A._fields_ = [("a", POINTER(A)), - ("b", c_ubyte, 4)] - - def test_set_fields_cycle_fails(self): - class A(Structure): - pass - with pytest.raises(AttributeError): - A._fields_ = [("a", A)] +def test_set_fields_cycle_fails(): + class A(Structure): + pass + with pytest.raises(AttributeError): + A._fields_ = [("a", A)] diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py b/extra_tests/ctypes_tests/test_buffers.py rename from pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py rename to extra_tests/ctypes_tests/test_buffers.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py +++ 
b/extra_tests/ctypes_tests/test_buffers.py @@ -1,76 +1,38 @@ from ctypes import * -from .support import BaseCTypesTestChecker -class TestStringBuffer(BaseCTypesTestChecker): +def test_buffer(): + b = create_string_buffer(32) + assert len(b) == 32 + assert sizeof(b) == 32 * sizeof(c_char) + assert type(b[0]) is str - def test_buffer(self): - b = create_string_buffer(32) - assert len(b) == 32 - assert sizeof(b) == 32 * sizeof(c_char) - assert type(b[0]) is str + b = create_string_buffer(33L) + assert len(b) == 33 + assert sizeof(b) == 33 * sizeof(c_char) + assert type(b[0]) is str - b = create_string_buffer(33L) - assert len(b) == 33 - assert sizeof(b) == 33 * sizeof(c_char) - assert type(b[0]) is str + b = create_string_buffer(b"abc") + assert len(b) == 4 # trailing nul char + assert sizeof(b) == 4 * sizeof(c_char) + assert type(b[0]) is str + assert b[0] == b"a" + assert b[:] == b"abc\0" - b = create_string_buffer("abc") - assert len(b) == 4 # trailing nul char - assert sizeof(b) == 4 * sizeof(c_char) - assert type(b[0]) is str - assert b[0] == "a" - assert b[:] == "abc\0" +def test_from_buffer(): + b1 = bytearray(b"abcde") + b = (c_char * 5).from_buffer(b1) + assert b[2] == b"c" + # + b1 = bytearray(b"abcd") + b = c_int.from_buffer(b1) + assert b.value in (1684234849, # little endian + 1633837924) # big endian - def test_string_conversion(self): - b = create_string_buffer(u"abc") - assert len(b) == 4 # trailing nul char - assert sizeof(b) == 4 * sizeof(c_char) - assert type(b[0]) is str - assert b[0] == "a" - assert b[:] == "abc\0" - - def test_from_buffer(self): - b1 = bytearray("abcde") - b = (c_char * 5).from_buffer(b1) - assert b[2] == "c" - # - b1 = bytearray("abcd") - b = c_int.from_buffer(b1) - assert b.value in (1684234849, # little endian - 1633837924) # big endian - - def test_from_buffer_keepalive(self): - # Issue #2878 - b1 = bytearray("ab") - array = (c_uint16 * 32)() - array[6] = c_uint16.from_buffer(b1) - # this is also what we get on CPython. 
I don't think it makes - # sense because the array contains just a copy of the number. - assert array._objects == {'6': b1} - - try: - c_wchar - except NameError: - pass - else: - def test_unicode_buffer(self): - b = create_unicode_buffer(32) - assert len(b) == 32 - assert sizeof(b) == 32 * sizeof(c_wchar) - assert type(b[0]) is unicode - - b = create_unicode_buffer(u"abc") - assert len(b) == 4 # trailing nul char - assert sizeof(b) == 4 * sizeof(c_wchar) - assert type(b[0]) is unicode - assert b[0] == u"a" - assert b[:] == "abc\0" - - def test_unicode_conversion(self): - b = create_unicode_buffer("abc") - assert len(b) == 4 # trailing nul char - assert sizeof(b) == 4 * sizeof(c_wchar) - assert type(b[0]) is unicode - assert b[0] == u"a" - assert b[:] == "abc\0" - +def test_from_buffer_keepalive(): + # Issue #2878 + b1 = bytearray(b"ab") + array = (c_uint16 * 32)() + array[6] = c_uint16.from_buffer(b1) + # this is also what we get on CPython. I don't think it makes + # sense because the array contains just a copy of the number. + assert array._objects == {'6': b1} diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_callback_traceback.py b/extra_tests/ctypes_tests/test_callback_traceback.py rename from pypy/module/test_lib_pypy/ctypes_tests/test_callback_traceback.py rename to extra_tests/ctypes_tests/test_callback_traceback.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_callback_traceback.py +++ b/extra_tests/ctypes_tests/test_callback_traceback.py @@ -1,80 +1,35 @@ # derived from test_random_things.py -import py +import pytest + from ctypes import * -import sys -def callback_func(arg): - 42 / arg - raise ValueError(arg) +_rawffi = pytest.importorskip('_rawffi') -class TestCallbackTraceback: - # When an exception is raised in a ctypes callback function, the C - # code prints a traceback. +# +# This test makes sure the exception types *and* the exception +# value is printed correctly. 
+ + at pytest.mark.skipif("sys.flags.inspect") +def test_SystemExit(monkeypatch, capsys): + """ + When an exception is raised in a ctypes callback function, the C + code prints a traceback. When SystemExit is raised, the interpreter + normally exits immediately. + """ + def callback_func(arg): + raise SystemExit(42) + def custom_exit(value): + raise Exception("<<>>" % (value,)) + monkeypatch.setattr(_rawffi, 'exit', custom_exit) + cb = CFUNCTYPE(c_int, c_int)(callback_func) + cb2 = cast(cast(cb, c_void_p), CFUNCTYPE(c_int, c_int)) + out, err = capsys.readouterr() + assert not err + cb2(0) + out, err = capsys.readouterr() + assert err.splitlines()[-1] == "Exception: <<>>" # - # This test makes sure the exception types *and* the exception - # value is printed correctly. - # - # Changed in 0.9.3: No longer is '(in callback)' prepended to the - # error message - instead a additional frame for the C code is - # created, then a full traceback printed. When SystemExit is - # raised in a callback function, the interpreter exits. 
- - def capture_stderr(self, func, *args, **kw): - # helper - call function 'func', and return the captured stderr - import StringIO - old_stderr = sys.stderr - logger = sys.stderr = StringIO.StringIO() - try: - func(*args, **kw) - finally: - sys.stderr = old_stderr - return logger.getvalue() - - def test_ValueError(self): - cb = CFUNCTYPE(c_int, c_int)(callback_func) - out = self.capture_stderr(cb, 42) - assert out.splitlines()[-1] == ( - "ValueError: 42") - - def test_IntegerDivisionError(self): - cb = CFUNCTYPE(c_int, c_int)(callback_func) - out = self.capture_stderr(cb, 0) - assert out.splitlines()[-1][:19] == ( - "ZeroDivisionError: ") - - def test_FloatDivisionError(self): - cb = CFUNCTYPE(c_int, c_double)(callback_func) - out = self.capture_stderr(cb, 0.0) - assert out.splitlines()[-1][:19] == ( - "ZeroDivisionError: ") - - def test_TypeErrorDivisionError(self): - cb = CFUNCTYPE(c_int, c_char_p)(callback_func) - out = self.capture_stderr(cb, "spam") - assert out.splitlines()[-1].startswith( - "TypeError: " - "unsupported operand type(s) for") - - def test_SystemExit(self): - import _rawffi - if sys.flags.inspect: - skip("requires sys.flags.inspect == 0") - def callback_func(arg): - raise SystemExit(42) - def custom_exit(value): - raise Exception("<<>>" % (value,)) - original_exit = _rawffi.exit - try: - _rawffi.exit = custom_exit - # - cb = CFUNCTYPE(c_int, c_int)(callback_func) - cb2 = cast(cast(cb, c_void_p), CFUNCTYPE(c_int, c_int)) - out = self.capture_stderr(cb2, 0) - assert out.splitlines()[-1] == "Exception: <<>>" - # - cb = CFUNCTYPE(c_int, c_int)(callback_func) - out = self.capture_stderr(cb, 0) - assert out.splitlines()[-1] == "Exception: <<>>" - # - finally: - _rawffi.exit = original_exit + cb = CFUNCTYPE(c_int, c_int)(callback_func) + cb(0) + out, err = capsys.readouterr() + assert err.splitlines()[-1] == "Exception: <<>>" diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py b/extra_tests/ctypes_tests/test_callbacks.py rename 
from pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py rename to extra_tests/ctypes_tests/test_callbacks.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py +++ b/extra_tests/ctypes_tests/test_callbacks.py @@ -1,283 +1,194 @@ +import pytest + +import math from ctypes import * -import pytest from .support import BaseCTypesTestChecker -class TestCallbacks(BaseCTypesTestChecker): - functype = CFUNCTYPE - -## def tearDown(self): -## import gc -## gc.collect() - - def callback(self, *args): - self.got_args = args - return args[-1] - - def check_type(self, typ, arg): - unwrapped_types = { - c_float: (float,), - c_double: (float,), - c_char: (str,), - c_char_p: (str,), - c_uint: (int, long), - c_ulong: (int, long), - } - - PROTO = self.functype.im_func(typ, typ) - cfunc = PROTO(self.callback) - result = cfunc(arg) - if typ == c_float: - assert abs(result - arg) < 0.000001 - else: - assert self.got_args == (arg,) - assert result == arg - - result2 = cfunc(typ(arg)) - assert type(result2) in unwrapped_types.get(typ, (int, long)) - - PROTO = self.functype.im_func(typ, c_byte, typ) - result = PROTO(self.callback)(-3, arg) - if typ == c_float: - assert abs(result - arg) < 0.000001 - else: - assert self.got_args == (-3, arg) - assert result == arg - - ################ - - def test_byte(self): - self.check_type(c_byte, 42) - self.check_type(c_byte, -42) - - def test_ubyte(self): - self.check_type(c_ubyte, 42) - - def test_short(self): - self.check_type(c_short, 42) - self.check_type(c_short, -42) - - def test_ushort(self): - self.check_type(c_ushort, 42) - - def test_int(self): - self.check_type(c_int, 42) - self.check_type(c_int, -42) - - def test_uint(self): - self.check_type(c_uint, 42) - - def test_long(self): - self.check_type(c_long, 42) - self.check_type(c_long, -42) - - def test_ulong(self): - self.check_type(c_ulong, 42) - - def test_longlong(self): - self.check_type(c_longlong, 42) - self.check_type(c_longlong, -42) - - def test_ulonglong(self): - 
self.check_type(c_ulonglong, 42) - - def test_float(self): - # only almost equal: double -> float -> double - import math - self.check_type(c_float, math.e) - self.check_type(c_float, -math.e) - - def test_double(self): - self.check_type(c_double, 3.14) - self.check_type(c_double, -3.14) - - def test_char(self): - self.check_type(c_char, "x") - self.check_type(c_char, "a") - - # disabled: would now (correctly) raise a RuntimeWarning about - # a memory leak. A callback function cannot return a non-integral - # C type without causing a memory leak. -## def test_char_p(self): -## self.check_type(c_char_p, "abc") -## self.check_type(c_char_p, "def") - - - @pytest.mark.xfail( - reason="we are less strict about callback return type sanity") - def test_unsupported_restype_1(self): - # Only "fundamental" result types are supported for callback - # functions, the type must have a non-NULL stgdict->setfunc. - # POINTER(c_double), for example, is not supported. - - prototype = self.functype.im_func(POINTER(c_double)) - # The type is checked when the prototype is called - with pytest.raises(TypeError): - prototype(lambda: None) - +functypes = [CFUNCTYPE] try: - WINFUNCTYPE + functypes.append(WINFUNCTYPE) except NameError: pass -else: - class TestStdcallCallbacks(TestCallbacks): - functype = WINFUNCTYPE -################################################################ -class TestSampleCallbacks(BaseCTypesTestChecker): +def callback(*args): + callback.got_args = args + return args[-1] - def test_integrate(self): - # Derived from some then non-working code, posted by David Foster - import conftest - _ctypes_test = str(conftest.sofile) - dll = CDLL(_ctypes_test) +unwrapped_types = { + c_float: (float,), + c_double: (float,), + c_char: (str,), + c_char_p: (str,), + c_uint: (int, long), + c_ulong: (int, long), + } - # The function prototype called by 'integrate': double func(double); - CALLBACK = CFUNCTYPE(c_double, c_double) + at pytest.mark.parametrize("typ, arg", [ + (c_byte, 
42), + (c_byte, -42), + (c_ubyte, 42), + (c_short, 42), + (c_short, -42), + (c_ushort, 42), + (c_int, 42), + (c_int, -42), + (c_uint, 42), + (c_long, 42), + (c_long, -42), + (c_ulong, 42), + (c_longlong, 42), + (c_longlong, -42), + (c_ulonglong, 42), + (c_float, math.e), # only almost equal: double -> float -> double + (c_float, -math.e), + (c_double, 3.14), + (c_double, -3.14), + (c_char, b"x"), + (c_char, b"a"), +]) + at pytest.mark.parametrize('functype', functypes) +def test_types(typ, arg, functype): + PROTO = functype(typ, typ) + cfunc = PROTO(callback) + result = cfunc(arg) + if typ == c_float: + assert abs(result - arg) < 0.000001 + else: + assert callback.got_args == (arg,) + assert result == arg - # The integrate function itself, exposed from the _ctypes_test dll - integrate = dll.integrate - integrate.argtypes = (c_double, c_double, CALLBACK, c_long) - integrate.restype = c_double + result2 = cfunc(typ(arg)) + assert type(result2) in unwrapped_types.get(typ, (int, long)) - def func(x): - print 'calculating x**2 of',x - return x**2 + PROTO = functype(typ, c_byte, typ) + result = PROTO(callback)(-3, arg) + if typ == c_float: + assert abs(result - arg) < 0.000001 + else: + assert callback.got_args == (-3, arg) + assert result == arg - result = integrate(0.0, 1.0, CALLBACK(func), 10) - diff = abs(result - 1./3.) + at pytest.mark.parametrize('functype', functypes) +def test_unsupported_restype_1(functype): + # Only "fundamental" result types are supported for callback + # functions, the type must have a non-NULL stgdict->setfunc. + # POINTER(c_double), for example, is not supported. 
- assert diff < 0.01, "%s not less than 0.01" % diff + prototype = functype(POINTER(c_double)) + # The type is checked when the prototype is called + with pytest.raises(TypeError): + prototype(lambda: None) -################################################################ -class TestMoreCallbacks(BaseCTypesTestChecker): +def test_callback_with_struct_argument(): + class RECT(Structure): + _fields_ = [("left", c_int), ("top", c_int), + ("right", c_int), ("bottom", c_int)] - def test_callback_with_struct_argument(self): - class RECT(Structure): - _fields_ = [("left", c_int), ("top", c_int), - ("right", c_int), ("bottom", c_int)] + proto = CFUNCTYPE(c_int, RECT) - proto = CFUNCTYPE(c_int, RECT) - def callback(point): - point.left *= -1 - return point.left+point.top+point.right+point.bottom + def callback(point): + point.left *= -1 + return point.left + point.top + point.right + point.bottom - cbp = proto(callback) + cbp = proto(callback) + rect = RECT(-1000, 100, 10, 1) + res = cbp(rect) + assert res == 1111 + assert rect.left == -1000 # must not have been changed! - rect = RECT(-1000,100,10,1) +def test_callback_from_c_with_struct_argument(dll): + class RECT(Structure): + _fields_ = [("left", c_long), ("top", c_long), + ("right", c_long), ("bottom", c_long)] - res = cbp(rect) + proto = CFUNCTYPE(c_int, RECT) - assert res == 1111 - assert rect.left == -1000 # must not have been changed! 
+ def callback(point): + return point.left + point.top + point.right + point.bottom - def test_callback_from_c_with_struct_argument(self): - import conftest - _ctypes_test = str(conftest.sofile) - dll = CDLL(_ctypes_test) + cbp = proto(callback) + rect = RECT(1000, 100, 10, 1) - class RECT(Structure): - _fields_ = [("left", c_long), ("top", c_long), - ("right", c_long), ("bottom", c_long)] + call_callback_with_rect = dll.call_callback_with_rect + call_callback_with_rect.restype = c_int + call_callback_with_rect.argtypes = [proto, RECT] + res = call_callback_with_rect(cbp, rect) + assert res == 1111 - proto = CFUNCTYPE(c_int, RECT) - def callback(point): - return point.left+point.top+point.right+point.bottom +def test_callback_unsupported_return_struct(): + class RECT(Structure): + _fields_ = [("left", c_int), ("top", c_int), + ("right", c_int), ("bottom", c_int)] - cbp = proto(callback) - rect = RECT(1000,100,10,1) + proto = CFUNCTYPE(RECT, c_int) + with pytest.raises(TypeError): + proto(lambda r: 0) - call_callback_with_rect = dll.call_callback_with_rect - call_callback_with_rect.restype = c_int - call_callback_with_rect.argtypes = [proto, RECT] - res = call_callback_with_rect(cbp, rect) - assert res == 1111 - def test_callback_unsupported_return_struct(self): - class RECT(Structure): - _fields_ = [("left", c_int), ("top", c_int), - ("right", c_int), ("bottom", c_int)] +def test_qsort(dll): + PI = POINTER(c_int) + A = c_int*5 + a = A() + for i in range(5): + a[i] = 5-i - proto = CFUNCTYPE(RECT, c_int) - with pytest.raises(TypeError): - proto(lambda r: 0) + assert a[0] == 5 # sanity + def comp(a, b): + a = a.contents.value + b = b.contents.value + return cmp(a,b) + qs = dll.my_qsort + qs.restype = None + CMP = CFUNCTYPE(c_int, PI, PI) + qs.argtypes = (PI, c_size_t, c_size_t, CMP) - def test_qsort(self): - import conftest - _ctypes_test = str(conftest.sofile) - dll = CDLL(_ctypes_test) + qs(cast(a, PI), 5, sizeof(c_int), CMP(comp)) - PI = POINTER(c_int) - A = 
c_int*5 - a = A() - for i in range(5): - a[i] = 5-i + res = list(a) - assert a[0] == 5 # sanity + assert res == [1,2,3,4,5] - def comp(a, b): - a = a.contents.value - b = b.contents.value - return cmp(a,b) - qs = dll.my_qsort - qs.restype = None - CMP = CFUNCTYPE(c_int, PI, PI) - qs.argtypes = (PI, c_size_t, c_size_t, CMP) +def test_pyobject_as_opaque(dll): + def callback(arg): + return arg() - qs(cast(a, PI), 5, sizeof(c_int), CMP(comp)) + CTP = CFUNCTYPE(c_int, py_object) + cfunc = dll._testfunc_callback_opaque + cfunc.argtypes = [CTP, py_object] + cfunc.restype = c_int + res = cfunc(CTP(callback), lambda : 3) + assert res == 3 - res = list(a) +def test_callback_void(capsys, dll): + def callback(): + pass - assert res == [1,2,3,4,5] + CTP = CFUNCTYPE(None) + cfunc = dll._testfunc_callback_void + cfunc.argtypes = [CTP] + cfunc.restype = int + cfunc(CTP(callback)) + out, err = capsys.readouterr() + assert (out, err) == ("", "") - def test_pyobject_as_opaque(self): - import conftest - _ctypes_test = str(conftest.sofile) - dll = CDLL(_ctypes_test) - def callback(arg): - return arg() +def test_callback_pyobject(): + def callback(obj): + return obj - CTP = CFUNCTYPE(c_int, py_object) - cfunc = dll._testfunc_callback_opaque - cfunc.argtypes = [CTP, py_object] - cfunc.restype = c_int - res = cfunc(CTP(callback), lambda : 3) - assert res == 3 + FUNC = CFUNCTYPE(py_object, py_object) + cfunc = FUNC(callback) + param = c_int(42) + assert cfunc(param) is param - def test_callback_void(self, capsys): - import conftest - _ctypes_test = str(conftest.sofile) - dll = CDLL(_ctypes_test) - - def callback(): - pass - - CTP = CFUNCTYPE(None) - cfunc = dll._testfunc_callback_void - cfunc.argtypes = [CTP] - cfunc.restype = int - cfunc(CTP(callback)) - out, err = capsys.readouterr() - assert (out, err) == ("", "") - - - def test_callback_pyobject(self): - def callback(obj): - return obj - - FUNC = CFUNCTYPE(py_object, py_object) - cfunc = FUNC(callback) - param = c_int(42) - assert 
cfunc(param) is param - - def test_raise_argumenterror(self): - def callback(x): - pass - FUNC = CFUNCTYPE(None, c_void_p) - cfunc = FUNC(callback) - param = c_uint(42) - with pytest.raises(ArgumentError): - cfunc(param) +def test_raise_argumenterror(): + def callback(x): + pass + FUNC = CFUNCTYPE(None, c_void_p) + cfunc = FUNC(callback) + param = c_uint(42) + with pytest.raises(ArgumentError): + cfunc(param) diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_cast.py b/extra_tests/ctypes_tests/test_cast.py rename from pypy/module/test_lib_pypy/ctypes_tests/test_cast.py rename to extra_tests/ctypes_tests/test_cast.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_cast.py +++ b/extra_tests/ctypes_tests/test_cast.py @@ -1,106 +1,30 @@ +import pytest + from ctypes import * -import sys, py -from .support import BaseCTypesTestChecker -def setup_module(mod): - import conftest - mod.lib = CDLL(str(conftest.sofile)) +def test_cast_functype(dll): + # make sure we can cast function type + my_sqrt = dll.my_sqrt + saved_objects = my_sqrt._objects.copy() + sqrt = cast(cast(my_sqrt, c_void_p), CFUNCTYPE(c_double, c_double)) + assert sqrt(4.0) == 2.0 + assert not cast(0, CFUNCTYPE(c_int)) + # + assert sqrt._objects is my_sqrt._objects # on CPython too + my_sqrt._objects.clear() + my_sqrt._objects.update(saved_objects) -class TestCast(BaseCTypesTestChecker): +def test_cast_argumenterror(): + param = c_uint(42) + with pytest.raises(ArgumentError): + cast(param, c_void_p) - def test_array2pointer(self): - array = (c_int * 3)(42, 17, 2) - - # casting an array to a pointer works. 
- ptr = cast(array, POINTER(c_int)) - assert [ptr[i] for i in range(3)] == [42, 17, 2] - - if 2*sizeof(c_short) == sizeof(c_int): - ptr = cast(array, POINTER(c_short)) - if sys.byteorder == "little": - assert [ptr[i] for i in range(6)] == ( - [42, 0, 17, 0, 2, 0]) - else: - assert [ptr[i] for i in range(6)] == ( - [0, 42, 0, 17, 0, 2]) - - def test_address2pointer(self): - array = (c_int * 3)(42, 17, 2) - - address = addressof(array) - ptr = cast(c_void_p(address), POINTER(c_int)) - assert [ptr[i] for i in range(3)] == [42, 17, 2] - - ptr = cast(address, POINTER(c_int)) - assert [ptr[i] for i in range(3)] == [42, 17, 2] From pypy.commits at gmail.com Thu Jan 31 11:56:27 2019 From: pypy.commits at gmail.com (antocuni) Date: Thu, 31 Jan 2019 08:56:27 -0800 (PST) Subject: [pypy-commit] pypy release-pypy3.5-7.x: hg merge py3.5 Message-ID: <5c5328bb.1c69fb81.e35e5.28f5@mx.google.com> Author: Antonio Cuni Branch: release-pypy3.5-7.x Changeset: r95760:3d229d8603d1 Date: 2019-01-31 17:55 +0100 http://bitbucket.org/pypy/pypy/changeset/3d229d8603d1/ Log: hg merge py3.5 diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -40,16 +40,16 @@ Armin Rigo Maciej Fijalkowski Carl Friedrich Bolz-Tereick + Antonio Cuni Amaury Forgeot d'Arc - Antonio Cuni Matti Picus Samuele Pedroni Ronan Lamy Alex Gaynor Philip Jenvey + Richard Plangger Brian Kearns - Richard Plangger - Michael Hudson + Michael Hudson-Doyle Manuel Jacob David Schneider Holger Krekel @@ -59,8 +59,8 @@ Anders Chrigstrom Wim Lavrijsen Eric van Riet Paap + Remi Meier Richard Emslie - Remi Meier Alexander Schremmer Dan Villiom Podlaski Christiansen Lukas Diekmann @@ -70,10 +70,10 @@ Niklaus Haldimann Camillo Bruni Laura Creighton - Romain Guillebert Toon Verwaest Leonardo Santagada Seo Sanghyeon + Romain Guillebert Ronny Pfannschmidt Justin Peel Raffael Tfirst @@ -114,12 +114,12 @@ Squeaky Edd Barrett Timo Paulssen + Laurence Tratt Marius Gedminas Nicolas Truessel Alexandre Fayolle Simon Burton Martin Matusiak - 
Laurence Tratt Wenzhu Man Konstantin Lopuhin John Witulski @@ -134,8 +134,9 @@ Jean-Philippe St. Pierre Guido van Rossum Pavel Vinogradov + Stefan Beyer + William Leslie Paweł Piotr Przeradowski - William Leslie marky1991 Ilya Osadchiy Tobias Oberstein @@ -144,10 +145,10 @@ Taavi Burns Adrian Kuhn tav + Stian Andreassen Georg Brandl Joannah Nanjekye Bert Freudenberg - Stian Andreassen Wanja Saatkamp Mike Blume Gerald Klix @@ -163,6 +164,7 @@ Vasily Kuznetsov Preston Timmons David Ripton + Pieter Zieschang Dusty Phillips Lukas Renggli Guenter Jantzen @@ -176,6 +178,7 @@ Andrew Durdin Ben Young Michael Schneider + Yusuke Tsutsumi Nicholas Riley Jason Chu Igor Trindade Oliveira @@ -187,7 +190,6 @@ Mariano Anaya anatoly techtonik Karl Bartel - Stefan Beyer Gabriel Lavoie Jared Grubb Alecsandru Patrascu @@ -198,7 +200,6 @@ Victor Stinner Andrews Medina Aaron Iles - p_zieschang at yahoo.de Toby Watson Daniel Patrick Stuart Williams @@ -210,6 +211,7 @@ Mikael Schönenberg Stanislaw Halik Mihnea Saracin + Matt Jackson Berkin Ilbeyi Gasper Zejn Faye Zhao @@ -217,12 +219,14 @@ Anders Qvist Corbin Simpson Chirag Jadwani + Pauli Virtanen Jonathan David Riehl Beatrice During Alex Perry Robert Zaremba Alan McIntyre Alexander Sedov + David C Ellis Vaibhav Sood Reuben Cummings Attila Gobi @@ -242,7 +246,6 @@ Arjun Naik Aaron Gallagher Alexis Daboville - Pieter Zieschang Karl Ramm Lukas Vacek Omer Katz @@ -270,12 +273,15 @@ Catalin Gabriel Manciu Jacob Oscarson Ryan Gonzalez + Antoine Dupre Kristjan Valur Jonsson Lucio Torre Richard Lancaster Dan Buch Lene Wagner Tomo Cocoa + Miro Hrončok + Anthony Sottile David Lievens Neil Blakey-Milner Henrik Vendelbo @@ -290,10 +296,12 @@ Bobby Impollonia Roberto De Ioris Jeong YunWon + andrewjlawrence Christopher Armstrong Aaron Tubbs Vasantha Ganesh K Jason Michalski + Radu Ciorba Markus Holtermann Andrew Thompson Yusei Tahara @@ -301,28 +309,26 @@ Fabio Niephaus Akira Li Gustavo Niemeyer - Rafał Gałczyński + Nate Bragg Lucas Stadler roberto 
at goyle + Carl Bordum Hansen Matt Bogosian Yury V. Zaytsev florinpapa Anders Sigfridsson - Matt Jackson Nikolay Zinov rafalgalczynski at gmail.com Joshua Gilbert Anna Katrina Dominguez Kim Jin Su Amber Brown - Miro Hrončok - Anthony Sottile - Nate Bragg + Andrew Stepanov + Rafał Gałczyński Ben Darnell Juan Francisco Cantero Hurtado Godefroid Chappelle Julian Berman - Michael Hudson-Doyle Stephan Busemann Dan Colish timo @@ -332,6 +338,7 @@ halgari Jim Baker Chris Lambacher + John Aldis coolbutuseless at gmail.com Mike Bayer Rodrigo Araújo @@ -340,6 +347,7 @@ OlivierBlanvillain Jonas Pfannschmidt Zearin + Johan Forsberg Andrey Churin Dan Crosta reubano at gmail.com @@ -349,8 +357,9 @@ Steve Papanik Eli Stevens Boglarka Vezer - gabrielg + gabrielg at ec2-54-146-239-158.compute-1.amazonaws.com PavloKapyshin + Hervé Beraud Tomer Chachamu Christopher Groskopf Asmo Soinio @@ -364,7 +373,6 @@ Michael Chermside Anna Ravencroft remarkablerocket - Pauli Virtanen Petre Vijiac Berker Peksag Christian Muirhead @@ -384,12 +392,13 @@ Zooko Wilcox-O Hearn James Lan jiaaro + Evgenii Gorinov Markus Unterwaditzer Kristoffer Kleine Graham Markall Dan Loewenherz werat - Andrew Stepanov + Filip Salomonsson Niclas Olofsson Chris Pressey Tobias Diaz diff --git a/extra_tests/cffi_tests/cffi0/test_function.py b/extra_tests/cffi_tests/cffi0/test_function.py --- a/extra_tests/cffi_tests/cffi0/test_function.py +++ b/extra_tests/cffi_tests/cffi0/test_function.py @@ -46,14 +46,15 @@ assert x != math.sin(1.23) # rounding effects assert abs(x - math.sin(1.23)) < 1E-6 - def test_lround_no_return_value(self): + def test_getenv_no_return_value(self): # check that 'void'-returning functions work too ffi = FFI(backend=self.Backend()) ffi.cdef(""" - void lround(double x); + void getenv(char *); """) - m = ffi.dlopen(lib_m) - x = m.lround(1.23) + needs_dlopen_none() + m = ffi.dlopen(None) + x = m.getenv(b"FOO") assert x is None def test_dlopen_filename(self): diff --git 
a/lib-python/3/test/test_dictviews.py b/lib-python/3/test/test_dictviews.py --- a/lib-python/3/test/test_dictviews.py +++ b/lib-python/3/test/test_dictviews.py @@ -1,9 +1,11 @@ +from test import support import copy import pickle import unittest class DictSetTest(unittest.TestCase): + @support.cpython_only def test_constructors_not_callable(self): kt = type({}.keys()) self.assertRaises(TypeError, kt, {}) diff --git a/lib_pypy/_collections.py b/lib_pypy/_collections.py --- a/lib_pypy/_collections.py +++ b/lib_pypy/_collections.py @@ -390,7 +390,7 @@ class defaultdict(dict): __slots__ = ["default_factory"] - + def __init__(self, *args, **kwds): if len(args) > 0: default_factory = args[0] @@ -401,10 +401,10 @@ default_factory = None self.default_factory = default_factory super(defaultdict, self).__init__(*args, **kwds) - + def __missing__(self, key): # from defaultdict docs - if self.default_factory is None: + if self.default_factory is None: raise KeyError(key) self[key] = value = self.default_factory() return value @@ -420,7 +420,7 @@ def copy(self): return type(self)(self.default_factory, self) - + def __copy__(self): return self.copy() @@ -438,9 +438,3 @@ """ return (type(self), (self.default_factory,), None, None, iter(self.items())) - - -try: - from _pypy_collections import OrderedDict -except ImportError: - pass diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py --- a/pypy/doc/conf.py +++ b/pypy/doc/conf.py @@ -71,9 +71,9 @@ # module/cpyext/include/patchlevel.h # # The short X.Y version. -version = '7.0' +version = '7.1' # The full version, including alpha/beta/rc tags. -release = '7.0.0' +release = '7.1.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/pypy/doc/contributor.rst b/pypy/doc/contributor.rst --- a/pypy/doc/contributor.rst +++ b/pypy/doc/contributor.rst @@ -7,16 +7,16 @@ Armin Rigo Maciej Fijalkowski Carl Friedrich Bolz-Tereick + Antonio Cuni Amaury Forgeot d'Arc - Antonio Cuni Matti Picus Samuele Pedroni Ronan Lamy Alex Gaynor Philip Jenvey + Richard Plangger Brian Kearns - Richard Plangger - Michael Hudson + Michael Hudson-Doyle Manuel Jacob David Schneider Holger Krekel @@ -26,8 +26,8 @@ Anders Chrigstrom Wim Lavrijsen Eric van Riet Paap + Remi Meier Richard Emslie - Remi Meier Alexander Schremmer Dan Villiom Podlaski Christiansen Lukas Diekmann @@ -37,10 +37,10 @@ Niklaus Haldimann Camillo Bruni Laura Creighton - Romain Guillebert Toon Verwaest Leonardo Santagada Seo Sanghyeon + Romain Guillebert Ronny Pfannschmidt Justin Peel Raffael Tfirst @@ -81,12 +81,12 @@ Squeaky Edd Barrett Timo Paulssen + Laurence Tratt Marius Gedminas Nicolas Truessel Alexandre Fayolle Simon Burton Martin Matusiak - Laurence Tratt Wenzhu Man Konstantin Lopuhin John Witulski @@ -101,8 +101,9 @@ Jean-Philippe St. 
Pierre Guido van Rossum Pavel Vinogradov + Stefan Beyer + William Leslie Paweł Piotr Przeradowski - William Leslie marky1991 Ilya Osadchiy Tobias Oberstein @@ -111,10 +112,10 @@ Taavi Burns Adrian Kuhn tav + Stian Andreassen Georg Brandl Joannah Nanjekye Bert Freudenberg - Stian Andreassen Wanja Saatkamp Mike Blume Gerald Klix @@ -130,6 +131,7 @@ Vasily Kuznetsov Preston Timmons David Ripton + Pieter Zieschang Dusty Phillips Lukas Renggli Guenter Jantzen @@ -143,6 +145,7 @@ Andrew Durdin Ben Young Michael Schneider + Yusuke Tsutsumi Nicholas Riley Jason Chu Igor Trindade Oliveira @@ -154,7 +157,6 @@ Mariano Anaya anatoly techtonik Karl Bartel - Stefan Beyer Gabriel Lavoie Jared Grubb Alecsandru Patrascu @@ -165,7 +167,6 @@ Victor Stinner Andrews Medina Aaron Iles - p_zieschang at yahoo.de Toby Watson Daniel Patrick Stuart Williams @@ -177,6 +178,7 @@ Mikael Schönenberg Stanislaw Halik Mihnea Saracin + Matt Jackson Berkin Ilbeyi Gasper Zejn Faye Zhao @@ -184,12 +186,14 @@ Anders Qvist Corbin Simpson Chirag Jadwani + Pauli Virtanen Jonathan David Riehl Beatrice During Alex Perry Robert Zaremba Alan McIntyre Alexander Sedov + David C Ellis Vaibhav Sood Reuben Cummings Attila Gobi @@ -209,7 +213,6 @@ Arjun Naik Aaron Gallagher Alexis Daboville - Pieter Zieschang Karl Ramm Lukas Vacek Omer Katz @@ -237,12 +240,15 @@ Catalin Gabriel Manciu Jacob Oscarson Ryan Gonzalez + Antoine Dupre Kristjan Valur Jonsson Lucio Torre Richard Lancaster Dan Buch Lene Wagner Tomo Cocoa + Miro Hrončok + Anthony Sottile David Lievens Neil Blakey-Milner Henrik Vendelbo @@ -257,10 +263,12 @@ Bobby Impollonia Roberto De Ioris Jeong YunWon + andrewjlawrence Christopher Armstrong Aaron Tubbs Vasantha Ganesh K Jason Michalski + Radu Ciorba Markus Holtermann Andrew Thompson Yusei Tahara @@ -268,28 +276,26 @@ Fabio Niephaus Akira Li Gustavo Niemeyer - Rafał Gałczyński + Nate Bragg Lucas Stadler roberto at goyle + Carl Bordum Hansen Matt Bogosian Yury V. 
Zaytsev florinpapa Anders Sigfridsson - Matt Jackson Nikolay Zinov rafalgalczynski at gmail.com Joshua Gilbert Anna Katrina Dominguez Kim Jin Su Amber Brown - Miro Hrončok - Anthony Sottile - Nate Bragg + Andrew Stepanov + Rafał Gałczyński Ben Darnell Juan Francisco Cantero Hurtado Godefroid Chappelle Julian Berman - Michael Hudson-Doyle Stephan Busemann Dan Colish timo @@ -299,6 +305,7 @@ halgari Jim Baker Chris Lambacher + John Aldis coolbutuseless at gmail.com Mike Bayer Rodrigo Araújo @@ -307,6 +314,7 @@ OlivierBlanvillain Jonas Pfannschmidt Zearin + Johan Forsberg Andrey Churin Dan Crosta reubano at gmail.com @@ -316,8 +324,9 @@ Steve Papanik Eli Stevens Boglarka Vezer - gabrielg + gabrielg at ec2-54-146-239-158.compute-1.amazonaws.com PavloKapyshin + Hervé Beraud Tomer Chachamu Christopher Groskopf Asmo Soinio @@ -331,7 +340,6 @@ Michael Chermside Anna Ravencroft remarkablerocket - Pauli Virtanen Petre Vijiac Berker Peksag Christian Muirhead @@ -351,12 +359,13 @@ Zooko Wilcox-O Hearn James Lan jiaaro + Evgenii Gorinov Markus Unterwaditzer Kristoffer Kleine Graham Markall Dan Loewenherz werat - Andrew Stepanov + Filip Salomonsson Niclas Olofsson Chris Pressey Tobias Diaz diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst --- a/pypy/doc/how-to-release.rst +++ b/pypy/doc/how-to-release.rst @@ -16,9 +16,6 @@ How to Create a PyPy Release ++++++++++++++++++++++++++++ -Overview --------- - As a meta rule setting up issues in the tracker for items here may help not forgetting things. A set of todo files may also work. @@ -28,17 +25,54 @@ Release Steps -------------- +++++++++++++++ -* If needed, make a release branch -* Bump the - pypy version number in module/sys/version.py and in - module/cpyext/include/patchlevel.h and in doc/conf.py. The branch - will capture the revision number of this change for the release. 
+Make the release branch +------------------------ - Some of the next updates may be done before or after branching; make - sure things are ported back to the trunk and to the branch as - necessary. +This is needed only in case you are doing a new major version; if not, you can +probably reuse the existing release branch. + +We want to be able to freely merge default into the branch and vice-versa; +thus we need to do a complicate dance to avoid to patch the version number +when we do a merge:: + + $ hg up -r default + $ # edit the version to e.g. 7.0.0-final + $ hg ci + $ hg branch release-pypy2.7-7.x && hg ci + $ hg up -r default + $ # edit the version to 7.1.0-alpha0 + $ hg ci + $ hg up -r release-pypy2.7-7.x + $ hg merge default + $ # edit the version to AGAIN 7.0.0-final + $ hg ci + +Then, we need to do the same for the 3.x branch:: + + $ hg up -r py3.5 + $ hg merge default # this brings the version fo 7.1.0-alpha0 + $ hg branch release-pypy3.5-7.x + $ # edit the version to 7.0.0-final + $ hg ci + $ hg up -r py3.5 + $ hg merge release-pypy3.5-7.x + $ # edit the version to 7.1.0-alpha0 + $ hg ci + +To change the version, you need to edit three files: + + - ``module/sys/version.py`` + + - ``module/cpyext/include/patchlevel.h`` + + - ``doc/conf.py`` + + +Other steps +----------- + * Make sure the RPython builds on the buildbot pass with no failures diff --git a/pypy/doc/index-of-release-notes.rst b/pypy/doc/index-of-release-notes.rst --- a/pypy/doc/index-of-release-notes.rst +++ b/pypy/doc/index-of-release-notes.rst @@ -6,6 +6,7 @@ .. toctree:: + release-v7.0.0.rst release-v6.0.0.rst release-v5.10.1.rst release-v5.10.0.rst diff --git a/pypy/doc/index-of-whatsnew.rst b/pypy/doc/index-of-whatsnew.rst --- a/pypy/doc/index-of-whatsnew.rst +++ b/pypy/doc/index-of-whatsnew.rst @@ -7,6 +7,7 @@ .. toctree:: whatsnew-head.rst + whatsnew-pypy2-7.0.0.rst whatsnew-pypy2-6.0.0.rst whatsnew-pypy2-5.10.0.rst whatsnew-pypy2-5.10.0.rst @@ -41,6 +42,7 @@ .. 
toctree:: whatsnew-pypy3-head.rst + whatsnew-pypy3-7.0.0.rst whatsnew-pypy3-5.9.0.rst whatsnew-pypy3-5.8.0.rst whatsnew-pypy3-5.7.0.rst diff --git a/pypy/doc/interpreter.rst b/pypy/doc/interpreter.rst --- a/pypy/doc/interpreter.rst +++ b/pypy/doc/interpreter.rst @@ -156,7 +156,7 @@ environment found in `Frames`. Frames and Functions have references to a code object. Here is a list of Code attributes: -* ``co_flags`` flags if this code object has nested scopes/generators +* ``co_flags`` flags if this code object has nested scopes/generators/etc. * ``co_stacksize`` the maximum depth the stack can reach while executing the code * ``co_code`` the actual bytecode string diff --git a/pypy/doc/release-v7.0.0.rst b/pypy/doc/release-v7.0.0.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/release-v7.0.0.rst @@ -0,0 +1,150 @@ +====================================================== +PyPy v7.0.0: triple release of 2.7, 3.5 and 3.6-alpha +====================================================== + +The PyPy team is proud to release the version 7.0.0 of PyPy, which includes +three different interpreters: + + - PyPy2.7, which is an interpreter supporting the syntax and the features of + Python 2.7 + + - PyPy3.5, which supports Python 3.5 + + - PyPy3.6-alpha: this is the first official release of PyPy to support 3.6 + features, although it is still considered alpha quality. + +All the interpreters are based on much the same codebase, thus the triple +release. + +Until we can work with downstream providers to distribute builds with PyPy, we +have made packages for some common packages `available as wheels`_. + +The GC now has `hooks`_ to gain more insights into its performance, and it is +now possible to manually manage the GC by using a combination of +``gc.disable`` and ``gc.collect_step``. See the `GC blog post`_. + + +We updated the `cffi`_ module included in PyPy to version 1.12, and the +`cppyy`_ backend to 1.4. 
Please use these to wrap your C and C++ code, +respectively, for a JIT friendly experience. + +As always, this release is 100% compatible with the previous one and fixed +several issues and bugs raised by the growing community of PyPy users. +We strongly recommend updating. + +The PyPy3.6 release and the Windows PyPy3.5 release are still not production +quality so your mileage may vary. There are open issues with incomplete +compatibility and c-extension support. + +The utf8 branch that changes internal representation of unicode to utf8 did not +make it into the release, so there is still more goodness coming. +You can download the v6.0 releases here: + + http://pypy.org/download.html + +We would like to thank our donors for the continued support of the PyPy +project. If PyPy is not quite good enough for your needs, we are available for +direct consulting work. + +We would also like to thank our contributors and encourage new people to join +the project. PyPy has many layers and we need help with all of them: `PyPy`_ +and `RPython`_ documentation improvements, tweaking popular `modules`_ to run +on pypy, or general `help`_ with making RPython's JIT even better. + +.. _`PyPy`: index.html +.. _`RPython`: https://rpython.readthedocs.org +.. _`help`: project-ideas.html +.. _`cffi`: http://cffi.readthedocs.io +.. _`cppyy`: https://cppyy.readthedocs.io +.. _`available as wheels`: https://github.com/antocuni/pypy-wheels +.. _`GC blog post`: https://morepypy.blogspot.com/2019/01/pypy-for-low-latency-systems.html + + +What is PyPy? +============= + +PyPy is a very compliant Python interpreter, almost a drop-in replacement for +CPython 2.7, 3.5 and 3.6. It's fast (`PyPy and CPython 2.7.x`_ performance +comparison) due to its integrated tracing JIT compiler. + +We also welcome developers of other `dynamic languages`_ to see what RPython +can do for them. 
+ +The PyPy release supports: + + * **x86** machines on most common operating systems + (Linux 32/64 bits, Mac OS X 64 bits, Windows 32 bits, OpenBSD, FreeBSD) + + * big- and little-endian variants of **PPC64** running Linux, + + * **s390x** running Linux + +Unfortunately at the moment of writing our ARM buildbots are out of service, +so for now we are **not** releasing any binary for the ARM architecture. + +.. _`PyPy and CPython 2.7.x`: http://speed.pypy.org +.. _`dynamic languages`: http://rpython.readthedocs.io/en/latest/examples.html + + +Changelog +========= + +If not specified, the changes are shared across versions + +* Support ``__set_name__``, ``__init_subclass__`` (Py3.6) +* Support ``cppyy`` in Py3.5 and Py3.6 +* Use implementation-specific site directories in ``sysconfig`` (Py3.5, Py3.6) +* Adding detection of gcc to ``sysconfig`` (Py3.5, Py3.6) +* Fix multiprocessing regression on newer glibcs +* Make sure 'blocking-ness' of socket is set along with default timeout +* Include ``crypt.h`` for ``crypt()`` on Linux +* Improve and re-organize the contributing_ documentation +* Make the ``__module__`` attribute writable, fixing an incompatibility with + NumPy 1.16 +* Implement ``Py_ReprEnter``, ``Py_ReprLeave(), ``PyMarshal_ReadObjectFromString``, + ``PyMarshal_WriteObjectToString``, ``PyObject_DelItemString``, + ``PyMapping_DelItem``, ``PyMapping_DelItemString``, ``PyEval_GetFrame``, + ``PyOS_InputHook``, ``PyErr_FormatFromCause`` (Py3.6), +* Implement new wordcode instruction encoding (Py3.6) +* Log additional gc-minor and gc-collect-step info in the PYPYLOG +* The ``reverse-debugger`` (revdb) branch has been merged to the default + branch, so it should always be up-to-date. You still need a special pypy + build, but you can compile it from the same source as the one we distribute + for the v7.0.0 release. 
For more information, see + https://bitbucket.org/pypy/revdb +* Support underscores in numerical literals like ``'4_2'`` (Py3.6) +* Pre-emptively raise MemoryError if the size of a deque in ``_collections.deque`` + is too large (Py3.5) +* Fix multithreading issues in calls to ``os.setenv`` +* Add missing defines and typedefs for numpy and pandas on MSVC +* Add CPython macros like ``Py_NAN`` to header files +* Rename the ``MethodType`` to ``instancemethod``, like CPython +* Better support for ``async with`` in generators (Py3.5, Py3.6) +* Improve the performance of ``pow(a, b, c)`` if ``c`` is a large integer +* Now ``vmprof`` works on FreeBSD +* Support GNU Hurd, fixes for FreeBSD +* Add deprecation warning if the type of the result of ``__float__`` is a + subclass of float (Py3.6) +* Fix async generator bug when yielding a ``StopIteration`` (Py3.6) +* Speed up ``max(list-of-int)`` from non-jitted code +* Fix Windows ``os.listdir()`` for some cases (see CPython #32539) +* Add ``select.PIPE_BUF`` +* Use ``subprocess`` to avoid shell injection in ``shutil`` module - backport + of https://bugs.python.org/issue34540 +* Rename ``_Py_ZeroStruct`` to ``_Py_FalseStruct`` (Py3.5, Py3.6) +* Remove some cpyext names for Py3.5, Py3.6 +* Enable use of unicode file names in ``dlopen`` +* Backport CPython fix for ``thread.RLock`` +* Make GC hooks measure time in seconds (as opposed to an opaque unit) +* Refactor and reorganize tests in ``test_lib_pypy`` +* Check error values in ``socket.setblocking`` (Py3.6) +* Add support for FsPath to os.unlink() (Py3.6) +* Fix freezing builtin modules at translation +* Tweak ``W_UnicodeDictionaryStrategy`` which speeds up dictionaries with only + unicode keys + +We also refactored many parts of the JIT bridge optimizations, as well as cpyext +internals, and together with new contributors fixed issues, added new +documentation, and cleaned up the codebase. + +.. 
_contributing: http://doc.pypy.org/en/latest/contributing.html diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -1,69 +1,7 @@ ========================== -What's new in PyPy2.7 6.0+ +What's new in PyPy2.7 7.0+ ========================== -.. this is a revision shortly after release-pypy-6.0.0 -.. startrev: e50e11af23f1 +.. this is a revision shortly after release-pypy-7.0.0 +.. startrev: 481c69f7d81f -.. branch: cppyy-packaging - -Main items: vastly better template resolution and improved performance. In -detail: upgrade to backend 1.4, improved handling of templated methods and -functions (in particular automatic deduction of types), improved pythonization -interface, range of compatibility fixes for Python3, free functions now take -fast libffi path when possible, moves for strings (incl. from Python str), -easier/faster handling of std::vector by numpy, improved and faster object -identity preservation - -.. branch: socket_default_timeout_blockingness - -Make sure 'blocking-ness' of socket is set along with default timeout - -.. branch: crypt_h - -Include crypt.h for crypt() on Linux - -.. branch: gc-more-logging - -Log additional gc-minor and gc-collect-step info in the PYPYLOG - -.. branch: reverse-debugger - -The reverse-debugger branch has been merged. For more information, see -https://bitbucket.org/pypy/revdb - - -.. branch: pyparser-improvements-3 - -Small refactorings in the Python parser. - -.. branch: fix-readme-typo - -.. branch: avoid_shell_injection_in_shutil - -Backport CPython fix for possible shell injection issue in `distutils.spawn`, -https://bugs.python.org/issue34540 - -.. branch: cffi_dlopen_unicode - -Enable use of unicode file names in `dlopen` - -.. branch: rlock-in-rpython - -Backport CPython fix for `thread.RLock` - - -.. branch: expose-gc-time - -Make GC hooks measure time in seconds (as opposed to an opaque unit). - -.. 
branch: cleanup-test_lib_pypy - -Update most test_lib_pypy/ tests and move them to extra_tests/. - -.. branch: gc-disable - -Make it possible to manually manage the GC by using a combination of -gc.disable() and gc.collect_step(). Make sure to write a proper release -announcement in which we explain that existing programs could leak memory if -they run for too much time between a gc.disable()/gc.enable() diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-pypy2-7.0.0.rst copy from pypy/doc/whatsnew-head.rst copy to pypy/doc/whatsnew-pypy2-7.0.0.rst diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-7.0.0.rst copy from pypy/doc/whatsnew-pypy3-head.rst copy to pypy/doc/whatsnew-pypy3-7.0.0.rst diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-head.rst --- a/pypy/doc/whatsnew-pypy3-head.rst +++ b/pypy/doc/whatsnew-pypy3-head.rst @@ -1,19 +1,7 @@ -======================== -What's new in PyPy3 6.0+ -======================== - -.. this is the revision after release-pypy3.5-v6.0 -.. startrev: 580e3e26cd32 - -.. branch: hroncok/fix-multiprocessing-regression-on-newer--1524656522151 - -Fix multiprocessing regression on newer glibcs - -.. branch: py3.5-user-site-impl - -Use implementation-specific site directories in sysconfig like in Python2 - -.. branch: py3.5-reverse-debugger - -The reverse-debugger branch has been merged. For more information, see -https://bitbucket.org/pypy/revdb +======================== +What's new in PyPy3 7.0+ +======================== + +.. this is the revision after release-pypy3.5-v7.0 +.. startrev: 9d2fa7c63b7c + diff --git a/pypy/interpreter/app_main.py b/pypy/interpreter/app_main.py --- a/pypy/interpreter/app_main.py +++ b/pypy/interpreter/app_main.py @@ -3,7 +3,6 @@ # See test/test_app_main. 
# Missing vs CPython: -b, -d, -x -from __future__ import print_function, unicode_literals USAGE1 = __doc__ = """\ Options and arguments (and corresponding environment variables): -B : don't write .py[co] files on import; also PYTHONDONTWRITEBYTECODE=x @@ -334,7 +333,7 @@ del encerr def create_stdio(fd, writing, name, encoding, errors, unbuffered): - import io + import _io # stdin is always opened in buffered mode, first because it # shouldn't make a difference in common use cases, second because # TextIOWrapper depends on the presence of a read1() method which @@ -342,7 +341,7 @@ buffering = 0 if unbuffered and writing else -1 mode = 'w' if writing else 'r' try: - buf = io.open(fd, mode + 'b', buffering, closefd=False) + buf = _io.open(fd, mode + 'b', buffering, closefd=False) except OSError as e: if e.errno != errno.EBADF: raise @@ -352,7 +351,7 @@ raw.name = name # translate \r\n to \n for sys.stdin on Windows newline = None if sys.platform == 'win32' and not writing else '\n' - stream = io.TextIOWrapper(buf, encoding, errors, newline=newline, + stream = _io.TextIOWrapper(buf, encoding, errors, newline=newline, line_buffering=unbuffered or raw.isatty()) stream.mode = mode return stream @@ -549,12 +548,6 @@ return options -# this indirection is needed to be able to import this module on python2, else -# we have a SyntaxError: unqualified exec in a nested function - at hidden_applevel -def exec_(src, dic): - exec(src, dic) - @hidden_applevel def run_command_line(interactive, inspect, @@ -663,7 +656,7 @@ else: if not isolated: sys.path.insert(0, '') - success = run_toplevel(exec_, bytes, mainmodule.__dict__) + success = run_toplevel(exec, bytes, mainmodule.__dict__) elif run_module != 0: # handle the "-m" command # '' on sys.path is required also here @@ -703,7 +696,7 @@ python_startup, 'exec', PyCF_ACCEPT_NULL_BYTES) - exec_(co_python_startup, mainmodule.__dict__) + exec(co_python_startup, mainmodule.__dict__) mainmodule.__file__ = python_startup 
mainmodule.__cached__ = None run_toplevel(run_it) @@ -721,7 +714,7 @@ def run_it(): co_stdin = compile(sys.stdin.read(), '', 'exec', PyCF_ACCEPT_NULL_BYTES) - exec_(co_stdin, mainmodule.__dict__) + exec(co_stdin, mainmodule.__dict__) mainmodule.__file__ = '' mainmodule.__cached__ = None success = run_toplevel(run_it) @@ -763,7 +756,7 @@ co = marshal.load(f) if type(co) is not type((lambda:0).__code__): raise RuntimeError("Bad code object in .pyc file") - exec_(co, namespace) + exec(co, namespace) args = (execfile, filename, mainmodule.__dict__) else: filename = sys.argv[0] @@ -791,7 +784,7 @@ code = f.read() co = compile(code, filename, 'exec', PyCF_ACCEPT_NULL_BYTES) - exec_(co, namespace) + exec(co, namespace) args = (execfile, filename, mainmodule.__dict__) success = run_toplevel(*args) diff --git a/pypy/interpreter/test/test_app_main.py b/pypy/interpreter/test/test_app_main.py --- a/pypy/interpreter/test/test_app_main.py +++ b/pypy/interpreter/test/test_app_main.py @@ -1043,36 +1043,6 @@ assert data.startswith("15\\u20ac ('strict', 'backslashreplace')") -class TestAppMain: - def test_print_info(self): - from pypy.interpreter import app_main - import sys, cStringIO - prev_so = sys.stdout - prev_ti = getattr(sys, 'pypy_translation_info', 'missing') - sys.pypy_translation_info = { - 'translation.foo': True, - 'translation.bar': 42, - 'translation.egg.something': None, - 'objspace.x': 'hello', - } - try: - sys.stdout = f = cStringIO.StringIO() - py.test.raises(SystemExit, app_main.print_info) - finally: - sys.stdout = prev_so - if prev_ti == 'missing': - del sys.pypy_translation_info - else: - sys.pypy_translation_info = prev_ti - assert f.getvalue() == ("[objspace]\n" - " x = 'hello'\n" - "[translation]\n" - " bar = 42\n" - " [egg]\n" - " something = None\n" - " foo = True\n") - - @py.test.mark.skipif('config.getoption("runappdirect")') class AppTestAppMain: def setup_class(self): diff --git a/pypy/module/__builtin__/state.py b/pypy/module/__builtin__/state.py --- 
a/pypy/module/__builtin__/state.py +++ b/pypy/module/__builtin__/state.py @@ -2,8 +2,8 @@ class State: def __init__(self, space): self.w_open = space.appexec([], """(): - import io - return io.open""") - + import _io + return _io.open""") + def get(space): return space.fromcache(State) diff --git a/pypy/module/_collections/__init__.py b/pypy/module/_collections/__init__.py --- a/pypy/module/_collections/__init__.py +++ b/pypy/module/_collections/__init__.py @@ -8,6 +8,7 @@ appleveldefs = { 'defaultdict': 'app_defaultdict.defaultdict', + 'OrderedDict': 'app_odict.OrderedDict', } interpleveldefs = { @@ -25,15 +26,3 @@ space = self.space space.getattr(self, space.newtext('defaultdict')) # force importing space.delattr(self, space.newtext('__missing__')) - - def startup(self, space): - # OrderedDict is normally present, but in some cases the line - # "from __pypy__ import reversed_dict, move_to_end" from - # _pypy_collections.py raises - space.appexec([self], """(mod): - try: - from _pypy_collections import OrderedDict - mod.OrderedDict = OrderedDict - except ImportError: - pass - """) diff --git a/lib_pypy/_pypy_collections.py b/pypy/module/_collections/app_odict.py rename from lib_pypy/_pypy_collections.py rename to pypy/module/_collections/app_odict.py --- a/lib_pypy/_pypy_collections.py +++ b/pypy/module/_collections/app_odict.py @@ -1,6 +1,5 @@ from __pypy__ import reversed_dict, move_to_end, objects_in_repr from _operator import eq as _eq -import _collections_abc class OrderedDict(dict): @@ -29,7 +28,33 @@ raise TypeError('expected at most 1 arguments, got %d' % len(args)) self.__update(*args, **kwds) - update = __update = _collections_abc.MutableMapping.update + def update(*args, **kwds): + ''' D.update([E, ]**F) -> None. Update D from mapping/iterable E and F. 
+ If E present and has a .keys() method, does: for k in E: D[k] = E[k] + If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v + In either case, this is followed by: for k, v in F.items(): D[k] = v + ''' + if not args: + raise TypeError("descriptor 'update' of 'OrderedDict' object " + "needs an argument") + self, *args = args + if len(args) > 1: + raise TypeError('update expected at most 1 arguments, got %d' % + len(args)) + if args: + other = args[0] + if hasattr(other, 'items'): + for key, value in other.items(): + self[key] = value + elif hasattr(other, "keys"): + for key in other.keys(): + self[key] = other[key] + else: + for key, value in other: + self[key] = value + for key, value in kwds.items(): + self[key] = value + __update = update def __reversed__(self): return reversed_dict(self) @@ -106,17 +131,20 @@ "D.values() -> an object providing a view on D's values" return _OrderedDictValuesView(self) +dict_keys = type({}.keys()) +dict_values = type({}.values()) +dict_items = type({}.items()) -class _OrderedDictKeysView(_collections_abc.KeysView): +class _OrderedDictKeysView(dict_keys): def __reversed__(self): - yield from reversed_dict(self._mapping) + yield from reversed_dict(self._dict) -class _OrderedDictItemsView(_collections_abc.ItemsView): +class _OrderedDictItemsView(dict_items): def __reversed__(self): - for key in reversed_dict(self._mapping): - yield (key, self._mapping[key]) + for key in reversed_dict(self._dict): + yield (key, self._dict[key]) -class _OrderedDictValuesView(_collections_abc.ValuesView): +class _OrderedDictValuesView(dict_values): def __reversed__(self): - for key in reversed_dict(self._mapping): - yield self._mapping[key] + for key in reversed_dict(self._dict): + yield self._dict[key] diff --git a/pypy/module/_collections/test/test_ordereddict.py b/pypy/module/_collections/test/test_ordereddict.py --- a/pypy/module/_collections/test/test_ordereddict.py +++ b/pypy/module/_collections/test/test_ordereddict.py @@ -22,3 
+22,17 @@ assert d['x'] == 42 d.update({'y': 2}) assert d['y'] == 42 + + def test_reversed(self): + import sys + from _collections import OrderedDict + + pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)] + od = OrderedDict(pairs) + if '__pypy__' in sys.builtin_module_names: + # dict ordering is wrong when testing interpreted on top of CPython + pairs = list(dict(od).items()) + assert list(reversed(od)) == [t[0] for t in reversed(pairs)] + assert list(reversed(od.keys())) == [t[0] for t in reversed(pairs)] + assert list(reversed(od.values())) == [t[1] for t in reversed(pairs)] + assert list(reversed(od.items())) == list(reversed(pairs)) diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -512,6 +512,7 @@ header = DEFAULT_HEADER if func.__name__ in FUNCTIONS_BY_HEADER[header]: raise ValueError("%s already registered" % func.__name__) + func._revdb_c_only_ = True # hack for revdb api_function = COnlyApiFunction(argtypes, restype, func) FUNCTIONS_BY_HEADER[header][func.__name__] = api_function return api_function diff --git a/pypy/module/cpyext/include/patchlevel.h b/pypy/module/cpyext/include/patchlevel.h --- a/pypy/module/cpyext/include/patchlevel.h +++ b/pypy/module/cpyext/include/patchlevel.h @@ -32,9 +32,8 @@ * module/sys/version.py * doc/conf.py */ -#define PYPY_VERSION "7.0.0" -#define PYPY_VERSION_NUM 0x07000000 - +#define PYPY_VERSION "7.1.0-alpha0" +#define PYPY_VERSION_NUM 0x07010000 /* Defined to mean a PyPy where cpyext holds more regular references to PyObjects, e.g. staying alive as long as the internal PyPy object stays alive. 
*/ diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py --- a/pypy/module/sys/version.py +++ b/pypy/module/sys/version.py @@ -13,7 +13,7 @@ # make sure to keep PYPY_VERSION in sync with: # module/cpyext/include/patchlevel.h # doc/conf.py -PYPY_VERSION = (7, 0, 0, "final", 0) +PYPY_VERSION = (7, 1, 0, "alpha", 0) import pypy diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -11,7 +11,7 @@ WrappedDefault, applevel, interp2app, unwrap_spec) from pypy.interpreter.mixedmodule import MixedModule from pypy.interpreter.signature import Signature -from pypy.interpreter.typedef import TypeDef +from pypy.interpreter.typedef import TypeDef, interp_attrproperty_w from pypy.interpreter.unicodehelper import decode_utf8 from pypy.objspace.std.util import negate @@ -1538,6 +1538,12 @@ descr_or, descr_ror = _as_set_op('or', 'update') descr_xor, descr_rxor = _as_set_op('xor', 'symmetric_difference_update') +def new_dict_items(space, w_type, w_dict): + w_dict = space.interp_w(W_DictMultiObject, w_dict) + w_obj = space.allocate_instance(W_DictViewItemsObject, w_type) + W_DictViewObject.__init__(w_obj, space, w_dict) + return w_obj + class W_DictViewItemsObject(W_DictViewObject, SetLikeDictView): def descr_iter(self, space): return W_DictMultiIterItemsObject(space, self.w_dict.iteritems()) @@ -1557,18 +1563,32 @@ return space.w_False return space.newbool(space.eq_w(w_value, w_found)) +def new_dict_keys(space, w_type, w_dict): + w_dict = space.interp_w(W_DictMultiObject, w_dict) + w_obj = space.allocate_instance(W_DictViewKeysObject, w_type) + W_DictViewObject.__init__(w_obj, space, w_dict) + return w_obj + class W_DictViewKeysObject(W_DictViewObject, SetLikeDictView): def descr_iter(self, space): return W_DictMultiIterKeysObject(space, self.w_dict.iterkeys()) + def descr_contains(self, space, w_key): return self.w_dict.descr_contains(space, 
w_key) +def new_dict_values(space, w_type, w_dict): + w_dict = space.interp_w(W_DictMultiObject, w_dict) + w_obj = space.allocate_instance(W_DictViewValuesObject, w_type) + W_DictViewObject.__init__(w_obj, space, w_dict) + return w_obj + class W_DictViewValuesObject(W_DictViewObject): def descr_iter(self, space): return W_DictMultiIterValuesObject(space, self.w_dict.itervalues()) W_DictViewItemsObject.typedef = TypeDef( "dict_items", + __new__ = interp2app(new_dict_items), __repr__ = interp2app(W_DictViewItemsObject.descr_repr), __len__ = interp2app(W_DictViewItemsObject.descr_len), __iter__ = interp2app(W_DictViewItemsObject.descr_iter), @@ -1590,10 +1610,12 @@ __xor__ = interp2app(W_DictViewItemsObject.descr_xor), __rxor__ = interp2app(W_DictViewItemsObject.descr_rxor), isdisjoint = interp2app(W_DictViewItemsObject.descr_isdisjoint), + _dict = interp_attrproperty_w('w_dict', cls=W_DictViewItemsObject), ) W_DictViewKeysObject.typedef = TypeDef( "dict_keys", + __new__ = interp2app(new_dict_keys), __repr__ = interp2app(W_DictViewKeysObject.descr_repr), __len__ = interp2app(W_DictViewKeysObject.descr_len), __iter__ = interp2app(W_DictViewKeysObject.descr_iter), @@ -1615,11 +1637,14 @@ __xor__ = interp2app(W_DictViewKeysObject.descr_xor), __rxor__ = interp2app(W_DictViewKeysObject.descr_rxor), isdisjoint = interp2app(W_DictViewKeysObject.descr_isdisjoint), + _dict = interp_attrproperty_w('w_dict', cls=W_DictViewKeysObject), ) W_DictViewValuesObject.typedef = TypeDef( "dict_values", + __new__ = interp2app(new_dict_values), __repr__ = interp2app(W_DictViewValuesObject.descr_repr), __len__ = interp2app(W_DictViewValuesObject.descr_len), __iter__ = interp2app(W_DictViewValuesObject.descr_iter), + _dict = interp_attrproperty_w('w_dict', cls=W_DictViewValuesObject), ) diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py --- a/pypy/objspace/std/test/test_dictmultiobject.py +++ 
b/pypy/objspace/std/test/test_dictmultiobject.py @@ -787,17 +787,6 @@ assert len(d.items()) == 2 assert len(d.values()) == 2 - def test_constructors_not_callable(self): - kt = type({}.keys()) - raises(TypeError, kt, {}) - raises(TypeError, kt) - it = type({}.items()) - raises(TypeError, it, {}) - raises(TypeError, it) - vt = type({}.values()) - raises(TypeError, vt, {}) - raises(TypeError, vt) - def test_dict_keys(self): d = {1: 10, "a": "ABC"} keys = d.keys() diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py --- a/rpython/rlib/rgc.py +++ b/rpython/rlib/rgc.py @@ -67,7 +67,7 @@ """Returns True if we have a "split GC address space", i.e. if we are translating with an option that doesn't support taking raw addresses inside GC objects and "hacking" at them. This is - notably the case with --reversedb.""" + notably the case with --revdb.""" return False # for test purposes we allow objects to be pinned and use diff --git a/rpython/rlib/src/boehm-rawrefcount.c b/rpython/rlib/src/boehm-rawrefcount.c --- a/rpython/rlib/src/boehm-rawrefcount.c +++ b/rpython/rlib/src/boehm-rawrefcount.c @@ -191,6 +191,7 @@ #endif assert(result->ob_refcnt == REFCNT_FROM_PYPY); result->ob_refcnt = 1; + result->ob_pypy_link = 0; p->pyobj = NULL; *pp = p->next_in_bucket; p->next_in_bucket = hash_free_list; diff --git a/rpython/tool/setuptools_msvc.py b/rpython/tool/setuptools_msvc.py --- a/rpython/tool/setuptools_msvc.py +++ b/rpython/tool/setuptools_msvc.py @@ -27,7 +27,6 @@ import platform import itertools import distutils.errors -from pkg_resources.extern.packaging.version import LegacyVersion from setuptools.extern.six.moves import filterfalse @@ -201,6 +200,7 @@ """ if "numpy.distutils" in sys.modules: import numpy as np + from pkg_resources.extern.packaging.version import LegacyVersion if LegacyVersion(np.__version__) < LegacyVersion('1.11.2'): return np.distutils.ccompiler.gen_lib_options(*args, **kwargs) return get_unpatched(msvc14_gen_lib_options)(*args, **kwargs) diff --git 
a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -56,7 +56,12 @@ # use setuptools from python3 to find tools try: vcdict = _find_vcvarsall(vsver, x64flag) + except ImportError as e: + if 'setuptools' in str(e): + log.error('is setuptools installed (perhaps try %s -mensurepip)?' % sys.executable) + log.error('looking for compiler %s raised exception "%s' % (vsver, str(e))) except Exception as e: + log.error('looking for compiler %s raised exception "%s' % (vsver, str(e))) return None else: if x64flag: diff --git a/rpython/translator/revdb/gencsupp.py b/rpython/translator/revdb/gencsupp.py --- a/rpython/translator/revdb/gencsupp.py +++ b/rpython/translator/revdb/gencsupp.py @@ -51,6 +51,10 @@ ## return False def prepare_function(funcgen): + if getattr(getattr(funcgen.graph, 'func', None), '_revdb_c_only_', False): + extra_enter_text = 'RPY_REVDB_C_ONLY_ENTER' + extra_return_text = 'RPY_REVDB_C_ONLY_LEAVE' + return extra_enter_text, extra_return_text stack_bottom = False for block in funcgen.graph.iterblocks(): for op in block.operations: diff --git a/rpython/translator/revdb/src-revdb/revdb.c b/rpython/translator/revdb/src-revdb/revdb.c --- a/rpython/translator/revdb/src-revdb/revdb.c +++ b/rpython/translator/revdb/src-revdb/revdb.c @@ -253,7 +253,10 @@ "(use REVDB=logfile)\n", (int)getpid()); } - rpy_revdb.buf_p = rpy_rev_buffer + sizeof(int16_t); + if (rpy_rev_fileno >= 0) + rpy_revdb.buf_p = rpy_rev_buffer + sizeof(int16_t); + else + rpy_revdb.buf_p = NULL; rpy_revdb.buf_limit = rpy_rev_buffer + sizeof(rpy_rev_buffer) - 32; rpy_revdb.unique_id_seen = 1; @@ -269,17 +272,23 @@ ssize_t full_size; assert(rpy_revdb.lock); + if (rpy_revdb.buf_p == NULL) + return; + assert(rpy_rev_fileno >= 0); + /* write the current buffer content to the OS */ full_size = rpy_revdb.buf_p - rpy_rev_buffer; rpy_revdb.buf_p = rpy_rev_buffer + sizeof(int16_t); - if 
(rpy_rev_fileno >= 0) - write_all(rpy_rev_buffer, full_size); + write_all(rpy_rev_buffer, full_size); } static ssize_t current_packet_size(void) { /* must be called with the lock held */ - return rpy_revdb.buf_p - (rpy_rev_buffer + sizeof(int16_t)); + if (rpy_revdb.buf_p != NULL) + return rpy_revdb.buf_p - (rpy_rev_buffer + sizeof(int16_t)); + else + return 0; } RPY_EXTERN @@ -327,6 +336,11 @@ rpy_reverse_db_flush(); assert(current_packet_size() == 0); + if (rpy_rev_fileno < 0) + return; + /* should not be here from the middle of a @c_only function */ + assert(rpy_revdb.buf_p != NULL); + *(int16_t *)p = async_code; memcpy(rpy_revdb.buf_p, &content, sizeof(uint64_t)); rpy_revdb.buf_p += sizeof(uint64_t); @@ -472,6 +486,9 @@ if (rpy_rev_fileno < 0) return 1; + /* should not be here from the middle of a @c_only function */ + assert(rpy_revdb.buf_p != NULL); + base_offset = lseek(rpy_rev_fileno, 0, SEEK_CUR); if (base_offset < 0) { perror("lseek"); @@ -488,6 +505,9 @@ if (rpy_rev_fileno < 0) return; + /* should not be here from the middle of a @c_only function */ + assert(rpy_revdb.buf_p != NULL); + base_offset = lseek(rpy_rev_fileno, 0, SEEK_CUR); if (base_offset < 0) { perror("lseek"); @@ -1033,9 +1053,9 @@ " echo 0 | sudo tee /proc/sys/kernel/randomize_va_space\n" "\n" "It has been reported that on Linux kernel 4.12.4-1-ARCH,\n" - "ASLR cannot be disabled at all for libpypy-c.so. For now\n" - "there is no good solution. Either you downgrade the\n" - "kernel, or you translate with --no-shared (and you loose\n" + "ASLR cannot be disabled at all for libpypy-c.so. It works\n" + "again in kernel 4.19 (and maybe sooner). 
Either change\n" + "kernels, or translate with --no-shared (but then you loose\n" "PyPy's cpyext ability).\n" "\n", argv[0]); exit(1); diff --git a/rpython/translator/revdb/src-revdb/revdb_include.h b/rpython/translator/revdb/src-revdb/revdb_include.h --- a/rpython/translator/revdb/src-revdb/revdb_include.h +++ b/rpython/translator/revdb/src-revdb/revdb_include.h @@ -16,7 +16,8 @@ #endif bool_t watch_enabled; int lock; - char *buf_p, *buf_limit, *buf_readend; + char *buf_p; /* NULL during recording if recording is actually disabled */ + char *buf_limit, *buf_readend; uint64_t stop_point_seen, stop_point_break; uint64_t unique_id_seen, unique_id_break; } rpy_revdb_t; @@ -85,9 +86,13 @@ { \ decl_e = variable; \ _RPY_REVDB_PRINT("[ wr ]", _e); \ - memcpy(rpy_revdb.buf_p, &_e, sizeof(_e)); \ - if ((rpy_revdb.buf_p += sizeof(_e)) > rpy_revdb.buf_limit) \ - rpy_reverse_db_flush(); \ + char *_dst = rpy_revdb.buf_p; \ + if (_dst) { \ + memcpy(_dst, &_e, sizeof(_e)); \ + if ((rpy_revdb.buf_p = _dst + sizeof(_e)) \ + > rpy_revdb.buf_limit) \ + rpy_reverse_db_flush(); \ + } \ } #define _RPY_REVDB_EMIT_REPLAY(decl_e, variable) \ @@ -179,6 +184,13 @@ rpy_reverse_db_bad_acquire_gil("release"); \ } +#define RPY_REVDB_C_ONLY_ENTER \ + char *saved_bufp = rpy_revdb.buf_p; \ + rpy_revdb.buf_p = NULL; + +#define RPY_REVDB_C_ONLY_LEAVE \ + rpy_revdb.buf_p = saved_bufp; + #define RPY_REVDB_CALLBACKLOC(locnum) \ rpy_reverse_db_callback_loc(locnum) From pypy.commits at gmail.com Thu Jan 31 13:24:42 2019 From: pypy.commits at gmail.com (rlamy) Date: Thu, 31 Jan 2019 10:24:42 -0800 (PST) Subject: [pypy-commit] extradoc extradoc: Add myself Message-ID: <5c533d6a.1c69fb81.8cf7a.017d@mx.google.com> Author: Ronan Lamy Branch: extradoc Changeset: r5942:001cd1e932ba Date: 2019-01-31 18:24 +0000 http://bitbucket.org/pypy/extradoc/changeset/001cd1e932ba/ Log: Add myself diff --git a/sprintinfo/ddorf2019/people.txt b/sprintinfo/ddorf2019/people.txt --- a/sprintinfo/ddorf2019/people.txt +++ 
b/sprintinfo/ddorf2019/people.txt @@ -21,6 +21,7 @@ John Witulski ? lives there Semih Demir ? lives there Stefan Troost ? lives there +Ronan Lamy Feb 3 - 8 Hotel Diana ============================ ============== =========================== From pypy.commits at gmail.com Thu Jan 31 15:39:59 2019 From: pypy.commits at gmail.com (arigo) Date: Thu, 31 Jan 2019 12:39:59 -0800 (PST) Subject: [pypy-commit] pypy default: Issue #2945 Message-ID: <5c535d1f.1c69fb81.4f228.1451@mx.google.com> Author: Armin Rigo Branch: Changeset: r95762:0861546064a7 Date: 2019-01-31 21:39 +0100 http://bitbucket.org/pypy/pypy/changeset/0861546064a7/ Log: Issue #2945 Completely remove the confusing mention of ``-O0`` diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -223,7 +223,7 @@ Rerun the ``Makefile`` with the ``make lldebug`` or ``make lldebug0`` target, which will build in a way that running under a debugger makes sense. Appropriate compilation flags are added to add debug info, and for ``lldebug0`` -compiler optimizations are set to ``-O0``. If you stop in a debugger, you will +compiler optimizations are fully disabled. If you stop in a debugger, you will see the very wordy machine-generated C code from the rpython translation step, which takes a little bit of reading to relate back to the rpython code. 
From pypy.commits at gmail.com Thu Jan 31 15:57:45 2019 From: pypy.commits at gmail.com (arigo) Date: Thu, 31 Jan 2019 12:57:45 -0800 (PST) Subject: [pypy-commit] cffi default: oops thanks BlastRock NA for spotting this Message-ID: <5c536149.1c69fb81.54c32.e9e5@mx.google.com> Author: Armin Rigo Branch: Changeset: r3208:8596bb760efa Date: 2019-01-31 21:58 +0100 http://bitbucket.org/cffi/cffi/changeset/8596bb760efa/ Log: oops thanks BlastRock NA for spotting this diff --git a/c/misc_thread_common.h b/c/misc_thread_common.h --- a/c/misc_thread_common.h +++ b/c/misc_thread_common.h @@ -107,8 +107,9 @@ //fprintf(stderr, "thread_canary_dealloc(%p): ZOMBIE\n", ob); _thread_canary_detach_with_lock(ob); } - else + else { //fprintf(stderr, "thread_canary_dealloc(%p): not a zombie\n", ob); + } if (ob->tls != NULL) { //fprintf(stderr, "thread_canary_dealloc(%p): was local_thread_canary\n", ob); From pypy.commits at gmail.com Thu Jan 31 16:36:21 2019 From: pypy.commits at gmail.com (arigo) Date: Thu, 31 Jan 2019 13:36:21 -0800 (PST) Subject: [pypy-commit] cffi default: Reorganize Message-ID: <5c536a55.1c69fb81.cde92.c1d6@mx.google.com> Author: Armin Rigo Branch: Changeset: r3209:59b8d697b9b9 Date: 2019-01-31 22:36 +0100 http://bitbucket.org/cffi/cffi/changeset/59b8d697b9b9/ Log: Reorganize diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst --- a/doc/source/whatsnew.rst +++ b/doc/source/whatsnew.rst @@ -6,13 +6,15 @@ v1.12 ===== -* Support for ``ffi.cdef(..., pack=N)`` where N is a power of two. - Means to emulate ``#pragma pack(N)`` on MSVC. Also, the default on - Windows is now ``pack=8``, like on MSVC. This might make a difference - in corner cases, although I can't think of one in the context of CFFI. - The old way ``ffi.cdef(..., packed=True)`` remains and is equivalent - to ``pack=1`` (saying e.g. that fields like ``int`` should be aligned - to 1 byte instead of 4). +* `Direct support for pkg-config`__. 
+ +* ``ffi.from_buffer()`` takes a new optional *first* argument that gives + the array type of the result. It also takes an optional keyword argument + ``require_writable`` to refuse read-only Python buffers. + +* ``ffi.new()``, ``ffi.gc()`` or ``ffi.from_buffer()`` cdata objects + can now be released at known times, either by using the ``with`` + keyword or by calling the new ``ffi.release()``. * Windows, CPython 3.x: cffi modules are linked with ``python3.dll`` again. This makes them independant on the exact CPython version, @@ -23,27 +25,28 @@ * CPython 2.x: ``ffi.dlopen()`` failed with non-ascii file names on Posix -* ``ffi.from_buffer()`` takes a new optional *first* argument that gives - the array type of the result. It also takes an optional keyword argument - ``require_writable`` to refuse read-only Python buffers. - -* ``ffi.new()``, ``ffi.gc()`` or ``ffi.from_buffer()`` cdata objects - can now be released at known times, either by using the ``with`` - keyword or by calling the new ``ffi.release()``. - * CPython: if a thread is started from C and then runs Python code (with callbacks or with the embedding solution), then previous versions of cffi would contain possible crashes and/or memory leaks. Hopefully, this has been fixed (see `issue #362`_). -* `Direct support for pkg-config`__. +* Support for ``ffi.cdef(..., pack=N)`` where N is a power of two. + Means to emulate ``#pragma pack(N)`` on MSVC. Also, the default on + Windows is now ``pack=8``, like on MSVC. This might make a difference + in corner cases, although I can't think of one in the context of CFFI. + The old way ``ffi.cdef(..., packed=True)`` remains and is equivalent + to ``pack=1`` (saying e.g. that fields like ``int`` should be aligned + to 1 byte instead of 4). +.. __: cdef.html#pkgconfig .. _`issue #362`: https://bitbucket.org/cffi/cffi/issues/362/ -.. 
__: cdef.html#pkgconfig +Older Versions +============== + v1.11.5 -======= +------- * `Issue #357`_: fix ``ffi.emit_python_code()`` which generated a buggy Python file if you are using a ``struct`` with an anonymous ``union`` @@ -78,7 +81,7 @@ v1.11.4 -======= +------- * Windows: reverted linking with ``python3.dll``, because virtualenv does not make this DLL available to virtual environments @@ -91,7 +94,7 @@ v1.11.3 -======= +------- * Fix on CPython 3.x: reading the attributes ``__loader__`` or ``__spec__`` from the cffi-generated lib modules gave a buggy @@ -108,13 +111,13 @@ v1.11.2 -======= +------- * Fix Windows issue with managing the thread-state on CPython 3.0 to 3.5 v1.11.1 -======= +------- * Fix tests, remove deprecated C API usage @@ -127,7 +130,7 @@ v1.11 -===== +----- * Support the modern standard types ``char16_t`` and ``char32_t``. These work like ``wchar_t``: they represent one unicode character, or @@ -182,9 +185,6 @@ .. __: http://bugs.python.org/issue31105 -Older Versions -============== - v1.10.1 ------- From pypy.commits at gmail.com Thu Jan 31 16:43:13 2019 From: pypy.commits at gmail.com (arigo) Date: Thu, 31 Jan 2019 13:43:13 -0800 (PST) Subject: [pypy-commit] pypy default: update to cffi/59b8d697b9b9 Message-ID: <5c536bf1.1c69fb81.add7f.ce1c@mx.google.com> Author: Armin Rigo Branch: Changeset: r95763:4c96a83024a4 Date: 2019-01-31 22:42 +0100 http://bitbucket.org/pypy/pypy/changeset/4c96a83024a4/ Log: update to cffi/59b8d697b9b9 diff --git a/extra_tests/cffi_tests/cffi1/test_pkgconfig.py b/extra_tests/cffi_tests/cffi1/test_pkgconfig.py new file mode 100644 --- /dev/null +++ b/extra_tests/cffi_tests/cffi1/test_pkgconfig.py @@ -0,0 +1,95 @@ +# Generated by pypy/tool/import_cffi.py +import sys +import subprocess +import py +import cffi.pkgconfig as pkgconfig +from cffi import PkgConfigError + + +def mock_call(libname, flag): + assert libname=="foobarbaz" + flags = { + "--cflags": "-I/usr/include/python3.6m -DABCD -DCFFI_TEST=1 -O42\n", + 
"--libs": "-L/usr/lib64 -lpython3.6 -shared\n", + } + return flags[flag] + + +def test_merge_flags(): + d1 = {"ham": [1, 2, 3], "spam" : ["a", "b", "c"], "foo" : []} + d2 = {"spam" : ["spam", "spam", "spam"], "bar" : ["b", "a", "z"]} + + pkgconfig.merge_flags(d1, d2) + assert d1 == { + "ham": [1, 2, 3], + "spam" : ["a", "b", "c", "spam", "spam", "spam"], + "bar" : ["b", "a", "z"], + "foo" : []} + + +def test_pkgconfig(): + assert pkgconfig.flags_from_pkgconfig([]) == {} + + saved = pkgconfig.call + try: + pkgconfig.call = mock_call + flags = pkgconfig.flags_from_pkgconfig(["foobarbaz"]) + finally: + pkgconfig.call = saved + assert flags == { + 'include_dirs': ['/usr/include/python3.6m'], + 'library_dirs': ['/usr/lib64'], + 'libraries': ['python3.6'], + 'define_macros': [('ABCD', None), ('CFFI_TEST', '1')], + 'extra_compile_args': ['-O42'], + 'extra_link_args': ['-shared'] + } + +class mock_subprocess: + PIPE = Ellipsis + class Popen: + def __init__(self, cmd, stdout, stderr): + if mock_subprocess.RESULT is None: + raise OSError("oops can't run") + assert cmd == ['pkg-config', '--print-errors', '--cflags', 'libfoo'] + def communicate(self): + bout, berr, rc = mock_subprocess.RESULT + self.returncode = rc + return bout, berr + +def test_call(): + saved = pkgconfig.subprocess + try: + pkgconfig.subprocess = mock_subprocess + + mock_subprocess.RESULT = None + e = py.test.raises(PkgConfigError, pkgconfig.call, "libfoo", "--cflags") + assert str(e.value) == "cannot run pkg-config: oops can't run" + + mock_subprocess.RESULT = b"", "Foo error!\n", 1 + e = py.test.raises(PkgConfigError, pkgconfig.call, "libfoo", "--cflags") + assert str(e.value) == "Foo error!" 
+ + mock_subprocess.RESULT = b"abc\\def\n", "", 0 + e = py.test.raises(PkgConfigError, pkgconfig.call, "libfoo", "--cflags") + assert str(e.value).startswith("pkg-config --cflags libfoo returned an " + "unsupported backslash-escaped output:") + + mock_subprocess.RESULT = b"abc def\n", "", 0 + result = pkgconfig.call("libfoo", "--cflags") + assert result == "abc def\n" + + mock_subprocess.RESULT = b"abc def\n", "", 0 + result = pkgconfig.call("libfoo", "--cflags") + assert result == "abc def\n" + + if sys.version_info >= (3,): + mock_subprocess.RESULT = b"\xff\n", "", 0 + e = py.test.raises(PkgConfigError, pkgconfig.call, + "libfoo", "--cflags", encoding="utf-8") + assert str(e.value) == ( + "pkg-config --cflags libfoo returned bytes that cannot be " + "decoded with encoding 'utf-8':\nb'\\xff\\n'") + + finally: + pkgconfig.subprocess = saved diff --git a/lib_pypy/cffi/__init__.py b/lib_pypy/cffi/__init__.py --- a/lib_pypy/cffi/__init__.py +++ b/lib_pypy/cffi/__init__.py @@ -3,6 +3,7 @@ from .api import FFI from .error import CDefError, FFIError, VerificationError, VerificationMissing +from .error import PkgConfigError __version__ = "1.12.0" __version_info__ = (1, 12, 0) diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -643,6 +643,16 @@ self._assigned_source = (str(module_name), source, source_extension, kwds) + def set_source_pkgconfig(self, module_name, pkgconfig_libs, source, + source_extension='.c', **kwds): + from . 
import pkgconfig + if not isinstance(pkgconfig_libs, list): + raise TypeError("the pkgconfig_libs argument must be a list " + "of package names") + kwds2 = pkgconfig.flags_from_pkgconfig(pkgconfig_libs) + pkgconfig.merge_flags(kwds, kwds2) + self.set_source(module_name, source, source_extension, **kwds) + def distutils_extension(self, tmpdir='build', verbose=True): from distutils.dir_util import mkpath from .recompiler import recompile diff --git a/lib_pypy/cffi/error.py b/lib_pypy/cffi/error.py --- a/lib_pypy/cffi/error.py +++ b/lib_pypy/cffi/error.py @@ -1,8 +1,9 @@ class FFIError(Exception): - pass + __module__ = 'cffi' class CDefError(Exception): + __module__ = 'cffi' def __str__(self): try: current_decl = self.args[1] @@ -16,8 +17,15 @@ class VerificationError(Exception): """ An error raised when verification fails """ + __module__ = 'cffi' class VerificationMissing(Exception): """ An error raised when incomplete structures are passed into cdef, but no verification has been done """ + __module__ = 'cffi' + +class PkgConfigError(Exception): + """ An error raised for missing modules in pkg-config + """ + __module__ = 'cffi' From pypy.commits at gmail.com Thu Jan 31 16:49:32 2019 From: pypy.commits at gmail.com (arigo) Date: Thu, 31 Jan 2019 13:49:32 -0800 (PST) Subject: [pypy-commit] cffi default: Backport 4d18a461a973 from pypy Message-ID: <5c536d6c.1c69fb81.14b01.620d@mx.google.com> Author: Armin Rigo Branch: Changeset: r3210:86332166be5b Date: 2019-01-31 22:49 +0100 http://bitbucket.org/cffi/cffi/changeset/86332166be5b/ Log: Backport 4d18a461a973 from pypy diff --git a/cffi/api.py b/cffi/api.py --- a/cffi/api.py +++ b/cffi/api.py @@ -592,7 +592,7 @@ if sys.platform == "win32": # we need 'libpypy-c.lib'. Current distributions of # pypy (>= 4.1) contain it as 'libs/python27.lib'. 
- pythonlib = "python27" + pythonlib = "python{0[0]}{0[1]}".format(sys.version_info) if hasattr(sys, 'prefix'): ensure('library_dirs', os.path.join(sys.prefix, 'libs')) else: From pypy.commits at gmail.com Thu Jan 31 16:50:53 2019 From: pypy.commits at gmail.com (arigo) Date: Thu, 31 Jan 2019 13:50:53 -0800 (PST) Subject: [pypy-commit] pypy default: update to cffi/86332166be5b Message-ID: <5c536dbd.1c69fb81.b286c.1dda@mx.google.com> Author: Armin Rigo Branch: Changeset: r95764:bb047ae33a2f Date: 2019-01-31 22:50 +0100 http://bitbucket.org/pypy/pypy/changeset/bb047ae33a2f/ Log: update to cffi/86332166be5b diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -592,7 +592,7 @@ if sys.platform == "win32": # we need 'libpypy-c.lib'. Current distributions of # pypy (>= 4.1) contain it as 'libs/python27.lib'. - pythonlib = "python27" + pythonlib = "python{0[0]}{0[1]}".format(sys.version_info) if hasattr(sys, 'prefix'): ensure('library_dirs', os.path.join(sys.prefix, 'libs')) else: From pypy.commits at gmail.com Thu Jan 31 16:50:54 2019 From: pypy.commits at gmail.com (arigo) Date: Thu, 31 Jan 2019 13:50:54 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5c536dbe.1c69fb81.8cf7a.3f04@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r95765:2b258d7b2475 Date: 2019-01-31 22:50 +0100 http://bitbucket.org/pypy/pypy/changeset/2b258d7b2475/ Log: hg merge default diff --git a/extra_tests/cffi_tests/cffi1/test_pkgconfig.py b/extra_tests/cffi_tests/cffi1/test_pkgconfig.py new file mode 100644 --- /dev/null +++ b/extra_tests/cffi_tests/cffi1/test_pkgconfig.py @@ -0,0 +1,95 @@ +# Generated by pypy/tool/import_cffi.py +import sys +import subprocess +import py +import cffi.pkgconfig as pkgconfig +from cffi import PkgConfigError + + +def mock_call(libname, flag): + assert libname=="foobarbaz" + flags = { + "--cflags": "-I/usr/include/python3.6m -DABCD -DCFFI_TEST=1 -O42\n", + "--libs": 
"-L/usr/lib64 -lpython3.6 -shared\n", + } + return flags[flag] + + +def test_merge_flags(): + d1 = {"ham": [1, 2, 3], "spam" : ["a", "b", "c"], "foo" : []} + d2 = {"spam" : ["spam", "spam", "spam"], "bar" : ["b", "a", "z"]} + + pkgconfig.merge_flags(d1, d2) + assert d1 == { + "ham": [1, 2, 3], + "spam" : ["a", "b", "c", "spam", "spam", "spam"], + "bar" : ["b", "a", "z"], + "foo" : []} + + +def test_pkgconfig(): + assert pkgconfig.flags_from_pkgconfig([]) == {} + + saved = pkgconfig.call + try: + pkgconfig.call = mock_call + flags = pkgconfig.flags_from_pkgconfig(["foobarbaz"]) + finally: + pkgconfig.call = saved + assert flags == { + 'include_dirs': ['/usr/include/python3.6m'], + 'library_dirs': ['/usr/lib64'], + 'libraries': ['python3.6'], + 'define_macros': [('ABCD', None), ('CFFI_TEST', '1')], + 'extra_compile_args': ['-O42'], + 'extra_link_args': ['-shared'] + } + +class mock_subprocess: + PIPE = Ellipsis + class Popen: + def __init__(self, cmd, stdout, stderr): + if mock_subprocess.RESULT is None: + raise OSError("oops can't run") + assert cmd == ['pkg-config', '--print-errors', '--cflags', 'libfoo'] + def communicate(self): + bout, berr, rc = mock_subprocess.RESULT + self.returncode = rc + return bout, berr + +def test_call(): + saved = pkgconfig.subprocess + try: + pkgconfig.subprocess = mock_subprocess + + mock_subprocess.RESULT = None + e = py.test.raises(PkgConfigError, pkgconfig.call, "libfoo", "--cflags") + assert str(e.value) == "cannot run pkg-config: oops can't run" + + mock_subprocess.RESULT = b"", "Foo error!\n", 1 + e = py.test.raises(PkgConfigError, pkgconfig.call, "libfoo", "--cflags") + assert str(e.value) == "Foo error!" 
+ + mock_subprocess.RESULT = b"abc\\def\n", "", 0 + e = py.test.raises(PkgConfigError, pkgconfig.call, "libfoo", "--cflags") + assert str(e.value).startswith("pkg-config --cflags libfoo returned an " + "unsupported backslash-escaped output:") + + mock_subprocess.RESULT = b"abc def\n", "", 0 + result = pkgconfig.call("libfoo", "--cflags") + assert result == "abc def\n" + + mock_subprocess.RESULT = b"abc def\n", "", 0 + result = pkgconfig.call("libfoo", "--cflags") + assert result == "abc def\n" + + if sys.version_info >= (3,): + mock_subprocess.RESULT = b"\xff\n", "", 0 + e = py.test.raises(PkgConfigError, pkgconfig.call, + "libfoo", "--cflags", encoding="utf-8") + assert str(e.value) == ( + "pkg-config --cflags libfoo returned bytes that cannot be " + "decoded with encoding 'utf-8':\nb'\\xff\\n'") + + finally: + pkgconfig.subprocess = saved diff --git a/lib_pypy/cffi/__init__.py b/lib_pypy/cffi/__init__.py --- a/lib_pypy/cffi/__init__.py +++ b/lib_pypy/cffi/__init__.py @@ -3,6 +3,7 @@ from .api import FFI from .error import CDefError, FFIError, VerificationError, VerificationMissing +from .error import PkgConfigError __version__ = "1.12.0" __version_info__ = (1, 12, 0) diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -643,6 +643,16 @@ self._assigned_source = (str(module_name), source, source_extension, kwds) + def set_source_pkgconfig(self, module_name, pkgconfig_libs, source, + source_extension='.c', **kwds): + from . 
import pkgconfig + if not isinstance(pkgconfig_libs, list): + raise TypeError("the pkgconfig_libs argument must be a list " + "of package names") + kwds2 = pkgconfig.flags_from_pkgconfig(pkgconfig_libs) + pkgconfig.merge_flags(kwds, kwds2) + self.set_source(module_name, source, source_extension, **kwds) + def distutils_extension(self, tmpdir='build', verbose=True): from distutils.dir_util import mkpath from .recompiler import recompile diff --git a/lib_pypy/cffi/error.py b/lib_pypy/cffi/error.py --- a/lib_pypy/cffi/error.py +++ b/lib_pypy/cffi/error.py @@ -1,8 +1,9 @@ class FFIError(Exception): - pass + __module__ = 'cffi' class CDefError(Exception): + __module__ = 'cffi' def __str__(self): try: current_decl = self.args[1] @@ -16,8 +17,15 @@ class VerificationError(Exception): """ An error raised when verification fails """ + __module__ = 'cffi' class VerificationMissing(Exception): """ An error raised when incomplete structures are passed into cdef, but no verification has been done """ + __module__ = 'cffi' + +class PkgConfigError(Exception): + """ An error raised for missing modules in pkg-config + """ + __module__ = 'cffi' diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -220,11 +220,12 @@ Making a debug build of PyPy ---------------------------- -If the Makefile is rerun with the lldebug or lldebug0 target, appropriate -compilation flags are added to add debug info and reduce compiler optimizations -to ``-O0`` respectively. If you stop in a debugger, you will see the -very wordy machine-generated C code from the rpython translation step, which -takes a little bit of reading to relate back to the rpython code. +Rerun the ``Makefile`` with the ``make lldebug`` or ``make lldebug0`` target, +which will build in a way that running under a debugger makes sense. +Appropriate compilation flags are added to add debug info, and for ``lldebug0`` +compiler optimizations are fully disabled. 
If you stop in a debugger, you will +see the very wordy machine-generated C code from the rpython translation step, +which takes a little bit of reading to relate back to the rpython code. Build cffi import libraries for the stdlib ------------------------------------------ diff --git a/pypy/doc/release-v7.0.0.rst b/pypy/doc/release-v7.0.0.rst --- a/pypy/doc/release-v7.0.0.rst +++ b/pypy/doc/release-v7.0.0.rst @@ -19,9 +19,10 @@ Until we can work with downstream providers to distribute builds with PyPy, we have made packages for some common packages `available as wheels`_. -The GC now has `hooks`_ to gain more insights into its performance, and it is -now possible to manually manage the GC by using a combination of -``gc.disable`` and ``gc.collect_step``. See the `GC blog post`_. +The GC `hooks`_ , which can be used to gain more insights into its +performance, has been improved and it is now possible to manually manage the +GC by using a combination of ``gc.disable`` and ``gc.collect_step``. See the +`GC blog post`_. We updated the `cffi`_ module included in PyPy to version 1.12, and the