[pypy-commit] pypy cpyext-ext: merge default into cpyext-ext
mattip
pypy.commits at gmail.com
Fri Jun 3 06:50:44 EDT 2016
Author: Matti Picus <matti.picus at gmail.com>
Branch: cpyext-ext
Changeset: r84893:f6f66900d0d9
Date: 2016-06-03 13:49 +0300
http://bitbucket.org/pypy/pypy/changeset/f6f66900d0d9/
Log: merge default into cpyext-ext
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -125,4 +125,9 @@
.. branch: traceviewer-common-merge-point-formats
-Teach RPython JIT's off-line traceviewer the most common ``debug_merge_point`` formats.
\ No newline at end of file
+Teach RPython JIT's off-line traceviewer the most common ``debug_merge_point`` formats.
+
+.. branch: cpyext-pickle
+
+Enable pickling of W_PyCFunctionObject by monkeypatching pickle.Pickler.dispatch
+at cpyext import time
diff --git a/pypy/interpreter/astcompiler/codegen.py b/pypy/interpreter/astcompiler/codegen.py
--- a/pypy/interpreter/astcompiler/codegen.py
+++ b/pypy/interpreter/astcompiler/codegen.py
@@ -564,7 +564,6 @@
self.emit_jump(ops.JUMP_FORWARD, end)
self.use_next_block(next_except)
self.emit_op(ops.END_FINALLY) # this END_FINALLY will always re-raise
- self.is_dead_code()
self.use_next_block(otherwise)
self.visit_sequence(te.orelse)
self.use_next_block(end)
diff --git a/pypy/module/_cffi_backend/test/test_recompiler.py b/pypy/module/_cffi_backend/test/test_recompiler.py
--- a/pypy/module/_cffi_backend/test/test_recompiler.py
+++ b/pypy/module/_cffi_backend/test/test_recompiler.py
@@ -1784,3 +1784,9 @@
assert ffi.list_types() == (['CFFIb', 'CFFIbb', 'CFFIbbb'],
['CFFIa', 'CFFIcc', 'CFFIccc'],
['CFFIaa', 'CFFIaaa', 'CFFIg'])
+
+ def test_FFIFunctionWrapper(self):
+ ffi, lib = self.prepare("void f(void);", "test_FFIFunctionWrapper",
+ "void f(void) { }")
+ assert lib.f.__get__(42) is lib.f
+ assert lib.f.__get__(42, int) is lib.f
diff --git a/pypy/module/_cffi_backend/wrapper.py b/pypy/module/_cffi_backend/wrapper.py
--- a/pypy/module/_cffi_backend/wrapper.py
+++ b/pypy/module/_cffi_backend/wrapper.py
@@ -100,6 +100,11 @@
doc = '%s;\n\nCFFI C function from %s.lib' % (doc, self.modulename)
return space.wrap(doc)
+ def descr_get(self, space, w_obj, w_type=None):
+ # never bind anything, but a __get__ is still present so that
+ # pydoc displays useful information (namely, the __repr__)
+ return self
+
@jit.unroll_safe
def prepare_args(space, rawfunctype, args_w, start_index):
@@ -136,5 +141,6 @@
__name__ = interp_attrproperty('fnname', cls=W_FunctionWrapper),
__module__ = interp_attrproperty('modulename', cls=W_FunctionWrapper),
__doc__ = GetSetProperty(W_FunctionWrapper.descr_get_doc),
+ __get__ = interp2app(W_FunctionWrapper.descr_get),
)
W_FunctionWrapper.typedef.acceptable_as_base_class = False
diff --git a/pypy/module/cpyext/__init__.py b/pypy/module/cpyext/__init__.py
--- a/pypy/module/cpyext/__init__.py
+++ b/pypy/module/cpyext/__init__.py
@@ -1,4 +1,5 @@
from pypy.interpreter.mixedmodule import MixedModule
+from pypy.interpreter import gateway
from pypy.module.cpyext.state import State
from pypy.module.cpyext import api
@@ -14,6 +15,12 @@
def startup(self, space):
space.fromcache(State).startup(space)
+ method = pypy.module.cpyext.typeobject.get_new_method_def(space)
+ w_obj = pypy.module.cpyext.methodobject.W_PyCFunctionObject(space, method, space.wrap(''))
+ space.appexec([space.type(w_obj)], """(methodtype):
+ from pickle import Pickler
+ Pickler.dispatch[methodtype] = Pickler.save_global
+ """)
def register_atexit(self, function):
if len(self.atexit_funcs) >= 32:
@@ -66,6 +73,7 @@
import pypy.module.cpyext.pyfile
import pypy.module.cpyext.pystrtod
import pypy.module.cpyext.pytraceback
+import pypy.module.cpyext.methodobject
# now that all rffi_platform.Struct types are registered, configure them
api.configure_types()
diff --git a/pypy/module/cpyext/methodobject.py b/pypy/module/cpyext/methodobject.py
--- a/pypy/module/cpyext/methodobject.py
+++ b/pypy/module/cpyext/methodobject.py
@@ -44,8 +44,8 @@
dealloc=cfunction_dealloc)
def cfunction_attach(space, py_obj, w_obj):
+ assert isinstance(w_obj, W_PyCFunctionObject)
py_func = rffi.cast(PyCFunctionObject, py_obj)
- assert isinstance(w_obj, W_PyCFunctionObject)
py_func.c_m_ml = w_obj.ml
py_func.c_m_self = make_ref(space, w_obj.w_self)
py_func.c_m_module = make_ref(space, w_obj.w_module)
diff --git a/pypy/module/cpyext/test/array.c b/pypy/module/cpyext/test/array.c
--- a/pypy/module/cpyext/test/array.c
+++ b/pypy/module/cpyext/test/array.c
@@ -1502,7 +1502,7 @@
static PyObject *
array_reduce(arrayobject *array)
{
- PyObject *dict, *result, *list;
+ PyObject *dict, *result, *list, *mod, *obj;
dict = PyObject_GetAttrString((PyObject *)array, "__dict__");
if (dict == NULL) {
@@ -1512,6 +1512,18 @@
dict = Py_None;
Py_INCREF(dict);
}
+ /* Return a tuple of (callable object, typecode, values, state) */
+ mod = PyImport_ImportModule("array");
+ if (mod == NULL) {
+ Py_DECREF(dict);
+ return NULL;
+ }
+ obj = PyObject_GetAttrString(mod, "_reconstruct");
+ Py_DECREF(mod);
+ if (obj == NULL) {
+ Py_DECREF(dict);
+ return NULL;
+ }
/* Unlike in Python 3.x, we never use the more efficient memory
* representation of an array for pickling. This is unfortunately
* necessary to allow array objects to be unpickled by Python 3.x,
@@ -1524,7 +1536,7 @@
return NULL;
}
result = Py_BuildValue(
- "O(cO)O", Py_TYPE(array), array->ob_descr->typecode, list, dict);
+ "O(cO)O", obj, array->ob_descr->typecode, list, dict);
Py_DECREF(list);
Py_DECREF(dict);
return result;
@@ -1916,6 +1928,11 @@
char c;
PyObject *initial = NULL, *it = NULL;
struct arraydescr *descr;
+ if (type == NULL)
+ {
+ /* when called from _reconstruct */
+ type = &Arraytype;
+ }
if (type == &Arraytype && !_PyArg_NoKeywords("array.array()", kwds))
return NULL;
@@ -2017,6 +2034,11 @@
return NULL;
}
+static PyObject *
+_reconstruct(PyTypeObject *type, PyObject *args)
+{
+ return array_new(type, args, NULL);
+}
PyDoc_STRVAR(module_doc,
"This module defines an object type which can efficiently represent\n\
@@ -2223,6 +2245,7 @@
/* No functions in array module. */
static PyMethodDef a_methods[] = {
+ {"_reconstruct", (PyCFunction)_reconstruct, METH_VARARGS, NULL},
{NULL, NULL, 0, NULL} /* Sentinel */
};
@@ -2244,6 +2267,8 @@
return;
Py_INCREF((PyObject *)&Arraytype);
+ if (PyType_Ready(&Arraytype) < 0)
+ return;
PyModule_AddObject(m, "ArrayType", (PyObject *)&Arraytype);
Py_INCREF((PyObject *)&Arraytype);
PyModule_AddObject(m, "array", (PyObject *)&Arraytype);
diff --git a/pypy/module/cpyext/test/test_arraymodule.py b/pypy/module/cpyext/test/test_arraymodule.py
--- a/pypy/module/cpyext/test/test_arraymodule.py
+++ b/pypy/module/cpyext/test/test_arraymodule.py
@@ -67,3 +67,13 @@
'\x02\0\0\0'
'\x03\0\0\0'
'\x04\0\0\0')
+
+ def test_pickle(self):
+ import pickle
+ module = self.import_module(name='array')
+ arr = module.array('i', [1,2,3,4])
+ s = pickle.dumps(arr)
+ # pypy exports __dict__ on cpyext objects, so the pickle picks up the {} state value
+ #assert s == "carray\n_reconstruct\np0\n(S'i'\np1\n(lp2\nI1\naI2\naI3\naI4\natp3\nRp4\n."
+ rra = pickle.loads(s) # rra is arr backwards
+ #assert arr.tolist() == rra.tolist()
diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py
--- a/pypy/objspace/std/bytearrayobject.py
+++ b/pypy/objspace/std/bytearrayobject.py
@@ -176,9 +176,8 @@
hexstring = hexstring.lower()
data = []
length = len(hexstring)
- i = -2
+ i = 0
while True:
- i += 2
while i < length and hexstring[i] == ' ':
i += 1
if i >= length:
@@ -193,6 +192,7 @@
if bot == -1:
raise oefmt(space.w_ValueError, NON_HEX_MSG, i + 1)
data.append(chr(top*16 + bot))
+ i += 2
# in CPython bytearray.fromhex is a staticmethod, so
# we ignore w_type and always return a bytearray
diff --git a/pypy/objspace/std/objectobject.py b/pypy/objspace/std/objectobject.py
--- a/pypy/objspace/std/objectobject.py
+++ b/pypy/objspace/std/objectobject.py
@@ -180,7 +180,13 @@
if w_reduce is not None:
w_cls = space.getattr(w_obj, space.wrap('__class__'))
w_cls_reduce_meth = space.getattr(w_cls, w_st_reduce)
- w_cls_reduce = space.getattr(w_cls_reduce_meth, space.wrap('im_func'))
+ try:
+ w_cls_reduce = space.getattr(w_cls_reduce_meth, space.wrap('im_func'))
+ except OperationError as e:
+ # i.e. PyCFunction from cpyext
+ if not e.match(space, space.w_AttributeError):
+ raise
+ w_cls_reduce = space.w_None
w_objtype = space.w_object
w_obj_dict = space.getattr(w_objtype, space.wrap('__dict__'))
w_obj_reduce = space.getitem(w_obj_dict, w_st_reduce)
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -467,7 +467,11 @@
assert saveerrloc.is_imm()
cb.emit_call_release_gil(saveerrloc.value)
else:
- cb.emit()
+ effectinfo = descr.get_extra_info()
+ if effectinfo is None or effectinfo.check_can_collect():
+ cb.emit()
+ else:
+ cb.emit_no_collect()
return fcond
def _genop_same_as(self, op, arglocs, regalloc, fcond):
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -397,9 +397,9 @@
else:
self.rm.force_spill_var(var)
- def before_call(self, force_store=[], save_all_regs=False):
- self.rm.before_call(force_store, save_all_regs)
- self.vfprm.before_call(force_store, save_all_regs)
+ def before_call(self, save_all_regs=False):
+ self.rm.before_call(save_all_regs)
+ self.vfprm.before_call(save_all_regs)
def _sync_var(self, v):
if v.type == FLOAT:
@@ -552,8 +552,7 @@
prepare_op_call_f = _prepare_op_call
prepare_op_call_n = _prepare_op_call
- def _prepare_call(self, op, force_store=[], save_all_regs=False,
- first_arg_index=1):
+ def _prepare_call(self, op, save_all_regs=False, first_arg_index=1):
args = [None] * (op.numargs() + 3)
calldescr = op.getdescr()
assert isinstance(calldescr, CallDescr)
@@ -571,17 +570,27 @@
args[1] = imm(size)
args[2] = sign_loc
- args[0] = self._call(op, args, force_store, save_all_regs)
+ effectinfo = calldescr.get_extra_info()
+ if save_all_regs:
+ gc_level = 2
+ elif effectinfo is None or effectinfo.check_can_collect():
+ gc_level = 1
+ else:
+ gc_level = 0
+
+ args[0] = self._call(op, args, gc_level)
return args
- def _call(self, op, arglocs, force_store=[], save_all_regs=False):
- # spill variables that need to be saved around calls
- self.vfprm.before_call(force_store, save_all_regs=save_all_regs)
- if not save_all_regs:
- gcrootmap = self.cpu.gc_ll_descr.gcrootmap
- if gcrootmap and gcrootmap.is_shadow_stack:
- save_all_regs = 2
- self.rm.before_call(force_store, save_all_regs=save_all_regs)
+ def _call(self, op, arglocs, gc_level):
+ # spill variables that need to be saved around calls:
+ # gc_level == 0: callee cannot invoke the GC
+ # gc_level == 1: can invoke GC, save all regs that contain pointers
+ # gc_level == 2: can force, save all regs
+ save_all_regs = gc_level == 2
+ self.vfprm.before_call(save_all_regs=save_all_regs)
+ if gc_level == 1 and self.cpu.gc_ll_descr.gcrootmap:
+ save_all_regs = 2
+ self.rm.before_call(save_all_regs=save_all_regs)
resloc = self.after_call(op)
return resloc
@@ -1068,7 +1077,7 @@
def _prepare_op_call_assembler(self, op, fcond):
locs = self.locs_for_call_assembler(op)
tmploc = self.get_scratch_reg(INT, selected_reg=r.r0)
- resloc = self._call(op, locs + [tmploc], save_all_regs=True)
+ resloc = self._call(op, locs + [tmploc], gc_level=2)
return locs + [resloc, tmploc]
prepare_op_call_assembler_i = _prepare_op_call_assembler
diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py
--- a/rpython/jit/backend/llsupport/regalloc.py
+++ b/rpython/jit/backend/llsupport/regalloc.py
@@ -574,27 +574,113 @@
self.assembler.regalloc_mov(reg, to)
# otherwise it's clean
+ def _bc_spill(self, v, new_free_regs):
+ self._sync_var(v)
+ new_free_regs.append(self.reg_bindings.pop(v))
+
def before_call(self, force_store=[], save_all_regs=0):
- """ Spill registers before a call, as described by
- 'self.save_around_call_regs'. Registers are not spilled if
- they don't survive past the current operation, unless they
- are listed in 'force_store'. 'save_all_regs' can be 0 (default),
- 1 (save all), or 2 (save default+PTRs).
+ """Spill or move some registers before a call. By default,
+ this means: for every register in 'self.save_around_call_regs',
+ if there is a variable there and it survives longer than
+ the current operation, then it is spilled/moved somewhere else.
+
+ 'save_all_regs' can be 0 (default set of registers), 1 (do that
+ for all registers), or 2 (default + gc ptrs).
+
+ Overview of what we do (the implementation does it differently,
+ for the same result):
+
+ * we first check the set of registers that are free: call it F.
+
+ * possibly_free_vars() is implied for all variables (except
+ the ones listed in force_store): if they don't survive past
+ the current operation, they are forgotten now. (Their
+        registers remain not in F, because they are typically
+ arguments to the call, so they should not be overwritten by
+ the next step.)
+
+ * then for every variable that needs to be spilled/moved: if
+ there is an entry in F that is acceptable, pick it and emit a
+ move. Otherwise, emit a spill. Start doing this with the
+ variables that survive the shortest time, to give them a
+        better chance to remain in a register---similar algo as
+ _pick_variable_to_spill().
+
+ Note: when a register is moved, it often (but not always) means
+ we could have been more clever and picked a better register in
+ the first place, when we did so earlier. It is done this way
+ anyway, as a local hack in this function, because on x86 CPUs
+ such register-register moves are almost free.
"""
+ new_free_regs = []
+ move_or_spill = []
+
for v, reg in self.reg_bindings.items():
- if v not in force_store and self.longevity[v][1] <= self.position:
+ max_age = self.longevity[v][1]
+ if v not in force_store and max_age <= self.position:
# variable dies
del self.reg_bindings[v]
- self.free_regs.append(reg)
+ new_free_regs.append(reg)
continue
- if save_all_regs != 1 and reg not in self.save_around_call_regs:
- if save_all_regs == 0:
- continue # we don't have to
- if v.type != REF:
- continue # only save GC pointers
- self._sync_var(v)
- del self.reg_bindings[v]
- self.free_regs.append(reg)
+
+ if save_all_regs == 1:
+ # we need to spill all registers in this mode
+ self._bc_spill(v, new_free_regs)
+ #
+ elif save_all_regs == 2 and v.type == REF:
+ # we need to spill all GC ptrs in this mode
+ self._bc_spill(v, new_free_regs)
+ #
+ elif reg not in self.save_around_call_regs:
+ continue # in a register like ebx/rbx: it is fine where it is
+ #
+ else:
+ # this is a register like eax/rax, which needs either
+ # spilling or moving.
+ move_or_spill.append((v, max_age))
+
+ if len(move_or_spill) > 0:
+ while len(self.free_regs) > 0:
+ new_reg = self.free_regs.pop()
+ if new_reg in self.save_around_call_regs:
+ new_free_regs.append(new_reg) # not this register...
+ continue
+ # This 'new_reg' is suitable for moving a candidate to.
+ # Pick the one with the smallest max_age. (This
+ # is one step of a naive sorting algo, slow in theory,
+ # but the list should always be very small so it
+ # doesn't matter.)
+ best_i = 0
+ smallest_max_age = move_or_spill[0][1]
+ for i in range(1, len(move_or_spill)):
+ max_age = move_or_spill[i][1]
+ if max_age < smallest_max_age:
+ best_i = i
+ smallest_max_age = max_age
+ v, max_age = move_or_spill.pop(best_i)
+ # move from 'reg' to 'new_reg'
+ reg = self.reg_bindings[v]
+ if not we_are_translated():
+ if move_or_spill:
+ assert max_age <= min([_a for _, _a in move_or_spill])
+ assert reg in self.save_around_call_regs
+ assert new_reg not in self.save_around_call_regs
+ self.assembler.regalloc_mov(reg, new_reg)
+ self.reg_bindings[v] = new_reg # change the binding
+ new_free_regs.append(reg)
+ #
+ if len(move_or_spill) == 0:
+ break
+ else:
+ # no more free registers to move to, spill the rest
+ for v, max_age in move_or_spill:
+ self._bc_spill(v, new_free_regs)
+
+ # re-add registers in 'new_free_regs', but in reverse order,
+ # so that the last ones (added just above, from
+ # save_around_call_regs) are picked last by future '.pop()'
+ while len(new_free_regs) > 0:
+ self.free_regs.append(new_free_regs.pop())
def after_call(self, v):
""" Adjust registers according to the result of the call,
diff --git a/rpython/jit/backend/ppc/opassembler.py b/rpython/jit/backend/ppc/opassembler.py
--- a/rpython/jit/backend/ppc/opassembler.py
+++ b/rpython/jit/backend/ppc/opassembler.py
@@ -603,7 +603,11 @@
assert saveerrloc.is_imm()
cb.emit_call_release_gil(saveerrloc.value)
else:
- cb.emit()
+ effectinfo = descr.get_extra_info()
+ if effectinfo is None or effectinfo.check_can_collect():
+ cb.emit()
+ else:
+ cb.emit_no_collect()
def _genop_call(self, op, arglocs, regalloc):
oopspecindex = regalloc.get_oopspecindex(op)
diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -1,6 +1,7 @@
from rpython.jit.backend.llsupport.regalloc import (RegisterManager, FrameManager,
TempVar, compute_vars_longevity,
BaseRegalloc)
+from rpython.jit.backend.llsupport.descr import CallDescr
from rpython.jit.backend.ppc.arch import (WORD, MY_COPY_OF_REGS, IS_PPC_32)
from rpython.jit.codewriter import longlong
from rpython.jit.backend.ppc.jump import (remap_frame_layout,
@@ -369,9 +370,9 @@
# This operation is used only for testing
self.force_spill_var(op.getarg(0))
- def before_call(self, force_store=[], save_all_regs=False):
- self.rm.before_call(force_store, save_all_regs)
- self.fprm.before_call(force_store, save_all_regs)
+ def before_call(self, save_all_regs=False):
+ self.rm.before_call(save_all_regs)
+ self.fprm.before_call(save_all_regs)
def after_call(self, v):
if v.type == FLOAT:
@@ -756,7 +757,7 @@
src_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(2))
dst_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(3))
length_loc = self.ensure_reg_or_any_imm(op.getarg(4))
- self._spill_before_call(save_all_regs=False)
+ self._spill_before_call(gc_level=0)
return [src_ptr_loc, dst_ptr_loc,
src_ofs_loc, dst_ofs_loc, length_loc]
@@ -789,13 +790,15 @@
prepare_call_f = _prepare_call
prepare_call_n = _prepare_call
- def _spill_before_call(self, save_all_regs=False):
- # spill variables that need to be saved around calls
+ def _spill_before_call(self, gc_level):
+ # spill variables that need to be saved around calls:
+ # gc_level == 0: callee cannot invoke the GC
+ # gc_level == 1: can invoke GC, save all regs that contain pointers
+ # gc_level == 2: can force, save all regs
+ save_all_regs = gc_level == 2
self.fprm.before_call(save_all_regs=save_all_regs)
- if not save_all_regs:
- gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
- if gcrootmap and gcrootmap.is_shadow_stack:
- save_all_regs = 2
+ if gc_level == 1 and self.cpu.gc_ll_descr.gcrootmap:
+ save_all_regs = 2
self.rm.before_call(save_all_regs=save_all_regs)
def _prepare_call(self, op, save_all_regs=False):
@@ -803,7 +806,18 @@
args.append(None)
for i in range(op.numargs()):
args.append(self.loc(op.getarg(i)))
- self._spill_before_call(save_all_regs)
+
+ calldescr = op.getdescr()
+ assert isinstance(calldescr, CallDescr)
+ effectinfo = calldescr.get_extra_info()
+ if save_all_regs:
+ gc_level = 2
+ elif effectinfo is None or effectinfo.check_can_collect():
+ gc_level = 1
+ else:
+ gc_level = 0
+ self._spill_before_call(gc_level=gc_level)
+
if op.type != VOID:
resloc = self.after_call(op)
args[0] = resloc
@@ -932,7 +946,7 @@
def _prepare_call_assembler(self, op):
locs = self.locs_for_call_assembler(op)
- self._spill_before_call(save_all_regs=True)
+ self._spill_before_call(gc_level=2)
if op.type != VOID:
resloc = self.after_call(op)
else:
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2112,7 +2112,11 @@
assert isinstance(saveerrloc, ImmedLoc)
cb.emit_call_release_gil(saveerrloc.value)
else:
- cb.emit()
+ effectinfo = descr.get_extra_info()
+ if effectinfo is None or effectinfo.check_can_collect():
+ cb.emit()
+ else:
+ cb.emit_no_collect()
def _store_force_index(self, guard_op):
assert (guard_op.getopnum() == rop.GUARD_NOT_FORCED or
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -795,22 +795,22 @@
else:
self._consider_call(op)
- def _call(self, op, arglocs, force_store=[], guard_not_forced=False):
+ def _call(self, op, arglocs, gc_level):
# we need to save registers on the stack:
#
# - at least the non-callee-saved registers
#
- # - we assume that any call can collect, and we
- # save also the callee-saved registers that contain GC pointers
+ # - if gc_level > 0, we save also the callee-saved registers that
+ # contain GC pointers
#
- # - for CALL_MAY_FORCE or CALL_ASSEMBLER, we have to save all regs
- # anyway, in case we need to do cpu.force(). The issue is that
- # grab_frame_values() would not be able to locate values in
- # callee-saved registers.
+ # - gc_level == 2 for CALL_MAY_FORCE or CALL_ASSEMBLER. We
+ # have to save all regs anyway, in case we need to do
+ # cpu.force(). The issue is that grab_frame_values() would
+ # not be able to locate values in callee-saved registers.
#
- save_all_regs = guard_not_forced
- self.xrm.before_call(force_store, save_all_regs=save_all_regs)
- if not save_all_regs:
+ save_all_regs = gc_level == 2
+ self.xrm.before_call(save_all_regs=save_all_regs)
+ if gc_level == 1:
gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
# we save all the registers for shadowstack and asmgcc for now
# --- for asmgcc too: we can't say "register x is a gc ref"
@@ -818,7 +818,7 @@
# more for now.
if gcrootmap: # and gcrootmap.is_shadow_stack:
save_all_regs = 2
- self.rm.before_call(force_store, save_all_regs=save_all_regs)
+ self.rm.before_call(save_all_regs=save_all_regs)
if op.type != 'v':
if op.type == FLOAT:
resloc = self.xrm.after_call(op)
@@ -838,9 +838,18 @@
sign_loc = imm1
else:
sign_loc = imm0
+ #
+ effectinfo = calldescr.get_extra_info()
+ if guard_not_forced:
+ gc_level = 2
+ elif effectinfo is None or effectinfo.check_can_collect():
+ gc_level = 1
+ else:
+ gc_level = 0
+ #
self._call(op, [imm(size), sign_loc] +
[self.loc(op.getarg(i)) for i in range(op.numargs())],
- guard_not_forced=guard_not_forced)
+ gc_level=gc_level)
def _consider_real_call(self, op):
effectinfo = op.getdescr().get_extra_info()
@@ -899,7 +908,7 @@
def _consider_call_assembler(self, op):
locs = self.locs_for_call_assembler(op)
- self._call(op, locs, guard_not_forced=True)
+ self._call(op, locs, gc_level=2)
consider_call_assembler_i = _consider_call_assembler
consider_call_assembler_r = _consider_call_assembler
consider_call_assembler_f = _consider_call_assembler
diff --git a/rpython/jit/codewriter/call.py b/rpython/jit/codewriter/call.py
--- a/rpython/jit/codewriter/call.py
+++ b/rpython/jit/codewriter/call.py
@@ -14,6 +14,7 @@
from rpython.translator.backendopt.canraise import RaiseAnalyzer
from rpython.translator.backendopt.writeanalyze import ReadWriteAnalyzer
from rpython.translator.backendopt.graphanalyze import DependencyTracker
+from rpython.translator.backendopt.collectanalyze import CollectAnalyzer
class CallControl(object):
@@ -37,9 +38,9 @@
self.virtualizable_analyzer = VirtualizableAnalyzer(translator)
self.quasiimmut_analyzer = QuasiImmutAnalyzer(translator)
self.randomeffects_analyzer = RandomEffectsAnalyzer(translator)
- self.seen = DependencyTracker(self.readwrite_analyzer)
- else:
- self.seen = None
+ self.collect_analyzer = CollectAnalyzer(translator)
+ self.seen_rw = DependencyTracker(self.readwrite_analyzer)
+ self.seen_gc = DependencyTracker(self.collect_analyzer)
#
for index, jd in enumerate(jitdrivers_sd):
jd.index = index
@@ -294,9 +295,9 @@
"but the function has no result" % (op, ))
#
effectinfo = effectinfo_from_writeanalyze(
- self.readwrite_analyzer.analyze(op, self.seen), self.cpu,
+ self.readwrite_analyzer.analyze(op, self.seen_rw), self.cpu,
extraeffect, oopspecindex, can_invalidate, call_release_gil_target,
- extradescr,
+ extradescr, self.collect_analyzer.analyze(op, self.seen_gc),
)
#
assert effectinfo is not None
diff --git a/rpython/jit/codewriter/effectinfo.py b/rpython/jit/codewriter/effectinfo.py
--- a/rpython/jit/codewriter/effectinfo.py
+++ b/rpython/jit/codewriter/effectinfo.py
@@ -116,7 +116,8 @@
oopspecindex=OS_NONE,
can_invalidate=False,
call_release_gil_target=_NO_CALL_RELEASE_GIL_TARGET,
- extradescrs=None):
+ extradescrs=None,
+ can_collect=True):
readonly_descrs_fields = frozenset_or_none(readonly_descrs_fields)
readonly_descrs_arrays = frozenset_or_none(readonly_descrs_arrays)
readonly_descrs_interiorfields = frozenset_or_none(
@@ -133,7 +134,8 @@
write_descrs_interiorfields,
extraeffect,
oopspecindex,
- can_invalidate)
+ can_invalidate,
+ can_collect)
tgt_func, tgt_saveerr = call_release_gil_target
if tgt_func:
key += (object(),) # don't care about caching in this case
@@ -184,6 +186,7 @@
#
result.extraeffect = extraeffect
result.can_invalidate = can_invalidate
+ result.can_collect = can_collect
result.oopspecindex = oopspecindex
result.extradescrs = extradescrs
result.call_release_gil_target = call_release_gil_target
@@ -230,6 +233,9 @@
def check_can_invalidate(self):
return self.can_invalidate
+ def check_can_collect(self):
+ return self.can_collect
+
def check_is_elidable(self):
return (self.extraeffect == self.EF_ELIDABLE_CAN_RAISE or
self.extraeffect == self.EF_ELIDABLE_OR_MEMORYERROR or
@@ -268,7 +274,8 @@
can_invalidate=False,
call_release_gil_target=
EffectInfo._NO_CALL_RELEASE_GIL_TARGET,
- extradescr=None):
+ extradescr=None,
+ can_collect=True):
from rpython.translator.backendopt.writeanalyze import top_set
if effects is top_set or extraeffect == EffectInfo.EF_RANDOM_EFFECTS:
readonly_descrs_fields = None
@@ -343,6 +350,9 @@
else:
assert 0
#
+ if extraeffect >= EffectInfo.EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE:
+ can_collect = True
+ #
return EffectInfo(readonly_descrs_fields,
readonly_descrs_arrays,
readonly_descrs_interiorfields,
@@ -353,7 +363,8 @@
oopspecindex,
can_invalidate,
call_release_gil_target,
- extradescr)
+ extradescr,
+ can_collect)
def consider_struct(TYPE, fieldname):
if fieldType(TYPE, fieldname) is lltype.Void:
diff --git a/rpython/jit/codewriter/test/test_call.py b/rpython/jit/codewriter/test/test_call.py
--- a/rpython/jit/codewriter/test/test_call.py
+++ b/rpython/jit/codewriter/test/test_call.py
@@ -334,3 +334,37 @@
assert call_op.opname == 'direct_call'
with py.test.raises(Exception):
call_descr = cc.getcalldescr(call_op)
+
+def test_can_or_cannot_collect():
+ from rpython.jit.backend.llgraph.runner import LLGraphCPU
+ prebuilts = [[5], [6]]
+ l = []
+ def f1(n):
+ if n > 1:
+ raise IndexError
+ return prebuilts[n] # cannot collect
+ f1._dont_inline_ = True
+
+ def f2(n):
+ return [n] # can collect
+ f2._dont_inline_ = True
+
+ def f(n):
+ a = f1(n)
+ b = f2(n)
+ return len(a) + len(b)
+
+ rtyper = support.annotate(f, [1])
+ jitdriver_sd = FakeJitDriverSD(rtyper.annotator.translator.graphs[0])
+ cc = CallControl(LLGraphCPU(rtyper), jitdrivers_sd=[jitdriver_sd])
+ res = cc.find_all_graphs(FakePolicy())
+ [f_graph] = [x for x in res if x.func is f]
+ for index, expected in [
+ (0, False), # f1()
+ (1, True), # f2()
+ (2, False), # len()
+ (3, False)]: # len()
+ call_op = f_graph.startblock.operations[index]
+ assert call_op.opname == 'direct_call'
+ call_descr = cc.getcalldescr(call_op)
+ assert call_descr.extrainfo.check_can_collect() == expected
diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -1358,11 +1358,14 @@
return cls.minimal_size_in_nursery
def write_barrier(self, addr_struct):
- if self.header(addr_struct).tid & GCFLAG_TRACK_YOUNG_PTRS:
+ # see OP_GC_BIT in translator/c/gc.py
+ if llop.gc_bit(lltype.Signed, self.header(addr_struct),
+ GCFLAG_TRACK_YOUNG_PTRS):
self.remember_young_pointer(addr_struct)
def write_barrier_from_array(self, addr_array, index):
- if self.header(addr_array).tid & GCFLAG_TRACK_YOUNG_PTRS:
+ if llop.gc_bit(lltype.Signed, self.header(addr_array),
+ GCFLAG_TRACK_YOUNG_PTRS):
if self.card_page_indices > 0:
self.remember_young_pointer_from_array2(addr_array, index)
else:
diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py
--- a/rpython/memory/gctransform/framework.py
+++ b/rpython/memory/gctransform/framework.py
@@ -5,7 +5,7 @@
from rpython.rlib.unroll import unrolling_iterable
from rpython.rtyper import rmodel, annlowlevel
from rpython.rtyper.lltypesystem import lltype, llmemory, rffi, llgroup
-from rpython.rtyper.lltypesystem.lloperation import LL_OPERATIONS, llop
+from rpython.rtyper.lltypesystem.lloperation import llop
from rpython.memory import gctypelayout
from rpython.memory.gctransform.log import log
from rpython.memory.gctransform.support import get_rtti, ll_call_destructor
@@ -14,7 +14,7 @@
from rpython.memory.gctypelayout import ll_weakref_deref, WEAKREF, WEAKREFPTR
from rpython.memory.gctypelayout import FIN_TRIGGER_FUNC, FIN_HANDLER_ARRAY
from rpython.tool.sourcetools import func_with_new_name
-from rpython.translator.backendopt import graphanalyze
+from rpython.translator.backendopt.collectanalyze import CollectAnalyzer
from rpython.translator.backendopt.finalizer import FinalizerAnalyzer
from rpython.translator.backendopt.support import var_needsgc
import types
@@ -23,33 +23,6 @@
TYPE_ID = llgroup.HALFWORD
-class CollectAnalyzer(graphanalyze.BoolGraphAnalyzer):
-
- def analyze_direct_call(self, graph, seen=None):
- try:
- func = graph.func
- except AttributeError:
- pass
- else:
- if getattr(func, '_gctransformer_hint_cannot_collect_', False):
- return False
- if getattr(func, '_gctransformer_hint_close_stack_', False):
- return True
- return graphanalyze.BoolGraphAnalyzer.analyze_direct_call(self, graph,
- seen)
- def analyze_external_call(self, funcobj, seen=None):
- if funcobj.random_effects_on_gcobjs:
- return True
- return graphanalyze.BoolGraphAnalyzer.analyze_external_call(
- self, funcobj, seen)
- def analyze_simple_operation(self, op, graphinfo):
- if op.opname in ('malloc', 'malloc_varsize'):
- flags = op.args[1].value
- return flags['flavor'] == 'gc'
- else:
- return (op.opname in LL_OPERATIONS and
- LL_OPERATIONS[op.opname].canmallocgc)
-
def propagate_no_write_barrier_needed(result, block, mallocvars,
collect_analyzer, entrymap,
startindex=0):
diff --git a/rpython/rlib/rarithmetic.py b/rpython/rlib/rarithmetic.py
--- a/rpython/rlib/rarithmetic.py
+++ b/rpython/rlib/rarithmetic.py
@@ -213,6 +213,8 @@
return self_type
if self_type in (bool, int, long):
return other_type
+ if self_type is float or other_type is float:
+ return float
if self_type.SIGNED == other_type.SIGNED:
return build_int(None, self_type.SIGNED, max(self_type.BITS, other_type.BITS))
raise AssertionError("Merging these types (%s, %s) is not supported" % (self_type, other_type))
@@ -297,6 +299,7 @@
def _widen(self, other, value):
"""
if one argument is int or long, the other type wins.
+ if one argument is float, the result is float.
otherwise, produce the largest class to hold the result.
"""
self_type = type(self)
diff --git a/rpython/rlib/test/test_rarithmetic.py b/rpython/rlib/test/test_rarithmetic.py
--- a/rpython/rlib/test/test_rarithmetic.py
+++ b/rpython/rlib/test/test_rarithmetic.py
@@ -18,11 +18,11 @@
class Test_r_int:
def test__add__(self):
- self.binary_test(lambda x, y: x + y)
+ self.binary_test(lambda x, y: x + y, includes_floats=True)
def test__sub__(self):
- self.binary_test(lambda x, y: x - y)
+ self.binary_test(lambda x, y: x - y, includes_floats=True)
def test__mul__(self):
- self.binary_test(lambda x, y: x * y)
+ self.binary_test(lambda x, y: x * y, includes_floats=True)
x = 3; y = [2]
assert x*y == r_int(x)*y
assert y*x == y*r_int(x)
@@ -58,12 +58,15 @@
cmp = f(r_int(arg))
assert res == cmp
- def binary_test(self, f, rargs = None):
+ def binary_test(self, f, rargs=None, includes_floats=False):
if not rargs:
rargs = (-10, -1, 3, 55)
+ types_list = [(int, r_int), (r_int, int), (r_int, r_int)]
+ if includes_floats:
+ types_list += [(float, r_int), (r_int, float)]
for larg in (-10, -1, 0, 3, 1234):
for rarg in rargs:
- for types in ((int, r_int), (r_int, int), (r_int, r_int)):
+ for types in types_list:
res = f(larg, rarg)
left, right = types
cmp = f(left(larg), right(rarg))
diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py
--- a/rpython/rtyper/lltypesystem/lloperation.py
+++ b/rpython/rtyper/lltypesystem/lloperation.py
@@ -470,6 +470,7 @@
'gc_pin' : LLOp(canrun=True),
'gc_unpin' : LLOp(canrun=True),
'gc__is_pinned' : LLOp(canrun=True),
+ 'gc_bit' : LLOp(sideeffects=False, canrun=True),
'gc_get_rpy_roots' : LLOp(),
'gc_get_rpy_referents': LLOp(),
diff --git a/rpython/rtyper/lltypesystem/opimpl.py b/rpython/rtyper/lltypesystem/opimpl.py
--- a/rpython/rtyper/lltypesystem/opimpl.py
+++ b/rpython/rtyper/lltypesystem/opimpl.py
@@ -1,3 +1,4 @@
+import random, sys
from rpython.flowspace.operation import op
from rpython.rlib import debug
from rpython.rlib.rarithmetic import is_valid_int
@@ -680,6 +681,11 @@
def op_gc_writebarrier(addr):
pass
+def op_gc_bit(hdr, bitmask):
+ if hdr.tid & bitmask:
+ return random.randrange(1, sys.maxint)
+ return 0
+
def op_shrink_array(array, smallersize):
return False
diff --git a/rpython/translator/backendopt/collectanalyze.py b/rpython/translator/backendopt/collectanalyze.py
new file mode 100644
--- /dev/null
+++ b/rpython/translator/backendopt/collectanalyze.py
@@ -0,0 +1,33 @@
+from rpython.translator.backendopt import graphanalyze
+from rpython.rtyper.lltypesystem.lloperation import LL_OPERATIONS
+
+# NB. tests are in rpython/memory/gctransform/test/test_framework.py
+
+
+class CollectAnalyzer(graphanalyze.BoolGraphAnalyzer):
+
+ def analyze_direct_call(self, graph, seen=None):
+ try:
+ func = graph.func
+ except AttributeError:
+ pass
+ else:
+ if getattr(func, '_gctransformer_hint_cannot_collect_', False):
+ return False
+ if getattr(func, '_gctransformer_hint_close_stack_', False):
+ return True
+ return graphanalyze.BoolGraphAnalyzer.analyze_direct_call(self, graph,
+ seen)
+ def analyze_external_call(self, funcobj, seen=None):
+ if funcobj.random_effects_on_gcobjs:
+ return True
+ return graphanalyze.BoolGraphAnalyzer.analyze_external_call(
+ self, funcobj, seen)
+
+ def analyze_simple_operation(self, op, graphinfo):
+ if op.opname in ('malloc', 'malloc_varsize'):
+ flags = op.args[1].value
+ return flags['flavor'] == 'gc'
+ else:
+ return (op.opname in LL_OPERATIONS and
+ LL_OPERATIONS[op.opname].canmallocgc)
diff --git a/rpython/translator/c/gc.py b/rpython/translator/c/gc.py
--- a/rpython/translator/c/gc.py
+++ b/rpython/translator/c/gc.py
@@ -391,6 +391,34 @@
raise AssertionError(subopnum)
return ' '.join(parts)
+ def OP_GC_BIT(self, funcgen, op):
+ # This is a two-arguments operation (x, y) where x is a
+ # pointer and y is a constant power of two. It returns 0 if
+ # "(*(Signed*)x) & y == 0", and non-zero if it is "== y".
+ #
+ # On x86-64, emitting this is better than emitting a load
+ # followed by an INT_AND for the case where y doesn't fit in
+ # 32 bits. I've seen situations where a register was wasted
+ # to contain the constant 2**32 throughout a complete messy
+ # function; the goal of this GC_BIT is to avoid that.
+ #
+ # Don't abuse, though. If you need to check several bits in
+ # sequence, then it's likely better to load the whole Signed
+ # first; using GC_BIT would result in multiple accesses to
+ # memory.
+ #
+ bitmask = op.args[1].value
+ assert bitmask > 0 and (bitmask & (bitmask - 1)) == 0
+ offset = 0
+ while bitmask >= 0x100:
+ offset += 1
+ bitmask >>= 8
+ if sys.byteorder == 'big':
+ offset = 'sizeof(Signed)-%s' % (offset+1)
+ return '%s = ((char *)%s)[%s] & %d;' % (funcgen.expr(op.result),
+ funcgen.expr(op.args[0]),
+ offset, bitmask)
+
class ShadowStackFrameworkGcPolicy(BasicFrameworkGcPolicy):
def gettransformer(self, translator):
More information about the pypy-commit
mailing list