[pypy-commit] pypy jit-write-barrier-from-array: Fix the XXX: in case the JIT generates a SETARRAYITEM_GC on a
Armin Rigo
noreply at buildbot.pypy.org
Fri Jun 3 18:59:11 CEST 2011
Author: Armin Rigo <arigo at tunes.org>
Branch: jit-write-barrier-from-array
Changeset: r44666:4c23034d8a37
Date: 2011-06-03 19:12 +0200
http://bitbucket.org/pypy/pypy/changeset/4c23034d8a37/
Log: Fix the XXX: in case the JIT generates a SETARRAYITEM_GC on a list
which it cannot prove is short enough, we should really use
write_barrier_from_array instead of the default write_barrier. I
think that this is what caused the slow-down of slowspitfire on May
25.
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -527,6 +527,7 @@
def __init__(self, gc_ll_descr):
self.llop1 = gc_ll_descr.llop1
self.WB_FUNCPTR = gc_ll_descr.WB_FUNCPTR
+ self.WB_ARRAY_FUNCPTR = gc_ll_descr.WB_ARRAY_FUNCPTR
self.fielddescr_tid = get_field_descr(gc_ll_descr,
gc_ll_descr.GCClass.HDR, 'tid')
self.jit_wb_if_flag = gc_ll_descr.GCClass.JIT_WB_IF_FLAG
@@ -546,6 +547,13 @@
funcaddr = llmemory.cast_ptr_to_adr(funcptr)
return cpu.cast_adr_to_int(funcaddr)
+ def get_write_barrier_from_array_fn(self, cpu):
+ llop1 = self.llop1
+ funcptr = llop1.get_write_barrier_from_array_failing_case(
+ self.WB_ARRAY_FUNCPTR)
+ funcaddr = llmemory.cast_ptr_to_adr(funcptr)
+ return cpu.cast_adr_to_int(funcaddr) # this may return 0
+
class GcLLDescr_framework(GcLLDescription):
DEBUG = False # forced to True by x86/test/test_zrpy_gc.py
@@ -617,6 +625,8 @@
[lltype.Signed, lltype.Signed], llmemory.GCREF))
self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType(
[llmemory.Address, llmemory.Address], lltype.Void))
+ self.WB_ARRAY_FUNCPTR = lltype.Ptr(lltype.FuncType(
+ [llmemory.Address, lltype.Signed], lltype.Void))
self.write_barrier_descr = WriteBarrierDescr(self)
#
def malloc_array(itemsize, tid, num_elem):
@@ -808,6 +818,7 @@
# GETFIELD_RAW from the array 'gcrefs.list'.
#
newops = []
+ known_lengths = {}
# we can only remember one malloc since the next malloc can possibly
# collect
last_malloc = None
@@ -838,19 +849,40 @@
v = op.getarg(2)
if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and
bool(v.value)): # store a non-NULL
- # XXX detect when we should produce a
- # write_barrier_from_array
- self._gen_write_barrier(newops, op.getarg(0), v)
+ self._gen_write_barrier_array(newops, op.getarg(0),
+ op.getarg(1), v,
+ cpu, known_lengths)
op = op.copy_and_change(rop.SETARRAYITEM_RAW)
+ elif op.getopnum() == rop.NEW_ARRAY:
+ v_length = op.getarg(0)
+ if isinstance(v_length, ConstInt):
+ known_lengths[op.result] = v_length.getint()
# ----------
newops.append(op)
return newops
- def _gen_write_barrier(self, newops, v_base, v_value):
- args = [v_base, v_value]
+ def _gen_write_barrier(self, newops, v_base, v_value_or_index):
+ # NB. the 2nd argument of COND_CALL_GC_WB is either a pointer
+ # (regular case), or an index (case of write_barrier_from_array)
+ args = [v_base, v_value_or_index]
newops.append(ResOperation(rop.COND_CALL_GC_WB, args, None,
descr=self.write_barrier_descr))
+ def _gen_write_barrier_array(self, newops, v_base, v_index, v_value,
+ cpu, known_lengths):
+ if self.write_barrier_descr.get_write_barrier_from_array_fn(cpu) != 0:
+ # If we know statically the length of 'v', and it is not too
+ # big, then produce a regular write_barrier. If it's unknown or
+ # too big, produce instead a write_barrier_from_array.
+ LARGE = 130
+ length = known_lengths.get(v_base, LARGE)
+ if length >= LARGE:
+ # unknown or too big: produce a write_barrier_from_array
+ self._gen_write_barrier(newops, v_base, v_index)
+ return
+ # fall-back case: produce a write_barrier
+ self._gen_write_barrier(newops, v_base, v_value)
+
def can_inline_malloc(self, descr):
assert isinstance(descr, BaseSizeDescr)
if descr.size < self.max_size_of_young_obj:
diff --git a/pypy/jit/backend/llsupport/test/test_gc.py b/pypy/jit/backend/llsupport/test/test_gc.py
--- a/pypy/jit/backend/llsupport/test/test_gc.py
+++ b/pypy/jit/backend/llsupport/test/test_gc.py
@@ -288,6 +288,18 @@
def get_write_barrier_failing_case(self, FPTRTYPE):
return llhelper(FPTRTYPE, self._write_barrier_failing_case)
+ _have_wb_from_array = False
+
+ def _write_barrier_from_array_failing_case(self, adr_struct, v_index):
+ self.record.append(('barrier_from_array', adr_struct, v_index))
+
+ def get_write_barrier_from_array_failing_case(self, FPTRTYPE):
+ if self._have_wb_from_array:
+ return llhelper(FPTRTYPE,
+ self._write_barrier_from_array_failing_case)
+ else:
+ return lltype.nullptr(FPTRTYPE.TO)
+
class TestFramework(object):
gc = 'hybrid'
@@ -303,9 +315,20 @@
config = config_
class FakeCPU(object):
def cast_adr_to_int(self, adr):
- ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR)
- assert ptr._obj._callable == llop1._write_barrier_failing_case
- return 42
+ if not adr:
+ return 0
+ try:
+ ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR)
+ assert ptr._obj._callable == \
+ llop1._write_barrier_failing_case
+ return 42
+ except lltype.InvalidCast:
+ ptr = llmemory.cast_adr_to_ptr(
+ adr, gc_ll_descr.WB_ARRAY_FUNCPTR)
+ assert ptr._obj._callable == \
+ llop1._write_barrier_from_array_failing_case
+ return 43
+
gcdescr = get_description(config_)
translator = FakeTranslator()
llop1 = FakeLLOp()
@@ -515,29 +538,88 @@
def test_rewrite_assembler_3(self):
# check write barriers before SETARRAYITEM_GC
- v_base = BoxPtr()
- v_index = BoxInt()
- v_value = BoxPtr()
- array_descr = AbstractDescr()
- operations = [
- ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value], None,
- descr=array_descr),
- ]
- gc_ll_descr = self.gc_ll_descr
- operations = get_deep_immutable_oplist(operations)
- operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
- assert len(operations) == 2
- #
- assert operations[0].getopnum() == rop.COND_CALL_GC_WB
- assert operations[0].getarg(0) == v_base
- assert operations[0].getarg(1) == v_value
- assert operations[0].result is None
- #
- assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
- assert operations[1].getarg(0) == v_base
- assert operations[1].getarg(1) == v_index
- assert operations[1].getarg(2) == v_value
- assert operations[1].getdescr() == array_descr
+ for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
+ v_base = BoxPtr()
+ v_index = BoxInt()
+ v_value = BoxPtr()
+ array_descr = AbstractDescr()
+ operations = [
+ ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
+ None, descr=array_descr),
+ ]
+ if v_new_length is not None:
+ operations.insert(0, ResOperation(rop.NEW_ARRAY,
+ [v_new_length], v_base,
+ descr=array_descr))
+ # we need to insert another, unrelated NEW_ARRAY here
+ # to prevent the initialization_store optimization
+ operations.insert(1, ResOperation(rop.NEW_ARRAY,
+ [ConstInt(12)], BoxPtr(),
+ descr=array_descr))
+ gc_ll_descr = self.gc_ll_descr
+ operations = get_deep_immutable_oplist(operations)
+ operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+ if v_new_length is not None:
+ assert operations[0].getopnum() == rop.NEW_ARRAY
+ assert operations[1].getopnum() == rop.NEW_ARRAY
+ del operations[:2]
+ assert len(operations) == 2
+ #
+ assert operations[0].getopnum() == rop.COND_CALL_GC_WB
+ assert operations[0].getarg(0) == v_base
+ assert operations[0].getarg(1) == v_value
+ assert operations[0].result is None
+ #
+ assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
+ assert operations[1].getarg(0) == v_base
+ assert operations[1].getarg(1) == v_index
+ assert operations[1].getarg(2) == v_value
+ assert operations[1].getdescr() == array_descr
+
+ def test_rewrite_assembler_4(self):
+ # check write barriers before SETARRAYITEM_GC,
+ # if we have actually a write_barrier_from_array.
+ self.llop1._have_wb_from_array = True
+ for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
+ v_base = BoxPtr()
+ v_index = BoxInt()
+ v_value = BoxPtr()
+ array_descr = AbstractDescr()
+ operations = [
+ ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
+ None, descr=array_descr),
+ ]
+ if v_new_length is not None:
+ operations.insert(0, ResOperation(rop.NEW_ARRAY,
+ [v_new_length], v_base,
+ descr=array_descr))
+ # we need to insert another, unrelated NEW_ARRAY here
+ # to prevent the initialization_store optimization
+ operations.insert(1, ResOperation(rop.NEW_ARRAY,
+ [ConstInt(12)], BoxPtr(),
+ descr=array_descr))
+ gc_ll_descr = self.gc_ll_descr
+ operations = get_deep_immutable_oplist(operations)
+ operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+ if v_new_length is not None:
+ assert operations[0].getopnum() == rop.NEW_ARRAY
+ assert operations[1].getopnum() == rop.NEW_ARRAY
+ del operations[:2]
+ assert len(operations) == 2
+ #
+ assert operations[0].getopnum() == rop.COND_CALL_GC_WB
+ assert operations[0].getarg(0) == v_base
+ if isinstance(v_new_length, ConstInt) and v_new_length.value < 130:
+ assert operations[0].getarg(1) == v_value
+ else:
+ assert operations[0].getarg(1) == v_index
+ assert operations[0].result is None
+ #
+ assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
+ assert operations[1].getarg(0) == v_base
+ assert operations[1].getarg(1) == v_index
+ assert operations[1].getarg(2) == v_value
+ assert operations[1].getdescr() == array_descr
def test_rewrite_assembler_initialization_store(self):
S = lltype.GcStruct('S', ('parent', OBJECT),
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -499,9 +499,8 @@
funcname = op.getarg(0)._get_str()
break
else:
- funcname = "<loop %d>" % len(self.loop_run_counters)
- # invent the counter, so we don't get too confused
- return funcname
+ funcname = '?'
+ return "Loop %d: %s" % (len(self.loop_run_counters), funcname)
def _register_counter(self):
if self._debug:
@@ -2079,6 +2078,8 @@
# function remember_young_pointer() from the GC. The two arguments
# to the call are in arglocs[:2]. The rest, arglocs[2:], contains
# registers that need to be saved and restored across the call.
+ # If op.getarg(1) is a int, it is an array index and we must call
+ # instead remember_young_pointer_from_array().
descr = op.getdescr()
if we_are_translated():
cls = self.cpu.gc_ll_descr.has_write_barrier_class()
@@ -2110,13 +2111,19 @@
remap_frame_layout(self, arglocs[:2], [edi, esi],
X86_64_SCRATCH_REG)
+ if op.getarg(1).type == INT:
+ func = descr.get_write_barrier_from_array_fn(self.cpu)
+ assert func != 0
+ else:
+ func = descr.get_write_barrier_fn(self.cpu)
+
# misaligned stack in the call, but it's ok because the write barrier
# is not going to call anything more. Also, this assumes that the
# write barrier does not touch the xmm registers. (Slightly delicate
# assumption, given that the write barrier can end up calling the
# platform's malloc() from AddressStack.append(). XXX may need to
# be done properly)
- self.mc.CALL(imm(descr.get_write_barrier_fn(self.cpu)))
+ self.mc.CALL(imm(func))
if IS_X86_32:
self.mc.ADD_ri(esp.value, 2*WORD)
for i in range(2, len(arglocs)):
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -864,12 +864,12 @@
def consider_cond_call_gc_wb(self, op):
assert op.result is None
args = op.getarglist()
- loc_newvalue = self.rm.make_sure_var_in_reg(op.getarg(1), args)
- # ^^^ we force loc_newvalue in a reg (unless it's a Const),
+ loc_newvalue_or_index= self.rm.make_sure_var_in_reg(op.getarg(1), args)
+ # ^^^ we force loc_newvalue_or_index in a reg (unless it's a Const),
# because it will be needed anyway by the following setfield_gc.
# It avoids loading it twice from the memory.
loc_base = self.rm.make_sure_var_in_reg(op.getarg(0), args)
- arglocs = [loc_base, loc_newvalue]
+ arglocs = [loc_base, loc_newvalue_or_index]
# add eax, ecx and edx as extra "arguments" to ensure they are
# saved and restored. Fish in self.rm to know which of these
# registers really need to be saved (a bit of a hack). Moreover,
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_gc.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py
@@ -456,6 +456,73 @@
def test_compile_framework_7(self):
self.run('compile_framework_7')
+ def define_compile_framework_8(cls):
+ # Array of pointers, of unknown length (test write_barrier_from_array)
+ def before(n, x):
+ return n, x, None, None, None, None, None, None, None, None, [X(123)], None
+ def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+ if n < 1900:
+ check(l[0].x == 123)
+ l = [None] * (16 + (n & 7))
+ l[0] = X(123)
+ l[1] = X(n)
+ l[2] = X(n+10)
+ l[3] = X(n+20)
+ l[4] = X(n+30)
+ l[5] = X(n+40)
+ l[6] = X(n+50)
+ l[7] = X(n+60)
+ l[8] = X(n+70)
+ l[9] = X(n+80)
+ l[10] = X(n+90)
+ l[11] = X(n+100)
+ l[12] = X(n+110)
+ l[13] = X(n+120)
+ l[14] = X(n+130)
+ l[15] = X(n+140)
+ if n < 1800:
+ check(len(l) == 16 + (n & 7))
+ check(l[0].x == 123)
+ check(l[1].x == n)
+ check(l[2].x == n+10)
+ check(l[3].x == n+20)
+ check(l[4].x == n+30)
+ check(l[5].x == n+40)
+ check(l[6].x == n+50)
+ check(l[7].x == n+60)
+ check(l[8].x == n+70)
+ check(l[9].x == n+80)
+ check(l[10].x == n+90)
+ check(l[11].x == n+100)
+ check(l[12].x == n+110)
+ check(l[13].x == n+120)
+ check(l[14].x == n+130)
+ check(l[15].x == n+140)
+ n -= x.foo
+ return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
+ def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+ check(len(l) >= 16)
+ check(l[0].x == 123)
+ check(l[1].x == 2)
+ check(l[2].x == 12)
+ check(l[3].x == 22)
+ check(l[4].x == 32)
+ check(l[5].x == 42)
+ check(l[6].x == 52)
+ check(l[7].x == 62)
+ check(l[8].x == 72)
+ check(l[9].x == 82)
+ check(l[10].x == 92)
+ check(l[11].x == 102)
+ check(l[12].x == 112)
+ check(l[13].x == 122)
+ check(l[14].x == 132)
+ check(l[15].x == 142)
+ return before, f, after
+
+ def test_compile_framework_8(self):
+ self.run('compile_framework_8')
+
def define_compile_framework_external_exception_handling(cls):
def before(n, x):
x = X(0)
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -471,7 +471,8 @@
'STRSETITEM/3',
'UNICODESETITEM/3',
#'RUNTIMENEW/1', # ootype operation
- 'COND_CALL_GC_WB/2d', # [objptr, newvalue] (for the write barrier)
+ 'COND_CALL_GC_WB/2d', # [objptr, newvalue] or [arrayptr, index]
+ # (for the write barrier, latter is in an array)
'DEBUG_MERGE_POINT/2', # debugging only
'JIT_DEBUG/*', # debugging only
'VIRTUAL_REF_FINISH/2', # removed before it's passed to the backend
diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -1020,6 +1020,7 @@
objhdr.tid |= GCFLAG_CARDS_SET
remember_young_pointer_from_array._dont_inline_ = True
+ assert self.card_page_indices > 0
self.remember_young_pointer_from_array = (
remember_young_pointer_from_array)
diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -860,9 +860,9 @@
def gct_get_write_barrier_from_array_failing_case(self, hop):
op = hop.spaceop
- hop.genop("same_as",
- [self.write_barrier_from_array_failing_case_ptr],
- resultvar=op.result)
+ v = getattr(self, 'write_barrier_from_array_failing_case_ptr',
+ lltype.nullptr(op.result.concretetype.TO))
+ hop.genop("same_as", [v], resultvar=op.result)
def gct_zero_gc_pointers_inside(self, hop):
if not self.malloc_zero_filled:
More information about the pypy-commit
mailing list