[pypy-commit] pypy stmgc-c4: prepare for fastpath for ptr_eq and fix slowpath of stm-barriers
Raemi
noreply at buildbot.pypy.org
Thu Jul 18 08:31:12 CEST 2013
Author: Remi Meier <remi.meier at gmail.com>
Branch: stmgc-c4
Changeset: r65454:ef39cd09001d
Date: 2013-07-18 08:30 +0200
http://bitbucket.org/pypy/pypy/changeset/ef39cd09001d/
Log: prepare for fastpath for ptr_eq and fix slowpath of stm-barriers
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -89,6 +89,11 @@
self._build_b_slowpath(d, False)
self._build_b_slowpath(d, True)
self._build_b_slowpath(d, False, for_frame=True)
+ # only for stm:
+ if hasattr(gc_ll_descr, 'stm_ptr_eq_FUNCPTR'):
+ self._build_ptr_eq_slowpath()
+ else:
+ self.ptr_eq_slowpath = None
# only one of those
self.build_frame_realloc_slowpath()
if self.cpu.supports_floats:
diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -110,6 +110,8 @@
# the only ops with descrs that get recorded in a trace
from rpython.jit.metainterp.history import AbstractDescr
descr = op.getdescr()
+ if not we_are_translated() and descr is None:
+ return
llref = cast_instance_to_gcref(descr)
new_llref = rgc._make_sure_does_not_move(llref)
new_d = rgc.try_cast_gcref_to_instance(AbstractDescr, new_llref)
diff --git a/rpython/jit/backend/llsupport/stmrewrite.py b/rpython/jit/backend/llsupport/stmrewrite.py
--- a/rpython/jit/backend/llsupport/stmrewrite.py
+++ b/rpython/jit/backend/llsupport/stmrewrite.py
@@ -173,12 +173,4 @@
return isinstance(box, ConstPtr) and not box.value
def handle_ptr_eq(self, op):
- if self._is_null(op.getarg(0)) or self._is_null(op.getarg(1)):
- self.newops.append(op)
- return
- args = op.getarglist()
- result = op.result
- if op.getopnum() in (rop.PTR_EQ, rop.INSTANCE_PTR_EQ):
- self._do_stm_call('stm_ptr_eq', args, result)
- else:
- self._do_stm_call('stm_ptr_ne', args, result)
+ self.newops.append(op)
diff --git a/rpython/jit/backend/llsupport/test/test_stmrewrite.py b/rpython/jit/backend/llsupport/test/test_stmrewrite.py
--- a/rpython/jit/backend/llsupport/test/test_stmrewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_stmrewrite.py
@@ -517,7 +517,7 @@
jump(i1)
""", """
[p1, p2]
- i1 = call(ConstClass(stm_ptr_eq), p1, p2, descr=stm_ptr_eq_descr)
+ i1 = ptr_eq(p1, p2)
jump(i1)
""")
@@ -528,7 +528,7 @@
jump(i1)
""", """
[p1, p2]
- i1 = call(ConstClass(stm_ptr_eq), p1, p2, descr=stm_ptr_eq_descr)
+ i1 = instance_ptr_eq(p1, p2)
jump(i1)
""")
@@ -539,7 +539,7 @@
jump(i1)
""", """
[p1, p2]
- i1 = call(ConstClass(stm_ptr_ne), p1, p2, descr=stm_ptr_ne_descr)
+ i1 = ptr_ne(p1, p2)
jump(i1)
""")
@@ -550,7 +550,7 @@
jump(i1)
""", """
[p1, p2]
- i1 = call(ConstClass(stm_ptr_ne), p1, p2, descr=stm_ptr_ne_descr)
+ i1 = instance_ptr_ne(p1, p2)
jump(i1)
""")
diff --git a/rpython/jit/backend/x86/arch.py b/rpython/jit/backend/x86/arch.py
--- a/rpython/jit/backend/x86/arch.py
+++ b/rpython/jit/backend/x86/arch.py
@@ -21,6 +21,7 @@
# | scratch |
# | space |
# +--------------------+ <== aligned to 16 bytes
+# STACK TOP
# All the rest of the data is in a GC-managed variable-size "frame".
# This frame object's address is always stored in the register EBP/RBP.
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -309,6 +309,61 @@
rawstart = mc.materialize(self.cpu.asmmemmgr, [])
self.stack_check_slowpath = rawstart
+
+ def _build_ptr_eq_slowpath(self):
+ cpu = self.cpu
+ is_stm = cpu.gc_ll_descr.stm
+ assert is_stm
+
+ func = cpu.gc_ll_descr.get_malloc_fn_addr('stm_ptr_eq')
+ #
+ # This builds a helper function called from the slow path of
+ # ptr_eq/ne. It must save all registers, and optionally
+ # all XMM registers. It takes two arguments (val1, val2) just pushed
+ # on the stack even on X86_64. It must restore stack alignment
+ # accordingly.
+ mc = codebuf.MachineCodeBlockWrapper()
+ #
+ self._push_all_regs_to_frame(mc, [], withfloats=False,
+ callee_only=True)
+ #
+ if IS_X86_32:
+ # ||val2|val1|retaddr| growing->, || aligned
+ mc.SUB_ri(esp.value, 5 * WORD)
+ # ||val2|val1|retaddr|x||x|x|x|x|
+ mc.MOV_rs(eax.value, 6 * WORD)
+ mc.MOV_rs(ecx.value, 7 * WORD)
+ # eax=val1, ecx=val2
+ mc.MOV_sr(0, eax.value)
+ mc.MOV_sr(WORD, ecx.value)
+ # ||val2|val1|retaddr|x||x|x|val2|val1|
+ else:
+ # ||val2|val1||retaddr|
+ mc.SUB_ri(esp.value, WORD)
+ # ||val2|val1||retaddr|x||
+ mc.MOV_rs(edi.value, 2 * WORD)
+ mc.MOV_rs(esi.value, 3 * WORD)
+ #
+ mc.CALL(imm(func))
+ # eax has result
+ if IS_X86_32:
+ mc.ADD_ri(esp.value, 5 * WORD)
+ else:
+ mc.ADD_ri(esp.value, WORD)
+ #
+ # result in eax, save (not sure if necessary)
+ mc.PUSH_r(eax.value)
+ #
+ self._pop_all_regs_from_frame(mc, [], withfloats=False,
+ callee_only=True)
+ #
+ mc.POP_r(eax.value)
+ mc.RET16_i(2 * WORD)
+
+ rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+ self.ptr_eq_slowpath = rawstart
+
+
def _build_b_slowpath(self, descr, withcards, withfloats=False,
for_frame=False):
is_stm = self.cpu.gc_ll_descr.stm
@@ -370,8 +425,13 @@
mc.CALL(imm(func))
if descr.returns_modified_object:
- # new addr in eax, save in scratch reg
- mc.PUSH_r(eax.value)
+ # new addr in eax, save to now unused arg
+ if for_frame:
+ mc.PUSH_r(eax.value)
+ elif IS_X86_32:
+ mc.MOV_sr(3 * WORD, eax.value)
+ else:
+ mc.MOV_sr(WORD, eax.value)
if withcards:
# A final TEST8 before the RET, for the caller. Careful to
@@ -392,20 +452,24 @@
self._pop_all_regs_from_frame(mc, [], withfloats, callee_only=True)
if descr.returns_modified_object:
- mc.POP_r(eax.value)
+ if IS_X86_32:
+ mc.MOV_rs(eax.value, 3 * WORD)
+ else:
+ mc.MOV_rs(eax.value, WORD)
mc.RET16_i(WORD)
else:
if IS_X86_32:
- mc.MOV_rs(edx.value, 4 * WORD)
- mc.MOVSD_xs(xmm0.value, 3 * WORD)
- mc.MOV_rs(eax.value, WORD) # restore
+ mc.MOV_rs(edx.value, 5 * WORD)
+ mc.MOVSD_xs(xmm0.value, 4 * WORD)
+ mc.MOV_rs(eax.value, 2 * WORD) # restore
self._restore_exception(mc, exc0, exc1)
- mc.MOV(exc0, RawEspLoc(WORD * 5, REF))
- mc.MOV(exc1, RawEspLoc(WORD * 6, INT))
+ mc.MOV(exc0, RawEspLoc(WORD * 6, REF))
+ mc.MOV(exc1, RawEspLoc(WORD * 7, INT))
+
+ mc.POP_r(eax.value) # return value
+
mc.LEA_rs(esp.value, 7 * WORD)
- if descr.returns_modified_object:
- mc.POP_r(eax.value)
mc.RET()
rawstart = mc.materialize(self.cpu.asmmemmgr, [])
@@ -932,6 +996,43 @@
self.mc.LEA_rm(result_loc.value, (loc.value, delta))
return genop_binary_or_lea
+
+ def genop_ptr_eq(self, op, arglocs, result_loc):
+ assert self.cpu.gc_ll_descr.stm
+ rl = result_loc.lowest8bits()
+ self._stm_ptr_eq_fastpath(self.mc, arglocs, result_loc)
+ self.mc.TEST_rr(eax.value, eax.value)
+ self.mc.SET_ir(rx86.Conditions['NZ'], rl.value)
+ self.mc.MOVZX8_rr(result_loc.value, rl.value)
+
+ def genop_ptr_ne(self, op, arglocs, result_loc):
+ assert self.cpu.gc_ll_descr.stm
+ rl = result_loc.lowest8bits()
+ self._stm_ptr_eq_fastpath(self.mc, arglocs, result_loc)
+ self.mc.TEST_rr(eax.value, eax.value)
+ self.mc.SET_ir(rx86.Conditions['Z'], rl.value)
+ self.mc.MOVZX8_rr(result_loc.value, rl.value)
+
+ def genop_guard_ptr_eq(self, op, guard_op, guard_token,
+ arglocs, result_loc):
+ guard_opnum = guard_op.getopnum()
+ self._stm_ptr_eq_fastpath(self.mc, arglocs, result_loc)
+ self.mc.TEST_rr(eax.value, eax.value)
+ if guard_opnum == rop.GUARD_FALSE:
+ self.implement_guard(guard_token, "Z")
+ else:
+ self.implement_guard(guard_token, "NZ")
+
+ def genop_guard_ptr_ne(self, op, guard_op, guard_token,
+ arglocs, result_loc):
+ guard_opnum = guard_op.getopnum()
+ self._stm_ptr_eq_fastpath(self.mc, arglocs, result_loc)
+ self.mc.TEST_rr(eax.value, eax.value)
+ if guard_opnum == rop.GUARD_FALSE:
+ self.implement_guard(guard_token, "NZ")
+ else:
+ self.implement_guard(guard_token, "Z")
+
def _cmpop(cond, rev_cond):
def genop_cmp(self, op, arglocs, result_loc):
rl = result_loc.lowest8bits()
@@ -1079,8 +1180,8 @@
genop_int_ne = _cmpop("NE", "NE")
genop_int_gt = _cmpop("G", "L")
genop_int_ge = _cmpop("GE", "LE")
- genop_ptr_eq = genop_instance_ptr_eq = genop_int_eq
- genop_ptr_ne = genop_instance_ptr_ne = genop_int_ne
+ genop_instance_ptr_eq = genop_ptr_eq
+ genop_instance_ptr_ne = genop_ptr_ne
genop_float_lt = _cmpop_float('B', 'A')
genop_float_le = _cmpop_float('BE', 'AE')
@@ -1100,8 +1201,8 @@
genop_guard_int_ne = _cmpop_guard("NE", "NE", "E", "E")
genop_guard_int_gt = _cmpop_guard("G", "L", "LE", "GE")
genop_guard_int_ge = _cmpop_guard("GE", "LE", "L", "G")
- genop_guard_ptr_eq = genop_guard_instance_ptr_eq = genop_guard_int_eq
- genop_guard_ptr_ne = genop_guard_instance_ptr_ne = genop_guard_int_ne
+ genop_guard_instance_ptr_eq = genop_guard_ptr_eq
+ genop_guard_instance_ptr_ne = genop_guard_ptr_ne
genop_guard_uint_gt = _cmpop_guard("A", "B", "BE", "AE")
genop_guard_uint_lt = _cmpop_guard("B", "A", "AE", "BE")
@@ -1114,6 +1215,9 @@
genop_guard_float_gt = _cmpop_guard_float("A", "B", "BE","AE")
genop_guard_float_ge = _cmpop_guard_float("AE","BE", "B", "A")
+
+
+
def genop_math_sqrt(self, op, arglocs, resloc):
self.mc.SQRTSD(arglocs[0], resloc)
@@ -2001,6 +2105,19 @@
self.mc.overwrite(jmp_location - 1, chr(offset))
# ------------------- END CALL ASSEMBLER -----------------------
+ def _stm_ptr_eq_fastpath(self, mc, arglocs, result_loc):
+ assert self.cpu.gc_ll_descr.stm
+ assert self.ptr_eq_slowpath is not None
+ a_base = arglocs[0]
+ b_base = arglocs[1]
+ #
+ mc.PUSH(b_base)
+ mc.PUSH(a_base)
+ func = self.ptr_eq_slowpath
+ mc.CALL(imm(func))
+ assert isinstance(result_loc, RegLoc)
+ mc.MOV_rr(result_loc.value, eax.value)
+
def _stm_barrier_fastpath(self, mc, descr, arglocs, is_frame=False,
align_stack=False):
assert self.cpu.gc_ll_descr.stm
diff --git a/rpython/translator/stm/inevitable.py b/rpython/translator/stm/inevitable.py
--- a/rpython/translator/stm/inevitable.py
+++ b/rpython/translator/stm/inevitable.py
@@ -7,6 +7,7 @@
ALWAYS_ALLOW_OPERATIONS = set([
'force_cast', 'keepalive', 'cast_ptr_to_adr',
+ 'cast_adr_to_int',
'debug_print', 'debug_assert', 'cast_opaque_ptr', 'hint',
'stack_current', 'gc_stack_bottom',
'cast_current_ptr_to_int', # this variant of 'cast_ptr_to_int' is ok
@@ -14,7 +15,7 @@
'jit_force_quasi_immutable', 'jit_marker', 'jit_is_virtual',
'jit_record_known_class',
'gc_identityhash', 'gc_id', 'gc_can_move', 'gc__collect',
- 'gc_adr_of_root_stack_top',
+ 'gc_adr_of_root_stack_top', 'gc_get_original_copy',
'stmgc_get_original_copy',
'weakref_create', 'weakref_deref',
'stm_threadlocalref_get', 'stm_threadlocalref_set',
More information about the pypy-commit mailing list