[pypy-commit] pypy stmgc-c4: improve code generation for transaction_break and pointer_equal
Raemi
noreply at buildbot.pypy.org
Tue Oct 29 10:55:15 CET 2013
Author: Remi Meier <remi.meier at gmail.com>
Branch: stmgc-c4
Changeset: r67679:f6fa761a828e
Date: 2013-10-28 14:58 +0100
http://bitbucket.org/pypy/pypy/changeset/f6fa761a828e/
Log: improve code generation for transaction_break and pointer_equal
diff --git a/TODO b/TODO
--- a/TODO
+++ b/TODO
@@ -1,3 +1,8 @@
+------------------------------------------------------------
+
+make stm_transaction_break use cond_call (or other ways to not
+spill all registers)
+
------------------------------------------------------------
constptrs always require slowpath of read_barrier if they
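
A minimal pure-Python sketch of the cond_call idea from the TODO above
(the helper names here are illustrative, not the real JIT API): test a
cheap condition inline and spill registers only on the slow path.

    def maybe_transaction_break(should_break, do_break,
                                save_regs, restore_regs):
        # fast path: a cheap inline test, nothing is spilled
        if not should_break():
            return
        saved = save_regs()        # slow path only: spill registers
        try:
            do_break()             # call into the STM runtime
        finally:
            restore_regs(saved)    # refill registers before resuming
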
diff --git a/rpython/jit/backend/llsupport/stmrewrite.py b/rpython/jit/backend/llsupport/stmrewrite.py
--- a/rpython/jit/backend/llsupport/stmrewrite.py
+++ b/rpython/jit/backend/llsupport/stmrewrite.py
@@ -319,8 +319,8 @@
lst = op.getarglist()
lst[1] = self.gen_barrier(lst[1], 'W')
op = op.copy_and_change(op.getopnum(), args=lst)
- # then a read barrier the source string
- self.handle_category_operations(op, 'R')
+ # then an immutable read barrier on the source string
+ self.handle_category_operations(op, 'I')
@specialize.arg(1)
def _do_stm_call(self, funcname, args, result):
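
An illustrative model of the barrier layout this hunk produces (the
real code goes through handle_category_operations; the helper shown
here is hypothetical): the destination gets a write barrier, while the
source string only needs the cheaper immutable read barrier, since its
contents cannot change under us.

    def rewrite_str_copy(op, gen_barrier):
        lst = op.getarglist()
        lst[1] = gen_barrier(lst[1], 'W')   # destination is written to
        lst[0] = gen_barrier(lst[0], 'I')   # source: immutable read only
        return op.copy_and_change(op.getopnum(), args=lst)
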
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -425,16 +425,12 @@
def _build_ptr_eq_slowpath(self):
cpu = self.cpu
- is_stm = cpu.gc_ll_descr.stm
- assert is_stm
-
- func = cpu.gc_ll_descr.get_malloc_fn_addr('stm_ptr_eq')
+ assert cpu.gc_ll_descr.stm
#
# This builds a helper function called from the slow path of
# ptr_eq/ne. It must save all registers, and optionally
- # all XMM registers. It takes a single argument just pushed
- # on the stack even on X86_64. It must restore stack alignment
- # accordingly.
+ # all XMM registers. It takes two values pushed on the stack,
+ # even on X86_64. It must restore stack alignment accordingly.
mc = codebuf.MachineCodeBlockWrapper()
#
self._push_all_regs_to_frame(mc, [], withfloats=False,
@@ -457,7 +453,12 @@
mc.MOV_rs(edi.value, 2 * WORD)
mc.MOV_rs(esi.value, 3 * WORD)
#
- mc.CALL(imm(func))
+ if not we_are_translated(): # for tests
+ fn = cpu.gc_ll_descr.get_malloc_fn_addr('stm_ptr_eq')
+ mc.CALL(imm(fn))
+ else:
+ fn = stmtlocal.stm_pointer_equal_fn
+ mc.CALL(imm(self.cpu.cast_ptr_to_int(fn)))
# eax has result
if IS_X86_32:
# ||val2|val1|retaddr|x||x|x|val2|val1|
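
A rough pure-Python model of the helper's calling convention described
in the comment above (illustrative only; 'stack' stands for the machine
stack as a list of words): both values are pushed by the JIT-generated
caller, reloaded into the argument registers, and the result is left in
eax.

    def ptr_eq_slowpath_model(stack, stm_pointer_equal):
        # the two pushed arguments sit a fixed number of words
        # above the stack pointer at this point
        val1 = stack[2]     # mirrors mc.MOV_rs(edi.value, 2 * WORD)
        val2 = stack[3]     # mirrors mc.MOV_rs(esi.value, 3 * WORD)
        return stm_pointer_equal(val1, val2)   # result ends up in eax
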
@@ -2367,12 +2368,16 @@
self.mc.overwrite(jmp_location - 1, chr(offset))
# ------------------- END CALL ASSEMBLER -----------------------
- def _stm_ptr_eq_fastpath(self, mc, arglocs, result_loc):
+ def _stm_ptr_eq_fastpath(self, mc, arglocs):
assert self.cpu.gc_ll_descr.stm
assert self.ptr_eq_slowpath is not None
a_base = arglocs[0]
b_base = arglocs[1]
-
+ if isinstance(a_base, ImmedLoc):
+ # make sure there is a non-immed as the first
+ # argument to mc.CMP(). (2 immeds are caught below)
+ a_base, b_base = b_base, a_base
+
#
# FASTPATH
#
@@ -2386,25 +2391,11 @@
else:
j_ok1 = 0
else:
- # do the dance, even if a or b is an Immed
- # XXX: figure out if CMP() is able to handle it without
- # the explicit MOV before it (CMP(a_base, b_base))
+ mc.CMP(a_base, b_base)
+ # reverse flags: if p1==p2, set NZ
sl = X86_64_SCRATCH_REG.lowest8bits()
- mc.MOV(X86_64_SCRATCH_REG, a_base)
- if isinstance(b_base, ImmedLoc) \
- and rx86.fits_in_32bits(b_base.value):
- mc.CMP_ri(X86_64_SCRATCH_REG.value, b_base.value)
- elif not isinstance(b_base, ImmedLoc):
- mc.CMP(X86_64_SCRATCH_REG, b_base)
- else:
- # imm64, need another temporary reg :(
- mc.PUSH_r(eax.value)
- mc.MOV_ri64(eax.value, b_base.value)
- mc.CMP_rr(X86_64_SCRATCH_REG.value, eax.value)
- mc.POP_r(eax.value)
- # reverse flags: if p1==p2, set NZ
mc.SET_ir(rx86.Conditions['Z'], sl.value)
- mc.AND8_rr(sl.value, sl.value)
+ mc.TEST8_rr(sl.value, sl.value)
mc.J_il8(rx86.Conditions['NZ'], 0)
j_ok1 = mc.get_relative_pos()
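
The SET/TEST pair implements the flag reversal mentioned in the
comment: after CMP, the Z flag means "equal"; materializing it into a
byte and testing that byte makes NZ mean "equal" instead. In pure
Python (illustrative):

    def fastpath_flags_model(a, b):
        scratch = 1 if a == b else 0   # SETZ on the scratch low byte
        nz = (scratch != 0)            # TEST8 scratch, scratch
        return nz                      # J_il8(NZ): taken when equal
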
@@ -3163,8 +3154,37 @@
# call stm_transaction_break() with the address of the
# STM_RESUME_BUF and the custom longjmp function
self.push_gcmap(mc, gcmap, mov=True)
+ #
+ # save all registers
+ base_ofs = self.cpu.get_baseofs_of_frame_field()
+ for gpr in self._regalloc.rm.reg_bindings.values():
+ v = gpr_reg_mgr_cls.all_reg_indexes[gpr.value]
+ mc.MOV_br(v * WORD + base_ofs, gpr.value)
+ if IS_X86_64:
+ coeff = 1
+ else:
+ coeff = 2
+ ofs = len(gpr_reg_mgr_cls.all_regs)
+ for xr in self._regalloc.xrm.reg_bindings.values():
+ mc.MOVSD_bx((ofs + xr.value * coeff) * WORD + base_ofs, xr.value)
+ #
+ # CALL break function
fn = self.stm_transaction_break_path
mc.CALL(imm(fn))
+ # HERE is the place an aborted transaction retries
+ #
+ # restore regs
+ base_ofs = self.cpu.get_baseofs_of_frame_field()
+ for gpr in self._regalloc.rm.reg_bindings.values():
+ v = gpr_reg_mgr_cls.all_reg_indexes[gpr.value]
+ mc.MOV_rb(gpr.value, v * WORD + base_ofs)
+ if IS_X86_64:
+ coeff = 1
+ else:
+ coeff = 2
+ ofs = len(gpr_reg_mgr_cls.all_regs)
+ for xr in self._regalloc.xrm.reg_bindings.values():
+ mc.MOVSD_xb(xr.value, (ofs + xr.value * coeff) * WORD + base_ofs)
#
# patch the JZ above
if jz_location:
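
A worked example of the frame offsets used in the save/restore loops
above, assuming WORD is 8 on x86-64 and 4 on x86-32; XMM values are
always 8 bytes, hence coeff=2 on 32-bit so each XMM slot spans two
words. The concrete numbers below are placeholders.

    WORD = 8         # x86-64 (4 on x86-32)
    coeff = 1        # 2 on x86-32
    base_ofs = 48    # placeholder for cpu.get_baseofs_of_frame_field()
    num_gprs = 16    # stands in for len(gpr_reg_mgr_cls.all_regs)

    def gpr_slot(frame_index):
        # frame_index comes from gpr_reg_mgr_cls.all_reg_indexes
        return frame_index * WORD + base_ofs

    def xmm_slot(xr_index):
        # XMM slots start after all the GPR slots
        return (num_gprs + xr_index * coeff) * WORD + base_ofs
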
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -412,11 +412,9 @@
args = op.getarglist()
if args[0].type == REF:
assert args[1].type == REF
- # XXX: this is certainly not wanted.
- # We force immed64 into registers here.
- x = self.make_sure_var_in_reg(args[0], args, selected_reg=ecx)
- y = self.make_sure_var_in_reg(args[1], args, selected_reg=eax)
- self.rm.possibly_free_var(args[1])
+ # move both args to reg or immed
+ x = self.make_sure_var_in_reg(args[0], args)
+ y = self.make_sure_var_in_reg(args[1], args)
else:
x = self.make_sure_var_in_reg(args[0], args)
y = self.loc(args[1])
@@ -1288,10 +1286,9 @@
assert isinstance(check_type_box, ConstInt)
check_type = check_type_box.getint()
#
- # XXX use the extra 3 words in the stm resume buffer to save
- # up to 3 registers, too. For now we just flush them all.
- self.xrm.before_call(save_all_regs=1)
- self.rm.before_call(save_all_regs=1)
+ # only save regs for the should_break_transaction call
+ self.xrm.before_call()
+ self.rm.before_call()
gcmap = self.get_gcmap() # allocate the gcmap *before*
#
self.assembler.stm_transaction_break(check_type, gcmap)
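
The effect of dropping save_all_regs=1 (an illustrative model, not the
real RegisterManager API): only caller-saved registers still need to be
flushed before the should_break_transaction call, because the assembler
now saves and restores everything itself around the actual break call.

    def before_call_model(live_regs, caller_saved, save_all_regs=False):
        # which live registers get flushed to the frame before a call
        if save_all_regs:
            return set(live_regs)                  # old behaviour
        return set(live_regs) & set(caller_saved)  # new: minimal set
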
diff --git a/rpython/jit/backend/x86/stmtlocal.py b/rpython/jit/backend/x86/stmtlocal.py
--- a/rpython/jit/backend/x86/stmtlocal.py
+++ b/rpython/jit/backend/x86/stmtlocal.py
@@ -47,3 +47,9 @@
'stm_invalidate_jmp_buf',
[llmemory.Address], lltype.Void,
sandboxsafe=True, _nowrapper=True, transactionsafe=True)
+stm_pointer_equal_fn = rffi.llexternal(
+ 'stm_pointer_equal',
+ [llmemory.Address, llmemory.Address], lltype.Bool,
+ sandboxsafe=True, _nowrapper=True, transactionsafe=True)
+
+
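
Usage sketch for the new declaration (it mirrors the assembler.py hunk
above; cpu, mc, imm and we_are_translated() come from the x86 backend
and rpython.rlib.objectmodel):

    if not we_are_translated():   # tests: use the GC descriptor helper
        fn = cpu.gc_ll_descr.get_malloc_fn_addr('stm_ptr_eq')
        mc.CALL(imm(fn))
    else:                         # translation: call the C function
        fn = stmtlocal.stm_pointer_equal_fn
        mc.CALL(imm(cpu.cast_ptr_to_int(fn)))
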
diff --git a/rpython/jit/backend/x86/test/test_stm_integration.py b/rpython/jit/backend/x86/test/test_stm_integration.py
--- a/rpython/jit/backend/x86/test/test_stm_integration.py
+++ b/rpython/jit/backend/x86/test/test_stm_integration.py
@@ -835,8 +835,8 @@
args = [s for i, s in enumerate((s1, s2))
if not isinstance((p1, p2)[i], Const)] + [7]
- frame = self.cpu.execute_token(looptoken, *args)
- frame = rffi.cast(JITFRAMEPTR, frame)
+ deadframe = self.cpu.execute_token(looptoken, *args)
+ frame = rffi.cast(JITFRAMEPTR, deadframe)
frame_adr = rffi.cast(lltype.Signed, frame.jf_descr)
guard_failed = frame_adr != id(finaldescr)
@@ -849,8 +849,10 @@
if a == b or a == 0 or b == 0:
assert (a, b) not in called_on
+ assert (b, a) not in called_on
else:
- assert [(a, b)] == called_on
+ assert ([(a, b)] == called_on
+ or [(b, a)] == called_on)
if guard is not None:
if a == b: