[pypy-commit] pypy stmgc-c4: improve code generation for transaction_break and pointer_equal

Raemi noreply at buildbot.pypy.org
Tue Oct 29 10:55:15 CET 2013


Author: Remi Meier <remi.meier at gmail.com>
Branch: stmgc-c4
Changeset: r67679:f6fa761a828e
Date: 2013-10-28 14:58 +0100
http://bitbucket.org/pypy/pypy/changeset/f6fa761a828e/

Log:	improve code generation for transaction_break and pointer_equal

diff --git a/TODO b/TODO
--- a/TODO
+++ b/TODO
@@ -1,3 +1,8 @@
+------------------------------------------------------------
+
+make stm_transaction_break use cond_call (or other ways to not
+spill all registers)
+
 ------------------------------------------------------------
 
 constptrs always require slowpath of read_barrier if they
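
The cond_call idea in the new TODO entry, sketched in plain Python (only a model; the
two callables are hypothetical stand-ins, not the backend's actual cond_call
operation): the break check stays inline and cheap, and only the rarely taken
slow path pays for a call that has to save and restore registers.

    def maybe_transaction_break(should_break_transaction, do_transaction_break):
        # fast path: a cheap inline test, live registers stay untouched
        if should_break_transaction():
            # slow path: only here do we pay for spilling/reloading the
            # registers around the actual transaction break
            do_transaction_break()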
diff --git a/rpython/jit/backend/llsupport/stmrewrite.py b/rpython/jit/backend/llsupport/stmrewrite.py
--- a/rpython/jit/backend/llsupport/stmrewrite.py
+++ b/rpython/jit/backend/llsupport/stmrewrite.py
@@ -319,8 +319,8 @@
         lst = op.getarglist()
         lst[1] = self.gen_barrier(lst[1], 'W')
         op = op.copy_and_change(op.getopnum(), args=lst)
-        # then a read barrier the source string
-        self.handle_category_operations(op, 'R')
+        # then an immutable read barrier for the source string
+        self.handle_category_operations(op, 'I')
 
     @specialize.arg(1)
     def _do_stm_call(self, funcname, args, result):
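
A schematic model of the rewriting above, in plain Python (emit_barrier and
emit_op are made-up helpers, and the source-then-destination argument order is
assumed from the resoperation): the destination gets a write barrier, the
source only an immutable read barrier, then the original operation is emitted
with the updated arguments.

    def rewrite_copystrcontent_model(emit_barrier, emit_op, p_src, p_dst, rest):
        # 'W': the destination string's characters are about to be written
        p_dst = emit_barrier(p_dst, 'W')
        # 'I': the source is only read, and string contents are immutable
        p_src = emit_barrier(p_src, 'I')
        emit_op('copystrcontent', p_src, p_dst, *rest)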
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -425,16 +425,12 @@
 
     def _build_ptr_eq_slowpath(self):
         cpu = self.cpu
-        is_stm = cpu.gc_ll_descr.stm
-        assert is_stm
-
-        func = cpu.gc_ll_descr.get_malloc_fn_addr('stm_ptr_eq')
+        assert cpu.gc_ll_descr.stm
         #
         # This builds a helper function called from the slow path of
         # ptr_eq/ne.  It must save all registers, and optionally
-        # all XMM registers.  It takes a single argument just pushed
-        # on the stack even on X86_64.  It must restore stack alignment
-        # accordingly.
+        # all XMM registers. It takes two values pushed on the stack,
+        # even on X86_64.  It must restore stack alignment accordingly.
         mc = codebuf.MachineCodeBlockWrapper()
         #
         self._push_all_regs_to_frame(mc, [], withfloats=False,
@@ -457,7 +453,12 @@
             mc.MOV_rs(edi.value, 2 * WORD)
             mc.MOV_rs(esi.value, 3 * WORD)
         #
-        mc.CALL(imm(func))
+        if not we_are_translated(): # for tests
+            fn = cpu.gc_ll_descr.get_malloc_fn_addr('stm_ptr_eq')
+            mc.CALL(imm(fn))
+        else:
+            fn = stmtlocal.stm_pointer_equal_fn
+            mc.CALL(imm(self.cpu.cast_ptr_to_int(fn)))
         # eax has result
         if IS_X86_32:
             # ||val2|val1|retaddr|x||x|x|val2|val1|
@@ -2367,12 +2368,16 @@
         self.mc.overwrite(jmp_location - 1, chr(offset))
 
     # ------------------- END CALL ASSEMBLER -----------------------
-    def _stm_ptr_eq_fastpath(self, mc, arglocs, result_loc):
+    def _stm_ptr_eq_fastpath(self, mc, arglocs):
         assert self.cpu.gc_ll_descr.stm
         assert self.ptr_eq_slowpath is not None
         a_base = arglocs[0]
         b_base = arglocs[1]
-
+        if isinstance(a_base, ImmedLoc):
+            # make sure there is a non-immed as the first
+            # argument to mc.CMP(). (2 immeds are caught below)
+            a_base, b_base = b_base, a_base
+        
         #
         # FASTPATH
         #
@@ -2386,25 +2391,11 @@
             else:
                 j_ok1 = 0
         else:
-            # do the dance, even if a or b is an Immed
-            # XXX: figure out if CMP() is able to handle it without
-            #      the explicit MOV before it (CMP(a_base, b_base))
+            mc.CMP(a_base, b_base)
+            # reverse flags: if p1==p2, set NZ
             sl = X86_64_SCRATCH_REG.lowest8bits()
-            mc.MOV(X86_64_SCRATCH_REG, a_base)
-            if isinstance(b_base, ImmedLoc) \
-              and rx86.fits_in_32bits(b_base.value):
-                mc.CMP_ri(X86_64_SCRATCH_REG.value, b_base.value)
-            elif not isinstance(b_base, ImmedLoc):
-                mc.CMP(X86_64_SCRATCH_REG, b_base)
-            else:
-                # imm64, need another temporary reg :(
-                mc.PUSH_r(eax.value)
-                mc.MOV_ri64(eax.value, b_base.value)
-                mc.CMP_rr(X86_64_SCRATCH_REG.value, eax.value)
-                mc.POP_r(eax.value)
-            # reverse flags: if p1==p2, set NZ
             mc.SET_ir(rx86.Conditions['Z'], sl.value)
-            mc.AND8_rr(sl.value, sl.value)
+            mc.TEST8_rr(sl.value, sl.value)
             mc.J_il8(rx86.Conditions['NZ'], 0)
             j_ok1 = mc.get_relative_pos()
 
@@ -3163,8 +3154,37 @@
         # call stm_transaction_break() with the address of the
         # STM_RESUME_BUF and the custom longjmp function
         self.push_gcmap(mc, gcmap, mov=True)
+        #
+        # save all registers
+        base_ofs = self.cpu.get_baseofs_of_frame_field()
+        for gpr in self._regalloc.rm.reg_bindings.values():
+            v = gpr_reg_mgr_cls.all_reg_indexes[gpr.value]
+            mc.MOV_br(v * WORD + base_ofs, gpr.value)
+        if IS_X86_64:
+            coeff = 1
+        else:
+            coeff = 2
+        ofs = len(gpr_reg_mgr_cls.all_regs)
+        for xr in self._regalloc.xrm.reg_bindings.values():
+            mc.MOVSD_bx((ofs + xr.value * coeff) * WORD + base_ofs, xr.value)
+        #
+        # CALL break function
         fn = self.stm_transaction_break_path
         mc.CALL(imm(fn))
+        # HERE is the place an aborted transaction retries
+        #
+        # restore regs
+        base_ofs = self.cpu.get_baseofs_of_frame_field()
+        for gpr in self._regalloc.rm.reg_bindings.values():
+            v = gpr_reg_mgr_cls.all_reg_indexes[gpr.value]
+            mc.MOV_rb(gpr.value, v * WORD + base_ofs)
+        if IS_X86_64:
+            coeff = 1
+        else:
+            coeff = 2
+        ofs = len(gpr_reg_mgr_cls.all_regs)
+        for xr in self._regalloc.xrm.reg_bindings.values():
+            mc.MOVSD_xb(xr.value, (ofs + xr.value * coeff) * WORD + base_ofs)
         #
         # patch the JZ above
         if jz_location:
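
The new code around the CALL above spills every live register into its
jitframe slot and reloads it afterwards; the comment marks the CALL's return
address as the place where an aborted transaction retries. The slot arithmetic,
redone as a stand-alone sketch (WORD, the GPR/XMM split and the x86-32 coeff
come from the code above; the helper names are made up for the illustration):

    WORD = 8   # x86-64; 4 on x86-32

    def gpr_slot_ofs(all_reg_indexes, gpr_value, base_ofs, word=WORD):
        # one word per general-purpose register at the start of the frame
        return all_reg_indexes[gpr_value] * word + base_ofs

    def xmm_slot_ofs(num_gpr_slots, xmm_value, base_ofs, is_x86_64=True, word=WORD):
        # float slots follow the GPR slots; on x86-32 every float needs
        # two 32-bit words, hence coeff == 2 there
        coeff = 1 if is_x86_64 else 2
        return (num_gpr_slots + xmm_value * coeff) * word + base_ofs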
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -412,11 +412,9 @@
         args = op.getarglist()
         if args[0].type == REF:
             assert args[1].type == REF
-            # XXX: this is certainly not wanted.
-            # We force immed64 into registers here.
-            x = self.make_sure_var_in_reg(args[0], args, selected_reg=ecx)
-            y = self.make_sure_var_in_reg(args[1], args, selected_reg=eax)
-            self.rm.possibly_free_var(args[1])
+            # move both args to reg or immed
+            x = self.make_sure_var_in_reg(args[0], args)
+            y = self.make_sure_var_in_reg(args[1], args)
         else:
             x = self.make_sure_var_in_reg(args[0], args)
             y = self.loc(args[1])
@@ -1288,10 +1286,9 @@
         assert isinstance(check_type_box, ConstInt)
         check_type = check_type_box.getint()
         #
-        # XXX use the extra 3 words in the stm resume buffer to save
-        # up to 3 registers, too.  For now we just flush them all.
-        self.xrm.before_call(save_all_regs=1)
-        self.rm.before_call(save_all_regs=1)
+        # only save regs for the should_break_transaction call
+        self.xrm.before_call()
+        self.rm.before_call()
         gcmap = self.get_gcmap() # allocate the gcmap *before*
         #
         self.assembler.stm_transaction_break(check_type, gcmap)
diff --git a/rpython/jit/backend/x86/stmtlocal.py b/rpython/jit/backend/x86/stmtlocal.py
--- a/rpython/jit/backend/x86/stmtlocal.py
+++ b/rpython/jit/backend/x86/stmtlocal.py
@@ -47,3 +47,9 @@
     'stm_invalidate_jmp_buf',
     [llmemory.Address], lltype.Void,
     sandboxsafe=True, _nowrapper=True, transactionsafe=True)
+stm_pointer_equal_fn = rffi.llexternal(
+    'stm_pointer_equal',
+    [llmemory.Address, llmemory.Address], lltype.Bool,
+    sandboxsafe=True, _nowrapper=True, transactionsafe=True)
+
+
diff --git a/rpython/jit/backend/x86/test/test_stm_integration.py b/rpython/jit/backend/x86/test/test_stm_integration.py
--- a/rpython/jit/backend/x86/test/test_stm_integration.py
+++ b/rpython/jit/backend/x86/test/test_stm_integration.py
@@ -835,8 +835,8 @@
                     args = [s for i, s in enumerate((s1, s2))
                             if not isinstance((p1, p2)[i], Const)] + [7]
                                         
-                    frame = self.cpu.execute_token(looptoken, *args)
-                    frame = rffi.cast(JITFRAMEPTR, frame)
+                    deadframe = self.cpu.execute_token(looptoken, *args)
+                    frame = rffi.cast(JITFRAMEPTR, deadframe)
                     frame_adr = rffi.cast(lltype.Signed, frame.jf_descr)
                     guard_failed = frame_adr != id(finaldescr)
 
@@ -849,8 +849,10 @@
                         
                     if a == b or a == 0 or b == 0:
                         assert (a, b) not in called_on
+                        assert (b, a) not in called_on
                     else:
-                        assert [(a, b)] == called_on
+                        assert ([(a, b)] == called_on
+                                or [(b, a)] == called_on)
 
                     if guard is not None:
                         if a == b:
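
For reference, the behaviour the test above checks, modelled in plain Python
(a sketch of the intended semantics, not of the generated code; slowpath_equal
stands for the stm_ptr_eq / stm_pointer_equal helper): equal raw pointers and
NULL operands are decided on the fast path, everything else goes to the helper,
and because the fastpath may swap its operands to keep an immediate out of the
first CMP argument, the helper can see the two pointers in either order.

    def ptr_eq_model(p1, p2, slowpath_equal):
        # fastpath: identical raw pointers are equal ...
        if p1 == p2:
            return True
        # ... and a NULL can only equal another NULL, which the line
        # above already covered
        if p1 == 0 or p2 == 0:
            return False
        # slowpath: two distinct non-NULL pointers may still be two
        # copies of the same STM object, so the runtime has to decide
        return slowpath_equal(p1, p2)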

