[pypy-commit] pypy continulet-jit-2: Starting to hack at the x86 backend. Right now I'm just hacking and

arigo noreply at buildbot.pypy.org
Thu Mar 1 17:11:45 CET 2012


Author: Armin Rigo <arigo at tunes.org>
Branch: continulet-jit-2
Changeset: r53054:b2c0c89fe245
Date: 2012-03-01 16:01 +0100
http://bitbucket.org/pypy/pypy/changeset/b2c0c89fe245/

Log:	Starting to hack at the x86 backend. Right now I'm just hacking and
	will rely on "hg diff" to merge it more cleanly keeping both
	versions.

diff --git a/pypy/jit/backend/x86/arch.py b/pypy/jit/backend/x86/arch.py
--- a/pypy/jit/backend/x86/arch.py
+++ b/pypy/jit/backend/x86/arch.py
@@ -30,3 +30,16 @@
 #
 # Note that with asmgcc, the locations corresponding to callee-save registers
 # are never used.
+
+# In the offstack version (i.e. when using stacklets): the off-stack allocated
+# area starts with the FRAME_FIXED_SIZE words in the same order as they would
+# be on the real stack (which is top-to-bottom, so it's actually the opposite
+# order as the one in the comments above); but whereas the real stack would
+# have the spilled values stored in (ebp-20), (ebp-24), etc., the off-stack
+# has them stored in (ebp+8), (ebp+12), etc.
+#
+# In stacklet mode, the real frame contains always just OFFSTACK_REAL_FRAME
+# words reserved for temporary usage like call arguments.  To maintain
+# alignment on 32-bit, OFFSTACK_REAL_FRAME % 4 == 3, and it is at least 17
+# to handle all other cases.
+OFFSTACK_REAL_FRAME = 19
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -13,7 +13,8 @@
     gpr_reg_mgr_cls, _valid_addressing_size)
 
 from pypy.jit.backend.x86.arch import (FRAME_FIXED_SIZE, FORCE_INDEX_OFS, WORD,
-                                       IS_X86_32, IS_X86_64)
+                                       IS_X86_32, IS_X86_64,
+                                       OFFSTACK_REAL_FRAME)
 
 from pypy.jit.backend.x86.regloc import (eax, ecx, edx, ebx,
                                          esp, ebp, esi, edi,
@@ -84,6 +85,9 @@
         self.malloc_slowpath1 = 0
         self.malloc_slowpath2 = 0
         self.memcpy_addr = 0
+        self.offstack_malloc = 0
+        self.offstack_realloc = 0
+        self.offstack_free = 0
         self.setup_failure_recovery()
         self._debug = False
         self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i')
@@ -107,7 +111,11 @@
         # the address of the function called by 'new'
         gc_ll_descr = self.cpu.gc_ll_descr
         gc_ll_descr.initialize()
-        self.memcpy_addr = self.cpu.cast_ptr_to_int(support.memcpy_fn)
+        cpi = self.cpu.cast_ptr_to_int
+        self.memcpy_addr           = cpi(support.memcpy_fn)
+        self.offstack_malloc_addr  = cpi(support.offstack_malloc_fn)
+        self.offstack_realloc_addr = cpi(support.offstack_realloc_fn)
+        self.offstack_free_addr    = cpi(support.offstack_free_fn)
         self._build_failure_recovery(False)
         self._build_failure_recovery(True)
         if self.cpu.supports_floats:
@@ -435,17 +443,17 @@
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
         #
         self._call_header_with_stack_check()
-        stackadjustpos = self._patchable_stackadjust()
         clt._debug_nbargs = len(inputargs)
         operations = regalloc.prepare_loop(inputargs, operations,
                                            looptoken, clt.allgcrefs)
         looppos = self.mc.get_relative_pos()
         looptoken._x86_loop_code = looppos
         clt.frame_depth = -1     # temporarily
-        clt.param_depth = -1     # temporarily
-        frame_depth, param_depth = self._assemble(regalloc, operations)
+        #clt.param_depth = -1     # temporarily
+        (frame_depth#, param_depth
+                ) = self._assemble(regalloc, operations)
         clt.frame_depth = frame_depth
-        clt.param_depth = param_depth
+        #clt.param_depth = param_depth
         #
         size_excluding_failure_stuff = self.mc.get_relative_pos()
         self.write_pending_failure_recoveries()
@@ -459,8 +467,8 @@
             rawstart + size_excluding_failure_stuff,
             rawstart))
         debug_stop("jit-backend-addr")
-        self._patch_stackadjust(rawstart + stackadjustpos,
-                                frame_depth + param_depth)
+        #self._patch_stackadjust(rawstart + stackadjustpos,
+        #                        frame_depth )#+ param_depth)
         self.patch_pending_failure_recoveries(rawstart)
         #
         ops_offset = self.mc.ops_offset
@@ -529,7 +537,7 @@
         ops_offset = self.mc.ops_offset
         self.fixup_target_tokens(rawstart)
         self.current_clt.frame_depth = max(self.current_clt.frame_depth, frame_depth)
-        self.current_clt.param_depth = max(self.current_clt.param_depth, param_depth)
+        #self.current_clt.param_depth = max(self.current_clt.param_depth, param_depth)
         self.teardown()
         # oprofile support
         if self.cpu.profile_agent is not None:
@@ -701,14 +709,14 @@
         if we_are_translated() or self.cpu.dont_keepalive_stuff:
             self._regalloc = None   # else keep it around for debugging
         frame_depth = regalloc.fm.get_frame_depth()
-        param_depth = regalloc.param_depth
+        #param_depth = regalloc.param_depth
         jump_target_descr = regalloc.jump_target_descr
         if jump_target_descr is not None:
             target_frame_depth = jump_target_descr._x86_clt.frame_depth
-            target_param_depth = jump_target_descr._x86_clt.param_depth
+            #target_param_depth = jump_target_descr._x86_clt.param_depth
             frame_depth = max(frame_depth, target_frame_depth)
-            param_depth = max(param_depth, target_param_depth)
-        return frame_depth, param_depth
+            #param_depth = max(param_depth, target_param_depth)
+        return frame_depth#, param_depth
 
     def _patchable_stackadjust(self):
         # stack adjustment LEA
@@ -733,10 +741,28 @@
     def _call_header(self):
         # NB. the shape of the frame is hard-coded in get_basic_shape() too.
         # Also, make sure this is consistent with FRAME_FIXED_SIZE.
-        self.mc.PUSH_r(ebp.value)
-        self.mc.MOV_rr(ebp.value, esp.value)
-        for loc in self.cpu.CALLEE_SAVE_REGISTERS:
-            self.mc.PUSH_r(loc.value)
+        if IS_X86_32:
+            self.mc.SUB_ri(esp.value, WORD * (OFFSTACK_REAL_FRAME-1))
+            self.mc.PUSH_i32(4096)     # XXX XXX!
+        elif IS_X86_64:
+            save_regs = [r9, r8, ecx, edx, esi, edi]
+            assert OFFSTACK_REAL_FRAME >= len(save_regs)
+            self.mc.SUB_ri(esp.value, WORD * (OFFSTACK_REAL_FRAME
+                                              - len(save_regs)))
+            for reg in save_regs:
+                self.mc.PUSH_r(reg.value)
+            self.mc.MOV_ri(edi.value, 4096)     # XXX XXX!
+        self.mc.CALL(imm(self.offstack_malloc_addr))
+        if IS_X86_64:
+            for i in range(len(save_regs)):      # XXX looks heavy
+                reg = save_regs[len(save_regs) - 1 - i]
+                self.mc.MOV_rs(reg.value, WORD * i)
+        self.mc.MOV_mr((eax.value, WORD * (FRAME_FIXED_SIZE-1)),
+                       ebp.value)                      # (new ebp) <- ebp
+        self.mc.LEA_rm(ebp.value, (eax.value, WORD * (FRAME_FIXED_SIZE-1)))
+        for i in range(len(self.cpu.CALLEE_SAVE_REGISTERS)):
+            loc = self.cpu.CALLEE_SAVE_REGISTERS[i]
+            self.mc.MOV_br(WORD*(-1-i), loc.value)     # (ebp-4-4*i) <- reg
 
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
         if gcrootmap and gcrootmap.is_shadow_stack:
@@ -761,16 +787,17 @@
         self._call_header()
 
     def _call_footer(self):
-        self.mc.LEA_rb(esp.value, -len(self.cpu.CALLEE_SAVE_REGISTERS) * WORD)
-
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
         if gcrootmap and gcrootmap.is_shadow_stack:
             self._call_footer_shadowstack(gcrootmap)
 
+        self.mc.ADD_ri(esp.value, WORD * OFFSTACK_REAL_FRAME)
         for i in range(len(self.cpu.CALLEE_SAVE_REGISTERS)-1, -1, -1):
-            self.mc.POP_r(self.cpu.CALLEE_SAVE_REGISTERS[i].value)
+            loc = self.cpu.CALLEE_SAVE_REGISTERS[i]
+            self.mc.MOV_rb(loc.value, WORD*(-1-i))     # (ebp-4-4*i) -> reg
+        self.mc.MOV_rb(ebp.value, 0)                   # (ebp) -> ebp
+        # XXX free!
 
-        self.mc.POP_r(ebp.value)
         self.mc.RET()
 
     def _call_header_shadowstack(self, gcrootmap):
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -23,6 +23,7 @@
      TempBox
 from pypy.jit.backend.x86.arch import WORD, FRAME_FIXED_SIZE
 from pypy.jit.backend.x86.arch import IS_X86_32, IS_X86_64, MY_COPY_OF_REGS
+from pypy.jit.backend.x86.arch import OFFSTACK_REAL_FRAME
 from pypy.rlib.rarithmetic import r_longlong
 
 class X86RegisterManager(RegisterManager):
@@ -129,9 +130,9 @@
 class X86FrameManager(FrameManager):
     @staticmethod
     def frame_pos(i, box_type):
-        if IS_X86_32 and box_type == FLOAT:
-            return StackLoc(i, get_ebp_ofs(i+1), box_type)
-        else:
+        #if IS_X86_32 and box_type == FLOAT:
+        #    return StackLoc(i, get_ebp_ofs(i+1), box_type)
+        #else:
             return StackLoc(i, get_ebp_ofs(i), box_type)
     @staticmethod
     def frame_size(box_type):
@@ -168,7 +169,7 @@
 
     def _prepare(self, inputargs, operations, allgcrefs):
         self.fm = X86FrameManager()
-        self.param_depth = 0
+        #self.param_depth = 0
         cpu = self.assembler.cpu
         operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
                                                        allgcrefs)
@@ -197,7 +198,7 @@
                        allgcrefs):
         operations = self._prepare(inputargs, operations, allgcrefs)
         self._update_bindings(arglocs, inputargs)
-        self.param_depth = prev_depths[1]
+        #self.param_depth = prev_depths[1]
         self.min_bytes_before_label = 0
         return operations
 
@@ -206,11 +207,24 @@
                                           at_least_position)
 
     def reserve_param(self, n):
+        xxx
         self.param_depth = max(self.param_depth, n)
 
     def _set_initial_bindings(self, inputargs):
         if IS_X86_64:
             inputargs = self._set_initial_bindings_regs_64(inputargs)
+
+        cur_frame_ofs = WORD * (OFFSTACK_REAL_FRAME + 1)
+        mc = self.assembler.mc
+        for box in inputargs:
+            assert isinstance(box, Box)
+            if IS_X86_32 and box.type == FLOAT:
+                xxx
+            loc = self.fm.loc(box)
+            mc.MOV_rs(eax.value, cur_frame_ofs)
+            mc.MOV_br(loc.value, eax.value)
+        return
+
         #                   ...
         # stack layout:     arg2
         #                   arg1
@@ -1518,11 +1532,15 @@
         else:
             oplist[num] = value
 
+##def get_ebp_ofs(position):
+##    # Argument is a frame position (0, 1, 2...).
+##    # Returns (ebp-20), (ebp-24), (ebp-28)...
+##    # i.e. the n'th word beyond the fixed frame size.
+##    return -WORD * (FRAME_FIXED_SIZE + position)
 def get_ebp_ofs(position):
     # Argument is a frame position (0, 1, 2...).
-    # Returns (ebp-20), (ebp-24), (ebp-28)...
-    # i.e. the n'th word beyond the fixed frame size.
-    return -WORD * (FRAME_FIXED_SIZE + position)
+    # Returns (ebp+8), (ebp+12), (ebp+16)...
+    return WORD * (2 + position)
 
 def _valid_addressing_size(size):
     return size == 1 or size == 2 or size == 4 or size == 8
diff --git a/pypy/jit/backend/x86/support.py b/pypy/jit/backend/x86/support.py
--- a/pypy/jit/backend/x86/support.py
+++ b/pypy/jit/backend/x86/support.py
@@ -33,6 +33,14 @@
 memcpy_fn = rffi.llexternal('memcpy', [llmemory.Address, llmemory.Address,
                                        rffi.SIZE_T], lltype.Void,
                             sandboxsafe=True, _nowrapper=True)
+offstack_malloc_fn = rffi.llexternal('malloc', [rffi.SIZE_T],
+                                     llmemory.Address,
+                                     sandboxsafe=True, _nowrapper=True)
+offstack_realloc_fn = rffi.llexternal('realloc', [llmemory.Address,
+                                             rffi.SIZE_T], llmemory.Address,
+                                      sandboxsafe=True, _nowrapper=True)
+offstack_free_fn = rffi.llexternal('free', [llmemory.Address], lltype.Void,
+                                   sandboxsafe=True, _nowrapper=True)
 
 # ____________________________________________________________
 


More information about the pypy-commit mailing list