[pypy-commit] pypy arm-backed-float: more shadowstack integration

bivab noreply at buildbot.pypy.org
Tue May 24 14:06:51 CEST 2011


Author: David Schneider <david.schneider at picle.org>
Branch: arm-backed-float
Changeset: r44401:c7ca92cd5833
Date: 2011-05-24 13:56 +0200
http://bitbucket.org/pypy/pypy/changeset/c7ca92cd5833/

Log:	more shadowstack integration

diff --git a/pypy/jit/backend/arm/arch.py b/pypy/jit/backend/arm/arch.py
--- a/pypy/jit/backend/arm/arch.py
+++ b/pypy/jit/backend/arm/arch.py
@@ -12,6 +12,7 @@
 MY_COPY_OF_REGS = WORD
 # The address in the PC points two words past the current instruction
 PC_OFFSET = 8
+FORCE_INDEX_OFS = 0
 
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 eci = ExternalCompilationInfo(post_include_bits=["""
diff --git a/pypy/jit/backend/arm/assembler.py b/pypy/jit/backend/arm/assembler.py
--- a/pypy/jit/backend/arm/assembler.py
+++ b/pypy/jit/backend/arm/assembler.py
@@ -175,6 +175,8 @@
                 stack_loc = self.decode32(enc, i+1)
                 if group == self.FLOAT_TYPE:
                     value = self.decode64(stack, frame_depth - stack_loc*WORD)
+                    self.fail_boxes_float.setitem(fail_index, value)
+                    continue
                 else:
                     value = self.decode32(stack, frame_depth - stack_loc*WORD)
                 i += 4
@@ -182,6 +184,8 @@
                 reg = ord(enc[i])
                 if group == self.FLOAT_TYPE:
                     value = self.decode64(vfp_regs, reg*2*WORD)
+                    self.fail_boxes_float.setitem(fail_index, value)
+                    continue
                 else:
                     value = self.decode32(regs, reg*WORD)
 
@@ -377,6 +381,9 @@
             self.mc.writechar(chr(0))
 
     def gen_func_epilog(self, mc=None, cond=c.AL):
+        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        if gcrootmap and gcrootmap.is_shadow_stack:
+            self.gen_footer_shadowstack(gcrootmap)
         if mc is None:
             mc = self.mc
         offset = 1
@@ -399,6 +406,29 @@
         # store the force index
         self.mc.SUB_ri(r.sp.value, r.sp.value, (N_REGISTERS_SAVED_BY_MALLOC+offset)*WORD)
         self.mc.MOV_rr(r.fp.value, r.sp.value)
+        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        if gcrootmap and gcrootmap.is_shadow_stack:
+            self.gen_shadowstack_header(gcrootmap)
+
+    def gen_shadowstack_header(self, gcrootmap):
+        # we need to put two words into the shadowstack: the MARKER
+        # and the address of the frame (fp, actually)
+        # XXX add some comments
+        rst = gcrootmap.get_root_stack_top_addr()
+        self.mc.gen_load_int(r.ip.value, rst)
+        self.mc.LDR_ri(r.r4.value, r.ip.value) # LDR r4, [rootstacktop]
+        self.mc.ADD_ri(r.r5.value, r.r4.value, imm=2*WORD) # ADD r5, r4 [2*WORD]
+        self.mc.gen_load_int(r.r6.value, gcrootmap.MARKER)
+        self.mc.STR_ri(r.r6.value, r.r4.value)
+        self.mc.STR_ri(r.fp.value, r.r4.value, WORD) 
+        self.mc.STR_ri(r.r5.value, r.ip.value)
+
+    def gen_footer_shadowstack(self, gcrootmap):
+        rst = gcrootmap.get_root_stack_top_addr()
+        self.mc.gen_load_int(r.ip.value, rst)
+        self.mc.LDR_ri(r.r4.value, r.ip.value) # LDR r4, [rootstacktop]
+        self.mc.SUB_ri(r.r5.value, r.r4.value, imm=2*WORD) # SUB r5, r4 [2*WORD]
+        self.mc.STR_ri(r.r5.value, r.ip.value)
 
     def gen_bootstrap_code(self, nonfloatlocs, floatlocs, inputargs):
         for i in range(len(nonfloatlocs)):
diff --git a/pypy/jit/backend/arm/opassembler.py b/pypy/jit/backend/arm/opassembler.py
--- a/pypy/jit/backend/arm/opassembler.py
+++ b/pypy/jit/backend/arm/opassembler.py
@@ -29,6 +29,8 @@
 from pypy.rpython.annlowlevel import llhelper
 from pypy.rpython.lltypesystem import lltype, rffi, rstr, llmemory
 
+NO_FORCE_INDEX = -1
+
 class IntOpAsslember(object):
 
     _mixin_ = True
@@ -273,11 +275,13 @@
         self._gen_path_to_exit_path(op, op.getarglist(), arglocs, c.AL)
         return fcond
 
-    def emit_op_call(self, op, args, regalloc, fcond):
+    def emit_op_call(self, op, args, regalloc, fcond, force_index=-1):
         adr = args[0].value
         arglist = op.getarglist()[1:]
-        cond =  self._emit_call(adr, arglist, regalloc, fcond,
-                                op.result)
+        if force_index == -1:
+            force_index = self.write_new_force_index()
+        cond =  self._emit_call(force_index, adr, arglist, 
+                                    regalloc, fcond, op.result)
         descr = op.getdescr()
         #XXX Hack, Hack, Hack
         if op.result and not we_are_translated() and not isinstance(descr, LoopToken):
@@ -291,7 +295,7 @@
     # XXX improve this interface
     # emit_op_call_may_force
     # XXX improve freeing of stuff here
-    def _emit_call(self, adr, args, regalloc, fcond=c.AL, result=None):
+    def _emit_call(self, force_index, adr, args, regalloc, fcond=c.AL, result=None):
         n_args = len(args)
         reg_args = count_reg_args(args)
 
@@ -343,6 +347,7 @@
         regalloc.before_call(save_all_regs=2)
         #the actual call
         self.mc.BL(adr)
+        self.mark_gc_roots(force_index)
         regalloc.possibly_free_vars(args)
         # readjust the sp in case we passed some args on the stack
         if n_args > 4:
@@ -636,7 +641,7 @@
             length_box = bytes_box
             length_loc = bytes_loc
         # call memcpy()
-        self._emit_call(self.memcpy_addr, [dstaddr_box, srcaddr_box, length_box], regalloc)
+        self._emit_call(NO_FORCE_INDEX, self.memcpy_addr, [dstaddr_box, srcaddr_box, length_box], regalloc)
 
         regalloc.possibly_free_vars(args)
         regalloc.possibly_free_var(length_box)
@@ -733,7 +738,7 @@
         # XXX check this
         assert op.numargs() == len(descr._arm_arglocs[0])
         resbox = TempInt()
-        self._emit_call(descr._arm_direct_bootstrap_code, op.getarglist(),
+        self._emit_call(fail_index, descr._arm_direct_bootstrap_code, op.getarglist(),
                                 regalloc, fcond, result=resbox)
         if op.result is None:
             value = self.cpu.done_with_this_frame_void_v
@@ -849,6 +854,20 @@
         self._emit_guard(guard_op, arglocs, c.GE)
         return fcond
 
+    def write_new_force_index(self):
+        # for shadowstack only: get a new, unused force_index number and
+        # write it to FORCE_INDEX_OFS.  Used to record the call shape
+        # (i.e. where the GC pointers are in the stack) around a CALL
+        # instruction that doesn't already have a force_index.
+        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        if gcrootmap and gcrootmap.is_shadow_stack:
+            clt = self.current_clt
+            force_index = clt.reserve_and_record_some_faildescr_index()
+            self._write_fail_index(force_index)
+            return force_index
+        else:
+            return 0
+
     def _write_fail_index(self, fail_index):
         self.mc.gen_load_int(r.ip.value, fail_index)
         self.mc.STR_ri(r.ip.value, r.fp.value)
@@ -867,7 +886,8 @@
             self.mc.ADD_ri(size.value, size.value, ofs_items_loc.value)
         else:
             self.mc.ADD_rr(size.value, size.value, ofs_items_loc.value)
-        self._emit_call(self.malloc_func_addr, [size_box], regalloc,
+        force_index = self.write_new_force_index()
+        self._emit_call(force_index, self.malloc_func_addr, [size_box], regalloc,
                                     result=result)
 
     def emit_op_new(self, op, arglocs, regalloc, fcond):
diff --git a/pypy/jit/backend/arm/regalloc.py b/pypy/jit/backend/arm/regalloc.py
--- a/pypy/jit/backend/arm/regalloc.py
+++ b/pypy/jit/backend/arm/regalloc.py
@@ -823,7 +823,8 @@
             self.fastpath_malloc_fixedsize(op, op.getdescr())
         else:
             arglocs = self._prepare_args_for_new_op(op.getdescr())
-            self.assembler._emit_call(self.assembler.malloc_func_addr,
+            force_index = self.assembler.write_new_force_index()
+            self.assembler._emit_call(force_index, self.assembler.malloc_func_addr,
                                     arglocs, self, result=op.result)
             self.possibly_free_vars(arglocs)
         self.possibly_free_var(op.result)
@@ -836,7 +837,8 @@
             self.fastpath_malloc_fixedsize(op, descrsize)
         else:
             callargs = self._prepare_args_for_new_op(descrsize)
-            self.assembler._emit_call(self.assembler.malloc_func_addr,
+            force_index = self.assembler.write_new_force_index()
+            self.assembler._emit_call(force_index, self.assembler.malloc_func_addr,
                                         callargs, self, result=op.result)
             self.possibly_free_vars(callargs)
         self.possibly_free_var(op.result)
@@ -857,7 +859,9 @@
                 op.getdescr())
             arglocs = [imm(x) for x in args]
             arglocs.append(self.loc(box_num_elem))
-            self.assembler._emit_call(self.assembler.malloc_array_func_addr, arglocs, self, op.result)
+            force_index = self.write_new_force_index()
+            self.assembler._emit_call(force_index, self.assembler.malloc_array_func_addr,
+                                        arglocs, self, op.result)
             return []
         # boehm GC
         itemsize, scale, basesize, ofs_length, _ = (
@@ -914,7 +918,8 @@
         gc_ll_descr = self.cpu.gc_ll_descr
         if gc_ll_descr.get_funcptr_for_newstr is not None:
             loc = self.loc(op.getarg(0))
-            self.assembler._emit_call(self.assembler.malloc_str_func_addr, [loc], self, op.result)
+            force_index = self.write_new_force_index()
+            self.assembler._emit_call(force_index, self.assembler.malloc_str_func_addr, [loc], self, op.result)
             return []
         # boehm GC
         ofs_items, itemsize, ofs = symbolic.get_array_token(rstr.STR,
@@ -926,7 +931,9 @@
         gc_ll_descr = self.cpu.gc_ll_descr
         if gc_ll_descr.get_funcptr_for_newunicode is not None:
             loc = self.loc(op.getarg(0))
-            self.assembler._emit_call(self.assembler.malloc_unicode_func_addr, [loc], self, op.result)
+            force_index = self.write_new_force_index()
+            self.assembler._emit_call(force_index, self.assembler.malloc_unicode_func_addr,
+                                        [loc], self, op.result)
             return []
         # boehm GC
         ofs_items, _, ofs = symbolic.get_array_token(rstr.UNICODE,
@@ -976,7 +983,7 @@
         for v in guard_op.getfailargs():
             if v in self.rm.reg_bindings or v in self.vfprm.reg_bindings:
                 self.force_spill_var(v)
-        self.assembler.emit_op_call(op, args, self, fcond)
+        self.assembler.emit_op_call(op, args, self, fcond, fail_index)
         locs = self._prepare_guard(guard_op)
         self.possibly_free_vars(guard_op.getfailargs())
         return locs
diff --git a/pypy/jit/backend/arm/runner.py b/pypy/jit/backend/arm/runner.py
--- a/pypy/jit/backend/arm/runner.py
+++ b/pypy/jit/backend/arm/runner.py
@@ -13,6 +13,8 @@
 
     def __init__(self, rtyper, stats, opts=None, translate_support_code=False,
                  gcdescr=None):
+        if gcdescr is not None:
+            gcdescr.force_index_ofs = FORCE_INDEX_OFS
         AbstractLLCPU.__init__(self, rtyper, stats, opts,
                                translate_support_code, gcdescr)
     def setup(self):


More information about the pypy-commit mailing list