[pypy-svn] pypy jit-shadowstack: Work in progress. A bit hard to test individual changes :-(
arigo
commits-noreply at bitbucket.org
Thu Mar 31 11:43:25 CEST 2011
Author: Armin Rigo <arigo at tunes.org>
Branch: jit-shadowstack
Changeset: r43036:3f4a55febb56
Date: 2011-03-31 11:40 +0200
http://bitbucket.org/pypy/pypy/changeset/3f4a55febb56/
Log: Work in progress. A bit hard to test individual changes :-(
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -349,6 +349,8 @@
INSN_rb = insn(rex_w, chr(base+3), register(1,8), stack_bp(2))
INSN_rm = insn(rex_w, chr(base+3), register(1,8), mem_reg_plus_const(2))
INSN_rj = insn(rex_w, chr(base+3), register(1,8), '\x05', immediate(2))
+ INSN_ji8 = insn(rex_w, '\x83', orbyte(base), '\x05', immediate(1),
+ immediate(2,'b'))
INSN_bi8 = insn(rex_w, '\x83', orbyte(base), stack_bp(1), immediate(2,'b'))
INSN_bi32= insn(rex_w, '\x81', orbyte(base), stack_bp(1), immediate(2))
@@ -366,7 +368,8 @@
INSN_bi32(mc, offset, immed)
INSN_bi._always_inline_ = True # try to constant-fold single_byte()
- return INSN_ri, INSN_rr, INSN_rb, INSN_bi, INSN_br, INSN_rm, INSN_rj
+ return (INSN_ri, INSN_rr, INSN_rb, INSN_bi, INSN_br, INSN_rm, INSN_rj,
+ INSN_ji8)
def select_8_or_32_bit_immed(insn_8, insn_32):
def INSN(*args):
@@ -444,13 +447,13 @@
# ------------------------------ Arithmetic ------------------------------
- ADD_ri, ADD_rr, ADD_rb, _, _, ADD_rm, ADD_rj = common_modes(0)
- OR_ri, OR_rr, OR_rb, _, _, OR_rm, OR_rj = common_modes(1)
- AND_ri, AND_rr, AND_rb, _, _, AND_rm, AND_rj = common_modes(4)
- SUB_ri, SUB_rr, SUB_rb, _, _, SUB_rm, SUB_rj = common_modes(5)
- SBB_ri, SBB_rr, SBB_rb, _, _, SBB_rm, SBB_rj = common_modes(3)
- XOR_ri, XOR_rr, XOR_rb, _, _, XOR_rm, XOR_rj = common_modes(6)
- CMP_ri, CMP_rr, CMP_rb, CMP_bi, CMP_br, CMP_rm, CMP_rj = common_modes(7)
+ ADD_ri, ADD_rr, ADD_rb, _, _, ADD_rm, ADD_rj, _ = common_modes(0)
+ OR_ri, OR_rr, OR_rb, _, _, OR_rm, OR_rj, _ = common_modes(1)
+ AND_ri, AND_rr, AND_rb, _, _, AND_rm, AND_rj, _ = common_modes(4)
+ SUB_ri, SUB_rr, SUB_rb, _, _, SUB_rm, SUB_rj, SUB_ji8 = common_modes(5)
+ SBB_ri, SBB_rr, SBB_rb, _, _, SBB_rm, SBB_rj, _ = common_modes(3)
+ XOR_ri, XOR_rr, XOR_rb, _, _, XOR_rm, XOR_rj, _ = common_modes(6)
+ CMP_ri, CMP_rr, CMP_rb, CMP_bi, CMP_br, CMP_rm, CMP_rj, _ = common_modes(7)
CMP_mi8 = insn(rex_w, '\x83', orbyte(7<<3), mem_reg_plus_const(1), immediate(2, 'b'))
CMP_mi32 = insn(rex_w, '\x81', orbyte(7<<3), mem_reg_plus_const(1), immediate(2))
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -1,3 +1,4 @@
+import os
from pypy.rlib import rgc
from pypy.rlib.objectmodel import we_are_translated
from pypy.rlib.debug import fatalerror
@@ -15,7 +16,6 @@
from pypy.jit.backend.llsupport.descr import GcCache, get_field_descr
from pypy.jit.backend.llsupport.descr import GcPtrFieldDescr
from pypy.jit.backend.llsupport.descr import get_call_descr
-from pypy.rpython.memory.gctransform import asmgcroot
# ____________________________________________________________
@@ -212,10 +212,12 @@
return addr_ref
-class GcRootMap_asmgcc:
+class GcRootMap_asmgcc(object):
"""Handles locating the stack roots in the assembler.
This is the class supporting --gcrootfinder=asmgcc.
"""
+ is_shadow_stack = False
+
LOC_REG = 0
LOC_ESP_PLUS = 1
LOC_EBP_PLUS = 2
@@ -224,7 +226,7 @@
GCMAP_ARRAY = rffi.CArray(lltype.Signed)
CALLSHAPE_ARRAY_PTR = rffi.CArrayPtr(rffi.UCHAR)
- def __init__(self):
+ def __init__(self, gcdescr=None):
# '_gcmap' is an array of length '_gcmap_maxlength' of addresses.
# '_gcmap_curlength' tells how full the array really is.
# The addresses are actually grouped in pairs:
@@ -237,6 +239,13 @@
self._gcmap_deadentries = 0
self._gcmap_sorted = True
+ def add_jit2gc_hooks(self, jit2gc):
+ jit2gc.update({
+ 'gcmapstart': lambda: self.gcmapstart(),
+ 'gcmapend': lambda: self.gcmapend(),
+ 'gcmarksorted': lambda: self.gcmarksorted(),
+ })
+
def initialize(self):
# hack hack hack. Remove these lines and see MissingRTypeAttribute
# when the rtyper tries to annotate these methods only when GC-ing...
@@ -309,6 +318,7 @@
@rgc.no_collect
def freeing_block(self, start, stop):
+ from pypy.rpython.memory.gctransform import asmgcroot
# if [start:stop] is a raw block of assembler, then look up the
# corresponding gcroot markers, and mark them as freed now in
# self._gcmap by setting the 2nd address of every entry to NULL.
@@ -365,7 +375,7 @@
number >>= 7
shape.append(chr(number | flag))
- def add_ebp_offset(self, shape, offset):
+ def add_frame_offset(self, shape, offset):
assert (offset & 3) == 0
if offset >= 0:
num = self.LOC_EBP_PLUS | offset
@@ -388,6 +398,125 @@
return rawaddr
+class GcRootMap_shadowstack(object):
+ """Handles locating the stack roots in the assembler.
+ This is the class supporting --gcrootfinder=shadowstack.
+ """
+ is_shadow_stack = True
+ MARKER = 8
+
+ # The "shadowstack" is a portable way in which the GC finds the
+ # roots that live in the stack. Normally it is just a list of
+ # pointers to GC objects. The pointers may be moved around by a GC
+ # collection. But with the JIT, an entry can also be MARKER, in
+ # which case the next entry points to an assembler stack frame.
+ # During a residual CALL from the assembler (which may indirectly
+ # call the GC), we use the force_index stored in the assembler
+ # stack frame to identify the call: we can go from the force_index
+ # to a list of where the GC pointers are in the frame (this is the
+ # purpose of the present class).
+ #
+ # Note that across CALL_MAY_FORCE or CALL_ASSEMBLER, we can also go
+ # from the force_index to a ResumeGuardForcedDescr instance, which
+ # is used if the virtualizable or the virtualrefs need to be forced
+ # (see pypy.jit.backend.model). The force_index number in the stack
+ # frame is initially set to a non-negative value x, but it is
+ # occasionally turned into (~x) in case of forcing.
+
+ INTARRAYPTR = rffi.CArrayPtr(rffi.INT)
+ CALLSHAPES_ARRAY = rffi.CArray(INTARRAYPTR)
+
+ def __init__(self, gcdescr):
+ self._callshapes = lltype.nullptr(self.CALLSHAPES_ARRAY)
+ self._callshapes_maxlength = 0
+ self.force_index_ofs = gcdescr.force_index_ofs
+
+ def add_jit2gc_hooks(self, jit2gc):
+ #
+ def collect_jit_stack_root(callback, gc, addr):
+ if addr.signed[0] != GcRootMap_shadowstack.MARKER:
+ # common case
+ if gc.points_to_valid_gc_object(addr):
+ callback(gc, addr)
+ return WORD
+ else:
+ # case of a MARKER followed by an assembler stack frame
+ self.follow_stack_frame_of_assembler(callback, gc, addr)
+ return 2 * WORD
+ #
+ jit2gc.update({
+ 'rootstackhook': collect_jit_stack_root,
+ })
+
+ def initialize(self):
+ pass
+
+ def follow_stack_frame_of_assembler(self, callback, gc, addr):
+ frame_addr = addr.signed[1]
+ addr = llmemory.cast_int_to_adr(frame_addr + self.force_index_ofs)
+ force_index = addr.signed[0]
+ if force_index < 0:
+ force_index = ~force_index
+ callshape = self._callshapes[force_index]
+ n = 0
+ while True:
+ offset = rffi.cast(lltype.Signed, callshape[n])
+ if offset == 0:
+ break
+ addr = llmemory.cast_int_to_adr(frame_addr + offset)
+ callback(gc, addr)
+ n += 1
+
+ def get_basic_shape(self, is_64_bit=False):
+ return []
+
+ def add_frame_offset(self, shape, offset):
+ assert offset != 0
+ shape.append(offset)
+
+ def add_callee_save_reg(self, shape, register):
+ msg = "GC pointer in %s was not spilled" % register
+ os.write(2, '[llsupport/gc] %s\n' % msg)
+ raise AssertionError(msg)
+
+ def compress_callshape(self, shape, datablockwrapper):
+ length = len(shape)
+ SZINT = rffi.sizeof(rffi.INT)
+ rawaddr = datablockwrapper.malloc_aligned((length + 1) * SZINT, SZINT)
+ p = rffi.cast(self.INTARRAYPTR, rawaddr)
+ for i in range(length):
+ p[i] = rffi.cast(rffi.INT, shape[i])
+ p[length] = rffi.cast(rffi.INT, 0)
+ return p
+
+ def write_callshape(self, p, force_index):
+ if force_index >= self._callshapes_maxlength:
+ self._enlarge_callshape_list(force_index + 1)
+ self._callshapes[force_index] = p
+
+ def _enlarge_callshape_list(self, minsize):
+ newlength = 250 + (self._callshapes_maxlength // 3) * 4
+ if newlength < minsize:
+ newlength = minsize
+ newarray = lltype.malloc(self.CALLSHAPES_ARRAY, newlength,
+ flavor='raw', track_allocation=False)
+ if self._callshapes:
+ i = self._callshapes_maxlength - 1
+ while i >= 0:
+ newarray[i] = self._callshapes[i]
+ i -= 1
+ lltype.free(self._callshapes, flavor='raw')
+ self._callshapes = newarray
+ self._callshapes_maxlength = newlength
+
+ def freeing_block(self, start, stop):
+ pass # nothing needed here
+
+ def get_root_stack_top_addr(self):
+ rst_addr = llop.gc_adr_of_root_stack_top(llmemory.Address)
+ return rffi.cast(lltype.Signed, rst_addr)
+
+
class WriteBarrierDescr(AbstractDescr):
def __init__(self, gc_ll_descr):
self.llop1 = gc_ll_descr.llop1
@@ -437,7 +566,7 @@
except KeyError:
raise NotImplementedError("--gcrootfinder=%s not implemented"
" with the JIT" % (name,))
- gcrootmap = cls()
+ gcrootmap = cls(gcdescr)
self.gcrootmap = gcrootmap
self.gcrefs = GcRefList()
self.single_gcref_descr = GcPtrFieldDescr('', 0)
@@ -446,12 +575,9 @@
# where it can be fished and reused by the FrameworkGCTransformer
self.layoutbuilder = framework.TransformerLayoutBuilder(translator)
self.layoutbuilder.delay_encoding()
- self.translator._jit2gc = {
- 'layoutbuilder': self.layoutbuilder,
- 'gcmapstart': lambda: gcrootmap.gcmapstart(),
- 'gcmapend': lambda: gcrootmap.gcmapend(),
- 'gcmarksorted': lambda: gcrootmap.gcmarksorted(),
- }
+ self.translator._jit2gc = {'layoutbuilder': self.layoutbuilder}
+ gcrootmap.add_jit2gc_hooks(self.translator._jit2gc)
+
self.GCClass = self.layoutbuilder.GCClass
self.moving_gc = self.GCClass.moving_gc
self.HDRPTR = lltype.Ptr(self.GCClass.HDR)
diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -506,6 +506,10 @@
s_gc = self.translator.annotator.bookkeeper.valueoftype(GCClass)
r_gc = self.translator.rtyper.getrepr(s_gc)
self.c_const_gc = rmodel.inputconst(r_gc, self.gcdata.gc)
+ s_gc_data = self.translator.annotator.bookkeeper.valueoftype(
+ gctypelayout.GCData)
+ r_gc_data = self.translator.rtyper.getrepr(s_gc_data)
+ self.c_const_gcdata = rmodel.inputconst(r_gc_data, self.gcdata)
self.malloc_zero_filled = GCClass.malloc_zero_filled
HDR = self.HDR = self.gcdata.gc.gcheaderbuilder.HDR
@@ -792,6 +796,15 @@
resulttype=llmemory.Address)
hop.genop('adr_add', [v_gc_adr, c_ofs], resultvar=op.result)
+ def gct_gc_adr_of_root_stack_top(self, hop):
+ op = hop.spaceop
+ ofs = llmemory.offsetof(self.c_const_gcdata.concretetype.TO,
+ 'inst_root_stack_top')
+ c_ofs = rmodel.inputconst(lltype.Signed, ofs)
+ v_gcdata_adr = hop.genop('cast_ptr_to_adr', [self.c_const_gcdata],
+ resulttype=llmemory.Address)
+ hop.genop('adr_add', [v_gcdata_adr, c_ofs], resultvar=op.result)
+
def gct_gc_x_swap_pool(self, hop):
op = hop.spaceop
[v_malloced] = op.args
@@ -1336,10 +1349,9 @@
self.rootstackhook = gctransformer.root_stack_jit_hook
if self.rootstackhook is None:
def collect_stack_root(callback, gc, addr):
- if we_are_translated():
- ll_assert(addr.address[0].signed[0] != 0,
- "unexpected null object header")
- callback(gc, addr)
+ if gc.points_to_valid_gc_object(addr):
+ callback(gc, addr)
+ return sizeofaddr
self.rootstackhook = collect_stack_root
def push_stack(self, addr):
@@ -1367,9 +1379,7 @@
addr = gcdata.root_stack_base
end = gcdata.root_stack_top
while addr != end:
- if gc.points_to_valid_gc_object(addr):
- rootstackhook(collect_stack_root, gc, addr)
- addr += sizeofaddr
+ addr += rootstackhook(collect_stack_root, gc, addr)
if self.collect_stacks_from_other_threads is not None:
self.collect_stacks_from_other_threads(collect_stack_root)
@@ -1480,9 +1490,7 @@
end = stacktop - sizeofaddr
addr = end.address[0]
while addr != end:
- if gc.points_to_valid_gc_object(addr):
- rootstackhook(callback, gc, addr)
- addr += sizeofaddr
+ addr += rootstackhook(callback, gc, addr)
def collect_more_stacks(callback):
ll_assert(get_aid() == gcdata.active_thread,
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -837,6 +837,7 @@
self.rm.possibly_free_vars_for_op(op)
def _fastpath_malloc(self, op, descr):
+ XXX
assert isinstance(descr, BaseSizeDescr)
gc_ll_descr = self.assembler.cpu.gc_ll_descr
self.rm.force_allocate_reg(op.result, selected_reg=eax)
@@ -859,7 +860,8 @@
def consider_new(self, op):
gc_ll_descr = self.assembler.cpu.gc_ll_descr
- if gc_ll_descr.can_inline_malloc(op.getdescr()):
+ os.write(2, "fixme: consider_new\n")
+ if 0 and gc_ll_descr.can_inline_malloc(op.getdescr()): # XXX
self._fastpath_malloc(op, op.getdescr())
else:
args = gc_ll_descr.args_for_new(op.getdescr())
@@ -869,7 +871,8 @@
def consider_new_with_vtable(self, op):
classint = op.getarg(0).getint()
descrsize = heaptracker.vtable2descr(self.assembler.cpu, classint)
- if self.assembler.cpu.gc_ll_descr.can_inline_malloc(descrsize):
+ os.write(2, "fixme: consider_new_with_vtable\n")
+ if 0 and self.assembler.cpu.gc_ll_descr.can_inline_malloc(descrsize): # XXX
self._fastpath_malloc(op, descrsize)
self.assembler.set_vtable(eax, imm(classint))
# result of fastpath malloc is in eax
@@ -1132,7 +1135,7 @@
# call memcpy()
self.rm.before_call()
self.xrm.before_call()
- self.assembler._emit_call(imm(self.assembler.memcpy_addr),
+ self.assembler._emit_call(-1, imm(self.assembler.memcpy_addr),
[dstaddr_loc, srcaddr_loc, length_loc])
self.rm.possibly_free_var(length_box)
self.rm.possibly_free_var(dstaddr_box)
@@ -1205,7 +1208,7 @@
for v, val in self.fm.frame_bindings.items():
if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
assert isinstance(val, StackLoc)
- gcrootmap.add_ebp_offset(shape, get_ebp_ofs(val.position))
+ gcrootmap.add_frame_offset(shape, get_ebp_ofs(val.position))
for v, reg in self.rm.reg_bindings.items():
if reg is eax:
continue # ok to ignore this one
diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py
--- a/pypy/jit/backend/model.py
+++ b/pypy/jit/backend/model.py
@@ -23,18 +23,22 @@
self.fail_descr_list = []
self.fail_descr_free_list = []
+ def reserve_some_free_fail_descr_number(self):
+ lst = self.fail_descr_list
+ if len(self.fail_descr_free_list) > 0:
+ n = self.fail_descr_free_list.pop()
+ assert lst[n] is None
+ else:
+ n = len(lst)
+ lst.append(None)
+ return n
+
def get_fail_descr_number(self, descr):
assert isinstance(descr, history.AbstractFailDescr)
n = descr.index
if n < 0:
- lst = self.fail_descr_list
- if len(self.fail_descr_free_list) > 0:
- n = self.fail_descr_free_list.pop()
- assert lst[n] is None
- lst[n] = descr
- else:
- n = len(lst)
- lst.append(descr)
+ n = self.reserve_some_free_fail_descr_number()
+ self.fail_descr_list[n] = descr
descr.index = n
return n
@@ -294,6 +298,13 @@
def record_faildescr_index(self, n):
self.faildescr_indices.append(n)
+ def reserve_and_record_some_faildescr_index(self):
+ # like record_faildescr_index(), but invent and return a new,
+ # unused faildescr index
+ n = self.cpu.reserve_some_free_fail_descr_number()
+ self.record_faildescr_index(n)
+ return n
+
def compiling_a_bridge(self):
self.cpu.total_compiled_bridges += 1
self.bridges_count += 1
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -19,6 +19,8 @@
def __init__(self, rtyper, stats, opts=None, translate_support_code=False,
gcdescr=None):
+ if gcdescr is not None:
+ gcdescr.force_index_ofs = FORCE_INDEX_OFS
AbstractLLCPU.__init__(self, rtyper, stats, opts,
translate_support_code, gcdescr)
@@ -127,7 +129,7 @@
fail_index = rffi.cast(TP, addr_of_force_index)[0]
assert fail_index >= 0, "already forced!"
faildescr = self.get_fail_descr_from_number(fail_index)
- rffi.cast(TP, addr_of_force_index)[0] = -1
+ rffi.cast(TP, addr_of_force_index)[0] = ~fail_index
frb = self.assembler._find_failure_recovery_bytecode(faildescr)
bytecode = rffi.cast(rffi.UCHARP, frb)
# start of "no gc operation!" block
diff --git a/pypy/rpython/lltypesystem/lloperation.py b/pypy/rpython/lltypesystem/lloperation.py
--- a/pypy/rpython/lltypesystem/lloperation.py
+++ b/pypy/rpython/lltypesystem/lloperation.py
@@ -487,7 +487,9 @@
# ^^^ returns an address of nursery free pointer, for later modifications
'gc_adr_of_nursery_top' : LLOp(),
# ^^^ returns an address of pointer, since it can change at runtime
-
+ 'gc_adr_of_root_stack_top': LLOp(),
+ # ^^^ returns the address of gcdata.root_stack_top (for shadowstack only)
+
# experimental operations in support of thread cloning, only
# implemented by the Mark&Sweep GC
'gc_x_swap_pool': LLOp(canraise=(MemoryError,), canunwindgc=True),
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -171,25 +171,42 @@
self.float_const_abs_addr = float_constants + 16
def _build_malloc_fixedsize_slowpath(self):
+ # With asmgcc, we need two helpers, so that we can write two CALL
+ # instructions in assembler, with a mark_gc_roots in between.
+ # With shadowstack, this is not needed, so we produce a single helper.
+ gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+ #
# ---------- first helper for the slow path of malloc ----------
mc = codebuf.MachineCodeBlockWrapper()
if self.cpu.supports_floats: # save the XMM registers in
for i in range(self.cpu.NUM_REGS):# the *caller* frame, from esp+8
mc.MOVSD_sx((WORD*2)+8*i, i)
mc.SUB_rr(edx.value, eax.value) # compute the size we want
- if IS_X86_32:
- mc.MOV_sr(WORD, edx.value) # save it as the new argument
- elif IS_X86_64:
- # rdi can be clobbered: its content was forced to the stack
- # by _fastpath_malloc(), like all other save_around_call_regs.
- mc.MOV_rr(edi.value, edx.value)
-
addr = self.cpu.gc_ll_descr.get_malloc_fixedsize_slowpath_addr()
- mc.JMP(imm(addr)) # tail call to the real malloc
- rawstart = mc.materialize(self.cpu.asmmemmgr, [])
- self.malloc_fixedsize_slowpath1 = rawstart
- # ---------- second helper for the slow path of malloc ----------
- mc = codebuf.MachineCodeBlockWrapper()
+ #
+ if gcrootmap.is_shadow_stack:
+ # ---- shadowstack ----
+ mc.SUB_ri(esp.value, 16 - WORD) # stack alignment of 16 bytes
+ if IS_X86_32:
+ mc.MOV_sr(0, edx.value) # push argument
+ elif IS_X86_64:
+ mc.MOV_rr(edi.value, edx.value)
+ mc.CALL(imm(addr))
+ mc.ADD_ri(esp.value, 16 - WORD)
+ else:
+ # ---- asmgcc ----
+ if IS_X86_32:
+ mc.MOV_sr(WORD, edx.value) # save it as the new argument
+ elif IS_X86_64:
+ # rdi can be clobbered: its content was forced to the stack
+ # by _fastpath_malloc(), like all other save_around_call_regs.
+ mc.MOV_rr(edi.value, edx.value)
+ mc.JMP(imm(addr)) # tail call to the real malloc
+ rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+ self.malloc_fixedsize_slowpath1 = rawstart
+ # ---------- second helper for the slow path of malloc ----------
+ mc = codebuf.MachineCodeBlockWrapper()
+ #
if self.cpu.supports_floats: # restore the XMM registers
for i in range(self.cpu.NUM_REGS):# from where they were saved
mc.MOVSD_xs(i, (WORD*2)+8*i)
@@ -550,6 +567,10 @@
for regloc in self.cpu.CALLEE_SAVE_REGISTERS:
self.mc.PUSH_r(regloc.value)
+ gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+ if gcrootmap and gcrootmap.is_shadow_stack:
+ self._call_header_shadowstack(gcrootmap)
+
def _call_header_with_stack_check(self):
if self.stack_check_slowpath == 0:
pass # no stack check (e.g. not translated)
@@ -571,12 +592,32 @@
def _call_footer(self):
self.mc.LEA_rb(esp.value, -len(self.cpu.CALLEE_SAVE_REGISTERS) * WORD)
+ gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+ if gcrootmap and gcrootmap.is_shadow_stack:
+ self._call_footer_shadowstack(gcrootmap)
+
for i in range(len(self.cpu.CALLEE_SAVE_REGISTERS)-1, -1, -1):
self.mc.POP_r(self.cpu.CALLEE_SAVE_REGISTERS[i].value)
self.mc.POP_r(ebp.value)
self.mc.RET()
+ def _call_header_shadowstack(self, gcrootmap):
+ # we need to put two words into the shadowstack: the MARKER
+ # and the address of the frame (ebp, actually)
+ rst = gcrootmap.get_root_stack_top_addr()
+ assert rx86.fits_in_32bits(rst)
+ self.mc.MOV_rj(eax.value, rst) # MOV eax, [rootstacktop]
+ self.mc.LEA_rm(edx.value, (eax.value, 2*WORD)) # LEA edx, [eax+2*WORD]
+ self.mc.MOV_mi((eax.value, 0), gcrootmap.MARKER) # MOV [eax], MARKER
+ self.mc.MOV_mr((eax.value, WORD), ebp.value) # MOV [eax+WORD], ebp
+ self.mc.MOV_jr(rst, edx.value) # MOV [rootstacktop], edx
+
+ def _call_footer_shadowstack(self, gcrootmap):
+ rst = gcrootmap.get_root_stack_top_addr()
+ assert rx86.fits_in_32bits(rst)
+ self.mc.SUB_ji8(rst, 2*WORD) # SUB [rootstacktop], 2*WORD
+
def _assemble_bootstrap_direct_call(self, arglocs, jmppos, stackdepth):
if IS_X86_64:
return self._assemble_bootstrap_direct_call_64(arglocs, jmppos, stackdepth)
@@ -896,7 +937,7 @@
self.implement_guard(guard_token, checkfalsecond)
return genop_cmp_guard_float
- def _emit_call(self, x, arglocs, start=0, tmp=eax):
+ def _emit_call(self, force_index, x, arglocs, start=0, tmp=eax):
if IS_X86_64:
return self._emit_call_64(x, arglocs, start)
@@ -924,9 +965,9 @@
self._regalloc.reserve_param(p//WORD)
# x is a location
self.mc.CALL(x)
- self.mark_gc_roots()
+ self.mark_gc_roots(force_index)
- def _emit_call_64(self, x, arglocs, start=0):
+ def _emit_call_64(self, force_index, x, arglocs, start=0):
src_locs = []
dst_locs = []
xmm_src_locs = []
@@ -984,12 +1025,27 @@
self._regalloc.reserve_param(len(pass_on_stack))
self.mc.CALL(x)
- self.mark_gc_roots()
+ self.mark_gc_roots(force_index)
def call(self, addr, args, res):
- self._emit_call(imm(addr), args)
+ force_index = self.write_new_force_index()
+ self._emit_call(force_index, imm(addr), args)
assert res is eax
+ def write_new_force_index(self):
+ # for shadowstack only: get a new, unused force_index number and
+ # write it to FORCE_INDEX_OFS. Used to record the call shape
+ # (i.e. where the GC pointers are in the stack) around a CALL
+ # instruction that doesn't already have a force_index.
+ gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+ if gcrootmap and gcrootmap.is_shadow_stack:
+ clt = self.currently_compiling_loop.compiled_loop_token
+ force_index = clt.reserve_and_record_some_faildescr_index()
+ self.mc.MOV_bi(FORCE_INDEX_OFS, force_index)
+ return force_index
+ else:
+ return 0
+
genop_int_neg = _unaryop("NEG")
genop_int_invert = _unaryop("NOT")
genop_int_add = _binaryop("ADD", True)
@@ -1796,8 +1852,9 @@
tmp = ecx
else:
tmp = eax
-
- self._emit_call(x, arglocs, 3, tmp=tmp)
+
+ force_index = self.write_new_force_index()
+ self._emit_call(force_index, x, arglocs, 3, tmp=tmp)
if IS_X86_32 and isinstance(resloc, StackLoc) and resloc.width == 8:
# a float or a long long return
@@ -1842,8 +1899,8 @@
assert len(arglocs) - 2 == len(descr._x86_arglocs[0])
#
# Write a call to the direct_bootstrap_code of the target assembler
- self._emit_call(imm(descr._x86_direct_bootstrap_code), arglocs, 2,
- tmp=eax)
+ self._emit_call(fail_index, imm(descr._x86_direct_bootstrap_code),
+ arglocs, 2, tmp=eax)
if op.result is None:
assert result_loc is None
value = self.cpu.done_with_this_frame_void_v
@@ -1868,7 +1925,7 @@
jd = descr.outermost_jitdriver_sd
assert jd is not None
asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
- self._emit_call(imm(asm_helper_adr), [eax, arglocs[1]], 0,
+ self._emit_call(fail_index, imm(asm_helper_adr), [eax, arglocs[1]], 0,
tmp=ecx)
if IS_X86_32 and isinstance(result_loc, StackLoc) and result_loc.type == FLOAT:
self.mc.FSTP_b(result_loc.value)
@@ -1990,11 +2047,16 @@
not_implemented("not implemented operation (guard): %s" %
op.getopname())
- def mark_gc_roots(self):
+ def mark_gc_roots(self, force_index):
+ if force_index < 0:
+ return # not needed
gcrootmap = self.cpu.gc_ll_descr.gcrootmap
if gcrootmap:
mark = self._regalloc.get_mark_gc_roots(gcrootmap)
- self.mc.insert_gcroot_marker(mark)
+ if gcrootmap.is_shadow_stack:
+ gcrootmap.write_callshape(mark, force_index)
+ else:
+ self.mc.insert_gcroot_marker(mark)
def target_arglocs(self, loop_token):
return loop_token._x86_arglocs
@@ -2025,11 +2087,16 @@
# result in EAX; slowpath_addr2 additionally returns in EDX a
# copy of heap(nursery_free_adr), so that the final MOV below is
# a no-op.
- slowpath_addr1 = self.malloc_fixedsize_slowpath1
+
# reserve room for the argument to the real malloc and the
# 8 saved XMM regs
self._regalloc.reserve_param(1+16)
- self.mc.CALL(imm(slowpath_addr1))
+
+ gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+ if not gcrootmap.is_shadow_stack:
+ # there are two helpers to call only with asmgcc
+ slowpath_addr1 = self.malloc_fixedsize_slowpath1
+ self.mc.CALL(imm(slowpath_addr1))
self.mark_gc_roots()
slowpath_addr2 = self.malloc_fixedsize_slowpath2
self.mc.CALL(imm(slowpath_addr2))
@@ -2038,6 +2105,7 @@
assert 0 < offset <= 127
self.mc.overwrite(jmp_adr-1, chr(offset))
# on 64-bits, 'tid' is a value that fits in 31 bits
+ assert rx86.fits_in_32bits(tid)
self.mc.MOV_mi((eax.value, 0), tid)
self.mc.MOV(heap(nursery_free_adr), edx)
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_gc.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py
@@ -127,7 +127,7 @@
# ______________________________________________________________________
-class TestCompileFramework(object):
+class CompileFrameworkTests(object):
# Test suite using (so far) the minimark GC.
def setup_class(cls):
funcs = []
@@ -178,7 +178,7 @@
try:
GcLLDescr_framework.DEBUG = True
cls.cbuilder = compile(get_entry(allfuncs), DEFL_GC,
- gcrootfinder="asmgcc", jit=True)
+ gcrootfinder=cls.gcrootfinder, jit=True)
finally:
GcLLDescr_framework.DEBUG = OLD_DEBUG
@@ -576,3 +576,10 @@
def test_compile_framework_minimal_size_in_nursery(self):
self.run('compile_framework_minimal_size_in_nursery')
+
+
+class TestShadowStack(CompileFrameworkTests):
+ gcrootfinder = "shadowstack"
+
+class TestAsmGcc(CompileFrameworkTests):
+ gcrootfinder = "asmgcc"
More information about the Pypy-commit
mailing list