[pypy-commit] pypy stmgc-c4: start implementing fastpath for nursery allocations (WIP)
Raemi
noreply at buildbot.pypy.org
Wed Oct 16 17:54:47 CEST 2013
Author: Remi Meier <remi.meier at gmail.com>
Branch: stmgc-c4
Changeset: r67430:53c3d84d1993
Date: 2013-10-16 17:53 +0200
http://bitbucket.org/pypy/pypy/changeset/53c3d84d1993/
Log: start implementing fastpath for nursery allocations (WIP); copy over
rewrite tests for stm (need fixing)
diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -511,11 +511,7 @@
self._make_layoutbuilder()
self._make_gcrootmap()
self._setup_gcclass()
- if not self.stm:
- # XXX: not needed with stm/shadowstack??
- self._setup_tid()
- else:
- self.fielddescr_tid = None
+ self._setup_tid()
self._setup_write_barrier()
self._setup_str()
self._make_functions(really_not_translated)
@@ -534,10 +530,8 @@
def _initialize_for_tests(self):
self.layoutbuilder = None
self.fielddescr_tid = AbstractDescr()
- if self.stm:
- self.max_size_of_young_obj = None
- else:
- self.max_size_of_young_obj = 1000
+ self.fielddescr_rev = AbstractDescr()
+ self.max_size_of_young_obj = 1000
self.GCClass = None
self.gcheaderbuilder = None
self.HDRPTR = None
@@ -572,7 +566,15 @@
assert self.GCClass.inline_simple_malloc_varsize
def _setup_tid(self):
- self.fielddescr_tid = get_field_descr(self, self.GCClass.HDR, 'tid')
+ if not self.stm:
+ self.fielddescr_tid = get_field_descr(self, self.GCClass.HDR, 'tid')
+ self.fielddescr_rev = None
+ else:
+ self.fielddescr_tid = get_field_descr(self, self.GCClass.GCHDR,
+ 'h_tid')
+ self.fielddescr_rev = get_field_descr(self, self.GCClass.GCHDR,
+ 'h_revision')
+
frame_tid = self.layoutbuilder.get_type_id(jitframe.JITFRAME)
self.translator._jit2gc['frame_tid'] = frame_tid
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -52,6 +52,7 @@
# barriers. We do this on each "basic block" of operations, which in
# this case means between CALLs or unknown-size mallocs.
#
+ # SYNC with stmrewrite.py!
for op in operations:
if op.getopnum() == rop.DEBUG_MERGE_POINT:
continue
diff --git a/rpython/jit/backend/llsupport/stmrewrite.py b/rpython/jit/backend/llsupport/stmrewrite.py
--- a/rpython/jit/backend/llsupport/stmrewrite.py
+++ b/rpython/jit/backend/llsupport/stmrewrite.py
@@ -88,10 +88,11 @@
# it immediately
if (op.getopnum() == rop.GUARD_NOT_FORCED
and insert_transaction_break):
- # insert transaction_break after GUARD after call
+ # insert transaction_break after GUARD after calls
self.newops.append(
ResOperation(rop.STM_TRANSACTION_BREAK, [], None))
insert_transaction_break = False
+ self.emitting_an_operation_that_can_collect()
else:
assert insert_transaction_break is False
@@ -118,6 +119,7 @@
continue
# ---------- calls ----------
if op.is_call():
+ self.emitting_an_operation_that_can_collect()
if (op.getopnum() == rop.CALL_MAY_FORCE or
op.getopnum() == rop.CALL_ASSEMBLER or
op.getopnum() == rop.CALL_RELEASE_GIL):
@@ -142,7 +144,6 @@
self.fallback_inevitable(op)
else:
self.newops.append(op)
- self.known_category.clear()
continue
# ---------- copystrcontent ----------
if op.getopnum() in (rop.COPYSTRCONTENT,
@@ -155,7 +156,8 @@
continue
# ---------- labels ----------
if op.getopnum() == rop.LABEL:
- self.known_category.clear()
+ self.emitting_an_operation_that_can_collect()
+ self.known_lengths.clear()
self.always_inevitable = False
self.newops.append(op)
continue
@@ -163,6 +165,7 @@
if op.getopnum() == rop.JUMP:
self.newops.append(
ResOperation(rop.STM_TRANSACTION_BREAK, [], None))
+ # self.emitting_an_operation_that_can_collect()
self.newops.append(op)
continue
# ---------- finish, other ignored ops ----------
@@ -185,6 +188,10 @@
assert not insert_transaction_break
return self.newops
+ def emitting_an_operation_that_can_collect(self):
+ GcRewriterAssembler.emitting_an_operation_that_can_collect(self)
+ self.known_category.clear()
+
def write_to_read_categories(self):
for v, c in self.known_category.items():
if c == 'W':
@@ -197,13 +204,14 @@
if c == 'R':
self.known_category[v] = 'P'
-## def gen_malloc_nursery_varsize_frame(self, sizebox, v_result, tid):
-## """ For now don't generate CALL_MALLOC_NURSERY_VARSIZE_FRAME
-## """
-## addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_big_fixedsize')
-## args = [ConstInt(addr), sizebox, ConstInt(tid)]
-## descr = self.gc_ll_descr.malloc_big_fixedsize_descr
-## self._gen_call_malloc_gc(args, v_result, descr)
+ def gen_initialize_tid(self, v_newgcobj, tid):
+ GcRewriterAssembler.gen_initialize_tid(self, v_newgcobj, tid)
+ if self.gc_ll_descr.fielddescr_rev is not None:
+ op = ResOperation(rop.STM_SET_REVISION_GC, [v_newgcobj,], None,
+ descr=self.gc_ll_descr.fielddescr_rev)
+ self.newops.append(op)
+
+
def gen_write_barrier(self, v):
raise NotImplementedError
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -63,6 +63,7 @@
register_known_gctype(self.cpu, o_vtable, O)
#
tiddescr = self.gc_ll_descr.fielddescr_tid
+ revdescr = self.gc_ll_descr.fielddescr_rev
wbdescr = self.gc_ll_descr.write_barrier_descr
WORD = globals()['WORD']
#
diff --git a/rpython/jit/backend/llsupport/test/test_stmrewrite.py b/rpython/jit/backend/llsupport/test/test_stmrewrite.py
--- a/rpython/jit/backend/llsupport/test/test_stmrewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_stmrewrite.py
@@ -744,7 +744,6 @@
fakeextrainfo())
for op, guarded in [
("call(123, descr=calldescr2)", False),
- ("call_assembler(123, descr=casmdescr)", True),
("call_may_force(123, descr=calldescr2)", True),
("call_loopinvariant(123, descr=calldescr2)", False),
]:
@@ -770,6 +769,27 @@
jump(p1)
""" % (op, guard, tr_break), calldescr2=calldescr2)
+ def test_call_assembler(self):
+ self.check_rewrite("""
+ [i0, f0]
+ i2 = call_assembler(i0, f0, descr=casmdescr)
+ guard_not_forced()[]
+ """, """
+ [i0, f0]
+ i1 = getfield_gc(ConstClass(frame_info), descr=jfi_frame_size)
+ p1 = call_malloc_nursery_varsize_frame(i1)
+ setfield_gc(p1, 0, descr=tiddescr)
+ stm_set_revision_gc(p1, descr=revdescr)
+ i2 = getfield_gc(ConstClass(frame_info), descr=jfi_frame_depth)
+ setfield_gc(p1, i2, descr=framelendescr)
+ setfield_gc(p1, ConstClass(frame_info), descr=jf_frame_info)
+ setarrayitem_gc(p1, 0, i0, descr=signedframedescr)
+ setarrayitem_gc(p1, 1, f0, descr=floatframedescr)
+ i3 = call_assembler(p1, descr=casmdescr)
+ guard_not_forced() []
+ stm_transaction_break()
+ """)
+
def test_ptr_eq_null(self):
self.check_rewrite("""
[p1, p2]
@@ -833,3 +853,273 @@
def test_ptr_eq_other_direct_cases(self):
py.test.skip("can also keep ptr_eq if both args are L or W, "
"or if one arg is freshly malloced")
+
+ # ----------- tests copied from rewrite.py -------------
+ def test_rewrite_assembler_new_to_malloc(self):
+ self.check_rewrite("""
+ [p1]
+ p0 = new(descr=sdescr)
+ """, """
+ [p1]
+ p0 = call_malloc_nursery(%(sdescr.size)d)
+ setfield_gc(p0, 1234, descr=tiddescr)
+ stm_set_revision_gc(p0, descr=revdescr)
+ """)
+
+ def test_rewrite_assembler_new3_to_malloc(self):
+ self.check_rewrite("""
+ []
+ p0 = new(descr=sdescr)
+ p1 = new(descr=tdescr)
+ p2 = new(descr=sdescr)
+ jump()
+ """, """
+ []
+ p0 = call_malloc_nursery( \
+ %(sdescr.size + tdescr.size + sdescr.size)d)
+ setfield_gc(p0, 1234, descr=tiddescr)
+ p1 = int_add(p0, %(sdescr.size)d)
+ setfield_gc(p1, 5678, descr=tiddescr)
+ p2 = int_add(p1, %(tdescr.size)d)
+ setfield_gc(p2, 1234, descr=tiddescr)
+ jump()
+ """)
+
+ def test_rewrite_assembler_new_array_fixed_to_malloc(self):
+ self.check_rewrite("""
+ []
+ p0 = new_array(10, descr=adescr)
+ jump()
+ """, """
+ []
+ p0 = call_malloc_nursery( \
+ %(adescr.basesize + 10 * adescr.itemsize)d)
+ setfield_gc(p0, 4321, descr=tiddescr)
+ setfield_gc(p0, 10, descr=alendescr)
+ jump()
+ """)
+
+ def test_rewrite_assembler_new_and_new_array_fixed_to_malloc(self):
+ self.check_rewrite("""
+ []
+ p0 = new(descr=sdescr)
+ p1 = new_array(10, descr=adescr)
+ jump()
+ """, """
+ []
+ p0 = call_malloc_nursery( \
+ %(sdescr.size + \
+ adescr.basesize + 10 * adescr.itemsize)d)
+ setfield_gc(p0, 1234, descr=tiddescr)
+ p1 = int_add(p0, %(sdescr.size)d)
+ setfield_gc(p1, 4321, descr=tiddescr)
+ setfield_gc(p1, 10, descr=alendescr)
+ jump()
+ """)
+
+ def test_rewrite_assembler_round_up(self):
+ self.check_rewrite("""
+ []
+ p0 = new_array(6, descr=bdescr)
+ jump()
+ """, """
+ []
+ p0 = call_malloc_nursery(%(bdescr.basesize + 8)d)
+ setfield_gc(p0, 8765, descr=tiddescr)
+ setfield_gc(p0, 6, descr=blendescr)
+ jump()
+ """)
+
+ def test_rewrite_assembler_round_up_always(self):
+ self.check_rewrite("""
+ []
+ p0 = new_array(5, descr=bdescr)
+ p1 = new_array(5, descr=bdescr)
+ p2 = new_array(5, descr=bdescr)
+ p3 = new_array(5, descr=bdescr)
+ jump()
+ """, """
+ []
+ p0 = call_malloc_nursery(%(4 * (bdescr.basesize + 8))d)
+ setfield_gc(p0, 8765, descr=tiddescr)
+ setfield_gc(p0, 5, descr=blendescr)
+ p1 = int_add(p0, %(bdescr.basesize + 8)d)
+ setfield_gc(p1, 8765, descr=tiddescr)
+ setfield_gc(p1, 5, descr=blendescr)
+ p2 = int_add(p1, %(bdescr.basesize + 8)d)
+ setfield_gc(p2, 8765, descr=tiddescr)
+ setfield_gc(p2, 5, descr=blendescr)
+ p3 = int_add(p2, %(bdescr.basesize + 8)d)
+ setfield_gc(p3, 8765, descr=tiddescr)
+ setfield_gc(p3, 5, descr=blendescr)
+ jump()
+ """)
+
+ def test_rewrite_assembler_minimal_size(self):
+ self.check_rewrite("""
+ []
+ p0 = new(descr=edescr)
+ p1 = new(descr=edescr)
+ jump()
+ """, """
+ []
+ p0 = call_malloc_nursery(%(4*WORD)d)
+ setfield_gc(p0, 9000, descr=tiddescr)
+ p1 = int_add(p0, %(2*WORD)d)
+ setfield_gc(p1, 9000, descr=tiddescr)
+ jump()
+ """)
+
+ def test_rewrite_assembler_variable_size(self):
+ self.check_rewrite("""
+ [i0]
+ p0 = new_array(i0, descr=bdescr)
+ jump(i0)
+ """, """
+ [i0]
+ p0 = call_malloc_nursery_varsize(0, 1, i0, descr=bdescr)
+ setfield_gc(p0, i0, descr=blendescr)
+ jump(i0)
+ """)
+
+ def test_rewrite_new_string(self):
+ self.check_rewrite("""
+ [i0]
+ p0 = newstr(i0)
+ jump(i0)
+ """, """
+ [i0]
+ p0 = call_malloc_nursery_varsize(1, 1, i0, descr=strdescr)
+ setfield_gc(p0, i0, descr=strlendescr)
+ jump(i0)
+ """)
+
+ def test_rewrite_assembler_nonstandard_array(self):
+ # a non-standard array is a bit hard to get; e.g. GcArray(Float)
+ # is like that on Win32, but not on Linux. Build one manually...
+ NONSTD = lltype.GcArray(lltype.Float)
+ nonstd_descr = get_array_descr(self.gc_ll_descr, NONSTD)
+ nonstd_descr.tid = 6464
+ nonstd_descr.basesize = 64 # <= hacked
+ nonstd_descr.itemsize = 8
+ nonstd_descr_gcref = 123
+ self.check_rewrite("""
+ [i0]
+ p0 = new_array(i0, descr=nonstd_descr)
+ jump(i0)
+ """, """
+ [i0]
+ p0 = call_malloc_gc(ConstClass(malloc_array_nonstandard), \
+ 64, 8, \
+ %(nonstd_descr.lendescr.offset)d, \
+ 6464, i0, \
+ descr=malloc_array_nonstandard_descr)
+ jump(i0)
+ """, nonstd_descr=nonstd_descr)
+
+ def test_rewrite_assembler_maximal_size_1(self):
+ self.gc_ll_descr.max_size_of_young_obj = 100
+ self.check_rewrite("""
+ []
+ p0 = new_array(103, descr=bdescr)
+ jump()
+ """, """
+ []
+ p0 = call_malloc_gc(ConstClass(malloc_array), 1, \
+ %(bdescr.tid)d, 103, \
+ descr=malloc_array_descr)
+ jump()
+ """)
+
+ def test_rewrite_assembler_maximal_size_2(self):
+ self.gc_ll_descr.max_size_of_young_obj = 300
+ self.check_rewrite("""
+ []
+ p0 = new_array(101, descr=bdescr)
+ p1 = new_array(102, descr=bdescr) # two new_arrays can be combined
+ p2 = new_array(103, descr=bdescr) # but not all three
+ jump()
+ """, """
+ []
+ p0 = call_malloc_nursery( \
+ %(2 * (bdescr.basesize + 104))d)
+ setfield_gc(p0, 8765, descr=tiddescr)
+ setfield_gc(p0, 101, descr=blendescr)
+ p1 = int_add(p0, %(bdescr.basesize + 104)d)
+ setfield_gc(p1, 8765, descr=tiddescr)
+ setfield_gc(p1, 102, descr=blendescr)
+ p2 = call_malloc_nursery( \
+ %(bdescr.basesize + 104)d)
+ setfield_gc(p2, 8765, descr=tiddescr)
+ setfield_gc(p2, 103, descr=blendescr)
+ jump()
+ """)
+
+ def test_rewrite_assembler_huge_size(self):
+ # "huge" is defined as "larger than 0xffffff bytes, or 16MB"
+ self.check_rewrite("""
+ []
+ p0 = new_array(20000000, descr=bdescr)
+ jump()
+ """, """
+ []
+ p0 = call_malloc_gc(ConstClass(malloc_array), 1, \
+ %(bdescr.tid)d, 20000000, \
+ descr=malloc_array_descr)
+ jump()
+ """)
+
+ def test_new_with_vtable(self):
+ self.check_rewrite("""
+ []
+ p0 = new_with_vtable(ConstClass(o_vtable))
+ jump()
+ """, """
+ [p1]
+ p0 = call_malloc_nursery(104) # rounded up
+ setfield_gc(p0, 9315, descr=tiddescr)
+ setfield_gc(p0, ConstClass(o_vtable), descr=vtable_descr)
+ jump()
+ """)
+
+ def test_new_with_vtable_too_big(self):
+ self.gc_ll_descr.max_size_of_young_obj = 100
+ self.check_rewrite("""
+ []
+ p0 = new_with_vtable(ConstClass(o_vtable))
+ jump()
+ """, """
+ [p1]
+ p0 = call_malloc_gc(ConstClass(malloc_big_fixedsize), 104, 9315, \
+ descr=malloc_big_fixedsize_descr)
+ setfield_gc(p0, ConstClass(o_vtable), descr=vtable_descr)
+ jump()
+ """)
+
+ def test_rewrite_assembler_newstr_newunicode(self):
+ self.check_rewrite("""
+ [i2]
+ p0 = newstr(14)
+ p1 = newunicode(10)
+ p2 = newunicode(i2)
+ p3 = newstr(i2)
+ jump()
+ """, """
+ [i2]
+ p0 = call_malloc_nursery( \
+ %(strdescr.basesize + 16 * strdescr.itemsize + \
+ unicodedescr.basesize + 10 * unicodedescr.itemsize)d)
+ setfield_gc(p0, %(strdescr.tid)d, descr=tiddescr)
+ setfield_gc(p0, 14, descr=strlendescr)
+ p1 = int_add(p0, %(strdescr.basesize + 16 * strdescr.itemsize)d)
+ setfield_gc(p1, %(unicodedescr.tid)d, descr=tiddescr)
+ setfield_gc(p1, 10, descr=unicodelendescr)
+ p2 = call_malloc_nursery_varsize(2, 4, i2, \
+ descr=unicodedescr)
+ setfield_gc(p2, i2, descr=unicodelendescr)
+ p3 = call_malloc_nursery_varsize(1, 1, i2, \
+ descr=strdescr)
+ setfield_gc(p3, i2, descr=strlendescr)
+ jump()
+ """)
+
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -254,11 +254,18 @@
mc.J_il(rx86.Conditions['Z'], 0xfffff) # patched later
jz_location = mc.get_relative_pos()
#
- nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
self._reload_frame_if_necessary(mc, align_stack=True)
self.set_extra_stack_depth(mc, 0)
self._pop_all_regs_from_frame(mc, [eax, edi], self.cpu.supports_floats)
- mc.MOV(edi, heap(nursery_free_adr)) # load this in EDI
+ if self.cpu.gc_ll_descr.stm:
+ # load nursery_current into EDI
+ self._load_stm_thread_descriptor(mc, X86_64_SCRATCH_REG)
+ mc.MOV_rm(edi.value,
+ (X86_64_SCRATCH_REG.value,
+ StmGC.TD_NURSERY_CURRENT))
+ else:
+ nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
+ mc.MOV(edi, heap(nursery_free_adr)) # load this in EDI
# clear the gc pattern
mc.MOV_bi(ofs, 0)
mc.RET()
@@ -2748,6 +2755,175 @@
# XXX if the next operation is a GUARD_NO_EXCEPTION, we should
# somehow jump over it too in the fast path
+ def _load_stm_thread_descriptor(self, mc, loc):
+ assert self.cpu.gc_ll_descr.stm
+ assert isinstance(loc, RegLoc)
+
+ td = self._get_stm_tl(rstm.get_thread_descriptor_adr())
+ self._tl_segment_if_stm(mc)
+ mc.MOV(loc, heap(td))
+ mc.MOV_rm(loc.value, (loc.value, 0))
+
+ def _cond_allocate_in_nursery_or_slowpath(self, mc, gcmap):
+ # needed for slowpath:
+ # eax = nursery_current
+ # edi = nursery_current + size
+ # needed here:
+ # X86_64_SCRATCH_REG = thread_descriptor
+ #
+ # cmp nursery_current+size > nursery_nextlimit
+ mc.CMP_rm(edi.value, (X86_64_SCRATCH_REG.value,
+ StmGC.TD_NURSERY_NEXTLIMIT))
+ mc.J_il8(rx86.Conditions['NA'], 0) # patched later
+ jmp_adr = mc.get_relative_pos()
+ #
+ # == SLOWPATH ==
+ # save the gcmap
+ self.push_gcmap(mc, gcmap, mov=True)
+ mc.CALL(imm(self.malloc_slowpath))
+ mc.JMP_l8(0)
+ jmp2_adr = mc.get_relative_pos()
+ #
+ # == FASTPATH ==
+ offset = mc.get_relative_pos() - jmp_adr
+ assert 0 < offset <= 127
+ mc.overwrite(jmp_adr-1, chr(offset))
+ #
+ # thread_descriptor->nursery_current = nursery_current+size
+ mc.MOV_mr((X86_64_SCRATCH_REG.value,
+ StmGC.TD_NURSERY_CURRENT),
+ edi.value)
+ #
+ # END
+ offset = mc.get_relative_pos() - jmp2_adr
+ assert 0 < offset <= 127
+ mc.overwrite(jmp2_adr-1, chr(offset))
+
+ def malloc_cond_stm(self, size, gcmap):
+ assert self.cpu.gc_ll_descr.stm
+ assert size & (WORD-1) == 0 # must be correctly aligned
+ mc = self.mc
+ # load nursery_current and nursery_nextlimit
+ self._load_stm_thread_descriptor(mc, X86_64_SCRATCH_REG)
+ mc.MOV_rm(eax.value,
+ (X86_64_SCRATCH_REG.value,
+ StmGC.TD_NURSERY_CURRENT))
+ mc.LEA_rm(edi.value, (eax.value, size))
+ #
+ # eax=nursery_current, edi=nursery_current+size
+ self._cond_allocate_in_nursery_or_slowpath(mc, gcmap)
+
+ def malloc_cond_varsize_frame_stm(self, sizeloc, gcmap):
+ assert self.cpu.gc_ll_descr.stm
+ mc = self.mc
+ self._load_stm_thread_descriptor(mc, X86_64_SCRATCH_REG)
+ if sizeloc is eax:
+ self.mc.MOV(edi, sizeloc)
+ sizeloc = edi
+ self.mc.MOV_rm(eax.value, (X86_64_SCRATCH_REG.value,
+ StmGC.TD_NURSERY_CURRENT))
+ if sizeloc is edi:
+ self.mc.ADD_rr(edi.value, eax.value)
+ else:
+ self.mc.LEA_ra(edi.value, (eax.value, sizeloc.value, 0, 0))
+ #
+ # eax=nursery_current, edi=nursery_current+size
+ self._cond_allocate_in_nursery_or_slowpath(mc, gcmap)
+
+ def malloc_cond_varsize_stm(self, kind, lengthloc, itemsize,
+ maxlength, gcmap, arraydescr):
+ assert self.cpu.gc_ll_descr.stm
+ from rpython.jit.backend.llsupport.descr import ArrayDescr
+ assert isinstance(arraydescr, ArrayDescr)
+
+ mc = self.mc
+ # lengthloc is the length of the array, which we must not modify!
+ assert lengthloc is not eax and lengthloc is not edi
+ if isinstance(lengthloc, RegLoc):
+ varsizeloc = lengthloc
+ else:
+ mc.MOV(edi, lengthloc)
+ varsizeloc = edi
+
+ mc.CMP(varsizeloc, imm(maxlength))
+ mc.J_il8(rx86.Conditions['A'], 0) # patched later
+ jmp_adr0 = mc.get_relative_pos()
+
+ self._load_stm_thread_descriptor(mc, X86_64_SCRATCH_REG)
+ mc.MOV_rm(eax.value,
+ (X86_64_SCRATCH_REG.value,
+ StmGC.TD_NURSERY_CURRENT))
+
+ if valid_addressing_size(itemsize):
+ shift = get_scale(itemsize)
+ else:
+ shift = self._imul_const_scaled(mc, edi.value,
+ varsizeloc.value, itemsize)
+ varsizeloc = edi
+ # now varsizeloc is a register != eax. The size of
+ # the variable part of the array is (varsizeloc << shift)
+ assert arraydescr.basesize >= self.gc_minimal_size_in_nursery
+ constsize = arraydescr.basesize + self.gc_size_of_header
+ force_realignment = (itemsize % WORD) != 0
+ if force_realignment:
+ constsize += WORD - 1
+ mc.LEA_ra(edi.value, (eax.value, varsizeloc.value, shift,
+ constsize))
+ if force_realignment:
+ mc.AND_ri(edi.value, ~(WORD - 1))
+ # now edi contains the total size in bytes, rounded up to a multiple
+ # of WORD, plus nursery_free_adr
+ mc.CMP_rm(edi.value, (X86_64_SCRATCH_REG.value,
+ StmGC.TD_NURSERY_NEXTLIMIT))
+ mc.J_il8(rx86.Conditions['NA'], 0) # patched later
+ jmp_adr1 = mc.get_relative_pos()
+ #
+ # == SLOWPATH ==
+ offset = mc.get_relative_pos() - jmp_adr0
+ assert 0 < offset <= 127
+ mc.overwrite(jmp_adr0-1, chr(offset))
+ # save the gcmap
+ self.push_gcmap(mc, gcmap, mov=True) # mov into RawEspLoc(0)
+ if kind == rewrite.FLAG_ARRAY:
+ mc.MOV_si(WORD, itemsize)
+ mc.MOV(edi, lengthloc)
+ mc.MOV_ri(eax.value, arraydescr.tid)
+ addr = self.malloc_slowpath_varsize
+ else:
+ if kind == rewrite.FLAG_STR:
+ addr = self.malloc_slowpath_str
+ else:
+ assert kind == rewrite.FLAG_UNICODE
+ addr = self.malloc_slowpath_unicode
+ mc.MOV(edi, lengthloc)
+ mc.CALL(imm(addr))
+ mc.JMP_l8(0) # jump to done, patched later
+ jmp_location = mc.get_relative_pos()
+ #
+ # == FASTPATH ==
+ offset = mc.get_relative_pos() - jmp_adr1
+ assert 0 < offset <= 127
+ mc.overwrite(jmp_adr1-1, chr(offset))
+ #
+ # set thread_descriptor->nursery_current
+ mc.MOV_mr((X86_64_SCRATCH_REG.value,
+ StmGC.TD_NURSERY_CURRENT),
+ edi.value)
+ #
+ # write down the tid
+ mc.MOV(mem(eax, 0), imm(arraydescr.tid))
+ # also set private_rev_num:
+ rn = self._get_stm_private_rev_num_addr()
+ self._tl_segment_if_stm(mc)
+ mc.MOV_rj(X86_64_SCRATCH_REG.value, rn)
+ mc.MOV(mem(eax, StmGC.H_REVISION), X86_64_SCRATCH_REG)
+ #
+ # == END ==
+ offset = mc.get_relative_pos() - jmp_location
+ assert 0 < offset <= 127
+ mc.overwrite(jmp_location - 1, chr(offset))
+
+
def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, gcmap):
assert not self.cpu.gc_ll_descr.stm
assert size & (WORD-1) == 0 # must be correctly aligned
@@ -2764,6 +2940,7 @@
self.mc.overwrite(jmp_adr-1, chr(offset))
self.mc.MOV(heap(nursery_free_adr), edi)
+
def malloc_cond_varsize_frame(self, nursery_free_adr, nursery_top_adr,
sizeloc, gcmap):
assert not self.cpu.gc_ll_descr.stm
@@ -2876,6 +3053,22 @@
assert isinstance(reg, RegLoc)
self.mc.MOV_rr(reg.value, ebp.value)
+ def genop_discard_stm_set_revision_gc(self, op, arglocs):
+ base_loc, ofs_loc, size_loc = arglocs
+ assert isinstance(size_loc, ImmedLoc)
+ mc = self.mc
+
+ if IS_X86_32:
+ todo()
+
+ rn = self._get_stm_private_rev_num_addr()
+ self._tl_segment_if_stm(mc)
+ mc.MOV_rj(X86_64_SCRATCH_REG.value, rn)
+
+ dest_addr = AddressLoc(base_loc, ofs_loc)
+ mc.MOV(dest_addr, X86_64_SCRATCH_REG)
+
+
def genop_stm_transaction_break(self, op, arglocs, result_loc):
assert self.cpu.gc_ll_descr.stm
if not we_are_translated():
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -849,8 +849,6 @@
def consider_call_malloc_nursery(self, op):
gc_ll_descr = self.assembler.cpu.gc_ll_descr
- assert gc_ll_descr.get_malloc_slowpath_addr() is not None
- # ^^^ if this returns None, don't translate the rest of this function
#
size_box = op.getarg(0)
assert isinstance(size_box, ConstInt)
@@ -865,15 +863,16 @@
gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before*
self.rm.possibly_free_var(tmp_box)
#
- self.assembler.malloc_cond(
- gc_ll_descr.get_nursery_free_addr(),
- gc_ll_descr.get_nursery_top_addr(),
- size, gcmap)
+ if gc_ll_descr.stm:
+ self.assembler.malloc_cond_stm(size, gcmap)
+ else:
+ self.assembler.malloc_cond(
+ gc_ll_descr.get_nursery_free_addr(),
+ gc_ll_descr.get_nursery_top_addr(),
+ size, gcmap)
def consider_call_malloc_nursery_varsize_frame(self, op):
gc_ll_descr = self.assembler.cpu.gc_ll_descr
- assert gc_ll_descr.get_malloc_slowpath_addr() is not None
- # ^^^ if this returns None, don't translate the rest of this function
#
size_box = op.getarg(0)
assert isinstance(size_box, BoxInt) # we cannot have a const here!
@@ -889,11 +888,13 @@
gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before*
self.rm.possibly_free_var(tmp_box)
#
- gc_ll_descr = self.assembler.cpu.gc_ll_descr
- self.assembler.malloc_cond_varsize_frame(
- gc_ll_descr.get_nursery_free_addr(),
- gc_ll_descr.get_nursery_top_addr(),
- sizeloc, gcmap)
+ if gc_ll_descr.stm:
+ self.assembler.malloc_cond_varsize_frame_stm(sizeloc, gcmap)
+ else:
+ self.assembler.malloc_cond_varsize_frame(
+ gc_ll_descr.get_nursery_free_addr(),
+ gc_ll_descr.get_nursery_top_addr(),
+ sizeloc, gcmap)
def consider_call_malloc_nursery_varsize(self, op):
gc_ll_descr = self.assembler.cpu.gc_ll_descr
@@ -919,11 +920,16 @@
#
itemsize = op.getarg(1).getint()
maxlength = (gc_ll_descr.max_size_of_young_obj - WORD * 2) / itemsize
- self.assembler.malloc_cond_varsize(
- op.getarg(0).getint(),
- gc_ll_descr.get_nursery_free_addr(),
- gc_ll_descr.get_nursery_top_addr(),
- lengthloc, itemsize, maxlength, gcmap, arraydescr)
+ if gc_ll_descr.stm:
+ self.assembler.malloc_cond_varsize_stm(
+ op.getarg(0).getint(),
+ lengthloc, itemsize, maxlength, gcmap, arraydescr)
+ else:
+ self.assembler.malloc_cond_varsize(
+ op.getarg(0).getint(),
+ gc_ll_descr.get_nursery_free_addr(),
+ gc_ll_descr.get_nursery_top_addr(),
+ lengthloc, itemsize, maxlength, gcmap, arraydescr)
def get_gcmap(self, forbidden_regs=[], noregs=False):
frame_depth = self.fm.get_frame_depth()
@@ -1267,6 +1273,16 @@
if isinstance(loc, FrameLoc):
self.fm.hint_frame_locations[box] = loc
+
+ def consider_stm_set_revision_gc(self, op):
+ ofs, size, _ = unpack_fielddescr(op.getdescr())
+ ofs_loc = imm(ofs)
+ size_loc = imm(size)
+ assert isinstance(size_loc, ImmedLoc)
+ args = op.getarglist()
+ base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
+ self.perform_discard(op, [base_loc, ofs_loc, size_loc])
+
def consider_stm_transaction_break(self, op):
# XXX use the extra 3 words in the stm resume buffer to save
# up to 3 registers, too. For now we just flush them all.
diff --git a/rpython/jit/metainterp/executor.py b/rpython/jit/metainterp/executor.py
--- a/rpython/jit/metainterp/executor.py
+++ b/rpython/jit/metainterp/executor.py
@@ -348,6 +348,7 @@
rop.CALL_MALLOC_NURSERY_VARSIZE_FRAME,
rop.LABEL,
rop.STM_TRANSACTION_BREAK,
+ rop.STM_SET_REVISION_GC,
): # list of opcodes never executed by pyjitpl
continue
raise AssertionError("missing %r" % (key,))
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -511,6 +511,7 @@
'RECORD_KNOWN_CLASS/2', # [objptr, clsptr]
'KEEPALIVE/1',
'STM_TRANSACTION_BREAK/0',
+ 'STM_SET_REVISION_GC/1d', # not really GC, writes raw to the header
'_CANRAISE_FIRST', # ----- start of can_raise operations -----
'_CALL_FIRST',
diff --git a/rpython/memory/gc/stmgc.py b/rpython/memory/gc/stmgc.py
--- a/rpython/memory/gc/stmgc.py
+++ b/rpython/memory/gc/stmgc.py
@@ -9,6 +9,7 @@
from rpython.rtyper.lltypesystem.lloperation import llop
from rpython.rlib.debug import ll_assert
from rpython.rlib.rarithmetic import LONG_BIT, r_uint
+from rpython.rtyper.extregistry import ExtRegistryEntry
WORD = LONG_BIT // 8
NULL = llmemory.NULL
@@ -36,8 +37,11 @@
malloc_zero_filled = True
#gcflag_extra = GCFLAG_EXTRA
-
- GCHDR = lltype.GcStruct(
+ # SYNC with et.h
+ TD_NURSERY_CURRENT = 80
+ TD_NURSERY_NEXTLIMIT = 88
+
+ GCHDR = lltype.Struct(
'GCPTR',
('h_tid', lltype.Unsigned),
('h_revision', lltype.Signed),
@@ -79,6 +83,13 @@
FX_MASK = 65535
+ # keep in sync with nursery.h:
+
+ # maximum size of object in nursery (is actually dependent on
+ # nursery size, but this should work)
+ GC_NURSERY_SECTION = 135168
+
+
def get_type_id(self, obj):
return llop.stm_get_tid(llgroup.HALFWORD, obj)
@@ -151,8 +162,7 @@
@classmethod
def JIT_max_size_of_young_obj(cls):
- # XXX there is actually a maximum, check
- return None
+ return cls.GC_NURSERY_SECTION
@classmethod
def JIT_minimal_size_in_nursery(cls):
diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py
--- a/rpython/rlib/rstm.py
+++ b/rpython/rlib/rstm.py
@@ -5,6 +5,11 @@
from rpython.rlib.jit import dont_look_inside
@dont_look_inside
+def get_thread_descriptor_adr():
+ addr = llop.stm_get_adr_of_thread_descriptor(llmemory.Address)
+ return rffi.cast(lltype.Signed, addr)
+
+@dont_look_inside
def get_adr_of_private_rev_num():
addr = llop.stm_get_adr_of_private_rev_num(llmemory.Address)
return rffi.cast(lltype.Signed, addr)
diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py
--- a/rpython/rtyper/llinterp.py
+++ b/rpython/rtyper/llinterp.py
@@ -951,6 +951,7 @@
op_stm_barrier = _stm_not_implemented
op_stm_push_root = _stm_not_implemented
op_stm_pop_root_into = _stm_not_implemented
+ op_stm_get_adr_of_thread_descriptor = _stm_not_implemented
op_stm_get_adr_of_read_barrier_cache = _stm_not_implemented
op_stm_get_adr_of_private_rev_num = _stm_not_implemented
op_stm_enter_callback_call = _stm_not_implemented
diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py
--- a/rpython/rtyper/lltypesystem/lloperation.py
+++ b/rpython/rtyper/lltypesystem/lloperation.py
@@ -448,6 +448,7 @@
'stm_get_adr_of_private_rev_num':LLOp(),
'stm_get_adr_of_read_barrier_cache':LLOp(),
+ 'stm_get_adr_of_thread_descriptor': LLOp(),
'stm_ignored_start': LLOp(canrun=True),
'stm_ignored_stop': LLOp(canrun=True),
diff --git a/rpython/translator/c/funcgen.py b/rpython/translator/c/funcgen.py
--- a/rpython/translator/c/funcgen.py
+++ b/rpython/translator/c/funcgen.py
@@ -590,6 +590,7 @@
OP_STM_PTR_EQ = _OP_STM
OP_STM_PUSH_ROOT = _OP_STM
OP_STM_POP_ROOT_INTO = _OP_STM
+ OP_STM_GET_ADR_OF_THREAD_DESCRIPTOR = _OP_STM
OP_STM_GET_ROOT_STACK_TOP = _OP_STM
OP_STM_GET_ADR_OF_PRIVATE_REV_NUM = _OP_STM
OP_STM_GET_ADR_OF_READ_BARRIER_CACHE= _OP_STM
diff --git a/rpython/translator/stm/funcgen.py b/rpython/translator/stm/funcgen.py
--- a/rpython/translator/stm/funcgen.py
+++ b/rpython/translator/stm/funcgen.py
@@ -114,6 +114,11 @@
return '%s = (%s)stm_pop_root();' % (
arg0, cdecl(funcgen.lltypename(op.args[0]), ''))
+def stm_get_adr_of_thread_descriptor(funcgen, op):
+ result = funcgen.expr(op.result)
+ return '%s = (%s)&thread_descriptor;' % (
+ result, cdecl(funcgen.lltypename(op.result), ''))
+
def stm_get_root_stack_top(funcgen, op):
result = funcgen.expr(op.result)
return '%s = (%s)&stm_shadowstack;' % (
More information about the pypy-commit
mailing list