[pypy-commit] pypy stmgc-c4: start implementing fastpath for nursery allocations (WIP)

Raemi noreply at buildbot.pypy.org
Wed Oct 16 17:54:47 CEST 2013


Author: Remi Meier <remi.meier at gmail.com>
Branch: stmgc-c4
Changeset: r67430:53c3d84d1993
Date: 2013-10-16 17:53 +0200
http://bitbucket.org/pypy/pypy/changeset/53c3d84d1993/

Log:	start implementing fastpath for nursery allocations (WIP); copy over
	rewrite tests for stm (need fixing)

diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -511,11 +511,7 @@
             self._make_layoutbuilder()
             self._make_gcrootmap()
             self._setup_gcclass()
-            if not self.stm:
-                # XXX: not needed with stm/shadowstack??
-                self._setup_tid()
-            else:
-                self.fielddescr_tid = None
+            self._setup_tid()
         self._setup_write_barrier()
         self._setup_str()
         self._make_functions(really_not_translated)
@@ -534,10 +530,8 @@
     def _initialize_for_tests(self):
         self.layoutbuilder = None
         self.fielddescr_tid = AbstractDescr()
-        if self.stm:
-            self.max_size_of_young_obj = None
-        else:
-            self.max_size_of_young_obj = 1000
+        self.fielddescr_rev = AbstractDescr()
+        self.max_size_of_young_obj = 1000
         self.GCClass = None
         self.gcheaderbuilder = None
         self.HDRPTR = None
@@ -572,7 +566,15 @@
         assert self.GCClass.inline_simple_malloc_varsize
 
     def _setup_tid(self):
-        self.fielddescr_tid = get_field_descr(self, self.GCClass.HDR, 'tid')
+        if not self.stm:
+            self.fielddescr_tid = get_field_descr(self, self.GCClass.HDR, 'tid')
+            self.fielddescr_rev = None
+        else:
+            self.fielddescr_tid = get_field_descr(self, self.GCClass.GCHDR,
+                                                  'h_tid')
+            self.fielddescr_rev = get_field_descr(self, self.GCClass.GCHDR,
+                                                  'h_revision')
+                        
         frame_tid = self.layoutbuilder.get_type_id(jitframe.JITFRAME)
         self.translator._jit2gc['frame_tid'] = frame_tid
 
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -52,6 +52,7 @@
         # barriers.  We do this on each "basic block" of operations, which in
         # this case means between CALLs or unknown-size mallocs.
         #
+        # SYNC with stmrewrite.py!
         for op in operations:
             if op.getopnum() == rop.DEBUG_MERGE_POINT:
                 continue
diff --git a/rpython/jit/backend/llsupport/stmrewrite.py b/rpython/jit/backend/llsupport/stmrewrite.py
--- a/rpython/jit/backend/llsupport/stmrewrite.py
+++ b/rpython/jit/backend/llsupport/stmrewrite.py
@@ -88,10 +88,11 @@
                 # it immediately
                 if (op.getopnum() == rop.GUARD_NOT_FORCED
                     and insert_transaction_break):
-                    # insert transaction_break after GUARD after call
+                    # insert transaction_break after GUARD after calls
                     self.newops.append(
                         ResOperation(rop.STM_TRANSACTION_BREAK, [], None))
                     insert_transaction_break = False
+                    self.emitting_an_operation_that_can_collect()
                 else:
                     assert insert_transaction_break is False
 
@@ -118,6 +119,7 @@
                 continue
             # ----------  calls  ----------
             if op.is_call():
+                self.emitting_an_operation_that_can_collect()
                 if (op.getopnum() == rop.CALL_MAY_FORCE or
                     op.getopnum() == rop.CALL_ASSEMBLER or
                     op.getopnum() == rop.CALL_RELEASE_GIL):
@@ -142,7 +144,6 @@
                         self.fallback_inevitable(op)
                     else:
                         self.newops.append(op)
-                self.known_category.clear()
                 continue
             # ----------  copystrcontent  ----------
             if op.getopnum() in (rop.COPYSTRCONTENT,
@@ -155,7 +156,8 @@
                     continue
             # ----------  labels  ----------
             if op.getopnum() == rop.LABEL:
-                self.known_category.clear()
+                self.emitting_an_operation_that_can_collect()
+                self.known_lengths.clear()
                 self.always_inevitable = False
                 self.newops.append(op)
                 continue
@@ -163,6 +165,7 @@
             if op.getopnum() == rop.JUMP:
                 self.newops.append(
                     ResOperation(rop.STM_TRANSACTION_BREAK, [], None))
+                # self.emitting_an_operation_that_can_collect()
                 self.newops.append(op)
                 continue
             # ----------  finish, other ignored ops  ----------
@@ -185,6 +188,10 @@
         assert not insert_transaction_break
         return self.newops
 
+    def emitting_an_operation_that_can_collect(self):
+        GcRewriterAssembler.emitting_an_operation_that_can_collect(self)
+        self.known_category.clear()
+
     def write_to_read_categories(self):
         for v, c in self.known_category.items():
             if c == 'W':
@@ -197,13 +204,14 @@
             if c == 'R':
                 self.known_category[v] = 'P'
 
-##    def gen_malloc_nursery_varsize_frame(self, sizebox, v_result, tid):
-##        """ For now don't generate CALL_MALLOC_NURSERY_VARSIZE_FRAME
-##        """
-##        addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_big_fixedsize')
-##        args = [ConstInt(addr), sizebox, ConstInt(tid)]
-##        descr = self.gc_ll_descr.malloc_big_fixedsize_descr
-##        self._gen_call_malloc_gc(args, v_result, descr)
+    def gen_initialize_tid(self, v_newgcobj, tid):
+        GcRewriterAssembler.gen_initialize_tid(self, v_newgcobj, tid)
+        if self.gc_ll_descr.fielddescr_rev is not None:
+            op = ResOperation(rop.STM_SET_REVISION_GC, [v_newgcobj,], None,
+                              descr=self.gc_ll_descr.fielddescr_rev)
+            self.newops.append(op)
+            
+
                 
     def gen_write_barrier(self, v):
         raise NotImplementedError
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -63,6 +63,7 @@
         register_known_gctype(self.cpu, o_vtable, O)
         #
         tiddescr = self.gc_ll_descr.fielddescr_tid
+        revdescr = self.gc_ll_descr.fielddescr_rev
         wbdescr = self.gc_ll_descr.write_barrier_descr
         WORD = globals()['WORD']
         #
diff --git a/rpython/jit/backend/llsupport/test/test_stmrewrite.py b/rpython/jit/backend/llsupport/test/test_stmrewrite.py
--- a/rpython/jit/backend/llsupport/test/test_stmrewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_stmrewrite.py
@@ -744,7 +744,6 @@
                                     fakeextrainfo())
         for op, guarded in [
                 ("call(123, descr=calldescr2)", False),
-                ("call_assembler(123, descr=casmdescr)", True),
                 ("call_may_force(123, descr=calldescr2)", True),
                 ("call_loopinvariant(123, descr=calldescr2)", False),
                 ]:
@@ -770,6 +769,27 @@
                 jump(p1)
             """ % (op, guard, tr_break), calldescr2=calldescr2)
 
+    def test_call_assembler(self):
+        self.check_rewrite("""
+        [i0, f0]
+        i2 = call_assembler(i0, f0, descr=casmdescr)
+        guard_not_forced()[] 
+        """, """
+        [i0, f0]
+        i1 = getfield_gc(ConstClass(frame_info), descr=jfi_frame_size)
+        p1 = call_malloc_nursery_varsize_frame(i1)
+        setfield_gc(p1, 0, descr=tiddescr)
+        stm_set_revision_gc(p1, descr=revdescr)
+        i2 = getfield_gc(ConstClass(frame_info), descr=jfi_frame_depth)
+        setfield_gc(p1, i2, descr=framelendescr)
+        setfield_gc(p1, ConstClass(frame_info), descr=jf_frame_info)
+        setarrayitem_gc(p1, 0, i0, descr=signedframedescr)
+        setarrayitem_gc(p1, 1, f0, descr=floatframedescr)
+        i3 = call_assembler(p1, descr=casmdescr)
+        guard_not_forced() []
+        stm_transaction_break()
+        """)
+
     def test_ptr_eq_null(self):
         self.check_rewrite("""
             [p1, p2]
@@ -833,3 +853,273 @@
     def test_ptr_eq_other_direct_cases(self):
         py.test.skip("can also keep ptr_eq if both args are L or W, "
                      "or if one arg is freshly malloced")
+
+    # ----------- tests copied from test_rewrite.py -------------
+    def test_rewrite_assembler_new_to_malloc(self):
+        self.check_rewrite("""
+            [p1]
+            p0 = new(descr=sdescr)
+        """, """
+            [p1]
+            p0 = call_malloc_nursery(%(sdescr.size)d)
+            setfield_gc(p0, 1234, descr=tiddescr)
+            stm_set_revision_gc(p0, descr=revdescr)
+        """)
+
+    def test_rewrite_assembler_new3_to_malloc(self):
+        self.check_rewrite("""
+            []
+            p0 = new(descr=sdescr)
+            p1 = new(descr=tdescr)
+            p2 = new(descr=sdescr)
+            jump()
+        """, """
+            []
+            p0 = call_malloc_nursery(   \
+                               %(sdescr.size + tdescr.size + sdescr.size)d)
+            setfield_gc(p0, 1234, descr=tiddescr)
+            p1 = int_add(p0, %(sdescr.size)d)
+            setfield_gc(p1, 5678, descr=tiddescr)
+            p2 = int_add(p1, %(tdescr.size)d)
+            setfield_gc(p2, 1234, descr=tiddescr)
+            jump()
+        """)
+
+    def test_rewrite_assembler_new_array_fixed_to_malloc(self):
+        self.check_rewrite("""
+            []
+            p0 = new_array(10, descr=adescr)
+            jump()
+        """, """
+            []
+            p0 = call_malloc_nursery(    \
+                                %(adescr.basesize + 10 * adescr.itemsize)d)
+            setfield_gc(p0, 4321, descr=tiddescr)
+            setfield_gc(p0, 10, descr=alendescr)
+            jump()
+        """)
+
+    def test_rewrite_assembler_new_and_new_array_fixed_to_malloc(self):
+        self.check_rewrite("""
+            []
+            p0 = new(descr=sdescr)
+            p1 = new_array(10, descr=adescr)
+            jump()
+        """, """
+            []
+            p0 = call_malloc_nursery(                                  \
+                                %(sdescr.size +                        \
+                                  adescr.basesize + 10 * adescr.itemsize)d)
+            setfield_gc(p0, 1234, descr=tiddescr)
+            p1 = int_add(p0, %(sdescr.size)d)
+            setfield_gc(p1, 4321, descr=tiddescr)
+            setfield_gc(p1, 10, descr=alendescr)
+            jump()
+        """)
+
+    def test_rewrite_assembler_round_up(self):
+        self.check_rewrite("""
+            []
+            p0 = new_array(6, descr=bdescr)
+            jump()
+        """, """
+            []
+            p0 = call_malloc_nursery(%(bdescr.basesize + 8)d)
+            setfield_gc(p0, 8765, descr=tiddescr)
+            setfield_gc(p0, 6, descr=blendescr)
+            jump()
+        """)
+
+    def test_rewrite_assembler_round_up_always(self):
+        self.check_rewrite("""
+            []
+            p0 = new_array(5, descr=bdescr)
+            p1 = new_array(5, descr=bdescr)
+            p2 = new_array(5, descr=bdescr)
+            p3 = new_array(5, descr=bdescr)
+            jump()
+        """, """
+            []
+            p0 = call_malloc_nursery(%(4 * (bdescr.basesize + 8))d)
+            setfield_gc(p0, 8765, descr=tiddescr)
+            setfield_gc(p0, 5, descr=blendescr)
+            p1 = int_add(p0, %(bdescr.basesize + 8)d)
+            setfield_gc(p1, 8765, descr=tiddescr)
+            setfield_gc(p1, 5, descr=blendescr)
+            p2 = int_add(p1, %(bdescr.basesize + 8)d)
+            setfield_gc(p2, 8765, descr=tiddescr)
+            setfield_gc(p2, 5, descr=blendescr)
+            p3 = int_add(p2, %(bdescr.basesize + 8)d)
+            setfield_gc(p3, 8765, descr=tiddescr)
+            setfield_gc(p3, 5, descr=blendescr)
+            jump()
+        """)
+
+    def test_rewrite_assembler_minimal_size(self):
+        self.check_rewrite("""
+            []
+            p0 = new(descr=edescr)
+            p1 = new(descr=edescr)
+            jump()
+        """, """
+            []
+            p0 = call_malloc_nursery(%(4*WORD)d)
+            setfield_gc(p0, 9000, descr=tiddescr)
+            p1 = int_add(p0, %(2*WORD)d)
+            setfield_gc(p1, 9000, descr=tiddescr)
+            jump()
+        """)
+
+    def test_rewrite_assembler_variable_size(self):
+        self.check_rewrite("""
+            [i0]
+            p0 = new_array(i0, descr=bdescr)
+            jump(i0)
+        """, """
+            [i0]
+            p0 = call_malloc_nursery_varsize(0, 1, i0, descr=bdescr)
+            setfield_gc(p0, i0, descr=blendescr)
+            jump(i0)
+        """)
+
+    def test_rewrite_new_string(self):
+        self.check_rewrite("""
+        [i0]
+        p0 = newstr(i0)
+        jump(i0)
+        """, """
+        [i0]
+        p0 = call_malloc_nursery_varsize(1, 1, i0, descr=strdescr)
+        setfield_gc(p0, i0, descr=strlendescr)
+        jump(i0)
+        """)
+
+    def test_rewrite_assembler_nonstandard_array(self):
+        # a non-standard array is a bit hard to get; e.g. GcArray(Float)
+        # is like that on Win32, but not on Linux.  Build one manually...
+        NONSTD = lltype.GcArray(lltype.Float)
+        nonstd_descr = get_array_descr(self.gc_ll_descr, NONSTD)
+        nonstd_descr.tid = 6464
+        nonstd_descr.basesize = 64      # <= hacked
+        nonstd_descr.itemsize = 8
+        nonstd_descr_gcref = 123
+        self.check_rewrite("""
+            [i0]
+            p0 = new_array(i0, descr=nonstd_descr)
+            jump(i0)
+        """, """
+            [i0]
+            p0 = call_malloc_gc(ConstClass(malloc_array_nonstandard), \
+                                64, 8,                                \
+                                %(nonstd_descr.lendescr.offset)d,     \
+                                6464, i0,                             \
+                                descr=malloc_array_nonstandard_descr)
+            jump(i0)
+        """, nonstd_descr=nonstd_descr)
+
+    def test_rewrite_assembler_maximal_size_1(self):
+        self.gc_ll_descr.max_size_of_young_obj = 100
+        self.check_rewrite("""
+            []
+            p0 = new_array(103, descr=bdescr)
+            jump()
+        """, """
+            []
+            p0 = call_malloc_gc(ConstClass(malloc_array), 1,  \
+                                %(bdescr.tid)d, 103,          \
+                                descr=malloc_array_descr)
+            jump()
+        """)
+
+    def test_rewrite_assembler_maximal_size_2(self):
+        self.gc_ll_descr.max_size_of_young_obj = 300
+        self.check_rewrite("""
+            []
+            p0 = new_array(101, descr=bdescr)
+            p1 = new_array(102, descr=bdescr)  # two new_arrays can be combined
+            p2 = new_array(103, descr=bdescr)  # but not all three
+            jump()
+        """, """
+            []
+            p0 = call_malloc_nursery(    \
+                              %(2 * (bdescr.basesize + 104))d)
+            setfield_gc(p0, 8765, descr=tiddescr)
+            setfield_gc(p0, 101, descr=blendescr)
+            p1 = int_add(p0, %(bdescr.basesize + 104)d)
+            setfield_gc(p1, 8765, descr=tiddescr)
+            setfield_gc(p1, 102, descr=blendescr)
+            p2 = call_malloc_nursery(    \
+                              %(bdescr.basesize + 104)d)
+            setfield_gc(p2, 8765, descr=tiddescr)
+            setfield_gc(p2, 103, descr=blendescr)
+            jump()
+        """)
+
+    def test_rewrite_assembler_huge_size(self):
+        # "huge" is defined as "larger than 0xffffff bytes, or 16MB"
+        self.check_rewrite("""
+            []
+            p0 = new_array(20000000, descr=bdescr)
+            jump()
+        """, """
+            []
+            p0 = call_malloc_gc(ConstClass(malloc_array), 1, \
+                                %(bdescr.tid)d, 20000000,    \
+                                descr=malloc_array_descr)
+            jump()
+        """)
+
+    def test_new_with_vtable(self):
+        self.check_rewrite("""
+            []
+            p0 = new_with_vtable(ConstClass(o_vtable))
+            jump()
+        """, """
+            [p1]
+            p0 = call_malloc_nursery(104)      # rounded up
+            setfield_gc(p0, 9315, descr=tiddescr)
+            setfield_gc(p0, ConstClass(o_vtable), descr=vtable_descr)
+            jump()
+        """)
+
+    def test_new_with_vtable_too_big(self):
+        self.gc_ll_descr.max_size_of_young_obj = 100
+        self.check_rewrite("""
+            []
+            p0 = new_with_vtable(ConstClass(o_vtable))
+            jump()
+        """, """
+            [p1]
+            p0 = call_malloc_gc(ConstClass(malloc_big_fixedsize), 104, 9315, \
+                                descr=malloc_big_fixedsize_descr)
+            setfield_gc(p0, ConstClass(o_vtable), descr=vtable_descr)
+            jump()
+        """)
+
+    def test_rewrite_assembler_newstr_newunicode(self):
+        self.check_rewrite("""
+            [i2]
+            p0 = newstr(14)
+            p1 = newunicode(10)
+            p2 = newunicode(i2)
+            p3 = newstr(i2)
+            jump()
+        """, """
+            [i2]
+            p0 = call_malloc_nursery(                                \
+                      %(strdescr.basesize + 16 * strdescr.itemsize + \
+                        unicodedescr.basesize + 10 * unicodedescr.itemsize)d)
+            setfield_gc(p0, %(strdescr.tid)d, descr=tiddescr)
+            setfield_gc(p0, 14, descr=strlendescr)
+            p1 = int_add(p0, %(strdescr.basesize + 16 * strdescr.itemsize)d)
+            setfield_gc(p1, %(unicodedescr.tid)d, descr=tiddescr)
+            setfield_gc(p1, 10, descr=unicodelendescr)
+            p2 = call_malloc_nursery_varsize(2, 4, i2, \
+                                descr=unicodedescr)
+            setfield_gc(p2, i2, descr=unicodelendescr)
+            p3 = call_malloc_nursery_varsize(1, 1, i2, \
+                                descr=strdescr)
+            setfield_gc(p3, i2, descr=strlendescr)
+            jump()
+        """)
+
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -254,11 +254,18 @@
         mc.J_il(rx86.Conditions['Z'], 0xfffff) # patched later
         jz_location = mc.get_relative_pos()
         #
-        nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
         self._reload_frame_if_necessary(mc, align_stack=True)
         self.set_extra_stack_depth(mc, 0)
         self._pop_all_regs_from_frame(mc, [eax, edi], self.cpu.supports_floats)
-        mc.MOV(edi, heap(nursery_free_adr))   # load this in EDI
+        if self.cpu.gc_ll_descr.stm:
+            # load nursery_current into EDI
+            self._load_stm_thread_descriptor(mc, X86_64_SCRATCH_REG)
+            mc.MOV_rm(edi.value, 
+                      (X86_64_SCRATCH_REG.value, 
+                       StmGC.TD_NURSERY_CURRENT))
+        else:
+            nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
+            mc.MOV(edi, heap(nursery_free_adr))   # load this in EDI
         # clear the gc pattern
         mc.MOV_bi(ofs, 0)
         mc.RET()
@@ -2748,6 +2755,175 @@
         # XXX if the next operation is a GUARD_NO_EXCEPTION, we should
         # somehow jump over it too in the fast path
 
+    def _load_stm_thread_descriptor(self, mc, loc):
+        assert self.cpu.gc_ll_descr.stm
+        assert isinstance(loc, RegLoc)
+        
+        td = self._get_stm_tl(rstm.get_thread_descriptor_adr())
+        self._tl_segment_if_stm(mc)
+        mc.MOV(loc, heap(td))
+        mc.MOV_rm(loc.value, (loc.value, 0))
+
+    def _cond_allocate_in_nursery_or_slowpath(self, mc, gcmap):
+        # needed for slowpath:
+        # eax = nursery_current
+        # edi = nursery_current + size
+        # needed here:
+        # X86_64_SCRATCH_REG = thread_descriptor
+        #
+        # cmp nursery_current+size > nursery_nextlimit
+        mc.CMP_rm(edi.value, (X86_64_SCRATCH_REG.value, 
+                              StmGC.TD_NURSERY_NEXTLIMIT))
+        mc.J_il8(rx86.Conditions['NA'], 0) # patched later
+        jmp_adr = mc.get_relative_pos()
+        #
+        # == SLOWPATH ==
+        # save the gcmap
+        self.push_gcmap(mc, gcmap, mov=True)
+        mc.CALL(imm(self.malloc_slowpath))
+        mc.JMP_l8(0)
+        jmp2_adr = mc.get_relative_pos()
+        #
+        # == FASTPATH ==
+        offset = mc.get_relative_pos() - jmp_adr
+        assert 0 < offset <= 127
+        mc.overwrite(jmp_adr-1, chr(offset))
+        #
+        # thread_descriptor->nursery_current = nursery_current+size
+        mc.MOV_mr((X86_64_SCRATCH_REG.value,
+                   StmGC.TD_NURSERY_CURRENT),
+                   edi.value)
+        #
+        # END
+        offset = mc.get_relative_pos() - jmp2_adr
+        assert 0 < offset <= 127
+        mc.overwrite(jmp2_adr-1, chr(offset))
+        
+    def malloc_cond_stm(self, size, gcmap):
+        assert self.cpu.gc_ll_descr.stm
+        assert size & (WORD-1) == 0     # must be correctly aligned
+        mc = self.mc
+        # load nursery_current and nursery_nextlimit
+        self._load_stm_thread_descriptor(mc, X86_64_SCRATCH_REG)
+        mc.MOV_rm(eax.value, 
+                  (X86_64_SCRATCH_REG.value,
+                   StmGC.TD_NURSERY_CURRENT))
+        mc.LEA_rm(edi.value, (eax.value, size))
+        #
+        # eax=nursery_current, edi=nursery_current+size
+        self._cond_allocate_in_nursery_or_slowpath(mc, gcmap)
+
+    def malloc_cond_varsize_frame_stm(self, sizeloc, gcmap):
+        assert self.cpu.gc_ll_descr.stm
+        mc = self.mc
+        self._load_stm_thread_descriptor(mc, X86_64_SCRATCH_REG)
+        if sizeloc is eax:
+            self.mc.MOV(edi, sizeloc)
+            sizeloc = edi
+        self.mc.MOV_rm(eax.value, (X86_64_SCRATCH_REG.value, 
+                                   StmGC.TD_NURSERY_CURRENT))
+        if sizeloc is edi:
+            self.mc.ADD_rr(edi.value, eax.value)
+        else:
+            self.mc.LEA_ra(edi.value, (eax.value, sizeloc.value, 0, 0))
+        #
+        # eax=nursery_current, edi=nursery_current+size
+        self._cond_allocate_in_nursery_or_slowpath(mc, gcmap)
+
+    def malloc_cond_varsize_stm(self, kind, lengthloc, itemsize,
+                                maxlength, gcmap, arraydescr):
+        assert self.cpu.gc_ll_descr.stm
+        from rpython.jit.backend.llsupport.descr import ArrayDescr
+        assert isinstance(arraydescr, ArrayDescr)
+
+        mc = self.mc
+        # lengthloc is the length of the array, which we must not modify!
+        assert lengthloc is not eax and lengthloc is not edi
+        if isinstance(lengthloc, RegLoc):
+            varsizeloc = lengthloc
+        else:
+            mc.MOV(edi, lengthloc)
+            varsizeloc = edi
+
+        mc.CMP(varsizeloc, imm(maxlength))
+        mc.J_il8(rx86.Conditions['A'], 0) # patched later
+        jmp_adr0 = mc.get_relative_pos()
+
+        self._load_stm_thread_descriptor(mc, X86_64_SCRATCH_REG)
+        mc.MOV_rm(eax.value, 
+                  (X86_64_SCRATCH_REG.value, 
+                   StmGC.TD_NURSERY_CURRENT))
+
+        if valid_addressing_size(itemsize):
+            shift = get_scale(itemsize)
+        else:
+            shift = self._imul_const_scaled(mc, edi.value,
+                                            varsizeloc.value, itemsize)
+            varsizeloc = edi
+        # now varsizeloc is a register != eax.  The size of
+        # the variable part of the array is (varsizeloc << shift)
+        assert arraydescr.basesize >= self.gc_minimal_size_in_nursery
+        constsize = arraydescr.basesize + self.gc_size_of_header
+        force_realignment = (itemsize % WORD) != 0
+        if force_realignment:
+            constsize += WORD - 1
+        mc.LEA_ra(edi.value, (eax.value, varsizeloc.value, shift,
+                                   constsize))
+        if force_realignment:
+            mc.AND_ri(edi.value, ~(WORD - 1))
+        # now edi contains the total size in bytes, rounded up to a multiple
+        # of WORD, plus nursery_current (loaded into eax above)
+        mc.CMP_rm(edi.value, (X86_64_SCRATCH_REG.value, 
+                              StmGC.TD_NURSERY_NEXTLIMIT))
+        mc.J_il8(rx86.Conditions['NA'], 0) # patched later
+        jmp_adr1 = mc.get_relative_pos()
+        #
+        # == SLOWPATH ==
+        offset = mc.get_relative_pos() - jmp_adr0
+        assert 0 < offset <= 127
+        mc.overwrite(jmp_adr0-1, chr(offset))
+        # save the gcmap
+        self.push_gcmap(mc, gcmap, mov=True)   # mov into RawEspLoc(0)
+        if kind == rewrite.FLAG_ARRAY:
+            mc.MOV_si(WORD, itemsize)
+            mc.MOV(edi, lengthloc)
+            mc.MOV_ri(eax.value, arraydescr.tid)
+            addr = self.malloc_slowpath_varsize
+        else:
+            if kind == rewrite.FLAG_STR:
+                addr = self.malloc_slowpath_str
+            else:
+                assert kind == rewrite.FLAG_UNICODE
+                addr = self.malloc_slowpath_unicode
+            mc.MOV(edi, lengthloc)
+        mc.CALL(imm(addr))
+        mc.JMP_l8(0)      # jump to done, patched later
+        jmp_location = mc.get_relative_pos()
+        #
+        # == FASTPATH ==
+        offset = mc.get_relative_pos() - jmp_adr1
+        assert 0 < offset <= 127
+        mc.overwrite(jmp_adr1-1, chr(offset))
+        #
+        # set thread_descriptor->nursery_current
+        mc.MOV_mr((X86_64_SCRATCH_REG.value,
+                   StmGC.TD_NURSERY_CURRENT),
+                   edi.value)
+        #
+        # write down the tid
+        mc.MOV(mem(eax, 0), imm(arraydescr.tid))
+        # also set private_rev_num:
+        rn = self._get_stm_private_rev_num_addr()
+        self._tl_segment_if_stm(mc)
+        mc.MOV_rj(X86_64_SCRATCH_REG.value, rn)
+        mc.MOV(mem(eax, StmGC.H_REVISION), X86_64_SCRATCH_REG)
+        #
+        # == END ==
+        offset = mc.get_relative_pos() - jmp_location
+        assert 0 < offset <= 127
+        mc.overwrite(jmp_location - 1, chr(offset))
+
+    
     def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, gcmap):
         assert not self.cpu.gc_ll_descr.stm
         assert size & (WORD-1) == 0     # must be correctly aligned
@@ -2764,6 +2940,7 @@
         self.mc.overwrite(jmp_adr-1, chr(offset))
         self.mc.MOV(heap(nursery_free_adr), edi)
 
+        
     def malloc_cond_varsize_frame(self, nursery_free_adr, nursery_top_adr,
                                   sizeloc, gcmap):
         assert not self.cpu.gc_ll_descr.stm
@@ -2876,6 +3053,22 @@
         assert isinstance(reg, RegLoc)
         self.mc.MOV_rr(reg.value, ebp.value)
 
+    def genop_discard_stm_set_revision_gc(self, op, arglocs):
+        base_loc, ofs_loc, size_loc = arglocs
+        assert isinstance(size_loc, ImmedLoc)
+        mc = self.mc
+
+        if IS_X86_32:
+            todo()
+            
+        rn = self._get_stm_private_rev_num_addr()
+        self._tl_segment_if_stm(mc)
+        mc.MOV_rj(X86_64_SCRATCH_REG.value, rn)
+
+        dest_addr = AddressLoc(base_loc, ofs_loc)
+        mc.MOV(dest_addr, X86_64_SCRATCH_REG)
+
+        
     def genop_stm_transaction_break(self, op, arglocs, result_loc):
         assert self.cpu.gc_ll_descr.stm
         if not we_are_translated():
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -849,8 +849,6 @@
 
     def consider_call_malloc_nursery(self, op):
         gc_ll_descr = self.assembler.cpu.gc_ll_descr
-        assert gc_ll_descr.get_malloc_slowpath_addr() is not None
-        # ^^^ if this returns None, don't translate the rest of this function
         #
         size_box = op.getarg(0)
         assert isinstance(size_box, ConstInt)
@@ -865,15 +863,16 @@
         gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before*
         self.rm.possibly_free_var(tmp_box)
         #
-        self.assembler.malloc_cond(
-            gc_ll_descr.get_nursery_free_addr(),
-            gc_ll_descr.get_nursery_top_addr(),
-            size, gcmap)
+        if gc_ll_descr.stm:
+            self.assembler.malloc_cond_stm(size, gcmap)
+        else:
+            self.assembler.malloc_cond(
+                gc_ll_descr.get_nursery_free_addr(),
+                gc_ll_descr.get_nursery_top_addr(),
+                size, gcmap)
 
     def consider_call_malloc_nursery_varsize_frame(self, op):
         gc_ll_descr = self.assembler.cpu.gc_ll_descr
-        assert gc_ll_descr.get_malloc_slowpath_addr() is not None
-        # ^^^ if this returns None, don't translate the rest of this function
         #
         size_box = op.getarg(0)
         assert isinstance(size_box, BoxInt) # we cannot have a const here!
@@ -889,11 +888,13 @@
         gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before*
         self.rm.possibly_free_var(tmp_box)
         #
-        gc_ll_descr = self.assembler.cpu.gc_ll_descr
-        self.assembler.malloc_cond_varsize_frame(
-            gc_ll_descr.get_nursery_free_addr(),
-            gc_ll_descr.get_nursery_top_addr(),
-            sizeloc, gcmap)
+        if gc_ll_descr.stm:
+            self.assembler.malloc_cond_varsize_frame_stm(sizeloc, gcmap)
+        else:
+            self.assembler.malloc_cond_varsize_frame(
+                gc_ll_descr.get_nursery_free_addr(),
+                gc_ll_descr.get_nursery_top_addr(),
+                sizeloc, gcmap)
 
     def consider_call_malloc_nursery_varsize(self, op):
         gc_ll_descr = self.assembler.cpu.gc_ll_descr
@@ -919,11 +920,16 @@
         #
         itemsize = op.getarg(1).getint()
         maxlength = (gc_ll_descr.max_size_of_young_obj - WORD * 2) / itemsize
-        self.assembler.malloc_cond_varsize(
-            op.getarg(0).getint(),
-            gc_ll_descr.get_nursery_free_addr(),
-            gc_ll_descr.get_nursery_top_addr(),
-            lengthloc, itemsize, maxlength, gcmap, arraydescr)
+        if gc_ll_descr.stm:
+            self.assembler.malloc_cond_varsize_stm(
+                op.getarg(0).getint(), 
+                lengthloc, itemsize, maxlength, gcmap, arraydescr)
+        else:
+            self.assembler.malloc_cond_varsize(
+                op.getarg(0).getint(),
+                gc_ll_descr.get_nursery_free_addr(),
+                gc_ll_descr.get_nursery_top_addr(),
+                lengthloc, itemsize, maxlength, gcmap, arraydescr)
 
     def get_gcmap(self, forbidden_regs=[], noregs=False):
         frame_depth = self.fm.get_frame_depth()
@@ -1267,6 +1273,16 @@
                 if isinstance(loc, FrameLoc):
                     self.fm.hint_frame_locations[box] = loc
 
+    
+    def consider_stm_set_revision_gc(self, op):
+        ofs, size, _ = unpack_fielddescr(op.getdescr())
+        ofs_loc = imm(ofs)
+        size_loc = imm(size)
+        assert isinstance(size_loc, ImmedLoc)
+        args = op.getarglist()
+        base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
+        self.perform_discard(op, [base_loc, ofs_loc, size_loc])
+        
     def consider_stm_transaction_break(self, op):
         # XXX use the extra 3 words in the stm resume buffer to save
         # up to 3 registers, too.  For now we just flush them all.
diff --git a/rpython/jit/metainterp/executor.py b/rpython/jit/metainterp/executor.py
--- a/rpython/jit/metainterp/executor.py
+++ b/rpython/jit/metainterp/executor.py
@@ -348,6 +348,7 @@
                          rop.CALL_MALLOC_NURSERY_VARSIZE_FRAME,
                          rop.LABEL,
                          rop.STM_TRANSACTION_BREAK,
+                         rop.STM_SET_REVISION_GC,
                          ):      # list of opcodes never executed by pyjitpl
                 continue
             raise AssertionError("missing %r" % (key,))
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -511,6 +511,7 @@
     'RECORD_KNOWN_CLASS/2',   # [objptr, clsptr]
     'KEEPALIVE/1',
     'STM_TRANSACTION_BREAK/0',
+    'STM_SET_REVISION_GC/1d', # not really GC, writes raw to the header
 
     '_CANRAISE_FIRST', # ----- start of can_raise operations -----
     '_CALL_FIRST',
diff --git a/rpython/memory/gc/stmgc.py b/rpython/memory/gc/stmgc.py
--- a/rpython/memory/gc/stmgc.py
+++ b/rpython/memory/gc/stmgc.py
@@ -9,6 +9,7 @@
 from rpython.rtyper.lltypesystem.lloperation import llop
 from rpython.rlib.debug import ll_assert
 from rpython.rlib.rarithmetic import LONG_BIT, r_uint
+from rpython.rtyper.extregistry import ExtRegistryEntry
 
 WORD = LONG_BIT // 8
 NULL = llmemory.NULL
@@ -36,8 +37,11 @@
     malloc_zero_filled = True
     #gcflag_extra = GCFLAG_EXTRA
 
-
-    GCHDR = lltype.GcStruct(
+    # SYNC with et.h
+    TD_NURSERY_CURRENT = 80
+    TD_NURSERY_NEXTLIMIT = 88
+    
+    GCHDR = lltype.Struct(
         'GCPTR',
         ('h_tid', lltype.Unsigned),
         ('h_revision', lltype.Signed),
@@ -79,6 +83,13 @@
     
     FX_MASK = 65535
 
+    # keep in sync with nursery.h:
+    
+    # maximum size of object in nursery (is actually dependent on
+    # nursery size, but this should work)
+    GC_NURSERY_SECTION = 135168
+    
+
     def get_type_id(self, obj):
         return llop.stm_get_tid(llgroup.HALFWORD, obj)
 
@@ -151,8 +162,7 @@
 
     @classmethod
     def JIT_max_size_of_young_obj(cls):
-        # XXX there is actually a maximum, check
-        return None
+        return cls.GC_NURSERY_SECTION
 
     @classmethod
     def JIT_minimal_size_in_nursery(cls):
diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py
--- a/rpython/rlib/rstm.py
+++ b/rpython/rlib/rstm.py
@@ -5,6 +5,11 @@
 from rpython.rlib.jit import dont_look_inside
 
 @dont_look_inside
+def get_thread_descriptor_adr():
+    addr = llop.stm_get_adr_of_thread_descriptor(llmemory.Address)
+    return rffi.cast(lltype.Signed, addr)
+
+@dont_look_inside
 def get_adr_of_private_rev_num():
     addr = llop.stm_get_adr_of_private_rev_num(llmemory.Address)
     return rffi.cast(lltype.Signed, addr)
diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py
--- a/rpython/rtyper/llinterp.py
+++ b/rpython/rtyper/llinterp.py
@@ -951,6 +951,7 @@
     op_stm_barrier = _stm_not_implemented
     op_stm_push_root = _stm_not_implemented
     op_stm_pop_root_into = _stm_not_implemented
+    op_stm_get_adr_of_thread_descriptor = _stm_not_implemented
     op_stm_get_adr_of_read_barrier_cache = _stm_not_implemented
     op_stm_get_adr_of_private_rev_num = _stm_not_implemented
     op_stm_enter_callback_call = _stm_not_implemented
diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py
--- a/rpython/rtyper/lltypesystem/lloperation.py
+++ b/rpython/rtyper/lltypesystem/lloperation.py
@@ -448,6 +448,7 @@
 
     'stm_get_adr_of_private_rev_num':LLOp(),
     'stm_get_adr_of_read_barrier_cache':LLOp(),
+    'stm_get_adr_of_thread_descriptor': LLOp(),
 
     'stm_ignored_start':      LLOp(canrun=True),
     'stm_ignored_stop':       LLOp(canrun=True),
diff --git a/rpython/translator/c/funcgen.py b/rpython/translator/c/funcgen.py
--- a/rpython/translator/c/funcgen.py
+++ b/rpython/translator/c/funcgen.py
@@ -590,6 +590,7 @@
     OP_STM_PTR_EQ                       = _OP_STM
     OP_STM_PUSH_ROOT                    = _OP_STM
     OP_STM_POP_ROOT_INTO                = _OP_STM
+    OP_STM_GET_ADR_OF_THREAD_DESCRIPTOR = _OP_STM
     OP_STM_GET_ROOT_STACK_TOP           = _OP_STM
     OP_STM_GET_ADR_OF_PRIVATE_REV_NUM   = _OP_STM
     OP_STM_GET_ADR_OF_READ_BARRIER_CACHE= _OP_STM
diff --git a/rpython/translator/stm/funcgen.py b/rpython/translator/stm/funcgen.py
--- a/rpython/translator/stm/funcgen.py
+++ b/rpython/translator/stm/funcgen.py
@@ -114,6 +114,11 @@
     return '%s = (%s)stm_pop_root();' % (
         arg0, cdecl(funcgen.lltypename(op.args[0]), ''))
 
+def stm_get_adr_of_thread_descriptor(funcgen, op):
+    result = funcgen.expr(op.result)
+    return '%s = (%s)&thread_descriptor;' % (
+        result, cdecl(funcgen.lltypename(op.result), ''))
+    
 def stm_get_root_stack_top(funcgen, op):
     result = funcgen.expr(op.result)
     return '%s = (%s)&stm_shadowstack;' % (


More information about the pypy-commit mailing list