[pypy-commit] pypy arm64: work more on write barrier

fijal pypy.commits at gmail.com
Mon Jun 17 10:09:32 EDT 2019


Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: arm64
Changeset: r96812:7a1bbf798839
Date: 2019-06-17 14:08 +0000
http://bitbucket.org/pypy/pypy/changeset/7a1bbf798839/

Log:	work more on write barrier

diff --git a/rpython/jit/backend/aarch64/assembler.py b/rpython/jit/backend/aarch64/assembler.py
--- a/rpython/jit/backend/aarch64/assembler.py
+++ b/rpython/jit/backend/aarch64/assembler.py
@@ -374,10 +374,9 @@
             # A final TEST8 before the RET, for the caller.  Careful to
             # not follow this instruction with another one that changes
             # the status of the CPU flags!
-            YYY
-            mc.LDRB_ri(r.ip.value, r.r0.value,
-                                    imm=descr.jit_wb_if_flag_byteofs)
-            mc.TST_ri(r.ip.value, imm=0x80)
+            mc.LDRB_ri(r.ip0.value, r.x0.value, descr.jit_wb_if_flag_byteofs)
+            mc.MOVZ_r_u16(r.ip1.value, 0x80, 0)
+            mc.TST_rr_shift(r.ip0.value, r.ip1.value, 0)
         #
         mc.LDR_ri(r.ip0.value, r.sp.value, WORD)
         mc.LDR_ri(r.ip1.value, r.sp.value, 0)
diff --git a/rpython/jit/backend/aarch64/codebuilder.py b/rpython/jit/backend/aarch64/codebuilder.py
--- a/rpython/jit/backend/aarch64/codebuilder.py
+++ b/rpython/jit/backend/aarch64/codebuilder.py
@@ -180,6 +180,11 @@
         base = 0b10001010000
         self.write32((base << 21) | (rm << 16) | (rn << 5) | rd)
 
+    def AND_rr_shift(self, rd, rn, rm, shift, shifttype=0):  # emits one 32-bit word; shifttype=0 is presumably LSL, as noted on MVN_rr_shift -- confirm
+        base = 0b10001010  # occupies bits 31..24 of the emitted word
+        assert 0 <= shift < 64  # shift amount must fit the 6-bit field placed at bit 10
+        self.write32((base << 24) | (shifttype << 22) | (rm << 16) | (shift << 10) | (rn << 5) | rd)
+
     def AND_ri(self, rd, rn, immed):
         assert immed == 0xFF # just one value for now, don't feel like
         # understanding IMMR/IMMS quite yet
@@ -225,10 +230,16 @@
         base = 0b11001010000
         self.write32((base << 21) | (rm << 16) | (rn << 5) | rd)
 
-    def MVN_rr(self, rd, rm):
+    def MVN_rr(self, rd, rm): # defaults to xzr
         base = 0b10101010001
         self.write32((base << 21) | (rm << 16) | (0b11111 << 5)| rd)
 
+    def MVN_rr_shift(self, rd, rm, shift=0, shifttype=0): # shifttype defaults to 0 (LSL), shift amount defaults to 0
+        base = 0b10101010  # occupies bits 31..24 of the emitted word
+        assert 0 <= shift < 64  # shift amount must fit the 6-bit field placed at bit 10
+        self.write32((base << 24) | (shifttype << 22) | (1 << 21) |  # bit 21 is always set
+                     (rm << 16) | (shift << 10) | (0b11111 << 5) | rd)  # bits 9..5 are hard-wired to 0b11111
+
     def SMULL_rr(self, rd, rn, rm):
         base = 0b10011011001
         self.write32((base << 21) | (rm << 16) | (0b11111 << 10) | (rn << 5) | rd)
diff --git a/rpython/jit/backend/aarch64/opassembler.py b/rpython/jit/backend/aarch64/opassembler.py
--- a/rpython/jit/backend/aarch64/opassembler.py
+++ b/rpython/jit/backend/aarch64/opassembler.py
@@ -5,7 +5,7 @@
 from rpython.jit.backend.aarch64 import registers as r
 from rpython.jit.backend.aarch64.codebuilder import OverwritingBuilder
 from rpython.jit.backend.aarch64.callbuilder import Aarch64CallBuilder
-from rpython.jit.backend.arm import conditions as c
+from rpython.jit.backend.arm import conditions as c, shift
 from rpython.jit.backend.aarch64.arch import JITFRAME_FIXED_SIZE, WORD
 from rpython.jit.backend.aarch64.locations import imm
 from rpython.jit.backend.llsupport.assembler import GuardToken, BaseAssembler
@@ -498,6 +498,10 @@
     def emit_op_cond_call_gc_wb(self, op, arglocs):
         self._write_barrier_fastpath(self.mc, op.getdescr(), arglocs)
 
+    def emit_op_cond_call_gc_wb_array(self, op, arglocs):  # array variant of emit_op_cond_call_gc_wb
+        self._write_barrier_fastpath(self.mc, op.getdescr(), arglocs,
+                                     array=True)  # same fast-path emitter, called with array=True
+
     def _write_barrier_fastpath(self, mc, descr, arglocs, array=False, is_frame=False):
         # Write code equivalent to write_barrier() in the GC: it checks
         # a flag in the object at arglocs[0], and if set, it calls a
@@ -530,12 +534,12 @@
         # for cond_call_gc_wb_array, also add another fast path:
         # if GCFLAG_CARDS_SET, then we can just set one bit and be done
         if card_marking:
-            XXX
+            mc.MOVZ_r_u16(r.ip1.value, 0x80, 0)
             # GCFLAG_CARDS_SET is in this byte at 0x80
-            mc.TST_ri(r.ip.value, imm=0x80)
+            mc.TST_rr_shift(r.ip0.value, r.ip1.value, 0)
 
             js_location = mc.currpos()
-            mc.BKPT()
+            mc.BRK()
         else:
             js_location = 0
 
@@ -573,45 +577,40 @@
             # here, we can simply write again a conditional jump, which will be
             # taken if GCFLAG_CARDS_SET is still not set.
             jns_location = mc.currpos()
-            mc.BKPT()
+            mc.BRK()
             #
             # patch the JS above
-            offset = mc.currpos()
+            offset = mc.currpos() - js_location
             pmc = OverwritingBuilder(mc, js_location, WORD)
-            pmc.B_offs(offset, c.NE)  # We want to jump if the z flag isn't set
+            pmc.B_ofs_cond(offset, c.NE)  # We want to jump if the z flag isn't set
             #
             # case GCFLAG_CARDS_SET: emit a few instructions to do
             # directly the card flag setting
             loc_index = arglocs[1]
             assert loc_index.is_core_reg()
-            # must save the register loc_index before it is mutated
-            mc.PUSH([loc_index.value])
-            tmp1 = loc_index
+            tmp1 = r.ip1
             tmp2 = arglocs[-1]  # the last item is a preallocated tmp
             # lr = byteofs
             s = 3 + descr.jit_wb_card_page_shift
-            mc.MVN_rr(r.lr.value, loc_index.value,
-                                       imm=s, shifttype=shift.LSR)
+            mc.MVN_rr_shift(r.lr.value, loc_index.value, s, shifttype=shift.LSR)
 
             # tmp1 = byte_index
-            mc.MOV_ri(r.ip.value, imm=7)
-            mc.AND_rr(tmp1.value, r.ip.value, loc_index.value,
-            imm=descr.jit_wb_card_page_shift, shifttype=shift.LSR)
+            mc.MOVZ_r_u16(r.ip0.value, 7, 0)
+            mc.AND_rr_shift(tmp1.value, r.ip0.value, loc_index.value,
+                            descr.jit_wb_card_page_shift, shifttype=shift.LSR)
 
             # set the bit
-            mc.MOV_ri(tmp2.value, imm=1)
-            mc.LDRB_rr(r.ip.value, loc_base.value, r.lr.value)
-            mc.ORR_rr_sr(r.ip.value, r.ip.value, tmp2.value,
-                                          tmp1.value, shifttype=shift.LSL)
-            mc.STRB_rr(r.ip.value, loc_base.value, r.lr.value)
+            mc.MOVZ_r_u16(tmp2.value, 1, 0)
+            mc.LDRB_rr(r.ip0.value, loc_base.value, r.lr.value)
+            mc.LSL_rr(tmp2.value, tmp2.value, tmp1.value)
+            mc.ORR_rr(r.ip0.value, r.ip0.value, tmp2.value)
+            mc.STR_size_rr(0, r.ip0.value, loc_base.value, r.lr.value)
             # done
-            mc.POP([loc_index.value])
-            #
             #
             # patch the JNS above
-            offset = mc.currpos()
+            offset = mc.currpos() - jns_location
             pmc = OverwritingBuilder(mc, jns_location, WORD)
-            pmc.B_offs(offset, c.EQ)  # We want to jump if the z flag is set
+            pmc.B_ofs_cond(offset, c.EQ)  # We want to jump if the z flag is set
 
         offset = mc.currpos() - jz_location
         pmc = OverwritingBuilder(mc, jz_location, WORD)
diff --git a/rpython/jit/backend/aarch64/regalloc.py b/rpython/jit/backend/aarch64/regalloc.py
--- a/rpython/jit/backend/aarch64/regalloc.py
+++ b/rpython/jit/backend/aarch64/regalloc.py
@@ -809,6 +809,8 @@
                                                               for i in range(N)]
         return arglocs
 
+    prepare_op_cond_call_gc_wb_array = prepare_op_cond_call_gc_wb
+
     def force_allocate_reg(self, var, forbidden_vars=[], selected_reg=None):
         if var.type == FLOAT:
             return self.vfprm.force_allocate_reg(var, forbidden_vars,


More information about the pypy-commit mailing list