[pypy-commit] pypy arm64: work more on write barrier
fijal
pypy.commits at gmail.com
Mon Jun 17 10:09:32 EDT 2019
Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: arm64
Changeset: r96812:7a1bbf798839
Date: 2019-06-17 14:08 +0000
http://bitbucket.org/pypy/pypy/changeset/7a1bbf798839/
Log: work more on write barrier
diff --git a/rpython/jit/backend/aarch64/assembler.py b/rpython/jit/backend/aarch64/assembler.py
--- a/rpython/jit/backend/aarch64/assembler.py
+++ b/rpython/jit/backend/aarch64/assembler.py
@@ -374,10 +374,9 @@
# A final TEST8 before the RET, for the caller. Careful to
# not follow this instruction with another one that changes
# the status of the CPU flags!
- YYY
- mc.LDRB_ri(r.ip.value, r.r0.value,
- imm=descr.jit_wb_if_flag_byteofs)
- mc.TST_ri(r.ip.value, imm=0x80)
+ mc.LDRB_ri(r.ip0.value, r.x0.value, descr.jit_wb_if_flag_byteofs)
+ mc.MOVZ_r_u16(r.ip1.value, 0x80, 0)
+ mc.TST_rr_shift(r.ip0.value, r.ip1.value, 0)
#
mc.LDR_ri(r.ip0.value, r.sp.value, WORD)
mc.LDR_ri(r.ip1.value, r.sp.value, 0)
diff --git a/rpython/jit/backend/aarch64/codebuilder.py b/rpython/jit/backend/aarch64/codebuilder.py
--- a/rpython/jit/backend/aarch64/codebuilder.py
+++ b/rpython/jit/backend/aarch64/codebuilder.py
@@ -180,6 +180,11 @@
base = 0b10001010000
self.write32((base << 21) | (rm << 16) | (rn << 5) | rd)
+ def AND_rr_shift(self, rd, rn, rm, shift, shifttype=0):
+ base = 0b10001010
+ assert 0 <= shift < 64
+ self.write32((base << 24) | (shifttype << 22) | (rm << 16) | (shift << 10) | (rn << 5) | rd)
+
def AND_ri(self, rd, rn, immed):
assert immed == 0xFF # just one value for now, don't feel like
# understanding IMMR/IMMS quite yet
@@ -225,10 +230,16 @@
base = 0b11001010000
self.write32((base << 21) | (rm << 16) | (rn << 5) | rd)
- def MVN_rr(self, rd, rm):
+ def MVN_rr(self, rd, rm): # defaults to xzr
base = 0b10101010001
self.write32((base << 21) | (rm << 16) | (0b11111 << 5)| rd)
+ def MVN_rr_shift(self, rd, rm, shift=0, shifttype=0): # defaults to LSL
+ base = 0b10101010
+ assert 0 <= shift < 64
+ self.write32((base << 24) | (shifttype << 22) | (1 << 21) |
+ (rm << 16) | (shift << 10) | (0b11111 << 5) | rd)
+
def SMULL_rr(self, rd, rn, rm):
base = 0b10011011001
self.write32((base << 21) | (rm << 16) | (0b11111 << 10) | (rn << 5) | rd)
diff --git a/rpython/jit/backend/aarch64/opassembler.py b/rpython/jit/backend/aarch64/opassembler.py
--- a/rpython/jit/backend/aarch64/opassembler.py
+++ b/rpython/jit/backend/aarch64/opassembler.py
@@ -5,7 +5,7 @@
from rpython.jit.backend.aarch64 import registers as r
from rpython.jit.backend.aarch64.codebuilder import OverwritingBuilder
from rpython.jit.backend.aarch64.callbuilder import Aarch64CallBuilder
-from rpython.jit.backend.arm import conditions as c
+from rpython.jit.backend.arm import conditions as c, shift
from rpython.jit.backend.aarch64.arch import JITFRAME_FIXED_SIZE, WORD
from rpython.jit.backend.aarch64.locations import imm
from rpython.jit.backend.llsupport.assembler import GuardToken, BaseAssembler
@@ -498,6 +498,10 @@
def emit_op_cond_call_gc_wb(self, op, arglocs):
self._write_barrier_fastpath(self.mc, op.getdescr(), arglocs)
+ def emit_op_cond_call_gc_wb_array(self, op, arglocs):
+ self._write_barrier_fastpath(self.mc, op.getdescr(), arglocs,
+ array=True)
+
def _write_barrier_fastpath(self, mc, descr, arglocs, array=False, is_frame=False):
# Write code equivalent to write_barrier() in the GC: it checks
# a flag in the object at arglocs[0], and if set, it calls a
@@ -530,12 +534,12 @@
# for cond_call_gc_wb_array, also add another fast path:
# if GCFLAG_CARDS_SET, then we can just set one bit and be done
if card_marking:
- XXX
+ mc.MOVZ_r_u16(r.ip1.value, 0x80, 0)
# GCFLAG_CARDS_SET is in this byte at 0x80
- mc.TST_ri(r.ip.value, imm=0x80)
+ mc.TST_rr_shift(r.ip0.value, r.ip1.value, 0)
js_location = mc.currpos()
- mc.BKPT()
+ mc.BRK()
else:
js_location = 0
@@ -573,45 +577,40 @@
# here, we can simply write again a conditional jump, which will be
# taken if GCFLAG_CARDS_SET is still not set.
jns_location = mc.currpos()
- mc.BKPT()
+ mc.BRK()
#
# patch the JS above
- offset = mc.currpos()
+ offset = mc.currpos() - js_location
pmc = OverwritingBuilder(mc, js_location, WORD)
- pmc.B_offs(offset, c.NE) # We want to jump if the z flag isn't set
+ pmc.B_ofs_cond(offset, c.NE) # We want to jump if the z flag isn't set
#
# case GCFLAG_CARDS_SET: emit a few instructions to do
# directly the card flag setting
loc_index = arglocs[1]
assert loc_index.is_core_reg()
- # must save the register loc_index before it is mutated
- mc.PUSH([loc_index.value])
- tmp1 = loc_index
+ tmp1 = r.ip1
tmp2 = arglocs[-1] # the last item is a preallocated tmp
# lr = byteofs
s = 3 + descr.jit_wb_card_page_shift
- mc.MVN_rr(r.lr.value, loc_index.value,
- imm=s, shifttype=shift.LSR)
+ mc.MVN_rr_shift(r.lr.value, loc_index.value, s, shifttype=shift.LSR)
# tmp1 = byte_index
- mc.MOV_ri(r.ip.value, imm=7)
- mc.AND_rr(tmp1.value, r.ip.value, loc_index.value,
- imm=descr.jit_wb_card_page_shift, shifttype=shift.LSR)
+ mc.MOVZ_r_u16(r.ip0.value, 7, 0)
+ mc.AND_rr_shift(tmp1.value, r.ip0.value, loc_index.value,
+ descr.jit_wb_card_page_shift, shifttype=shift.LSR)
# set the bit
- mc.MOV_ri(tmp2.value, imm=1)
- mc.LDRB_rr(r.ip.value, loc_base.value, r.lr.value)
- mc.ORR_rr_sr(r.ip.value, r.ip.value, tmp2.value,
- tmp1.value, shifttype=shift.LSL)
- mc.STRB_rr(r.ip.value, loc_base.value, r.lr.value)
+ mc.MOVZ_r_u16(tmp2.value, 1, 0)
+ mc.LDRB_rr(r.ip0.value, loc_base.value, r.lr.value)
+ mc.LSL_rr(tmp2.value, tmp2.value, tmp1.value)
+ mc.ORR_rr(r.ip0.value, r.ip0.value, tmp2.value)
+ mc.STR_size_rr(0, r.ip0.value, loc_base.value, r.lr.value)
# done
- mc.POP([loc_index.value])
- #
#
# patch the JNS above
- offset = mc.currpos()
+ offset = mc.currpos() - jns_location
pmc = OverwritingBuilder(mc, jns_location, WORD)
- pmc.B_offs(offset, c.EQ) # We want to jump if the z flag is set
+ pmc.B_ofs_cond(offset, c.EQ) # We want to jump if the z flag is set
offset = mc.currpos() - jz_location
pmc = OverwritingBuilder(mc, jz_location, WORD)
diff --git a/rpython/jit/backend/aarch64/regalloc.py b/rpython/jit/backend/aarch64/regalloc.py
--- a/rpython/jit/backend/aarch64/regalloc.py
+++ b/rpython/jit/backend/aarch64/regalloc.py
@@ -809,6 +809,8 @@
for i in range(N)]
return arglocs
+ prepare_op_cond_call_gc_wb_array = prepare_op_cond_call_gc_wb
+
def force_allocate_reg(self, var, forbidden_vars=[], selected_reg=None):
if var.type == FLOAT:
return self.vfprm.force_allocate_reg(var, forbidden_vars,
More information about the pypy-commit mailing list