[pypy-commit] pypy arm64: zero_array, increment debug counter and exception handling
fijal
pypy.commits at gmail.com
Tue Jun 25 07:13:38 EDT 2019
Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: arm64
Changeset: r96857:c89b27574833
Date: 2019-06-25 11:12 +0000
http://bitbucket.org/pypy/pypy/changeset/c89b27574833/
Log: zero_array, increment debug counter and exception handling
diff --git a/rpython/jit/backend/aarch64/assembler.py b/rpython/jit/backend/aarch64/assembler.py
--- a/rpython/jit/backend/aarch64/assembler.py
+++ b/rpython/jit/backend/aarch64/assembler.py
@@ -405,6 +405,7 @@
mc.LDP_rri(r.x0.value, r.x1.value, r.sp.value, 0)
mc.STR_ri(r.lr.value, r.sp.value, 0)
+ mc.STR_ri(r.x19.value, r.sp.value, WORD)
# store the current gcmap(r0) in the jitframe
gcmap_ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
@@ -414,8 +415,7 @@
mc.MOV_rr(r.x0.value, r.fp.value)
# store a possibly present exception
- # we use a callee saved reg here as a tmp for the exc.
- self._store_and_reset_exception(mc, None, r.ip1, on_frame=True)
+ self._store_and_reset_exception(mc, None, r.x19, on_frame=True)
# call realloc_frame, it takes two arguments
# arg0: the old jitframe
@@ -427,7 +427,7 @@
mc.MOV_rr(r.fp.value, r.x0.value)
# restore a possibly present exception
- self._restore_exception(mc, None, r.ip1)
+ self._restore_exception(mc, None, r.x19)
gcrootmap = self.cpu.gc_ll_descr.gcrootmap
if gcrootmap and gcrootmap.is_shadow_stack:
@@ -445,6 +445,7 @@
# return
mc.LDR_ri(r.lr.value, r.sp.value, 0)
+ mc.LDR_ri(r.x19.value, r.sp.value, WORD)
mc.ADD_ri(r.sp.value, r.sp.value, 2*WORD)
mc.RET_r(r.lr.value)
self._frame_realloc_slowpath = mc.materialize(self.cpu, [])
diff --git a/rpython/jit/backend/aarch64/codebuilder.py b/rpython/jit/backend/aarch64/codebuilder.py
--- a/rpython/jit/backend/aarch64/codebuilder.py
+++ b/rpython/jit/backend/aarch64/codebuilder.py
@@ -60,6 +60,15 @@
base = 0b11100100
self.write32((scale << 30) | (base << 22) | (imm >> scale << 10) | (rn << 5) | rt)
+ def STRB_ri(self, rt, rn, imm):
+ self.STR_size_ri(0, rt, rn, imm)
+
+ def STRH_ri(self, rt, rn, imm):
+ self.STR_size_ri(1, rt, rn, imm)
+
+ def STRW_ri(self, rt, rn, imm):
+ self.STR_size_ri(2, rt, rn, imm)
+
def MOV_rr(self, rd, rn):
self.ORR_rr(rd, r.xzr.value, rn)
diff --git a/rpython/jit/backend/aarch64/opassembler.py b/rpython/jit/backend/aarch64/opassembler.py
--- a/rpython/jit/backend/aarch64/opassembler.py
+++ b/rpython/jit/backend/aarch64/opassembler.py
@@ -249,7 +249,6 @@
raise AssertionError("bad number of bytes")
def emit_op_increment_debug_counter(self, op, arglocs):
- return # XXXX
base_loc, value_loc = arglocs
self.mc.LDR_ri(value_loc.value, base_loc.value, 0)
self.mc.ADD_ri(value_loc.value, value_loc.value, 1)
@@ -635,6 +634,111 @@
self._write_barrier_fastpath(self.mc, op.getdescr(), arglocs,
array=True)
+ #from ../x86/regalloc.py:1388
+ def emit_op_zero_array(self, op, arglocs):
+ from rpython.jit.backend.llsupport.descr import unpack_arraydescr
+ assert len(arglocs) == 0
+ size_box = op.getarg(2)
+ if isinstance(size_box, ConstInt) and size_box.getint() == 0:
+ return
+ itemsize, baseofs, _ = unpack_arraydescr(op.getdescr())
+ args = op.getarglist()
+ #
+ # ZERO_ARRAY(base_loc, start, size, 1, 1)
+ # 'start' and 'size' are both expressed in bytes,
+ # and the two scaling arguments should always be ConstInt(1) on ARM.
+ assert args[3].getint() == 1
+ assert args[4].getint() == 1
+ #
+ base_loc = self._regalloc.rm.make_sure_var_in_reg(args[0], args)
+ startbyte_box = args[1]
+ if isinstance(startbyte_box, ConstInt):
+ startbyte_loc = None
+ startbyte = startbyte_box.getint()
+ assert startbyte >= 0
+ else:
+ startbyte_loc = self._regalloc.rm.make_sure_var_in_reg(startbyte_box,
+ args)
+ startbyte = -1
+
+ # base_loc and startbyte_loc are in two regs here (or startbyte_loc
+ # is an immediate). Compute the dstaddr_loc, which is the raw
+ # address that we will pass as first argument to memset().
+ # It can be in the same register as either one, but not in
+ # args[2], because we're still needing the latter.
+ dstaddr_loc = r.ip1
+ if startbyte >= 0: # a constant
+ ofs = baseofs + startbyte
+ reg = base_loc.value
+ else:
+ self.mc.ADD_rr(dstaddr_loc.value,
+ base_loc.value, startbyte_loc.value)
+ ofs = baseofs
+ reg = dstaddr_loc.value
+ if check_imm_arg(ofs):
+ self.mc.ADD_ri(dstaddr_loc.value, reg, ofs)
+ else:
+ self.mc.gen_load_int(r.ip0.value, ofs)
+ self.mc.ADD_rr(dstaddr_loc.value, reg, r.ip0.value)
+
+ # We use STRB, STRH, STRW or STR based on whether we know the array
+ # item size is a multiple of 1, 2 or 4.
+ if itemsize & 1: itemsize = 1
+ elif itemsize & 2: itemsize = 2
+ elif itemsize & 4: itemsize = 4
+ else: itemsize = 8
+ limit = itemsize
+ next_group = -1
+ if itemsize < 8 and startbyte >= 0:
+ # we optimize STRB/STRH into STR, but this needs care:
+ # it only works if startbyte_loc is a constant, otherwise
+ # we'd be doing unaligned accesses.
+ next_group = (-startbyte) & 7
+ limit = 8
+
+ if (isinstance(size_box, ConstInt) and
+ size_box.getint() <= 14 * limit): # same limit as GCC
+ # Inline a series of STR operations, starting at 'dstaddr_loc'.
+ #
+ self.mc.gen_load_int(r.ip0.value, 0)
+ i = 0
+ adjustment = 0
+ needs_adjustment = itemsize < 8 and (startbyte % 8)
+ total_size = size_box.getint()
+ while i < total_size:
+ sz = itemsize
+ if i == next_group:
+ next_group += 8
+ if next_group <= total_size:
+ sz = 8
+ if sz == 8:
+ if needs_adjustment:
+ self.mc.ADD_ri(dstaddr_loc.value, dstaddr_loc.value, i)
+ adjustment = -i
+ needs_adjustment = False
+ self.mc.STR_ri(r.ip0.value, dstaddr_loc.value, i + adjustment)
+ elif sz == 4:
+ self.mc.STRW_ri(r.ip0.value, dstaddr_loc.value, i + adjustment)
+ elif sz == 2:
+ self.mc.STRH_ri(r.ip0.value, dstaddr_loc.value, i + adjustment)
+ else:
+ self.mc.STRB_ri(r.ip0.value, dstaddr_loc.value, i + adjustment)
+ i += sz
+
+ else:
+ if isinstance(size_box, ConstInt):
+ size_loc = self.imm(size_box.getint())
+ else:
+ # load size_loc in a register different than dstaddr_loc
+ size_loc = self._regalloc.rm.make_sure_var_in_reg(size_box,
+ [])
+ #
+ # call memset()
+ self._regalloc.before_call()
+ self.simple_call_no_collect(self.imm(self.memset_addr),
+ [dstaddr_loc, self.imm(0), size_loc])
+ self._regalloc.rm.possibly_free_var(size_box)
+
def _emit_op_cond_call(self, op, arglocs, fcond):
if len(arglocs) == 2:
res_loc = arglocs[1] # cond_call_value
@@ -859,6 +963,10 @@
result_size)
cb.emit()
+ def simple_call_no_collect(self, fnloc, arglocs):
+ cb = Aarch64CallBuilder(self, fnloc, arglocs)
+ cb.emit_no_collect()
+
def emit_guard_op_guard_not_forced(self, op, guard_op, fcond, arglocs):
# arglocs is call locs + guard_locs, split them
if rop.is_call_assembler(op.getopnum()):
diff --git a/rpython/jit/backend/aarch64/regalloc.py b/rpython/jit/backend/aarch64/regalloc.py
--- a/rpython/jit/backend/aarch64/regalloc.py
+++ b/rpython/jit/backend/aarch64/regalloc.py
@@ -313,6 +313,7 @@
prepare_op_jit_debug = void
prepare_op_enter_portal_frame = void
prepare_op_leave_portal_frame = void
+ prepare_op_zero_array = void # dealt with in opassembler.py
def prepare_int_ri(self, op, res_in_cc):
boxes = op.getarglist()
@@ -648,6 +649,10 @@
resloc = self.after_call(op)
return resloc
+ def before_call(self, save_all_regs=False):
+ self.rm.before_call(save_all_regs=save_all_regs)
+ self.vfprm.before_call(save_all_regs=save_all_regs)
+
def after_call(self, v):
if v.type == 'v':
return
More information about the pypy-commit
mailing list