[pypy-commit] pypy arm64: array basics
fijal
pypy.commits at gmail.com
Wed May 15 05:48:42 EDT 2019
Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: arm64
Changeset: r96615:411dd68d082c
Date: 2019-05-15 09:47 +0000
http://bitbucket.org/pypy/pypy/changeset/411dd68d082c/
Log: array basics
diff --git a/rpython/jit/backend/aarch64/TODO b/rpython/jit/backend/aarch64/TODO
--- a/rpython/jit/backend/aarch64/TODO
+++ b/rpython/jit/backend/aarch64/TODO
@@ -1,3 +1,3 @@
* int_add - IMM
* int_cmp - IMM
-* *_ovf - merging operations
\ No newline at end of file
+* guard_nonnull_class - think about a better way
\ No newline at end of file
diff --git a/rpython/jit/backend/aarch64/codebuilder.py b/rpython/jit/backend/aarch64/codebuilder.py
--- a/rpython/jit/backend/aarch64/codebuilder.py
+++ b/rpython/jit/backend/aarch64/codebuilder.py
@@ -7,8 +7,6 @@
from rpython.rtyper.lltypesystem import lltype, rffi
from rpython.tool.udir import udir
-PC_OFFSET = 0 # XXX
-
class AbstractAarch64Builder(object):
def write32(self, word):
self.writechar(chr(word & 0xFF))
@@ -43,7 +41,7 @@
def STR_size_rr(self, scale, rt, rn, rm):
base = 0b111000001
assert 0 <= scale <= 3
- self.wirte32((scale << 30) | (base << 21) | (rm << 16) | (0b11 << 13) |
+ self.write32((scale << 30) | (base << 21) | (rm << 16) | (0b11 << 13) |
(0b010 << 10) | (rn << 5) | rt)
def STR_size_ri(self, scale, rt, rn, imm):
@@ -112,18 +110,37 @@
assert immed & 0x7 == 0
self.write32((base << 22) | (immed >> 3 << 10) | (rn << 5) | rt)
+ def LDRB_ri(self, rt, rn, immed):
+ base = 0b0011100101
+ assert 0 <= immed <= 1<<12
+ self.write32((base << 22) | (immed << 10) | (rn << 5) | rt)
+
+ def LDRSH_ri(self, rt, rn, immed):
+ base = 0b0111100110
+ assert 0 <= immed <= 1<<13
+ assert immed & 0b1 == 0
+ self.write32((base << 22) | (immed >> 1 << 10) | (rn << 5) | rt)
+
def LDR_rr(self, rt, rn, rm):
- xxx
+ base = 0b11111000011
+ self.write32((base << 21) | (rm << 16) | (0b011010 << 10) | (rn << 5) | rt)
- def LDR_size_ri(self, size, rt, rn, ofs):
- assert 0 <= size <= 3
- assert 0 <= ofs <= 4096
- base = 0b11100101
- self.write32((size << 30) | (base << 22) | (ofs >> size << 10) | (rn << 5) | rt)
+ def LDRB_rr(self, rt, rn, rm):
+ base = 0b00111000011
+ self.write32((base << 21) | (rm << 16) | (0b011010 << 10) | (rn << 5) | rt)
- def LDR_size_rr(self, size, rt, rn, rm):
- xxx
-
+ def LDRSW_rr(self, rt, rn, rm):
+ base = 0b10111000101
+ self.write32((base << 21) | (rm << 16) | (0b011010 << 10) | (rn << 5) | rt)
+
+ def LDRSH_rr(self, rt, rn, rm):
+ base = 0b01111000101
+ self.write32((base << 21) | (rm << 16) | (0b011010 << 10) | (rn << 5) | rt)
+
+ def LDRSB_rr(self, rt, rn, rm):
+ base = 0b00111000101
+ self.write32((base << 21) | (rm << 16) | (0b011010 << 10) | (rn << 5) | rt)
+
def LDR_r_literal(self, rt, offset):
base = 0b01011000
assert -(1 << 20) <= offset < (1<< 20)
@@ -237,14 +254,12 @@
def B_ofs(self, ofs):
base = 0b000101
assert ofs & 0x3 == 0
- pos = self.currpos()
- target_ofs = ofs - (pos + PC_OFFSET)
- assert -(1 << (26 + 2)) < target_ofs < 1<<(26 + 2)
- if target_ofs < 0:
- target_ofs = (1 << 26) - (-target_ofs >> 2)
+ assert -(1 << (26 + 2)) < ofs < 1<<(26 + 2)
+ if ofs < 0:
+ ofs = (1 << 26) - (-ofs >> 2)
else:
- target_ofs = target_ofs >> 2
- self.write32((base << 26) | target_ofs)
+ ofs = ofs >> 2
+ self.write32((base << 26) | ofs)
def B_ofs_cond(self, ofs, cond):
base = 0b01010100
@@ -255,6 +270,11 @@
xxx
self.write32((base << 24) | (imm << 5) | cond)
+ def B(self, target):
+ target = rffi.cast(lltype.Signed, target)
+ self.gen_load_int_full(r.ip0.value, target)
+ self.BR_r(r.ip0.value)
+
def BL(self, target):
# XXX use the IMM version if close enough
target = rffi.cast(lltype.Signed, target)
@@ -265,7 +285,7 @@
base = 0b1101011000111111000000
self.write32((base << 10) | (reg << 5))
- def BR(self, reg):
+ def BR_r(self, reg):
base = 0b1101011000011111000000
self.write32((base << 10) | (reg << 5))
diff --git a/rpython/jit/backend/aarch64/opassembler.py b/rpython/jit/backend/aarch64/opassembler.py
--- a/rpython/jit/backend/aarch64/opassembler.py
+++ b/rpython/jit/backend/aarch64/opassembler.py
@@ -127,6 +127,9 @@
emit_op_uint_le = gen_comp_op('emit_op_uint_le', c.LS)
emit_op_uint_ge = gen_comp_op('emit_op_uint_ge', c.HS)
+ emit_op_ptr_eq = emit_op_instance_ptr_eq = emit_op_int_eq
+ emit_op_ptr_ne = emit_op_instance_ptr_ne = emit_op_int_ne
+
def emit_op_int_is_true(self, op, arglocs):
reg, res = arglocs
@@ -159,38 +162,65 @@
def emit_op_gc_store(self, op, arglocs):
value_loc, base_loc, ofs_loc, size_loc = arglocs
scale = get_scale(size_loc.value)
- self._write_to_mem(value_loc, base_loc, ofs_loc, imm(scale))
+ self._write_to_mem(value_loc, base_loc, ofs_loc, scale)
def _emit_op_gc_load(self, op, arglocs):
base_loc, ofs_loc, res_loc, nsize_loc = arglocs
nsize = nsize_loc.value
signed = (nsize < 0)
scale = get_scale(abs(nsize))
- self._load_from_mem(res_loc, base_loc, ofs_loc, imm(scale), signed)
+ self._load_from_mem(res_loc, base_loc, ofs_loc, scale, signed)
emit_op_gc_load_i = _emit_op_gc_load
emit_op_gc_load_r = _emit_op_gc_load
emit_op_gc_load_f = _emit_op_gc_load
+ def emit_op_gc_store_indexed(self, op, arglocs):
+ value_loc, base_loc, index_loc, size_loc, ofs_loc = arglocs
+ assert index_loc.is_core_reg()
+ # add the base offset
+ if ofs_loc.value > 0:
+ self.mc.ADD_ri(r.ip0.value, index_loc.value, ofs_loc.value)
+ index_loc = r.ip0
+ scale = get_scale(size_loc.value)
+ self._write_to_mem(value_loc, base_loc, index_loc, scale)
+
+ def _emit_op_gc_load_indexed(self, op, arglocs):
+ res_loc, base_loc, index_loc, nsize_loc, ofs_loc = arglocs
+ assert index_loc.is_core_reg()
+ nsize = nsize_loc.value
+ signed = (nsize < 0)
+ # add the base offset
+ if ofs_loc.value > 0:
+ self.mc.ADD_ri(r.ip0.value, index_loc.value, ofs_loc.value)
+ index_loc = r.ip0
+ #
+ scale = get_scale(abs(nsize))
+ self._load_from_mem(res_loc, base_loc, index_loc, scale, signed)
+
+ emit_op_gc_load_indexed_i = _emit_op_gc_load_indexed
+ emit_op_gc_load_indexed_r = _emit_op_gc_load_indexed
+ emit_op_gc_load_indexed_f = _emit_op_gc_load_indexed
+
def _write_to_mem(self, value_loc, base_loc, ofs_loc, scale):
# Write a value of size '1 << scale' at the address
# 'base_ofs + ofs_loc'. Note that 'scale' is not used to scale
# the offset!
assert base_loc.is_core_reg()
- if scale.value == 3:
+ if scale == 3:
# WORD size
if ofs_loc.is_imm():
self.mc.STR_ri(value_loc.value, base_loc.value,
ofs_loc.value)
else:
- self.mc.STR_rr(value_loc.value, base_loc.value,
- ofs_loc.value)
+ self.mc.STR_size_rr(3, value_loc.value, base_loc.value,
+ ofs_loc.value)
else:
if ofs_loc.is_imm():
- self.mc.STR_size_ri(scale.value, value_loc.value, base_loc.value,
+ self.mc.STR_size_ri(scale, value_loc.value, base_loc.value,
ofs_loc.value)
else:
- self.mc.STR_size_rr(scale.value, value_loc.value, base_loc.value,
+ self.mc.STR_size_rr(scale, value_loc.value, base_loc.value,
ofs_loc.value)
def _load_from_mem(self, res_loc, base_loc, ofs_loc, scale,
@@ -199,21 +229,54 @@
# 'base_loc + ofs_loc'. Note that 'scale' is not used to scale
# the offset!
#
- if scale.value == 3:
+ if scale == 3:
# WORD
if ofs_loc.is_imm():
- self.mc.LDR_ri(res_loc.value, base_loc.value,
- ofs_loc.value)
+ self.mc.LDR_ri(res_loc.value, base_loc.value, ofs_loc.value)
else:
- self.mc.LDR_rr(res_loc.value, base_loc.value,
- ofs_loc.value)
+ self.mc.LDR_rr(res_loc.value, base_loc.value, ofs_loc.value)
+ return
+ if scale == 2:
+ # 32bit int
+ if not signed:
+ if ofs_loc.is_imm():
+ self.mc.LDR_uint32_ri(res_loc.value, base_loc.value,
+ ofs_loc.value)
+ else:
+ self.mc.LDR_uint32_rr(res_loc.value, base_loc.value,
+ ofs_loc.value)
+ else:
+ if ofs_loc.is_imm():
+ self.mc.LDRSW_ri(res_loc.value, base_loc.value,
+ ofs_loc.value)
+ else:
+ self.mc.LDRSW_rr(res_loc.value, base_loc.value,
+ ofs_loc.value)
+ return
+ if scale == 1:
+ # short
+ if not signed:
+ if ofs_loc.is_imm():
+ self.mc.LDRH_ri(res_loc.value, base_loc.value, ofs_loc.value)
+ else:
+ self.mc.LDRH_rr(res_loc.value, base_loc.value, ofs_loc.value)
+ else:
+ if ofs_loc.is_imm():
+ self.mc.LDRSH_ri(res_loc.value, base_loc.value, ofs_loc.value)
+ else:
+ self.mc.LDRSH_rr(res_loc.value, base_loc.value, ofs_loc.value)
+ return
+ assert scale == 0
+ if not signed:
+ if ofs_loc.is_imm():
+ self.mc.LDRB_ri(res_loc.value, base_loc.value, ofs_loc.value)
+ else:
+ self.mc.LDRB_rr(res_loc.value, base_loc.value, ofs_loc.value)
else:
if ofs_loc.is_imm():
- self.mc.LDR_size_ri(scale.value, res_loc.value, base_loc.value,
- ofs_loc.value)
+ self.mc.LDRSB_ri(res_loc.value, base_loc.value, ofs_loc.value)
else:
- self.mc.LDR_size_rr(scale.value, res_loc.value, base_loc.value,
- ofs_loc.value)
+ self.mc.LDRSB_rr(res_loc.value, base_loc.value, ofs_loc.value)
# -------------------------------- guard --------------------------------
@@ -256,6 +319,63 @@
self._emit_guard(guard_op, c.get_opposite_of(fcond), arglocs)
emit_guard_op_guard_overflow = emit_guard_op_guard_false
+
+ def load_condition_into_cc(self, loc):
+ if not loc.is_core_reg():
+ if loc.is_stack():
+ self.regalloc_mov(loc, r.ip0)
+ else:
+ assert loc.is_imm()
+ self.mc.gen_load_int(r.ip0.value, loc.value)
+ loc = r.ip0
+ self.mc.CMP_ri(loc.value, 0)
+
+ def emit_op_guard_false(self, op, arglocs):
+ self.load_condition_into_cc(arglocs[0])
+ self._emit_guard(op, c.EQ, arglocs[1:])
+ emit_op_guard_isnull = emit_op_guard_false
+
+ def emit_op_guard_true(self, op, arglocs):
+ self.load_condition_into_cc(arglocs[0])
+ self._emit_guard(op, c.NE, arglocs[1:])
+ emit_op_guard_nonnull = emit_op_guard_true
+
+ def emit_op_guard_value(self, op, arglocs):
+ v0 = arglocs[0]
+ assert v0.is_core_reg() # can be also a float reg, but later
+ v1 = arglocs[1]
+ if v1.is_core_reg():
+ loc = v1
+ elif v1.is_imm():
+ self.mc.gen_load_int(r.ip0.value, v1.value)
+ loc = r.ip0
+ else:
+ assert v1.is_stack()
+ yyy
+ self.mc.CMP_rr(v0.value, loc.value)
+ self._emit_guard(op, c.EQ, arglocs[2:])
+
+ def emit_op_guard_class(self, op, arglocs):
+ offset = self.cpu.vtable_offset
+ assert offset is not None
+ self.mc.LDR_ri(r.ip0.value, arglocs[0].value, offset)
+ self.mc.gen_load_int(r.ip1.value, arglocs[1].value)
+ self.mc.CMP_rr(r.ip0.value, r.ip1.value)
+ self._emit_guard(op, c.EQ, arglocs[2:])
+
+ def emit_op_guard_nonnull_class(self, op, arglocs):
+ offset = self.cpu.vtable_offset
+ assert offset is not None
+ # XXX a bit obscure think about a better way
+ self.mc.MOVZ_r_u16(r.ip0.value, 1, 0)
+ self.mc.MOVZ_r_u16(r.ip1.value, 0, 0)
+ self.mc.CMP_ri(arglocs[0].value, 0)
+ self.mc.B_ofs_cond(4 * (4 + 2), c.EQ)
+ self.mc.LDR_ri(r.ip0.value, arglocs[0].value, offset)
+ self.mc.gen_load_int_full(r.ip1.value, arglocs[1].value)
+ self.mc.CMP_rr(r.ip0.value, r.ip1.value)
+ self._emit_guard(op, c.EQ, arglocs[2:])
+
# ----------------------------- call ------------------------------
def _genop_call(self, op, arglocs):
@@ -296,17 +416,6 @@
else:
cb.emit_no_collect()
- def load_condition_into_cc(self, loc):
- if not loc.is_core_reg():
- assert loc.is_stack()
- self.regalloc_mov(loc, r.ip0)
- loc = r.ip0
- self.mc.CMP_ri(loc.value, 0)
-
- def emit_op_guard_false(self, op, arglocs):
- self.load_condition_into_cc(arglocs[1])
- self._emit_guard(op, c.EQ, arglocs)
-
def emit_op_label(self, op, arglocs):
pass
@@ -315,9 +424,9 @@
assert isinstance(target_token, TargetToken)
target = target_token._ll_loop_code
if target_token in self.target_tokens_currently_compiling:
- self.mc.B_ofs(target)
+ self.mc.B_ofs(target - self.mc.currpos())
else:
- self.mc.BL(target)
+ self.mc.B(target)
def emit_op_finish(self, op, arglocs):
base_ofs = self.cpu.get_baseofs_of_frame_field()
diff --git a/rpython/jit/backend/aarch64/regalloc.py b/rpython/jit/backend/aarch64/regalloc.py
--- a/rpython/jit/backend/aarch64/regalloc.py
+++ b/rpython/jit/backend/aarch64/regalloc.py
@@ -212,6 +212,8 @@
var = op.getarg(i)
if var is not None: # xxx kludgy
self.possibly_free_var(var)
+ if op.is_guard():
+ self.possibly_free_vars(op.getfailargs())
def possibly_free_vars(self, vars):
for var in vars:
@@ -444,6 +446,34 @@
prepare_op_gc_load_r = _prepare_op_gc_load
prepare_op_gc_load_f = _prepare_op_gc_load
+ def prepare_op_gc_store_indexed(self, op):
+ boxes = op.getarglist()
+ base_loc = self.make_sure_var_in_reg(boxes[0], boxes)
+ value_loc = self.make_sure_var_in_reg(boxes[2], boxes)
+ index_loc = self.make_sure_var_in_reg(boxes[1], boxes)
+ assert boxes[3].getint() == 1 # scale
+ ofs = boxes[4].getint()
+ size = boxes[5].getint()
+ assert check_imm_arg(ofs)
+ return [value_loc, base_loc, index_loc, imm(size), imm(ofs)]
+
+ def _prepare_op_gc_load_indexed(self, op):
+ boxes = op.getarglist()
+ base_loc = self.make_sure_var_in_reg(boxes[0], boxes)
+ index_loc = self.make_sure_var_in_reg(boxes[1], boxes)
+ assert boxes[2].getint() == 1 # scale
+ ofs = boxes[3].getint()
+ nsize = boxes[4].getint()
+ assert check_imm_arg(ofs)
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res_loc = self.force_allocate_reg(op)
+ return [res_loc, base_loc, index_loc, imm(nsize), imm(ofs)]
+
+ prepare_op_gc_load_indexed_i = _prepare_op_gc_load_indexed
+ prepare_op_gc_load_indexed_r = _prepare_op_gc_load_indexed
+ prepare_op_gc_load_indexed_f = _prepare_op_gc_load_indexed
+
# --------------------------------- call ----------------------------
def _prepare_op_call(self, op):
@@ -607,8 +637,40 @@
return self._guard_impl(guard_op), c.VC
prepare_guard_op_guard_no_overflow = prepare_guard_op_guard_overflow
- prepare_op_guard_true = _guard_impl
- prepare_op_guard_false = _guard_impl
+ def guard_no_cc_impl(self, op):
+ # rare case of guard with no CC
+ arglocs = self._guard_impl(op)
+ return [self.loc(op.getarg(0))] + arglocs
+
+ prepare_op_guard_true = guard_no_cc_impl
+ prepare_op_guard_false = guard_no_cc_impl
+ prepare_op_guard_nonnull = guard_no_cc_impl
+ prepare_op_guard_isnull = guard_no_cc_impl
+
+ def prepare_op_guard_value(self, op):
+ arg = self.make_sure_var_in_reg(op.getarg(0))
+ op.getdescr().make_a_counter_per_value(op,
+ self.cpu.all_reg_indexes[arg.value])
+ l1 = self.loc(op.getarg(1))
+ imm_a1 = check_imm_box(op.getarg(1))
+ if not imm_a1:
+ l1 = self.make_sure_var_in_reg(op.getarg(1), [arg])
+ arglocs = self._guard_impl(op)
+ return [arg, l1] + arglocs
+
+ def prepare_op_guard_class(self, op):
+ assert not isinstance(op.getarg(0), Const)
+ x = self.make_sure_var_in_reg(op.getarg(0))
+ y_val = rffi.cast(lltype.Signed, op.getarg(1).getint())
+ arglocs = self._guard_impl(op)
+ return [x, imm(y_val)] + arglocs
+
+ prepare_op_guard_nonnull_class = prepare_op_guard_class
+ prepare_op_guard_gc_type = prepare_op_guard_class
+ prepare_op_guard_subclass = prepare_op_guard_class
+
+ prepare_op_ptr_eq = prepare_op_instance_ptr_eq = prepare_op_int_eq
+ prepare_op_ptr_ne = prepare_op_instance_ptr_ne = prepare_op_int_ne
prepare_op_nursery_ptr_increment = prepare_op_int_add
prepare_comp_op_int_add_ovf = prepare_int_ri
More information about the pypy-commit mailing list