[pypy-commit] pypy optresult-unroll: in-progress
arigo
noreply at buildbot.pypy.org
Sun Sep 6 16:49:10 CEST 2015
Author: Armin Rigo <arigo at tunes.org>
Branch: optresult-unroll
Changeset: r79480:9d419227611e
Date: 2015-09-06 16:49 +0200
http://bitbucket.org/pypy/pypy/changeset/9d419227611e/
Log: in-progress
diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py
--- a/rpython/jit/backend/arm/assembler.py
+++ b/rpython/jit/backend/arm/assembler.py
@@ -708,7 +708,7 @@
self.fixup_target_tokens(rawstart)
self.update_frame_depth(frame_depth)
if logger:
- logger.log_bridge(inputargs, operations, "rewritten",
+ logger.log_bridge(inputargs, operations, "rewritten", faildescr,
ops_offset=ops_offset)
self.teardown()
@@ -935,9 +935,9 @@
op = operations[i]
self.mc.mark_op(op)
opnum = op.getopnum()
- if op.has_no_side_effect() and op.result not in regalloc.longevity:
+ if op.has_no_side_effect() and op not in regalloc.longevity:
regalloc.possibly_free_vars_for_op(op)
- elif not we_are_translated() and op.getopnum() == -124:
+ elif not we_are_translated() and op.getopnum() == -127:
regalloc.prepare_force_spill(op, fcond)
else:
arglocs = regalloc_operations[opnum](regalloc, op, fcond)
@@ -947,7 +947,7 @@
assert fcond is not None
if op.is_guard():
regalloc.possibly_free_vars(op.getfailargs())
- if op.result:
+ if op.type != 'v':
regalloc.possibly_free_var(op.result)
regalloc.possibly_free_vars_for_op(op)
regalloc.free_temp_vars()
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -49,6 +49,8 @@
def emit_op_int_add(self, op, arglocs, regalloc, fcond):
return self.int_add_impl(op, arglocs, regalloc, fcond)
+ emit_op_nursery_ptr_increment = emit_op_int_add
+
def int_add_impl(self, op, arglocs, regalloc, fcond, flags=False):
l0, l1, res = arglocs
if flags:
@@ -253,28 +255,102 @@
def emit_op_guard_class(self, op, arglocs, regalloc, fcond):
self._cmp_guard_class(op, arglocs, regalloc, fcond)
self.guard_success_cc = c.EQ
- self._emit_guard(op, arglocs[3:], save_exc=False)
+ self._emit_guard(op, arglocs[2:], save_exc=False)
return fcond
def emit_op_guard_nonnull_class(self, op, arglocs, regalloc, fcond):
self.mc.CMP_ri(arglocs[0].value, 1)
self._cmp_guard_class(op, arglocs, regalloc, c.HS)
self.guard_success_cc = c.EQ
- self._emit_guard(op, arglocs[3:], save_exc=False)
+ self._emit_guard(op, arglocs[2:], save_exc=False)
return fcond
def _cmp_guard_class(self, op, locs, regalloc, fcond):
- offset = locs[2]
+ offset = self.cpu.vtable_offset
if offset is not None:
- self.mc.LDR_ri(r.ip.value, locs[0].value, offset.value, cond=fcond)
+ self.mc.LDR_ri(r.ip.value, locs[0].value, offset, cond=fcond)
self.mc.CMP_rr(r.ip.value, locs[1].value, cond=fcond)
else:
typeid = locs[1]
- self.mc.LDRH_ri(r.ip.value, locs[0].value, cond=fcond)
- if typeid.is_imm():
- self.mc.CMP_ri(r.ip.value, typeid.value, cond=fcond)
- else:
- self.mc.CMP_rr(r.ip.value, typeid.value, cond=fcond)
+ assert typeid.is_imm()
+ expected_typeid = (self.cpu.gc_ll_descr
+ .get_typeid_from_classptr_if_gcremovetypeptr(typeid.value))
+ self._cmp_guard_gc_type(locs[0], expected_typeid, fcond)
+
+ def _cmp_guard_gc_type(self, loc_ptr, expected_typeid, fcond=c.AL):
+ # Note that the typeid half-word is at offset 0 on a little-endian
+ # machine; it would be at offset 2 or 4 on a big-endian machine.
+ assert self.cpu.supports_guard_gc_type
+ assert 0 <= expected_typeid <= 0xFFFF
+ self.mc.LDRH_ri(r.ip.value, loc_ptr.value, 0,
+ cond=fcond)
+ xxxxxx #ENCODING NOT SUPPORTED HERE?
+ self.mc.SUB_ri(r.ip.value, r.ip.value, expected_typeid & 0xFF00,
+ cond=fcond)
+ self.mc.CMP_ri(r.ip.value, expected_typeid & 0xFF,
+ cond=fcond)
+
+ def emit_op_guard_gc_type(self, op, arglocs, regalloc, fcond):
+ self._cmp_guard_gc_type(arglocs[0], arglocs[1].value)
+ self.guard_success_cc = c.EQ
+ self._emit_guard(op, arglocs[2:], save_exc=False)
+ return fcond
+
+ def emit_op_guard_is_object(self, op, arglocs, regalloc, fcond):
+ assert self.cpu.supports_guard_gc_type
+ loc_object = arglocs[0]
+ loc_base_type_info = arglocs[1]
+ # idea: read the typeid, fetch one byte of the field 'infobits' from
+ # the big typeinfo table, and check the flag 'T_IS_RPYTHON_INSTANCE'.
+ self.mc.LDRH_ri(r.ip.value, loc_object.value)
+ #
+ base_type_info, shift_by, sizeof_ti = (
+ self.cpu.gc_ll_descr.get_translated_info_for_typeinfo())
+ infobits_offset, IS_OBJECT_FLAG = (
+ self.cpu.gc_ll_descr.get_translated_info_for_guard_is_object())
+
+ if shift_by > 0:
+ self.mc.LSL_ri(r.ip.value, r.ip.value, shift_by)
+ self.mc.LDRB_ri(r.ip.value, loc_base_type_info, r.ip.value)
+ self.mc.TST_ri(r.ip.value, imm=IS_OBJECT_FLAG)
+ self.guard_success_cc = c.NE
+ self._emit_guard(op, arglocs[2:], save_exc=False)
+ return fcond
+
+ def emit_op_guard_subclass(self, op, arglocs, regalloc, fcond):
+ assert self.cpu.supports_guard_gc_type
+ loc_object = arglocs[0]
+ loc_check_against_class = arglocs[1]
+ loc_ofs_subclassrange_min = arglocs[2]
+ offset = self.cpu.vtable_offset
+ offset2 = self.cpu.subclassrange_min_offset
+ if offset is not None:
+ # read this field to get the vtable pointer
+ self.mc.LDR_ri(r.ip.value, loc_object.value, imm=offset)
+ # read the vtable's subclassrange_min field
+ self.mc.LDR_ri(r.ip.value, r.ip.value, imm=offset2)
+ else:
+ # read the typeid
+ self.mc.LDRH_ri(r.ip.value, loc_object.value)
+ # read the vtable's subclassrange_min field, as a single
+ # step with the correct offset
+ base_type_info, shift_by, sizeof_ti = (
+ self.cpu.gc_ll_descr.get_translated_info_for_typeinfo())
+ if shift_by > 0:
+ self.mc.LSL_ri(r.ip.value, r.ip.value, shift_by)
+ self.mc.LDR_ri(r.ip.value, loc_ofs_subclassrange_min.value,
+ r.ip.value)
+ # get the two bounds to check against
+ vtable_ptr = loc_check_against_class.getint()
+ vtable_ptr = rffi.cast(rclass.CLASSTYPE, vtable_ptr)
+ check_min = vtable_ptr.subclassrange_min
+ check_max = vtable_ptr.subclassrange_max
+ # check by doing the unsigned comparison (tmp - min) < (max - min)
+ self.mc.SUB_ri(r.ip.value, r.ip.value, check_min)
+ self.mc.CMP_ri(r.ip.value, check_max - check_min)
+ # the guard passes if we get a result of "below"
+ self.guard_success_cc = c.LO
+ self.implement_guard(guard_token)
def emit_op_guard_not_invalidated(self, op, locs, regalloc, fcond):
return self._emit_guard(op, locs, save_exc=False,
@@ -365,8 +441,12 @@
self.gen_func_epilog()
return fcond
- def emit_op_call(self, op, arglocs, regalloc, fcond):
+ def _genop_call(self, op, arglocs, regalloc, fcond):
return self._emit_call(op, arglocs, fcond=fcond)
+ emit_op_call_i = _genop_call
+ emit_op_call_r = _genop_call
+ emit_op_call_f = _genop_call
+ emit_op_call_n = _genop_call
def _emit_call(self, op, arglocs, is_call_release_gil=False, fcond=c.AL):
# args = [resloc, size, sign, args...]
@@ -396,14 +476,17 @@
cb.emit()
return fcond
- def emit_op_same_as(self, op, arglocs, regalloc, fcond):
+ def _genop_same_as(self, op, arglocs, regalloc, fcond):
argloc, resloc = arglocs
if argloc is not resloc:
self.mov_loc_loc(argloc, resloc)
return fcond
- emit_op_cast_ptr_to_int = emit_op_same_as
- emit_op_cast_int_to_ptr = emit_op_same_as
+ emit_op_same_as_i = _genop_same_as
+ emit_op_same_as_r = _genop_same_as
+ emit_op_same_as_f = _genop_same_as
+ emit_op_cast_ptr_to_int = _genop_same_as
+ emit_op_cast_int_to_ptr = _genop_same_as
def emit_op_guard_no_exception(self, op, arglocs, regalloc, fcond):
loc = arglocs[0]
@@ -574,7 +657,7 @@
emit_op_setfield_raw = emit_op_setfield_gc
emit_op_zero_ptr_field = emit_op_setfield_gc
- def emit_op_getfield_gc(self, op, arglocs, regalloc, fcond):
+ def _genop_getfield(self, op, arglocs, regalloc, fcond):
base_loc, ofs, res, size = arglocs
signed = op.getdescr().is_field_signed()
scale = get_scale(size.value)
@@ -592,7 +675,7 @@
self.mc.STR_ri(value_loc.value, base_loc.value, 0, cond=fcond)
return fcond
- def emit_op_getinteriorfield_gc(self, op, arglocs, regalloc, fcond):
+ def _genop_interiorfield(self, op, arglocs, regalloc, fcond):
(base_loc, index_loc, res_loc,
ofs_loc, ofs, itemsize, fieldsize) = arglocs
scale = get_scale(fieldsize.value)
@@ -613,6 +696,10 @@
imm(scale), signed, fcond)
return fcond
+ emit_op_getinteriorfield_gc_i = _genop_getinteriorfield
+ emit_op_getinteriorfield_gc_r = _genop_getinteriorfield
+ emit_op_getinteriorfield_gc_f = _genop_getinteriorfield
+
def emit_op_setinteriorfield_gc(self, op, arglocs, regalloc, fcond):
(base_loc, index_loc, value_loc,
ofs_loc, ofs, itemsize, fieldsize) = arglocs
@@ -697,12 +784,13 @@
self._write_to_mem(value_loc, base_loc, ofs_loc, scale, fcond)
return fcond
- def emit_op_getarrayitem_gc(self, op, arglocs, regalloc, fcond):
+ def _genop_getarrayitem(self, op, arglocs, regalloc, fcond):
res_loc, base_loc, ofs_loc, scale, ofs = arglocs
assert ofs_loc.is_core_reg()
signed = op.getdescr().is_item_signed()
# scale the offset as required
+ # XXX we should try to encode the scale inside the "shift" part of LDR
if scale.value > 0:
self.mc.LSL_ri(r.ip.value, ofs_loc.value, scale.value)
ofs_loc = r.ip
@@ -714,6 +802,17 @@
self._load_from_mem(res_loc, base_loc, ofs_loc, scale, signed, fcond)
return fcond
+ emit_op_getarrayitem_gc_i = _genop_getarrayitem
+ emit_op_getarrayitem_gc_r = _genop_getarrayitem
+ emit_op_getarrayitem_gc_f = _genop_getarrayitem
+ emit_op_getarrayitem_gc_pure_i = _genop_getarrayitem
+ emit_op_getarrayitem_gc_pure_r = _genop_getarrayitem
+ emit_op_getarrayitem_gc_pure_f = _genop_getarrayitem
+ emit_op_getarrayitem_raw_i = _genop_getarrayitem
+ emit_op_getarrayitem_raw_f = _genop_getarrayitem
+ emit_op_getarrayitem_raw_pure_i = _genop_getarrayitem
+ emit_op_getarrayitem_raw_pure_f = _genop_getarrayitem
+
def _load_from_mem(self, res_loc, base_loc, ofs_loc, scale,
signed=False, fcond=c.AL):
if scale.value == 3:
@@ -771,10 +870,7 @@
else:
assert 0
- emit_op_getarrayitem_raw = emit_op_getarrayitem_gc
- emit_op_getarrayitem_gc_pure = emit_op_getarrayitem_gc
-
- def emit_op_raw_load(self, op, arglocs, regalloc, fcond):
+ def _genop_raw_load(self, op, arglocs, regalloc, fcond):
res_loc, base_loc, ofs_loc, scale, ofs = arglocs
assert ofs_loc.is_core_reg()
# no base offset
@@ -783,6 +879,9 @@
self._load_from_mem(res_loc, base_loc, ofs_loc, scale, signed, fcond)
return fcond
+ emit_op_raw_load_i = _genop_raw_load
+ emit_op_raw_load_f = _genop_raw_load
+
def emit_op_strlen(self, op, arglocs, regalloc, fcond):
l0, l1, res = arglocs
if l1.is_imm():
@@ -952,7 +1051,7 @@
def imm(self, v):
return imm(v)
- def emit_op_call_assembler(self, op, arglocs, regalloc, fcond):
+ def _genop_call_assembler(self, op, arglocs, regalloc, fcond):
if len(arglocs) == 4:
[argloc, vloc, result_loc, tmploc] = arglocs
else:
@@ -961,6 +1060,10 @@
self._store_force_index(self._find_nearby_operation(+1))
self.call_assembler(op, argloc, vloc, result_loc, tmploc)
return fcond
+ emit_op_call_assembler_i = _genop_call_assembler
+ emit_op_call_assembler_r = _genop_call_assembler
+ emit_op_call_assembler_f = _genop_call_assembler
+ emit_op_call_assembler_n = _genop_call_assembler
def _call_assembler_emit_call(self, addr, argloc, resloc):
ofs = self.saved_threadlocal_addr
@@ -991,9 +1094,9 @@
return pos
def _call_assembler_load_result(self, op, result_loc):
- if op.result is not None:
+ if op.type != 'v':
# load the return value from (tmploc, 0)
- kind = op.result.type
+ kind = op.type
descr = self.cpu.getarraydescr_for_frame(kind)
if kind == FLOAT:
ofs = self.cpu.unpack_arraydescr(descr)
@@ -1041,15 +1144,23 @@
self._emit_guard(op, arglocs, save_exc=True, is_guard_not_forced=True)
return fcond
- def emit_op_call_may_force(self, op, arglocs, regalloc, fcond):
+ def _genop_call_may_force(self, op, arglocs, regalloc, fcond):
self._store_force_index(self._find_nearby_operation(+1))
self._emit_call(op, arglocs, fcond=fcond)
return fcond
+ emit_op_call_may_force_i = _genop_call_may_force
+ emit_op_call_may_force_r = _genop_call_may_force
+ emit_op_call_may_force_f = _genop_call_may_force
+ emit_op_call_may_force_n = _genop_call_may_force
- def emit_op_call_release_gil(self, op, arglocs, regalloc, fcond):
+ def _genop_call_release_gil(self, op, arglocs, regalloc, fcond):
self._store_force_index(self._find_nearby_operation(+1))
self._emit_call(op, arglocs, is_call_release_gil=True)
return fcond
+ emit_op_call_release_gil_i = _genop_call_release_gil
+ emit_op_call_release_gil_r = _genop_call_release_gil
+ emit_op_call_release_gil_f = _genop_call_release_gil
+ emit_op_call_release_gil_n = _genop_call_release_gil
def _store_force_index(self, guard_op):
assert (guard_op.getopnum() == rop.GUARD_NOT_FORCED or
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -24,8 +24,7 @@
from rpython.jit.backend.arm.arch import WORD, JITFRAME_FIXED_SIZE
from rpython.jit.codewriter import longlong
from rpython.jit.metainterp.history import (Const, ConstInt, ConstFloat,
- ConstPtr, BoxInt,
- Box, BoxPtr,
+ ConstPtr,
INT, REF, FLOAT)
from rpython.jit.metainterp.history import TargetToken
from rpython.jit.metainterp.resoperation import rop
@@ -689,8 +688,8 @@
arg0 = ConstInt(rffi.cast(lltype.Signed, op.getarg(0).getint()))
loc = self.make_sure_var_in_reg(arg0)
loc1 = self.get_scratch_reg(INT, boxes)
- if op.result in self.longevity:
- resloc = self.force_allocate_reg(op.result, boxes)
+ if op in self.longevity:
+ resloc = self.force_allocate_reg(op, boxes)
self.possibly_free_var(op.result)
else:
resloc = None
@@ -706,55 +705,23 @@
return arglocs
def prepare_op_guard_class(self, op, fcond):
- return self._prepare_guard_class(op, fcond)
-
- prepare_op_guard_nonnull_class = prepare_op_guard_class
-
- def _prepare_guard_class(self, op, fcond):
assert not isinstance(op.getarg(0), Const)
boxes = op.getarglist()
x = self.make_sure_var_in_reg(boxes[0], boxes)
y_val = rffi.cast(lltype.Signed, op.getarg(1).getint())
- arglocs = [x, None, None]
+ arglocs = [x, imm(y_val)]
offset = self.cpu.vtable_offset
if offset is not None:
y = self.get_scratch_reg(INT, forbidden_vars=boxes)
- self.assembler.load(y, imm(y_val))
-
- assert check_imm_arg(offset)
- offset_loc = imm(offset)
-
+ self.assembler.load(y, arglocs[1])
arglocs[1] = y
- arglocs[2] = offset_loc
- else:
- # XXX hard-coded assumption: to go from an object to its class
- # we use the following algorithm:
- # - read the typeid from mem(locs[0]), i.e. at offset 0
- # - keep the lower 16 bits read there
- # - multiply by 4 and use it as an offset in type_info_group
- # - add 16 bytes, to go past the TYPE_INFO structure
- classptr = y_val
- # here, we have to go back from 'classptr' to the value expected
- # from reading the 16 bits in the object header
- from rpython.memory.gctypelayout import GCData
- sizeof_ti = rffi.sizeof(GCData.TYPE_INFO)
- type_info_group = llop.gc_get_type_info_group(llmemory.Address)
- type_info_group = rffi.cast(lltype.Signed, type_info_group)
- expected_typeid = classptr - sizeof_ti - type_info_group
- expected_typeid >>= 2
- if check_imm_arg(expected_typeid):
- arglocs[1] = imm(expected_typeid)
- else:
- y = self.get_scratch_reg(INT, forbidden_vars=boxes)
- self.assembler.load(y, imm(expected_typeid))
- arglocs[1] = y
return self._prepare_guard(op, arglocs)
- return arglocs
+ prepare_op_guard_nonnull_class = prepare_op_guard_class
def compute_hint_frame_locations(self, operations):
# optimization only: fill in the 'hint_frame_locations' dictionary
@@ -782,7 +749,7 @@
assert len(arglocs) == jump_op.numargs()
for i in range(jump_op.numargs()):
box = jump_op.getarg(i)
- if isinstance(box, Box):
+ if not isinstance(box, Const):
loc = arglocs[i]
if loc is not None and loc.is_stack():
self.frame_manager.hint_frame_pos[box] = (
@@ -1115,7 +1082,7 @@
# for boehm, this function should never be called
arraydescr = op.getdescr()
length_box = op.getarg(2)
- assert isinstance(length_box, BoxInt) # we cannot have a const here!
+ assert not isinstance(length_box, Const) # we cannot have a const here!
# the result will be in r0
self.rm.force_allocate_reg(op.result, selected_reg=r.r0)
# we need r1 as a temporary
@@ -1194,14 +1161,14 @@
# of some guard
position = self.rm.position
for arg in inputargs:
- assert isinstance(arg, Box)
+ assert not isinstance(arg, Const)
if self.last_real_usage.get(arg, -1) <= position:
self.force_spill_var(arg)
#
for i in range(len(inputargs)):
arg = inputargs[i]
- assert isinstance(arg, Box)
+ assert not isinstance(arg, Const)
loc = self.loc(arg)
arglocs[i] = loc
if loc.is_core_reg() or loc.is_vfp_reg():
More information about the pypy-commit mailing list