[pypy-commit] pypy optresult-unroll: in-progress

Sun Sep 6 16:49:10 CEST 2015

Author: Armin Rigo <arigo at tunes.org>
Branch: optresult-unroll
Changeset: r79480:9d419227611e
Date: 2015-09-06 16:49 +0200
http://bitbucket.org/pypy/pypy/changeset/9d419227611e/

Log:	in-progress

diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py
--- a/rpython/jit/backend/arm/assembler.py
+++ b/rpython/jit/backend/arm/assembler.py
@@ -708,7 +708,7 @@
         self.fixup_target_tokens(rawstart)
         self.update_frame_depth(frame_depth)
         if logger:
-            logger.log_bridge(inputargs, operations, "rewritten",
+            logger.log_bridge(inputargs, operations, "rewritten", faildescr,
                               ops_offset=ops_offset)
         self.teardown()
 
@@ -935,9 +935,9 @@
             op = operations[i]
             self.mc.mark_op(op)
             opnum = op.getopnum()
-            if op.has_no_side_effect() and op.result not in regalloc.longevity:
+            if op.has_no_side_effect() and op not in regalloc.longevity:
                 regalloc.possibly_free_vars_for_op(op)
-            elif not we_are_translated() and op.getopnum() == -124:
+            elif not we_are_translated() and op.getopnum() == -127:
                 regalloc.prepare_force_spill(op, fcond)
             else:
                 arglocs = regalloc_operations[opnum](regalloc, op, fcond)
@@ -947,7 +947,7 @@
                     assert fcond is not None
             if op.is_guard():
                 regalloc.possibly_free_vars(op.getfailargs())
-            if op.result:
+            if op.type != 'v':
                 regalloc.possibly_free_var(op.result)
             regalloc.possibly_free_vars_for_op(op)
             regalloc.free_temp_vars()
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -49,6 +49,8 @@
     def emit_op_int_add(self, op, arglocs, regalloc, fcond):
         return self.int_add_impl(op, arglocs, regalloc, fcond)
 
+    emit_op_nursery_ptr_increment = emit_op_int_add
+
     def int_add_impl(self, op, arglocs, regalloc, fcond, flags=False):
         l0, l1, res = arglocs
         if flags:
@@ -253,28 +255,102 @@
     def emit_op_guard_class(self, op, arglocs, regalloc, fcond):
         self._cmp_guard_class(op, arglocs, regalloc, fcond)
         self.guard_success_cc = c.EQ
-        self._emit_guard(op, arglocs[3:], save_exc=False)
+        self._emit_guard(op, arglocs[2:], save_exc=False)
         return fcond
 
     def emit_op_guard_nonnull_class(self, op, arglocs, regalloc, fcond):
         self.mc.CMP_ri(arglocs[0].value, 1)
         self._cmp_guard_class(op, arglocs, regalloc, c.HS)
         self.guard_success_cc = c.EQ
-        self._emit_guard(op, arglocs[3:], save_exc=False)
+        self._emit_guard(op, arglocs[2:], save_exc=False)
         return fcond
 
     def _cmp_guard_class(self, op, locs, regalloc, fcond):
-        offset = locs[2]
+        offset = self.cpu.vtable_offset
         if offset is not None:
-            self.mc.LDR_ri(r.ip.value, locs[0].value, offset.value, cond=fcond)
+            self.mc.LDR_ri(r.ip.value, locs[0].value, offset, cond=fcond)
             self.mc.CMP_rr(r.ip.value, locs[1].value, cond=fcond)
         else:
             typeid = locs[1]
-            self.mc.LDRH_ri(r.ip.value, locs[0].value, cond=fcond)
-            if typeid.is_imm():
-                self.mc.CMP_ri(r.ip.value, typeid.value, cond=fcond)
-            else:
-                self.mc.CMP_rr(r.ip.value, typeid.value, cond=fcond)
+            assert typeid.is_imm()
+            expected_typeid = (self.cpu.gc_ll_descr
+                    .get_typeid_from_classptr_if_gcremovetypeptr(typeid.value))
+            self._cmp_guard_gc_type(locs[0], expected_typeid, fcond)
+
+    def _cmp_guard_gc_type(self, loc_ptr, expected_typeid, fcond=c.AL):
+        # Note that the typeid half-word is at offset 0 on a little-endian
+        # machine; it would be at offset 2 or 4 on a big-endian machine.
+        assert self.cpu.supports_guard_gc_type
+        assert 0 <= expected_typeid <= 0xFFFF
+        self.mc.LDRH_ri(r.ip.value, loc_ptr.value, 0,
+                        cond=fcond)
+        xxxxxx #ENCODING NOT SUPPORTED HERE?
+        self.mc.SUB_ri(r.ip.value, r.ip.value, expected_typeid & 0xFF00,
+                       cond=fcond)
+        self.mc.CMP_ri(r.ip.value, expected_typeid & 0xFF,
+                       cond=fcond)
+
+    def emit_op_guard_gc_type(self, op, arglocs, regalloc, fcond):
+        self._cmp_guard_gc_type(arglocs[0], arglocs[1].value)
+        self.guard_success_cc = c.EQ
+        self._emit_guard(op, arglocs[2:], save_exc=False)
+        return fcond
+
+    def emit_op_guard_is_object(self, op, arglocs, regalloc, fcond):
+        assert self.cpu.supports_guard_gc_type
+        loc_object = arglocs[0]
+        loc_base_type_info = arglocs[1]
+        # idea: read the typeid, fetch one byte of the field 'infobits' from
+        # the big typeinfo table, and check the flag 'T_IS_RPYTHON_INSTANCE'.
+        self.mc.LDRH_ri(r.ip.value, loc_object.value)
+        #
+        base_type_info, shift_by, sizeof_ti = (
+            self.cpu.gc_ll_descr.get_translated_info_for_typeinfo())
+        infobits_offset, IS_OBJECT_FLAG = (
+            self.cpu.gc_ll_descr.get_translated_info_for_guard_is_object())
+
+        if shift_by > 0:
+            self.mc.LSL_ri(r.ip.value, r.ip.value, shift_by)
+        self.mc.LDRB_ri(r.ip.value, loc_base_type_info, r.ip.value)
+        self.mc.TST_ri(r.ip.value, imm=IS_OBJECT_FLAG)
+        self.guard_success_cc = c.NE
+        self._emit_guard(op, arglocs[2:], save_exc=False)
+        return fcond
+
+    def emit_op_guard_subclass(self, op, arglocs, regalloc, fcond):
+        assert self.cpu.supports_guard_gc_type
+        loc_object = arglocs[0]
+        loc_check_against_class = arglocs[1]
+        loc_ofs_subclassrange_min = arglocs[2]
+        offset = self.cpu.vtable_offset
+        offset2 = self.cpu.subclassrange_min_offset
+        if offset is not None:
+            # read this field to get the vtable pointer
+            self.mc.LDR_ri(r.ip.value, loc_object.value, imm=offset)
+            # read the vtable's subclassrange_min field
+            self.mc.LDR_ri(r.ip.value, r.ip.value, imm=offset2)
+        else:
+            # read the typeid
+            self.mc.LDRH_ri(r.ip.value, loc_object.value)
+            # read the vtable's subclassrange_min field, as a single
+            # step with the correct offset
+            base_type_info, shift_by, sizeof_ti = (
+                self.cpu.gc_ll_descr.get_translated_info_for_typeinfo())
+            if shift_by > 0:
+                self.mc.LSL_ri(r.ip.value, r.ip.value, shift_by)
+            self.mc.LDR_ri(r.ip.value, loc_ofs_subclassrange_min.value,
+                           r.ip.value)
+        # get the two bounds to check against
+        vtable_ptr = loc_check_against_class.getint()
+        vtable_ptr = rffi.cast(rclass.CLASSTYPE, vtable_ptr)
+        check_min = vtable_ptr.subclassrange_min
+        check_max = vtable_ptr.subclassrange_max
+        # check by doing the unsigned comparison (tmp - min) < (max - min)
+        self.mc.SUB_ri(r.ip.value, r.ip.value, check_min)
+        self.mc.CMP_ri(r.ip.value, check_max - check_min)
+        # the guard passes if we get a result of "below"
+        self.guard_success_cc = c.LO
+        self.implement_guard(guard_token)
 
     def emit_op_guard_not_invalidated(self, op, locs, regalloc, fcond):
         return self._emit_guard(op, locs, save_exc=False,
@@ -365,8 +441,12 @@
         self.gen_func_epilog()
         return fcond
 
-    def emit_op_call(self, op, arglocs, regalloc, fcond):
+    def _genop_call(self, op, arglocs, regalloc, fcond):
         return self._emit_call(op, arglocs, fcond=fcond)
+    emit_op_call_i = _genop_call
+    emit_op_call_r = _genop_call
+    emit_op_call_f = _genop_call
+    emit_op_call_n = _genop_call
 
     def _emit_call(self, op, arglocs, is_call_release_gil=False, fcond=c.AL):
         # args = [resloc, size, sign, args...]
@@ -396,14 +476,17 @@
             cb.emit()
         return fcond
 
-    def emit_op_same_as(self, op, arglocs, regalloc, fcond):
+    def _genop_same_as(self, op, arglocs, regalloc, fcond):
         argloc, resloc = arglocs
         if argloc is not resloc:
             self.mov_loc_loc(argloc, resloc)
         return fcond
 
-    emit_op_cast_ptr_to_int = emit_op_same_as
-    emit_op_cast_int_to_ptr = emit_op_same_as
+    emit_op_same_as_i = _genop_same_as
+    emit_op_same_as_r = _genop_same_as
+    emit_op_same_as_f = _genop_same_as
+    emit_op_cast_ptr_to_int = _genop_same_as
+    emit_op_cast_int_to_ptr = _genop_same_as
 
     def emit_op_guard_no_exception(self, op, arglocs, regalloc, fcond):
         loc = arglocs[0]
@@ -574,7 +657,7 @@
     emit_op_setfield_raw = emit_op_setfield_gc
     emit_op_zero_ptr_field = emit_op_setfield_gc
 
-    def emit_op_getfield_gc(self, op, arglocs, regalloc, fcond):
+    def _genop_getfield(self, op, arglocs, regalloc, fcond):
         base_loc, ofs, res, size = arglocs
         signed = op.getdescr().is_field_signed()
         scale = get_scale(size.value)
@@ -592,7 +675,7 @@
         self.mc.STR_ri(value_loc.value, base_loc.value, 0, cond=fcond)
         return fcond
 
-    def emit_op_getinteriorfield_gc(self, op, arglocs, regalloc, fcond):
+    def _genop_interiorfield(self, op, arglocs, regalloc, fcond):
         (base_loc, index_loc, res_loc,
             ofs_loc, ofs, itemsize, fieldsize) = arglocs
         scale = get_scale(fieldsize.value)
@@ -613,6 +696,10 @@
                                 imm(scale), signed, fcond)
         return fcond
 
+    emit_op_getinteriorfield_gc_i = _genop_getinteriorfield
+    emit_op_getinteriorfield_gc_r = _genop_getinteriorfield
+    emit_op_getinteriorfield_gc_f = _genop_getinteriorfield
+
     def emit_op_setinteriorfield_gc(self, op, arglocs, regalloc, fcond):
         (base_loc, index_loc, value_loc,
             ofs_loc, ofs, itemsize, fieldsize) = arglocs
@@ -697,12 +784,13 @@
         self._write_to_mem(value_loc, base_loc, ofs_loc, scale, fcond)
         return fcond
 
-    def emit_op_getarrayitem_gc(self, op, arglocs, regalloc, fcond):
+    def _genop_getarrayitem(self, op, arglocs, regalloc, fcond):
         res_loc, base_loc, ofs_loc, scale, ofs = arglocs
         assert ofs_loc.is_core_reg()
         signed = op.getdescr().is_item_signed()
 
         # scale the offset as required
+        # XXX we should try to encode the scale inside the "shift" part of LDR
         if scale.value > 0:
             self.mc.LSL_ri(r.ip.value, ofs_loc.value, scale.value)
             ofs_loc = r.ip
@@ -714,6 +802,17 @@
         self._load_from_mem(res_loc, base_loc, ofs_loc, scale, signed, fcond)
         return fcond
 
+    emit_op_getarrayitem_gc_i = _genop_getarrayitem
+    emit_op_getarrayitem_gc_r = _genop_getarrayitem
+    emit_op_getarrayitem_gc_f = _genop_getarrayitem
+    emit_op_getarrayitem_gc_pure_i = _genop_getarrayitem
+    emit_op_getarrayitem_gc_pure_r = _genop_getarrayitem
+    emit_op_getarrayitem_gc_pure_f = _genop_getarrayitem
+    emit_op_getarrayitem_raw_i = _genop_getarrayitem
+    emit_op_getarrayitem_raw_f = _genop_getarrayitem
+    emit_op_getarrayitem_raw_pure_i = _genop_getarrayitem
+    emit_op_getarrayitem_raw_pure_f = _genop_getarrayitem
+
     def _load_from_mem(self, res_loc, base_loc, ofs_loc, scale,
                                             signed=False, fcond=c.AL):
         if scale.value == 3:
@@ -771,10 +870,7 @@
         else:
             assert 0
 
-    emit_op_getarrayitem_raw = emit_op_getarrayitem_gc
-    emit_op_getarrayitem_gc_pure = emit_op_getarrayitem_gc
-
-    def emit_op_raw_load(self, op, arglocs, regalloc, fcond):
+    def _genop_raw_load(self, op, arglocs, regalloc, fcond):
         res_loc, base_loc, ofs_loc, scale, ofs = arglocs
         assert ofs_loc.is_core_reg()
         # no base offset
@@ -783,6 +879,9 @@
         self._load_from_mem(res_loc, base_loc, ofs_loc, scale, signed, fcond)
         return fcond
 
+    emit_op_raw_load_i = _genop_raw_load
+    emit_op_raw_load_f = _genop_raw_load
+
     def emit_op_strlen(self, op, arglocs, regalloc, fcond):
         l0, l1, res = arglocs
         if l1.is_imm():
@@ -952,7 +1051,7 @@
     def imm(self, v):
         return imm(v)
 
-    def emit_op_call_assembler(self, op, arglocs, regalloc, fcond):
+    def _genop_call_assembler(self, op, arglocs, regalloc, fcond):
         if len(arglocs) == 4:
             [argloc, vloc, result_loc, tmploc] = arglocs
         else:
@@ -961,6 +1060,10 @@
         self._store_force_index(self._find_nearby_operation(+1))
         self.call_assembler(op, argloc, vloc, result_loc, tmploc)
         return fcond
+    emit_op_call_assembler_i = _genop_call_assembler
+    emit_op_call_assembler_r = _genop_call_assembler
+    emit_op_call_assembler_f = _genop_call_assembler
+    emit_op_call_assembler_n = _genop_call_assembler
 
     def _call_assembler_emit_call(self, addr, argloc, resloc):
         ofs = self.saved_threadlocal_addr
@@ -991,9 +1094,9 @@
         return pos
 
     def _call_assembler_load_result(self, op, result_loc):
-        if op.result is not None:
+        if op.type != 'v':
             # load the return value from (tmploc, 0)
-            kind = op.result.type
+            kind = op.type
             descr = self.cpu.getarraydescr_for_frame(kind)
             if kind == FLOAT:
                 ofs = self.cpu.unpack_arraydescr(descr)
@@ -1041,15 +1144,23 @@
         self._emit_guard(op, arglocs, save_exc=True, is_guard_not_forced=True)
         return fcond
 
-    def emit_op_call_may_force(self, op, arglocs, regalloc, fcond):
+    def _genop_call_may_force(self, op, arglocs, regalloc, fcond):
         self._store_force_index(self._find_nearby_operation(+1))
         self._emit_call(op, arglocs, fcond=fcond)
         return fcond
+    emit_op_call_may_force_i = _genop_call_may_force
+    emit_op_call_may_force_r = _genop_call_may_force
+    emit_op_call_may_force_f = _genop_call_may_force
+    emit_op_call_may_force_n = _genop_call_may_force
 
-    def emit_op_call_release_gil(self, op, arglocs, regalloc, fcond):
+    def _genop_call_release_gil(self, op, arglocs, regalloc, fcond):
         self._store_force_index(self._find_nearby_operation(+1))
         self._emit_call(op, arglocs, is_call_release_gil=True)
         return fcond
+    emit_op_call_release_gil_i = _genop_call_release_gil
+    emit_op_call_release_gil_r = _genop_call_release_gil
+    emit_op_call_release_gil_f = _genop_call_release_gil
+    emit_op_call_release_gil_n = _genop_call_release_gil
 
     def _store_force_index(self, guard_op):
         assert (guard_op.getopnum() == rop.GUARD_NOT_FORCED or
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -24,8 +24,7 @@
 from rpython.jit.backend.arm.arch import WORD, JITFRAME_FIXED_SIZE
 from rpython.jit.codewriter import longlong
 from rpython.jit.metainterp.history import (Const, ConstInt, ConstFloat,
-                                            ConstPtr, BoxInt,
-                                            Box, BoxPtr,
+                                            ConstPtr,
                                             INT, REF, FLOAT)
 from rpython.jit.metainterp.history import TargetToken
 from rpython.jit.metainterp.resoperation import rop
@@ -689,8 +688,8 @@
         arg0 = ConstInt(rffi.cast(lltype.Signed, op.getarg(0).getint()))
         loc = self.make_sure_var_in_reg(arg0)
         loc1 = self.get_scratch_reg(INT, boxes)
-        if op.result in self.longevity:
-            resloc = self.force_allocate_reg(op.result, boxes)
+        if op in self.longevity:
+            resloc = self.force_allocate_reg(op, boxes)
             self.possibly_free_var(op.result)
         else:
             resloc = None
@@ -706,55 +705,23 @@
         return arglocs
 
     def prepare_op_guard_class(self, op, fcond):
-        return self._prepare_guard_class(op, fcond)
-
-    prepare_op_guard_nonnull_class = prepare_op_guard_class
-
-    def _prepare_guard_class(self, op, fcond):
         assert not isinstance(op.getarg(0), Const)
         boxes = op.getarglist()
 
         x = self.make_sure_var_in_reg(boxes[0], boxes)
         y_val = rffi.cast(lltype.Signed, op.getarg(1).getint())
 
-        arglocs = [x, None, None]
+        arglocs = [x, imm(y_val)]
 
         offset = self.cpu.vtable_offset
         if offset is not None:
             y = self.get_scratch_reg(INT, forbidden_vars=boxes)
-            self.assembler.load(y, imm(y_val))
-
-            assert check_imm_arg(offset)
-            offset_loc = imm(offset)
-
+            self.assembler.load(y, arglocs[1])
             arglocs[1] = y
-            arglocs[2] = offset_loc
-        else:
-            # XXX hard-coded assumption: to go from an object to its class
-            # we use the following algorithm:
-            #   - read the typeid from mem(locs[0]), i.e. at offset 0
-            #   - keep the lower 16 bits read there
-            #   - multiply by 4 and use it as an offset in type_info_group
-            #   - add 16 bytes, to go past the TYPE_INFO structure
-            classptr = y_val
-            # here, we have to go back from 'classptr' to the value expected
-            # from reading the 16 bits in the object header
-            from rpython.memory.gctypelayout import GCData
-            sizeof_ti = rffi.sizeof(GCData.TYPE_INFO)
-            type_info_group = llop.gc_get_type_info_group(llmemory.Address)
-            type_info_group = rffi.cast(lltype.Signed, type_info_group)
-            expected_typeid = classptr - sizeof_ti - type_info_group
-            expected_typeid >>= 2
-            if check_imm_arg(expected_typeid):
-                arglocs[1] = imm(expected_typeid)
-            else:
-                y = self.get_scratch_reg(INT, forbidden_vars=boxes)
-                self.assembler.load(y, imm(expected_typeid))
-                arglocs[1] = y
 
         return self._prepare_guard(op, arglocs)
 
-        return arglocs
+    prepare_op_guard_nonnull_class = prepare_op_guard_class
 
     def compute_hint_frame_locations(self, operations):
         # optimization only: fill in the 'hint_frame_locations' dictionary
@@ -782,7 +749,7 @@
         assert len(arglocs) == jump_op.numargs()
         for i in range(jump_op.numargs()):
             box = jump_op.getarg(i)
-            if isinstance(box, Box):
+            if not isinstance(box, Const):
                 loc = arglocs[i]
                 if loc is not None and loc.is_stack():
                     self.frame_manager.hint_frame_pos[box] = (
@@ -1115,7 +1082,7 @@
             # for boehm, this function should never be called
         arraydescr = op.getdescr()
         length_box = op.getarg(2)
-        assert isinstance(length_box, BoxInt) # we cannot have a const here!
+        assert not isinstance(length_box, Const) # we cannot have a const here!
         # the result will be in r0
         self.rm.force_allocate_reg(op.result, selected_reg=r.r0)
         # we need r1 as a temporary
@@ -1194,14 +1161,14 @@
         # of some guard
         position = self.rm.position
         for arg in inputargs:
-            assert isinstance(arg, Box)
+            assert not isinstance(arg, Const)
             if self.last_real_usage.get(arg, -1) <= position:
                 self.force_spill_var(arg)
 
         #
         for i in range(len(inputargs)):
             arg = inputargs[i]
-            assert isinstance(arg, Box)
+            assert not isinstance(arg, Const)
             loc = self.loc(arg)
             arglocs[i] = loc
             if loc.is_core_reg() or loc.is_vfp_reg():