[pypy-commit] pypy default: hg merge copystrcontents-in-rewrite

arigo pypy.commits at gmail.com
Sun Jun 9 07:31:23 EDT 2019


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r96783:77c16f38a6e2
Date: 2019-06-09 13:30 +0200
http://bitbucket.org/pypy/pypy/changeset/77c16f38a6e2/

Log:	hg merge copystrcontents-in-rewrite

	Remove copystrcontent and copyunicodecontent in the backends.
	Instead, replace it in rewrite.py with a direct call to memcpy() and
	new basic operation, load_effective_address, which the backend can
	even decide not to implement.

diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -834,73 +834,11 @@
         else:
             assert 0
 
-    #from ../x86/regalloc.py:928 ff.
-    def emit_op_copystrcontent(self, op, arglocs, regalloc, fcond):
-        assert len(arglocs) == 0
-        self._emit_copystrcontent(op, regalloc, fcond, is_unicode=False)
+    def emit_op_load_effective_address(self, op, arglocs, regalloc, fcond):
+        self._gen_address(arglocs[4], arglocs[0], arglocs[1], arglocs[3].value,
+                          arglocs[2].value)
         return fcond
 
-    def emit_op_copyunicodecontent(self, op, arglocs, regalloc, fcond):
-        assert len(arglocs) == 0
-        self._emit_copystrcontent(op, regalloc, fcond, is_unicode=True)
-        return fcond
-
-    def _emit_copystrcontent(self, op, regalloc, fcond, is_unicode):
-        # compute the source address
-        args = op.getarglist()
-        base_loc = regalloc.rm.make_sure_var_in_reg(args[0], args)
-        ofs_loc = regalloc.rm.make_sure_var_in_reg(args[2], args)
-        assert args[0] is not args[1]    # forbidden case of aliasing
-        srcaddr_box = TempVar()
-        forbidden_vars = [args[1], args[3], args[4], srcaddr_box]
-        srcaddr_loc = regalloc.rm.force_allocate_reg(srcaddr_box, forbidden_vars)
-        self._gen_address_inside_string(base_loc, ofs_loc, srcaddr_loc,
-                                        is_unicode=is_unicode)
-        # compute the destination address
-        base_loc = regalloc.rm.make_sure_var_in_reg(args[1], forbidden_vars)
-        ofs_loc = regalloc.rm.make_sure_var_in_reg(args[3], forbidden_vars)
-        forbidden_vars = [args[4], srcaddr_box]
-        dstaddr_box = TempVar()
-        dstaddr_loc = regalloc.rm.force_allocate_reg(dstaddr_box, forbidden_vars)
-        self._gen_address_inside_string(base_loc, ofs_loc, dstaddr_loc,
-                                        is_unicode=is_unicode)
-        # compute the length in bytes
-        length_box = args[4]
-        length_loc = regalloc.loc(length_box)
-        if is_unicode:
-            forbidden_vars = [srcaddr_box, dstaddr_box]
-            bytes_box = TempVar()
-            bytes_loc = regalloc.rm.force_allocate_reg(bytes_box, forbidden_vars)
-            scale = self._get_unicode_item_scale()
-            if not length_loc.is_core_reg():
-                self.regalloc_mov(length_loc, bytes_loc)
-                length_loc = bytes_loc
-            assert length_loc.is_core_reg()
-            self.mc.MOV_ri(r.ip.value, 1 << scale)
-            self.mc.MUL(bytes_loc.value, r.ip.value, length_loc.value)
-            length_box = bytes_box
-            length_loc = bytes_loc
-        # call memcpy()
-        regalloc.before_call()
-        self.simple_call_no_collect(imm(self.memcpy_addr),
-                                  [dstaddr_loc, srcaddr_loc, length_loc])
-        regalloc.rm.possibly_free_var(length_box)
-        regalloc.rm.possibly_free_var(dstaddr_box)
-        regalloc.rm.possibly_free_var(srcaddr_box)
-
-    def _gen_address_inside_string(self, baseloc, ofsloc, resloc, is_unicode):
-        if is_unicode:
-            ofs_items, _, _ = symbolic.get_array_token(rstr.UNICODE,
-                                              self.cpu.translate_support_code)
-            scale = self._get_unicode_item_scale()
-        else:
-            ofs_items, itemsize, _ = symbolic.get_array_token(rstr.STR,
-                                              self.cpu.translate_support_code)
-            assert itemsize == 1
-            ofs_items -= 1     # for the extra null character
-            scale = 0
-        self._gen_address(resloc, baseloc, ofsloc, scale, ofs_items)
-
    # result = base_loc  + (scaled_loc << scale) + static_offset
     def _gen_address(self, result, base_loc, scaled_loc, scale=0, static_offset=0):
         assert scaled_loc.is_core_reg()
@@ -915,16 +853,6 @@
         self.mc.ADD_rr(result.value, base_loc.value, scaled_loc.value)
         self.mc.ADD_ri(result.value, result.value, static_offset)
 
-    def _get_unicode_item_scale(self):
-        _, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
-                                              self.cpu.translate_support_code)
-        if itemsize == 4:
-            return 2
-        elif itemsize == 2:
-            return 1
-        else:
-            raise AssertionError("bad unicode item size")
-
     def store_force_descr(self, op, fail_locs, frame_depth):
         pos = self.mc.currpos()
         guard_token = self.build_guard_token(op, frame_depth, fail_locs, pos, c.AL)
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -873,8 +873,6 @@
     prepare_op_gc_load_indexed_r = _prepare_op_gc_load_indexed
     prepare_op_gc_load_indexed_f = _prepare_op_gc_load_indexed
 
-    prepare_op_copystrcontent = void
-    prepare_op_copyunicodecontent = void
     prepare_op_zero_array = void
 
     def _prepare_op_same_as(self, op, fcond):
@@ -899,6 +897,13 @@
         resloc = self.force_allocate_reg(op)
         return [resloc]
 
+    def prepare_op_load_effective_address(self, op, fcond):
+        args = op.getarglist()
+        arg0 = self.make_sure_var_in_reg(args[0], args)
+        arg1 = self.make_sure_var_in_reg(args[1], args)
+        res = self.force_allocate_reg(op)
+        return [arg0, arg1, args[2], args[3], res]
+
     def prepare_op_call_malloc_nursery(self, op, fcond):
         size_box = op.getarg(0)
         assert isinstance(size_box, ConstInt)
diff --git a/rpython/jit/backend/arm/runner.py b/rpython/jit/backend/arm/runner.py
--- a/rpython/jit/backend/arm/runner.py
+++ b/rpython/jit/backend/arm/runner.py
@@ -23,6 +23,7 @@
     supports_floats = True
     supports_longlong = True
     supports_singlefloats = True
+    supports_load_effective_address = True
 
     from rpython.jit.backend.arm.arch import JITFRAME_FIXED_SIZE
     all_reg_indexes = range(len(all_regs))
diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -14,6 +14,7 @@
 from rpython.jit.metainterp.support import ptr2int
 from rpython.jit.backend.llsupport import symbolic, jitframe
 from rpython.jit.backend.llsupport.symbolic import WORD
+from rpython.jit.backend.llsupport.memcpy import memcpy_fn
 from rpython.jit.backend.llsupport.descr import SizeDescr, ArrayDescr, FieldDescr
 from rpython.jit.backend.llsupport.descr import GcCache, get_field_descr
 from rpython.jit.backend.llsupport.descr import get_array_descr
@@ -36,6 +37,11 @@
             self.fielddescr_vtable = get_field_descr(self, rclass.OBJECT,
                                                      'typeptr')
         self._generated_functions = []
+        self.memcpy_fn = memcpy_fn
+        self.memcpy_descr = get_call_descr(self,
+            [lltype.Signed, lltype.Signed, lltype.Signed], lltype.Void,
+            EffectInfo([], [], [], [], [], [], EffectInfo.EF_CANNOT_RAISE,
+                can_collect=False))
 
     def _setup_str(self):
         self.str_descr     = get_array_descr(self, rstr.STR)
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -34,6 +34,10 @@
      - Add COND_CALLs to the write barrier before SETFIELD_GC and
        SETARRAYITEM_GC operations.
 
+     - Rewrites copystrcontent to a call to memcopy
+
+     - XXX does more than that, please write it down
+
     '_write_barrier_applied' contains a dictionary of variable -> None.
     If a variable is in the dictionary, next setfields can be called without
     a write barrier.  The idea is that an object that was freshly allocated
@@ -335,6 +339,10 @@
                 self.emitting_an_operation_that_can_collect()
             elif op.getopnum() == rop.LABEL:
                 self.emit_label()
+            # ---- change COPY{STR|UNICODE}CONTENT into a call ------
+            if op.opnum == rop.COPYSTRCONTENT or op.opnum == rop.COPYUNICODECONTENT:
+                self.rewrite_copy_str_content(op)
+                continue
             # ---------- write barriers ----------
             if self.gc_ll_descr.write_barrier_descr is not None:
                 if op.getopnum() == rop.SETFIELD_GC:
@@ -953,6 +961,61 @@
         self.gcrefs_output_list.append(gcref)
         return index
 
+    def rewrite_copy_str_content(self, op):
+        funcaddr = llmemory.cast_ptr_to_adr(self.gc_ll_descr.memcpy_fn)
+        memcpy_fn = self.cpu.cast_adr_to_int(funcaddr)
+        memcpy_descr = self.gc_ll_descr.memcpy_descr
+        if op.getopnum() == rop.COPYSTRCONTENT:
+            basesize = self.gc_ll_descr.str_descr.basesize
+            # because we have one extra item after alloc, the actual address
+            # of string start is 1 lower, from extra_item_after_malloc
+            basesize -= 1
+            assert self.gc_ll_descr.str_descr.itemsize == 1
+            itemscale = 0
+        else:
+            basesize = self.gc_ll_descr.unicode_descr.basesize
+            itemsize = self.gc_ll_descr.unicode_descr.itemsize
+            if itemsize == 2:
+                itemscale = 1
+            elif itemsize == 4:
+                itemscale = 2
+            else:
+                assert False, "unknown size of unicode"
+        i1 = self.emit_load_effective_address(op.getarg(0), op.getarg(2),
+                                              basesize, itemscale)
+        i2 = self.emit_load_effective_address(op.getarg(1), op.getarg(3),
+                                              basesize, itemscale)
+        if op.getopnum() == rop.COPYSTRCONTENT:
+            arg = op.getarg(4)
+        else:
+            # do some basic constant folding
+            if isinstance(op.getarg(4), ConstInt):
+                arg = ConstInt(op.getarg(4).getint() << itemscale)
+            else:
+                arg = ResOperation(rop.INT_LSHIFT,
+                                   [op.getarg(4), ConstInt(itemscale)])
+                self.emit_op(arg)
+        self.emit_op(ResOperation(rop.CALL_N,
+            [ConstInt(memcpy_fn), i2, i1, arg], descr=memcpy_descr))
+
+    def emit_load_effective_address(self, v_gcptr, v_index, base, itemscale):
+        if self.cpu.supports_load_effective_address:
+            i1 = ResOperation(rop.LOAD_EFFECTIVE_ADDRESS,
+                              [v_gcptr, v_index, ConstInt(base),
+                               ConstInt(itemscale)])
+            self.emit_op(i1)
+            return i1
+        else:
+            if itemscale > 0:
+                v_index = ResOperation(rop.INT_LSHIFT,
+                                       [v_index, ConstInt(itemscale)])
+                self.emit_op(v_index)
+            i1b = ResOperation(rop.INT_ADD, [v_gcptr, v_index])
+            self.emit_op(i1b)
+            i1 = ResOperation(rop.INT_ADD, [i1b, ConstInt(base)])
+            self.emit_op(i1)
+            return i1
+
     def remove_constptr(self, c):
         """Remove all ConstPtrs, and replace them with load_from_gc_table.
         """
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -142,11 +142,16 @@
         raw_sfdescr = get_array_descr(self.gc_ll_descr, RAW_SF)
         #
         strdescr     = self.gc_ll_descr.str_descr
+        str_basesize = self.gc_ll_descr.str_descr.basesize - 1
         unicodedescr = self.gc_ll_descr.unicode_descr
         strlendescr     = strdescr.lendescr
         unicodelendescr = unicodedescr.lendescr
         strhashdescr     = self.gc_ll_descr.str_hash_descr
         unicodehashdescr = self.gc_ll_descr.unicode_hash_descr
+        uni_basesize  = unicodedescr.basesize
+        uni_itemscale = {2: 1, 4: 2}[unicodedescr.itemsize]
+        memcpy_fn = self.gc_ll_descr.memcpy_fn
+        memcpy_descr = self.gc_ll_descr.memcpy_descr
 
         casmdescr = JitCellToken()
         clt = FakeLoopToken()
@@ -169,6 +174,7 @@
         signedframedescr = self.cpu.signedframedescr
         floatframedescr = self.cpu.floatframedescr
         casmdescr.compiled_loop_token = clt
+
         #
         guarddescr = AbstractFailDescr()
         #
@@ -200,6 +206,7 @@
 
     load_constant_offset = True
     load_supported_factors = (1,2,4,8)
+    supports_load_effective_address = True
 
     translate_support_code = None
 
@@ -237,6 +244,9 @@
             self._cache[key] = r
             return r
 
+    def cast_adr_to_int(self, adr):
+        return llmemory.AddressAsInt(adr)
+
 class TestBoehm(RewriteTests):
     def setup_method(self, meth):
         class FakeCPU(BaseFakeCPU):
@@ -1436,3 +1446,57 @@
             jump()
         """)
         assert len(self.gcrefs) == 2
+
+    def test_rewrite_copystrcontents(self):
+        self.check_rewrite("""
+        [p0, p1, i0, i1, i_len]
+        copystrcontent(p0, p1, i0, i1, i_len)
+        """, """
+        [p0, p1, i0, i1, i_len]
+        i2 = load_effective_address(p0, i0, %(str_basesize)s, 0)
+        i3 = load_effective_address(p1, i1, %(str_basesize)s, 0)
+        call_n(ConstClass(memcpy_fn), i3, i2, i_len, descr=memcpy_descr)
+        """)
+
+    def test_rewrite_copystrcontents_without_load_effective_address(self):
+        self.cpu.supports_load_effective_address = False
+        self.check_rewrite("""
+        [p0, p1, i0, i1, i_len]
+        copystrcontent(p0, p1, i0, i1, i_len)
+        """, """
+        [p0, p1, i0, i1, i_len]
+        i2b = int_add(p0, i0)
+        i2 = int_add(i2b, %(str_basesize)s)
+        i3b = int_add(p1, i1)
+        i3 = int_add(i3b, %(str_basesize)s)
+        call_n(ConstClass(memcpy_fn), i3, i2, i_len, descr=memcpy_descr)
+        """)
+
+    def test_rewrite_copyunicodecontents(self):
+        self.check_rewrite("""
+        [p0, p1, i0, i1, i_len]
+        copyunicodecontent(p0, p1, i0, i1, i_len)
+        """, """
+        [p0, p1, i0, i1, i_len]
+        i2 = load_effective_address(p0, i0, %(uni_basesize)s, %(uni_itemscale)d)
+        i3 = load_effective_address(p1, i1, %(uni_basesize)s, %(uni_itemscale)d)
+        i4 = int_lshift(i_len, %(uni_itemscale)d)
+        call_n(ConstClass(memcpy_fn), i3, i2, i4, descr=memcpy_descr)
+        """)
+
+    def test_rewrite_copyunicodecontents_without_load_effective_address(self):
+        self.cpu.supports_load_effective_address = False
+        self.check_rewrite("""
+        [p0, p1, i0, i1, i_len]
+        copyunicodecontent(p0, p1, i0, i1, i_len)
+        """, """
+        [p0, p1, i0, i1, i_len]
+        i0s = int_lshift(i0, %(uni_itemscale)d)
+        i2b = int_add(p0, i0s)
+        i2 = int_add(i2b, %(uni_basesize)s)
+        i1s = int_lshift(i1, %(uni_itemscale)d)
+        i3b = int_add(p1, i1s)
+        i3 = int_add(i3b, %(uni_basesize)s)
+        i4 = int_lshift(i_len, %(uni_itemscale)d)
+        call_n(ConstClass(memcpy_fn), i3, i2, i4, descr=memcpy_descr)
+        """)
diff --git a/rpython/jit/backend/model.py b/rpython/jit/backend/model.py
--- a/rpython/jit/backend/model.py
+++ b/rpython/jit/backend/model.py
@@ -19,6 +19,7 @@
     # Boxes and Consts are BoxFloats and ConstFloats.
     supports_singlefloats = False
     supports_guard_gc_type = False
+    supports_load_effective_address = False
 
     propagate_exception_descr = None
 
diff --git a/rpython/jit/backend/ppc/opassembler.py b/rpython/jit/backend/ppc/opassembler.py
--- a/rpython/jit/backend/ppc/opassembler.py
+++ b/rpython/jit/backend/ppc/opassembler.py
@@ -966,72 +966,6 @@
         pmc.overwrite()
 
 
-class StrOpAssembler(object):
-
-    _mixin_ = True
-
-    def emit_copystrcontent(self, op, arglocs, regalloc):
-        self._emit_copycontent(arglocs, is_unicode=False)
-
-    def emit_copyunicodecontent(self, op, arglocs, regalloc):
-        self._emit_copycontent(arglocs, is_unicode=True)
-
-    def _emit_load_for_copycontent(self, dst, src_ptr, src_ofs, scale):
-        if src_ofs.is_imm():
-            value = src_ofs.value << scale
-            if value < 32768:
-                self.mc.addi(dst.value, src_ptr.value, value)
-            else:
-                self.mc.load_imm(dst, value)
-                self.mc.add(dst.value, src_ptr.value, dst.value)
-        elif scale == 0:
-            self.mc.add(dst.value, src_ptr.value, src_ofs.value)
-        else:
-            self.mc.sldi(dst.value, src_ofs.value, scale)
-            self.mc.add(dst.value, src_ptr.value, dst.value)
-
-    def _emit_copycontent(self, arglocs, is_unicode):
-        [src_ptr_loc, dst_ptr_loc,
-         src_ofs_loc, dst_ofs_loc, length_loc] = arglocs
-
-        if is_unicode:
-            basesize, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
-                                        self.cpu.translate_support_code)
-            if   itemsize == 2: scale = 1
-            elif itemsize == 4: scale = 2
-            else: raise AssertionError
-        else:
-            basesize, itemsize, _ = symbolic.get_array_token(rstr.STR,
-                                        self.cpu.translate_support_code)
-            assert itemsize == 1
-            basesize -= 1     # for the extra null character
-            scale = 0
-
-        self._emit_load_for_copycontent(r.r0, src_ptr_loc, src_ofs_loc, scale)
-        self._emit_load_for_copycontent(r.r2, dst_ptr_loc, dst_ofs_loc, scale)
-
-        if length_loc.is_imm():
-            length = length_loc.getint()
-            self.mc.load_imm(r.r5, length << scale)
-        else:
-            if scale > 0:
-                self.mc.sldi(r.r5.value, length_loc.value, scale)
-            elif length_loc is not r.r5:
-                self.mc.mr(r.r5.value, length_loc.value)
-
-        self.mc.mr(r.r4.value, r.r0.value)
-        self.mc.addi(r.r4.value, r.r4.value, basesize)
-        self.mc.addi(r.r3.value, r.r2.value, basesize)
-
-        self.mc.load_imm(self.mc.RAW_CALL_REG, self.memcpy_addr)
-        self.mc.raw_call()
-
-
-class UnicodeOpAssembler(object):
-    _mixin_ = True
-    # empty!
-
-
 class AllocOpAssembler(object):
 
     _mixin_ = True
@@ -1336,8 +1270,7 @@
 
 class OpAssembler(IntOpAssembler, GuardOpAssembler,
                   MiscOpAssembler, FieldOpAssembler,
-                  StrOpAssembler, CallOpAssembler,
-                  UnicodeOpAssembler, ForceOpAssembler,
+                  CallOpAssembler, ForceOpAssembler,
                   AllocOpAssembler, FloatOpAssembler,
                   VectorAssembler):
     _mixin_ = True
diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -802,18 +802,6 @@
         temp_loc = r.SCRATCH2
         return [base_loc, temp_loc]
 
-    def prepare_copystrcontent(self, op):
-        src_ptr_loc = self.ensure_reg(op.getarg(0))
-        dst_ptr_loc = self.ensure_reg(op.getarg(1))
-        src_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(2))
-        dst_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(3))
-        length_loc  = self.ensure_reg_or_any_imm(op.getarg(4))
-        self._spill_before_call(gc_level=0)
-        return [src_ptr_loc, dst_ptr_loc,
-                src_ofs_loc, dst_ofs_loc, length_loc]
-
-    prepare_copyunicodecontent = prepare_copystrcontent
-
     prepare_same_as_i = helper.prepare_unary_op
     prepare_same_as_r = helper.prepare_unary_op
     prepare_same_as_f = helper.prepare_unary_op
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1222,78 +1222,16 @@
         resloc = self.force_allocate_reg(op, [op.getarg(0)])
         self.perform(op, [argloc], resloc)
 
-    def consider_copystrcontent(self, op):
-        self._consider_copystrcontent(op, is_unicode=False)
-
-    def consider_copyunicodecontent(self, op):
-        self._consider_copystrcontent(op, is_unicode=True)
-
-    def _consider_copystrcontent(self, op, is_unicode):
-        # compute the source address
-        args = op.getarglist()
-        base_loc = self.rm.make_sure_var_in_reg(args[0], args)
-        ofs_loc = self.rm.make_sure_var_in_reg(args[2], args)
-        assert args[0] is not args[1]    # forbidden case of aliasing
-        srcaddr_box = TempVar()
-        forbidden_vars = [args[1], args[3], args[4], srcaddr_box]
-        srcaddr_loc = self.rm.force_allocate_reg(srcaddr_box, forbidden_vars)
-        self._gen_address_inside_string(base_loc, ofs_loc, srcaddr_loc,
-                                        is_unicode=is_unicode)
-        # compute the destination address
-        base_loc = self.rm.make_sure_var_in_reg(args[1], forbidden_vars)
-        ofs_loc = self.rm.make_sure_var_in_reg(args[3], forbidden_vars)
-        forbidden_vars = [args[4], srcaddr_box]
-        dstaddr_box = TempVar()
-        dstaddr_loc = self.rm.force_allocate_reg(dstaddr_box, forbidden_vars)
-        self._gen_address_inside_string(base_loc, ofs_loc, dstaddr_loc,
-                                        is_unicode=is_unicode)
-        # compute the length in bytes
-        length_box = args[4]
-        length_loc = self.loc(length_box)
-        if is_unicode:
-            forbidden_vars = [srcaddr_box, dstaddr_box]
-            bytes_box = TempVar()
-            bytes_loc = self.rm.force_allocate_reg(bytes_box, forbidden_vars)
-            scale = self._get_unicode_item_scale()
-            if not (isinstance(length_loc, ImmedLoc) or
-                    isinstance(length_loc, RegLoc)):
-                self.assembler.mov(length_loc, bytes_loc)
-                length_loc = bytes_loc
-            self.assembler.load_effective_addr(length_loc, 0, scale, bytes_loc)
-            length_box = bytes_box
-            length_loc = bytes_loc
-        # call memcpy()
-        self.rm.before_call()
-        self.xrm.before_call()
-        self.assembler.simple_call_no_collect(imm(self.assembler.memcpy_addr),
-                                        [dstaddr_loc, srcaddr_loc, length_loc])
-        self.rm.possibly_free_var(length_box)
-        self.rm.possibly_free_var(dstaddr_box)
-        self.rm.possibly_free_var(srcaddr_box)
-
-    def _gen_address_inside_string(self, baseloc, ofsloc, resloc, is_unicode):
-        if is_unicode:
-            ofs_items, _, _ = symbolic.get_array_token(rstr.UNICODE,
-                                                  self.translate_support_code)
-            scale = self._get_unicode_item_scale()
-        else:
-            ofs_items, itemsize, _ = symbolic.get_array_token(rstr.STR,
-                                                  self.translate_support_code)
-            assert itemsize == 1
-            ofs_items -= 1     # for the extra null character
-            scale = 0
-        self.assembler.load_effective_addr(ofsloc, ofs_items, scale,
-                                           resloc, baseloc)
-
-    def _get_unicode_item_scale(self):
-        _, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
-                                                  self.translate_support_code)
-        if itemsize == 4:
-            return 2
-        elif itemsize == 2:
-            return 1
-        else:
-            raise AssertionError("bad unicode item size")
+    def consider_load_effective_address(self, op):
+        p0 = op.getarg(0)
+        i0 = op.getarg(1)
+        ploc = self.make_sure_var_in_reg(p0, [i0])
+        iloc = self.make_sure_var_in_reg(i0, [p0])
+        res = self.rm.force_allocate_reg(op, [p0, i0])
+        assert isinstance(op.getarg(2), ConstInt)
+        assert isinstance(op.getarg(3), ConstInt)
+        self.assembler.load_effective_addr(iloc, op.getarg(2).getint(),
+            op.getarg(3).getint(), res, ploc)
 
     def _consider_math_read_timestamp(self, op):
         # hint: try to move unrelated registers away from eax and edx now
diff --git a/rpython/jit/backend/x86/runner.py b/rpython/jit/backend/x86/runner.py
--- a/rpython/jit/backend/x86/runner.py
+++ b/rpython/jit/backend/x86/runner.py
@@ -16,6 +16,7 @@
     debug = True
     supports_floats = True
     supports_singlefloats = True
+    supports_load_effective_address = True
 
     dont_keepalive_stuff = False # for tests
     with_threads = False
diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py
--- a/rpython/jit/backend/zarch/opassembler.py
+++ b/rpython/jit/backend/zarch/opassembler.py
@@ -963,75 +963,15 @@
     def _mem_offset_supported(self, value):
         return -2**19 <= value < 2**19
 
-    def emit_copystrcontent(self, op, arglocs, regalloc):
-        self._emit_copycontent(arglocs, is_unicode=False)
-
-    def emit_copyunicodecontent(self, op, arglocs, regalloc):
-        self._emit_copycontent(arglocs, is_unicode=True)
-
-    def _emit_load_for_copycontent(self, dst, src_ptr, src_ofs, scale):
-        if src_ofs.is_imm():
-            value = src_ofs.value << scale
-            if check_imm_value(value):
-                self.mc.AGHIK(dst, src_ptr, l.imm(value))
-            else:
-                # it is fine to use r1 here, because it will
-                # only hold a value before invoking the memory copy
-                self.mc.load_imm(r.SCRATCH, value)
-                self.mc.AGRK(dst, src_ptr, r.SCRATCH)
-        elif scale == 0:
-            self.mc.AGRK(dst, src_ptr, src_ofs)
-        else:
-            self.mc.SLLG(r.SCRATCH, src_ofs, l.addr(scale))
-            self.mc.AGRK(dst, src_ptr, r.SCRATCH)
-
-    def _emit_copycontent(self, arglocs, is_unicode):
-        [src_ptr_loc, dst_ptr_loc,
-         src_ofs_loc, dst_ofs_loc, length_loc] = arglocs
-
-        if is_unicode:
-            basesize, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
-                                        self.cpu.translate_support_code)
-            if   itemsize == 2: scale = 1
-            elif itemsize == 4: scale = 2
-            else: raise AssertionError
-        else:
-            basesize, itemsize, _ = symbolic.get_array_token(rstr.STR,
-                                        self.cpu.translate_support_code)
-            assert itemsize == 1
-            basesize -= 1     # for the extra null character
-            scale = 0
-
-        # src and src_len are tmp registers
-        src = src_ptr_loc
-        src_len = r.odd_reg(src)
-        dst = r.r0
-        dst_len = r.r1
-        self._emit_load_for_copycontent(src, src_ptr_loc, src_ofs_loc, scale)
-        self._emit_load_for_copycontent(dst, dst_ptr_loc, dst_ofs_loc, scale)
-
-        if length_loc.is_imm():
-            length = length_loc.getint()
-            self.mc.load_imm(dst_len, length << scale)
-        else:
-            if scale > 0:
-                self.mc.SLLG(dst_len, length_loc, l.addr(scale))
-            else:
-                self.mc.LGR(dst_len, length_loc)
-        # ensure that src_len is as long as dst_len, otherwise
-        # padding bytes are written to dst
-        self.mc.LGR(src_len, dst_len)
-
-        self.mc.AGHI(src, l.imm(basesize))
-        self.mc.AGHI(dst, l.imm(basesize))
-
-        # s390x has memset directly as a hardware instruction!!
-        # 0xB8 means we might reference dst later
-        self.mc.MVCLE(dst, src, l.addr(0xB8))
-        # NOTE this instruction can (determined by the cpu), just
-        # quit the movement any time, thus it is looped until all bytes
-        # are copied!
-        self.mc.BRC(c.OF, l.imm(-self.mc.MVCLE_byte_count))
+    # ...copystrcontent logic was removed, but note that
+    # if we want to reintroduce support for that:
+    # s390x has memset directly as a hardware instruction!!
+    # 0xB8 means we might reference dst later
+    #self.mc.MVCLE(dst, src, l.addr(0xB8))
+    # NOTE this instruction can (determined by the cpu), just
+    # quit the movement any time, thus it is looped until all bytes
+    # are copied!
+    #self.mc.BRC(c.OF, l.imm(-self.mc.MVCLE_byte_count))
 
     def emit_zero_array(self, op, arglocs, regalloc):
         base_loc, startindex_loc, length_loc, \
diff --git a/rpython/jit/backend/zarch/regalloc.py b/rpython/jit/backend/zarch/regalloc.py
--- a/rpython/jit/backend/zarch/regalloc.py
+++ b/rpython/jit/backend/zarch/regalloc.py
@@ -1269,29 +1269,6 @@
         loc1 = self.ensure_reg(op.getarg(1))
         return [loc0, loc1]
 
-    def prepare_copystrcontent(self, op):
-        """ this function needs five registers.
-            src & src_len: are allocated using ensure_even_odd_pair.
-              note that these are tmp registers, thus the actual variable
-              value is not modified.
-            src_len: when entering the assembler, src_ofs_loc's value is contained
-              in src_len register.
-        """
-        src_ptr_loc, _ = \
-                self.rm.ensure_even_odd_pair(op.getarg(0),
-                             None, bind_first=True, 
-                             must_exist=False, load_loc_odd=False)
-        src_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(2))
-        dst_ptr_loc = self.ensure_reg(op.getarg(1))
-        dst_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(3))
-        length_loc  = self.ensure_reg_or_any_imm(op.getarg(4))
-        # no need to spill, we do not call memcpy, but we use s390x's
-        # hardware instruction to copy memory
-        return [src_ptr_loc, dst_ptr_loc,
-                src_ofs_loc, dst_ofs_loc, length_loc]
-
-    prepare_copyunicodecontent = prepare_copystrcontent
-
     def prepare_label(self, op):
         descr = op.getdescr()
         assert isinstance(descr, TargetToken)
diff --git a/rpython/jit/metainterp/executor.py b/rpython/jit/metainterp/executor.py
--- a/rpython/jit/metainterp/executor.py
+++ b/rpython/jit/metainterp/executor.py
@@ -441,6 +441,7 @@
                          rop.GC_STORE,
                          rop.GC_STORE_INDEXED,
                          rop.LOAD_FROM_GC_TABLE,
+                         rop.LOAD_EFFECTIVE_ADDRESS,
                          ):      # list of opcodes never executed by pyjitpl
                 continue
             if rop._VEC_PURE_FIRST <= value <= rop._VEC_PURE_LAST:
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -1055,6 +1055,8 @@
     'UNICODEGETITEM/2/i',
     #
     'LOAD_FROM_GC_TABLE/1/r',    # only emitted by rewrite.py
+    'LOAD_EFFECTIVE_ADDRESS/4/i', # only emitted by rewrite.py, only if
+    # cpu.supports_load_effective_address. [v_gcptr,v_index,c_baseofs,c_shift]
     #
     '_ALWAYS_PURE_LAST',  # ----- end of always_pure operations -----
 


More information about the pypy-commit mailing list