[pypy-commit] pypy default: hg merge copystrcontents-in-rewrite
arigo
pypy.commits at gmail.com
Sun Jun 9 07:31:23 EDT 2019
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r96783:77c16f38a6e2
Date: 2019-06-09 13:30 +0200
http://bitbucket.org/pypy/pypy/changeset/77c16f38a6e2/
Log: hg merge copystrcontents-in-rewrite
Remove copystrcontent and copyunicodecontent in the backends.
Instead, replace them in rewrite.py with a direct call to memcpy() and
a new basic operation, load_effective_address, which a backend can
even decide not to implement.
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -834,73 +834,11 @@
else:
assert 0
- #from ../x86/regalloc.py:928 ff.
- def emit_op_copystrcontent(self, op, arglocs, regalloc, fcond):
- assert len(arglocs) == 0
- self._emit_copystrcontent(op, regalloc, fcond, is_unicode=False)
+ def emit_op_load_effective_address(self, op, arglocs, regalloc, fcond):
+ self._gen_address(arglocs[4], arglocs[0], arglocs[1], arglocs[3].value,
+ arglocs[2].value)
return fcond
- def emit_op_copyunicodecontent(self, op, arglocs, regalloc, fcond):
- assert len(arglocs) == 0
- self._emit_copystrcontent(op, regalloc, fcond, is_unicode=True)
- return fcond
-
- def _emit_copystrcontent(self, op, regalloc, fcond, is_unicode):
- # compute the source address
- args = op.getarglist()
- base_loc = regalloc.rm.make_sure_var_in_reg(args[0], args)
- ofs_loc = regalloc.rm.make_sure_var_in_reg(args[2], args)
- assert args[0] is not args[1] # forbidden case of aliasing
- srcaddr_box = TempVar()
- forbidden_vars = [args[1], args[3], args[4], srcaddr_box]
- srcaddr_loc = regalloc.rm.force_allocate_reg(srcaddr_box, forbidden_vars)
- self._gen_address_inside_string(base_loc, ofs_loc, srcaddr_loc,
- is_unicode=is_unicode)
- # compute the destination address
- base_loc = regalloc.rm.make_sure_var_in_reg(args[1], forbidden_vars)
- ofs_loc = regalloc.rm.make_sure_var_in_reg(args[3], forbidden_vars)
- forbidden_vars = [args[4], srcaddr_box]
- dstaddr_box = TempVar()
- dstaddr_loc = regalloc.rm.force_allocate_reg(dstaddr_box, forbidden_vars)
- self._gen_address_inside_string(base_loc, ofs_loc, dstaddr_loc,
- is_unicode=is_unicode)
- # compute the length in bytes
- length_box = args[4]
- length_loc = regalloc.loc(length_box)
- if is_unicode:
- forbidden_vars = [srcaddr_box, dstaddr_box]
- bytes_box = TempVar()
- bytes_loc = regalloc.rm.force_allocate_reg(bytes_box, forbidden_vars)
- scale = self._get_unicode_item_scale()
- if not length_loc.is_core_reg():
- self.regalloc_mov(length_loc, bytes_loc)
- length_loc = bytes_loc
- assert length_loc.is_core_reg()
- self.mc.MOV_ri(r.ip.value, 1 << scale)
- self.mc.MUL(bytes_loc.value, r.ip.value, length_loc.value)
- length_box = bytes_box
- length_loc = bytes_loc
- # call memcpy()
- regalloc.before_call()
- self.simple_call_no_collect(imm(self.memcpy_addr),
- [dstaddr_loc, srcaddr_loc, length_loc])
- regalloc.rm.possibly_free_var(length_box)
- regalloc.rm.possibly_free_var(dstaddr_box)
- regalloc.rm.possibly_free_var(srcaddr_box)
-
- def _gen_address_inside_string(self, baseloc, ofsloc, resloc, is_unicode):
- if is_unicode:
- ofs_items, _, _ = symbolic.get_array_token(rstr.UNICODE,
- self.cpu.translate_support_code)
- scale = self._get_unicode_item_scale()
- else:
- ofs_items, itemsize, _ = symbolic.get_array_token(rstr.STR,
- self.cpu.translate_support_code)
- assert itemsize == 1
- ofs_items -= 1 # for the extra null character
- scale = 0
- self._gen_address(resloc, baseloc, ofsloc, scale, ofs_items)
-
# result = base_loc + (scaled_loc << scale) + static_offset
def _gen_address(self, result, base_loc, scaled_loc, scale=0, static_offset=0):
assert scaled_loc.is_core_reg()
@@ -915,16 +853,6 @@
self.mc.ADD_rr(result.value, base_loc.value, scaled_loc.value)
self.mc.ADD_ri(result.value, result.value, static_offset)
- def _get_unicode_item_scale(self):
- _, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
- self.cpu.translate_support_code)
- if itemsize == 4:
- return 2
- elif itemsize == 2:
- return 1
- else:
- raise AssertionError("bad unicode item size")
-
def store_force_descr(self, op, fail_locs, frame_depth):
pos = self.mc.currpos()
guard_token = self.build_guard_token(op, frame_depth, fail_locs, pos, c.AL)
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -873,8 +873,6 @@
prepare_op_gc_load_indexed_r = _prepare_op_gc_load_indexed
prepare_op_gc_load_indexed_f = _prepare_op_gc_load_indexed
- prepare_op_copystrcontent = void
- prepare_op_copyunicodecontent = void
prepare_op_zero_array = void
def _prepare_op_same_as(self, op, fcond):
@@ -899,6 +897,13 @@
resloc = self.force_allocate_reg(op)
return [resloc]
+ def prepare_op_load_effective_address(self, op, fcond):
+ args = op.getarglist()
+ arg0 = self.make_sure_var_in_reg(args[0], args)
+ arg1 = self.make_sure_var_in_reg(args[1], args)
+ res = self.force_allocate_reg(op)
+ return [arg0, arg1, args[2], args[3], res]
+
def prepare_op_call_malloc_nursery(self, op, fcond):
size_box = op.getarg(0)
assert isinstance(size_box, ConstInt)
diff --git a/rpython/jit/backend/arm/runner.py b/rpython/jit/backend/arm/runner.py
--- a/rpython/jit/backend/arm/runner.py
+++ b/rpython/jit/backend/arm/runner.py
@@ -23,6 +23,7 @@
supports_floats = True
supports_longlong = True
supports_singlefloats = True
+ supports_load_effective_address = True
from rpython.jit.backend.arm.arch import JITFRAME_FIXED_SIZE
all_reg_indexes = range(len(all_regs))
diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -14,6 +14,7 @@
from rpython.jit.metainterp.support import ptr2int
from rpython.jit.backend.llsupport import symbolic, jitframe
from rpython.jit.backend.llsupport.symbolic import WORD
+from rpython.jit.backend.llsupport.memcpy import memcpy_fn
from rpython.jit.backend.llsupport.descr import SizeDescr, ArrayDescr, FieldDescr
from rpython.jit.backend.llsupport.descr import GcCache, get_field_descr
from rpython.jit.backend.llsupport.descr import get_array_descr
@@ -36,6 +37,11 @@
self.fielddescr_vtable = get_field_descr(self, rclass.OBJECT,
'typeptr')
self._generated_functions = []
+ self.memcpy_fn = memcpy_fn
+ self.memcpy_descr = get_call_descr(self,
+ [lltype.Signed, lltype.Signed, lltype.Signed], lltype.Void,
+ EffectInfo([], [], [], [], [], [], EffectInfo.EF_CANNOT_RAISE,
+ can_collect=False))
def _setup_str(self):
self.str_descr = get_array_descr(self, rstr.STR)
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -34,6 +34,10 @@
- Add COND_CALLs to the write barrier before SETFIELD_GC and
SETARRAYITEM_GC operations.
+ - Rewrites copystrcontent and copyunicodecontent to a call to memcpy()
+
+ - XXX does more than that, please write it down
+
'_write_barrier_applied' contains a dictionary of variable -> None.
If a variable is in the dictionary, next setfields can be called without
a write barrier. The idea is that an object that was freshly allocated
@@ -335,6 +339,10 @@
self.emitting_an_operation_that_can_collect()
elif op.getopnum() == rop.LABEL:
self.emit_label()
+ # ---- change COPY{STR|UNICODE}CONTENT into a call ------
+ if op.opnum == rop.COPYSTRCONTENT or op.opnum == rop.COPYUNICODECONTENT:
+ self.rewrite_copy_str_content(op)
+ continue
# ---------- write barriers ----------
if self.gc_ll_descr.write_barrier_descr is not None:
if op.getopnum() == rop.SETFIELD_GC:
@@ -953,6 +961,61 @@
self.gcrefs_output_list.append(gcref)
return index
+ def rewrite_copy_str_content(self, op):
+ funcaddr = llmemory.cast_ptr_to_adr(self.gc_ll_descr.memcpy_fn)
+ memcpy_fn = self.cpu.cast_adr_to_int(funcaddr)
+ memcpy_descr = self.gc_ll_descr.memcpy_descr
+ if op.getopnum() == rop.COPYSTRCONTENT:
+ basesize = self.gc_ll_descr.str_descr.basesize
+ # because we have one extra item after alloc, the actual address
+ # of string start is 1 lower, from extra_item_after_malloc
+ basesize -= 1
+ assert self.gc_ll_descr.str_descr.itemsize == 1
+ itemscale = 0
+ else:
+ basesize = self.gc_ll_descr.unicode_descr.basesize
+ itemsize = self.gc_ll_descr.unicode_descr.itemsize
+ if itemsize == 2:
+ itemscale = 1
+ elif itemsize == 4:
+ itemscale = 2
+ else:
+ assert False, "unknown size of unicode"
+ i1 = self.emit_load_effective_address(op.getarg(0), op.getarg(2),
+ basesize, itemscale)
+ i2 = self.emit_load_effective_address(op.getarg(1), op.getarg(3),
+ basesize, itemscale)
+ if op.getopnum() == rop.COPYSTRCONTENT:
+ arg = op.getarg(4)
+ else:
+ # do some basic constant folding
+ if isinstance(op.getarg(4), ConstInt):
+ arg = ConstInt(op.getarg(4).getint() << itemscale)
+ else:
+ arg = ResOperation(rop.INT_LSHIFT,
+ [op.getarg(4), ConstInt(itemscale)])
+ self.emit_op(arg)
+ self.emit_op(ResOperation(rop.CALL_N,
+ [ConstInt(memcpy_fn), i2, i1, arg], descr=memcpy_descr))
+
+ def emit_load_effective_address(self, v_gcptr, v_index, base, itemscale):
+ if self.cpu.supports_load_effective_address:
+ i1 = ResOperation(rop.LOAD_EFFECTIVE_ADDRESS,
+ [v_gcptr, v_index, ConstInt(base),
+ ConstInt(itemscale)])
+ self.emit_op(i1)
+ return i1
+ else:
+ if itemscale > 0:
+ v_index = ResOperation(rop.INT_LSHIFT,
+ [v_index, ConstInt(itemscale)])
+ self.emit_op(v_index)
+ i1b = ResOperation(rop.INT_ADD, [v_gcptr, v_index])
+ self.emit_op(i1b)
+ i1 = ResOperation(rop.INT_ADD, [i1b, ConstInt(base)])
+ self.emit_op(i1)
+ return i1
+
def remove_constptr(self, c):
"""Remove all ConstPtrs, and replace them with load_from_gc_table.
"""
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -142,11 +142,16 @@
raw_sfdescr = get_array_descr(self.gc_ll_descr, RAW_SF)
#
strdescr = self.gc_ll_descr.str_descr
+ str_basesize = self.gc_ll_descr.str_descr.basesize - 1
unicodedescr = self.gc_ll_descr.unicode_descr
strlendescr = strdescr.lendescr
unicodelendescr = unicodedescr.lendescr
strhashdescr = self.gc_ll_descr.str_hash_descr
unicodehashdescr = self.gc_ll_descr.unicode_hash_descr
+ uni_basesize = unicodedescr.basesize
+ uni_itemscale = {2: 1, 4: 2}[unicodedescr.itemsize]
+ memcpy_fn = self.gc_ll_descr.memcpy_fn
+ memcpy_descr = self.gc_ll_descr.memcpy_descr
casmdescr = JitCellToken()
clt = FakeLoopToken()
@@ -169,6 +174,7 @@
signedframedescr = self.cpu.signedframedescr
floatframedescr = self.cpu.floatframedescr
casmdescr.compiled_loop_token = clt
+
#
guarddescr = AbstractFailDescr()
#
@@ -200,6 +206,7 @@
load_constant_offset = True
load_supported_factors = (1,2,4,8)
+ supports_load_effective_address = True
translate_support_code = None
@@ -237,6 +244,9 @@
self._cache[key] = r
return r
+ def cast_adr_to_int(self, adr):
+ return llmemory.AddressAsInt(adr)
+
class TestBoehm(RewriteTests):
def setup_method(self, meth):
class FakeCPU(BaseFakeCPU):
@@ -1436,3 +1446,57 @@
jump()
""")
assert len(self.gcrefs) == 2
+
+ def test_rewrite_copystrcontents(self):
+ self.check_rewrite("""
+ [p0, p1, i0, i1, i_len]
+ copystrcontent(p0, p1, i0, i1, i_len)
+ """, """
+ [p0, p1, i0, i1, i_len]
+ i2 = load_effective_address(p0, i0, %(str_basesize)s, 0)
+ i3 = load_effective_address(p1, i1, %(str_basesize)s, 0)
+ call_n(ConstClass(memcpy_fn), i3, i2, i_len, descr=memcpy_descr)
+ """)
+
+ def test_rewrite_copystrcontents_without_load_effective_address(self):
+ self.cpu.supports_load_effective_address = False
+ self.check_rewrite("""
+ [p0, p1, i0, i1, i_len]
+ copystrcontent(p0, p1, i0, i1, i_len)
+ """, """
+ [p0, p1, i0, i1, i_len]
+ i2b = int_add(p0, i0)
+ i2 = int_add(i2b, %(str_basesize)s)
+ i3b = int_add(p1, i1)
+ i3 = int_add(i3b, %(str_basesize)s)
+ call_n(ConstClass(memcpy_fn), i3, i2, i_len, descr=memcpy_descr)
+ """)
+
+ def test_rewrite_copyunicodecontents(self):
+ self.check_rewrite("""
+ [p0, p1, i0, i1, i_len]
+ copyunicodecontent(p0, p1, i0, i1, i_len)
+ """, """
+ [p0, p1, i0, i1, i_len]
+ i2 = load_effective_address(p0, i0, %(uni_basesize)s, %(uni_itemscale)d)
+ i3 = load_effective_address(p1, i1, %(uni_basesize)s, %(uni_itemscale)d)
+ i4 = int_lshift(i_len, %(uni_itemscale)d)
+ call_n(ConstClass(memcpy_fn), i3, i2, i4, descr=memcpy_descr)
+ """)
+
+ def test_rewrite_copyunicodecontents_without_load_effective_address(self):
+ self.cpu.supports_load_effective_address = False
+ self.check_rewrite("""
+ [p0, p1, i0, i1, i_len]
+ copyunicodecontent(p0, p1, i0, i1, i_len)
+ """, """
+ [p0, p1, i0, i1, i_len]
+ i0s = int_lshift(i0, %(uni_itemscale)d)
+ i2b = int_add(p0, i0s)
+ i2 = int_add(i2b, %(uni_basesize)s)
+ i1s = int_lshift(i1, %(uni_itemscale)d)
+ i3b = int_add(p1, i1s)
+ i3 = int_add(i3b, %(uni_basesize)s)
+ i4 = int_lshift(i_len, %(uni_itemscale)d)
+ call_n(ConstClass(memcpy_fn), i3, i2, i4, descr=memcpy_descr)
+ """)
diff --git a/rpython/jit/backend/model.py b/rpython/jit/backend/model.py
--- a/rpython/jit/backend/model.py
+++ b/rpython/jit/backend/model.py
@@ -19,6 +19,7 @@
# Boxes and Consts are BoxFloats and ConstFloats.
supports_singlefloats = False
supports_guard_gc_type = False
+ supports_load_effective_address = False
propagate_exception_descr = None
diff --git a/rpython/jit/backend/ppc/opassembler.py b/rpython/jit/backend/ppc/opassembler.py
--- a/rpython/jit/backend/ppc/opassembler.py
+++ b/rpython/jit/backend/ppc/opassembler.py
@@ -966,72 +966,6 @@
pmc.overwrite()
-class StrOpAssembler(object):
-
- _mixin_ = True
-
- def emit_copystrcontent(self, op, arglocs, regalloc):
- self._emit_copycontent(arglocs, is_unicode=False)
-
- def emit_copyunicodecontent(self, op, arglocs, regalloc):
- self._emit_copycontent(arglocs, is_unicode=True)
-
- def _emit_load_for_copycontent(self, dst, src_ptr, src_ofs, scale):
- if src_ofs.is_imm():
- value = src_ofs.value << scale
- if value < 32768:
- self.mc.addi(dst.value, src_ptr.value, value)
- else:
- self.mc.load_imm(dst, value)
- self.mc.add(dst.value, src_ptr.value, dst.value)
- elif scale == 0:
- self.mc.add(dst.value, src_ptr.value, src_ofs.value)
- else:
- self.mc.sldi(dst.value, src_ofs.value, scale)
- self.mc.add(dst.value, src_ptr.value, dst.value)
-
- def _emit_copycontent(self, arglocs, is_unicode):
- [src_ptr_loc, dst_ptr_loc,
- src_ofs_loc, dst_ofs_loc, length_loc] = arglocs
-
- if is_unicode:
- basesize, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
- self.cpu.translate_support_code)
- if itemsize == 2: scale = 1
- elif itemsize == 4: scale = 2
- else: raise AssertionError
- else:
- basesize, itemsize, _ = symbolic.get_array_token(rstr.STR,
- self.cpu.translate_support_code)
- assert itemsize == 1
- basesize -= 1 # for the extra null character
- scale = 0
-
- self._emit_load_for_copycontent(r.r0, src_ptr_loc, src_ofs_loc, scale)
- self._emit_load_for_copycontent(r.r2, dst_ptr_loc, dst_ofs_loc, scale)
-
- if length_loc.is_imm():
- length = length_loc.getint()
- self.mc.load_imm(r.r5, length << scale)
- else:
- if scale > 0:
- self.mc.sldi(r.r5.value, length_loc.value, scale)
- elif length_loc is not r.r5:
- self.mc.mr(r.r5.value, length_loc.value)
-
- self.mc.mr(r.r4.value, r.r0.value)
- self.mc.addi(r.r4.value, r.r4.value, basesize)
- self.mc.addi(r.r3.value, r.r2.value, basesize)
-
- self.mc.load_imm(self.mc.RAW_CALL_REG, self.memcpy_addr)
- self.mc.raw_call()
-
-
-class UnicodeOpAssembler(object):
- _mixin_ = True
- # empty!
-
-
class AllocOpAssembler(object):
_mixin_ = True
@@ -1336,8 +1270,7 @@
class OpAssembler(IntOpAssembler, GuardOpAssembler,
MiscOpAssembler, FieldOpAssembler,
- StrOpAssembler, CallOpAssembler,
- UnicodeOpAssembler, ForceOpAssembler,
+ CallOpAssembler, ForceOpAssembler,
AllocOpAssembler, FloatOpAssembler,
VectorAssembler):
_mixin_ = True
diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -802,18 +802,6 @@
temp_loc = r.SCRATCH2
return [base_loc, temp_loc]
- def prepare_copystrcontent(self, op):
- src_ptr_loc = self.ensure_reg(op.getarg(0))
- dst_ptr_loc = self.ensure_reg(op.getarg(1))
- src_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(2))
- dst_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(3))
- length_loc = self.ensure_reg_or_any_imm(op.getarg(4))
- self._spill_before_call(gc_level=0)
- return [src_ptr_loc, dst_ptr_loc,
- src_ofs_loc, dst_ofs_loc, length_loc]
-
- prepare_copyunicodecontent = prepare_copystrcontent
-
prepare_same_as_i = helper.prepare_unary_op
prepare_same_as_r = helper.prepare_unary_op
prepare_same_as_f = helper.prepare_unary_op
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1222,78 +1222,16 @@
resloc = self.force_allocate_reg(op, [op.getarg(0)])
self.perform(op, [argloc], resloc)
- def consider_copystrcontent(self, op):
- self._consider_copystrcontent(op, is_unicode=False)
-
- def consider_copyunicodecontent(self, op):
- self._consider_copystrcontent(op, is_unicode=True)
-
- def _consider_copystrcontent(self, op, is_unicode):
- # compute the source address
- args = op.getarglist()
- base_loc = self.rm.make_sure_var_in_reg(args[0], args)
- ofs_loc = self.rm.make_sure_var_in_reg(args[2], args)
- assert args[0] is not args[1] # forbidden case of aliasing
- srcaddr_box = TempVar()
- forbidden_vars = [args[1], args[3], args[4], srcaddr_box]
- srcaddr_loc = self.rm.force_allocate_reg(srcaddr_box, forbidden_vars)
- self._gen_address_inside_string(base_loc, ofs_loc, srcaddr_loc,
- is_unicode=is_unicode)
- # compute the destination address
- base_loc = self.rm.make_sure_var_in_reg(args[1], forbidden_vars)
- ofs_loc = self.rm.make_sure_var_in_reg(args[3], forbidden_vars)
- forbidden_vars = [args[4], srcaddr_box]
- dstaddr_box = TempVar()
- dstaddr_loc = self.rm.force_allocate_reg(dstaddr_box, forbidden_vars)
- self._gen_address_inside_string(base_loc, ofs_loc, dstaddr_loc,
- is_unicode=is_unicode)
- # compute the length in bytes
- length_box = args[4]
- length_loc = self.loc(length_box)
- if is_unicode:
- forbidden_vars = [srcaddr_box, dstaddr_box]
- bytes_box = TempVar()
- bytes_loc = self.rm.force_allocate_reg(bytes_box, forbidden_vars)
- scale = self._get_unicode_item_scale()
- if not (isinstance(length_loc, ImmedLoc) or
- isinstance(length_loc, RegLoc)):
- self.assembler.mov(length_loc, bytes_loc)
- length_loc = bytes_loc
- self.assembler.load_effective_addr(length_loc, 0, scale, bytes_loc)
- length_box = bytes_box
- length_loc = bytes_loc
- # call memcpy()
- self.rm.before_call()
- self.xrm.before_call()
- self.assembler.simple_call_no_collect(imm(self.assembler.memcpy_addr),
- [dstaddr_loc, srcaddr_loc, length_loc])
- self.rm.possibly_free_var(length_box)
- self.rm.possibly_free_var(dstaddr_box)
- self.rm.possibly_free_var(srcaddr_box)
-
- def _gen_address_inside_string(self, baseloc, ofsloc, resloc, is_unicode):
- if is_unicode:
- ofs_items, _, _ = symbolic.get_array_token(rstr.UNICODE,
- self.translate_support_code)
- scale = self._get_unicode_item_scale()
- else:
- ofs_items, itemsize, _ = symbolic.get_array_token(rstr.STR,
- self.translate_support_code)
- assert itemsize == 1
- ofs_items -= 1 # for the extra null character
- scale = 0
- self.assembler.load_effective_addr(ofsloc, ofs_items, scale,
- resloc, baseloc)
-
- def _get_unicode_item_scale(self):
- _, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
- self.translate_support_code)
- if itemsize == 4:
- return 2
- elif itemsize == 2:
- return 1
- else:
- raise AssertionError("bad unicode item size")
+ def consider_load_effective_address(self, op):
+ p0 = op.getarg(0)
+ i0 = op.getarg(1)
+ ploc = self.make_sure_var_in_reg(p0, [i0])
+ iloc = self.make_sure_var_in_reg(i0, [p0])
+ res = self.rm.force_allocate_reg(op, [p0, i0])
+ assert isinstance(op.getarg(2), ConstInt)
+ assert isinstance(op.getarg(3), ConstInt)
+ self.assembler.load_effective_addr(iloc, op.getarg(2).getint(),
+ op.getarg(3).getint(), res, ploc)
def _consider_math_read_timestamp(self, op):
# hint: try to move unrelated registers away from eax and edx now
diff --git a/rpython/jit/backend/x86/runner.py b/rpython/jit/backend/x86/runner.py
--- a/rpython/jit/backend/x86/runner.py
+++ b/rpython/jit/backend/x86/runner.py
@@ -16,6 +16,7 @@
debug = True
supports_floats = True
supports_singlefloats = True
+ supports_load_effective_address = True
dont_keepalive_stuff = False # for tests
with_threads = False
diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py
--- a/rpython/jit/backend/zarch/opassembler.py
+++ b/rpython/jit/backend/zarch/opassembler.py
@@ -963,75 +963,15 @@
def _mem_offset_supported(self, value):
return -2**19 <= value < 2**19
- def emit_copystrcontent(self, op, arglocs, regalloc):
- self._emit_copycontent(arglocs, is_unicode=False)
-
- def emit_copyunicodecontent(self, op, arglocs, regalloc):
- self._emit_copycontent(arglocs, is_unicode=True)
-
- def _emit_load_for_copycontent(self, dst, src_ptr, src_ofs, scale):
- if src_ofs.is_imm():
- value = src_ofs.value << scale
- if check_imm_value(value):
- self.mc.AGHIK(dst, src_ptr, l.imm(value))
- else:
- # it is fine to use r1 here, because it will
- # only hold a value before invoking the memory copy
- self.mc.load_imm(r.SCRATCH, value)
- self.mc.AGRK(dst, src_ptr, r.SCRATCH)
- elif scale == 0:
- self.mc.AGRK(dst, src_ptr, src_ofs)
- else:
- self.mc.SLLG(r.SCRATCH, src_ofs, l.addr(scale))
- self.mc.AGRK(dst, src_ptr, r.SCRATCH)
-
- def _emit_copycontent(self, arglocs, is_unicode):
- [src_ptr_loc, dst_ptr_loc,
- src_ofs_loc, dst_ofs_loc, length_loc] = arglocs
-
- if is_unicode:
- basesize, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
- self.cpu.translate_support_code)
- if itemsize == 2: scale = 1
- elif itemsize == 4: scale = 2
- else: raise AssertionError
- else:
- basesize, itemsize, _ = symbolic.get_array_token(rstr.STR,
- self.cpu.translate_support_code)
- assert itemsize == 1
- basesize -= 1 # for the extra null character
- scale = 0
-
- # src and src_len are tmp registers
- src = src_ptr_loc
- src_len = r.odd_reg(src)
- dst = r.r0
- dst_len = r.r1
- self._emit_load_for_copycontent(src, src_ptr_loc, src_ofs_loc, scale)
- self._emit_load_for_copycontent(dst, dst_ptr_loc, dst_ofs_loc, scale)
-
- if length_loc.is_imm():
- length = length_loc.getint()
- self.mc.load_imm(dst_len, length << scale)
- else:
- if scale > 0:
- self.mc.SLLG(dst_len, length_loc, l.addr(scale))
- else:
- self.mc.LGR(dst_len, length_loc)
- # ensure that src_len is as long as dst_len, otherwise
- # padding bytes are written to dst
- self.mc.LGR(src_len, dst_len)
-
- self.mc.AGHI(src, l.imm(basesize))
- self.mc.AGHI(dst, l.imm(basesize))
-
- # s390x has memset directly as a hardware instruction!!
- # 0xB8 means we might reference dst later
- self.mc.MVCLE(dst, src, l.addr(0xB8))
- # NOTE this instruction can (determined by the cpu), just
- # quit the movement any time, thus it is looped until all bytes
- # are copied!
- self.mc.BRC(c.OF, l.imm(-self.mc.MVCLE_byte_count))
+ # ...copystrcontent logic was removed, but note that
+ # if we want to reintroduce support for that:
+ # s390x has memset directly as a hardware instruction!!
+ # 0xB8 means we might reference dst later
+ #self.mc.MVCLE(dst, src, l.addr(0xB8))
+ # NOTE this instruction can (determined by the cpu), just
+ # quit the movement any time, thus it is looped until all bytes
+ # are copied!
+ #self.mc.BRC(c.OF, l.imm(-self.mc.MVCLE_byte_count))
def emit_zero_array(self, op, arglocs, regalloc):
base_loc, startindex_loc, length_loc, \
diff --git a/rpython/jit/backend/zarch/regalloc.py b/rpython/jit/backend/zarch/regalloc.py
--- a/rpython/jit/backend/zarch/regalloc.py
+++ b/rpython/jit/backend/zarch/regalloc.py
@@ -1269,29 +1269,6 @@
loc1 = self.ensure_reg(op.getarg(1))
return [loc0, loc1]
- def prepare_copystrcontent(self, op):
- """ this function needs five registers.
- src & src_len: are allocated using ensure_even_odd_pair.
- note that these are tmp registers, thus the actual variable
- value is not modified.
- src_len: when entering the assembler, src_ofs_loc's value is contained
- in src_len register.
- """
- src_ptr_loc, _ = \
- self.rm.ensure_even_odd_pair(op.getarg(0),
- None, bind_first=True,
- must_exist=False, load_loc_odd=False)
- src_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(2))
- dst_ptr_loc = self.ensure_reg(op.getarg(1))
- dst_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(3))
- length_loc = self.ensure_reg_or_any_imm(op.getarg(4))
- # no need to spill, we do not call memcpy, but we use s390x's
- # hardware instruction to copy memory
- return [src_ptr_loc, dst_ptr_loc,
- src_ofs_loc, dst_ofs_loc, length_loc]
-
- prepare_copyunicodecontent = prepare_copystrcontent
-
def prepare_label(self, op):
descr = op.getdescr()
assert isinstance(descr, TargetToken)
diff --git a/rpython/jit/metainterp/executor.py b/rpython/jit/metainterp/executor.py
--- a/rpython/jit/metainterp/executor.py
+++ b/rpython/jit/metainterp/executor.py
@@ -441,6 +441,7 @@
rop.GC_STORE,
rop.GC_STORE_INDEXED,
rop.LOAD_FROM_GC_TABLE,
+ rop.LOAD_EFFECTIVE_ADDRESS,
): # list of opcodes never executed by pyjitpl
continue
if rop._VEC_PURE_FIRST <= value <= rop._VEC_PURE_LAST:
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -1055,6 +1055,8 @@
'UNICODEGETITEM/2/i',
#
'LOAD_FROM_GC_TABLE/1/r', # only emitted by rewrite.py
+ 'LOAD_EFFECTIVE_ADDRESS/4/i', # only emitted by rewrite.py, only if
+ # cpu.supports_load_effective_address. [v_gcptr,v_index,c_baseofs,c_shift]
#
'_ALWAYS_PURE_LAST', # ----- end of always_pure operations -----
More information about the pypy-commit
mailing list