[pypy-commit] pypy ppc-vsx-support: remove the special cpu for sse4, cpu now has a field which describes the vector extension
plan_rich
pypy.commits at gmail.com
Wed Jul 20 07:27:35 EDT 2016
Author: Richard Plangger <planrichi at gmail.com>
Branch: ppc-vsx-support
Changeset: r85775:ec08d8d7e121
Date: 2016-07-20 12:46 +0200
http://bitbucket.org/pypy/pypy/changeset/ec08d8d7e121/
Log: remove the special cpu for sse4; the cpu now has a field which describes
the vector extension. Undo several changes for sse4.
diff --git a/rpython/jit/backend/detect_cpu.py b/rpython/jit/backend/detect_cpu.py
--- a/rpython/jit/backend/detect_cpu.py
+++ b/rpython/jit/backend/detect_cpu.py
@@ -13,7 +13,6 @@
MODEL_X86 = 'x86'
MODEL_X86_NO_SSE2 = 'x86-without-sse2'
MODEL_X86_64 = 'x86-64'
-MODEL_X86_64_SSE4 = 'x86-64-sse4'
MODEL_ARM = 'arm'
MODEL_PPC_64 = 'ppc-64'
MODEL_S390_64 = 's390x'
@@ -80,9 +79,6 @@
from rpython.jit.backend.x86 import detect_feature as feature
if sys.maxint == 2**63-1:
result = MODEL_X86_64
- # has sse 2 at least
- if feature.detect_sse4_1():
- result = MODEL_X86_64_SSE4
else:
assert sys.maxint == 2**31-1
if feature.detect_sse2():
@@ -119,8 +115,6 @@
return "rpython.jit.backend.x86.runner", "CPU386_NO_SSE2"
elif backend_name == MODEL_X86_64:
return "rpython.jit.backend.x86.runner", "CPU_X86_64"
- elif backend_name == MODEL_X86_64_SSE4:
- return "rpython.jit.backend.x86.runner", "CPU_X86_64_SSE4"
elif backend_name == MODEL_ARM:
return "rpython.jit.backend.arm.runner", "CPU_ARM"
elif backend_name == MODEL_PPC_64:
@@ -144,7 +138,6 @@
MODEL_X86: ['floats', 'singlefloats', 'longlong'],
MODEL_X86_NO_SSE2: ['longlong'],
MODEL_X86_64: ['floats', 'singlefloats'],
- MODEL_X86_64_SSE4: ['floats', 'singlefloats'],
MODEL_ARM: ['floats', 'singlefloats', 'longlong'],
MODEL_PPC_64: ['floats'],
MODEL_S390_64: ['floats'],
diff --git a/rpython/jit/backend/test/test_detect_cpu.py b/rpython/jit/backend/test/test_detect_cpu.py
--- a/rpython/jit/backend/test/test_detect_cpu.py
+++ b/rpython/jit/backend/test/test_detect_cpu.py
@@ -31,8 +31,6 @@
def test_detect_model_from_c_compiler():
info1 = detect_model_from_host_platform()
info2 = detect_model_from_c_compiler()
- if info1.endswith("-sse4"):
- info1 = info1[:-len("-sse4")]
assert info1 == info2
def test_getcpufeatures():
diff --git a/rpython/jit/backend/tool/viewcode.py b/rpython/jit/backend/tool/viewcode.py
--- a/rpython/jit/backend/tool/viewcode.py
+++ b/rpython/jit/backend/tool/viewcode.py
@@ -45,7 +45,6 @@
'x86_32': 'i386',
'x86_64': 'i386:x86-64',
'x86-64': 'i386:x86-64',
- 'x86-64-sse4': 'i386:x86-64',
'i386': 'i386',
'arm': 'arm',
'arm_32': 'arm',
diff --git a/rpython/jit/backend/x86/runner.py b/rpython/jit/backend/x86/runner.py
--- a/rpython/jit/backend/x86/runner.py
+++ b/rpython/jit/backend/x86/runner.py
@@ -145,6 +145,8 @@
supports_longlong = False
class CPU_X86_64(AbstractX86CPU):
+ vector_ext = X86VectorExt()
+
backend_name = 'x86_64'
NUM_REGS = 16
CALLEE_SAVE_REGISTERS = [regloc.ebx, regloc.r12, regloc.r13, regloc.r14, regloc.r15]
@@ -152,10 +154,4 @@
IS_64_BIT = True
HAS_CODEMAP = True
-class CPU_X86_64_SSE4(CPU_X86_64):
- vector_ext = X86VectorExt()
- #vector_extension = True
- #vector_register_size = 16
- #vector_horizontal_operations = True
-
CPU = CPU386
diff --git a/rpython/jit/backend/x86/vector_ext.py b/rpython/jit/backend/x86/vector_ext.py
--- a/rpython/jit/backend/x86/vector_ext.py
+++ b/rpython/jit/backend/x86/vector_ext.py
@@ -9,10 +9,11 @@
ebp, r8, r9, r10, r11, r12, r13, r14, r15, xmm0, xmm1, xmm2, xmm3, xmm4,
xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14,
X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG, AddressLoc)
+from rpython.jit.backend.llsupport.vector_ext import VectorExt
from rpython.jit.backend.llsupport.regalloc import get_scale
from rpython.jit.metainterp.resoperation import (rop, ResOperation,
VectorOp, VectorGuardOp)
-from rpython.rlib.objectmodel import we_are_translated
+from rpython.rlib.objectmodel import we_are_translated, always_inline
from rpython.rtyper.lltypesystem.lloperation import llop
from rpython.rtyper.lltypesystem import lltype
from rpython.jit.backend.x86 import rx86
@@ -32,6 +33,13 @@
raise NotImplementedError(msg)
# DUP END
+class X86VectorExt(VectorExt):
+ def setup_once(self, asm):
+ if cpu_feature.detect_sse4_1():
+ self.enable(16, accum=True)
+ asm.setup_once_vector()
+ self._setup = True
+
class VectorAssemblerMixin(object):
_mixin_ = True
@@ -139,29 +147,31 @@
not_implemented("reduce sum for %s not impl." % arg)
- def _genop_vec_getarrayitem(self, op, arglocs, resloc):
- # considers item scale (raw_load does not)
- base_loc, ofs_loc, size_loc, ofs, integer_loc, aligned_loc = arglocs
- scale = get_scale(size_loc.value)
- src_addr = addr_add(base_loc, ofs_loc, ofs.value, scale)
- self._vec_load(resloc, src_addr, integer_loc.value,
- size_loc.value, aligned_loc.value)
-
- genop_vec_getarrayitem_raw_i = _genop_vec_getarrayitem
- genop_vec_getarrayitem_raw_f = _genop_vec_getarrayitem
-
- genop_vec_getarrayitem_gc_i = _genop_vec_getarrayitem
- genop_vec_getarrayitem_gc_f = _genop_vec_getarrayitem
+ # TODO remove
+ #def _genop_vec_getarrayitem(self, op, arglocs, resloc):
+ # # considers item scale (raw_load does not)
+ # base_loc, ofs_loc, size_loc, ofs, integer_loc, aligned_loc = arglocs
+ # scale = get_scale(size_loc.value)
+ # src_addr = addr_add(base_loc, ofs_loc, ofs.value, scale)
+ # self._vec_load(resloc, src_addr, integer_loc.value,
+ # size_loc.value, aligned_loc.value)
+ #
+ #genop_vec_getarrayitem_raw_i = _genop_vec_getarrayitem
+ #genop_vec_getarrayitem_raw_f = _genop_vec_getarrayitem
+ #
+ #genop_vec_getarrayitem_gc_i = _genop_vec_getarrayitem
+ #genop_vec_getarrayitem_gc_f = _genop_vec_getarrayitem
- def _genop_vec_raw_load(self, op, arglocs, resloc):
+ def _genop_vec_load(self, op, arglocs, resloc):
base_loc, ofs_loc, size_loc, ofs, integer_loc, aligned_loc = arglocs
src_addr = addr_add(base_loc, ofs_loc, ofs.value, 0)
self._vec_load(resloc, src_addr, integer_loc.value,
size_loc.value, aligned_loc.value)
- genop_vec_raw_load_i = _genop_vec_raw_load
- genop_vec_raw_load_f = _genop_vec_raw_load
+ genop_vec_load_i = _genop_vec_load
+ genop_vec_load_f = _genop_vec_load
+ @always_inline
def _vec_load(self, resloc, src_addr, integer, itemsize, aligned):
if integer:
if aligned:
@@ -174,23 +184,25 @@
elif itemsize == 8:
self.mc.MOVUPD(resloc, src_addr)
- def _genop_discard_vec_setarrayitem(self, op, arglocs):
- # considers item scale (raw_store does not)
- base_loc, ofs_loc, value_loc, size_loc, baseofs, integer_loc, aligned_loc = arglocs
- scale = get_scale(size_loc.value)
- dest_loc = addr_add(base_loc, ofs_loc, baseofs.value, scale)
- self._vec_store(dest_loc, value_loc, integer_loc.value,
- size_loc.value, aligned_loc.value)
+ # TODO remove
+ #def _genop_discard_vec_setarrayitem(self, op, arglocs):
+ # # considers item scale (raw_store does not)
+ # base_loc, ofs_loc, value_loc, size_loc, baseofs, integer_loc, aligned_loc = arglocs
+ # scale = get_scale(size_loc.value)
+ # dest_loc = addr_add(base_loc, ofs_loc, baseofs.value, scale)
+ # self._vec_store(dest_loc, value_loc, integer_loc.value,
+ # size_loc.value, aligned_loc.value)
- genop_discard_vec_setarrayitem_raw = _genop_discard_vec_setarrayitem
- genop_discard_vec_setarrayitem_gc = _genop_discard_vec_setarrayitem
+ #genop_discard_vec_setarrayitem_raw = _genop_discard_vec_setarrayitem
+ #genop_discard_vec_setarrayitem_gc = _genop_discard_vec_setarrayitem
- def genop_discard_vec_raw_store(self, op, arglocs):
+ def genop_discard_vec_store(self, op, arglocs):
base_loc, ofs_loc, value_loc, size_loc, baseofs, integer_loc, aligned_loc = arglocs
dest_loc = addr_add(base_loc, ofs_loc, baseofs.value, 0)
self._vec_store(dest_loc, value_loc, integer_loc.value,
size_loc.value, aligned_loc.value)
+ @always_inline
def _vec_store(self, dest_loc, value_loc, integer, itemsize, aligned):
if integer:
if aligned:
@@ -528,7 +540,7 @@
class VectorRegallocMixin(object):
_mixin_ = True
- def _consider_vec_getarrayitem(self, op):
+ def _consider_vec_load(self, op):
descr = op.getdescr()
assert isinstance(descr, ArrayDescr)
assert not descr.is_array_of_pointers() and \
@@ -543,14 +555,15 @@
self.perform(op, [base_loc, ofs_loc, imm(itemsize), imm(ofs),
imm(integer), imm(aligned)], result_loc)
- consider_vec_getarrayitem_raw_i = _consider_vec_getarrayitem
- consider_vec_getarrayitem_raw_f = _consider_vec_getarrayitem
- consider_vec_getarrayitem_gc_i = _consider_vec_getarrayitem
- consider_vec_getarrayitem_gc_f = _consider_vec_getarrayitem
- consider_vec_raw_load_i = _consider_vec_getarrayitem
- consider_vec_raw_load_f = _consider_vec_getarrayitem
+ #consider_vec_getarrayitem_raw_i = _consider_vec_getarrayitem
+ #consider_vec_getarrayitem_raw_f = _consider_vec_getarrayitem
+ #consider_vec_getarrayitem_gc_i = _consider_vec_getarrayitem
+ #consider_vec_getarrayitem_gc_f = _consider_vec_getarrayitem
+ consider_vec_load_i = _consider_vec_load
+ consider_vec_load_f = _consider_vec_load
- def _consider_vec_setarrayitem(self, op):
+ def consider_vec_store(self, op):
+ # TODO
descr = op.getdescr()
assert isinstance(descr, ArrayDescr)
assert not descr.is_array_of_pointers() and \
@@ -566,9 +579,9 @@
self.perform_discard(op, [base_loc, ofs_loc, value_loc,
imm(itemsize), imm(ofs), imm(integer), imm(aligned)])
- consider_vec_setarrayitem_raw = _consider_vec_setarrayitem
- consider_vec_setarrayitem_gc = _consider_vec_setarrayitem
- consider_vec_raw_store = _consider_vec_setarrayitem
+ #consider_vec_setarrayitem_raw = _consider_vec_setarrayitem
+ #consider_vec_setarrayitem_gc = _consider_vec_setarrayitem
+ #consider_vec_store = _consider_vec_setarrayitem
def consider_vec_arith(self, op):
lhs = op.getarg(0)
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
@@ -1361,6 +1361,30 @@
'guard_true(i100) [p0, i0]',
], trace)
+ def test_guard_failarg_do_not_rename_to_const(Self):
+ # Loop -2 (pre vectorize) : noopt with 15 ops
+ trace = self.parse_loop("""
+ []
+ label(p0, p1, p2, p3, p4, i5, i6, p7, p8, p9, p10, i11, i12, f13, p14, p15, i16, i17, descr=TargetToken(70367324045984))
+ debug_merge_point(0, 0, '(numpy_call2_inc_out_right: no get_printable_location)')
+ i19 = int_and(i6, 7)
+ i20 = int_is_zero(i19)
+ guard_true(i20, descr=<ResumeGuardDescr object at 0x3fffab60d7b0>) [p7, p3, p2, p1, p0, p8, p10, i11, i19, i6, i12, i5, p4]
+ f21 = raw_load_f(i12, i6, descr=<ArrayF 8>)
+ guard_not_invalidated(descr=<ResumeGuardCopiedDescr object at 0x3fffab5fcde8>) [p7, p3, p2, p1, p0, p8, p10, i11, i19, i6, i12, i5, p4]
+ f22 = float_mul(f21, f13)
+ raw_store(i16, i6, f22, descr=<ArrayF 8>)
+ i24 = int_add(i5, 1)
+ i26 = int_add(i6, 8)
+ i27 = int_ge(i24, i17)
+ guard_false(i27, descr=<ResumeGuardDescr object at 0x3fffab60d818>) [i17, i24, p7, p3, p2, p1, p0, i26, None, p4]
+ debug_merge_point(0, 0, '(numpy_call2_inc_out_right: no get_printable_location)')
+ jump(p0, p1, p2, p3, p4, i24, i26, p7, p8, p9, p10, 1, i12, f13, p14, p15, i16, i17, descr=TargetToken(70367324045984))
+ """)
+ vopt = self.schedule(trace)
+ import pdb; pdb.set_trace()
+
+
class TestLLtype(BaseTestVectorize, LLtypeMixin):
pass
diff --git a/rpython/jit/metainterp/warmspot.py b/rpython/jit/metainterp/warmspot.py
--- a/rpython/jit/metainterp/warmspot.py
+++ b/rpython/jit/metainterp/warmspot.py
@@ -33,7 +33,7 @@
# Bootstrapping
def apply_jit(translator, backend_name="auto", inline=False,
- vec=False, enable_opts=ALL_OPTS_NAMES, **kwds):
+ vec=True, enable_opts=ALL_OPTS_NAMES, **kwds):
if 'CPUClass' not in kwds:
from rpython.jit.backend.detect_cpu import getcpuclass
kwds['CPUClass'] = getcpuclass(backend_name)
More information about the pypy-commit
mailing list