[pypy-commit] pypy ppc-vsx-support: remove the special cpu for sse4, cpu now has a field which describes the vector extension

plan_rich pypy.commits at gmail.com
Wed Jul 20 07:27:35 EDT 2016


Author: Richard Plangger <planrichi at gmail.com>
Branch: ppc-vsx-support
Changeset: r85775:ec08d8d7e121
Date: 2016-07-20 12:46 +0200
http://bitbucket.org/pypy/pypy/changeset/ec08d8d7e121/

Log:	remove the special cpu for sse4; the cpu now has a field which describes
	the vector extension. undo several changes for sse4

diff --git a/rpython/jit/backend/detect_cpu.py b/rpython/jit/backend/detect_cpu.py
--- a/rpython/jit/backend/detect_cpu.py
+++ b/rpython/jit/backend/detect_cpu.py
@@ -13,7 +13,6 @@
 MODEL_X86         = 'x86'
 MODEL_X86_NO_SSE2 = 'x86-without-sse2'
 MODEL_X86_64      = 'x86-64'
-MODEL_X86_64_SSE4 = 'x86-64-sse4'
 MODEL_ARM         = 'arm'
 MODEL_PPC_64      = 'ppc-64'
 MODEL_S390_64     = 's390x'
@@ -80,9 +79,6 @@
         from rpython.jit.backend.x86 import detect_feature as feature
         if sys.maxint == 2**63-1:
             result = MODEL_X86_64
-            # has sse 2 at least
-            if feature.detect_sse4_1():
-                result = MODEL_X86_64_SSE4
         else:
             assert sys.maxint == 2**31-1
             if feature.detect_sse2():
@@ -119,8 +115,6 @@
         return "rpython.jit.backend.x86.runner", "CPU386_NO_SSE2"
     elif backend_name == MODEL_X86_64:
         return "rpython.jit.backend.x86.runner", "CPU_X86_64"
-    elif backend_name == MODEL_X86_64_SSE4:
-        return "rpython.jit.backend.x86.runner", "CPU_X86_64_SSE4"
     elif backend_name == MODEL_ARM:
         return "rpython.jit.backend.arm.runner", "CPU_ARM"
     elif backend_name == MODEL_PPC_64:
@@ -144,7 +138,6 @@
         MODEL_X86: ['floats', 'singlefloats', 'longlong'],
         MODEL_X86_NO_SSE2: ['longlong'],
         MODEL_X86_64: ['floats', 'singlefloats'],
-        MODEL_X86_64_SSE4: ['floats', 'singlefloats'],
         MODEL_ARM: ['floats', 'singlefloats', 'longlong'],
         MODEL_PPC_64: ['floats'],
         MODEL_S390_64: ['floats'],
diff --git a/rpython/jit/backend/test/test_detect_cpu.py b/rpython/jit/backend/test/test_detect_cpu.py
--- a/rpython/jit/backend/test/test_detect_cpu.py
+++ b/rpython/jit/backend/test/test_detect_cpu.py
@@ -31,8 +31,6 @@
 def test_detect_model_from_c_compiler():
     info1 = detect_model_from_host_platform()
     info2 = detect_model_from_c_compiler()
-    if info1.endswith("-sse4"):
-        info1 = info1[:-len("-sse4")]
     assert info1 == info2
 
 def test_getcpufeatures():
diff --git a/rpython/jit/backend/tool/viewcode.py b/rpython/jit/backend/tool/viewcode.py
--- a/rpython/jit/backend/tool/viewcode.py
+++ b/rpython/jit/backend/tool/viewcode.py
@@ -45,7 +45,6 @@
         'x86_32': 'i386',
         'x86_64': 'i386:x86-64',
         'x86-64': 'i386:x86-64',
-        'x86-64-sse4': 'i386:x86-64',
         'i386': 'i386',
         'arm': 'arm',
         'arm_32': 'arm',
diff --git a/rpython/jit/backend/x86/runner.py b/rpython/jit/backend/x86/runner.py
--- a/rpython/jit/backend/x86/runner.py
+++ b/rpython/jit/backend/x86/runner.py
@@ -145,6 +145,8 @@
     supports_longlong = False
 
 class CPU_X86_64(AbstractX86CPU):
+    vector_ext = X86VectorExt()
+
     backend_name = 'x86_64'
     NUM_REGS = 16
     CALLEE_SAVE_REGISTERS = [regloc.ebx, regloc.r12, regloc.r13, regloc.r14, regloc.r15]
@@ -152,10 +154,4 @@
     IS_64_BIT = True
     HAS_CODEMAP = True
 
-class CPU_X86_64_SSE4(CPU_X86_64):
-    vector_ext = X86VectorExt()
-    #vector_extension = True
-    #vector_register_size = 16
-    #vector_horizontal_operations = True
-
 CPU = CPU386
diff --git a/rpython/jit/backend/x86/vector_ext.py b/rpython/jit/backend/x86/vector_ext.py
--- a/rpython/jit/backend/x86/vector_ext.py
+++ b/rpython/jit/backend/x86/vector_ext.py
@@ -9,10 +9,11 @@
     ebp, r8, r9, r10, r11, r12, r13, r14, r15, xmm0, xmm1, xmm2, xmm3, xmm4,
     xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14,
     X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG, AddressLoc)
+from rpython.jit.backend.llsupport.vector_ext import VectorExt
 from rpython.jit.backend.llsupport.regalloc import get_scale
 from rpython.jit.metainterp.resoperation import (rop, ResOperation,
         VectorOp, VectorGuardOp)
-from rpython.rlib.objectmodel import we_are_translated
+from rpython.rlib.objectmodel import we_are_translated, always_inline
 from rpython.rtyper.lltypesystem.lloperation import llop
 from rpython.rtyper.lltypesystem import lltype
 from rpython.jit.backend.x86 import rx86
@@ -32,6 +33,13 @@
     raise NotImplementedError(msg)
 # DUP END
 
+class X86VectorExt(VectorExt):
+    def setup_once(self, asm):
+        if cpu_feature.detect_sse4_1():  # vector support is switched on only when SSE4.1 is detected
+            self.enable(16, accum=True)  # presumably 16 is the vector register size in bytes -- confirm against VectorExt.enable
+            asm.setup_once_vector()
+        self._setup = True  # set unconditionally, whether or not SSE4.1 was detected
+
 class VectorAssemblerMixin(object):
     _mixin_ = True
 
@@ -139,29 +147,31 @@
 
         not_implemented("reduce sum for %s not impl." % arg)
 
-    def _genop_vec_getarrayitem(self, op, arglocs, resloc):
-        # considers item scale (raw_load does not)
-        base_loc, ofs_loc, size_loc, ofs, integer_loc, aligned_loc = arglocs
-        scale = get_scale(size_loc.value)
-        src_addr = addr_add(base_loc, ofs_loc, ofs.value, scale)
-        self._vec_load(resloc, src_addr, integer_loc.value,
-                       size_loc.value, aligned_loc.value)
-    
-    genop_vec_getarrayitem_raw_i = _genop_vec_getarrayitem
-    genop_vec_getarrayitem_raw_f = _genop_vec_getarrayitem
-    
-    genop_vec_getarrayitem_gc_i = _genop_vec_getarrayitem
-    genop_vec_getarrayitem_gc_f = _genop_vec_getarrayitem
+    # TODO remove
+    #def _genop_vec_getarrayitem(self, op, arglocs, resloc):
+    #    # considers item scale (raw_load does not)
+    #    base_loc, ofs_loc, size_loc, ofs, integer_loc, aligned_loc = arglocs
+    #    scale = get_scale(size_loc.value)
+    #    src_addr = addr_add(base_loc, ofs_loc, ofs.value, scale)
+    #    self._vec_load(resloc, src_addr, integer_loc.value,
+    #                   size_loc.value, aligned_loc.value)
+    #
+    #genop_vec_getarrayitem_raw_i = _genop_vec_getarrayitem
+    #genop_vec_getarrayitem_raw_f = _genop_vec_getarrayitem
+    #
+    #genop_vec_getarrayitem_gc_i = _genop_vec_getarrayitem
+    #genop_vec_getarrayitem_gc_f = _genop_vec_getarrayitem
 
-    def _genop_vec_raw_load(self, op, arglocs, resloc):
+    def _genop_vec_load(self, op, arglocs, resloc):
         base_loc, ofs_loc, size_loc, ofs, integer_loc, aligned_loc = arglocs
         src_addr = addr_add(base_loc, ofs_loc, ofs.value, 0)
         self._vec_load(resloc, src_addr, integer_loc.value,
                        size_loc.value, aligned_loc.value)
 
-    genop_vec_raw_load_i = _genop_vec_raw_load
-    genop_vec_raw_load_f = _genop_vec_raw_load
+    genop_vec_load_i = _genop_vec_load
+    genop_vec_load_f = _genop_vec_load
 
+    @always_inline
     def _vec_load(self, resloc, src_addr, integer, itemsize, aligned):
         if integer:
             if aligned:
@@ -174,23 +184,25 @@
             elif itemsize == 8:
                 self.mc.MOVUPD(resloc, src_addr)
 
-    def _genop_discard_vec_setarrayitem(self, op, arglocs):
-        # considers item scale (raw_store does not)
-        base_loc, ofs_loc, value_loc, size_loc, baseofs, integer_loc, aligned_loc = arglocs
-        scale = get_scale(size_loc.value)
-        dest_loc = addr_add(base_loc, ofs_loc, baseofs.value, scale)
-        self._vec_store(dest_loc, value_loc, integer_loc.value,
-                        size_loc.value, aligned_loc.value)
+    # TODO remove
+    #def _genop_discard_vec_setarrayitem(self, op, arglocs):
+    #    # considers item scale (raw_store does not)
+    #    base_loc, ofs_loc, value_loc, size_loc, baseofs, integer_loc, aligned_loc = arglocs
+    #    scale = get_scale(size_loc.value)
+    #    dest_loc = addr_add(base_loc, ofs_loc, baseofs.value, scale)
+    #    self._vec_store(dest_loc, value_loc, integer_loc.value,
+    #                    size_loc.value, aligned_loc.value)
 
-    genop_discard_vec_setarrayitem_raw = _genop_discard_vec_setarrayitem
-    genop_discard_vec_setarrayitem_gc = _genop_discard_vec_setarrayitem
+    #genop_discard_vec_setarrayitem_raw = _genop_discard_vec_setarrayitem
+    #genop_discard_vec_setarrayitem_gc = _genop_discard_vec_setarrayitem
 
-    def genop_discard_vec_raw_store(self, op, arglocs):
+    def genop_discard_vec_store(self, op, arglocs):
         base_loc, ofs_loc, value_loc, size_loc, baseofs, integer_loc, aligned_loc = arglocs
         dest_loc = addr_add(base_loc, ofs_loc, baseofs.value, 0)
         self._vec_store(dest_loc, value_loc, integer_loc.value,
                         size_loc.value, aligned_loc.value)
 
+    @always_inline
     def _vec_store(self, dest_loc, value_loc, integer, itemsize, aligned):
         if integer:
             if aligned:
@@ -528,7 +540,7 @@
 class VectorRegallocMixin(object):
     _mixin_ = True
 
-    def _consider_vec_getarrayitem(self, op):
+    def _consider_vec_load(self, op):
         descr = op.getdescr()
         assert isinstance(descr, ArrayDescr)
         assert not descr.is_array_of_pointers() and \
@@ -543,14 +555,15 @@
         self.perform(op, [base_loc, ofs_loc, imm(itemsize), imm(ofs),
                           imm(integer), imm(aligned)], result_loc)
 
-    consider_vec_getarrayitem_raw_i = _consider_vec_getarrayitem
-    consider_vec_getarrayitem_raw_f = _consider_vec_getarrayitem
-    consider_vec_getarrayitem_gc_i = _consider_vec_getarrayitem
-    consider_vec_getarrayitem_gc_f = _consider_vec_getarrayitem
-    consider_vec_raw_load_i = _consider_vec_getarrayitem
-    consider_vec_raw_load_f = _consider_vec_getarrayitem
+    #consider_vec_getarrayitem_raw_i = _consider_vec_getarrayitem
+    #consider_vec_getarrayitem_raw_f = _consider_vec_getarrayitem
+    #consider_vec_getarrayitem_gc_i = _consider_vec_getarrayitem
+    #consider_vec_getarrayitem_gc_f = _consider_vec_getarrayitem
+    consider_vec_load_i = _consider_vec_load
+    consider_vec_load_f = _consider_vec_load
 
-    def _consider_vec_setarrayitem(self, op):
+    def consider_vec_store(self, op):
+        # TODO
         descr = op.getdescr()
         assert isinstance(descr, ArrayDescr)
         assert not descr.is_array_of_pointers() and \
@@ -566,9 +579,9 @@
         self.perform_discard(op, [base_loc, ofs_loc, value_loc,
                                  imm(itemsize), imm(ofs), imm(integer), imm(aligned)])
 
-    consider_vec_setarrayitem_raw = _consider_vec_setarrayitem
-    consider_vec_setarrayitem_gc = _consider_vec_setarrayitem
-    consider_vec_raw_store = _consider_vec_setarrayitem
+    #consider_vec_setarrayitem_raw = _consider_vec_setarrayitem
+    #consider_vec_setarrayitem_gc = _consider_vec_setarrayitem
+    #consider_vec_store = _consider_vec_setarrayitem
 
     def consider_vec_arith(self, op):
         lhs = op.getarg(0)
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
@@ -1361,6 +1361,30 @@
             'guard_true(i100) [p0, i0]',
         ], trace)
 
+    def test_guard_failarg_do_not_rename_to_const(self):
+        # Schedules a pre-vectorization loop ("Loop -2 (pre vectorize) : noopt with 15 ops").
+        trace = self.parse_loop("""
+        []
+        label(p0, p1, p2, p3, p4, i5, i6, p7, p8, p9, p10, i11, i12, f13, p14, p15, i16, i17, descr=TargetToken(70367324045984))
+        debug_merge_point(0, 0, '(numpy_call2_inc_out_right: no get_printable_location)')
+        i19 = int_and(i6, 7)
+        i20 = int_is_zero(i19)
+        guard_true(i20, descr=<ResumeGuardDescr object at 0x3fffab60d7b0>) [p7, p3, p2, p1, p0, p8, p10, i11, i19, i6, i12, i5, p4]
+        f21 = raw_load_f(i12, i6, descr=<ArrayF 8>)
+        guard_not_invalidated(descr=<ResumeGuardCopiedDescr object at 0x3fffab5fcde8>) [p7, p3, p2, p1, p0, p8, p10, i11, i19, i6, i12, i5, p4]
+        f22 = float_mul(f21, f13)
+        raw_store(i16, i6, f22, descr=<ArrayF 8>)
+        i24 = int_add(i5, 1)
+        i26 = int_add(i6, 8)
+        i27 = int_ge(i24, i17)
+        guard_false(i27, descr=<ResumeGuardDescr object at 0x3fffab60d818>) [i17, i24, p7, p3, p2, p1, p0, i26, None, p4]
+        debug_merge_point(0, 0, '(numpy_call2_inc_out_right: no get_printable_location)')
+        jump(p0, p1, p2, p3, p4, i24, i26, p7, p8, p9, p10, 1, i12, f13, p14, p15, i16, i17, descr=TargetToken(70367324045984))
+        """)
+        vopt = self.schedule(trace)
+        # TODO: assert on the scheduled trace; the test name suggests guard fail args must not be renamed to constants -- confirm
+
+
 
 class TestLLtype(BaseTestVectorize, LLtypeMixin):
     pass
diff --git a/rpython/jit/metainterp/warmspot.py b/rpython/jit/metainterp/warmspot.py
--- a/rpython/jit/metainterp/warmspot.py
+++ b/rpython/jit/metainterp/warmspot.py
@@ -33,7 +33,7 @@
 # Bootstrapping
 
 def apply_jit(translator, backend_name="auto", inline=False,
-              vec=False, enable_opts=ALL_OPTS_NAMES, **kwds):
+              vec=True, enable_opts=ALL_OPTS_NAMES, **kwds):
     if 'CPUClass' not in kwds:
         from rpython.jit.backend.detect_cpu import getcpuclass
         kwds['CPUClass'] = getcpuclass(backend_name)


More information about the pypy-commit mailing list