[pypy-commit] pypy vecopt: added some missing vector x86 instructions to mc
plan_rich
noreply at buildbot.pypy.org
Mon May 11 15:46:12 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77292:7d60c4409027
Date: 2015-05-11 15:46 +0200
http://bitbucket.org/pypy/pypy/changeset/7d60c4409027/
Log: added some missing vector x86 instructions to mc; started to
implement the new instructions (pack/unpack/expand/...), not yet
finished. I need to find suitable instructions for those.
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -4,15 +4,20 @@
import py
from rpython.jit.metainterp.test.support import LLJitMixin
+from rpython.jit.backend.x86.test.test_basic import Jit386Mixin
from rpython.jit.metainterp.warmspot import reset_jit, get_stats
from pypy.module.micronumpy import boxes
from pypy.module.micronumpy.compile import FakeSpace, Parser, InterpreterState
from pypy.module.micronumpy.base import W_NDimArray
-class TestNumpyJit(LLJitMixin):
+class TestNumpyJit(Jit386Mixin):
graph = None
interp = None
+ def setup_method(self, method):
+ if not self.CPUClass.vector_extension:
+ py.test.skip("needs vector extension to run (for now)")
+
def setup_class(cls):
default = """
a = [1,2,3,4]
@@ -128,7 +133,6 @@
"""
def test_sum(self):
- py.test.skip('TODO')
result = self.run("sum")
assert result == sum(range(30))
self.check_trace_count(1)
@@ -150,7 +154,6 @@
"""
def test_cumsum(self):
- py.test.skip('TODO')
result = self.run("cumsum")
assert result == 15
self.check_trace_count(1)
@@ -220,7 +223,6 @@
})
def define_reduce():
- py.test.skip('TODO')
return """
a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
sum(a)
diff --git a/rpython/jit/backend/llgraph/runner.py b/rpython/jit/backend/llgraph/runner.py
--- a/rpython/jit/backend/llgraph/runner.py
+++ b/rpython/jit/backend/llgraph/runner.py
@@ -242,7 +242,10 @@
translate_support_code = False
is_llgraph = True
- vector_register_size = 16
+ vector_extension = True
+ vector_register_size = 16 # in bytes
+ vector_horizontal_operations = True
+ vector_pack_slots = True
def __init__(self, rtyper, stats=None, *ignored_args, **kwds):
model.AbstractCPU.__init__(self)
@@ -794,8 +797,6 @@
_type = longlong.FLOATSTORAGE
else:
raise AssertionError(box)
- #for a in arg:
- # assert lltype.typeOf(a) == _type
else:
raise AssertionError(box)
#
diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -25,6 +25,11 @@
HAS_CODEMAP = False
+ vector_extension = False
+ vector_register_size = 0 # in bytes
+ vector_horizontal_operations = False
+ vector_pack_slots = False
+
def __init__(self, rtyper, stats, opts, translate_support_code=False,
gcdescr=None):
assert type(opts) is not bool
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -1,5 +1,6 @@
import sys
import os
+import py
from rpython.jit.backend.llsupport import symbolic, jitframe, rewrite
from rpython.jit.backend.llsupport.assembler import (GuardToken, BaseAssembler,
@@ -2517,6 +2518,45 @@
else:
raise NotImplementedError
+ def genop_vec_int_sub(self, op, arglocs, resloc):
+ loc0, loc1, itemsize_loc = arglocs
+ itemsize = itemsize_loc.value
+ if itemsize == 1:
+ self.mc.PSUBB(loc0, loc1)
+ elif itemsize == 2:
+ self.mc.PSUBW(loc0, loc1)
+ elif itemsize == 4:
+ self.mc.PSUBD(loc0, loc1)
+ elif itemsize == 8:
+ self.mc.PSUBQ(loc0, loc1)
+ else:
+ raise NotImplementedError
+
+ genop_vec_float_arith = """
+ def genop_vec_float_{type}(self, op, arglocs, resloc):
+ loc0, loc1, itemsize_loc = arglocs
+ itemsize = itemsize_loc.value
+ if itemsize == 4:
+ self.mc.{p_op_s}(loc0, loc1)
+ elif itemsize == 8:
+ self.mc.{p_op_d}(loc0, loc1)
+ else:
+ raise NotImplementedError
+ """
+ for op in ['add','mul','sub','div']:
+ OP = op.upper()
+ _source = genop_vec_float_arith.format(type=op, p_op_s=OP+'PS',p_op_d=OP+'PD')
+ exec py.code.Source(_source).compile()
+ del genop_vec_float_arith
+
+ def genop_vec_unpack(self, op, arglocs, resloc):
+ loc0, indexloc, sizeloc = arglocs
+ size = sizeloc.value
+ if size == 4:
+ pass
+ elif size == 8:
+ self.mc.CMPPD(
+
def genop_vec_int_signext(self, op, arglocs, resloc):
pass
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1505,7 +1505,8 @@
consider_vec_raw_store = consider_vec_setarrayitem_raw
- def consider_vec_int_add(self, op):
+
+ def consider_vec_arith(self, op):
count = op.getarg(2)
assert isinstance(count, ConstInt)
itemsize = self.assembler.cpu.vector_register_size // count.value
@@ -1514,6 +1515,26 @@
loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
self.perform(op, [loc0, loc1, imm(itemsize)], loc0)
+ consider_vec_int_add = consider_vec_arith
+ consider_vec_int_sub = consider_vec_arith
+ consider_vec_int_mul = consider_vec_arith
+ consider_vec_float_add = consider_vec_arith
+ consider_vec_float_sub = consider_vec_arith
+ consider_vec_float_mul = consider_vec_arith
+ del consider_vec_arith
+
+ def consider_vec_logic(self, op):
+ count = op.getarg(2)
+ assert isinstance(count, ConstInt)
+ itemsize = self.assembler.cpu.vector_register_size // count.value
+ args = op.getarglist()
+ loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
+ loc1 = self.xrm.make_sure_var_in_reg(op.getarg(1), args)
+ self.perform(op, [loc0, loc1, imm(itemsize)], loc0)
+
+ consider_vec_float_eq = consider_vec_logic
+ del consider_vec_logic
+
def consider_vec_int_signext(self, op):
# there is not much we can do in this case. arithmetic is
# done on the vector register, if there is a wrap around,
@@ -1524,6 +1545,35 @@
#if op.getarg(1).value != op.getarg(2).value:
# raise NotImplementedError("signext not implemented")
+ def consider_vec_box_pack(self, op):
+ count = op.getarg(3)
+ index = op.getarg(2)
+ assert isinstance(count, ConstInt)
+ assert isinstance(index, ConstInt)
+ itemsize = self.assembler.cpu.vector_register_size // count.value
+ args = op.getarglist()
+ loc0 = self.xrm.make_sure_var_in_reg(op.getarg(0), args)
+ loc1 = self.make_sure_var_in_reg(op.getarg(1), args)
+ self.perform(op, [loc0, loc1, imm(index.value), imm(itemsize)], None)
+
+ def consider_vec_box_unpack(self, op):
+ count = op.getarg(2)
+ index = op.getarg(1)
+ assert isinstance(count, ConstInt)
+ assert isinstance(index, ConstInt)
+ itemsize = self.assembler.cpu.vector_register_size // count.value
+ args = op.getarglist()
+ loc0 = self.xrm.make_sure_var_in_reg(op.getarg(0), args)
+ result = self.force_allocate_reg(op.result, args)
+ self.perform(op, [loc0, imm(index.value), imm(itemsize)], result)
+
+ def consider_vec_expand(self, op):
+ pass
+
+ def consider_vec_box(self, op):
+ # pseudo instruction, needed to create a new variable
+ pass
+
def consider_guard_early_exit(self, op):
pass
diff --git a/rpython/jit/backend/x86/runner.py b/rpython/jit/backend/x86/runner.py
--- a/rpython/jit/backend/x86/runner.py
+++ b/rpython/jit/backend/x86/runner.py
@@ -24,11 +24,6 @@
with_threads = False
frame_reg = regloc.ebp
- vector_extension = False
- vector_register_size = 0 # in bytes
- vector_horizontal_operations = False
- vector_pack_slots = False
-
from rpython.jit.backend.x86.arch import JITFRAME_FIXED_SIZE
all_reg_indexes = gpr_reg_mgr_cls.all_reg_indexes
gen_regs = gpr_reg_mgr_cls.all_regs
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -920,6 +920,15 @@
define_modrm_modes('XORPS_x*', [rex_nw, '\x0F\x57', register(1, 8)], regtype='XMM')
define_modrm_modes('ANDPD_x*', ['\x66', rex_nw, '\x0F\x54', register(1, 8)], regtype='XMM')
+define_modrm_modes('ADDPD_x*', ['\x66', rex_nw, '\x0F\x58', register(1, 8)], regtype='XMM')
+define_modrm_modes('ADDPS_x*', [ rex_nw, '\x0F\x58', register(1, 8)], regtype='XMM')
+define_modrm_modes('SUBPD_x*', ['\x66', rex_nw, '\x0F\x5C', register(1, 8)], regtype='XMM')
+define_modrm_modes('SUBPS_x*', [ rex_nw, '\x0F\x5C', register(1, 8)], regtype='XMM')
+define_modrm_modes('MULPD_x*', ['\x66', rex_nw, '\x0F\x59', register(1, 8)], regtype='XMM')
+define_modrm_modes('MULPS_x*', [ rex_nw, '\x0F\x59', register(1, 8)], regtype='XMM')
+define_modrm_modes('DIVPD_x*', ['\x66', rex_nw, '\x0F\x5E', register(1, 8)], regtype='XMM')
+define_modrm_modes('DIVPS_x*', [ rex_nw, '\x0F\x5E', register(1, 8)], regtype='XMM')
+
def define_pxmm_insn(insnname_template, insn_char):
def add_insn(char, *post):
methname = insnname_template.replace('*', char)
@@ -938,6 +947,9 @@
define_pxmm_insn('PADDW_x*', '\xFD')
define_pxmm_insn('PADDB_x*', '\xFC')
define_pxmm_insn('PSUBQ_x*', '\xFB')
+define_pxmm_insn('PSUBD_x*', '\xFA')
+define_pxmm_insn('PSUBW_x*', '\xF9')
+define_pxmm_insn('PSUBB_x*', '\xF8')
define_pxmm_insn('PAND_x*', '\xDB')
define_pxmm_insn('POR_x*', '\xEB')
define_pxmm_insn('PXOR_x*', '\xEF')
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -402,7 +402,9 @@
(j, vbox) = box_to_vbox.get(arg, (-1, None))
if vbox:
arg_cloned = arg.clonebox()
- unpack_op = ResOperation(rop.VEC_BOX_UNPACK, [vbox, ConstInt(j)], arg_cloned)
+ cj = ConstInt(j)
+ ci = ConstInt(vbox.item_count)
+ unpack_op = ResOperation(rop.VEC_BOX_UNPACK, [vbox, cj, ci], arg_cloned)
self.emit_operation(unpack_op)
sched_data.rename_unpacked(arg, arg_cloned)
op.setarg(i, arg_cloned)
@@ -415,7 +417,9 @@
(j, vbox) = box_to_vbox.get(arg, (-1, None))
if vbox:
arg_cloned = arg.clonebox()
- unpack_op = ResOperation(rop.VEC_BOX_UNPACK, [vbox, ConstInt(j)], arg_cloned)
+ cj = ConstInt(j)
+ ci = ConstInt(vbox.item_count)
+ unpack_op = ResOperation(rop.VEC_BOX_UNPACK, [vbox, cj, ci], arg_cloned)
self.emit_operation(unpack_op)
sched_data.rename_unpacked(arg, arg_cloned)
fail_args[i] = arg_cloned
@@ -619,6 +623,7 @@
break
vbox = BoxVector(arg.type, len(ops))
+ print "creating vectorbox", vbox, "of type",arg.type
if all_same_box:
expand_op = ResOperation(rop.VEC_EXPAND, [arg, ConstInt(len(ops))], vbox)
self.preamble_ops.append(expand_op)
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -458,12 +458,13 @@
'VEC_FLOAT_ADD/3',
'VEC_FLOAT_SUB/3',
'VEC_FLOAT_MUL/3',
+ 'VEC_FLOAT_DIV/3',
'VEC_FLOAT_EQ/3',
'VEC_INT_SIGNEXT/3',
'_VEC_ARITHMETIC_LAST',
- 'VEC_BOX_UNPACK/2',
- 'VEC_BOX_PACK/3',
- 'VEC_EXPAND/2',
+ 'VEC_BOX_UNPACK/3', # iX|fX = VEC_BOX_UNPACK(vX, index, item_count)
+ 'VEC_BOX_PACK/4', # VEC_BOX_PACK(vX, var/const, index, item_count)
+ 'VEC_EXPAND/2', # vX = VEC_EXPAND(var/const, item_count)
'VEC_BOX/1',
#
'INT_LT/2b',
@@ -725,6 +726,7 @@
rop.FLOAT_ADD: rop.VEC_FLOAT_ADD,
rop.FLOAT_SUB: rop.VEC_FLOAT_SUB,
rop.FLOAT_MUL: rop.VEC_FLOAT_MUL,
+ rop.FLOAT_TRUEDIV: rop.VEC_FLOAT_DIV,
rop.FLOAT_EQ: rop.VEC_FLOAT_EQ,
rop.INT_SIGNEXT: rop.VEC_INT_SIGNEXT,
More information about the pypy-commit
mailing list