[pypy-commit] pypy ppc-vsx-support: first vector loop successfully compiles on ppc (floating point only)
plan_rich
pypy.commits at gmail.com
Mon Jun 20 03:06:08 EDT 2016
Author: Richard Plangger <planrichi at gmail.com>
Branch: ppc-vsx-support
Changeset: r85231:1352b56d157d
Date: 2016-06-20 09:05 +0200
http://bitbucket.org/pypy/pypy/changeset/1352b56d157d/
Log: first vector loop successfully compiles on ppc (floating point only)
diff --git a/rpython/jit/backend/ppc/codebuilder.py b/rpython/jit/backend/ppc/codebuilder.py
--- a/rpython/jit/backend/ppc/codebuilder.py
+++ b/rpython/jit/backend/ppc/codebuilder.py
@@ -61,7 +61,8 @@
XFL = Form("FM", "frB", "XO1", "Rc")
XFX = Form("CRM", "rS", "XO1")
XLL = Form("LL", "XO1")
-XX1 = Form("vrT", "rA", "rB", "XO1")
+XX1 = Form("fvrT", "rA", "rB", "XO1")
+XX3 = Form("fvrT", "fvrA", "fvrB", "XO9")
VX = Form("lvrT", "lvrA", "lvrB", "XO8")
MI = Form("rA", "rS", "SH", "MB", "ME", "Rc")
@@ -576,6 +577,12 @@
class PPCVSXAssembler(object):
_mixin_ = True
+ # floating point operations (ppc has its own vector
+ # unit for double/single precision floating point)
+
+ # FLOAT
+ # -----
+
# load
lxvdsx = XX1(31, XO1=332) # splat first element
lxvd2x = XX1(31, XO1=844)
@@ -585,7 +592,23 @@
stxvd2x = XX1(31, XO1=972)
stxvw4x = XX1(31, XO1=908)
- # integer
+ # arith
+
+ # add
+ xvadddp = XX3(60, XO9=96)
+ xvaddsp = XX3(60, XO9=64)
+ # sub
+ xvsubdp = XX3(60, XO9=104)
+ xvsubsp = XX3(60, XO9=72)
+ # mul
+ xvmuldp = XX3(60, XO9=112)
+ xvmulsp = XX3(60, XO9=80)
+ # div
+ xvdivdp = XX3(60, XO9=102)
+ xvdivsp = XX3(60, XO9=88)
+
+ # INTEGER
+ # -------
vaddudm = VX(4, XO8=192)
class PPCAssembler(BasicPPCAssembler, PPCVSXAssembler):
diff --git a/rpython/jit/backend/ppc/ppc_field.py b/rpython/jit/backend/ppc/ppc_field.py
--- a/rpython/jit/backend/ppc/ppc_field.py
+++ b/rpython/jit/backend/ppc/ppc_field.py
@@ -43,7 +43,9 @@
"spr": (11, 20),
"TO": ( 6, 10),
"UIMM": (16, 31),
- "vrT": (6, 31, 'unsigned', regname._V, 'overlap'),
+ "fvrT": (6, 31, 'unsigned', regname._V, 'overlap'),
+ "fvrA": (11, 31, 'unsigned', regname._V, 'overlap'),
+ "fvrB": (16, 31, 'unsigned', regname._V, 'overlap'),
# low vector register T (low in a sense:
# can only address 32 vector registers)
"lvrT": (6, 10, 'unsigned', regname._V),
@@ -59,6 +61,7 @@
"XO6": (21, 29),
"XO7": (27, 30),
"XO8": (21, 31),
+ "XO9": (21, 28),
"LL": ( 9, 10),
}
@@ -110,18 +113,6 @@
value = super(sh, self).decode(inst)
return (value & 32) << 5 | (value >> 10 & 31)
-# ??? class tx(Field):
-# ??? def encode(self, value):
-# ??? value = (value & 31) << 20 | (value & 32) >> 5
-# ??? return super(tx, self).encode(value)
-# ??? def decode(self, inst):
-# ??? value = super(tx, self).decode(inst)
-# ??? return (value & 32) << 5 | (value >> 20 & 31)
-# ??? def r(self):
-# ??? import pdb; pdb.set_trace()
-# ??? return super(tx, self).r()
-# other special fields?
-
ppc_fields = {
"LI": IField("LI", *fields["LI"]),
"BD": IField("BD", *fields["BD"]),
@@ -129,7 +120,6 @@
"mbe": mbe("mbe", *fields["mbe"]),
"sh": sh("sh", *fields["sh"]),
"spr": spr("spr", *fields["spr"]),
- # ??? "vrT": tx("vrT", *fields["vrT"]),
}
for f in fields:
diff --git a/rpython/jit/backend/ppc/rassemblermaker.py b/rpython/jit/backend/ppc/rassemblermaker.py
--- a/rpython/jit/backend/ppc/rassemblermaker.py
+++ b/rpython/jit/backend/ppc/rassemblermaker.py
@@ -46,9 +46,15 @@
elif field.name == 'sh':
body.append('sh1 = (%s & 31) << 10 | (%s & 32) >> 5' % (value, value))
value = 'sh1'
- elif field.name == 'vrT':
+ elif field.name == 'fvrT':
body.append('vrT1 = (%s & 31) << 21 | (%s & 32) >> 5' % (value, value))
value = 'vrT1'
+ elif field.name == 'fvrA':
+ body.append('fvrA1 = ((%s & 31) << 15 | (%s & 32) >> 5) << 2' % (value, value))
+ value = 'fvrA1'
+ elif field.name == 'fvrB':
+ body.append('fvrB1 = ((%s & 31) << 10 | (%s & 32) >> 5) << 1' % (value, value))
+ value = 'fvrB1'
if isinstance(field, IField):
body.append('v |= ((%3s >> 2) & r_uint(%#05x)) << 2' % (value, field.mask))
else:
diff --git a/rpython/jit/backend/ppc/vector_ext.py b/rpython/jit/backend/ppc/vector_ext.py
--- a/rpython/jit/backend/ppc/vector_ext.py
+++ b/rpython/jit/backend/ppc/vector_ext.py
@@ -48,19 +48,12 @@
def _vec_load(self, resloc, baseloc, indexloc, integer, itemsize, aligned):
if integer:
+ raise NotImplementedError
+ else:
if itemsize == 4:
self.mc.lxvw4x(resloc.value, indexloc.value, baseloc.value)
elif itemsize == 8:
self.mc.lxvd2x(resloc.value, indexloc.value, baseloc.value)
- else:
- raise NotImplementedError
- else:
- if itemsize == 4:
- self.mc.MOVUPS(resloc, src_addr)
- elif itemsize == 8:
- self.mc.MOVUPD(resloc, src_addr)
- else:
- raise NotImplementedError
def _emit_vec_setitem(self, op, arglocs, regalloc):
# prepares item scale (raw_store does not)
@@ -83,14 +76,12 @@
def _vec_store(self, baseloc, indexloc, valueloc, integer, itemsize, aligned):
if integer:
+ raise NotImplementedError
+ else:
if itemsize == 4:
self.mc.stxvw4x(valueloc.value, indexloc.value, baseloc.value)
elif itemsize == 8:
self.mc.stxvd2x(valueloc.value, indexloc.value, baseloc.value)
- else:
- raise NotImplementedError
- else:
- raise NotImplementedError
def emit_vec_int_add(self, op, arglocs, regalloc):
@@ -103,8 +94,41 @@
elif size == 4:
raise NotImplementedError
elif size == 8:
+ raise NotImplementedError # need value in another register!
self.mc.vaddudm(resloc.value, loc0.value, loc1.value)
+ def emit_vec_float_add(self, op, arglocs, resloc):
+ resloc, loc0, loc1, itemsize_loc = arglocs
+ itemsize = itemsize_loc.value
+ if itemsize == 4:
+ self.mc.xvaddsp(resloc.value, loc0.value, loc1.value)
+ elif itemsize == 8:
+ self.mc.xvadddp(resloc.value, loc0.value, loc1.value)
+
+ def emit_vec_float_sub(self, op, arglocs, resloc):
+ resloc, loc0, loc1, itemsize_loc = arglocs
+ itemsize = itemsize_loc.value
+ if itemsize == 4:
+ self.mc.xvsubsp(resloc.value, loc0.value, loc1.value)
+ elif itemsize == 8:
+ self.mc.xvsubdp(resloc.value, loc0.value, loc1.value)
+
+ def emit_vec_float_mul(self, op, arglocs, resloc):
+ resloc, loc0, loc1, itemsize_loc = arglocs
+ itemsize = itemsize_loc.value
+ if itemsize == 4:
+ self.mc.xvmulsp(resloc.value, loc0.value, loc1.value)
+ elif itemsize == 8:
+ self.mc.xvmuldp(resloc.value, loc0.value, loc1.value)
+
+ def emit_vec_float_truediv(self, op, arglocs, resloc):
+ resloc, loc0, loc1, itemsize_loc = arglocs
+ itemsize = itemsize_loc.value
+ if itemsize == 4:
+ self.mc.xvdivsp(resloc.value, loc0.value, loc1.value)
+ elif itemsize == 8:
+ self.mc.xvdivdp(resloc.value, loc0.value, loc1.value)
+
#def genop_guard_vec_guard_true(self, guard_op, guard_token, locs, resloc):
# self.implement_guard(guard_token)
@@ -253,23 +277,6 @@
#def genop_vec_int_xor(self, op, arglocs, resloc):
# self.mc.PXOR(resloc, arglocs[0])
- #genop_vec_float_arith = """
- #def genop_vec_float_{type}(self, op, arglocs, resloc):
- # loc0, loc1, itemsize_loc = arglocs
- # itemsize = itemsize_loc.value
- # if itemsize == 4:
- # self.mc.{p_op_s}(loc0, loc1)
- # elif itemsize == 8:
- # self.mc.{p_op_d}(loc0, loc1)
- #"""
- #for op in ['add','mul','sub']:
- # OP = op.upper()
- # _source = genop_vec_float_arith.format(type=op,
- # p_op_s=OP+'PS',
- # p_op_d=OP+'PD')
- # exec py.code.Source(_source).compile()
- #del genop_vec_float_arith
-
#def genop_vec_float_truediv(self, op, arglocs, resloc):
# loc0, loc1, sizeloc = arglocs
# size = sizeloc.value
@@ -569,10 +576,10 @@
prepare_vec_int_add = prepare_vec_arith
#prepare_vec_int_sub = prepare_vec_arith
#prepare_vec_int_mul = prepare_vec_arith
- #prepare_vec_float_add = prepare_vec_arith
- #prepare_vec_float_sub = prepare_vec_arith
- #prepare_vec_float_mul = prepare_vec_arith
- #prepare_vec_float_truediv = prepare_vec_arith
+ prepare_vec_float_add = prepare_vec_arith
+ prepare_vec_float_sub = prepare_vec_arith
+ prepare_vec_float_mul = prepare_vec_arith
+ prepare_vec_float_truediv = prepare_vec_arith
del prepare_vec_arith
def _prepare_vec_store(self, op):
diff --git a/rpython/jit/metainterp/test/test_vector.py b/rpython/jit/metainterp/test/test_vector.py
--- a/rpython/jit/metainterp/test/test_vector.py
+++ b/rpython/jit/metainterp/test/test_vector.py
@@ -1,5 +1,6 @@
import py
-
+import pytest
+import math
from hypothesis import given, note, strategies as st
from rpython.jit.metainterp.warmspot import ll_meta_interp, get_stats
from rpython.jit.metainterp.test.support import LLJitMixin
@@ -13,7 +14,8 @@
from rpython.rlib.rarithmetic import r_uint, intmask
from rpython.rlib.rawstorage import (alloc_raw_storage, raw_storage_setitem,
free_raw_storage, raw_storage_getitem)
-from rpython.rlib.objectmodel import specialize, is_annotation_constant
+from rpython.rlib.objectmodel import (specialize, is_annotation_constant,
+ always_inline)
from rpython.jit.backend.detect_cpu import getcpuclass
CPU = getcpuclass()
@@ -24,7 +26,40 @@
def free(mem):
lltype.free(mem, flavor='raw')
+def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):
+ return abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)
+
+class RawStorage(object):
+ def __init__(self):
+ self.arrays = []
+
+ def new(self, values, type, size=None, zero=True):
+ bytecount = rffi.sizeof(type)
+ if not values:
+ array = alloc_raw_storage(size*bytecount, zero=zero)
+ self.arrays.append(array)
+ return array
+ else:
+ size = len(values)*bytecount
+ array = alloc_raw_storage(size, zero=zero)
+ for i,v in enumerate(values):
+ raw_storage_setitem(array, i*bytecount, rffi.cast(type,v))
+ self.arrays.append(array)
+ return array
+
+ def clear(self):
+ while self.arrays:
+ array = self.arrays.pop()
+ free_raw_storage(array)
+
+@pytest.fixture(scope='session')
+def rawstorage(request):
+ rs = RawStorage()
+ request.addfinalizer(rs.clear)
+ return rs
+
integers_64bit = st.integers(min_value=-2**63, max_value=2**63-1)
+floats = st.floats()
class VectorizeTests:
enable_opts = 'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll'
@@ -40,42 +75,80 @@
type_system=self.type_system,
vec=vec, vec_all=vec_all)
- @given(st.lists(integers_64bit, min_size=5, max_size=50),
- st.lists(integers_64bit, min_size=5, max_size=50))
- def test_vector_simple(self, la, lb):
- myjitdriver = JitDriver(greens = [],
- reds = 'auto',
- vectorize=True)
- i = min(len(la), len(lb))
- la = la[:i]
- lb = lb[:i]
- bc = i*rffi.sizeof(rffi.SIGNED)
- vc = alloc_raw_storage(bc, zero=True)
+ @given(data=st.data())
+ @pytest.mark.parametrize('func', [lambda a,b: a+b,
+ lambda a,b: a*b, lambda a,b: a-b, lambda a,b: a / b])
+ def test_vector_simple_float(self, func, data):
+ func = always_inline(func)
+
+ type = rffi.DOUBLE
+ size = rffi.sizeof(rffi.DOUBLE)
+ myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True)
+ def f(bytecount, va, vb, vc):
+ i = 0
+ while i < bytecount:
+ myjitdriver.jit_merge_point()
+ a = raw_storage_getitem(type,va,i)
+ b = raw_storage_getitem(type,vb,i)
+ c = func(a,b)
+ raw_storage_setitem(vc, i, rffi.cast(type,c))
+ i += size
+
+ la = data.draw(st.lists(floats, min_size=10, max_size=150))
+ #la = [0.0,0.0,0.0,0.0,0.0,0.0,0.0]
+ #lb = [0.0,0.0,0.0,0.0,1.7976931348623157e+308,0.0,0.0]
+ l = len(la)
+ lb = data.draw(st.lists(floats, min_size=l, max_size=l))
+
+ rawstorage = RawStorage()
+ va = rawstorage.new(la, type)
+ vb = rawstorage.new(lb, type)
+ vc = rawstorage.new(None, type, size=l)
+ self.meta_interp(f, [l*size, va, vb, vc])
+
+ for i in range(l):
+ c = raw_storage_getitem(type,vc,i*size)
+ r = func(la[i], lb[i])
+ assert isclose(r, c) or (math.isnan(r) and math.isnan(c)) or \
+ (math.isinf(r) and math.isinf(c) and \
+ (r < 0.0 and c < 0.0) or \
+ (r > 0.0 and c > 0.0))
+
+ rawstorage.clear()
+
+ #@given(st.data())
+ def test_vector_simple_int(self):
+
+ type = rffi.SIGNED
size = rffi.sizeof(rffi.SIGNED)
- def f(d):
- va = alloc_raw_storage(bc, zero=True)
- vb = alloc_raw_storage(bc, zero=True)
- x = 1
- for i in range(d):
- j = i*size
- raw_storage_setitem(va, j, rffi.cast(rffi.SIGNED,la[i]))
- raw_storage_setitem(vb, j, rffi.cast(rffi.SIGNED,lb[i]))
+ myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True)
+ def f(bytecount, va, vb, vc):
i = 0
- while i < bc:
+ while i < bytecount:
myjitdriver.jit_merge_point()
- a = raw_storage_getitem(rffi.SIGNED,va,i)
- b = raw_storage_getitem(rffi.SIGNED,vb,i)
+ a = raw_storage_getitem(type,va,i)
+ b = raw_storage_getitem(type,vb,i)
c = a+b
- raw_storage_setitem(vc, i, rffi.cast(rffi.SIGNED,c))
- i += 1*size
+ raw_storage_setitem(vc, i, rffi.cast(type,c))
+ i += size
- free_raw_storage(va)
- free_raw_storage(vb)
- self.meta_interp(f, [i])
- for p in range(i):
- c = raw_storage_getitem(rffi.SIGNED,vc,p*size)
- assert intmask(la[p] + lb[p]) == c
- free_raw_storage(vc)
+ rawstorage = RawStorage()
+ #la = data.draw(st.lists(integers_64bit, min_size=10, max_size=150))
+ la = [0] * 10
+ l = len(la)
+ #lb = data.draw(st.lists(integers_64bit, min_size=l, max_size=l))
+ lb = [0] * 10
+
+ va = rawstorage.new(la, lltype.Signed)
+ vb = rawstorage.new(lb, lltype.Signed)
+ vc = rawstorage.new(None, lltype.Signed, size=l)
+ self.meta_interp(f, [l*size, va, vb, vc])
+
+ for i in range(l):
+ c = raw_storage_getitem(type,vc,i*size)
+ assert intmask(la[i] + lb[i]) == c
+
+ rawstorage.clear()
@py.test.mark.parametrize('i',[1,2,3,8,17,128,130,131,142,143])
def test_vectorize_array_get_set(self,i):
More information about the pypy-commit
mailing list