[pypy-commit] pypy ppc-vsx-support: change the code builder to resolve an issue with the (float) vector registers 32-63, because 0-31 overlap with the floating point registers. Currently 0-31 are not used for vector reg. allocation; they come in handy to splat floating point values
plan_rich
pypy.commits at gmail.com
Wed Jun 29 09:02:30 EDT 2016
Author: Richard Plangger <planrichi at gmail.com>
Branch: ppc-vsx-support
Changeset: r85449:946ddc31e87b
Date: 2016-06-29 15:01 +0200
http://bitbucket.org/pypy/pypy/changeset/946ddc31e87b/
Log: change the code builder to resolve an issue with the (float) vector
registers 32-63, because 0-31 overlap with the floating point registers.
Currently 0-31 are not used for vector reg. allocation; they come in
handy to splat floating point values
diff --git a/rpython/jit/backend/ppc/codebuilder.py b/rpython/jit/backend/ppc/codebuilder.py
--- a/rpython/jit/backend/ppc/codebuilder.py
+++ b/rpython/jit/backend/ppc/codebuilder.py
@@ -65,6 +65,7 @@
XX2 = Form("fvrT", "fvrB", "XO6")
XX3 = Form("fvrT", "fvrA", "fvrB", "XO9")
XX3_2 = Form("fvrT", "fvrA", "fvrB", "OE", "XO11")
+XX3_splat = Form("fvrT", "fvrA", "fvrB", "DM", "XO13", "OE")
XV = Form("ivrT", "rA", "rB", "XO1")
VX = Form("ivrT", "ivrA", "ivrB", "XO8")
VC = Form("ivrT", "ivrA", "ivrB", "XO12", "OE")
@@ -653,6 +654,14 @@
vcmpequdx = VC(4, XO12=199, OE=1)
vcmpequd = VC(4, XO12=199, OE=0)
+ # permute/splat
+ # splat low of A, and low of B
+ xxspltdl = XX3_splat(60, XO13=10, OE=0, DM=0b00)
+ # splat high of A, and high of B
+ xxspltdh = XX3_splat(60, XO13=10, OE=0, DM=0b11)
+ # generic splat
+ xxspltd = XX3_splat(60, XO13=10, OE=0)
+
# INTEGER
# -------
diff --git a/rpython/jit/backend/ppc/ppc_field.py b/rpython/jit/backend/ppc/ppc_field.py
--- a/rpython/jit/backend/ppc/ppc_field.py
+++ b/rpython/jit/backend/ppc/ppc_field.py
@@ -44,8 +44,8 @@
"TO": ( 6, 10),
"UIMM": (16, 31),
"fvrT": (6, 31, 'unsigned', regname._V, 'overlap'),
- "fvrA": (11, 31, 'unsigned', regname._V, 'overlap'),
- "fvrB": (16, 31, 'unsigned', regname._V, 'overlap'),
+ "fvrA": (11, 29, 'unsigned', regname._V, 'overlap'),
+ "fvrB": (16, 30, 'unsigned', regname._V, 'overlap'),
# low vector register T (low in a sense:
# can only address 32 vector registers)
"ivrT": (6, 10, 'unsigned', regname._V),
@@ -66,6 +66,8 @@
"XO10": (26, 31),
"XO11": (22, 28),
"XO12": (22, 31),
+ "XO13": (24, 28),
+ "DM": (22, 23),
"LL": ( 9, 10),
"SIM": (11, 15),
}
diff --git a/rpython/jit/backend/ppc/rassemblermaker.py b/rpython/jit/backend/ppc/rassemblermaker.py
--- a/rpython/jit/backend/ppc/rassemblermaker.py
+++ b/rpython/jit/backend/ppc/rassemblermaker.py
@@ -50,10 +50,10 @@
body.append('vrT1 = (%s & 31) << 21 | (%s & 32) >> 5' % (value, value))
value = 'vrT1'
elif field.name == 'fvrA':
- body.append('fvrA1 = ((%s & 31) << 15 | (%s & 32) >> 5) << 2' % (value, value))
+ body.append('fvrA1 = (%s & 31) << 14 | (%s & 32) >> 5' % (value, value))
value = 'fvrA1'
elif field.name == 'fvrB':
- body.append('fvrB1 = ((%s & 31) << 10 | (%s & 32) >> 5) << 1' % (value, value))
+ body.append('fvrB1 = (%s & 31) << 10 | (%s & 32) >> 5' % (value, value))
value = 'fvrB1'
if isinstance(field, IField):
body.append('v |= ((%3s >> 2) & r_uint(%#05x)) << 2' % (value, field.mask))
diff --git a/rpython/jit/backend/ppc/register.py b/rpython/jit/backend/ppc/register.py
--- a/rpython/jit/backend/ppc/register.py
+++ b/rpython/jit/backend/ppc/register.py
@@ -4,7 +4,7 @@
ALL_REGS = [RegisterLocation(i) for i in range(32)]
ALL_FLOAT_REGS = [FPRegisterLocation(i) for i in range(32)]
ALL_INTEGER_VECTOR_REGS = [VectorRegisterLocation(i) for i in range(32)]
-ALL_FLOAT_VECTOR_REGS = [VectorRegisterLocation(i) for i in range(64)]
+ALL_FLOAT_VECTOR_REGS = [VectorRegisterLocation(i) for i in range(32,64)]
r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15, r16,\
r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31\
@@ -19,12 +19,12 @@
ivr24, ivr25, ivr26, ivr27, ivr28, ivr29, ivr30, ivr31\
= ALL_FLOAT_REGS
-vr0, vr1, vr2, vr3, vr4, vr5, vr6, vr7, vr8, vr9, vr10, vr11, vr12, vr13, \
- vr14, vr15, vr16, vr17, vr18, vr19, vr20, vr21, vr22, vr23, vr24, vr25, \
- vr26, vr27, vr28, vr29, vr30, vr31, vr32, vr33, vr34, vr35, vr36, vr37, \
- vr38, vr39, vr40, vr41, vr42, vr43, vr44, vr45, vr46, vr47, vr48, \
- vr49, vr50, vr51, vr52, vr53, vr54, vr55, vr56, vr57, vr58, vr59, vr60, \
- vr61, vr62, vr63 = ALL_FLOAT_VECTOR_REGS
+# the first 32 vector registers are partly shared with the normal floating point
+# registers; since there are so many registers, we just take the upper 32 ones
+vr32, vr33, vr34, vr35, vr36, vr37, \
+ vr38, vr39, vr40, vr41, vr42, vr43, vr44, vr45, vr46, vr47, vr48, \
+ vr49, vr50, vr51, vr52, vr53, vr54, vr55, vr56, vr57, vr58, vr59, vr60, \
+ vr61, vr62, vr63 = ALL_FLOAT_VECTOR_REGS
NONVOLATILES = [r14, r15, r16, r17, r18, r19, r20, r21, r22, r23,
diff --git a/rpython/jit/backend/ppc/vector_ext.py b/rpython/jit/backend/ppc/vector_ext.py
--- a/rpython/jit/backend/ppc/vector_ext.py
+++ b/rpython/jit/backend/ppc/vector_ext.py
@@ -479,7 +479,10 @@
self.mc.load_imm(tloc, srcloc.value)
self.mc.lxvd2x(res, 0, tloc.value)
elif size == 8:
- self.mc.vmr(res, srcloc.value, srcloc.value)
+ # splat the low of src to both slots in res
+ src = srcloc.value
+ #import pdb; pdb.set_trace()
+ self.mc.xxspltdl(res, src, src)
else:
notimplemented("[ppc/assembler] vec expand in this combination not supported")
@@ -804,7 +807,7 @@
l0 = self.expand_float(op.bytesize, arg)
res = self.force_allocate_vector_reg(op)
else:
- l0 = self.ensure_vector_reg(arg)
+ l0 = self.ensure_reg(arg)
res = self.force_allocate_vector_reg(op)
return [res, l0]
diff --git a/rpython/jit/metainterp/test/test_vector.py b/rpython/jit/metainterp/test/test_vector.py
--- a/rpython/jit/metainterp/test/test_vector.py
+++ b/rpython/jit/metainterp/test/test_vector.py
@@ -440,7 +440,7 @@
myjitdriver.jit_merge_point()
va[i] = va[i] + variable
i += 1
- val = va[0]
+ val = va[d//2]
lltype.free(va, flavor='raw')
return val
res = self.meta_interp(f, [60,58.4547])
More information about the pypy-commit
mailing list