[pypy-commit] pypy vecopt: doc additions, reenabled the int8 expand test (passes now)
plan_rich
noreply at buildbot.pypy.org
Wed Jun 24 13:58:25 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r78292:e70ae41089d7
Date: 2015-06-24 13:58 +0200
http://bitbucket.org/pypy/pypy/changeset/e70ae41089d7/
Log: doc additions, reenabled the int8 expand test (passes now)
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -274,10 +274,9 @@
sum(d)
"""
def test_int8_expand(self):
- py.test.skip("TODO implement assembler")
result = self.run("int8_expand")
- assert int(result) == 8*8 + sum(range(0,17))
- self.check_vectorized(3, 2) # TODO sum at the end
+ assert int(result) == 17*8 + sum(range(0,17))
+ self.check_vectorized(3, 1) # TODO sum at the end
def define_int32_add_const():
return """
diff --git a/rpython/doc/jit/vectorization.rst b/rpython/doc/jit/vectorization.rst
--- a/rpython/doc/jit/vectorization.rst
+++ b/rpython/doc/jit/vectorization.rst
@@ -2,7 +2,10 @@
Vectorization
=============
-TBA
+To find parallel instructions the tracer must provide enough information about
+memory load/store operations. They must be adjacent in memory. The requirement for
+that is that they use the same index variable and the offset can be expressed as
+a linear or affine combination.
Features
--------
@@ -13,6 +16,9 @@
* int8/int16/int32/int64 arithmetic: add, substract, multiply, negate, absolute
* int8/int16/int32/int64 logical: and, or, xor
+Reduction
+---------
+
Reduction is implemented:
* sum
@@ -21,10 +27,13 @@
* all, any, prod, min, max
-To find parallel instructions the tracer must provide enough information about
-memory load/store operations. They must be adjacent in memory. The requirement for
-that is that they use the same index variable and offset can be expressed as a
-a linear or affine combination.
+Constant & Variable Expansion
+-----------------------------
+
+Packed arithmetic operations expand scalar variables or constants into vector registers.
+
+Guard Strengthening
+-------------------
Unrolled guards are strengthend on a arithmetical level (See GuardStrengthenOpt).
The resulting vector trace will only have one guard that checks the index.
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -746,6 +746,7 @@
PSHUFLW_xxi = xmminsn('\xF2', rex_nw, '\x0F\x70', register(1,8), register(2), '\xC0', immediate(3, 'b'))
PSHUFB_xx = xmminsn('\x66', rex_nw, '\x0F\x38\x00', register(1,8), register(2), '\xC0')
PSHUFB_xm = xmminsn('\x66', rex_nw, '\x0F\x38\x00', register(1,8), mem_reg_plus_const(2))
+ PSHUFB_xj = xmminsn('\x66', rex_nw, '\x0F\x38\x00', register(1,8), abs_(2))
# SSE3
HADDPD_xx = xmminsn('\x66', rex_nw, '\x0F\x7C', register(1,8), register(2), '\xC0')
More information about the pypy-commit
mailing list