[pypy-commit] pypy vecopt2: added vector IR operations (load/store & arithmetic)
plan_rich
noreply at buildbot.pypy.org
Tue May 5 09:45:48 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt2
Changeset: r77098:6c8dc39764dc
Date: 2015-03-27 18:04 +0100
http://bitbucket.org/pypy/pypy/changeset/6c8dc39764dc/
Log: added vector IR operations (load/store & arithmetic); adapted the
blackhole interpreter/executor to ignore those for now; started to
implement the scheduling of operations
diff --git a/rpython/jit/metainterp/executor.py b/rpython/jit/metainterp/executor.py
--- a/rpython/jit/metainterp/executor.py
+++ b/rpython/jit/metainterp/executor.py
@@ -340,6 +340,12 @@
rop.LABEL,
): # list of opcodes never executed by pyjitpl
continue
+ # XXX this is temporary! Once the algorithm works, I have to adjust the
+ # black hole interpreter!
+ if rop._VEC_ARITHMETIC_FIRST <= value <= rop._VEC_ARITHMETIC_LAST or \
+ value == rop.VEC_RAW_LOAD or value == rop.VEC_RAW_STORE:
+ continue
+
raise AssertionError("missing %r" % (key,))
return execute_by_num_args
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -106,6 +106,7 @@
self.memory_refs = memory_refs
self.adjacent_list = [ [] for i in range(len(self.operations)) ]
self.integral_mod = IntegralMod()
+ self.schedulable_nodes = [0] # label is always schedulable
self.build_dependencies(self.operations)
def build_dependencies(self, operations):
@@ -146,6 +147,9 @@
if i > 0:
self._guard_dependency(op, i, operations, tracker)
+ if len(self.adjacent_list[i]) == 0:
+ self.schedulable_nodes.append(i)
+
def update_memory_ref(self, op, index, tracker):
if index not in self.memory_refs:
return
@@ -375,6 +379,26 @@
opnum = op.getopnum()
return rop.SETARRAYITEM_GC<= opnum and opnum <= rop.UNICODESETITEM
+class Scheduler(object):
+ def __init__(self, graph):
+ self.graph = graph
+ self.schedulable_nodes = self.graph.schedulable_nodes
+
+ def has_more_to_schedule(self):
+ return len(self.schedulable_nodes) > 0
+
+ def next_schedule_index(self):
+ return self.schedulable_nodes[0]
+
+ def schedule(self, index):
+ node = self.schedulable_nodes[index]
+ del self.schedulable_nodes[index]
+ #
+ for dep in self.graph.get_uses(node):
+ self.schedulable_nodes.append(dep.idx_to)
+ #
+ # self.graph.adjacent_list[node] = None
+
class IntegralMod(object):
""" Calculates integral modifications on an integer object.
The operations must be provided in backwards direction and of one
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -11,7 +11,7 @@
from rpython.jit.metainterp.optimizeopt.dependency import DependencyGraph
from rpython.jit.metainterp.optimizeopt.unroll import Inliner
from rpython.jit.metainterp.optimizeopt.vectorize import (VectorizingOptimizer, MemoryRef,
- isomorphic, Pair)
+ isomorphic, Pair, NotAVectorizeableLoop)
from rpython.jit.metainterp.optimize import InvalidLoop
from rpython.jit.metainterp.history import ConstInt, BoxInt, get_const_ptr_for_string
from rpython.jit.metainterp import executor, compile, resume
@@ -81,6 +81,15 @@
opt.combine_packset()
return opt
+ def schedule(self, loop, unroll_factor = -1):
+ opt = self.vec_optimizer_unrolled(loop, unroll_factor)
+ opt.build_dependency_graph()
+ opt.find_adjacent_memory_refs()
+ opt.extend_packset()
+ opt.combine_packset()
+ opt.schedule()
+ return opt
+
def assert_unroll_loop_equals(self, loop, expected_loop, \
unroll_factor = -1):
vec_optimizer = self.vec_optimizer_unrolled(loop, unroll_factor)
@@ -783,19 +792,23 @@
[]
jump()
"""
- vopt = self.combine_packset(self.parse_loop(ops),15)
- assert len(vopt.vec_info.memory_refs) == 0
- assert len(vopt.packset.packs) == 0
+ try:
+ self.combine_packset(self.parse_loop(ops),15)
+ pytest.fail("combine should raise an exception if no pack "
+ "statements are present")
+ except NotAVectorizeableLoop:
+ pass
ops = """
[p0,i0]
i3 = getarrayitem_gc(p0, i0, descr=floatarraydescr)
jump(p0,i3)
"""
- loop = self.parse_loop(ops)
- vopt = self.combine_packset(loop,15)
- assert len(vopt.vec_info.memory_refs) == 16
- assert len(vopt.packset.packs) == 0
+ try:
+ loop = self.parse_loop(ops)
+ self.combine_packset(loop,15)
+ except NotAVectorizeableLoop:
+ pass
def test_packset_vector_operation(self):
for op in ['int_add', 'int_sub', 'int_mul']:
@@ -849,13 +862,13 @@
guard_true(i16) []
i2 = vec_raw_load(p0, i0, 4, descr=floatarraydescr)
i3 = vec_raw_load(p1, i0, 4, descr=floatarraydescr)
- i4 = {op}(i2,i3)
+ i4 = {op}(i2,i3,4,descr=floatarraydescr)
vec_raw_store(p2, i0, i4, 4, descr=floatarraydescr)
jump(p0,p1,p2,i15)
""".format(op=vop)
loop = self.parse_loop(ops)
vopt = self.schedule(loop,3)
- self.assert_equals(loop, self.parse_loop(vops)
+ self.assert_equal(loop, self.parse_loop(vops))
class TestLLtype(BaseTestVectorize, LLtypeMixin):
pass
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -4,7 +4,7 @@
from rpython.jit.metainterp.optimizeopt.optimizer import Optimizer, Optimization
from rpython.jit.metainterp.optimizeopt.util import make_dispatcher_method
from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph,
- MemoryRef, IntegralMod)
+ MemoryRef, IntegralMod, Scheduler)
from rpython.jit.metainterp.resoperation import rop
from rpython.jit.metainterp.resume import Snapshot
from rpython.rlib.debug import debug_print, debug_start, debug_stop
@@ -39,6 +39,10 @@
self.packset = None
self.unroll_count = 0
+ def emit_operation(self, op):
+ self._last_emitted_op = op
+ self._newoperations.append(op)
+
def emit_unrolled_operation(self, op):
if op.getopnum() == rop.DEBUG_MERGE_POINT:
self.last_debug_merge_point = op
@@ -263,26 +267,42 @@
self.packset.add_pair(*candidate)
def combine_packset(self):
- changed = False
+ if len(self.packset.packs) == 0:
+ raise NotAVectorizeableLoop()
while True:
- changed = False
+ len_before = len(self.packset.packs)
for i,pack1 in enumerate(self.packset.packs):
for j,pack2 in enumerate(self.packset.packs):
if i == j:
continue
if pack1.rightmost_match_leftmost(pack2):
self.packset.combine(i,j)
- changed = True
- break
+ continue
if pack2.rightmost_match_leftmost(pack1):
self.packset.combine(j,i)
- changed = True
- break
- if changed:
- break
- if not changed:
+ continue
+ if len_before == len(self.packset.packs):
break
+ def schedule(self):
+ scheduler = Scheduler(self.dependency_graph)
+ while scheduler.has_more_to_schedule():
+ candidate_index = scheduler.next_schedule_index()
+ candidate = self.loop.operations[candidate_index]
+ pack = self.packset.pack_for_operation(candidate, candidate_index)
+ if pack:
+ self._schedule_pack(scheduler, pack)
+ else:
+ self.emit_operation(candidate)
+ scheduler.schedule(0)
+
+ def _schedule_pack(self, scheduler, pack):
+ if scheduler.all_schedulable([ e.opidx for e in pack.operations ]):
+ self.emit_vec_operation(pack)
+
+ def emit_vec_operation(self, pack):
+ pass
+
def isomorphic(l_op, r_op):
""" Described in the paper ``Instruction-Isomorphism in Program Execution''.
I think this definition is too strict. TODO -> find another reference
@@ -350,7 +370,15 @@
for op in pack_j.operations[1:]:
operations.append(op)
self.packs[i] = Pack(operations)
- del self.packs[j]
+ # instead of deleting an item in the middle of the pack array,
+ # the last element is moved to position j and
+ # the last slot is freed. The order of packs doesn't matter.
+ last_pos = len(self.packs) - 1
+ if j == last_pos:
+ del self.packs[j]
+ else:
+ self.packs[j] = self.packs[last_pos]
+ del self.packs[last_pos]
class Pack(object):
""" A pack is a set of n statements that are:
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -167,6 +167,9 @@
def is_ovf(self):
return rop._OVF_FIRST <= self.getopnum() <= rop._OVF_LAST
+ def is_vector_arithmetic(self):
+ return rop._VEC_ARITHMETIC_FIRST <= self.getopnum() <= rop._VEC_ARITHMETIC_LAST
+
def is_comparison(self):
return self.is_always_pure() and self.returns_bool_result()
@@ -440,6 +443,28 @@
'CONVERT_FLOAT_BYTES_TO_LONGLONG/1',
'CONVERT_LONGLONG_BYTES_TO_FLOAT/1',
#
+ # vector operations
+ '_VEC_ARITHMETIC_FIRST',
+ 'VEC_CHAR_ADD/3d',
+ 'VEC_CHAR_SUB/3d',
+ 'VEC_CHAR_MUL/3d',
+ 'VEC_SHORT_ADD/3d',
+ 'VEC_SHORT_SUB/3d',
+ 'VEC_SHORT_MUL/3d',
+ 'VEC_INT_ADD/3d',
+ 'VEC_INT_SUB/3d',
+ 'VEC_INT_MUL/3d',
+ 'VEC_UINT_ADD/3d',
+ 'VEC_UINT_SUB/3d',
+ 'VEC_UINT_MUL/3d',
+ 'VEC_SP_FLOAT_ADD/3d',
+ 'VEC_SP_FLOAT_SUB/3d',
+ 'VEC_SP_FLOAT_MUL/3d',
+ 'VEC_FLOAT_ADD/3d',
+ 'VEC_FLOAT_SUB/3d',
+ 'VEC_FLOAT_MUL/3d',
+ '_VEC_ARITHMETIC_LAST',
+ #
'INT_LT/2b',
'INT_LE/2b',
'INT_EQ/2b',
@@ -488,6 +513,7 @@
'GETARRAYITEM_RAW/2d',
'GETINTERIORFIELD_GC/2d',
'RAW_LOAD/2d',
+ 'VEC_RAW_LOAD/3d',
'GETFIELD_GC/1d',
'GETFIELD_RAW/1d',
'_MALLOC_FIRST',
@@ -511,6 +537,7 @@
'SETINTERIORFIELD_GC/3d',
'SETINTERIORFIELD_RAW/3d', # right now, only used by tests
'RAW_STORE/3d',
+ 'VEC_RAW_STORE/4d',
'SETFIELD_GC/2d',
'ZERO_PTR_FIELD/2', # only emitted by the rewrite, clears a pointer field
# at a given constant offset, no descr
More information about the pypy-commit
mailing list