[pypy-commit] pypy vecopt: continued refactoring of accumulation, removed extra accum box, but made vector box more generic
plan_rich
noreply at buildbot.pypy.org
Thu Jul 9 16:43:44 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r78508:7b660544d874
Date: 2015-07-09 14:10 +0200
http://bitbucket.org/pypy/pypy/changeset/7b660544d874/
Log: continued refactoring of accumulation, removed extra accum box, but
made vector box more generic; moved some methods down the hierarchy
to prevent assert isinstance(...)
diff --git a/rpython/jit/backend/llgraph/runner.py b/rpython/jit/backend/llgraph/runner.py
--- a/rpython/jit/backend/llgraph/runner.py
+++ b/rpython/jit/backend/llgraph/runner.py
@@ -3,7 +3,7 @@
from rpython.jit.backend.llgraph import support
from rpython.jit.backend.llsupport import symbolic
from rpython.jit.metainterp.history import AbstractDescr
-from rpython.jit.metainterp.history import Const, getkind, BoxVectorAccum
+from rpython.jit.metainterp.history import Const, getkind
from rpython.jit.metainterp.history import INT, REF, FLOAT, VOID, VECTOR
from rpython.jit.metainterp.resoperation import rop
from rpython.jit.metainterp.optimizeopt import intbounds
@@ -31,11 +31,7 @@
try:
newbox = _cache[box]
except KeyError:
- if isinstance(box, BoxVectorAccum):
- newbox = _cache[box] = \
- box.__class__(box, box.scalar_var, box.operator)
- else:
- newbox = _cache[box] = box.__class__()
+ newbox = _cache[box] = box.__class__()
return newbox
#
self.inputargs = map(mapping, inputargs)
@@ -877,10 +873,10 @@
value = self.env[box]
else:
value = None
- if isinstance(box, BoxVectorAccum):
- if box.operator == '+':
+ if box.getaccum():
+ if box.getaccum().operator == '+':
value = sum(value)
- elif box.operator == '*':
+ elif box.getaccum().operator == '*':
def prod(acc, x): return acc * x
value = reduce(prod, value, 1)
else:
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -23,7 +23,7 @@
from rpython.jit.codewriter import longlong
from rpython.jit.codewriter.effectinfo import EffectInfo
from rpython.jit.metainterp.history import (Box, Const, ConstInt, ConstPtr,
- ConstFloat, BoxInt, BoxFloat, BoxVector, BoxVectorAccum, INT, REF,
+ ConstFloat, BoxInt, BoxFloat, BoxVector, INT, REF,
FLOAT, VECTOR, TargetToken)
from rpython.jit.metainterp.resoperation import rop, ResOperation
from rpython.jit.metainterp.compile import ResumeGuardDescr
@@ -308,8 +308,11 @@
faillocs = []
descr = guard_op.getdescr()
for v in guard_op.getfailargs():
- if v is not None and isinstance(v, BoxVectorAccum):
- loc = self.loc(v.scalar_var)
+ if v is None:
+ continue
+ accum = v.getaccum()
+ if accum:
+ loc = self.loc(accum.getvar())
self.update_accumulation_loc(v, descr)
faillocs.append(loc)
else:
@@ -317,7 +320,7 @@
return faillocs
- def update_accumulation_loc(self, accumbox, descr):
+ def update_accumulation_loc(self, box, accum, descr):
""" Saves the location to the AccumInfo object.
Necessary to reconstruct the values at a guard exit.
"""
diff --git a/rpython/jit/backend/x86/vector_ext.py b/rpython/jit/backend/x86/vector_ext.py
--- a/rpython/jit/backend/x86/vector_ext.py
+++ b/rpython/jit/backend/x86/vector_ext.py
@@ -1,6 +1,6 @@
import py
from rpython.jit.metainterp.history import (Box, Const, ConstInt, ConstPtr,
- ConstFloat, BoxInt, BoxFloat, BoxVector, BoxVectorAccum, INT, REF,
+ ConstFloat, BoxInt, BoxFloat, BoxVector, INT, REF,
FLOAT, VECTOR, TargetToken)
from rpython.jit.backend.llsupport.descr import (ArrayDescr, CallDescr,
unpack_arraydescr, unpack_fielddescr, unpack_interiorfielddescr)
diff --git a/rpython/jit/metainterp/compile.py b/rpython/jit/metainterp/compile.py
--- a/rpython/jit/metainterp/compile.py
+++ b/rpython/jit/metainterp/compile.py
@@ -198,16 +198,16 @@
if loop.versions is not None:
token = jitcell_token
for version in loop.versions:
- versioned_loop = create_empty_loop(metainterp)
- versioned_loop.inputargs = version.inputargs
- versioned_loop.operations = version.operations
- versioned_loop.original_jitcell_token = jitcell_token
- for _, faildescr in version.faildescrs:
+ for faildescr in version.faildescrs:
+ vl = create_empty_loop(metainterp)
+ vl.inputargs = version.inputargs
+ vl.operations = version.operations
+ vl.original_jitcell_token = jitcell_token
send_bridge_to_backend(jitdriver_sd, metainterp_sd,
faildescr, version.inputargs,
version.operations, jitcell_token)
- versioned_loop.original_jitcell_token = jitcell_token
- record_loop_or_bridge(metainterp_sd, versioned_loop)
+ vl.original_jitcell_token = jitcell_token
+ record_loop_or_bridge(metainterp_sd, vl)
loop.versions = None
def compile_retrace(metainterp, greenkey, start,
@@ -514,7 +514,8 @@
class ResumeGuardDescr(ResumeDescr):
_attrs_ = ('rd_numb', 'rd_count', 'rd_consts', 'rd_virtuals',
- 'rd_frame_info_list', 'rd_pendingfields', 'status')
+ 'rd_frame_info_list', 'rd_pendingfields', 'rd_accum_list',
+ 'status')
rd_numb = lltype.nullptr(NUMBERING)
rd_count = 0
@@ -715,6 +716,9 @@
class ResumeAtLoopHeaderDescr(ResumeGuardDescr):
guard_opnum = rop.GUARD_EARLY_EXIT
+ def exits_early(self):
+ return True
+
class CompileLoopVersionDescr(ResumeGuardDescr):
guard_opnum = rop.GUARD_EARLY_EXIT
@@ -725,6 +729,12 @@
def handle_fail(self, deadframe, metainterp_sd, jitdriver_sd):
assert 0, "this guard must never fail"
+ def exits_early(self):
+ return True
+
+ def loop_version(self):
+ return True
+
class AllVirtuals:
llopaque = True
cache = None
diff --git a/rpython/jit/metainterp/history.py b/rpython/jit/metainterp/history.py
--- a/rpython/jit/metainterp/history.py
+++ b/rpython/jit/metainterp/history.py
@@ -131,6 +131,8 @@
# only structured containers can compare their shape (vector box)
return True
+ def getaccum(self):
+ return None
class AbstractDescr(AbstractValue):
__slots__ = ()
@@ -163,6 +165,15 @@
def compile_and_attach(self, metainterp, new_loop):
raise NotImplementedError
+ def exits_early(self):
+ # is this guard either a guard_early_exit resop,
+ # or has it been moved before a guard_early_exit?
+ return False
+
+ def loop_version(self):
+ # compile a loop version out of this guard?
+ return False
+
class BasicFinalDescr(AbstractFailDescr):
final_descr = True
@@ -519,24 +530,53 @@
# ____________________________________________________________
+class Accum(object):
+ PLUS = '+'
+ MULTIPLY = '*'
+
+ def __init__(self, opnum, var, pos):
+ self.var = var
+ self.pos = pos
+ self.operator = Accum.PLUS
+ if opnum == rop.FLOAT_MUL:
+ self.operator = Accum.MULTIPLY
+
+ def getoriginalbox(self):
+ return self.var
+
+ def getop(self):
+ return self.operator
+
+ def accumulates_value(self):
+ return True
+
+ def save_to_descr(self, descr, position):
+ assert isinstance(descr,ResumeGuardDescr)
+ ai = AccumInfo(descr.rd_accum_list, position, self.operator, self.var)
+ descr.rd_accum_list = ai
+
class BoxVector(Box):
type = VECTOR
- _attrs_ = ('item_type','item_count','item_size','item_signed')
+ _attrs_ = ('item_type','item_count','item_size','item_signed','accum')
_extended_display = False
- def __init__(self, item_type=FLOAT, item_count=2, item_size=8, item_signed=False):
+ def __init__(self, item_type=FLOAT, item_count=2, item_size=8, item_signed=False, accum=None):
assert item_type in (FLOAT, INT)
self.item_type = item_type
self.item_count = item_count
self.item_size = item_size
self.item_signed = item_signed
+ self.accum = None
def gettype(self):
return self.item_type
+
def getsize(self):
return self.item_size
+
def getsigned(self):
return self.item_signed
+
def getcount(self):
return self.item_count
@@ -576,11 +616,8 @@
return False
return True
-class BoxVectorAccum(BoxVector):
- def __init__(self, box, var, operator):
- BoxVector.__init__(self, box.item_type, box.item_count, box.item_size, box.item_signed)
- self.scalar_var = var
- self.operator = operator
+ def getaccum(self):
+ return self.accum
# ____________________________________________________________
@@ -697,8 +734,8 @@
class LoopVersion(object):
- def __init__(self, loop, aligned=False):
- self.operations = loop.operations
+ def __init__(self, operations, opt_ops, aligned=False):
+ self.operations = operations
self.aligned = aligned
self.faildescrs = []
#
@@ -711,9 +748,18 @@
i += 1
assert label.getopnum() == rop.LABEL
self.label_pos = i
- #self.parent_trace_label_args = None
- #self.bridge_label_args = label.getarglist()
self.inputargs = label.getarglist()
+ for op in opt_ops:
+ if op.is_guard():
+ descr = op.getdescr()
+ if descr.loop_version():
+ # currently there is only ONE versioning,
+ # that is the original loop after unrolling.
+ # if there are more possibilities, let the descr
+ # know which loop version it prefers
+ self.faildescrs.append(descr)
+ op.setfailargs(self.inputargs)
+ op.rd_snapshot = None
def adddescr(self, op, descr):
self.faildescrs.append((op, descr))
@@ -772,6 +818,13 @@
def get_operations(self):
return self.operations
+ def find_first_index(self, opnum):
+ """ return the index of the first operation having the given opnum, or -1 """
+ for i,op in enumerate(self.operations):
+ if op.getopnum() == opnum:
+ return i
+ return -1
+
def get_display_text(self): # for graphpage.py
return self.name + '\n' + repr(self.inputargs)
@@ -803,20 +856,14 @@
for arg in inputargs:
if arg is None:
continue
- if isinstance(arg, BoxVectorAccum):
- seen[arg.scalar_var] = None
- else:
- seen[arg] = None
+ seen[arg] = None
return seen
@staticmethod
def check_if_box_was_seen(box, seen):
if box is not None:
assert isinstance(box, Box)
- if isinstance(box, BoxVectorAccum):
- assert box in seen or box.scalar_var in seen
- else:
- assert box in seen
+ assert box in seen
@staticmethod
def check_consistency_of_branch(operations, seen):
@@ -846,9 +893,6 @@
assert isinstance(box, Box), "LABEL contains %r" % (box,)
seen = TreeLoop.seen_args(inputargs)
seen_count = len(seen)
- for arg in seen:
- if isinstance(arg, BoxVectorAccum):
- seen_count -= 1
assert seen_count == len(inputargs), (
"duplicate Box in the LABEL arguments")
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -857,17 +857,6 @@
return oplist
-class Accum(object):
- PLUS = '+'
- MULTIPLY = '*'
-
- def __init__(self, opnum, var, pos):
- self.var = var
- self.pos = pos
- self.operator = Accum.PLUS
- if opnum == rop.FLOAT_MUL:
- self.operator = Accum.MULTIPLY
-
class Pack(object):
""" A pack is a set of n statements that are:
* isomorphic
diff --git a/rpython/jit/metainterp/optimizeopt/util.py b/rpython/jit/metainterp/optimizeopt/util.py
--- a/rpython/jit/metainterp/optimizeopt/util.py
+++ b/rpython/jit/metainterp/optimizeopt/util.py
@@ -213,7 +213,6 @@
return True
def rename_failargs(self, guard, clone=False):
- from rpython.jit.metainterp.history import BoxVectorAccum
from rpython.jit.metainterp.compile import ResumeGuardDescr
if guard.getfailargs() is not None:
if clone:
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -6,7 +6,7 @@
"""
import py
-#XXXimport time
+import time
from rpython.jit.metainterp.resume import Snapshot, AccumInfo
from rpython.jit.metainterp.jitexc import NotAVectorizeableLoop, NotAProfitableLoop
@@ -15,13 +15,13 @@
CompileLoopVersionDescr, invent_fail_descr_for_op, ResumeGuardDescr)
from rpython.jit.metainterp.history import (ConstInt, VECTOR, FLOAT, INT,
BoxVector, BoxFloat, BoxInt, ConstFloat, TargetToken, JitCellToken, Box,
- BoxVectorAccum, LoopVersion)
+ LoopVersion, Accum)
from rpython.jit.metainterp.optimizeopt.optimizer import Optimizer, Optimization
from rpython.jit.metainterp.optimizeopt.util import make_dispatcher_method, Renamer
from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph,
MemoryRef, Node, IndexVar)
from rpython.jit.metainterp.optimizeopt.schedule import (VecScheduleData,
- Scheduler, Pack, Pair, AccumPair, Accum, vectorbox_outof_box, getpackopnum,
+ Scheduler, Pack, Pair, AccumPair, vectorbox_outof_box, getpackopnum,
getunpackopnum, PackType, determine_input_output_types)
from rpython.jit.metainterp.optimizeopt.guard import GuardStrengthenOpt
from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp)
@@ -36,7 +36,6 @@
optimize_unroll(metainterp_sd, jitdriver_sd, loop, optimizations,
inline_short_preamble, start_state, False)
orig_ops = loop.operations
- orig_version = LoopVersion(loop)
if len(orig_ops) >= 75:
# if more than 75 operations are present in this loop,
# it won't be possible to vectorize. There are too many
@@ -47,29 +46,27 @@
metainterp_sd.logger_noopt.log_loop(loop.inputargs, loop.operations, -2, None, None, "pre vectorize")
metainterp_sd.profiler.count(Counters.OPT_VECTORIZE_TRY)
#
- #XXXstart = time.clock()
- opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, cost_threshold, orig_version)
+ start = time.clock()
+ #
+ #
+ opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, cost_threshold)
opt.propagate_all_forward()
gso = GuardStrengthenOpt(opt.dependency_graph.index_vars)
gso.propagate_all_forward(opt.loop)
- #XXXend = time.clock()
+ # loop versioning
+ loop.versions = [LoopVersion(orig_ops, loop.operations)]
#
- loop.versions = [orig_version]
-
+ #
+ end = time.clock()
+ #
metainterp_sd.profiler.count(Counters.OPT_VECTORIZED)
metainterp_sd.logger_noopt.log_loop(loop.inputargs, loop.operations, -2, None, None, "post vectorize")
+ #
+ nano = int((end-start)*10.0**9)
+ debug_print("# vecopt factor: %d opcount: (%d -> %d) took %dns" % \
+ (opt.unroll_count+1, len(orig_ops), len(loop.operations), nano))
debug_stop("vec-opt-loop")
#
- # XXX
- #XXXns = int((end-start)*10.0**9)
- #XXXdebug_start("xxx-clock")
- #XXXdebug_print("vecopt unroll: %d gso count: %d opcount: (%d -> %d) took %dns" % \
- #XXX (opt.unroll_count+1,
- #XXX gso.strength_reduced,
- #XXX len(orig_ops),
- #XXX len(loop.operations),
- #XXX ns))
- #XXXdebug_stop("xxx-clock")
except NotAVectorizeableLoop:
debug_stop("vec-opt-loop")
# vectorization is not possible
@@ -93,12 +90,25 @@
return a.left.getindex() < b.left.getindex()
packsort = listsort.make_timsort_class(lt=cmp_pack_lt)
+def copy_fail_descr(op, optimizer):
+ olddescr = op.getdescr()
+ exits_early = olddescr.guard_opnum == rop.GUARD_EARLY_EXIT
+ if exits_early:
+ if isinstance(olddescr, CompileLoopVersionDescr):
+ descr = CompileLoopVersionDescr()
+ else:
+ descr = ResumeAtLoopHeaderDescr()
+ else:
+ descr = invent_fail_descr_for_op(op.getopnum(), optimizer)
+ if olddescr:
+ descr.copy_all_attributes_from(olddescr)
+ return descr
+
class VectorizingOptimizer(Optimizer):
""" Try to unroll the loop and find instructions to group """
- def __init__(self, metainterp_sd, jitdriver_sd, loop, cost_threshold, orig_loop_version):
+ def __init__(self, metainterp_sd, jitdriver_sd, loop, cost_threshold):
Optimizer.__init__(self, metainterp_sd, jitdriver_sd, loop, [])
- self.orig_loop_version = orig_loop_version
self.dependency_graph = None
self.packset = None
self.unroll_count = 0
@@ -234,15 +244,10 @@
if copied_op.is_guard():
assert isinstance(copied_op, GuardResOp)
target_guard = copied_op
+ copied_op.setdescr(copy_fail_descr(copied_op, self))
descr = target_guard.getdescr()
exits_early = descr.guard_opnum == rop.GUARD_EARLY_EXIT
- # early exits already have the right failargs set
if not exits_early:
- descr = invent_fail_descr_for_op(copied_op.getopnum(), self)
- olddescr = copied_op.getdescr()
- if olddescr:
- descr.copy_all_attributes_from(olddescr)
- copied_op.setdescr(descr)
# copy failargs/snapshot
copied_op.rd_snapshot = \
renamer.rename_rd_snapshot(copied_op.rd_snapshot,
@@ -472,11 +477,9 @@
op = guard_node.getoperation()
failargs = op.getfailargs()
for i,arg in enumerate(failargs):
- if isinstance(arg, BoxVectorAccum):
- descr = op.getdescr()
- assert isinstance(descr,ResumeGuardDescr)
- ai = AccumInfo(descr.rd_accum_list, i, arg.operator, arg.scalar_var)
- descr.rd_accum_list = ai
+ accum = arg.getaccum()
+ if accum:
+ accum.save_to_descr(op.getdescr(),i)
self.loop.operations = \
sched_data.prepend_invariant_operations(self._newoperations)
self.clear_newoperations()
@@ -516,10 +519,9 @@
return arg
def analyse_index_calculations(self):
- ee_pos = 1
- ops = self.loop.operations
- if len(ops) <= 2 or ops[ee_pos].getopnum() != rop.GUARD_EARLY_EXIT:
- return
+ ee_pos = self.loop.find_first_index(rop.GUARD_EARLY_EXIT)
+ if len(self.loop.operations) <= 2 or ee_pos == -1:
+ raise NotAVectorizeableLoop()
self.dependency_graph = graph = DependencyGraph(self.loop)
label_node = graph.getnode(0)
ee_guard_node = graph.getnode(ee_pos)
@@ -569,9 +571,9 @@
label_node.edge_to(last_but_one, label='pullup')
# only the last guard needs a connection
guard_node.edge_to(ee_guard_node, label='pullup-last-guard')
- self.relax_guard_to(guard_node, ee_guard_node, label_node)
+ self.relax_guard_to(guard_node, ee_guard_node)
- def relax_guard_to(self, guard_node, other_node, label_node):
+ def relax_guard_to(self, guard_node, other_node):
""" Relaxes a guard operation to an earlier guard. """
# clone this operation object. if the vectorizer is
# not able to relax guards, it won't leave behind a modified operation
@@ -594,10 +596,6 @@
tgt_op.setdescr(descr)
tgt_op.rd_snapshot = op.rd_snapshot
tgt_op.setfailargs(op.getfailargs())
- if guard_true_false:
- self.orig_loop_version.adddescr(tgt_op, descr)
- tgt_op.setfailargs(label_node.getoperation().getarglist()[:])
- tgt_op.rd_snapshot = None
class CostModel(object):
@@ -840,6 +838,7 @@
if not pack.is_accumulating():
continue
accum = pack.accum
+ pack.accum = None
# create a new vector box for the parameters
box = pack.input_type.new_vector_box()
size = vec_reg_size // pack.input_type.getsize()
@@ -857,12 +856,14 @@
sched_data.invariant_oplist.append(op)
else:
raise NotImplementedError("can only handle + and *")
- result = BoxVectorAccum(box, accum.var, accum.operator)
+ result = box.clonebox()
+ assert isinstance(result, BoxVector)
+ result.accum = accum
# pack the scalar value
op = ResOperation(getpackopnum(box.gettype()),
[box, accum.var, ConstInt(0), ConstInt(1)], result)
sched_data.invariant_oplist.append(op)
# rename the variable with the box
- sched_data.setvector_of_box(accum.var, 0, result) # prevent it from expansion
- renamer.start_renaming(accum.var, result)
+ sched_data.setvector_of_box(accum.getoriginalbox(), 0, result) # prevent it from expansion
+ renamer.start_renaming(accum.getoriginalbox(), result)
More information about the pypy-commit
mailing list