[pypy-commit] pypy vecopt-merge: forcing memo of variables while parsing to the same memo at runtime (only jitviewer and the test suite affected)
plan_rich
noreply at buildbot.pypy.org
Wed Sep 16 18:06:59 CEST 2015
Author: Richard Plangger <planrichi at gmail.com>
Branch: vecopt-merge
Changeset: r79658:fe1eb22de735
Date: 2015-09-16 18:06 +0200
http://bitbucket.org/pypy/pypy/changeset/fe1eb22de735/
Log: forcing memo of variables while parsing to the same memo at runtime
(only jitviewer and the test suite affected) poking costmodel tests,
accumulator in the algorithm missing
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -356,7 +356,7 @@
pack = ''
if self.pack:
pack = "p: %d" % self.pack.numops()
- return "Node(%s,%s i: %d)" % (self.op.getopname(), pack, self.opidx)
+ return "Node(%s,%s i: %d)" % (self.op, pack, self.opidx)
def __ne__(self, other):
return not self.__eq__(other)
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -7,6 +7,7 @@
from rpython.jit.metainterp.optimizeopt.renamer import Renamer
from rpython.rlib.objectmodel import we_are_translated
from rpython.jit.metainterp.jitexc import NotAProfitableLoop
+from rpython.rlib.objectmodel import specialize
class SchedulerState(object):
@@ -78,7 +79,7 @@
state.renamer.rename(op)
if unpack:
state.ensure_args_unpacked(op)
- node.position = len(state.oplist)
+ node.vector=Trueposition = len(state.oplist)
worklist = state.worklist
for dep in node.provides()[:]: # COPY
to = dep.to
@@ -131,115 +132,6 @@
for node in state.graph.nodes:
assert node.emitted
-#UNSIGNED_OPS = (rop.UINT_FLOORDIV, rop.UINT_RSHIFT,
-# rop.UINT_LT, rop.UINT_LE,
-# rop.UINT_GT, rop.UINT_GE)
-
-#class Type(object):
-# """ The type of one operation. Saves type, size and sign. """
-# @staticmethod
-# def of(op):
-# descr = op.getdescr()
-# if descr:
-# type = INT
-# if descr.is_array_of_floats() or descr.concrete_type == FLOAT:
-# type = FLOAT
-# size = descr.get_item_size_in_bytes()
-# sign = descr.is_item_signed()
-# return Type(type, size, sign)
-# else:
-# size = 8
-# sign = True
-# if op.type == 'f' or op.getopnum() in UNSIGNED_OPS:
-# sign = False
-# return Type(op.type, size, sign)
-#
-# def __init__(self, type, size, signed):
-# assert type in (FLOAT, INT)
-# self.type = type
-# self.size = size
-# self.signed = signed
-#
-# def bytecount(self):
-# return self.size
-#
-# def clone(self):
-# return Type(self.type, self.size, self.signed)
-#
-# def __repr__(self):
-# sign = '-'
-# if not self.signed:
-# sign = '+'
-# return 'Type(%s%s, %d)' % (sign, self.type, self.size)
-#
- #UNKNOWN_TYPE = '-'
-
- #@staticmethod
- #def of(box, count=-1):
- # assert box.type == 'V'
- # if count == -1:
- # count = box.getcount()
- # return Type(box.gettype(), box.getsize(), box.getsigned(), count)
-
- #@staticmethod
- #def by_descr(descr, vec_reg_size):
- # _t = INT
- # signed = descr.is_item_signed()
- # if descr.is_array_of_floats() or descr.concrete_type == FLOAT:
- # _t = FLOAT
- # signed = False
- # size = descr.get_item_size_in_bytes()
- # pt = Type(_t, size, signed, vec_reg_size // size)
- # return pt
-
- #def clone(self):
- # return Type(self.type, self.size, self.signed, self.count)
-
- #def new_vector_box(self, count = -1):
- # if count == -1:
- # count = self.count
- # assert count > 1
- # assert self.type in ('i','f')
- # assert self.size > 0
- # xxx
- # return BoxVector(self.type, count, self.size, self.signed)
-
- #def combine(self, other):
- # """ nothing to be done here """
- # if not we_are_translated():
- # assert self.type == other.type
- # assert self.signed == other.signed
-
-
- #def byte_size(self):
- # return self.count * self.size
-
- #def setsize(self, size):
- # self.size = size
-
- #def setcount(self, count):
- # self.count = count
-
- #def gettype(self):
- # return self.type
-
- #def getsize(self):
- # return self.size
-
- #def getcount(self):
- # return self.count
-
-
-
-class TypeOutput(object):
- def __init__(self, type, count):
- self.type = type
- self.count = count
-
-
- def bytecount(self):
- return self.count * self.type.bytecount()
-
class TypeRestrict(object):
ANY_TYPE = -1
ANY_SIZE = -1
@@ -273,13 +165,6 @@
TR_LONG = TypeRestrict(INT, 8, 2)
TR_INT_2 = TypeRestrict(INT, 4, 2)
- #INT = OpToVectorOp((TR_ANY_INTEGER, TR_ANY_INTEGER), DT_PASS)
- #FLOAT = OpToVectorOp((TR_ANY_FLOAT, TR_ANY_FLOAT), DT_PASS)
- #FLOAT_UNARY = OpToVectorOp((TR_ANY_FLOAT,), DT_PASS)
- #LOAD = LoadToVectorLoad()
- #STORE = StoreToVectorStore()
- #GUARD = PassThroughOp((TR_ANY_INTEGER,))
-
# note that the following definition is x86 arch specific
MAPPING = {
rop.VEC_INT_ADD: [TR_ANY_INTEGER, TR_ANY_INTEGER],
@@ -318,11 +203,6 @@
rop.VEC_INT_IS_TRUE: [TR_ANY_INTEGER,TR_ANY_INTEGER],
}
- # TODO?
- UNSIGNED_OPS = (rop.UINT_FLOORDIV, rop.UINT_RSHIFT,
- rop.UINT_LT, rop.UINT_LE,
- rop.UINT_GT, rop.UINT_GE)
-
def turn_into_vector(state, pack):
""" Turn a pack into a vector instruction """
#
@@ -412,39 +292,6 @@
# self.input_type.getsize() != vecop.getsize():
# vecop = self.extend(vecop, self.input_type)
- # use the input as an indicator for the pack type
- #packable = vecop.maximum_numops()
- #packed = vecop.count
- #assert packed >= 0
- #assert packable >= 0
- #if packed > packable:
- # # the argument has more items than the operation is able to process!
- # # pos == 0 then it is already at the right place
- # if pos != 0:
- # args[i] = self.unpack(vecop, pos, packed - pos, self.input_type)
- # state.remember_args_in_vector(i, args[i])
- # #self.update_input_output(self.pack)
- # continue
- # else:
- # assert vecop is not None
- # args[i] = vecop
- # continue
- #vboxes = self.vector_boxes_for_args(i)
- #if packed < packable and len(vboxes) > 1:
- # # the argument is scattered along different vector boxes
- # args[i] = self.gather(vboxes, packable)
- # state.remember_args_in_vector(i, args[i])
- # continue
- #if pos != 0:
- # # The vector box is at a position != 0 but it
- # # is required to be at position 0. Unpack it!
- # args[i] = self.unpack(vecop, pos, packed - pos, self.input_type)
- # state.remember_args_in_vector(i, args[i])
- # continue
- ##
- #assert vecop is not None
- #args[i] = vecop
-
def check_if_pack_supported(self, pack):
op0 = pack.operations[0].getoperation()
if self.input_type is None:
@@ -461,25 +308,6 @@
# see assembler for comment why
raise NotAProfitableLoop
-def extend(self, vbox, newtype):
- assert vbox.gettype() == newtype.gettype()
- if vbox.gettype() == INT:
- return self.extend_int(vbox, newtype)
- else:
- raise NotImplementedError("cannot yet extend float")
-
-def extend_int(self, vbox, newtype):
- vbox_cloned = newtype.new_vector_box(vbox.getcount())
- self.sched_data._prevent_signext(newtype.getsize(), vbox.getsize())
- newsize = newtype.getsize()
- assert newsize > 0
- op = ResOperation(rop.VEC_INT_SIGNEXT,
- [vbox, ConstInt(newsize)],
- vbox_cloned)
- self.costmodel.record_cast_int(vbox.getsize(), newtype.getsize(), vbox.getcount())
- self.vecops.append(op)
- return vbox_cloned
-
def unpack_from_vector(state, arg, index, count):
""" Extract parts of the vector box into another vector box """
print "unpack i", index, "c", count, "v", arg
@@ -556,9 +384,6 @@
if variables is not None:
variables.append(vecop)
state.expand([arg], vecop)
- #expanded_map.setdefault(arg,[]).append((vecop, -1))
- #for i in range(vecop.count):
- # state.setvector_of_box(arg, i, vecop)
args[index] = vecop
return vecop
@@ -642,7 +467,7 @@
SchedulerState.post_schedule(self)
# add accumulation info to the descriptor
- #for version in self.loop.versions:
+ # TODO for version in self.loop.versions:
# # this needs to be done for renamed (accum arguments)
# version.renamed_inputargs = [ renamer.rename_map.get(arg,arg) for arg in version.inputargs ]
#self.appended_arg_count = len(sched_data.invariant_vector_vars)
@@ -717,7 +542,7 @@
if argument and not argument.is_constant():
arg = self.ensure_unpacked(i, argument)
if argument is not arg:
- fail_arguments[i] = arg
+ fail_args[i] = arg
def ensure_unpacked(self, index, arg):
if arg in self.seen or arg.is_vector():
@@ -756,9 +581,8 @@
break
self.setvector_of_box(arg, i, box)
-
def opcount_filling_vector_register(pack, vec_reg_size):
- """ how many operations of that kind can one execute
+ """ How many operations of that kind can one execute
with a machine instruction of register size X?
"""
op = pack.leftmost()
@@ -790,10 +614,16 @@
def numops(self):
return len(self.operations)
- def leftmost(self):
+ @specialize.arg(1)
+ def leftmost(self, node=False):
+ if node:
+ return self.operations[0]
return self.operations[0].getoperation()
- def rightmost(self):
+ @specialize.arg(1)
+ def rightmost(self, node=False):
+ if node:
+ return self.operations[-1]
return self.operations[-1].getoperation()
def pack_type(self):
@@ -933,7 +763,7 @@
def __repr__(self):
if len(self.operations) == 0:
return "Pack(empty)"
- return "Pack(%dx %s)" % (self.numops(), self.operations[0])
+ return "Pack(%dx %s)" % (self.numops(), self.operations)
def is_accumulating(self):
return self.accum is not None
@@ -943,14 +773,11 @@
cloned.accum = self.accum
return cloned
-
class Pair(Pack):
""" A special Pack object with only two statements. """
def __init__(self, left, right):
assert isinstance(left, Node)
assert isinstance(right, Node)
- self.left = left
- self.right = right
Pack.__init__(self, [left, right])
def __eq__(self, other):
@@ -960,246 +787,28 @@
class AccumPair(Pair):
""" A pair that keeps track of an accumulation value """
- def __init__(self, left, right, input_type, output_type, accum):
+ def __init__(self, left, right, accum):
assert isinstance(left, Node)
assert isinstance(right, Node)
- Pair.__init__(self, left, right, input_type, output_type)
- self.left = left
- self.right = right
+ Pair.__init__(self, left, right)
self.accum = accum
-#class OpToVectorOp(object):
-# def __init__(self): #, restrictargs, typeoutput):
-# pass
-# #self.args = list(restrictargs) # do not use a tuple. rpython cannot union
-# #self.out = typeoutput
+#def extend(self, vbox, newtype):
+# assert vbox.gettype() == newtype.gettype()
+# if vbox.gettype() == INT:
+# return self.extend_int(vbox, newtype)
+# else:
+# raise NotImplementedError("cannot yet extend float")
#
-#class OpToVectorOpConv(OpToVectorOp):
-# def __init__(self, intype, outtype):
-# #self.from_size = intype.getsize()
-# #self.to_size = outtype.getsize()
-# #OpToVectorOp.__init__(self, (intype, ), outtype)
-# pass
-#
-# def new_result_vector_box(self):
-# type = self.output_type.gettype()
-# size = self.to_size
-# count = self.output_type.getcount()
-# vec_reg_size = self.sched_data.vec_reg_size
-# if count * size > vec_reg_size:
-# count = vec_reg_size // size
-# signed = self.output_type.signed
-# assert type in ('i','f')
-# assert size > 0
-# assert count > 1
-# return BoxVector(type, count, size, signed)
-#
-# def get_output_type_given(self, input_type, op):
-# return self.result_ptype
-#
-# def get_input_type_given(self, output_type, op):
-# return self.arg_ptypes[0]
-#
-# def force_input(self, ptype):
-# return self.arg_ptypes[0]
-#
-#class SignExtToVectorOp(OpToVectorOp):
-# def __init__(self, intype, outtype):
-# OpToVectorOp.__init__(self, intype, outtype)
-# self.size = -1
-#
-# def before_argument_transform(self, args):
-# sizearg = args[1]
-# assert isinstance(sizearg, ConstInt)
-# self.size = sizearg.value
-#
-# def new_result_vector_box(self):
-# type = self.output_type.gettype()
-# count = self.input_type.getcount()
-# vec_reg_size = self.sched_data.vec_reg_size
-# if count * self.size > vec_reg_size:
-# count = vec_reg_size // self.size
-# signed = self.input_type.signed
-# assert type in ('i','f')
-# assert self.size > 0
-# assert count > 1
-# return BoxVector(type, count, self.size, signed)
-#
-# def get_output_type_given(self, input_type, op):
-# sizearg = op.getarg(1)
-# assert isinstance(sizearg, ConstInt)
-# output_type = input_type.clone()
-# output_type.setsize(sizearg.value)
-# return output_type
-#
-# def get_input_type_given(self, output_type, op):
-# raise AssertionError("can never infer input type!")
-#
-#class LoadToVectorLoad(OpToVectorOp):
-# def __init__(self):
-# OpToVectorOp.__init__(self, (), TypeRestrict())
-#
-# # OLD def before_argument_transform(self, args):
-# #count = min(self.output_type.getcount(), len(self.getoperations()))
-# #args.append(ConstInt(count))
-#
-# def get_output_type_given(self, input_type, op):
-# return xxx#Type.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
-#
-# def get_input_type_given(self, output_type, op):
-# return None
-#
-#class StoreToVectorStore(OpToVectorOp):
-# """ Storing operations are special because they are not allowed
-# to store to memory if the vector is not fully filled.
-# Thus a modified split_pack function.
-# """
-# def __init__(self):
-# OpToVectorOp.__init__(self, (None, None, TypeRestrict()), None)
-# self.has_descr = True
-#
-# def must_be_full_but_is_not(self, pack):
-# vrs = self.sched_data.vec_reg_size
-# it = pack.input_type
-# return it.getsize() * it.getcount() < vrs
-#
-# def get_output_type_given(self, input_type, op):
-# return None
-#
-# def get_input_type_given(self, output_type, op):
-# return xxx#Type.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
-#
-#class PassThroughOp(OpToVectorOp):
-# """ This pass through is only applicable if the target
-# operation is capable of handling vector operations.
-# Guard true/false is such an example.
-# """
-# def __init__(self, args):
-# OpToVectorOp.__init__(self, args, None)
-#
-# def get_output_type_given(self, input_type, op):
-# return None
-#
-# def get_input_type_given(self, output_type, op):
-# raise AssertionError("cannot infer input type from output type")
-#
-#
-#
-##def determine_input_output_types(pack, node, forward):
-## """ This function is two fold. If moving forward, it
-## gets an input type from the packs output type and returns
-## the transformed packtype.
-##
-## Moving backward, the origins pack input type is the output
-## type and the transformation of the packtype (in reverse direction)
-## is the input
-## """
-## op = node.getoperation()
-## op2vecop = determine_trans(op)
-## if forward:
-## input_type = op2vecop.force_input(pack.output_type)
-## output_type = op2vecop.get_output_type_given(input_type, op)
-## if output_type:
-## output_type = output_type.clone()
-## else:
-## # going backwards, things are not that easy anymore
-## output_type = pack.input_type
-## input_type = op2vecop.get_input_type_given(output_type, op)
-## if input_type:
-## input_type = input_type.clone()
-##
-## return input_type, output_type
-#
-#def determine_trans(op):
-# op2vecop = trans.MAPPING.get(op.vector, None)
-# if op2vecop is None:
-# raise NotImplementedError("missing vecop for '%s'" % (op.getopname(),))
-# return op2vecop
+#def extend_int(self, vbox, newtype):
+# vbox_cloned = newtype.new_vector_box(vbox.getcount())
+# self.sched_data._prevent_signext(newtype.getsize(), vbox.getsize())
+# newsize = newtype.getsize()
+# assert newsize > 0
+# op = ResOperation(rop.VEC_INT_SIGNEXT,
+# [vbox, ConstInt(newsize)],
+# vbox_cloned)
+# self.costmodel.record_cast_int(vbox.getsize(), newtype.getsize(), vbox.getcount())
+# self.vecops.append(op)
+# return vbox_cloned
-
-#def before_argument_transform(self, args):
-# pass
-
-#def transform_result(self, result):
-# if result is None:
-# return None
-# vbox = self.new_result_vector_box()
-# #
-# # mark the position and the vbox in the hash
-# for i, node in enumerate(self.getoperations()):
-# if i >= vbox.getcount():
-# break
-# op = node.getoperation()
-# self.sched_data.setvector_of_box(op, i, vbox)
-# return vbox
-
-#def new_result_vector_box(self):
-# type = self.output_type.gettype()
-# size = self.output_type.getsize()
-# count = min(self.output_type.getcount(), len(self.pack.operations))
-# signed = self.output_type.signed
-# return BoxVector(type, count, size, signed)
-
-#def getoperations(self):
-# return self.pack.operations
-
-#def transform_arguments(self, args):
-# """ Transforming one argument to a vector box argument
-# The following cases can occur:
-# 1) argument is present in the box_to_vbox map.
-# a) vector can be reused immediatly (simple case)
-# b) vector is to big
-# c) vector is to small
-# 2) argument is not known to reside in a vector
-# a) expand vars/consts before the label and add as argument
-# b) expand vars created in the loop body
-# """
-# for i,arg in enumerate(args):
-# if arg.returns_vector():
-# continue
-# if not self.is_vector_arg(i):
-# continue
-# box_pos, vbox = self.sched_data.getvector_of_box(arg)
-# if not vbox:
-# # constant/variable expand this box
-# vbox = self.expand(arg, i)
-# self.sched_data.setvector_of_box(arg, 0, vbox)
-# box_pos = 0
-# # convert size i64 -> i32, i32 -> i64, ...
-# if self.input_type.getsize() > 0 and \
-# self.input_type.getsize() != vbox.getsize():
-# vbox = self.extend(vbox, self.input_type)
-
-# # use the input as an indicator for the pack type
-# packable = self.input_type.getcount()
-# packed = vbox.getcount()
-# assert packed >= 0
-# assert packable >= 0
-# if packed > packable:
-# # the argument has more items than the operation is able to process!
-# # box_pos == 0 then it is already at the right place
-# if box_pos != 0:
-# args[i] = self.unpack(vbox, box_pos, packed - box_pos, self.input_type)
-# remember_args_in_vector(i, args[i])
-# #self.update_input_output(self.pack)
-# continue
-# else:
-# assert vbox is not None
-# args[i] = vbox
-# continue
-# vboxes = self.vector_boxes_for_args(i)
-# if packed < packable and len(vboxes) > 1:
-# # the argument is scattered along different vector boxes
-# args[i] = self.gather(vboxes, packable)
-# remember_args_in_vector(i, args[i])
-# continue
-# if box_pos != 0:
-# # The vector box is at a position != 0 but it
-# # is required to be at position 0. Unpack it!
-# args[i] = self.unpack(vbox, box_pos, packed - box_pos, self.input_type)
-# remember_args_in_vector(i, args[i])
-# continue
-# #self.update_input_output(self.pack)
-# #
-# assert vbox is not None
-# args[i] = vbox
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
@@ -2,14 +2,15 @@
from rpython.jit.metainterp.history import TargetToken, JitCellToken, TreeLoop
from rpython.jit.metainterp.optimizeopt.util import equaloplists
-from rpython.jit.metainterp.optimizeopt.vectorize import (VecScheduleData,
- Pack, NotAProfitableLoop, VectorizingOptimizer)
+from rpython.jit.metainterp.optimizeopt.vector import (Pack, X86_CostModel,
+ NotAProfitableLoop, VectorizingOptimizer)
+from rpython.jit.metainterp.optimizeopt.schedule import VecScheduleState
from rpython.jit.metainterp.optimizeopt.dependency import Node, DependencyGraph
from rpython.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
from rpython.jit.metainterp.optimizeopt.test.test_schedule import SchedulerBaseTest
-from rpython.jit.metainterp.optimizeopt.test.test_vectorize import (FakeMetaInterpStaticData,
+from rpython.jit.metainterp.optimizeopt.test.test_vecopt import (FakeMetaInterpStaticData,
FakeJitDriverStaticData)
-from rpython.jit.metainterp.resoperation import rop, ResOperation
+from rpython.jit.metainterp.resoperation import rop, ResOperation, AbstractValue
from rpython.jit.tool.oparser import parse as opparse
from rpython.jit.tool.oparser_model import get_model
@@ -18,7 +19,7 @@
self.index_var = iv
self.array = array
- def is_adjacent_to(self, other):
+ def is_adjacent_after(self, other):
if self.array is not other.array:
return False
iv = self.index_var
@@ -28,36 +29,39 @@
# i1 and i0 ...
# but not i0, i2
# ...
- return abs(val) == 1
+ print iv, 'is after', ov, "?", val == 1
+ return val == 1
class CostModelBaseTest(SchedulerBaseTest):
+
def savings(self, loop):
metainterp_sd = FakeMetaInterpStaticData(self.cpu)
jitdriver_sd = FakeJitDriverStaticData()
- opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, [])
- label_index = loop.find_first_index(rop.LABEL)
- opt.orig_label_args = loop.operations[label_index].getarglist()[:]
+ opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, 0)
+ opt.orig_label_args = loop.label.getarglist()[:]
graph = opt.dependency_graph = DependencyGraph(loop)
+ self.show_dot_graph(graph, 'costmodel')
for k,m in graph.memory_refs.items():
graph.memory_refs[k] = FakeMemoryRef(m.array, m.index_var)
- opt.find_adjacent_memory_refs()
+ opt.find_adjacent_memory_refs(graph)
opt.extend_packset()
opt.combine_packset()
for pack in opt.packset.packs:
print "pack: \n ",
print '\n '.join([str(op.getoperation()) for op in pack.operations])
print
- opt.costmodel.reset_savings()
- opt.schedule(True)
- return opt.costmodel.savings
+ costmodel = X86_CostModel(self.cpu, 0)
+ state = VecScheduleState(graph, opt.packset, self.cpu, costmodel)
+ opt.schedule(state)
+ return costmodel.savings
def assert_operations_match(self, loop_a, loop_b):
assert equaloplists(loop_a.operations, loop_b.operations)
def test_load_2_unpack(self):
- loop1 = self.parse("""
- f10 = raw_load(p0, i0, descr=double)
- f11 = raw_load(p0, i1, descr=double)
+ loop1 = self.parse_trace("""
+ f10 = raw_load_f(p0, i0, descr=double)
+ f11 = raw_load_f(p0, i1, descr=double)
guard_true(i0) [f10]
guard_true(i1) [f11]
""")
@@ -68,11 +72,11 @@
assert savings == -2
def test_load_4_unpack(self):
- loop1 = self.parse("""
- i10 = raw_load(p0, i0, descr=float)
- i11 = raw_load(p0, i1, descr=float)
- i12 = raw_load(p0, i2, descr=float)
- i13 = raw_load(p0, i3, descr=float)
+ loop1 = self.parse_trace("""
+ i10 = raw_load_i(p0, i0, descr=float)
+ i11 = raw_load_i(p0, i1, descr=float)
+ i12 = raw_load_i(p0, i2, descr=float)
+ i13 = raw_load_i(p0, i3, descr=float)
guard_true(i0) [i10]
guard_true(i1) [i11]
guard_true(i2) [i12]
@@ -82,29 +86,29 @@
assert savings == -1
def test_load_2_unpack_1(self):
- loop1 = self.parse("""
- f10 = raw_load(p0, i0, descr=double)
- f11 = raw_load(p0, i1, descr=double)
+ loop1 = self.parse_trace("""
+ f10 = raw_load_f(p0, i0, descr=double)
+ f11 = raw_load_f(p0, i1, descr=double)
guard_true(i0) [f10]
""")
savings = self.savings(loop1)
assert savings == 0
def test_load_2_unpack_1_index1(self):
- loop1 = self.parse("""
- f10 = raw_load(p0, i0, descr=double)
- f11 = raw_load(p0, i1, descr=double)
+ loop1 = self.parse_trace("""
+ f10 = raw_load_f(p0, i0, descr=double)
+ f11 = raw_load_f(p0, i1, descr=double)
guard_true(i0) [f11]
""")
savings = self.savings(loop1)
assert savings == -1
- def test_load_arith(self):
- loop1 = self.parse("""
- i10 = raw_load(p0, i0, descr=int)
- i11 = raw_load(p0, i1, descr=int)
- i12 = raw_load(p0, i2, descr=int)
- i13 = raw_load(p0, i3, descr=int)
+ def test_load_arith1(self):
+ loop1 = self.parse_trace("""
+ i10 = raw_load_i(p0, i0, descr=int)
+ i11 = raw_load_i(p0, i1, descr=int)
+ i12 = raw_load_i(p0, i2, descr=int)
+ i13 = raw_load_i(p0, i3, descr=int)
i15 = int_add(i10, 1)
i16 = int_add(i11, 1)
i17 = int_add(i12, 1)
@@ -114,9 +118,9 @@
assert savings == 6
def test_load_arith_store(self):
- loop1 = self.parse("""
- f10 = raw_load(p0, i0, descr=double)
- f11 = raw_load(p0, i1, descr=double)
+ loop1 = self.parse_trace("""
+ f10 = raw_load_f(p0, i0, descr=double)
+ f11 = raw_load_f(p0, i1, descr=double)
i20 = cast_float_to_int(f10)
i21 = cast_float_to_int(f11)
i30 = int_signext(i20, 4)
@@ -128,9 +132,9 @@
assert savings >= 0
def test_sum(self):
- loop1 = self.parse("""
- f10 = raw_load(p0, i0, descr=double)
- f11 = raw_load(p0, i1, descr=double)
+ loop1 = self.parse_trace("""
+ f10 = raw_load_f(p0, i0, descr=double)
+ f11 = raw_load_f(p0, i1, descr=double)
f12 = float_add(f1, f10)
f13 = float_add(f12, f11)
""")
@@ -139,9 +143,9 @@
@py.test.mark.parametrize("bytes,s", [(1,None),(2,None),(4,0),(8,0)])
def test_sum_float_to_int(self, bytes, s):
- loop1 = self.parse("""
- f10 = raw_load(p0, i0, descr=double)
- f11 = raw_load(p0, i1, descr=double)
+ loop1 = self.parse_trace("""
+ f10 = raw_load_f(p0, i0, descr=double)
+ f11 = raw_load_f(p0, i1, descr=double)
i10 = cast_float_to_int(f10)
i11 = cast_float_to_int(f11)
i12 = int_signext(i10, {c})
@@ -166,20 +170,20 @@
py.test.fail("must not fail")
def test_cast(self):
- loop1 = self.parse("""
- i100 = raw_load(p0, i1, descr=float)
- i101 = raw_load(p0, i2, descr=float)
- i102 = raw_load(p0, i3, descr=float)
- i103 = raw_load(p0, i4, descr=float)
+ loop1 = self.parse_trace("""
+ i100 = raw_load_i(p0, i1, descr=float)
+ i101 = raw_load_i(p0, i2, descr=float)
+ i102 = raw_load_i(p0, i3, descr=float)
+ i103 = raw_load_i(p0, i4, descr=float)
#
- i104 = raw_load(p1, i1, descr=short)
- i105 = raw_load(p1, i2, descr=short)
- i106 = raw_load(p1, i3, descr=short)
- i107 = raw_load(p1, i4, descr=short)
- i108 = raw_load(p1, i5, descr=short)
- i109 = raw_load(p1, i6, descr=short)
- i110 = raw_load(p1, i7, descr=short)
- i111 = raw_load(p1, i8, descr=short)
+ i104 = raw_load_i(p1, i1, descr=short)
+ i105 = raw_load_i(p1, i2, descr=short)
+ i106 = raw_load_i(p1, i3, descr=short)
+ i107 = raw_load_i(p1, i4, descr=short)
+ i108 = raw_load_i(p1, i5, descr=short)
+ i109 = raw_load_i(p1, i6, descr=short)
+ i110 = raw_load_i(p1, i7, descr=short)
+ i111 = raw_load_i(p1, i8, descr=short)
#
f100 = cast_int_to_float(i104)
f101 = cast_int_to_float(i105)
@@ -192,7 +196,7 @@
""")
try:
self.savings(loop1)
- py.test.fail("must not profitable!")
+ py.test.fail("must not be profitable!")
except NotAProfitableLoop:
pass
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
@@ -6,6 +6,7 @@
from rpython.jit.metainterp.history import TargetToken, JitCellToken, TreeLoop
from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph, Dependency,
IndexVar, MemoryRef, Node)
+from rpython.jit.metainterp.compile import ResumeAtLoopHeaderDescr
from rpython.jit.metainterp.optimizeopt.vector import VectorLoop
from rpython.jit.metainterp.resoperation import rop, ResOperation
from rpython.jit.backend.llgraph.runner import ArrayDescr
@@ -54,7 +55,7 @@
loop.jump.setdescr(token)
for op in loop.operations:
if op.getopnum() == rop.GUARD_EARLY_EXIT and op.getdescr() is None:
- op.setdescr(compile.ResumeAtLoopHeaderDescr())
+ op.setdescr(ResumeAtLoopHeaderDescr())
return loop
def assert_edges(self, graph, edge_list, exceptions):
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -29,6 +29,8 @@
from rpython.rlib.debug import debug_print, debug_start, debug_stop
from rpython.rlib.jit import Counters
from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.jit.backend.llsupport.symbolic import (WORD as INT_WORD,
+ SIZEOF_FLOAT as FLOAT_WORD)
class VectorLoop(object):
def __init__(self, label, oplist, jump):
@@ -188,7 +190,7 @@
# vectorize
graph = DependencyGraph(loop)
- self.find_adjacent_memory_refs()
+ self.find_adjacent_memory_refs(graph)
self.extend_packset()
self.combine_packset()
# TODO move cost model to CPU
@@ -256,7 +258,7 @@
if op.getopnum() in prohibit_opnums:
continue # do not unroll this operation twice
copied_op = op.clone()
- if copied_op.result is not None:
+ if not copied_op.returns_void():
# every result assigns a new box, thus creates an entry
# to the rename map.
new_assigned_box = copied_op.result.clonebox()
@@ -323,7 +325,7 @@
They are represented as a linear combination: i*c/d + e, i is a variable,
all others are integers that are calculated in reverse direction
"""
- loop = self.loop
+ loop = graph.loop
operations = loop.operations
self.packset = PackSet(self.cpu.vector_register_size)
@@ -338,8 +340,10 @@
# exclue a_opidx == b_opidx only consider the ones
# that point forward:
if memref_a.is_adjacent_after(memref_b):
+ print node_a.getindex(), "is after", node_b.getindex()
pair = self.packset.can_be_packed(node_a, node_b, None, False)
if pair:
+ print "creating mem pair", pair
self.packset.add_pack(pair)
def extend_packset(self):
@@ -348,26 +352,33 @@
"""
pack_count = self.packset.pack_count()
while True:
- for pack in self.packset.packs:
+ i = 0
+ packs = self.packset.packs
+ while i < len(packs):
+ pack = packs[i]
self.follow_def_uses(pack)
+ i += 1
if pack_count == self.packset.pack_count():
pack_count = self.packset.pack_count()
- for pack in self.packset.packs:
+ i = 0
+ while i < len(packs):
+ pack = packs[i]
self.follow_use_defs(pack)
+ i += 1
if pack_count == self.packset.pack_count():
break
pack_count = self.packset.pack_count()
def follow_use_defs(self, pack):
assert isinstance(pack, Pair)
- for ldep in pack.left.depends():
- for rdep in pack.right.depends():
+ for ldep in pack.leftmost(True).depends():
+ for rdep in pack.rightmost(True).depends():
lnode = ldep.to
rnode = rdep.to
- # only valid if the result of the left is in args of pack left
- result = lnode.getoperation().result
- args = pack.left.getoperation().getarglist()
- if result is None or result not in args:
+ # only valid if left is in args of pack left
+ left = lnode.getoperation()
+ args = pack.leftmost().getarglist()
+ if left is None or left not in args:
continue
isomorph = isomorphic(lnode.getoperation(), rnode.getoperation())
if isomorph and lnode.is_before(rnode):
@@ -377,19 +388,25 @@
def follow_def_uses(self, pack):
assert isinstance(pack, Pair)
- for ldep in pack.left.provides():
- for rdep in pack.right.provides():
+ print "lprov", pack.leftmost(node=True).provides_count(),
+ print "rprov", pack.rightmost(node=True).provides_count()
+ for ldep in pack.leftmost(node=True).provides():
+ for rdep in pack.rightmost(node=True).provides():
lnode = ldep.to
rnode = rdep.to
- result = pack.left.getoperation().result
+ print "trying", lnode.getindex(), rnode.getindex(), lnode, rnode
+ left = pack.leftmost()
args = lnode.getoperation().getarglist()
- if result is None or result not in args:
+ if left is None or left not in args:
continue
isomorph = isomorphic(lnode.getoperation(), rnode.getoperation())
if isomorph and lnode.is_before(rnode):
pair = self.packset.can_be_packed(lnode, rnode, pack, True)
if pair:
+ print "creating pair" , pair, pair.operations[0].op, pair.operations[1].op
self.packset.add_pack(pair)
+ else:
+ print "!!!creating pair" , lnode, rnode
def combine_packset(self):
""" Combination is done iterating the packs that have
@@ -404,7 +421,6 @@
i = 0
j = 0
end_ij = len(self.packset.packs)
- orphan = {}
while True:
len_before = len(self.packset.packs)
i = 0
@@ -616,6 +632,7 @@
cost, benefit_factor = self.cb_signext(pack)
#
self.savings += benefit_factor * times - cost
+ print "$$$ recording", benefit_factor, "*", times, "-", cost, "now:", self.savings
def cb_signext(self, pack):
left = pack.leftmost()
@@ -627,13 +644,16 @@
def record_cast_int(self, fromsize, tosize, count):
# for each move there is 1 instruction
self.savings += -count
+ print "$$$ cast", -count, "now", self.savings
def record_vector_pack(self, src, index, count):
if src.datatype == FLOAT:
if index == 1 and count == 1:
self.savings -= 2
+ print "$$$ vector pack -2 now:", self.savings
return
self.savings -= count
+ print "$$$ vector pack ", count, "now", self.savings
def record_vector_unpack(self, src, index, count):
self.record_vector_pack(src, index, count)
@@ -680,6 +700,7 @@
if self.profitable_pack(lnode, rnode, origin_pack, forward):
return Pair(lnode, rnode)
else:
+ print "dependent"
if self.contains_pair(lnode, rnode):
return None
if origin_pack is not None:
@@ -688,24 +709,18 @@
def contains_pair(self, lnode, rnode):
for pack in self.packs:
- if pack.left is lnode or pack.right is rnode:
+ if pack.leftmost(node=True) is lnode or \
+ pack.rightmost(node=True) is rnode:
return True
return False
def profitable_pack(self, lnode, rnode, origin_pack, forward):
- lpacknode = origin_pack.left
- if self.prohibit_packing(origin_pack,
- lpacknode.getoperation(),
- lnode.getoperation(),
- forward):
+ if self.prohibit_packing(origin_pack, origin_pack.leftmost(),
+ lnode.getoperation(), forward):
return False
- rpacknode = origin_pack.right
- if self.prohibit_packing(origin_pack,
- rpacknode.getoperation(),
- rnode.getoperation(),
- forward):
+ if self.prohibit_packing(origin_pack, origin_pack.rightmost(),
+ rnode.getoperation(), forward):
return False
-
return True
def prohibit_packing(self, pack, packed, inquestion, forward):
@@ -713,7 +728,7 @@
if inquestion.vector == -1:
return True
if packed.is_primitive_array_access():
- if packed.getarg(1) == inquestion.result:
+ if packed.getarg(1) is inquestion:
return True
if not forward and inquestion.getopnum() == rop.INT_SIGNEXT:
# prohibit the packing of signext in backwards direction
@@ -742,37 +757,37 @@
def accumulates_pair(self, lnode, rnode, origin_pack):
# lnode and rnode are isomorphic and dependent
assert isinstance(origin_pack, Pair)
- lop = lnode.getoperation()
- opnum = lop.getopnum()
+ left = lnode.getoperation()
+ opnum = left.getopnum()
if opnum in (rop.FLOAT_ADD, rop.INT_ADD, rop.FLOAT_MUL):
- roper = rnode.getoperation()
- assert lop.numargs() == 2 and lop.result is not None
- accum_var, accum_pos = self.getaccumulator_variable(lop, roper, origin_pack)
+ right = rnode.getoperation()
+ assert left.numargs() == 2 and not left.returns_void()
+ accum_var, accum_pos = self.getaccumulator_variable(left, right, origin_pack)
if not accum_var:
return None
- # the dependency exists only because of the result of lnode
+ # the dependency exists only because of the left?
for dep in lnode.provides():
if dep.to is rnode:
if not dep.because_of(accum_var):
# not quite ... this is not handlable
return None
# get the original variable
- accum_var = lop.getarg(accum_pos)
+ accum_var = left.getarg(accum_pos)
# in either of the two cases the arguments are mixed,
# which is not handled currently
var_pos = (accum_pos + 1) % 2
- plop = origin_pack.left.getoperation()
- if lop.getarg(var_pos) is not plop.result:
+ if left.getarg(var_pos) is not origin_pack.leftmost():
return None
- prop = origin_pack.right.getoperation()
- if roper.getarg(var_pos) is not prop.result:
+ if right.getarg(var_pos) is not origin_pack.rightmost():
return None
# this can be handled by accumulation
- ptype = origin_pack.output_type
- if ptype.getsize() != 8:
+ size = INT_WORD
+ if left.type == 'f':
+ size = FLOAT_WORD
+ if left.bytesize == right.bytesize and left.bytesize == size:
# do not support if the type size is smaller
# than the cpu word size.
# WHY?
@@ -781,16 +796,14 @@
# considered. => tree pattern matching problem.
return None
accum = Accum(opnum, accum_var, accum_pos)
- return AccumPair(lnode, rnode, ptype, ptype, accum)
+ return AccumPair(lnode, rnode, accum)
return None
- def getaccumulator_variable(self, lop, rop, origin_pack):
- args = rop.getarglist()
- for i, arg in enumerate(args):
- if arg is lop.result:
+ def getaccumulator_variable(self, left, right, origin_pack):
+ for i, arg in enumerate(right.getarglist()):
+ if arg is left:
return arg, i
- #
return None, -1
def accumulate_prepare(self, state):
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -147,6 +147,11 @@
i += 1
arg = self.getarg(i)
if arg.is_constant():
+ if arg.type == 'i':
+ self.setdatatype('i', INT_WORD, True)
+ else:
+ assert arg.type == 'f'
+ self.setdatatype('f', FLOAT_WORD, False)
return
self.setdatatype(arg.datatype, arg.bytesize, arg.signed)
assert self.datatype != '\x00'
diff --git a/rpython/jit/tool/oparser.py b/rpython/jit/tool/oparser.py
--- a/rpython/jit/tool/oparser.py
+++ b/rpython/jit/tool/oparser.py
@@ -198,6 +198,8 @@
from rpython.rtyper.lltypesystem import lltype, llmemory
assert elem.startswith('p')
v = InputArgRef(lltype.nullptr(llmemory.GCREF.TO))
+ # ensure that the variable gets the proper naming
+ self.update_memo(v, elem)
self.vars[elem] = v
return v
@@ -353,9 +355,24 @@
raise ParseError("Double assign to var %s in line: %s" % (res, line))
resop = self.create_op(opnum, args, res, descr, fail_args)
res = self.update_vector(resop, res)
+ self.update_memo(resop, res)
self.vars[res] = resop
return resop
+ def update_memo(self, val, name):
+ """ This updates the id of the operation or inputarg.
+ Internally you will then see the same variable names
+ as in the textual trace.
+ """
+ regex = re.compile("[prif](\d+)")
+ match = regex.match(name)
+ if match:
+ counter = int(match.group(1))
+ countdict = val._repr_memo
+ countdict._d[val] = counter
+ if countdict.counter < counter:
+ countdict.counter = counter
+
def update_vector(self, resop, var):
pattern = re.compile('.*\[(\d+)x(u?)(i|f)(\d+)\]')
match = pattern.match(var)
More information about the pypy-commit
mailing list