[pypy-commit] pypy vecopt: preventing int signext from >32 -> <32
plan_rich
noreply at buildbot.pypy.org
Wed Jun 24 12:00:37 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r78287:3b569b13ba22
Date: 2015-06-24 12:00 +0200
http://bitbucket.org/pypy/pypy/changeset/3b569b13ba22/
Log: Prevent int signext from >32 -> <32. Also prevent packed int mul for
64 bit, which cannot be done with an SSE opcode (see assembler comment).
Interestingly, SSE seems to support float/double quite well, but not
int (other than add, sub, logicals).
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2598,10 +2598,12 @@
self.mc.PMULLW(loc0, loc1)
elif itemsize == 4:
self.mc.PMULLD(loc0, loc1)
- elif itemsize == 8:
- self.mc.PMULDQ(loc0, loc1) # TODO
else:
- raise NotImplementedError("did not implement integer mul")
+ # NOTE see http://stackoverflow.com/questions/8866973/can-long-integer-routines-benefit-from-sse/8867025#8867025
+ # There is no 64x64 bit packed mul and I did not find one
+ # for 8 bit either. It is questionable if it gives any benefit
+ # for 8 bit.
+ raise NotImplementedError("")
def genop_vec_int_add(self, op, arglocs, resloc):
loc0, loc1, size_loc = arglocs
diff --git a/rpython/jit/metainterp/jitexc.py b/rpython/jit/metainterp/jitexc.py
--- a/rpython/jit/metainterp/jitexc.py
+++ b/rpython/jit/metainterp/jitexc.py
@@ -61,6 +61,14 @@
self.green_int, self.green_ref, self.green_float,
self.red_int, self.red_ref, self.red_float)
+class NotAVectorizeableLoop(JitException):
+ def __str__(self):
+ return 'NotAVectorizeableLoop()'
+
+class NotAProfitableLoop(JitException):
+ def __str__(self):
+ return 'NotAProfitableLoop()'
+
def _get_standard_error(rtyper, Class):
exdata = rtyper.exceptiondata
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -6,6 +6,7 @@
MemoryRef, Node, IndexVar)
from rpython.jit.metainterp.optimizeopt.util import Renamer
from rpython.rlib.objectmodel import we_are_translated
+from rpython.jit.metainterp.jitexc import NotAProfitableLoop
class SchedulerData(object):
@@ -238,12 +239,31 @@
self.input_type = self.determine_input_type(op0)
self.output_type = self.determine_output_type(op0)
+ def check_if_pack_supported(self, pack):
+ op0 = pack.operations[0].getoperation()
+ insize = self.input_type.getsize()
+ if op0.casts_box():
+ # prohibit the packing of signext calls that
+ # cast to int16/int8.
+ _, outsize = op0.cast_to()
+ self._prevent_signext(outsize, insize)
+ if op0.getopnum() == rop.INT_ADD:
+ if insize == 8 or insize == 1:
+ # see assembler for comment why
+ raise NotAProfitableLoop
+
+ def _prevent_signext(self, outsize, insize):
+ if outsize < 4 and insize != outsize:
+ raise NotAProfitableLoop
+
def as_vector_operation(self, pack, sched_data, oplist):
self.sched_data = sched_data
self.preamble_ops = oplist
self.costmodel = sched_data.costmodel
self.update_input_output(pack)
#
+ self.check_if_pack_supported(pack)
+ #
off = 0
stride = self.split_pack(pack, self.sched_data.vec_reg_size)
left = len(pack.operations)
@@ -370,6 +390,7 @@
def extend_int(self, vbox, newtype):
vbox_cloned = newtype.new_vector_box(vbox.item_count)
+ self._prevent_signext(newtype.getsize(), vbox.getsize())
op = ResOperation(rop.VEC_INT_SIGNEXT,
[vbox, ConstInt(newtype.getsize())],
vbox_cloned)
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -9,7 +9,7 @@
import time
from rpython.jit.metainterp.resume import Snapshot
-from rpython.jit.metainterp.jitexc import JitException
+from rpython.jit.metainterp.jitexc import NotAVectorizeableLoop, NotAProfitableLoop
from rpython.jit.metainterp.optimizeopt.unroll import optimize_unroll
from rpython.jit.metainterp.compile import ResumeAtLoopHeaderDescr, invent_fail_descr_for_op
from rpython.jit.metainterp.history import (ConstInt, VECTOR, FLOAT, INT,
@@ -44,14 +44,6 @@
else:
print ""
-class NotAVectorizeableLoop(JitException):
- def __str__(self):
- return 'NotAVectorizeableLoop()'
-
-class NotAProfitableLoop(JitException):
- def __str__(self):
- return 'NotAProfitableLoop()'
-
def optimize_vector(metainterp_sd, jitdriver_sd, loop, optimizations,
inline_short_preamble, start_state, cost_threshold):
optimize_unroll(metainterp_sd, jitdriver_sd, loop, optimizations,
@@ -623,7 +615,7 @@
else:
# store only has an input
return Pair(lnode, rnode, ptype, None)
- if self.profitable_pack(lnode, rnode, origin_pack):
+ if self.profitable_pack(lnode, rnode, origin_pack, forward):
input_type = origin_pack.output_type
output_type = determine_output_type(lnode, input_type)
return Pair(lnode, rnode, input_type, output_type)
@@ -640,33 +632,29 @@
return True
return False
- def profitable_pack(self, lnode, rnode, origin_pack):
+ def profitable_pack(self, lnode, rnode, origin_pack, forward):
lpacknode = origin_pack.left
- if self.prohibit_packing(origin_pack, lpacknode.getoperation(), lnode.getoperation()):
+ if self.prohibit_packing(origin_pack,
+ lpacknode.getoperation(),
+ lnode.getoperation(),
+ forward):
return False
rpacknode = origin_pack.right
- if self.prohibit_packing(origin_pack, rpacknode.getoperation(), rnode.getoperation()):
+ if self.prohibit_packing(origin_pack,
+ rpacknode.getoperation(),
+ rnode.getoperation(),
+ forward):
return False
return True
- def prohibit_packing(self, pack, packed, inquestion):
+ def prohibit_packing(self, pack, packed, inquestion, forward):
""" Blocks the packing of some operations """
if inquestion.vector == -1:
return True
if packed.is_raw_array_access():
if packed.getarg(1) == inquestion.result:
return True
- if inquestion.casts_box():
- # prohibit the packing of signext calls that
- # cast to int16/int8.
- input_type = pack.output_type
- if input_type:
- py.test.set_trace()
- insize = input_type.getsize()
- outtype,outsize = inquestion.cast_to()
- if outsize < 4 and insize != outsize:
- return True
return False
def combine(self, i, j):
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -214,7 +214,6 @@
if self.casts[3] == 0:
if self.getopnum() == rop.INT_SIGNEXT:
arg = self.getarg(1)
- assert isinstance(arg, ConstInt)
return (to_type,arg.value)
else:
raise NotImplementedError
More information about the pypy-commit
mailing list