[pypy-svn] r68153 - in pypy/branch/merge-floats-via-sse2/pypy: jit/backend/llgraph jit/backend/llsupport jit/backend/llsupport/test jit/backend/test jit/backend/x86 jit/backend/x86/test jit/metainterp jit/metainterp/test module/pypyjit
fijal at codespeak.net
fijal at codespeak.net
Mon Oct 5 10:34:08 CEST 2009
Author: fijal
Date: Mon Oct 5 10:34:06 2009
New Revision: 68153
Modified:
pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llgraph/runner.py
pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/descr.py
pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/llmodel.py
pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/regalloc.py
pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/symbolic.py
pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/test/test_descr.py
pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/test/test_regalloc.py
pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/test/test_runner.py
pypy/branch/merge-floats-via-sse2/pypy/jit/backend/test/runner_test.py
pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/assembler.py
pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/jump.py
pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/regalloc.py
pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/ri386.py
pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/ri386setup.py
pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/runner.py
pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/test/test_gc_integration.py
pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/test/test_jump.py
pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/test/test_regalloc.py
pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/test/test_ri386_auto_encoding.py
pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/history.py
pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/logger.py
pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/test/oparser.py
pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/test/test_executor.py
pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/test/test_logger.py
pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/test/test_oparser.py
pypy/branch/merge-floats-via-sse2/pypy/module/pypyjit/policy.py
Log:
(arigo, fijal)
Merge floats-via-sse2 branch. This branch adds float support to the x86 backend
via the SSE2 instruction set, which is present in Pentium 4 and newer processors.
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llgraph/runner.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llgraph/runner.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llgraph/runner.py Mon Oct 5 10:34:06 2009
@@ -467,9 +467,10 @@
assert isinstance(calldescr, Descr)
func = args[0].getint()
for arg in args[1:]:
- if (isinstance(arg, history.BoxPtr) or
- isinstance(arg, history.ConstPtr)):
+ if arg.type == REF:
llimpl.do_call_pushptr(arg.getref_base())
+ elif arg.type == FLOAT:
+ llimpl.do_call_pushfloat(arg.getfloat())
else:
llimpl.do_call_pushint(arg.getint())
if calldescr.typeinfo == REF:
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/descr.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/descr.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/descr.py Mon Oct 5 10:34:06 2009
@@ -1,7 +1,7 @@
from pypy.rpython.lltypesystem import lltype
from pypy.jit.backend.llsupport import symbolic
from pypy.jit.metainterp.history import AbstractDescr, getkind, BoxInt, BoxPtr
-from pypy.jit.metainterp.history import BasicFailDescr, LoopToken
+from pypy.jit.metainterp.history import BasicFailDescr, LoopToken, BoxFloat
from pypy.jit.metainterp.resoperation import ResOperation, rop
# The point of the class organization in this file is to make instances
@@ -67,8 +67,14 @@
def get_field_size(self, translate_support_code):
raise NotImplementedError
+ _is_pointer_field = False # unless overridden by GcPtrFieldDescr
+ _is_float_field = False # unless overridden by FloatFieldDescr
+
def is_pointer_field(self):
- return False # unless overridden by GcPtrFieldDescr
+ return self._is_pointer_field
+
+ def is_float_field(self):
+ return self._is_float_field
def repr_of_descr(self):
return '<%s %s>' % (self._clsname, self.offset)
@@ -81,12 +87,12 @@
class GcPtrFieldDescr(NonGcPtrFieldDescr):
_clsname = 'GcPtrFieldDescr'
- def is_pointer_field(self):
- return True
+ _is_pointer_field = True
def getFieldDescrClass(TYPE):
return getDescrClass(TYPE, BaseFieldDescr, GcPtrFieldDescr,
- NonGcPtrFieldDescr, 'Field', 'get_field_size')
+ NonGcPtrFieldDescr, 'Field', 'get_field_size',
+ '_is_float_field')
def get_field_descr(gccache, STRUCT, fieldname):
cache = gccache._cache_field
@@ -122,8 +128,14 @@
def get_item_size(self, translate_support_code):
raise NotImplementedError
+ _is_array_of_pointers = False # unless overridden by GcPtrArrayDescr
+ _is_array_of_floats = False # unless overridden by FloatArrayDescr
+
def is_array_of_pointers(self):
- return False # unless overridden by GcPtrArrayDescr
+ return self._is_array_of_pointers
+
+ def is_array_of_floats(self):
+ return self._is_array_of_floats
def repr_of_descr(self):
return '<%s>' % self._clsname
@@ -136,12 +148,12 @@
class GcPtrArrayDescr(NonGcPtrArrayDescr):
_clsname = 'GcPtrArrayDescr'
- def is_array_of_pointers(self):
- return True
+ _is_array_of_pointers = True
def getArrayDescrClass(ARRAY):
return getDescrClass(ARRAY.OF, BaseArrayDescr, GcPtrArrayDescr,
- NonGcPtrArrayDescr, 'Array', 'get_item_size')
+ NonGcPtrArrayDescr, 'Array', 'get_item_size',
+ '_is_array_of_floats')
def get_array_descr(gccache, ARRAY):
cache = gccache._cache_array
@@ -174,13 +186,20 @@
def instantiate_arg_classes(self):
result = []
for c in self.arg_classes:
- if c == 'i': box = BoxInt()
- else: box = BoxPtr()
+ if c == 'i': box = BoxInt()
+ elif c == 'f': box = BoxFloat()
+ else: box = BoxPtr()
result.append(box)
return result
+ _returns_a_pointer = False # unless overridden by GcPtrCallDescr
+ _returns_a_float = False # unless overridden by FloatCallDescr
+
def returns_a_pointer(self):
- return False # unless overridden by GcPtrCallDescr
+ return self._returns_a_pointer
+
+ def returns_a_float(self):
+ return self._returns_a_float
def get_result_size(self, translate_support_code):
raise NotImplementedError
@@ -195,6 +214,8 @@
else:
if self.returns_a_pointer():
result = BoxPtr()
+ elif self.returns_a_float():
+ result = BoxFloat()
else:
result = BoxInt()
result_list = [result]
@@ -221,8 +242,7 @@
class GcPtrCallDescr(NonGcPtrCallDescr):
_clsname = 'GcPtrCallDescr'
- def returns_a_pointer(self):
- return True
+ _returns_a_pointer = True
class VoidCallDescr(NonGcPtrCallDescr):
_clsname = 'VoidCallDescr'
@@ -233,7 +253,8 @@
if RESULT is lltype.Void:
return VoidCallDescr
return getDescrClass(RESULT, BaseCallDescr, GcPtrCallDescr,
- NonGcPtrCallDescr, 'Call', 'get_result_size')
+ NonGcPtrCallDescr, 'Call', 'get_result_size',
+ '_returns_a_float')
def get_call_descr(gccache, ARGS, RESULT):
arg_classes = []
@@ -241,6 +262,7 @@
kind = getkind(ARG)
if kind == 'int': arg_classes.append('i')
elif kind == 'ref': arg_classes.append('r')
+ elif kind == 'float': arg_classes.append('f')
else:
raise NotImplementedError('ARG = %r' % (ARG,))
arg_classes = ''.join(arg_classes)
@@ -258,7 +280,7 @@
# ____________________________________________________________
def getDescrClass(TYPE, BaseDescr, GcPtrDescr, NonGcPtrDescr,
- nameprefix, methodname, _cache={}):
+ nameprefix, methodname, floatattrname, _cache={}):
if isinstance(TYPE, lltype.Ptr):
if TYPE.TO._gckind == 'gc':
return GcPtrDescr
@@ -276,5 +298,8 @@
return symbolic.get_size(TYPE, translate_support_code)
setattr(Descr, methodname, method)
#
+ if TYPE is lltype.Float:
+ setattr(Descr, floatattrname, True)
+ #
_cache[nameprefix, TYPE] = Descr
return Descr
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/llmodel.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/llmodel.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/llmodel.py Mon Oct 5 10:34:06 2009
@@ -4,7 +4,8 @@
from pypy.rpython.llinterp import LLInterpreter
from pypy.rpython.annlowlevel import llhelper
from pypy.rlib.objectmodel import we_are_translated, specialize
-from pypy.jit.metainterp.history import BoxInt, BoxPtr, set_future_values
+from pypy.jit.metainterp.history import BoxInt, BoxPtr, set_future_values,\
+ BoxFloat
from pypy.jit.backend.model import AbstractCPU
from pypy.jit.backend.llsupport import symbolic
from pypy.jit.backend.llsupport.symbolic import WORD, unroll_basic_sizes
@@ -205,7 +206,8 @@
ofs = fielddescr.offset
size = fielddescr.get_field_size(self.translate_support_code)
ptr = fielddescr.is_pointer_field()
- return ofs, size, ptr
+ float = fielddescr.is_float_field()
+ return ofs, size, ptr, float
unpack_fielddescr._always_inline_ = True
def arraydescrof(self, A):
@@ -216,7 +218,8 @@
ofs = arraydescr.get_base_size(self.translate_support_code)
size = arraydescr.get_item_size(self.translate_support_code)
ptr = arraydescr.is_array_of_pointers()
- return ofs, size, ptr
+ float = arraydescr.is_array_of_floats()
+ return ofs, size, ptr, float
unpack_arraydescr._always_inline_ = True
def calldescrof(self, FUNC, ARGS, RESULT):
@@ -246,40 +249,66 @@
def do_getarrayitem_gc(self, arraybox, indexbox, arraydescr):
itemindex = indexbox.getint()
gcref = arraybox.getref_base()
- ofs, size, ptr = self.unpack_arraydescr(arraydescr)
+ ofs, size, ptr, float = self.unpack_arraydescr(arraydescr)
+ # --- start of GC unsafe code (no GC operation!) ---
+ items = rffi.ptradd(rffi.cast(rffi.CCHARP, gcref), ofs)
+ #
+ if ptr:
+ items = rffi.cast(rffi.CArrayPtr(lltype.Signed), items)
+ pval = self._cast_int_to_gcref(items[itemindex])
+ # --- end of GC unsafe code ---
+ return BoxPtr(pval)
+ #
+ if float:
+ items = rffi.cast(rffi.CArrayPtr(lltype.Float), items)
+ fval = items[itemindex]
+ # --- end of GC unsafe code ---
+ return BoxFloat(fval)
#
for TYPE, itemsize in unroll_basic_sizes:
if size == itemsize:
- val = (rffi.cast(rffi.CArrayPtr(TYPE), gcref)
- [ofs/itemsize + itemindex])
- val = rffi.cast(lltype.Signed, val)
- break
+ items = rffi.cast(rffi.CArrayPtr(TYPE), items)
+ val = items[itemindex]
+ # --- end of GC unsafe code ---
+ return BoxInt(rffi.cast(lltype.Signed, val))
else:
raise NotImplementedError("size = %d" % size)
- if ptr:
- return BoxPtr(self._cast_int_to_gcref(val))
- else:
- return BoxInt(val)
def do_setarrayitem_gc(self, arraybox, indexbox, vbox, arraydescr):
itemindex = indexbox.getint()
gcref = arraybox.getref_base()
- ofs, size, ptr = self.unpack_arraydescr(arraydescr)
+ ofs, size, ptr, float = self.unpack_arraydescr(arraydescr)
#
if ptr:
vboxptr = vbox.getref_base()
self.gc_ll_descr.do_write_barrier(gcref, vboxptr)
- a = rffi.cast(rffi.CArrayPtr(lltype.Signed), gcref)
- a[ofs/WORD + itemindex] = self.cast_gcref_to_int(vboxptr)
+ # --- start of GC unsafe code (no GC operation!) ---
+ items = rffi.ptradd(rffi.cast(rffi.CCHARP, gcref), ofs)
+ items = rffi.cast(rffi.CArrayPtr(lltype.Signed), items)
+ items[itemindex] = self.cast_gcref_to_int(vboxptr)
+ # --- end of GC unsafe code ---
+ return
+ #
+ if float:
+ fval = vbox.getfloat()
+ # --- start of GC unsafe code (no GC operation!) ---
+ items = rffi.ptradd(rffi.cast(rffi.CCHARP, gcref), ofs)
+ items = rffi.cast(rffi.CArrayPtr(lltype.Float), items)
+ items[itemindex] = fval
+ # --- end of GC unsafe code ---
+ return
+ #
+ val = vbox.getint()
+ for TYPE, itemsize in unroll_basic_sizes:
+ if size == itemsize:
+ # --- start of GC unsafe code (no GC operation!) ---
+ items = rffi.ptradd(rffi.cast(rffi.CCHARP, gcref), ofs)
+ items = rffi.cast(rffi.CArrayPtr(TYPE), items)
+ items[itemindex] = rffi.cast(TYPE, val)
+ # --- end of GC unsafe code ---
+ return
else:
- v = vbox.getint()
- for TYPE, itemsize in unroll_basic_sizes:
- if size == itemsize:
- a = rffi.cast(rffi.CArrayPtr(TYPE), gcref)
- a[ofs/itemsize + itemindex] = rffi.cast(TYPE, v)
- break
- else:
- raise NotImplementedError("size = %d" % size)
+ raise NotImplementedError("size = %d" % size)
def _new_do_len(TP):
def do_strlen(self, stringbox):
@@ -312,18 +341,29 @@
@specialize.argtype(1)
def _base_do_getfield(self, gcref, fielddescr):
- ofs, size, ptr = self.unpack_fielddescr(fielddescr)
+ ofs, size, ptr, float = self.unpack_fielddescr(fielddescr)
+ # --- start of GC unsafe code (no GC operation!) ---
+ field = rffi.ptradd(rffi.cast(rffi.CCHARP, gcref), ofs)
+ #
+ if ptr:
+ pval = rffi.cast(rffi.CArrayPtr(lltype.Signed), field)[0]
+ pval = self._cast_int_to_gcref(pval)
+ # --- end of GC unsafe code ---
+ return BoxPtr(pval)
+ #
+ if float:
+ fval = rffi.cast(rffi.CArrayPtr(lltype.Float), field)[0]
+ # --- end of GC unsafe code ---
+ return BoxFloat(fval)
+ #
for TYPE, itemsize in unroll_basic_sizes:
if size == itemsize:
- val = rffi.cast(rffi.CArrayPtr(TYPE), gcref)[ofs/itemsize]
+ val = rffi.cast(rffi.CArrayPtr(TYPE), field)[0]
+ # --- end of GC unsafe code ---
val = rffi.cast(lltype.Signed, val)
- break
+ return BoxInt(val)
else:
raise NotImplementedError("size = %d" % size)
- if ptr:
- return BoxPtr(self._cast_int_to_gcref(val))
- else:
- return BoxInt(val)
def do_getfield_gc(self, structbox, fielddescr):
gcref = structbox.getref_base()
@@ -334,23 +374,40 @@
@specialize.argtype(1)
def _base_do_setfield(self, gcref, vbox, fielddescr):
- ofs, size, ptr = self.unpack_fielddescr(fielddescr)
+ ofs, size, ptr, float = self.unpack_fielddescr(fielddescr)
+ #
if ptr:
assert lltype.typeOf(gcref) is not lltype.Signed, (
"can't handle write barriers for setfield_raw")
ptr = vbox.getref_base()
self.gc_ll_descr.do_write_barrier(gcref, ptr)
- a = rffi.cast(rffi.CArrayPtr(lltype.Signed), gcref)
- a[ofs/WORD] = self.cast_gcref_to_int(ptr)
+ # --- start of GC unsafe code (no GC operation!) ---
+ field = rffi.ptradd(rffi.cast(rffi.CCHARP, gcref), ofs)
+ field = rffi.cast(rffi.CArrayPtr(lltype.Signed), field)
+ field[0] = self.cast_gcref_to_int(ptr)
+ # --- end of GC unsafe code ---
+ return
+ #
+ if float:
+ fval = vbox.getfloat()
+ # --- start of GC unsafe code (no GC operation!) ---
+ field = rffi.ptradd(rffi.cast(rffi.CCHARP, gcref), ofs)
+ field = rffi.cast(rffi.CArrayPtr(lltype.Float), field)
+ field[0] = fval
+ # --- end of GC unsafe code ---
+ return
+ #
+ val = vbox.getint()
+ for TYPE, itemsize in unroll_basic_sizes:
+ if size == itemsize:
+ # --- start of GC unsafe code (no GC operation!) ---
+ field = rffi.ptradd(rffi.cast(rffi.CCHARP, gcref), ofs)
+ field = rffi.cast(rffi.CArrayPtr(TYPE), field)
+ field[0] = rffi.cast(TYPE, val)
+ # --- end of GC unsafe code ---
+ return
else:
- v = vbox.getint()
- for TYPE, itemsize in unroll_basic_sizes:
- if size == itemsize:
- v = rffi.cast(TYPE, v)
- rffi.cast(rffi.CArrayPtr(TYPE), gcref)[ofs/itemsize] = v
- break
- else:
- raise NotImplementedError("size = %d" % size)
+ raise NotImplementedError("size = %d" % size)
def do_setfield_gc(self, structbox, vbox, fielddescr):
gcref = structbox.getref_base()
@@ -416,7 +473,9 @@
# nonsense but nothing wrong (the return value should be ignored)
if calldescr.returns_a_pointer():
return BoxPtr(self.get_latest_value_ref(0))
- elif calldescr.get_result_size(self.translate_support_code) != 0:
+ elif calldescr.returns_a_float():
+ return BoxFloat(self.get_latest_value_float(0))
+ elif calldescr.get_result_size(self.translate_support_code) > 0:
return BoxInt(self.get_latest_value_int(0))
else:
return None
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/regalloc.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/regalloc.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/regalloc.py Mon Oct 5 10:34:06 2009
@@ -22,26 +22,28 @@
def get(self, box):
return self.stack_bindings.get(box, None)
- def loc(self, box):
+ def loc(self, box, size):
res = self.get(box)
if res is not None:
return res
- newloc = self.stack_pos(self.stack_depth)
+ newloc = self.stack_pos(self.stack_depth, size)
self.stack_bindings[box] = newloc
- self.stack_depth += 1
+ self.stack_depth += size
return newloc
# abstract methods that need to be overwritten for specific assemblers
@staticmethod
- def stack_pos(loc):
+ def stack_pos(loc, size):
raise NotImplementedError("Purely abstract")
class RegisterManager(object):
""" Class that keeps track of register allocations
"""
- all_regs = []
- no_lower_byte_regs = []
+ box_types = None # or a list of acceptable types
+ all_regs = []
+ no_lower_byte_regs = []
save_around_call_regs = []
+ reg_width = 1 # in terms of stack space eaten
def __init__(self, longevity, stack_manager=None, assembler=None):
self.free_regs = self.all_regs[:]
@@ -57,11 +59,16 @@
def next_instruction(self, incr=1):
self.position += incr
+ def _check_type(self, v):
+ if not we_are_translated() and self.box_types is not None:
+ assert isinstance(v, TempBox) or v.type in self.box_types
+
def possibly_free_var(self, v):
""" If v is stored in a register and v is not used beyond the
current position, then free it. Must be called at some
point for all variables that might be in registers.
"""
+ self._check_type(v)
if isinstance(v, Const) or v not in self.reg_bindings:
return
if v not in self.longevity or self.longevity[v][1] <= self.position:
@@ -96,6 +103,7 @@
returns allocated register or None, if not possible.
"""
+ self._check_type(v)
assert not isinstance(v, Const)
if selected_reg is not None:
res = self.reg_bindings.get(v, None)
@@ -140,7 +148,7 @@
loc = self.reg_bindings[v_to_spill]
del self.reg_bindings[v_to_spill]
if self.stack_manager.get(v_to_spill) is None:
- newloc = self.stack_manager.loc(v_to_spill)
+ newloc = self.stack_manager.loc(v_to_spill, self.reg_width)
self.assembler.regalloc_mov(loc, newloc)
return loc
@@ -172,6 +180,7 @@
Will not spill a variable from 'forbidden_vars'.
"""
+ self._check_type(v)
if isinstance(v, TempBox):
self.longevity[v] = (self.position, self.position)
loc = self.try_allocate_reg(v, selected_reg,
@@ -189,12 +198,13 @@
def loc(self, box):
""" Return the location of 'box'.
"""
+ self._check_type(box)
if isinstance(box, Const):
return self.convert_to_imm(box)
try:
return self.reg_bindings[box]
except KeyError:
- return self.stack_manager.loc(box)
+ return self.stack_manager.loc(box, self.reg_width)
def return_constant(self, v, forbidden_vars=[], selected_reg=None,
imm_fine=True):
@@ -203,6 +213,7 @@
a register. See 'force_allocate_reg' for the meaning of 'selected_reg'
and 'forbidden_vars'.
"""
+ self._check_type(v)
assert isinstance(v, Const)
if selected_reg or not imm_fine:
# this means we cannot have it in IMM, eh
@@ -210,7 +221,7 @@
self.assembler.regalloc_mov(self.convert_to_imm(v), selected_reg)
return selected_reg
if selected_reg is None and self.free_regs:
- loc = self.free_regs.pop()
+ loc = self.free_regs[-1]
self.assembler.regalloc_mov(self.convert_to_imm(v), loc)
return loc
loc = self._spill_var(v, forbidden_vars, selected_reg)
@@ -225,6 +236,7 @@
register. Return the register. See 'return_constant' and
'force_allocate_reg' for the meaning of the optional arguments.
"""
+ self._check_type(v)
if isinstance(v, Const):
return self.return_constant(v, forbidden_vars, selected_reg,
imm_fine)
@@ -248,7 +260,7 @@
self.reg_bindings[v] = loc
self.assembler.regalloc_mov(prev_loc, loc)
else:
- loc = self.stack_manager.loc(v)
+ loc = self.stack_manager.loc(v, self.reg_width)
self.assembler.regalloc_mov(prev_loc, loc)
def force_result_in_reg(self, result_v, v, forbidden_vars=[]):
@@ -256,14 +268,19 @@
The variable v is copied away if it's further used. The meaning
of 'forbidden_vars' is the same as in 'force_allocate_reg'.
"""
+ self._check_type(result_v)
+ self._check_type(v)
if isinstance(v, Const):
loc = self.make_sure_var_in_reg(v, forbidden_vars,
imm_fine=False)
+ # note that calling make_sure_var_in_reg with imm_fine=False
+ # will not allocate place in reg_bindings, we need to do it
+ # on our own
self.reg_bindings[result_v] = loc
self.free_regs = [reg for reg in self.free_regs if reg is not loc]
return loc
if v not in self.reg_bindings:
- prev_loc = self.stack_manager.loc(v)
+ prev_loc = self.stack_manager.loc(v, self.reg_width)
loc = self.force_allocate_reg(v, forbidden_vars)
self.assembler.regalloc_mov(prev_loc, loc)
assert v in self.reg_bindings
@@ -283,7 +300,8 @@
def _sync_var(self, v):
if not self.stack_manager.get(v):
reg = self.reg_bindings[v]
- self.assembler.regalloc_mov(reg, self.stack_manager.loc(v))
+ to = self.stack_manager.loc(v, self.reg_width)
+ self.assembler.regalloc_mov(reg, to)
# otherwise it's clean
def before_call(self, force_store=[]):
@@ -310,6 +328,7 @@
which is in variable v.
"""
if v is not None:
+ self._check_type(v)
r = self.call_result_location(v)
self.reg_bindings[v] = r
self.free_regs = [fr for fr in self.free_regs if fr is not r]
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/symbolic.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/symbolic.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/symbolic.py Mon Oct 5 10:34:06 2009
@@ -61,8 +61,10 @@
SIZEOF_CHAR = get_size(lltype.Char, False)
SIZEOF_SHORT = get_size(rffi.SHORT, False)
SIZEOF_INT = get_size(rffi.INT, False)
+SIZEOF_FLOAT = get_size(lltype.Float, False)
unroll_basic_sizes = unrolling_iterable([(lltype.Signed, WORD),
(lltype.Char, SIZEOF_CHAR),
(rffi.SHORT, SIZEOF_SHORT),
(rffi.INT, SIZEOF_INT)])
+# does not contain Float ^^^ which must be special-cased
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/test/test_descr.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/test/test_descr.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/test/test_descr.py Mon Oct 5 10:34:06 2009
@@ -26,12 +26,16 @@
T = lltype.GcStruct('T')
S = lltype.GcStruct('S', ('x', lltype.Char),
('y', lltype.Ptr(T)),
- ('z', lltype.Ptr(U)))
+ ('z', lltype.Ptr(U)),
+ ('f', lltype.Float))
assert getFieldDescrClass(lltype.Ptr(T)) is GcPtrFieldDescr
assert getFieldDescrClass(lltype.Ptr(U)) is NonGcPtrFieldDescr
cls = getFieldDescrClass(lltype.Char)
assert cls != getFieldDescrClass(lltype.Signed)
assert cls == getFieldDescrClass(lltype.Char)
+ clsf = getFieldDescrClass(lltype.Float)
+ assert clsf != cls
+ assert clsf == getFieldDescrClass(lltype.Float)
#
c0 = GcCache(False)
c1 = GcCache(True)
@@ -42,25 +46,35 @@
descr_x = get_field_descr(c2, S, 'x')
descr_y = get_field_descr(c2, S, 'y')
descr_z = get_field_descr(c2, S, 'z')
+ descr_f = get_field_descr(c2, S, 'f')
assert descr_x.__class__ is cls
assert descr_y.__class__ is GcPtrFieldDescr
assert descr_z.__class__ is NonGcPtrFieldDescr
+ assert descr_f.__class__ is clsf
if not tsc:
assert descr_x.offset < descr_y.offset < descr_z.offset
assert descr_x.sort_key() < descr_y.sort_key() < descr_z.sort_key()
assert descr_x.get_field_size(False) == rffi.sizeof(lltype.Char)
assert descr_y.get_field_size(False) == rffi.sizeof(lltype.Ptr(T))
assert descr_z.get_field_size(False) == rffi.sizeof(lltype.Ptr(U))
+ assert descr_f.get_field_size(False) == rffi.sizeof(lltype.Float)
else:
assert isinstance(descr_x.offset, Symbolic)
assert isinstance(descr_y.offset, Symbolic)
assert isinstance(descr_z.offset, Symbolic)
+ assert isinstance(descr_f.offset, Symbolic)
assert isinstance(descr_x.get_field_size(True), Symbolic)
assert isinstance(descr_y.get_field_size(True), Symbolic)
assert isinstance(descr_z.get_field_size(True), Symbolic)
+ assert isinstance(descr_f.get_field_size(True), Symbolic)
assert not descr_x.is_pointer_field()
assert descr_y.is_pointer_field()
assert not descr_z.is_pointer_field()
+ assert not descr_f.is_pointer_field()
+ assert not descr_x.is_float_field()
+ assert not descr_y.is_float_field()
+ assert not descr_z.is_float_field()
+ assert descr_f.is_float_field()
def test_get_array_descr():
@@ -69,68 +83,102 @@
A1 = lltype.GcArray(lltype.Char)
A2 = lltype.GcArray(lltype.Ptr(T))
A3 = lltype.GcArray(lltype.Ptr(U))
+ A4 = lltype.GcArray(lltype.Float)
assert getArrayDescrClass(A2) is GcPtrArrayDescr
assert getArrayDescrClass(A3) is NonGcPtrArrayDescr
cls = getArrayDescrClass(A1)
assert cls != getArrayDescrClass(lltype.GcArray(lltype.Signed))
assert cls == getArrayDescrClass(lltype.GcArray(lltype.Char))
+ clsf = getArrayDescrClass(A4)
+ assert clsf != cls
+ assert clsf == getArrayDescrClass(lltype.GcArray(lltype.Float))
#
c0 = GcCache(False)
descr1 = get_array_descr(c0, A1)
descr2 = get_array_descr(c0, A2)
descr3 = get_array_descr(c0, A3)
+ descr4 = get_array_descr(c0, A4)
assert descr1.__class__ is cls
assert descr2.__class__ is GcPtrArrayDescr
assert descr3.__class__ is NonGcPtrArrayDescr
+ assert descr4.__class__ is clsf
assert descr1 == get_array_descr(c0, lltype.GcArray(lltype.Char))
assert not descr1.is_array_of_pointers()
assert descr2.is_array_of_pointers()
assert not descr3.is_array_of_pointers()
+ assert not descr4.is_array_of_pointers()
+ assert not descr1.is_array_of_floats()
+ assert not descr2.is_array_of_floats()
+ assert not descr3.is_array_of_floats()
+ assert descr4.is_array_of_floats()
#
WORD = rffi.sizeof(lltype.Signed)
assert descr1.get_base_size(False) == WORD
assert descr2.get_base_size(False) == WORD
assert descr3.get_base_size(False) == WORD
+ assert descr4.get_base_size(False) == WORD
assert descr1.get_ofs_length(False) == 0
assert descr2.get_ofs_length(False) == 0
assert descr3.get_ofs_length(False) == 0
+ assert descr4.get_ofs_length(False) == 0
assert descr1.get_item_size(False) == rffi.sizeof(lltype.Char)
assert descr2.get_item_size(False) == rffi.sizeof(lltype.Ptr(T))
assert descr3.get_item_size(False) == rffi.sizeof(lltype.Ptr(U))
+ assert descr4.get_item_size(False) == rffi.sizeof(lltype.Float)
#
assert isinstance(descr1.get_base_size(True), Symbolic)
assert isinstance(descr2.get_base_size(True), Symbolic)
assert isinstance(descr3.get_base_size(True), Symbolic)
+ assert isinstance(descr4.get_base_size(True), Symbolic)
assert isinstance(descr1.get_ofs_length(True), Symbolic)
assert isinstance(descr2.get_ofs_length(True), Symbolic)
assert isinstance(descr3.get_ofs_length(True), Symbolic)
+ assert isinstance(descr4.get_ofs_length(True), Symbolic)
assert isinstance(descr1.get_item_size(True), Symbolic)
assert isinstance(descr2.get_item_size(True), Symbolic)
assert isinstance(descr3.get_item_size(True), Symbolic)
+ assert isinstance(descr4.get_item_size(True), Symbolic)
-def test_get_call_descr():
+def test_get_call_descr_not_translated():
c0 = GcCache(False)
descr1 = get_call_descr(c0, [lltype.Char, lltype.Signed], lltype.Char)
assert descr1.get_result_size(False) == rffi.sizeof(lltype.Char)
assert not descr1.returns_a_pointer()
+ assert not descr1.returns_a_float()
assert descr1.arg_classes == "ii"
#
T = lltype.GcStruct('T')
descr2 = get_call_descr(c0, [lltype.Ptr(T)], lltype.Ptr(T))
assert descr2.get_result_size(False) == rffi.sizeof(lltype.Ptr(T))
assert descr2.returns_a_pointer()
+ assert not descr2.returns_a_float()
assert descr2.arg_classes == "r"
#
U = lltype.GcStruct('U', ('x', lltype.Signed))
assert descr2 == get_call_descr(c0, [lltype.Ptr(U)], lltype.Ptr(U))
#
+ descr4 = get_call_descr(c0, [lltype.Float, lltype.Float], lltype.Float)
+ assert descr4.get_result_size(False) == rffi.sizeof(lltype.Float)
+ assert not descr4.returns_a_pointer()
+ assert descr4.returns_a_float()
+ assert descr4.arg_classes == "ff"
+
+def test_get_call_descr_translated():
c1 = GcCache(True)
+ T = lltype.GcStruct('T')
+ U = lltype.GcStruct('U', ('x', lltype.Signed))
descr3 = get_call_descr(c1, [lltype.Ptr(T)], lltype.Ptr(U))
assert isinstance(descr3.get_result_size(True), Symbolic)
assert descr3.returns_a_pointer()
+ assert not descr3.returns_a_float()
assert descr3.arg_classes == "r"
-
+ #
+ descr4 = get_call_descr(c1, [lltype.Float, lltype.Float], lltype.Float)
+ assert isinstance(descr4.get_result_size(True), Symbolic)
+ assert not descr4.returns_a_pointer()
+ assert descr4.returns_a_float()
+ assert descr4.arg_classes == "ff"
def test_repr_of_descr():
c0 = GcCache(False)
@@ -162,3 +210,6 @@
#
descr4i = get_call_descr(c0, [lltype.Char, lltype.Ptr(S)], lltype.Char)
assert 'CharCallDescr' in descr4i.repr_of_descr()
+ #
+ descr4f = get_call_descr(c0, [lltype.Char, lltype.Ptr(S)], lltype.Float)
+ assert 'FloatCallDescr' in descr4f.repr_of_descr()
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/test/test_regalloc.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/test/test_regalloc.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/test/test_regalloc.py Mon Oct 5 10:34:06 2009
@@ -1,5 +1,5 @@
-from pypy.jit.metainterp.history import BoxInt, ConstInt
+from pypy.jit.metainterp.history import BoxInt, ConstInt, BoxFloat
from pypy.jit.backend.llsupport.regalloc import StackManager
from pypy.jit.backend.llsupport.regalloc import RegisterManager as BaseRegMan
@@ -27,7 +27,7 @@
return v
class TStackManager(StackManager):
- def stack_pos(self, i):
+ def stack_pos(self, i, size):
return i
class MockAsm(object):
@@ -146,7 +146,7 @@
rm.next_instruction()
# allocate a stack position
b0, b1, b2, b3, b4 = boxes
- sp = sm.loc(b0)
+ sp = sm.loc(b0, 1)
assert sp == 0
loc = rm.make_sure_var_in_reg(b0)
assert isinstance(loc, FakeReg)
@@ -207,7 +207,7 @@
asm = MockAsm()
rm = RegisterManager(longevity, stack_manager=sm, assembler=asm)
rm.next_instruction()
- sm.loc(b0)
+ sm.loc(b0, 1)
rm.force_result_in_reg(b1, b0)
rm._check_invariants()
loc = rm.loc(b1)
@@ -233,10 +233,11 @@
assert isinstance(loc, ConstInt)
for box in boxes[:-1]:
rm.force_allocate_reg(box)
- assert len(asm.moves) == 4
+ assert len(asm.moves) == 3
loc = rm.return_constant(ConstInt(1), imm_fine=False)
assert isinstance(loc, FakeReg)
- assert len(asm.moves) == 6
+ assert len(asm.moves) == 5
+ assert len(rm.reg_bindings) == 3
def test_force_result_in_reg_const(self):
boxes, longevity = boxes_and_longevity(2)
@@ -276,3 +277,21 @@
rm.after_call(boxes[-1])
assert len(rm.reg_bindings) == 3
rm._check_invariants()
+
+ def test_different_stack_width(self):
+ class XRegisterManager(RegisterManager):
+ reg_width = 2
+
+ sm = TStackManager()
+ b0 = BoxInt()
+ longevity = {b0: (0, 1)}
+ asm = MockAsm()
+ rm = RegisterManager(longevity, stack_manager=sm, assembler=asm)
+ f0 = BoxFloat()
+ longevity = {f0: (0, 1)}
+ xrm = XRegisterManager(longevity, stack_manager=sm, assembler=asm)
+ xrm.loc(f0)
+ rm.loc(b0)
+ assert sm.stack_depth == 3
+
+
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/test/test_runner.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/test/test_runner.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/backend/llsupport/test/test_runner.py Mon Oct 5 10:34:06 2009
@@ -7,6 +7,7 @@
pass
class MyLLCPU(AbstractLLCPU):
+ supports_floats = True
def compile_loop(self, inputargs, operations, looptoken):
py.test.skip("llsupport test: cannot compile operations")
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/backend/test/runner_test.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/backend/test/runner_test.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/backend/test/runner_test.py Mon Oct 5 10:34:06 2009
@@ -340,6 +340,19 @@
BoxInt(ord('A'))],
calldescr)
assert x.value == ord('B')
+ if cpu.supports_floats:
+ def func(f, i):
+ return float(i) + f
+ FPTR = self.Ptr(self.FuncType([lltype.Float, lltype.Signed],
+ lltype.Float))
+ func_ptr = llhelper(FPTR, func)
+ FTP = deref(FPTR)
+ calldescr = cpu.calldescrof(FTP, FTP.ARGS, FTP.RESULT)
+ x = cpu.do_call(
+ [self.get_funcbox(cpu, func_ptr),
+ BoxFloat(3.5), BoxInt(42)],
+ calldescr)
+ assert x.value == 42 + 3.5
def test_call(self):
@@ -367,6 +380,24 @@
'int', descr=calldescr)
assert res.value == 2 * num
+ if cpu.supports_floats:
+ def func(f0, f1, f2, f3, f4, f5, f6, i0, i1, f7, f8, f9):
+ return f0 + f1 + f2 + f3 + f4 + f5 + f6 + float(i0 + i1) + f7 + f8 + f9
+ F = lltype.Float
+ I = lltype.Signed
+ FUNC = self.FuncType([F] * 7 + [I] * 2 + [F] * 3, F)
+ FPTR = self.Ptr(FUNC)
+ func_ptr = llhelper(FPTR, func)
+ calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+ funcbox = self.get_funcbox(cpu, func_ptr)
+ args = ([BoxFloat(.1) for i in range(7)] +
+ [BoxInt(1), BoxInt(2), BoxFloat(.2), BoxFloat(.3),
+ BoxFloat(.4)])
+ res = self.execute_operation(rop.CALL,
+ [funcbox] + args,
+ 'float', descr=calldescr)
+ assert abs(res.value - 4.6) < 0.0001
+
def test_field_basic(self):
t_box, T_box = self.alloc_instance(self.T)
fielddescr = self.cpu.fielddescrof(self.S, 'value')
@@ -416,12 +447,22 @@
res = self.execute_operation(rop.GETFIELD_GC, [t_box],
'ref', descr=fielddescr2)
assert res.value == null_const.value
+ if self.cpu.supports_floats:
+ floatdescr = self.cpu.fielddescrof(self.S, 'float')
+ self.execute_operation(rop.SETFIELD_GC, [t_box, BoxFloat(3.4)],
+ 'void', descr=floatdescr)
+ res = self.execute_operation(rop.GETFIELD_GC, [t_box],
+ 'float', descr=floatdescr)
+ assert res.value == 3.4
+
def test_passing_guards(self):
- for (opname, args) in [(rop.GUARD_TRUE, [BoxInt(1)]),
- (rop.GUARD_FALSE, [BoxInt(0)]),
- (rop.GUARD_VALUE, [BoxInt(42), BoxInt(42)]),
- ]:
+ all = [(rop.GUARD_TRUE, [BoxInt(1)]),
+ (rop.GUARD_FALSE, [BoxInt(0)]),
+ (rop.GUARD_VALUE, [BoxInt(42), BoxInt(42)])]
+ if self.cpu.supports_floats:
+ all.append((rop.GUARD_VALUE, [BoxFloat(3.5), BoxFloat(3.5)]))
+ for (opname, args) in all:
assert self.execute_operation(opname, args, 'void') == None
assert not self.guard_failed
@@ -435,10 +476,12 @@
# 'void')
def test_failing_guards(self):
- for opname, args in [(rop.GUARD_TRUE, [BoxInt(0)]),
- (rop.GUARD_FALSE, [BoxInt(1)]),
- (rop.GUARD_VALUE, [BoxInt(42), BoxInt(41)]),
- ]:
+ all = [(rop.GUARD_TRUE, [BoxInt(0)]),
+ (rop.GUARD_FALSE, [BoxInt(1)]),
+ (rop.GUARD_VALUE, [BoxInt(42), BoxInt(41)])]
+ if self.cpu.supports_floats:
+ all.append((rop.GUARD_VALUE, [BoxFloat(-1.0), BoxFloat(1.0)]))
+ for opname, args in all:
assert self.execute_operation(opname, args, 'void') == None
assert self.guard_failed
@@ -589,6 +632,23 @@
'int', descr=arraydescr)
assert r.value == 1
+ if self.cpu.supports_floats:
+ a_box, A = self.alloc_array_of(lltype.Float, 31)
+ arraydescr = self.cpu.arraydescrof(A)
+ self.execute_operation(rop.SETARRAYITEM_GC, [a_box, BoxInt(1),
+ BoxFloat(3.5)],
+ 'void', descr=arraydescr)
+ self.execute_operation(rop.SETARRAYITEM_GC, [a_box, BoxInt(2),
+ BoxFloat(4.5)],
+ 'void', descr=arraydescr)
+ r = self.execute_operation(rop.GETARRAYITEM_GC, [a_box, BoxInt(1)],
+ 'float', descr=arraydescr)
+ assert r.value == 3.5
+ r = self.execute_operation(rop.GETARRAYITEM_GC, [a_box, BoxInt(2)],
+ 'float', descr=arraydescr)
+ assert r.value == 4.5
+
+
def test_string_basic(self):
s_box = self.alloc_string("hello\xfe")
r = self.execute_operation(rop.STRLEN, [s_box], 'int')
@@ -633,6 +693,180 @@
r = self.execute_operation(rop.SAME_AS, [u_box.constbox()], 'ref')
assert r.value == u_box.value
+ if self.cpu.supports_floats:
+ r = self.execute_operation(rop.SAME_AS, [ConstFloat(5.5)], 'float')
+ assert r.value == 5.5
+
+ def test_jump(self):
+ # this test generates small loops where the JUMP passes many
+ # arguments of various types, shuffling them around.
+ if self.cpu.supports_floats:
+ numkinds = 3
+ else:
+ numkinds = 2
+ seed = random.randrange(0, 10000)
+ print 'Seed is', seed # or choose it by changing the previous line
+ r = random.Random()
+ r.seed(seed)
+ for nb_args in range(50):
+ print 'Passing %d arguments around...' % nb_args
+ #
+ inputargs = []
+ for k in range(nb_args):
+ kind = r.randrange(0, numkinds)
+ if kind == 0:
+ inputargs.append(BoxInt())
+ elif kind == 1:
+ inputargs.append(BoxPtr())
+ else:
+ inputargs.append(BoxFloat())
+ jumpargs = []
+ remixing = []
+ for srcbox in inputargs:
+ n = r.randrange(0, len(inputargs))
+ otherbox = inputargs[n]
+ if otherbox.type == srcbox.type:
+ remixing.append((srcbox, otherbox))
+ else:
+ otherbox = srcbox
+ jumpargs.append(otherbox)
+ #
+ index_counter = r.randrange(0, len(inputargs)+1)
+ i0 = BoxInt()
+ i1 = BoxInt()
+ i2 = BoxInt()
+ inputargs.insert(index_counter, i0)
+ jumpargs.insert(index_counter, i1)
+ #
+ looptoken = LoopToken()
+ faildescr = BasicFailDescr()
+ operations = [
+ ResOperation(rop.INT_SUB, [i0, ConstInt(1)], i1),
+ ResOperation(rop.INT_GE, [i1, ConstInt(0)], i2),
+ ResOperation(rop.GUARD_TRUE, [i2], None),
+ ResOperation(rop.JUMP, jumpargs, None, descr=looptoken),
+ ]
+ operations[2].fail_args = inputargs[:]
+ operations[2].descr = faildescr
+ #
+ self.cpu.compile_loop(inputargs, operations, looptoken)
+ #
+ values = []
+ S = lltype.GcStruct('S')
+ for box in inputargs:
+ if isinstance(box, BoxInt):
+ values.append(r.randrange(-10000, 10000))
+ elif isinstance(box, BoxPtr):
+ p = lltype.malloc(S)
+ values.append(lltype.cast_opaque_ptr(llmemory.GCREF, p))
+ elif isinstance(box, BoxFloat):
+ values.append(r.random())
+ else:
+ assert 0
+ values[index_counter] = 11
+ #
+ for i, (box, val) in enumerate(zip(inputargs, values)):
+ if isinstance(box, BoxInt):
+ self.cpu.set_future_value_int(i, val)
+ elif isinstance(box, BoxPtr):
+ self.cpu.set_future_value_ref(i, val)
+ elif isinstance(box, BoxFloat):
+ self.cpu.set_future_value_float(i, val)
+ else:
+ assert 0
+ #
+ fail = self.cpu.execute_token(looptoken)
+ assert fail is faildescr
+ #
+ dstvalues = values[:]
+ for _ in range(11):
+ expected = dstvalues[:]
+ for tgtbox, srcbox in remixing:
+ v = dstvalues[inputargs.index(srcbox)]
+ expected[inputargs.index(tgtbox)] = v
+ dstvalues = expected
+ #
+ assert dstvalues[index_counter] == 11
+ dstvalues[index_counter] = 0
+ for i, (box, val) in enumerate(zip(inputargs, dstvalues)):
+ if isinstance(box, BoxInt):
+ got = self.cpu.get_latest_value_int(i)
+ elif isinstance(box, BoxPtr):
+ got = self.cpu.get_latest_value_ref(i)
+ elif isinstance(box, BoxFloat):
+ got = self.cpu.get_latest_value_float(i)
+ else:
+ assert 0
+ assert type(got) == type(val)
+ assert got == val
+
+ def test_compile_bridge_float(self):
+ if not self.cpu.supports_floats:
+ py.test.skip("requires floats")
+ fboxes = [BoxFloat() for i in range(12)]
+ i2 = BoxInt()
+ faildescr1 = BasicFailDescr()
+ faildescr2 = BasicFailDescr()
+ operations = [
+ ResOperation(rop.FLOAT_LE, [fboxes[0], ConstFloat(9.2)], i2),
+ ResOperation(rop.GUARD_TRUE, [i2], None, descr=faildescr1),
+ ResOperation(rop.FINISH, fboxes, None, descr=faildescr2),
+ ]
+ operations[-2].fail_args = fboxes
+ looptoken = LoopToken()
+ self.cpu.compile_loop(fboxes, operations, looptoken)
+
+ fboxes2 = [BoxFloat() for i in range(12)]
+ f3 = BoxFloat()
+ bridge = [
+ ResOperation(rop.FLOAT_SUB, [fboxes2[0], ConstFloat(1.0)], f3),
+ ResOperation(rop.JUMP, [f3] + fboxes2[1:], None, descr=looptoken),
+ ]
+
+ self.cpu.compile_bridge(faildescr1, fboxes2, bridge)
+
+ for i in range(len(fboxes)):
+ self.cpu.set_future_value_float(i, 13.5 + 6.73 * i)
+ fail = self.cpu.execute_token(looptoken)
+ assert fail is faildescr2
+ res = self.cpu.get_latest_value_float(0)
+ assert res == 8.5
+ for i in range(1, len(fboxes)):
+ assert self.cpu.get_latest_value_float(i) == 13.5 + 6.73 * i
+
+ def test_unused_result_float(self):
+ # same as test_unused_result_int, for float operations
+ from pypy.jit.metainterp.test.test_executor import get_float_tests
+ float_tests = list(get_float_tests(self.cpu))
+ inputargs = []
+ operations = []
+ for opnum, boxargs, rettype, retvalue in float_tests:
+ inputargs += boxargs
+ if rettype == 'int':
+ boxres = BoxInt()
+ elif rettype == 'float':
+ boxres = BoxFloat()
+ else:
+ assert 0
+ operations.append(ResOperation(opnum, boxargs, boxres))
+ faildescr = BasicFailDescr()
+ operations.append(ResOperation(rop.FINISH, [], None,
+ descr=faildescr))
+ looptoken = LoopToken()
+ #
+ self.cpu.compile_loop(inputargs, operations, looptoken)
+ #
+ for i, box in enumerate(inputargs):
+ if isinstance(box, BoxInt):
+ self.cpu.set_future_value_int(i, box.getint())
+ elif isinstance(box, BoxFloat):
+ self.cpu.set_future_value_float(i, box.getfloat())
+ else:
+ assert 0
+ #
+ fail = self.cpu.execute_token(looptoken)
+ assert fail is faildescr
+
class LLtypeBackendTest(BaseBackendTest):
@@ -656,7 +890,8 @@
('chr1', lltype.Char),
('chr2', lltype.Char),
('short', rffi.SHORT),
- ('next', lltype.Ptr(S))))
+ ('next', lltype.Ptr(S)),
+ ('float', lltype.Float)))
T = lltype.GcStruct('T', ('parent', S),
('next', lltype.Ptr(S)))
U = lltype.GcStruct('U', ('parent', T),
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/assembler.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/assembler.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/assembler.py Mon Oct 5 10:34:06 2009
@@ -1,13 +1,14 @@
import sys, os
import ctypes
from pypy.jit.backend.llsupport import symbolic
-from pypy.jit.metainterp.history import Const, Box, BoxPtr, REF
+from pypy.jit.metainterp.history import Const, Box, BoxPtr, REF, FLOAT
from pypy.jit.metainterp.history import AbstractFailDescr
from pypy.rpython.lltypesystem import lltype, rffi, ll2ctypes, rstr, llmemory
from pypy.rpython.lltypesystem.rclass import OBJECT
from pypy.rpython.lltypesystem.lloperation import llop
from pypy.tool.uid import fixid
-from pypy.jit.backend.x86.regalloc import RegAlloc, WORD, lower_byte
+from pypy.jit.backend.x86.regalloc import RegAlloc, WORD, lower_byte,\
+ X86RegisterManager, X86XMMRegisterManager, get_ebp_ofs
from pypy.rlib.objectmodel import we_are_translated, specialize
from pypy.jit.backend.x86 import codebuf
from pypy.jit.backend.x86.ri386 import *
@@ -62,6 +63,12 @@
if name.upper() == name:
setattr(MachineCodeBlockWrapper, name, _new_method(name))
+class ExecutableToken386(object):
+ _x86_loop_code = 0
+ _x86_bootstrap_code = 0
+ _x86_stack_depth = 0
+ _x86_arglocs = (None, None)
+
class Assembler386(object):
mc = None
mc2 = None
@@ -79,6 +86,8 @@
MAX_FAIL_BOXES, zero=True)
self.fail_boxes_ptr = lltype.malloc(lltype.GcArray(llmemory.GCREF),
MAX_FAIL_BOXES, zero=True)
+ self.fail_boxes_float = lltype.malloc(lltype.GcArray(lltype.Float),
+ MAX_FAIL_BOXES, zero=True)
def leave_jitted_hook(self):
fail_boxes_ptr = self.fail_boxes_ptr
@@ -89,10 +98,13 @@
if self.mc is None:
rffi.cast(lltype.Signed, self.fail_boxes_int) # workaround
rffi.cast(lltype.Signed, self.fail_boxes_ptr) # workaround
+ rffi.cast(lltype.Signed, self.fail_boxes_float) # workaround
self.fail_box_int_addr = rffi.cast(lltype.Signed,
lltype.direct_arrayitems(self.fail_boxes_int))
self.fail_box_ptr_addr = rffi.cast(lltype.Signed,
lltype.direct_arrayitems(self.fail_boxes_ptr))
+ self.fail_box_float_addr = rffi.cast(lltype.Signed,
+ lltype.direct_arrayitems(self.fail_boxes_float))
# the address of the function called by 'new'
gc_ll_descr = self.cpu.gc_ll_descr
@@ -184,6 +196,7 @@
mc.done()
def _assemble_bootstrap_code(self, inputargs, arglocs):
+ nonfloatlocs, floatlocs = arglocs
self.mc.PUSH(ebp)
self.mc.MOV(ebp, esp)
self.mc.PUSH(ebx)
@@ -192,31 +205,37 @@
# NB. exactly 4 pushes above; if this changes, fix stack_pos().
# You must also keep _get_callshape() in sync.
adr_stackadjust = self._patchable_stackadjust()
- for i in range(len(arglocs)):
- loc = arglocs[i]
- if not isinstance(loc, REG):
- if inputargs[i].type == REF:
- # This uses XCHG to put zeroes in fail_boxes_ptr after
- # reading them
- self.mc.XOR(ecx, ecx)
- self.mc.XCHG(ecx, addr_add(imm(self.fail_box_ptr_addr),
- imm(i*WORD)))
- else:
- self.mc.MOV(ecx, addr_add(imm(self.fail_box_int_addr),
- imm(i*WORD)))
- self.mc.MOV(loc, ecx)
- for i in range(len(arglocs)):
- loc = arglocs[i]
+ tmp = X86RegisterManager.all_regs[0]
+ xmmtmp = X86XMMRegisterManager.all_regs[0]
+ for i in range(len(nonfloatlocs)):
+ loc = nonfloatlocs[i]
+ if loc is None:
+ continue
if isinstance(loc, REG):
- if inputargs[i].type == REF:
- # This uses XCHG to put zeroes in fail_boxes_ptr after
- # reading them
- self.mc.XOR(loc, loc)
- self.mc.XCHG(loc, addr_add(imm(self.fail_box_ptr_addr),
- imm(i*WORD)))
- else:
- self.mc.MOV(loc, addr_add(imm(self.fail_box_int_addr),
+ target = loc
+ else:
+ target = tmp
+ if inputargs[i].type == REF:
+ # This uses XCHG to put zeroes in fail_boxes_ptr after
+ # reading them
+ self.mc.XOR(target, target)
+ self.mc.XCHG(target, addr_add(imm(self.fail_box_ptr_addr),
imm(i*WORD)))
+ else:
+ self.mc.MOV(target, addr_add(imm(self.fail_box_int_addr),
+ imm(i*WORD)))
+ self.mc.MOV(loc, target)
+ for i in range(len(floatlocs)):
+ loc = floatlocs[i]
+ if loc is None:
+ continue
+ if isinstance(loc, REG):
+ self.mc.MOVSD(loc, addr64_add(imm(self.fail_box_float_addr),
+ imm(i*WORD*2)))
+ else:
+ self.mc.MOVSD(xmmtmp, addr64_add(imm(self.fail_box_float_addr),
+ imm(i*WORD*2)))
+ self.mc.MOVSD(loc, xmmtmp)
return adr_stackadjust
def dump(self, text):
@@ -231,14 +250,38 @@
# ------------------------------------------------------------
- def regalloc_mov(self, from_loc, to_loc):
- self.mc.MOV(to_loc, from_loc)
+ def mov(self, from_loc, to_loc):
+ if isinstance(from_loc, XMMREG) or isinstance(to_loc, XMMREG):
+ self.mc.MOVSD(to_loc, from_loc)
+ else:
+ self.mc.MOV(to_loc, from_loc)
+
+ regalloc_mov = mov # legacy interface
+
+ def regalloc_fstp(self, loc):
+ self.mc.FSTP(loc)
def regalloc_push(self, loc):
- self.mc.PUSH(loc)
+ if isinstance(loc, XMMREG):
+ self.mc.SUB(esp, imm(2*WORD))
+ self.mc.MOVSD(mem64(esp, 0), loc)
+ elif isinstance(loc, MODRM64):
+ # XXX evil trick
+ self.mc.PUSH(mem(ebp, get_ebp_ofs(loc.position)))
+ self.mc.PUSH(mem(ebp, get_ebp_ofs(loc.position + 1)))
+ else:
+ self.mc.PUSH(loc)
def regalloc_pop(self, loc):
- self.mc.POP(loc)
+ if isinstance(loc, XMMREG):
+ self.mc.MOVSD(loc, mem64(esp, 0))
+ self.mc.ADD(esp, imm(2*WORD))
+ elif isinstance(loc, MODRM64):
+ # XXX evil trick
+ self.mc.POP(mem(ebp, get_ebp_ofs(loc.position + 1)))
+ self.mc.POP(mem(ebp, get_ebp_ofs(loc.position)))
+ else:
+ self.mc.POP(loc)
def regalloc_perform(self, op, arglocs, resloc):
genop_list[op.opnum](self, op, arglocs, resloc)
@@ -261,7 +304,7 @@
else:
dispatch_opnum = op.opnum
adr_jump_offset = genop_guard_list[dispatch_opnum](self, op,
- guard_opnum,
+ guard_op,
failaddr, arglocs,
resloc)
faildescr._x86_adr_jump_offset = adr_jump_offset
@@ -296,8 +339,16 @@
getattr(self.mc, 'SET' + cond)(lower_byte(result_loc))
return genop_cmp
+ def _cmpop_float(cond):
+ def genop_cmp(self, op, arglocs, result_loc):
+ self.mc.UCOMISD(arglocs[0], arglocs[1])
+ self.mc.MOV(result_loc, imm8(0))
+ getattr(self.mc, 'SET' + cond)(lower_byte(result_loc))
+ return genop_cmp
+
def _cmpop_guard(cond, rev_cond, false_cond, false_rev_cond):
- def genop_cmp_guard(self, op, guard_opnum, addr, arglocs, result_loc):
+ def genop_cmp_guard(self, op, guard_op, addr, arglocs, result_loc):
+ guard_opnum = guard_op.opnum
if isinstance(op.args[0], Const):
self.mc.CMP(arglocs[1], arglocs[0])
if guard_opnum == rop.GUARD_FALSE:
@@ -341,6 +392,10 @@
genop_int_and = _binaryop("AND", True)
genop_int_or = _binaryop("OR", True)
genop_int_xor = _binaryop("XOR", True)
+ genop_float_add = _binaryop("ADDSD", True)
+ genop_float_sub = _binaryop('SUBSD')
+ genop_float_mul = _binaryop('MULSD', True)
+ genop_float_truediv = _binaryop('DIVSD')
genop_int_mul_ovf = genop_int_mul
genop_int_sub_ovf = genop_int_sub
@@ -355,6 +410,13 @@
genop_int_gt = _cmpop("G", "L")
genop_int_ge = _cmpop("GE", "LE")
+ genop_float_lt = _cmpop_float('B')
+ genop_float_le = _cmpop_float('BE')
+ genop_float_eq = _cmpop_float('E')
+ genop_float_ne = _cmpop_float('NE')
+ genop_float_gt = _cmpop_float('A')
+ genop_float_ge = _cmpop_float('AE')
+
genop_uint_gt = _cmpop("A", "B")
genop_uint_lt = _cmpop("B", "A")
genop_uint_le = _cmpop("BE", "AE")
@@ -376,6 +438,25 @@
# a difference at some point
xxx_genop_char_eq = genop_int_eq
+ def genop_float_neg(self, op, arglocs, resloc):
+ self.mc.XORPD(arglocs[0], arglocs[1])
+
+ def genop_float_abs(self, op, arglocs, resloc):
+ self.mc.ANDPD(arglocs[0], arglocs[1])
+
+ def genop_float_is_true(self, op, arglocs, resloc):
+ loc0, loc1 = arglocs
+ self.mc.XORPD(loc0, loc0)
+ self.mc.UCOMISD(loc1, loc0)
+ self.mc.SETNE(lower_byte(resloc))
+ self.mc.MOVZX(resloc, lower_byte(resloc))
+
+ def genop_cast_float_to_int(self, op, arglocs, resloc):
+ self.mc.CVTTSD2SI(resloc, arglocs[0])
+
+ def genop_cast_int_to_float(self, op, arglocs, resloc):
+ self.mc.CVTSI2SD(resloc, arglocs[0])
+
def genop_bool_not(self, op, arglocs, resloc):
self.mc.XOR(arglocs[0], imm8(1))
@@ -397,7 +478,8 @@
loc2 = cl
self.mc.SHR(loc, loc2)
- def genop_guard_oononnull(self, op, guard_opnum, addr, arglocs, resloc):
+ def genop_guard_oononnull(self, op, guard_op, addr, arglocs, resloc):
+ guard_opnum = guard_op.opnum
loc = arglocs[0]
self.mc.TEST(loc, loc)
if guard_opnum == rop.GUARD_TRUE:
@@ -405,7 +487,8 @@
else:
return self.implement_guard(addr, self.mc.JNZ)
- def genop_guard_ooisnull(self, op, guard_opnum, addr, arglocs, resloc):
+ def genop_guard_ooisnull(self, op, guard_op, addr, arglocs, resloc):
+ guard_opnum = guard_op.opnum
loc = arglocs[0]
self.mc.TEST(loc, loc)
if guard_opnum == rop.GUARD_TRUE:
@@ -428,7 +511,7 @@
self.mc.SETE(lower_byte(resloc))
def genop_same_as(self, op, arglocs, resloc):
- self.mc.MOV(resloc, arglocs[0])
+ self.mov(arglocs[0], resloc)
genop_cast_ptr_to_int = genop_same_as
def genop_int_mod(self, op, arglocs, resloc):
@@ -474,6 +557,8 @@
self.mc.MOVZX(resloc, addr_add(base_loc, ofs_loc))
elif size == WORD:
self.mc.MOV(resloc, addr_add(base_loc, ofs_loc))
+ elif size == 8:
+ self.mc.MOVSD(resloc, addr64_add(base_loc, ofs_loc))
else:
raise NotImplementedError("getfield size = %d" % size)
@@ -483,15 +568,19 @@
base_loc, ofs_loc, scale, ofs = arglocs
assert isinstance(ofs, IMM32)
assert isinstance(scale, IMM32)
- if scale.value == 0:
- self.mc.MOVZX(resloc, addr8_add(base_loc, ofs_loc, ofs.value,
- scale.value))
- elif scale.value == 2:
- self.mc.MOV(resloc, addr_add(base_loc, ofs_loc, ofs.value,
- scale.value))
- else:
- print "[asmgen]setarrayitem unsupported size: %d" % scale.value
- raise NotImplementedError()
+ if op.result.type == FLOAT:
+ self.mc.MOVSD(resloc, addr64_add(base_loc, ofs_loc, ofs.value,
+ scale.value))
+ else:
+ if scale.value == 0:
+ self.mc.MOVZX(resloc, addr8_add(base_loc, ofs_loc, ofs.value,
+ scale.value))
+ elif scale.value == 2:
+ self.mc.MOV(resloc, addr_add(base_loc, ofs_loc, ofs.value,
+ scale.value))
+ else:
+ print "[asmgen]setarrayitem unsupported size: %d" % scale.value
+ raise NotImplementedError()
genop_getfield_raw = genop_getfield_gc
genop_getarrayitem_gc_pure = genop_getarrayitem_gc
@@ -500,7 +589,9 @@
base_loc, ofs_loc, size_loc, value_loc = arglocs
assert isinstance(size_loc, IMM32)
size = size_loc.value
- if size == WORD:
+ if size == WORD * 2:
+ self.mc.MOVSD(addr64_add(base_loc, ofs_loc), value_loc)
+ elif size == WORD:
self.mc.MOV(addr_add(base_loc, ofs_loc), value_loc)
elif size == 2:
self.mc.MOV16(addr_add(base_loc, ofs_loc), value_loc)
@@ -514,14 +605,18 @@
base_loc, ofs_loc, value_loc, scale_loc, baseofs = arglocs
assert isinstance(baseofs, IMM32)
assert isinstance(scale_loc, IMM32)
- if scale_loc.value == 2:
- self.mc.MOV(addr_add(base_loc, ofs_loc, baseofs.value,
- scale_loc.value), value_loc)
- elif scale_loc.value == 0:
- self.mc.MOV(addr8_add(base_loc, ofs_loc, baseofs.value,
- scale_loc.value), lower_byte(value_loc))
- else:
- raise NotImplementedError("scale = %d" % scale_loc.value)
+ if op.args[2].type == FLOAT:
+ self.mc.MOVSD(addr64_add(base_loc, ofs_loc, baseofs.value,
+ scale_loc.value), value_loc)
+ else:
+ if scale_loc.value == 2:
+ self.mc.MOV(addr_add(base_loc, ofs_loc, baseofs.value,
+ scale_loc.value), value_loc)
+ elif scale_loc.value == 0:
+ self.mc.MOV(addr8_add(base_loc, ofs_loc, baseofs.value,
+ scale_loc.value), lower_byte(value_loc))
+ else:
+ raise NotImplementedError("scale = %d" % scale_loc.value)
def genop_discard_strsetitem(self, op, arglocs):
base_loc, ofs_loc, val_loc = arglocs
@@ -580,17 +675,17 @@
else:
assert 0, itemsize
- def genop_guard_guard_true(self, ign_1, guard_opnum, addr, locs, ign_2):
+ def genop_guard_guard_true(self, ign_1, guard_op, addr, locs, ign_2):
loc = locs[0]
self.mc.TEST(loc, loc)
return self.implement_guard(addr, self.mc.JZ)
- def genop_guard_guard_no_exception(self, ign_1, guard_opnum, addr,
+ def genop_guard_guard_no_exception(self, ign_1, guard_op, addr,
locs, ign_2):
self.mc.CMP(heap(self.cpu.pos_exception()), imm(0))
return self.implement_guard(addr, self.mc.JNZ)
- def genop_guard_guard_exception(self, ign_1, guard_opnum, addr,
+ def genop_guard_guard_exception(self, ign_1, guard_op, addr,
locs, resloc):
loc = locs[0]
loc1 = locs[1]
@@ -603,57 +698,84 @@
self.mc.MOV(heap(self.cpu.pos_exc_value()), imm(0))
return addr
- def genop_guard_guard_no_overflow(self, ign_1, guard_opnum, addr,
+ def genop_guard_guard_no_overflow(self, ign_1, guard_op, addr,
locs, resloc):
return self.implement_guard(addr, self.mc.JO)
- def genop_guard_guard_overflow(self, ign_1, guard_opnum, addr,
+ def genop_guard_guard_overflow(self, ign_1, guard_op, addr,
locs, resloc):
return self.implement_guard(addr, self.mc.JNO)
- def genop_guard_guard_false(self, ign_1, guard_opnum, addr, locs, ign_2):
+ def genop_guard_guard_false(self, ign_1, guard_op, addr, locs, ign_2):
loc = locs[0]
self.mc.TEST(loc, loc)
return self.implement_guard(addr, self.mc.JNZ)
- def genop_guard_guard_value(self, ign_1, guard_opnum, addr, locs, ign_2):
- self.mc.CMP(locs[0], locs[1])
+ def genop_guard_guard_value(self, ign_1, guard_op, addr, locs, ign_2):
+ if guard_op.args[0].type == FLOAT:
+ assert guard_op.args[1].type == FLOAT
+ self.mc.UCOMISD(locs[0], locs[1])
+ else:
+ self.mc.CMP(locs[0], locs[1])
return self.implement_guard(addr, self.mc.JNE)
- def genop_guard_guard_class(self, ign_1, guard_opnum, addr, locs, ign_2):
+ def genop_guard_guard_class(self, ign_1, guard_op, addr, locs, ign_2):
offset = self.cpu.vtable_offset
self.mc.CMP(mem(locs[0], offset), locs[1])
return self.implement_guard(addr, self.mc.JNE)
+ def _no_const_locs(self, args, locs):
+ """ returns those locs which correspond to non-const args
+ """
+ newlocs = []
+ for i in range(len(args)):
+ arg = args[i]
+ if isinstance(arg, Box):
+ newlocs.append(locs[i])
+ return newlocs
+
def implement_guard_recovery(self, guard_opnum, faildescr, failargs,
fail_locs):
addr = self.mc2.tell()
exc = (guard_opnum == rop.GUARD_EXCEPTION or
guard_opnum == rop.GUARD_NO_EXCEPTION)
- faildescr._x86_faillocs = fail_locs
+ faildescr._x86_faillocs = self._no_const_locs(failargs, fail_locs)
self.generate_failure(self.mc2, faildescr, failargs, fail_locs, exc)
return addr
def generate_failure(self, mc, faildescr, failargs, locs, exc):
assert len(failargs) < MAX_FAIL_BOXES
pos = mc.tell()
- for i in range(len(locs)):
+ for i in range(len(failargs)):
+ arg = failargs[i]
loc = locs[i]
- if isinstance(loc, REG):
- if failargs[i].type == REF:
- base = self.fail_box_ptr_addr
- else:
- base = self.fail_box_int_addr
- mc.MOV(addr_add(imm(base), imm(i*WORD)), loc)
- for i in range(len(locs)):
+ if arg.type == FLOAT:
+ if isinstance(loc, REG):
+ mc.MOVSD(addr64_add(imm(self.fail_box_float_addr),
+ imm(i*WORD*2)), loc)
+ else:
+ if isinstance(loc, REG):
+ if arg.type == REF:
+ base = self.fail_box_ptr_addr
+ else:
+ base = self.fail_box_int_addr
+ mc.MOV(addr_add(imm(base), imm(i*WORD)), loc)
+ for i in range(len(failargs)):
+ arg = failargs[i]
loc = locs[i]
- if not isinstance(loc, REG):
- if failargs[i].type == REF:
- base = self.fail_box_ptr_addr
- else:
- base = self.fail_box_int_addr
- mc.MOV(eax, loc)
- mc.MOV(addr_add(imm(base), imm(i*WORD)), eax)
+ if arg.type == FLOAT:
+ if not isinstance(loc, REG):
+ mc.MOVSD(xmm0, loc)
+ mc.MOVSD(addr64_add(imm(self.fail_box_float_addr),
+ imm(i*WORD*2)), xmm0)
+ else:
+ if not isinstance(loc, REG):
+ if arg.type == REF:
+ base = self.fail_box_ptr_addr
+ else:
+ base = self.fail_box_int_addr
+ mc.MOV(eax, loc)
+ mc.MOV(addr_add(imm(base), imm(i*WORD)), eax)
if self.debug_markers:
mc.MOV(eax, imm(pos))
mc.MOV(addr_add(imm(self.fail_box_int_addr),
@@ -690,16 +812,42 @@
assert isinstance(sizeloc, IMM32)
size = sizeloc.value
nargs = len(op.args)-1
- extra_on_stack = self.align_stack_for_call(nargs)
- for i in range(nargs+1, 1, -1):
- self.mc.PUSH(arglocs[i])
+ extra_on_stack = 0
+ for arg in range(2, nargs + 2):
+ extra_on_stack += round_up_to_4(arglocs[arg].width)
+ extra_on_stack = self.align_stack_for_call(extra_on_stack)
+ self.mc.SUB(esp, imm(extra_on_stack))
if isinstance(op.args[0], Const):
x = rel32(op.args[0].getint())
else:
x = arglocs[1]
+ if x is eax:
+ tmp = ecx
+ else:
+ tmp = eax
+ p = 0
+ for i in range(2, nargs + 2):
+ loc = arglocs[i]
+ if isinstance(loc, REG):
+ if isinstance(loc, XMMREG):
+ self.mc.MOVSD(mem64(esp, p), loc)
+ else:
+ self.mc.MOV(mem(esp, p), loc)
+ p += round_up_to_4(loc.width)
+ p = 0
+ for i in range(2, nargs + 2):
+ loc = arglocs[i]
+ if not isinstance(loc, REG):
+ if isinstance(loc, MODRM64):
+ self.mc.MOVSD(xmm0, loc)
+ self.mc.MOVSD(mem64(esp, p), xmm0)
+ else:
+ self.mc.MOV(tmp, loc)
+ self.mc.MOV(mem(esp, p), tmp)
+ p += round_up_to_4(loc.width)
self.mc.CALL(x)
self.mark_gc_roots()
- self.mc.ADD(esp, imm(WORD * extra_on_stack))
+ self.mc.ADD(esp, imm(extra_on_stack))
if size == 1:
self.mc.AND(eax, imm(0xff))
elif size == 2:
@@ -737,16 +885,19 @@
mc.overwrite(jz_location-1, chr(offset))
def not_implemented_op_discard(self, op, arglocs):
- print "not implemented operation: %s" % op.getopname()
- raise NotImplementedError
+ msg = "not implemented operation: %s" % op.getopname()
+ print msg
+ raise NotImplementedError(msg)
def not_implemented_op(self, op, arglocs, resloc):
- print "not implemented operation with res: %s" % op.getopname()
- raise NotImplementedError
+ msg = "not implemented operation with res: %s" % op.getopname()
+ print msg
+ raise NotImplementedError(msg)
def not_implemented_op_guard(self, op, regalloc, arglocs, resloc, descr):
- print "not implemented operation (guard): %s" % op.getopname()
- raise NotImplementedError
+ msg = "not implemented operation (guard): %s" % op.getopname()
+ print msg
+ raise NotImplementedError(msg)
def mark_gc_roots(self):
gcrootmap = self.cpu.gc_ll_descr.gcrootmap
@@ -779,34 +930,32 @@
num = getattr(rop, opname.upper())
genop_list[num] = value
-def addr_add(reg_or_imm1, reg_or_imm2, offset=0, scale=0):
- if isinstance(reg_or_imm1, IMM32):
- if isinstance(reg_or_imm2, IMM32):
- return heap(reg_or_imm1.value + offset +
- (reg_or_imm2.value << scale))
- else:
- return memSIB(None, reg_or_imm2, scale, reg_or_imm1.value + offset)
- else:
- if isinstance(reg_or_imm2, IMM32):
- return mem(reg_or_imm1, offset + (reg_or_imm2.value << scale))
+def new_addr_add(heap, mem, memsib):
+ def addr_add(reg_or_imm1, reg_or_imm2, offset=0, scale=0):
+ if isinstance(reg_or_imm1, IMM32):
+ if isinstance(reg_or_imm2, IMM32):
+ return heap(reg_or_imm1.value + offset +
+ (reg_or_imm2.value << scale))
+ else:
+ return memsib(None, reg_or_imm2, scale, reg_or_imm1.value + offset)
else:
- return memSIB(reg_or_imm1, reg_or_imm2, scale, offset)
+ if isinstance(reg_or_imm2, IMM32):
+ return mem(reg_or_imm1, offset + (reg_or_imm2.value << scale))
+ else:
+ return memsib(reg_or_imm1, reg_or_imm2, scale, offset)
+ return addr_add
-def addr8_add(reg_or_imm1, reg_or_imm2, offset=0, scale=0):
- if isinstance(reg_or_imm1, IMM32):
- if isinstance(reg_or_imm2, IMM32):
- return heap8(reg_or_imm1.value + (offset << scale) +
- reg_or_imm2.value)
- else:
- return memSIB8(None, reg_or_imm2, scale, reg_or_imm1.value + offset)
- else:
- if isinstance(reg_or_imm2, IMM32):
- return mem8(reg_or_imm1, (offset << scale) + reg_or_imm2.value)
- else:
- return memSIB8(reg_or_imm1, reg_or_imm2, scale, offset)
+addr8_add = new_addr_add(heap8, mem8, memSIB8)
+addr_add = new_addr_add(heap, mem, memSIB)
+addr64_add = new_addr_add(heap64, mem64, memSIB64)
def addr_add_const(reg_or_imm1, offset):
if isinstance(reg_or_imm1, IMM32):
return heap(reg_or_imm1.value + offset)
else:
return mem(reg_or_imm1, offset)
+
+def round_up_to_4(size):
+ if size < 4:
+ return 4
+ return size
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/jump.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/jump.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/jump.py Mon Oct 5 10:34:06 2009
@@ -80,10 +80,7 @@
assert pending_dests == 0
def _move(assembler, src, dst, tmpreg):
- if isinstance(dst, MODRM):
- if isinstance(src, MODRM):
- assembler.regalloc_mov(src, tmpreg)
- src = tmpreg
- assembler.regalloc_mov(src, dst)
- else:
- assembler.regalloc_mov(src, dst)
+ if isinstance(dst, MODRM) and isinstance(src, MODRM):
+ assembler.regalloc_mov(src, tmpreg)
+ src = tmpreg
+ assembler.regalloc_mov(src, dst)
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/regalloc.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/regalloc.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/regalloc.py Mon Oct 5 10:34:06 2009
@@ -4,7 +4,7 @@
from pypy.jit.metainterp.history import (Box, Const, ConstInt, ConstPtr,
ResOperation, ConstAddr, BoxPtr,
- LoopToken)
+ LoopToken, INT, REF, FLOAT)
from pypy.jit.backend.x86.ri386 import *
from pypy.rpython.lltypesystem import lltype, ll2ctypes, rffi, rstr
from pypy.rlib.objectmodel import we_are_translated
@@ -20,8 +20,15 @@
WORD = 4
+width_of_type = {
+ INT : 1,
+ REF : 1,
+ FLOAT : 2,
+ }
+
class X86RegisterManager(RegisterManager):
+ box_types = [INT, REF]
all_regs = [eax, ecx, edx, ebx, esi, edi]
no_lower_byte_regs = [esi, edi]
save_around_call_regs = [eax, edx, ecx]
@@ -43,11 +50,70 @@
print "convert_to_imm: got a %s" % c
raise AssertionError
+BASE_CONSTANT_SIZE = 1000
+
+# cheat cheat cheat....
+# why not -0.0? People tell me it's platform-dependent
+# nan is not portable
+import struct
+NEG_ZERO, = struct.unpack('d', struct.pack('ll', 0, -2147483648))
+NAN, = struct.unpack('d', struct.pack('ll', -1, 2147483647))
+# XXX These are actually masks for float_neg and float_abs.
+# They should not be converted to 'double' and given
+# names that reflect their float value.
+
+class X86XMMRegisterManager(RegisterManager):
+
+ box_types = [FLOAT]
+ all_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7]
+ # we never need lower byte I hope
+ save_around_call_regs = all_regs
+ reg_width = 2
+
+ def new_const_array(self):
+ return lltype.malloc(rffi.CArray(lltype.Float), BASE_CONSTANT_SIZE,
+ flavor='raw')
+
+ def __init__(self, longevity, stack_manager=None, assembler=None):
+ RegisterManager.__init__(self, longevity, stack_manager=stack_manager,
+ assembler=assembler)
+ self.constant_arrays = [self.new_const_array()]
+ self.constant_arrays[-1][0] = NEG_ZERO
+ self.constant_arrays[-1][1] = NAN
+ self.constant_array_counter = 2
+
+ def convert_to_imm(self, c):
+ if self.constant_array_counter >= BASE_CONSTANT_SIZE:
+ self.constant_arrays.append(self.new_const_array())
+ self.constant_array_counter = 0
+ res = self.constant_array_counter
+ self.constant_array_counter += 1
+ arr = self.constant_arrays[-1]
+ arr[res] = c.getfloat()
+ return self.get_addr_of_const_float(-1, res)
+
+ def get_addr_of_const_float(self, num_arr, num_pos):
+ arr = self.constant_arrays[num_arr]
+ return heap64(rffi.cast(lltype.Signed, arr) + num_pos * WORD * 2)
+
+ def after_call(self, v):
+ # the result is stored in st0, but we don't have this around,
+ # so we move it to some stack location
+ if v is not None:
+ loc = self.stack_manager.loc(v, 2)
+ self.assembler.regalloc_fstp(loc)
+
class X86StackManager(StackManager):
@staticmethod
- def stack_pos(i):
- res = mem(ebp, get_ebp_ofs(i))
+ def stack_pos(i, size):
+ if size == 1:
+ res = mem(ebp, get_ebp_ofs(i))
+ elif size == 2:
+ res = mem64(ebp, get_ebp_ofs(i + 1))
+ else:
+ print "Unimplemented size %d" % i
+ raise NotImplementedError("unimplemented size %d" % i)
res.position = i
return res
@@ -68,9 +134,12 @@
cpu.gc_ll_descr.rewrite_assembler(cpu, operations)
# compute longevity of variables
longevity = self._compute_vars_longevity(inputargs, operations)
+ self.longevity = longevity
self.rm = X86RegisterManager(longevity,
stack_manager = self.sm,
assembler = self.assembler)
+ self.xrm = X86XMMRegisterManager(longevity, stack_manager = self.sm,
+ assembler = self.assembler)
def prepare_loop(self, inputargs, operations, looptoken):
self._prepare(inputargs, operations)
@@ -88,28 +157,72 @@
def _process_inputargs(self, inputargs):
# XXX we can sort out here by longevity if we need something
# more optimal
- locs = [None] * len(inputargs)
+ floatlocs = [None] * len(inputargs)
+ nonfloatlocs = [None] * len(inputargs)
# Don't use all_regs[0] for passing arguments around a loop.
# Must be kept in sync with consider_jump().
# XXX this should probably go to llsupport/regalloc.py
+ xmmtmp = self.xrm.free_regs.pop(0)
tmpreg = self.rm.free_regs.pop(0)
assert tmpreg == X86RegisterManager.all_regs[0]
+ assert xmmtmp == X86XMMRegisterManager.all_regs[0]
for i in range(len(inputargs)):
arg = inputargs[i]
assert not isinstance(arg, Const)
reg = None
- if arg not in self.loop_consts and self.rm.longevity[arg][1] > -1:
- reg = self.rm.try_allocate_reg(arg)
+ if arg not in self.loop_consts and self.longevity[arg][1] > -1:
+ if arg.type == FLOAT:
+ # xxx is it really a good idea? at the first CALL they
+ # will all be flushed anyway
+ reg = self.xrm.try_allocate_reg(arg)
+ else:
+ reg = self.rm.try_allocate_reg(arg)
if reg:
- locs[i] = reg
+ loc = reg
else:
- loc = self.sm.loc(arg)
- locs[i] = loc
+ loc = self.sm.loc(arg, width_of_type[arg.type])
+ if arg.type == FLOAT:
+ floatlocs[i] = loc
+ else:
+ nonfloatlocs[i] = loc
# otherwise we have it saved on stack, so no worry
self.rm.free_regs.insert(0, tmpreg)
- assert tmpreg not in locs
- self.rm.possibly_free_vars(inputargs)
- return locs
+ self.xrm.free_regs.insert(0, xmmtmp)
+ assert tmpreg not in nonfloatlocs
+ assert xmmtmp not in floatlocs
+ self.possibly_free_vars(inputargs)
+ return nonfloatlocs, floatlocs
+
+ def possibly_free_var(self, var):
+ if var.type == FLOAT:
+ self.xrm.possibly_free_var(var)
+ else:
+ self.rm.possibly_free_var(var)
+
+ def possibly_free_vars(self, vars):
+ for var in vars:
+ self.possibly_free_var(var)
+
+ def make_sure_var_in_reg(self, var, forbidden_vars=[],
+ selected_reg=None, imm_fine=True,
+ need_lower_byte=False):
+ if var.type == FLOAT:
+ return self.xrm.make_sure_var_in_reg(var, forbidden_vars,
+ selected_reg, imm_fine,
+ need_lower_byte)
+ else:
+ return self.rm.make_sure_var_in_reg(var, forbidden_vars,
+ selected_reg, imm_fine,
+ need_lower_byte)
+
+ def force_allocate_reg(self, var, forbidden_vars=[], selected_reg=None,
+ need_lower_byte=False):
+ if var.type == FLOAT:
+ return self.xrm.force_allocate_reg(var, forbidden_vars,
+ selected_reg, need_lower_byte)
+ else:
+ return self.rm.force_allocate_reg(var, forbidden_vars,
+ selected_reg, need_lower_byte)
def _compute_loop_consts(self, inputargs, jump, looptoken):
if jump.opnum != rop.JUMP or jump.descr is not looptoken:
@@ -123,26 +236,33 @@
def _update_bindings(self, locs, args):
# XXX this should probably go to llsupport/regalloc.py
- newlocs = []
- for loc in locs:
- if not isinstance(loc, IMM8) and not isinstance(loc, IMM32):
- newlocs.append(loc)
- locs = newlocs
- assert len(locs) == len(args)
used = {}
- for i in range(len(locs)):
- v = args[i]
+ for i in range(len(args)):
+ arg = args[i]
loc = locs[i]
- if isinstance(loc, REG) and self.rm.longevity[v][1] > -1:
- self.rm.reg_bindings[v] = loc
- used[loc] = None
+ if arg.type == FLOAT:
+ if isinstance(loc, REG):
+ self.xrm.reg_bindings[arg] = loc
+ used[loc] = None
+ else:
+ self.sm.stack_bindings[arg] = loc
else:
- self.sm.stack_bindings[v] = loc
+ if isinstance(loc, REG):
+ self.rm.reg_bindings[arg] = loc
+ used[loc] = None
+ else:
+ self.sm.stack_bindings[arg] = loc
self.rm.free_regs = []
for reg in X86RegisterManager.all_regs:
if reg not in used:
self.rm.free_regs.append(reg)
+ self.xrm.free_regs = []
+ for reg in X86XMMRegisterManager.all_regs:
+ if reg not in used:
+ self.xrm.free_regs.append(reg)
+ self.possibly_free_vars(args)
self.rm._check_invariants()
+ self.xrm._check_invariants()
def Perform(self, op, arglocs, result_loc):
if not we_are_translated():
@@ -155,11 +275,12 @@
def perform_with_guard(self, op, guard_op, arglocs, result_loc):
faillocs = self.locs_for_fail(guard_op)
self.rm.position += 1
+ self.xrm.position += 1
self.assembler.regalloc_perform_with_guard(op, guard_op, faillocs,
arglocs, result_loc,
self.sm.stack_depth)
self.rm.possibly_free_var(op.result)
- self.rm.possibly_free_vars(guard_op.fail_args)
+ self.possibly_free_vars(guard_op.fail_args)
def perform_guard(self, guard_op, arglocs, result_loc):
faillocs = self.locs_for_fail(guard_op)
@@ -172,7 +293,7 @@
self.assembler.regalloc_perform_guard(guard_op, faillocs, arglocs,
result_loc,
self.sm.stack_depth)
- self.rm.possibly_free_vars(guard_op.fail_args)
+ self.possibly_free_vars(guard_op.fail_args)
def PerformDiscard(self, op, arglocs):
if not we_are_translated():
@@ -188,7 +309,7 @@
return False
if operations[i + 1].args[0] is not op.result:
return False
- if (self.rm.longevity[op.result][1] > i + 1 or
+ if (self.longevity[op.result][1] > i + 1 or
op.result in operations[i + 1].fail_args):
return False
return True
@@ -199,19 +320,23 @@
while i < len(operations):
op = operations[i]
self.rm.position = i
- if op.has_no_side_effect() and op.result not in self.rm.longevity:
+ self.xrm.position = i
+ if op.has_no_side_effect() and op.result not in self.longevity:
i += 1
- self.rm.possibly_free_vars(op.args)
+ self.possibly_free_vars(op.args)
continue
if self.can_optimize_cmp_op(op, i, operations):
oplist[op.opnum](self, op, operations[i + 1])
i += 1
else:
oplist[op.opnum](self, op, None)
- self.rm.possibly_free_var(op.result)
+ if op.result is not None:
+ self.possibly_free_var(op.result)
self.rm._check_invariants()
+ self.xrm._check_invariants()
i += 1
assert not self.rm.reg_bindings
+ assert not self.xrm.reg_bindings
def _compute_vars_longevity(self, inputargs, operations):
# compute a dictionary that maps variables to index in
@@ -245,6 +370,8 @@
return longevity
def loc(self, v):
+ if v.type == FLOAT:
+ return self.xrm.loc(v)
return self.rm.loc(v)
def _consider_guard(self, op, ignored):
@@ -256,10 +383,10 @@
consider_guard_false = _consider_guard
def consider_finish(self, op, ignored):
- locs = [self.loc(arg) for arg in op.args]
+ locs = [self.loc(v) for v in op.args]
self.assembler.generate_failure(self.assembler.mc, op.descr, op.args,
locs, self.exc)
- self.rm.possibly_free_vars(op.args)
+ self.possibly_free_vars(op.args)
def consider_guard_no_exception(self, op, ignored):
self.perform_guard(op, [], None)
@@ -268,7 +395,7 @@
loc = self.rm.make_sure_var_in_reg(op.args[0])
box = TempBox()
loc1 = self.rm.force_allocate_reg(box, op.args)
- if op.result in self.rm.longevity:
+ if op.result in self.longevity:
# this means, is it ever used
resloc = self.rm.force_allocate_reg(op.result, op.args + [box])
else:
@@ -281,10 +408,10 @@
consider_guard_overflow = consider_guard_no_exception
def consider_guard_value(self, op, ignored):
- x = self.rm.make_sure_var_in_reg(op.args[0])
+ x = self.make_sure_var_in_reg(op.args[0])
y = self.loc(op.args[1])
self.perform_guard(op, [x, y], None)
- self.rm.possibly_free_vars(op.args)
+ self.possibly_free_vars(op.args)
def consider_guard_class(self, op, ignored):
assert isinstance(op.args[0], Box)
@@ -385,10 +512,89 @@
consider_oois = _consider_compop
consider_ooisnot = _consider_compop
+ def _consider_float_op(self, op, ignored):
+ loc0 = self.xrm.force_result_in_reg(op.result, op.args[0], op.args)
+ loc1 = self.xrm.loc(op.args[1])
+ self.Perform(op, [loc0, loc1], loc0)
+ self.xrm.possibly_free_vars(op.args)
+
+ consider_float_add = _consider_float_op
+ consider_float_sub = _consider_float_op
+ consider_float_mul = _consider_float_op
+ consider_float_truediv = _consider_float_op
+
+ def _consider_float_cmp(self, op, ignored):
+ assert ignored is None
+ # XXX so far we don't have guards here, but we want them
+ loc0 = self.xrm.make_sure_var_in_reg(op.args[0], op.args,
+ imm_fine=False)
+ loc1 = self.xrm.loc(op.args[1])
+ res = self.rm.force_allocate_reg(op.result, need_lower_byte=True)
+ self.Perform(op, [loc0, loc1], res)
+ self.xrm.possibly_free_vars(op.args)
+
+ consider_float_lt = _consider_float_cmp
+ consider_float_le = _consider_float_cmp
+ consider_float_eq = _consider_float_cmp
+ consider_float_ne = _consider_float_cmp
+ consider_float_gt = _consider_float_cmp
+ consider_float_ge = _consider_float_cmp
+
+ def consider_float_neg(self, op, ignored):
+ # Following what gcc does...
+ # XXX we can ignore having constant in a reg, but we need
+ # to be careful with 128-bit alignment
+ loc0 = self.xrm.force_result_in_reg(op.result, op.args[0])
+ constloc = self.xrm.get_addr_of_const_float(0, 0)
+ tmpbox = TempBox()
+ loc1 = self.xrm.force_allocate_reg(tmpbox, op.args)
+ self.assembler.regalloc_mov(constloc, loc1)
+ self.Perform(op, [loc0, loc1], loc0)
+ self.xrm.possibly_free_var(tmpbox)
+ self.xrm.possibly_free_var(op.args[0])
+
+ def consider_float_abs(self, op, ignored):
+ # XXX we can ignore having constant in a reg, but we need
+ # to be careful with 128-bit alignment
+ loc0 = self.xrm.force_result_in_reg(op.result, op.args[0])
+ constloc = self.xrm.get_addr_of_const_float(0, 1)
+ tmpbox = TempBox()
+ loc1 = self.xrm.force_allocate_reg(tmpbox, op.args)
+ self.assembler.regalloc_mov(constloc, loc1)
+ self.Perform(op, [loc0, loc1], loc0)
+ self.xrm.possibly_free_var(tmpbox)
+ self.xrm.possibly_free_var(op.args[0])
+
+ def consider_float_is_true(self, op, ignored):
+ tmpbox0 = TempBox()
+ loc0 = self.xrm.force_allocate_reg(tmpbox0)
+ loc1 = self.xrm.loc(op.args[0])
+ loc2 = self.rm.force_allocate_reg(op.result, need_lower_byte=True)
+ self.Perform(op, [loc0, loc1], loc2)
+ self.xrm.possibly_free_var(op.args[0])
+ self.xrm.possibly_free_var(tmpbox0)
+
+ def consider_cast_float_to_int(self, op, ignored):
+ loc0 = self.xrm.make_sure_var_in_reg(op.args[0], imm_fine=False)
+ loc1 = self.rm.force_allocate_reg(op.result)
+ self.Perform(op, [loc0], loc1)
+ self.xrm.possibly_free_var(op.args[0])
+
+ def consider_cast_int_to_float(self, op, ignored):
+ loc0 = self.rm.loc(op.args[0])
+ loc1 = self.xrm.force_allocate_reg(op.result)
+ self.Perform(op, [loc0], loc1)
+ self.rm.possibly_free_var(op.args[0])
+
def _call(self, op, arglocs, force_store=[]):
self.rm.before_call(force_store)
+ self.xrm.before_call(force_store)
self.Perform(op, arglocs, eax)
- self.rm.after_call(op.result)
+ if op.result is not None:
+ if op.result.type == FLOAT:
+ self.xrm.after_call(op.result)
+ else:
+ self.rm.after_call(op.result)
def consider_call(self, op, ignored):
calldescr = op.descr
@@ -514,9 +720,9 @@
else:
need_lower_byte = False
base_loc = self.rm.make_sure_var_in_reg(op.args[0], op.args)
- value_loc = self.rm.make_sure_var_in_reg(op.args[1], op.args,
+ value_loc = self.make_sure_var_in_reg(op.args[1], op.args,
need_lower_byte=need_lower_byte)
- self.rm.possibly_free_vars(op.args)
+ self.possibly_free_vars(op.args)
self.PerformDiscard(op, [base_loc, ofs_loc, size_loc, value_loc])
consider_setfield_raw = consider_setfield_gc
@@ -538,10 +744,10 @@
need_lower_byte = True
else:
need_lower_byte = False
- value_loc = self.rm.make_sure_var_in_reg(op.args[2], op.args,
- need_lower_byte=need_lower_byte)
+ value_loc = self.make_sure_var_in_reg(op.args[2], op.args,
+ need_lower_byte=need_lower_byte)
ofs_loc = self.rm.make_sure_var_in_reg(op.args[1], op.args)
- self.rm.possibly_free_vars(op.args)
+ self.possibly_free_vars(op.args)
self.PerformDiscard(op, [base_loc, ofs_loc, value_loc,
imm(scale), imm(ofs)])
@@ -551,7 +757,7 @@
ofs_loc, size_loc, _ = self._unpack_fielddescr(op.descr)
base_loc = self.rm.make_sure_var_in_reg(op.args[0], op.args)
self.rm.possibly_free_vars(op.args)
- result_loc = self.rm.force_allocate_reg(op.result)
+ result_loc = self.force_allocate_reg(op.result)
self.Perform(op, [base_loc, ofs_loc, size_loc], result_loc)
consider_getfield_gc_pure = consider_getfield_gc
@@ -561,7 +767,7 @@
base_loc = self.rm.make_sure_var_in_reg(op.args[0], op.args)
ofs_loc = self.rm.make_sure_var_in_reg(op.args[1], op.args)
self.rm.possibly_free_vars(op.args)
- result_loc = self.rm.force_allocate_reg(op.result)
+ result_loc = self.force_allocate_reg(op.result)
self.Perform(op, [base_loc, ofs_loc, imm(scale), imm(ofs)], result_loc)
consider_getfield_raw = consider_getfield_gc
@@ -586,8 +792,8 @@
def consider_same_as(self, op, ignored):
argloc = self.loc(op.args[0])
- self.rm.possibly_free_var(op.args[0])
- resloc = self.rm.force_allocate_reg(op.result)
+ self.possibly_free_var(op.args[0])
+ resloc = self.force_allocate_reg(op.result)
self.Perform(op, [argloc], resloc)
consider_cast_ptr_to_int = consider_same_as
@@ -623,17 +829,26 @@
descr = op.descr
assert isinstance(descr, LoopToken)
self.jump_target_descr = descr
- arglocs = assembler.target_arglocs(self.jump_target_descr)
+ nonfloatlocs, floatlocs = assembler.target_arglocs(self.jump_target_descr)
# compute 'tmploc' to be all_regs[0] by spilling what is there
box = TempBox()
+ box1 = TempBox()
tmpreg = X86RegisterManager.all_regs[0]
- tmploc = self.rm.force_allocate_reg(box, [], selected_reg=tmpreg)
- src_locations = [self.rm.loc(arg) for arg in op.args]
- dst_locations = arglocs
- assert tmploc not in dst_locations
+ tmploc = self.rm.force_allocate_reg(box, selected_reg=tmpreg)
+ xmmtmp = X86XMMRegisterManager.all_regs[0]
+ xmmtmploc = self.xrm.force_allocate_reg(box1, selected_reg=xmmtmp)
+ # Part about non-floats
+ src_locations = [self.loc(arg) for arg in op.args if arg.type != FLOAT]
+ assert tmploc not in nonfloatlocs
+ dst_locations = [loc for loc in nonfloatlocs if loc is not None]
remap_stack_layout(assembler, src_locations, dst_locations, tmploc)
+ # Part about floats
+ src_locations = [self.loc(arg) for arg in op.args if arg.type == FLOAT]
+ dst_locations = [loc for loc in floatlocs if loc is not None]
+ remap_stack_layout(assembler, src_locations, dst_locations, xmmtmp)
self.rm.possibly_free_var(box)
- self.rm.possibly_free_vars(op.args)
+ self.xrm.possibly_free_var(box1)
+ self.possibly_free_vars(op.args)
assembler.closing_jump(self.jump_target_descr)
def consider_debug_merge_point(self, op, ignored):
@@ -658,8 +873,9 @@
return gcrootmap.compress_callshape(shape)
def not_implemented_op(self, op, ignored):
- print "[regalloc] Not implemented operation: %s" % op.getopname()
- raise NotImplementedError
+ msg = "[regalloc] Not implemented operation: %s" % op.getopname()
+ print msg
+ raise NotImplementedError(msg)
oplist = [RegAlloc.not_implemented_op] * rop._LAST
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/ri386.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/ri386.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/ri386.py Mon Oct 5 10:34:06 2009
@@ -27,7 +27,7 @@
def assembler(self):
raise TypeError("Float registers should not appear in assembler")
-class XMMREG(OPERAND):
+class XMMREG(REG):
width = 8
def __repr__(self):
@@ -309,6 +309,10 @@
assert register.width == 1
return MODRM8(0xC0 | register.op, '')
+def memregister64(register):
+ assert register.width == 8
+ return MODRM64(0xC0 | register.op, '')
+
def mem8(basereg, offset=0):
return memSIB8(basereg, None, 0, offset)
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/ri386setup.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/ri386setup.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/ri386setup.py Mon Oct 5 10:34:06 2009
@@ -11,6 +11,9 @@
def reg2modrm(builder, reg):
return memregister(reg)
+def reg2modrm64(builder, reg):
+ return memregister64(reg)
+
def reg2modrm8(builder, reg):
return memregister8(reg)
@@ -45,7 +48,7 @@
MODRM: [(MODRM, None)],
MODRM8: [(MODRM8, None)],
MODRM64: [(MODRM64, None)],
- XMMREG: [(XMMREG, None)],
+ XMMREG: [(XMMREG, None), (MODRM64, reg2modrm64)],
MISSING: [(MISSING, None)], # missing operands
}
@@ -486,10 +489,10 @@
FUCOMPP = Instruction()
FUCOMPP.mode0(['\xDA\xE9'])
-FSTPL = Instruction()
-FSTPL.mode1(MODRM64, ['\xDD', orbyte(3<<3), modrm(1)])
-FSTL = Instruction()
-FSTL.mode1(MODRM64, ['\xDD', orbyte(2<<3), modrm(1)])
+FSTP = Instruction()
+FSTP.mode1(MODRM64, ['\xDD', orbyte(3<<3), modrm(1)])
+FST = Instruction()
+FST.mode1(MODRM64, ['\xDD', orbyte(2<<3), modrm(1)])
FISTP = Instruction()
FISTP.mode1(MODRM, ['\xDB', orbyte(3<<3), modrm(1)])
@@ -522,6 +525,24 @@
DIVSD = Instruction()
DIVSD.mode2(XMMREG, MODRM64, ['\xF2\x0F\x5E', register(1, 8), modrm(2)])
+UCOMISD = Instruction()
+UCOMISD.mode2(XMMREG, MODRM64, ['\x66\x0F\x2E', register(1, 8), modrm(2)])
+
+XORPD = Instruction()
+XORPD.mode2(XMMREG, XMMREG, ['\x66\x0f\x57', register(1, 8), register(2),
+ '\xC0'])
+
+ANDPD = Instruction()
+ANDPD.mode2(XMMREG, XMMREG, ['\x66\x0F\x54', register(1, 8), register(2),
+ '\xC0'])
+
+CVTTSD2SI = Instruction()
+CVTTSD2SI.mode2(REG, XMMREG, ['\xF2\x0F\x2C', register(1, 8), register(2),
+ '\xC0'])
+
+CVTSI2SD = Instruction()
+CVTSI2SD.mode2(XMMREG, MODRM, ['\xF2\x0F\x2A', register(1, 8), modrm(2)])
+
# ------------------------------ end of SSE2 -----------------------------
UD2 = Instruction() # reserved as an illegal instruction
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/runner.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/runner.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/runner.py Mon Oct 5 10:34:06 2009
@@ -11,6 +11,7 @@
class CPU386(AbstractLLCPU):
debug = True
+ supports_floats = True
BOOTSTRAP_TP = lltype.FuncType([], lltype.Signed)
dont_keepalive_stuff = False # for tests
@@ -48,6 +49,10 @@
assert index < MAX_FAIL_BOXES, "overflow!"
self.assembler.fail_boxes_int[index] = intvalue
+ def set_future_value_float(self, index, floatvalue):
+ assert index < MAX_FAIL_BOXES, "overflow!"
+ self.assembler.fail_boxes_float[index] = floatvalue
+
def set_future_value_ref(self, index, ptrvalue):
assert index < MAX_FAIL_BOXES, "overflow!"
self.assembler.fail_boxes_ptr[index] = ptrvalue
@@ -55,6 +60,9 @@
def get_latest_value_int(self, index):
return self.assembler.fail_boxes_int[index]
+ def get_latest_value_float(self, index):
+ return self.assembler.fail_boxes_float[index]
+
def get_latest_value_ref(self, index):
ptrvalue = self.assembler.fail_boxes_ptr[index]
# clear after reading
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/test/test_gc_integration.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/test/test_gc_integration.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/test/test_gc_integration.py Mon Oct 5 10:34:06 2009
@@ -18,7 +18,8 @@
from pypy.jit.backend.x86.test.test_regalloc import MockAssembler
from pypy.jit.backend.x86.test.test_regalloc import BaseTestRegalloc
-from pypy.jit.backend.x86.regalloc import X86RegisterManager, X86StackManager
+from pypy.jit.backend.x86.regalloc import X86RegisterManager, X86StackManager,\
+ X86XMMRegisterManager
class MockGcRootMap(object):
def get_basic_shape(self):
@@ -64,6 +65,8 @@
regalloc.sm = X86StackManager()
regalloc.rm = X86RegisterManager(longevity, regalloc.sm,
assembler=regalloc.assembler)
+ regalloc.xrm = X86XMMRegisterManager(longevity, regalloc.sm,
+ assembler=regalloc.assembler)
cpu = regalloc.assembler.cpu
for box in boxes:
regalloc.rm.try_allocate_reg(box)
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/test/test_jump.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/test/test_jump.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/test/test_jump.py Mon Oct 5 10:34:06 2009
@@ -41,9 +41,9 @@
remap_stack_layout(assembler, [eax, ebx, ecx, edx, esi, edi],
[eax, ebx, ecx, edx, esi, edi], '?')
assert assembler.ops == []
- s8 = stack_pos(1)
- s12 = stack_pos(31)
- s20 = stack_pos(6)
+ s8 = stack_pos(1, 1)
+ s12 = stack_pos(31, 1)
+ s20 = stack_pos(6, 1)
remap_stack_layout(assembler, [eax, ebx, ecx, s20, s8, edx, s12, esi, edi],
[eax, ebx, ecx, s20, s8, edx, s12, esi, edi],
'?')
@@ -58,10 +58,10 @@
def test_simple_stacklocs():
assembler = MockAssembler()
- s8 = stack_pos(0)
- s12 = stack_pos(13)
- s20 = stack_pos(20)
- s24 = stack_pos(221)
+ s8 = stack_pos(0, 1)
+ s12 = stack_pos(13, 1)
+ s20 = stack_pos(20, 1)
+ s24 = stack_pos(221, 1)
remap_stack_layout(assembler, [s8, eax, s12], [s20, s24, edi], edx)
assert assembler.ops == [('mov', s8, edx),
('mov', edx, s20),
@@ -70,10 +70,10 @@
def test_reordering():
assembler = MockAssembler()
- s8 = stack_pos(8)
- s12 = stack_pos(12)
- s20 = stack_pos(19)
- s24 = stack_pos(1)
+ s8 = stack_pos(8, 1)
+ s12 = stack_pos(12, 1)
+ s20 = stack_pos(19, 1)
+ s24 = stack_pos(1, 1)
remap_stack_layout(assembler, [eax, s8, s20, ebx],
[s8, ebx, eax, edi], '?')
assert assembler.got([('mov', ebx, edi),
@@ -83,10 +83,10 @@
def test_cycle():
assembler = MockAssembler()
- s8 = stack_pos(8)
- s12 = stack_pos(12)
- s20 = stack_pos(19)
- s24 = stack_pos(1)
+ s8 = stack_pos(8, 1)
+ s12 = stack_pos(12, 1)
+ s20 = stack_pos(19, 1)
+ s24 = stack_pos(1, 1)
remap_stack_layout(assembler, [eax, s8, s20, ebx],
[s8, ebx, eax, s20], '?')
assert assembler.got([('push', s8),
@@ -97,12 +97,12 @@
def test_cycle_2():
assembler = MockAssembler()
- s8 = stack_pos(8)
- s12 = stack_pos(12)
- s20 = stack_pos(19)
- s24 = stack_pos(1)
- s2 = stack_pos(2)
- s3 = stack_pos(3)
+ s8 = stack_pos(8, 1)
+ s12 = stack_pos(12, 1)
+ s20 = stack_pos(19, 1)
+ s24 = stack_pos(1, 1)
+ s2 = stack_pos(2, 1)
+ s3 = stack_pos(3, 1)
remap_stack_layout(assembler,
[eax, s8, edi, s20, eax, s20, s24, esi, s2, s3],
[s8, s20, edi, eax, edx, s24, ebx, s12, s3, s2],
@@ -127,14 +127,14 @@
remap_stack_layout(assembler, [c3], [eax], '?')
assert assembler.ops == [('mov', c3, eax)]
assembler = MockAssembler()
- s12 = stack_pos(12)
+ s12 = stack_pos(12, 1)
remap_stack_layout(assembler, [c3], [s12], '?')
assert assembler.ops == [('mov', c3, s12)]
def test_constants_and_cycle():
assembler = MockAssembler()
c3 = imm(3)
- s12 = stack_pos(13)
+ s12 = stack_pos(13, 1)
remap_stack_layout(assembler, [ebx, c3, s12],
[s12, eax, ebx], edi)
assert assembler.ops == [('mov', c3, eax),
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/test/test_regalloc.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/test/test_regalloc.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/test/test_regalloc.py Mon Oct 5 10:34:06 2009
@@ -8,7 +8,8 @@
from pypy.jit.metainterp.resoperation import rop, ResOperation
from pypy.jit.backend.llsupport.descr import GcCache
from pypy.jit.backend.x86.runner import CPU
-from pypy.jit.backend.x86.regalloc import RegAlloc, WORD, X86RegisterManager
+from pypy.jit.backend.x86.regalloc import RegAlloc, WORD, X86RegisterManager,\
+ BASE_CONSTANT_SIZE
from pypy.jit.metainterp.test.oparser import parse
from pypy.rpython.lltypesystem import lltype, llmemory, rffi
from pypy.rpython.annlowlevel import llhelper
@@ -94,6 +95,8 @@
for i, arg in enumerate(args):
if isinstance(arg, int):
self.cpu.set_future_value_int(i, arg)
+ elif isinstance(arg, float):
+ self.cpu.set_future_value_float(i, arg)
else:
assert isinstance(lltype.typeOf(arg), lltype.Ptr)
llgcref = lltype.cast_opaque_ptr(llmemory.GCREF, arg)
@@ -105,10 +108,17 @@
def getint(self, index):
return self.cpu.get_latest_value_int(index)
+ def getfloat(self, index):
+ return self.cpu.get_latest_value_float(index)
+
def getints(self, end):
return [self.cpu.get_latest_value_int(index) for
index in range(0, end)]
+ def getfloats(self, end):
+ return [self.cpu.get_latest_value_float(index) for
+ index in range(0, end)]
+
def getptr(self, index, T):
gcref = self.cpu.get_latest_value_ref(index)
return lltype.cast_opaque_ptr(T, gcref)
@@ -464,3 +474,61 @@
s = lltype.malloc(self.A, 3)
self.interpret(ops, [s, ord('a')])
assert s[1] == 'a'
+
+class TestRegallocFloats(BaseTestRegalloc):
+ def test_float_add(self):
+ ops = '''
+ [f0, f1]
+ f2 = float_add(f0, f1)
+ finish(f2, f0, f1)
+ '''
+ self.interpret(ops, [3.0, 1.5])
+ assert self.getfloats(3) == [4.5, 3.0, 1.5]
+
+ def test_float_adds_stack(self):
+ ops = '''
+ [f0, f1, f2, f3, f4, f5, f6, f7, f8]
+ f9 = float_add(f0, f1)
+ f10 = float_add(f8, 3.5)
+ finish(f9, f10, f2, f3, f4, f5, f6, f7, f8)
+ '''
+ self.interpret(ops, [0.1, .2, .3, .4, .5, .6, .7, .8, .9])
+ assert self.getfloats(9) == [.1+.2, .9+3.5, .3, .4, .5, .6, .7, .8, .9]
+
+ def test_float_overflow_const_list(self):
+ ops = ['[f0]']
+ for i in range(BASE_CONSTANT_SIZE * 2):
+ ops.append('f%d = float_add(f%d, 3.5)' % (i + 1, i))
+ ops.append('finish(f%d)' % (BASE_CONSTANT_SIZE * 2))
+ ops = "\n".join(ops)
+ self.interpret(ops, [0.1])
+ assert abs(self.getfloat(0) - (BASE_CONSTANT_SIZE * 2) * 3.5 - 0.1) < 0.00001
+
+ def test_lt_const(self):
+ ops = '''
+ [f0]
+ i1 = float_lt(3.5, f0)
+ finish(i1)
+ '''
+ self.interpret(ops, [0.1])
+ assert self.getint(0) == 0
+
+ def test_bug_wrong_stack_adj(self):
+ ops = '''
+ [i0, i1, i2, i3, i4, i5, i6, i7, i8]
+ guard_true(i0) [i0, i1, i2, i3, i4, i5, i6, i7, i8]
+ finish(4.5, i0, i1, i2, i3, i4, i5, i6, i7, i8)
+ '''
+ loop = self.interpret(ops, [0, 1, 2, 3, 4, 5, 6, 7, 8])
+ assert self.getint(0) == 0
+ bridge_ops = '''
+ [i0, i1, i2, i3, i4, i5, i6, i7, i8]
+ call(ConstClass(raising_fptr), 0, descr=raising_calldescr)
+ finish(i0, i1, i2, i3, i4, i5, i6, i7, i8)
+ '''
+ self.attach_bridge(bridge_ops, loop, 0)
+ for i in range(9):
+ self.cpu.set_future_value_int(i, i)
+ self.run(loop)
+ assert self.getints(9) == range(9)
+
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/test/test_ri386_auto_encoding.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/test/test_ri386_auto_encoding.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/backend/x86/test/test_ri386_auto_encoding.py Mon Oct 5 10:34:06 2009
@@ -141,8 +141,10 @@
all = instr.as_all_suffixes
for m, extra in args:
if m in (i386.MODRM, i386.MODRM8) or all:
- if not instrname == 'FNSTCW':
+ if instrname != 'FNSTCW':
suffix = suffixes[sizes[m]] + suffix
+ if m is i386.MODRM64 and instrname in ['FST', 'FSTP']:
+ suffix = 'l'
following = ""
if instr.indirect:
suffix = ""
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/history.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/history.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/history.py Mon Oct 5 10:34:06 2009
@@ -463,6 +463,8 @@
try:
if self.type == INT:
t = 'i'
+ elif self.type == FLOAT:
+ t = 'f'
else:
t = 'p'
except AttributeError:
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/logger.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/logger.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/logger.py Mon Oct 5 10:34:06 2009
@@ -2,7 +2,7 @@
from pypy.rlib.objectmodel import compute_unique_id
from pypy.jit.metainterp.resoperation import rop
from pypy.jit.metainterp.history import Const, ConstInt, Box, \
- BoxInt, ConstAddr
+ BoxInt, ConstAddr, ConstFloat, BoxFloat
from pypy.rlib.streamio import open_file_as_stream
class Logger(object):
@@ -45,10 +45,14 @@
return 'ConstPtr(ptr' + str(mv) + ')'
elif isinstance(arg, self.ts.BoxRef):
return 'p' + str(mv)
+ elif isinstance(arg, ConstFloat):
+ return str(arg.value)
+ elif isinstance(arg, BoxFloat):
+ return 'f' + str(mv)
elif isinstance(arg, self.ts.ConstAddr):
return 'ConstClass(cls' + str(mv) + ')'
else:
- raise NotImplementedError
+ return '?'
def log_operations(self, inputargs, operations, memo, indent=0):
if self.log_stream is None:
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/test/oparser.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/test/oparser.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/test/oparser.py Mon Oct 5 10:34:06 2009
@@ -4,7 +4,8 @@
"""
from pypy.jit.metainterp.history import TreeLoop, BoxInt, ConstInt,\
- ConstAddr, ConstObj, ConstPtr, Box, BasicFailDescr, LoopToken
+ ConstAddr, ConstObj, ConstPtr, Box, BasicFailDescr, BoxFloat, ConstFloat,\
+ LoopToken
from pypy.jit.metainterp.resoperation import rop, ResOperation
from pypy.jit.metainterp.typesystem import llhelper
from pypy.rpython.lltypesystem import lltype, llmemory
@@ -70,6 +71,9 @@
# integer
box = BoxInt()
_box_counter_more_than(elem[1:])
+ elif elem.startswith('f'):
+ box = BoxFloat()
+ _box_counter_more_than(elem[1:])
elif elem.startswith('p'):
# pointer
ts = getattr(self.cpu, 'ts', llhelper)
@@ -96,12 +100,21 @@
self.vars[elem] = box
return vars
+ def is_float(self, arg):
+ try:
+ float(arg)
+ return True
+ except ValueError:
+ return False
+
def getvar(self, arg):
if not arg:
return ConstInt(0)
try:
return ConstInt(int(arg))
except ValueError:
+ if self.is_float(arg):
+ return ConstFloat(float(arg))
if arg.startswith('"') or arg.startswith("'"):
# XXX ootype
info = arg.strip("'\"")
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/test/test_executor.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/test/test_executor.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/test/test_executor.py Mon Oct 5 10:34:06 2009
@@ -196,6 +196,7 @@
yield (rop.FLOAT_NE, [10.125, y], 'int', 10.125 != y)
yield (rop.FLOAT_GT, [10.125, y], 'int', 10.125 > y)
yield (rop.FLOAT_GE, [10.125, y], 'int', 10.125 >= y)
+ yield (rop.FLOAT_EQ, [0.0, -0.0], 'int', 0.0 == -0.0)
def _float_unary_operations():
yield (rop.FLOAT_NEG, [-5.9], 'float', 5.9)
@@ -204,6 +205,7 @@
yield (rop.FLOAT_ABS, [15.9], 'float', 15.9)
yield (rop.FLOAT_IS_TRUE, [-5.9], 'int', 1)
yield (rop.FLOAT_IS_TRUE, [0.0], 'int', 0)
+ yield (rop.FLOAT_IS_TRUE, [-0.0], 'int', 0)
yield (rop.CAST_FLOAT_TO_INT, [-5.9], 'int', -5)
yield (rop.CAST_FLOAT_TO_INT, [5.9], 'int', 5)
yield (rop.CAST_INT_TO_FLOAT, [123], 'float', 123.0)
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/test/test_logger.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/test/test_logger.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/test/test_logger.py Mon Oct 5 10:34:06 2009
@@ -74,3 +74,12 @@
loop, oloop = self.reparse(inp, check_equal=False)
assert loop.operations[0].args[0]._get_str() == 'info'
assert oloop.operations[0].args[0]._get_str() == 'info'
+
+ def test_floats(self):
+ inp = '''
+ [f0]
+ f1 = float_add(3.5, f0)
+ '''
+ loop, oloop = self.reparse(inp)
+ equaloplists(loop.operations, oloop.operations)
+
Modified: pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/test/test_oparser.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/test/test_oparser.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/jit/metainterp/test/test_oparser.py Mon Oct 5 10:34:06 2009
@@ -3,7 +3,8 @@
from pypy.jit.metainterp.test.oparser import parse
from pypy.jit.metainterp.resoperation import rop
-from pypy.jit.metainterp.history import AbstractDescr, BoxInt, LoopToken
+from pypy.jit.metainterp.history import AbstractDescr, BoxInt, LoopToken,\
+ BoxFloat
def test_basic_parse():
x = """
@@ -129,6 +130,14 @@
loop = parse(x, namespace=locals())
assert loop.operations[0].descr is looptoken
+def test_floats():
+ x = '''
+ [f0]
+ f1 = float_add(f0, 3.5)
+ '''
+ loop = parse(x)
+ assert isinstance(loop.operations[0].args[0], BoxFloat)
+
def test_debug_merge_point():
x = '''
[]
Modified: pypy/branch/merge-floats-via-sse2/pypy/module/pypyjit/policy.py
==============================================================================
--- pypy/branch/merge-floats-via-sse2/pypy/module/pypyjit/policy.py (original)
+++ pypy/branch/merge-floats-via-sse2/pypy/module/pypyjit/policy.py Mon Oct 5 10:34:06 2009
@@ -10,12 +10,6 @@
if (func.__name__.startswith('_mm_') or
func.__name__.startswith('__mm_')):
# multimethods
- name = func.__name__.lstrip('_')
- if (name.startswith('mm_truediv') or
- name.startswith('mm_inplace_truediv') or
- name.startswith('mm_float')):
- # floats
- return False
return True
if '_mth_mm_' in func.__name__: # e.g. str_mth_mm_join_xxx
return True
@@ -27,18 +21,15 @@
return False
if mod.startswith('pypy.objspace.'):
- # we don't support floats
- if 'float' in mod or 'complex' in mod:
- return False
- if func.__name__ == 'format_float':
- return False
# gc_id operation
if func.__name__ == 'id__ANY':
return False
- # floats
if mod == 'pypy.rlib.rbigint':
#if func.__name__ == '_bigint_true_divide':
return False
+ if mod == 'pypy.rpython.lltypesystem.module.ll_math':
+ # XXX temporary, contains force_cast
+ return False
if '_geninterp_' in func.func_globals: # skip all geninterped stuff
return False
if mod.startswith('pypy.interpreter.astcompiler.'):
More information about the Pypy-commit
mailing list