[pypy-svn] r69870 - in pypy/trunk/pypy/jit/backend/x86: . test

Thu Dec 3 16:05:50 CET 2009

Author: arigo
Date: Thu Dec  3 16:05:49 2009
New Revision: 69870

Modified:
   pypy/trunk/pypy/jit/backend/x86/assembler.py
   pypy/trunk/pypy/jit/backend/x86/regalloc.py
   pypy/trunk/pypy/jit/backend/x86/ri386setup.py
   pypy/trunk/pypy/jit/backend/x86/test/test_ztranslation.py
Log:
Kill the hacks to get the constants for neg and abs.  Enhance a test
to show why such hacks do not work when translated.  Replace
them with a proper 16-byte-aligned buffer, only in assembler.py,
not in regalloc.py.


Modified: pypy/trunk/pypy/jit/backend/x86/assembler.py
==============================================================================

--- pypy/trunk/pypy/jit/backend/x86/assembler.py	(original)
+++ pypy/trunk/pypy/jit/backend/x86/assembler.py	Thu Dec  3 16:05:49 2009
@@ -90,6 +90,8 @@
         self.fail_boxes_ptr = NonmovableGrowableArrayGCREF()
         self.fail_boxes_float = NonmovableGrowableArrayFloat()
         self.fail_ebp = 0
+        self.loc_float_const_neg = None
+        self.loc_float_const_abs = None
         self.setup_failure_recovery()
 
     def leave_jitted_hook(self):
@@ -130,6 +132,26 @@
                 self._build_failure_recovery(False, withfloats=True)
                 self._build_failure_recovery(True, withfloats=True)
                 codebuf.ensure_sse2_floats()
+                self._build_float_constants()
+
+    def _build_float_constants(self):
+        # 11 words: 8 words for the data, and up to 3 words for alignment
+        addr = lltype.malloc(rffi.CArray(lltype.Signed), 11, flavor='raw')
+        if not we_are_translated():
+            self._keepalive_malloced_float_consts = addr
+        float_constants = rffi.cast(lltype.Signed, addr)
+        float_constants = (float_constants + 15) & ~15    # align to 16 bytes
+        addr = rffi.cast(rffi.CArrayPtr(lltype.Signed), float_constants)
+        addr[0] = 0                # \
+        addr[1] = -2147483648      # / for neg
+        addr[2] = 0                #
+        addr[3] = 0                #
+        addr[4] = -1               # \
+        addr[5] = 2147483647       # / for abs
+        addr[6] = 0                #
+        addr[7] = 0                #
+        self.loc_float_const_neg = heap64(float_constants)
+        self.loc_float_const_abs = heap64(float_constants + 16)
 
     def assemble_loop(self, inputargs, operations, looptoken):
         """adds the following attributes to looptoken:
@@ -445,15 +467,13 @@
     genop_guard_uint_le = _cmpop_guard("BE", "AE", "A", "B")
     genop_guard_uint_ge = _cmpop_guard("AE", "BE", "B", "A")
 
-    # for now all chars are being considered ints, although we should make
-    # a difference at some point
-    xxx_genop_char_eq = genop_int_eq
-
     def genop_float_neg(self, op, arglocs, resloc):
-        self.mc.XORPD(arglocs[0], arglocs[1])
+        # Following what gcc does: res = x ^ 0x8000000000000000
+        self.mc.XORPD(arglocs[0], self.loc_float_const_neg)
 
     def genop_float_abs(self, op, arglocs, resloc):
-        self.mc.ANDPD(arglocs[0], arglocs[1])
+        # Following what gcc does: res = x & 0x7FFFFFFFFFFFFFFF
+        self.mc.ANDPD(arglocs[0], self.loc_float_const_abs)
 
     def genop_float_is_true(self, op, arglocs, resloc):
         loc0, loc1 = arglocs

Modified: pypy/trunk/pypy/jit/backend/x86/regalloc.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/regalloc.py	(original)
+++ pypy/trunk/pypy/jit/backend/x86/regalloc.py	Thu Dec  3 16:05:49 2009
@@ -54,15 +54,6 @@
 
 BASE_CONSTANT_SIZE = 1000
 
-# cheat cheat cheat....
-#  why not -0.0? People tell me it's platform-dependent
-#  nan is not portable
-import struct
-NEG_ZERO, = struct.unpack('d', struct.pack('ll', 0, -2147483648))
-NAN, = struct.unpack('d', struct.pack('ll', -1, 2147483647))
-# XXX These are actually masks for float_neg and float_abs.
-# They should not be converted to 'double' and given
-# names that reflect their float value.
 
 class X86XMMRegisterManager(RegisterManager):
 
@@ -80,9 +71,7 @@
         RegisterManager.__init__(self, longevity, stack_manager=stack_manager,
                                  assembler=assembler)
         self.constant_arrays = [self.new_const_array()]
-        self.constant_arrays[-1][0] = NEG_ZERO
-        self.constant_arrays[-1][1] = NAN
-        self.constant_array_counter = 2
+        self.constant_array_counter = 0
 
     def convert_to_imm(self, c):
         if self.constant_array_counter >= BASE_CONSTANT_SIZE:
@@ -564,28 +553,13 @@
     consider_float_ge = _consider_float_cmp
 
     def consider_float_neg(self, op, ignored):
-        # Following what gcc does...
-        # XXX we can ignore having constant in a reg, but we need
-        #     to be careful with 128-bit alignment
         loc0 = self.xrm.force_result_in_reg(op.result, op.args[0])
-        constloc = self.xrm.get_addr_of_const_float(0, 0)
-        tmpbox = TempBox()
-        loc1 = self.xrm.force_allocate_reg(tmpbox, op.args)
-        self.assembler.regalloc_mov(constloc, loc1)
-        self.Perform(op, [loc0, loc1], loc0)
-        self.xrm.possibly_free_var(tmpbox)
+        self.Perform(op, [loc0], loc0)
         self.xrm.possibly_free_var(op.args[0])
 
     def consider_float_abs(self, op, ignored):
-        # XXX we can ignore having constant in a reg, but we need
-        #     to be careful with 128-bit alignment
         loc0 = self.xrm.force_result_in_reg(op.result, op.args[0])
-        constloc = self.xrm.get_addr_of_const_float(0, 1)
-        tmpbox = TempBox()
-        loc1 = self.xrm.force_allocate_reg(tmpbox, op.args)
-        self.assembler.regalloc_mov(constloc, loc1)
-        self.Perform(op, [loc0, loc1], loc0)
-        self.xrm.possibly_free_var(tmpbox)
+        self.Perform(op, [loc0], loc0)
         self.xrm.possibly_free_var(op.args[0])
 
     def consider_float_is_true(self, op, ignored):

Modified: pypy/trunk/pypy/jit/backend/x86/ri386setup.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/ri386setup.py	(original)
+++ pypy/trunk/pypy/jit/backend/x86/ri386setup.py	Thu Dec  3 16:05:49 2009
@@ -541,13 +541,11 @@
 UCOMISD = Instruction()
 UCOMISD.mode2(XMMREG, MODRM64, ['\x66\x0F\x2E', register(1, 8), modrm(2)])
 
-XORPD = Instruction()
-XORPD.mode2(XMMREG, XMMREG, ['\x66\x0f\x57', register(1, 8), register(2),
-                            '\xC0'])
+XORPD = Instruction()  # warning: a memory argument must be aligned to 16 bytes
+XORPD.mode2(XMMREG, MODRM64, ['\x66\x0f\x57', register(1, 8), modrm(2)])
 
-ANDPD = Instruction()
-ANDPD.mode2(XMMREG, XMMREG, ['\x66\x0F\x54', register(1, 8), register(2),
-                             '\xC0'])
+ANDPD = Instruction()  # warning: a memory argument must be aligned to 16 bytes
+ANDPD.mode2(XMMREG, MODRM64, ['\x66\x0F\x54', register(1, 8), modrm(2)])
 
 CVTTSD2SI = Instruction()
 CVTTSD2SI.mode2(REG, XMMREG, ['\xF2\x0F\x2C', register(1, 8), register(2),

Modified: pypy/trunk/pypy/jit/backend/x86/test/test_ztranslation.py
==============================================================================
--- pypy/trunk/pypy/jit/backend/x86/test/test_ztranslation.py	(original)
+++ pypy/trunk/pypy/jit/backend/x86/test/test_ztranslation.py	Thu Dec  3 16:05:49 2009
@@ -22,6 +22,7 @@
         # - set_param interface
         # - profiler
         # - full optimizer
+        # - floats neg and abs
 
         class Frame(object):
             _virtualizable2_ = ['i']
@@ -29,9 +30,14 @@
             def __init__(self, i):
                 self.i = i
 
-        jitdriver = JitDriver(greens = [], reds = ['frame', 'total'],
+        @dont_look_inside
+        def myabs(x):
+            return abs(x)
+
+        jitdriver = JitDriver(greens = [],
+                              reds = ['frame', 'total', 'j'],
                               virtualizables = ['frame'])
-        def f(i):
+        def f(i, j):
             for param in unroll_parameters:
                 defl = PARAMETERS[param]
                 jitdriver.set_param(param, defl)
@@ -40,15 +46,20 @@
             total = 0
             frame = Frame(i)
             while frame.i > 3:
-                jitdriver.can_enter_jit(frame=frame, total=total)
-                jitdriver.jit_merge_point(frame=frame, total=total)
+                jitdriver.can_enter_jit(frame=frame, total=total, j=j)
+                jitdriver.jit_merge_point(frame=frame, total=total, j=j)
                 total += frame.i
                 if frame.i >= 20:
                     frame.i -= 2
                 frame.i -= 1
+                j *= -0.712
+                if j + (-j):    raise ValueError
+                k = myabs(j)
+                if k - abs(j):  raise ValueError
+                if k - abs(-j): raise ValueError
             return total * 10
-        res = self.meta_interp(f, [40])
-        assert res == f(40)
+        res = self.meta_interp(f, [40, -49])
+        assert res == f(40, -49)
 
     def test_external_exception_handling_translates(self):
         jitdriver = JitDriver(greens = [], reds = ['n', 'total'])