[pypy-commit] pypy default: hg merge arm-longlong

Mon Sep 1 17:06:57 CEST 2014

Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r73271:8ae0ea93f7fe
Date: 2014-09-01 17:05 +0200
http://bitbucket.org/pypy/pypy/changeset/8ae0ea93f7fe/

Log:	hg merge arm-longlong

	Reintroduce longlong support in the ARM backend. Tested with the
	hard-float variant of the calling convention; I can't test the
	soft-float variant for now. I'll test it myself at a later date, or
	just handle bug reports if any come in. I think that for soft-float
	we simply need to treat longlongs as equivalent to 64-bit floats (as
	we do now).

diff --git a/rpython/jit/backend/arm/callbuilder.py b/rpython/jit/backend/arm/callbuilder.py
--- a/rpython/jit/backend/arm/callbuilder.py
+++ b/rpython/jit/backend/arm/callbuilder.py
@@ -80,15 +80,6 @@
                 self.mc.gen_load_int(r.ip.value, n)
                 self.mc.SUB_rr(r.sp.value, r.sp.value, r.ip.value)
 
-    def _must_remap_fnloc(self):
-        fnloc = self.fnloc
-        if fnloc.is_stack():
-            return True
-        if self.is_call_release_gil:
-            if fnloc is r.r5 or fnloc is r.r6 or fnloc is r.r7:
-                return True
-        return False
-
     def call_releasegil_addr_and_move_real_arguments(self, fastgil):
         assert self.is_call_release_gil
         assert not self.asm._is_asmgcc()
@@ -121,7 +112,7 @@
         self.mc.STREX(r.r3.value, r.ip.value, r.r6.value, c=c.EQ)
                                                  # try to claim the lock
         self.mc.CMP_ri(r.r3.value, 0, cond=c.EQ) # did this succeed?
-        self.mc.DMB(c=c.EQ)
+        self.mc.DMB()
         # the success of the lock acquisition is defined by
         # 'EQ is true', or equivalently by 'r3 == 0'.
         #
@@ -268,7 +259,7 @@
         # or on the stack, which we can not access later
         # If this happens to be the case we remap the register to r4 and use r4
         # to call the function
-        if self.fnloc in r.argument_regs or self._must_remap_fnloc():
+        if not self.fnloc.is_imm():
             non_float_locs.append(self.fnloc)
             non_float_regs.append(r.r4)
             self.fnloc = r.r4
@@ -285,29 +276,23 @@
 
     def get_next_vfp(self, tp):
         assert tp in 'fS'
-        if self.next_arg_vfp == -1:
-            return None
-        if tp == 'S':
+        if tp == 'f':
+            # 64bit double
+            i = max(self.next_arg_vfp, (self.next_arg_svfp + 1) >> 1)
+            if i >= len(r.vfp_argument_regs):
+                self.next_arg_svfp = 1000    # stop that sequence too
+                return None
+            self.next_arg_vfp = i + 1
+            return r.vfp_argument_regs[i]
+        else:
+            # 32bit float
             i = self.next_arg_svfp
-            next_vfp = (i >> 1) + 1
-            if not (i + 1) & 1: # i is even
-                self.next_arg_vfp = max(self.next_arg_vfp, next_vfp)
-                self.next_arg_svfp = self.next_arg_vfp << 1
-            else:
-                self.next_arg_svfp += 1
-                self.next_arg_vfp = next_vfp
-            lst = r.svfp_argument_regs
-        else: # 64bit double
-            i = self.next_arg_vfp
-            self.next_arg_vfp += 1
-            if self.next_arg_svfp >> 1 == i:
-                self.next_arg_svfp = self.next_arg_vfp << 1
-            lst = r.vfp_argument_regs
-        try:
-            return lst[i]
-        except IndexError:
-            self.next_arg_vfp = self.next_arg_svfp = -1
-            return None
+            if not (i & 1):     # if i is even
+                i = max(i, self.next_arg_vfp << 1)
+            if i >= len(r.svfp_argument_regs):
+                return None
+            self.next_arg_svfp = i + 1
+            return r.svfp_argument_regs[i]
 
     def prepare_arguments(self):
         non_float_locs = []
@@ -316,34 +301,64 @@
         float_regs = []
         stack_args = []
         singlefloats = None
+        longlong_mask = 0
 
         arglocs = self.arglocs
         argtypes = self.argtypes
 
-        count = 0                      # stack alignment counter
+        r_register_count = 0
         on_stack = 0
+
         for i in range(len(arglocs)):
             argtype = INT
             if i < len(argtypes) and argtypes[i] == 'S':
                 argtype = argtypes[i]
             arg = arglocs[i]
+
             if arg.is_float():
-                argtype = FLOAT
-                reg = self.get_next_vfp(argtype)
-                if reg:
-                    assert len(float_regs) < len(r.vfp_argument_regs)
-                    float_locs.append(arg)
-                    assert reg not in float_regs
-                    float_regs.append(reg)
-                else:  # float argument that needs to go on the stack
-                    if count % 2 != 0:
-                        stack_args.append(None)
-                        count = 0
-                        on_stack += 1
-                    stack_args.append(arg)
-                    on_stack += 2
+                if i < len(argtypes) and argtypes[i] == 'L':
+                    # A longlong argument.  It uses two regular argument
+                    # positions, but aligned to an even number.  This is
+                    # a bit strange, but it is the case even for registers:
+                    # it can be in r0-r1 or in r2-r3 but not in r1-r2.
+                    assert arg.is_float()
+                    if r_register_count == 0:
+                        # will temporarily load the register into d8
+                        float_locs.append(arg)
+                        float_regs.append(r.d8)
+                        longlong_mask |= 1
+                        r_register_count = 2
+                        continue
+                    elif r_register_count <= 2:
+                        # will temporarily load the register into d9
+                        float_locs.append(arg)
+                        float_regs.append(r.d9)
+                        longlong_mask |= 2
+                        r_register_count = 4
+                        continue
+                    elif r_register_count == 3:
+                        r_register_count = 4
+                else:
+                    # A 64-bit float argument.  Goes into the next free v#
+                    # register, or if none, to the stack aligned to an
+                    # even number of words.
+                    argtype = FLOAT
+                    reg = self.get_next_vfp(argtype)
+                    if reg:
+                        float_locs.append(arg)
+                        assert reg not in float_regs
+                        float_regs.append(reg)
+                        continue
+                # float or longlong argument that needs to go on the stack
+                if on_stack & 1:   # odd: realign
+                    stack_args.append(None)
+                    on_stack += 1
+                stack_args.append(arg)
+                on_stack += 2
+
             elif argtype == 'S':
-                # Singlefloat argument
+                # Singlefloat (32-bit) argument.  Goes into the next free
+                # v# register, or if none, to the stack in a single word.
                 if singlefloats is None:
                     singlefloats = []
                 tgt = self.get_next_vfp(argtype)
@@ -351,32 +366,36 @@
                     singlefloats.append((arg, tgt))
                 else:  # Singlefloat argument that needs to go on the stack
                        # treated the same as a regular core register argument
-                    count += 1
+                    stack_args.append(arg)
                     on_stack += 1
-                    stack_args.append(arg)
             else:
-                if len(non_float_regs) < len(r.argument_regs):
-                    reg = r.argument_regs[len(non_float_regs)]
+                # Regular one-word argument.  Goes into the next register
+                # free from the list r0, r1, r2, r3, or to the stack.
+                if r_register_count < len(r.argument_regs):
+                    reg = r.argument_regs[r_register_count]
+                    r_register_count += 1
                     non_float_locs.append(arg)
                     non_float_regs.append(reg)
                 else:  # non-float argument that needs to go on the stack
-                    count += 1
+                    stack_args.append(arg)
                     on_stack += 1
-                    stack_args.append(arg)
+
         # align the stack
-        if count % 2 != 0:
+        if on_stack & 1:    # odd: realign
             stack_args.append(None)
             on_stack += 1
         self._push_stack_args(stack_args, on_stack*WORD)
+
         # Check that the address of the function we want to call is not
         # currently stored in one of the registers used to pass the arguments
         # or on the stack, which we can not access later
         # If this happens to be the case we remap the register to r4 and use r4
         # to call the function
-        if self.fnloc in non_float_regs or self._must_remap_fnloc():
+        if not self.fnloc.is_imm():
             non_float_locs.append(self.fnloc)
             non_float_regs.append(r.r4)
             self.fnloc = r.r4
+
         # remap values stored in vfp registers
         remap_frame_layout(self.asm, float_locs, float_regs, r.vfp_ip)
         if singlefloats:
@@ -392,13 +411,22 @@
                     src = r.ip
                 if src.is_core_reg():
                     self.mc.VMOV_cs(dest.value, src.value)
+
         # remap values stored in core registers
         remap_frame_layout(self.asm, non_float_locs, non_float_regs, r.ip)
+        if longlong_mask & 1:
+            self.mc.FMRRD(r.r0.value, r.r1.value, r.d8.value)
+        if longlong_mask & 2:
+            self.mc.FMRRD(r.r2.value, r.r3.value, r.d9.value)
+
 
     def load_result(self):
         resloc = self.resloc
         if self.restype == 'S':
             self.mc.VMOV_sc(resloc.value, r.s0.value)
+        elif self.restype == 'L':
+            assert resloc.is_vfp_reg()
+            self.mc.FMDRR(resloc.value, r.r0.value, r.r1.value)
         # ensure the result is wellformed and stored in the correct location
         if resloc is not None and resloc.is_core_reg():
             self._ensure_result_bit_extension(resloc,
@@ -408,7 +436,10 @@
         if self.resloc is None:
             return [], []
         if self.resloc.is_vfp_reg():
-            return [], [r.d0]
+            if self.restype == 'L':      # long long
+                return [r.r0, r.r1], []
+            else:
+                return [], [r.d0]
         assert self.resloc.is_core_reg()
         return [r.r0], []
 
diff --git a/rpython/jit/backend/arm/codebuilder.py b/rpython/jit/backend/arm/codebuilder.py
--- a/rpython/jit/backend/arm/codebuilder.py
+++ b/rpython/jit/backend/arm/codebuilder.py
@@ -332,13 +332,17 @@
                     | (rd & 0xF) << 12
                     | (rn & 0xF) << 16)
 
-    def DMB(self, c=cond.AL):
-        self.write32(c << 28 | 0x157ff05f)
+    def DMB(self):
+        # note: 'cond' is only permitted on Thumb here
+        self.write32(0xf57ff05f)
 
     DIV = binary_helper_call('int_div')
     MOD = binary_helper_call('int_mod')
     UDIV = binary_helper_call('uint_div')
 
+    FMDRR = VMOV_cr     # uh, there are synonyms?
+    FMRRD = VMOV_rc
+
     def _encode_reg_list(self, instr, regs):
         for reg in regs:
             instr |= 0x1 << reg
diff --git a/rpython/jit/backend/arm/runner.py b/rpython/jit/backend/arm/runner.py
--- a/rpython/jit/backend/arm/runner.py
+++ b/rpython/jit/backend/arm/runner.py
@@ -20,7 +20,7 @@
     IS_64_BIT = False
 
     supports_floats = True
-    supports_longlong = False     # incomplete, notably in callbuilder.py
+    supports_longlong = True
     supports_singlefloats = True
 
     from rpython.jit.backend.arm.arch import JITFRAME_FIXED_SIZE
diff --git a/rpython/jit/backend/arm/test/test_callbuilder.py b/rpython/jit/backend/arm/test/test_callbuilder.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/arm/test/test_callbuilder.py
@@ -0,0 +1,47 @@
+from rpython.jit.backend.arm.callbuilder import HardFloatCallBuilder
+from rpython.jit.backend.arm import registers as r
+
+
+
+def test_hf_vfp_registers_all_singlefloat():
+    hf = HardFloatCallBuilder.__new__(HardFloatCallBuilder)
+    got = [hf.get_next_vfp('S') for i in range(18)]
+    assert got == [r.s0, r.s1, r.s2, r.s3, r.s4, r.s5, r.s6, r.s7,
+                   r.s8, r.s9, r.s10, r.s11, r.s12, r.s13, r.s14, r.s15,
+                   None, None]
+
+def test_hf_vfp_registers_all_doublefloat():
+    hf = HardFloatCallBuilder.__new__(HardFloatCallBuilder)
+    got = [hf.get_next_vfp('f') for i in range(10)]
+    assert got == [r.d0, r.d1, r.d2, r.d3, r.d4, r.d5, r.d6, r.d7,
+                   None, None]
+
+def test_hf_vfp_registers_mixture():
+    hf = HardFloatCallBuilder.__new__(HardFloatCallBuilder)
+    got = [hf.get_next_vfp('S'), hf.get_next_vfp('f'),
+           hf.get_next_vfp('S'), hf.get_next_vfp('f'),
+           hf.get_next_vfp('S'), hf.get_next_vfp('f'),
+           hf.get_next_vfp('S'), hf.get_next_vfp('f'),
+           hf.get_next_vfp('S'), hf.get_next_vfp('f'),
+           hf.get_next_vfp('S'), hf.get_next_vfp('f'),
+           hf.get_next_vfp('S'), hf.get_next_vfp('f')]
+    assert got == [r.s0,  r.d1,
+                   r.s1,  r.d2,
+                   r.s6,  r.d4,
+                   r.s7,  r.d5,
+                   r.s12, r.d7,
+                   r.s13, None,
+                   None,  None]
+
+def test_hf_vfp_registers_mixture_2():
+    hf = HardFloatCallBuilder.__new__(HardFloatCallBuilder)
+    got = [hf.get_next_vfp('f'), hf.get_next_vfp('f'),
+           hf.get_next_vfp('f'), hf.get_next_vfp('f'),
+           hf.get_next_vfp('f'), hf.get_next_vfp('f'),
+           hf.get_next_vfp('f'), hf.get_next_vfp('S'),
+           hf.get_next_vfp('f'), hf.get_next_vfp('S')]
+    assert got == [r.d0, r.d1,
+                   r.d2, r.d3,
+                   r.d4, r.d5,
+                   r.d6, r.s14,
+                   None, None]    # <- and not r.s15 for the last item
diff --git a/rpython/jit/backend/arm/test/test_instr_codebuilder.py b/rpython/jit/backend/arm/test/test_instr_codebuilder.py
--- a/rpython/jit/backend/arm/test/test_instr_codebuilder.py
+++ b/rpython/jit/backend/arm/test/test_instr_codebuilder.py
@@ -199,6 +199,14 @@
         self.cb.DMB()
         self.assert_equal('DMB')
 
+    def test_fmdrr(self):
+        self.cb.FMDRR(r.d11.value, r.r9.value, r.r14.value)
+        self.assert_equal('FMDRR d11, r9, r14')
+
+    def test_fmrrd(self):
+        self.cb.FMRRD(r.r9.value, r.r14.value, r.d11.value)
+        self.assert_equal('FMRRD r9, r14, d11')
+
 
 def test_size_of_gen_load_int():
     for v, n in [(5, 4), (6, 4), (7, 2)]:
diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -2718,12 +2718,11 @@
                 assert r == result
 
     def test_call_release_gil_variable_function_and_arguments(self):
-        # NOTE NOTE NOTE
-        # This also works as a test for ctypes and libffi.
-        # On some platforms, one of these is buggy...
+        from rpython.translator.tool.cbuild import ExternalCompilationInfo
         from rpython.rlib.libffi import types
         from rpython.rlib.rarithmetic import r_uint, r_longlong, r_ulonglong
         from rpython.rlib.rarithmetic import r_singlefloat
+        from rpython.translator.c import primitive
 
         cpu = self.cpu
         rnd = random.Random(525)
@@ -2752,25 +2751,76 @@
                 (types.float,  rffi.FLOAT),
                 ] * 4
 
-        for k in range(100):
+        NB_TESTS = 100
+        c_source = []
+        all_tests = []
+        export_symbols = []
+
+        def prepare_c_source():
+            """Pick a random choice of argument types and length,
+            and build a C function with these arguments.  The C
+            function will simply copy them all into static global
+            variables.  There are then additional functions to fetch
+            them, one per argument, with a signature 'void(ARG *)'.
+            """
             POSSIBLE_TYPES = [rnd.choice(ALL_TYPES)
                               for i in range(random.randrange(2, 5))]
             load_factor = rnd.random()
             keepalive_factor = rnd.random()
             #
-            def pseudo_c_function(*args):
-                seen.append(list(args))
-            #
             ffitypes = []
             ARGTYPES = []
             for i in range(rnd.randrange(4, 20)):
                 ffitype, TP = rnd.choice(POSSIBLE_TYPES)
                 ffitypes.append(ffitype)
                 ARGTYPES.append(TP)
+            fn_name = 'vartest%d' % k
+            all_tests.append((ARGTYPES, ffitypes, fn_name))
             #
-            FPTR = self.Ptr(self.FuncType(ARGTYPES, lltype.Void))
-            func_ptr = llhelper(FPTR, pseudo_c_function)
-            funcbox = self.get_funcbox(cpu, func_ptr)
+            fn_args = []
+            for i, ARG in enumerate(ARGTYPES):
+                arg_decl = primitive.cdecl(primitive.PrimitiveType[ARG],
+                                           'x%d' % i)
+                fn_args.append(arg_decl)
+                var_name = 'argcopy_%s_x%d' % (fn_name, i)
+                var_decl = primitive.cdecl(primitive.PrimitiveType[ARG],
+                                           var_name)
+                c_source.append('static %s;' % var_decl)
+                getter_name = '%s_get%d' % (fn_name, i)
+                export_symbols.append(getter_name)
+                c_source.append('void %s(%s) { *p = %s; }' % (
+                    getter_name,
+                    primitive.cdecl(primitive.PrimitiveType[ARG], '*p'),
+                    var_name))
+            export_symbols.append(fn_name)
+            c_source.append('')
+            c_source.append('static void real%s(%s)' % (
+                fn_name, ', '.join(fn_args)))
+            c_source.append('{')
+            for i in range(len(ARGTYPES)):
+                c_source.append('    argcopy_%s_x%d = x%d;' % (fn_name, i, i))
+            c_source.append('}')
+            c_source.append('void *%s(void)' % fn_name)
+            c_source.append('{')
+            c_source.append('    return (void *)&real%s;' % fn_name)
+            c_source.append('}')
+            c_source.append('')
+
+        for k in range(NB_TESTS):
+            prepare_c_source()
+
+        eci = ExternalCompilationInfo(
+            separate_module_sources=['\n'.join(c_source)],
+            export_symbols=export_symbols)
+
+        for k in range(NB_TESTS):
+            ARGTYPES, ffitypes, fn_name = all_tests[k]
+            func_getter_ptr = rffi.llexternal(fn_name, [], lltype.Signed,
+                                         compilation_info=eci, _nowrapper=True)
+            load_factor = rnd.random()
+            keepalive_factor = rnd.random()
+            #
+            func_raw = func_getter_ptr()
             calldescr = cpu._calldescr_dynamic_for_tests(ffitypes, types.void)
             faildescr = BasicFailDescr(1)
             #
@@ -2790,7 +2840,7 @@
             print
             print codes
             #
-            argvalues = [funcbox.getint()]
+            argvalues = [func_raw]
             for TP in ARGTYPES:
                 r = (rnd.random() - 0.5) * 999999999999.9
                 r = rffi.cast(TP, r)
@@ -2840,16 +2890,26 @@
             looptoken = JitCellToken()
             self.cpu.compile_loop(argboxes, ops, looptoken)
             #
-            seen = []
             deadframe = self.cpu.execute_token(looptoken, *argvalues_normal)
             fail = self.cpu.get_latest_descr(deadframe)
             assert fail.identifier == 0
             expected = argvalues[1:]
-            [got] = seen
-            different_values = ['%r != %r' % (a, b)
-                                    for a, b in zip(got, expected)
-                                        if a != b]
-            assert got == expected, ', '.join(different_values)
+            got = []
+            for i, ARG in enumerate(ARGTYPES):
+                PARG = rffi.CArrayPtr(ARG)
+                getter_name = '%s_get%d' % (fn_name, i)
+                getter_ptr = rffi.llexternal(getter_name, [PARG], lltype.Void,
+                                             compilation_info=eci,
+                                             _nowrapper=True)
+                my_arg = lltype.malloc(PARG.TO, 1, zero=True, flavor='raw')
+                getter_ptr(my_arg)
+                got.append(my_arg[0])
+                lltype.free(my_arg, flavor='raw')
+            different_values = ['x%d: got %r, expected %r' % (i, a, b)
+                                for i, (a, b) in enumerate(zip(got, expected))
+                                if a != b]
+            assert got == expected, '\n'.join(
+                ['bad args, signature %r' % codes[1:]] + different_values)
 
 
     def test_guard_not_invalidated(self):