[pypy-svn] pypy arm-backed-float: start extending the calling convention implementation to support floats

Fri Apr 1 11:03:14 CEST 2011

Author: David Schneider <david.schneider at picle.org>
Branch: arm-backed-float
Changeset: r43071:44a933e393c8
Date: 2011-03-31 16:00 +0200
http://bitbucket.org/pypy/pypy/changeset/44a933e393c8/

Log:	start extending the calling convention implementation to support
	floats

diff --git a/pypy/jit/backend/arm/assembler.py b/pypy/jit/backend/arm/assembler.py
--- a/pypy/jit/backend/arm/assembler.py
+++ b/pypy/jit/backend/arm/assembler.py
@@ -641,6 +641,10 @@
                 assert 0, 'unsupported case'
         elif loc.is_reg() and prev_loc.is_reg():
             self.mc.MOV_rr(loc.value, prev_loc.value, cond=cond)
+        elif loc.is_reg() and prev_loc.is_vfp_reg():
+            self.mc.VMOV_rc(loc.value, prev_loc.value, cond=cond)
+        elif loc.is_vfp_reg() and prev_loc.is_reg():
+            self.mc.VMOV_cr(loc.value, prev_loc.value, cond=cond)
         else:
             assert 0, 'unsupported case'
     mov_loc_loc = regalloc_mov

diff --git a/pypy/jit/backend/arm/opassembler.py b/pypy/jit/backend/arm/opassembler.py
--- a/pypy/jit/backend/arm/opassembler.py
+++ b/pypy/jit/backend/arm/opassembler.py
@@ -248,7 +248,8 @@
         descr = op.getdescr()
         #XXX Hack, Hack, Hack
         if op.result and not we_are_translated() and not isinstance(descr, LoopToken):
-            loc = regalloc.call_result_location(op.result)
+            #XXX check result type
+            loc = regalloc.rm.call_result_location(op.result)
             size = descr.get_result_size(False)
             signed = descr.is_result_signed()
             self._ensure_result_bit_extension(loc, size, signed)
@@ -258,13 +259,28 @@
     # emit_op_call_may_force
     # XXX improve freeing of stuff here
     def _emit_call(self, adr, args, regalloc, fcond=c.AL, result=None):
-        n = 0
         n_args = len(args)
-        reg_args = min(n_args, 4)
-        # prepare arguments passed in registers
-        for i in range(0, reg_args):
-            l = regalloc.make_sure_var_in_reg(args[i],
-                                            selected_reg=r.all_regs[i])
+        reg_args = 0
+        for x in range(min(n_args, 4)):
+            if args[x].type == FLOAT:
+                reg_args += 2
+            else:
+                reg_args += 1
+            if reg_args > 4:
+                reg_args = x - 1
+                break
+
+        # collect the locations of the arguments and spill those that are in
+        # the caller saved registers
+        locs = []
+        for v in range(reg_args):
+            var = args[v]
+            loc = regalloc.loc(var)
+            if loc in r.caller_resp:
+                regalloc.force_spill(var)
+                loc = regalloc.loc(var)
+            locs.append(loc)
+
         # save caller saved registers
         if result:
             # XXX hack if the call has a result force the value in r0 to be
@@ -274,10 +290,25 @@
                 t = TempBox()
                 regalloc.force_allocate_reg(t, selected_reg=regalloc.call_result_location(t))
                 regalloc.possibly_free_var(t)
-            saved_regs = r.caller_resp[1:]
+            if result.type == FLOAT:
+                saved_regs = r.caller_resp[2:]
+            else:
+                saved_regs = r.caller_resp[1:]
         else:
             saved_regs = r.caller_resp
-        with saved_registers(self.mc, saved_regs, regalloc=regalloc):
+
+        with saved_registers(self.mc, saved_regs, r.caller_vfp_resp, regalloc):
+            # move variables to the argument registers
+            num = 0
+            for i in range(reg_args):
+                arg = args[i]
+                reg = r.all_regs[num]
+                self.mov_loc_loc(locs[i], reg)
+                if arg.type == FLOAT:
+                    num += 2
+                else:
+                    num += 1
+
             # all arguments past the 4th go on the stack
             if n_args > 4:
                 stack_args = n_args - 4
@@ -297,7 +328,13 @@
 
             # restore the argumets stored on the stack
             if result is not None:
-                regalloc.after_call(result)
+                # support floats here
+                resloc = regalloc.after_call(result)
+                if result.type == FLOAT:
+                    # XXX ugly and fragile
+                    # move result to the allocated register
+                    self.mov_loc_loc(resloc, r.r0)
+
         return fcond
 
     def emit_op_same_as(self, op, arglocs, regalloc, fcond):
@@ -683,7 +720,7 @@
         jd = descr.outermost_jitdriver_sd
         assert jd is not None
         asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
-        with saved_registers(self.mc, r.caller_resp[1:], regalloc=regalloc):
+        with saved_registers(self.mc, r.caller_resp[1:], r.caller_vfp_resp, regalloc=regalloc):
             # resbox is allready in r0
             self.mov_loc_loc(arglocs[1], r.r1)
             self.mc.BL(asm_helper_adr)

diff --git a/pypy/jit/backend/arm/codebuilder.py b/pypy/jit/backend/arm/codebuilder.py
--- a/pypy/jit/backend/arm/codebuilder.py
+++ b/pypy/jit/backend/arm/codebuilder.py
@@ -72,6 +72,41 @@
                 | 0xB << 8
                 | nregs)
         self.write32(instr)
+    
+    # VMOV<c> <Rt>, <Rt2>, <Dm>
+    def VMOV_rc(self, rt, dm, cond=cond.AL):
+        """Emit a VMOV copying the doubleword extension register dm into the
+        two consecutive ARM core registers rt and rt+1 (Rt2 is always rt+1)."""
+        rt2 = rt + 1
+        op = 1  # op=1: extension register -> core registers
+        instr = (cond << 28
+                | 0xC << 24
+                | 0x4 << 20
+                | op << 20
+                | (rt2 & 0xF) << 16
+                | (rt & 0xF) << 12
+                | 0xB << 8
+                | 0x1 << 4
+                | (dm & 0xF))
+        self.write32(instr)
+
+    # VMOV<c> <Dm>, <Rt>, <Rt2>
+    def VMOV_cr(self, dm, rt, cond=cond.AL):
+        """Emit a VMOV copying the two consecutive ARM core registers rt and
+        rt+1 into the doubleword extension register dm (Rt2 is always rt+1).
+        """
+        rt2 = rt + 1
+        op = 0  # op=0: core registers -> extension register
+        instr = (cond << 28
+                | 0xC << 24
+                | 0x4 << 20
+                | op << 20
+                | (rt2 & 0xF) << 16
+                | (rt & 0xF) << 12
+                | 0xB << 8
+                | 0x1 << 4
+                | (dm & 0xF))
+        self.write32(instr)
 
     def VCVT_float_to_int(self, target, source, cond=cond.AL):
         opc2 = 0x5

diff --git a/pypy/jit/backend/arm/regalloc.py b/pypy/jit/backend/arm/regalloc.py
--- a/pypy/jit/backend/arm/regalloc.py
+++ b/pypy/jit/backend/arm/regalloc.py
@@ -69,6 +69,13 @@
     def __init__(self, longevity, frame_manager=None, assembler=None):
         RegisterManager.__init__(self, longevity, frame_manager, assembler)
 
+    def after_call(self, v):
+        """Adjust the registers for the result of a call, held in variable
+        v, and return what force_allocate_reg() hands back for it.
+        """
+        self._check_type(v)
+        # returned directly; no local that would shadow the module alias r
+        return self.force_allocate_reg(v)
 class ARMv7RegisterMananger(RegisterManager):
     all_regs              = r.all_regs
     box_types             = None       # or a list of acceptable types
@@ -136,6 +143,12 @@
         else:
             return self.rm.stays_alive(v)
 
+    def call_result_location(self, v):
+        # dispatch on the type of v: FLOAT values are answered by self.vfprm,
+        # everything else by self.rm
+        manager = self.vfprm if v.type == FLOAT else self.rm
+        return manager.call_result_location(v)
+
     def after_call(self, v):
         if v.type == FLOAT:
             return self.vfprm.after_call(v)