[pypy-svn] r68110 - pypy/branch/floats-via-sse2/pypy/jit/backend/x86

fijal at codespeak.net fijal at codespeak.net
Thu Oct 1 17:53:19 CEST 2009


Author: fijal
Date: Thu Oct  1 17:53:19 2009
New Revision: 68110

Modified:
   pypy/branch/floats-via-sse2/pypy/jit/backend/x86/assembler.py
   pypy/branch/floats-via-sse2/pypy/jit/backend/x86/regalloc.py
Log:
Implement jumps without touching jump.py. There is an inefficient
trick for pushing a MODRM64 onto the top of the stack.


Modified: pypy/branch/floats-via-sse2/pypy/jit/backend/x86/assembler.py
==============================================================================
--- pypy/branch/floats-via-sse2/pypy/jit/backend/x86/assembler.py	(original)
+++ pypy/branch/floats-via-sse2/pypy/jit/backend/x86/assembler.py	Thu Oct  1 17:53:19 2009
@@ -7,7 +7,8 @@
 from pypy.rpython.lltypesystem.rclass import OBJECT
 from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.tool.uid import fixid
-from pypy.jit.backend.x86.regalloc import RegAlloc, WORD, lower_byte
+from pypy.jit.backend.x86.regalloc import RegAlloc, WORD, lower_byte,\
+     X86RegisterManager, X86XMMRegisterManager, get_ebp_ofs
 from pypy.rlib.objectmodel import we_are_translated, specialize
 from pypy.jit.backend.x86 import codebuf
 from pypy.jit.backend.x86.ri386 import *
@@ -192,6 +193,7 @@
         mc.done()
 
     def _assemble_bootstrap_code(self, inputargs, arglocs):
+        nonfloatlocs, floatlocs = arglocs
         self.mc.PUSH(ebp)
         self.mc.MOV(ebp, esp)
         self.mc.PUSH(ebx)
@@ -200,41 +202,37 @@
         # NB. exactly 4 pushes above; if this changes, fix stack_pos().
         # You must also keep _get_callshape() in sync.
         adr_stackadjust = self._patchable_stackadjust()
-        for i in range(len(arglocs)):
-            loc = arglocs[i]
-            if not isinstance(loc, REG):
-                if inputargs[i].type == FLOAT:
-                    self.mc.MOVSD(xmm0,
-                                  addr64_add(imm(self.fail_box_float_addr),
-                                             imm(i*WORD*2)))
-                    self.mc.MOVSD(loc, xmm0)
-                else:
-                    if inputargs[i].type == REF:
-                        # This uses XCHG to put zeroes in fail_boxes_ptr after
-                        # reading them
-                        self.mc.XOR(ecx, ecx)
-                        self.mc.XCHG(ecx, addr_add(imm(self.fail_box_ptr_addr),
-                                                   imm(i*WORD)))
-                    else:
-                        self.mc.MOV(ecx, addr_add(imm(self.fail_box_int_addr),
-                                                  imm(i*WORD)))
-                    self.mc.MOV(loc, ecx)
-        for i in range(len(arglocs)):
-            loc = arglocs[i]
+        tmp = X86RegisterManager.all_regs[0]
+        xmmtmp = X86XMMRegisterManager.all_regs[0]
+        for i in range(len(nonfloatlocs)):
+            loc = nonfloatlocs[i]
+            if loc is None:
+                continue
             if isinstance(loc, REG):
-                if inputargs[i].type == FLOAT:
-                    self.mc.MOVSD(loc,
-                                  addr64_add(imm(self.fail_box_float_addr),
-                                             imm(i*WORD*2)))
-                elif inputargs[i].type == REF:
-                    # This uses XCHG to put zeroes in fail_boxes_ptr after
-                    # reading them
-                    self.mc.XOR(loc, loc)
-                    self.mc.XCHG(loc, addr_add(imm(self.fail_box_ptr_addr),
-                                               imm(i*WORD)))
-                else:
-                    self.mc.MOV(loc, addr_add(imm(self.fail_box_int_addr),
+                target = loc
+            else:
+                target = tmp
+            if inputargs[i].type == REF:
+                # This uses XCHG to put zeroes in fail_boxes_ptr after
+                # reading them
+                self.mc.XOR(target, target)
+                self.mc.XCHG(target, addr_add(imm(self.fail_box_ptr_addr),
                                               imm(i*WORD)))
+            else:
+                self.mc.MOV(target, addr_add(imm(self.fail_box_int_addr),
+                                             imm(i*WORD)))
+            self.mc.MOV(loc, target)
+        for i in range(len(floatlocs)):
+            loc = floatlocs[i]
+            if loc is None:
+                continue
+            if isinstance(loc, REG):
+                self.mc.MOVSD(loc, addr64_add(imm(self.fail_box_float_addr),
+                                              imm(i*WORD*2)))
+            else:
+                self.mc.MOVSD(xmmtmp, addr64_add(imm(self.fail_box_float_addr),
+                                               imm(i*WORD*2)))
+                self.mc.MOVSD(loc, xmmtmp)
         return adr_stackadjust
 
     def dump(self, text):
@@ -261,10 +259,26 @@
         self.mc.FSTP(loc)
 
     def regalloc_push(self, loc):
-        self.mc.PUSH(loc)
+        if isinstance(loc, XMMREG):
+            self.mc.SUB(esp, imm(2*WORD))
+            self.mc.MOVSD(mem64(esp, 0), loc)
+        elif isinstance(loc, MODRM64):
+            # XXX evil trick
+            self.mc.PUSH(mem(ebp, get_ebp_ofs(loc.position)))
+            self.mc.PUSH(mem(ebp, get_ebp_ofs(loc.position + 1)))
+        else:
+            self.mc.PUSH(loc)
 
     def regalloc_pop(self, loc):
-        self.mc.POP(loc)
+        if isinstance(loc, XMMREG):
+            self.mc.MOVSD(loc, mem64(esp, 0))
+            self.mc.ADD(esp, imm(2*WORD))
+        elif isinstance(loc, MODRM64):
+            # XXX evil trick
+            self.mc.POP(mem(ebp, get_ebp_ofs(loc.position + 1)))
+            self.mc.POP(mem(ebp, get_ebp_ofs(loc.position)))
+        else:
+            self.mc.POP(loc)
 
     def regalloc_perform(self, op, arglocs, resloc):
         genop_list[op.opnum](self, op, arglocs, resloc)
@@ -472,7 +486,7 @@
             return self.implement_guard(addr, self.mc.JNZ)
 
     def genop_guard_ooisnull(self, op, guard_op, addr, arglocs, resloc):
-        guard_opnum == guard_op.opnum
+        guard_opnum = guard_op.opnum
         loc = arglocs[0]
         self.mc.TEST(loc, loc)
         if guard_opnum == rop.GUARD_TRUE:
@@ -786,7 +800,7 @@
         for arg in range(2, nargs + 2):
             extra_on_stack += round_up_to_4(arglocs[arg].width)
         extra_on_stack = self.align_stack_for_call(extra_on_stack)
-        self.mc.SUB(esp, imm(WORD * extra_on_stack))
+        self.mc.SUB(esp, imm(extra_on_stack))
         if isinstance(op.args[0], Const):
             x = rel32(op.args[0].getint())
         else:
@@ -817,7 +831,7 @@
             p += round_up_to_4(loc.width)
         self.mc.CALL(x)
         self.mark_gc_roots()
-        self.mc.ADD(esp, imm(WORD * extra_on_stack))
+        self.mc.ADD(esp, imm(extra_on_stack))
         if size == 1:
             self.mc.AND(eax, imm(0xff))
         elif size == 2:

Modified: pypy/branch/floats-via-sse2/pypy/jit/backend/x86/regalloc.py
==============================================================================
--- pypy/branch/floats-via-sse2/pypy/jit/backend/x86/regalloc.py	(original)
+++ pypy/branch/floats-via-sse2/pypy/jit/backend/x86/regalloc.py	Thu Oct  1 17:53:19 2009
@@ -110,7 +110,7 @@
         if size == 1:
             res = mem(ebp, get_ebp_ofs(i))
         elif size == 2:
-            res = mem64(ebp, get_ebp_ofs(i))
+            res = mem64(ebp, get_ebp_ofs(i + 1))
         else:
             print "Unimplemented size %d" % i
             raise NotImplementedError("unimplemented size %d" % i)
@@ -157,7 +157,8 @@
     def _process_inputargs(self, inputargs):
         # XXX we can sort out here by longevity if we need something
         # more optimal
-        locs = [None] * len(inputargs)
+        floatlocs = [None] * len(inputargs)
+        nonfloatlocs = [None] * len(inputargs)
         # Don't use all_regs[0] for passing arguments around a loop.
         # Must be kept in sync with consider_jump().
         # XXX this should probably go to llsupport/regalloc.py
@@ -177,17 +178,20 @@
                 else:
                     reg = self.rm.try_allocate_reg(arg)
             if reg:
-                locs[i] = reg
+                loc = reg
             else:
                 loc = self.sm.loc(arg, width_of_type[arg.type])
-                locs[i] = loc
+            if arg.type == FLOAT:
+                floatlocs[i] = loc
+            else:
+                nonfloatlocs[i] = loc
             # otherwise we have it saved on stack, so no worry
         self.rm.free_regs.insert(0, tmpreg)
         self.xrm.free_regs.insert(0, xmmtmp)
-        assert tmpreg not in locs
-        assert xmmtmp not in locs
+        assert tmpreg not in nonfloatlocs
+        assert xmmtmp not in floatlocs
         self.possibly_free_vars(inputargs)
-        return locs
+        return nonfloatlocs, floatlocs
 
     def possibly_free_var(self, var):
         if var.type == FLOAT:
@@ -317,7 +321,7 @@
             self.xrm.position = i
             if op.has_no_side_effect() and op.result not in self.longevity:
                 i += 1
-                self.rm.possibly_free_vars(op.args)
+                self.possibly_free_vars(op.args)
                 continue
             if self.can_optimize_cmp_op(op, i, operations):
                 oplist[op.opnum](self, op, operations[i + 1])
@@ -823,7 +827,7 @@
         assembler = self.assembler
         assert self.jump_target is None
         self.jump_target = op.jump_target
-        arglocs = assembler.target_arglocs(self.jump_target)
+        nonfloatlocs, floatlocs = assembler.target_arglocs(self.jump_target)
         # compute 'tmploc' to be all_regs[0] by spilling what is there
         box = TempBox()
         box1 = TempBox()
@@ -831,11 +835,15 @@
         tmploc = self.rm.force_allocate_reg(box, selected_reg=tmpreg)
         xmmtmp = X86XMMRegisterManager.all_regs[0]
         xmmtmploc = self.xrm.force_allocate_reg(box1, selected_reg=xmmtmp)
-        src_locations = [self.loc(arg) for arg in op.args]
-        dst_locations = arglocs
-        assert tmploc not in dst_locations
-        remap_stack_layout(assembler, src_locations, dst_locations, tmploc,
-                           xmmtmp)
+        # Part about non-floats
+        src_locations = [self.loc(arg) for arg in op.args if arg.type != FLOAT]
+        assert tmploc not in nonfloatlocs
+        dst_locations = [loc for loc in nonfloatlocs if loc is not None]
+        remap_stack_layout(assembler, src_locations, dst_locations, tmploc)
+        # Part about floats
+        src_locations = [self.loc(arg) for arg in op.args if arg.type == FLOAT]
+        dst_locations = [loc for loc in floatlocs if loc is not None]
+        remap_stack_layout(assembler, src_locations, dst_locations, xmmtmp)
         self.rm.possibly_free_var(box)
         self.xrm.possibly_free_var(box1)
         self.possibly_free_vars(op.args)



More information about the Pypy-commit mailing list