[pypy-commit] pypy regalloc-playground: try to fix 32bit

Wed Oct 18 06:46:40 EDT 2017

Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch: regalloc-playground
Changeset: r92787:e02007d88e4b
Date: 2017-10-18 12:46 +0200
http://bitbucket.org/pypy/pypy/changeset/e02007d88e4b/

Log:	try to fix 32bit

diff --git a/rpython/jit/backend/x86/callbuilder.py b/rpython/jit/backend/x86/callbuilder.py
--- a/rpython/jit/backend/x86/callbuilder.py
+++ b/rpython/jit/backend/x86/callbuilder.py
@@ -436,9 +436,11 @@
         self.subtract_esp_aligned(stack_depth - self.stack_max)
         #
         p = 0
+        num_moves = 0
         for i in range(n):
             loc = arglocs[i]
             if isinstance(loc, RegLoc):
+                num_moves += 1
                 if loc.is_xmm:
                     self.mc.MOVSD_sx(p, loc.value)
                 else:
@@ -449,11 +451,14 @@
             loc = arglocs[i]
             if not isinstance(loc, RegLoc):
                 if loc.get_width() == 8:
+                    num_moves += 2
                     self.mc.MOVSD(xmm0, loc)
                     self.mc.MOVSD_sx(p, xmm0.value)
                 elif isinstance(loc, ImmedLoc):
+                    num_moves += 1
                     self.mc.MOV_si(p, loc.value)
                 else:
+                    num_moves += 2
                     self.mc.MOV(eax, loc)
                     self.mc.MOV_sr(p, eax.value)
             p += loc.get_width()
@@ -461,7 +466,7 @@
         #
         if not self.fnloc_is_immediate:    # the last "argument" pushed above
             self.fnloc = RawEspLoc(p - WORD, INT)
-
+        self.num_moves = num_moves
 
     def emit_raw_call(self):
         if stdcall_or_cdecl and self.is_call_release_gil:
diff --git a/rpython/jit/backend/x86/test/test_regalloc.py b/rpython/jit/backend/x86/test/test_regalloc.py
--- a/rpython/jit/backend/x86/test/test_regalloc.py
+++ b/rpython/jit/backend/x86/test/test_regalloc.py
@@ -4,6 +4,7 @@
 
 from rpython.jit.backend.llsupport.test import test_regalloc_integration
 from rpython.jit.backend.x86.assembler import Assembler386
+from rpython.jit.backend.x86.arch import IS_X86_64
 
 class LogEntry(object):
     def __init__(self, position, name, *args):
@@ -59,7 +60,7 @@
         self._log("jump", self._regalloc.final_jump_op.getdescr()._x86_arglocs)
         return Assembler386.closing_jump(self, jump_target_descr)
 
-class TestCheckRegistersExplicitly(test_regalloc_integration.BaseTestRegalloc):
+class BaseTestCheckRegistersExplicitly(test_regalloc_integration.BaseTestRegalloc):
     def setup_class(cls):
         cls.cpu.assembler = LoggingAssembler(cls.cpu, False)
         cls.cpu.assembler.setup_once()
@@ -74,6 +75,7 @@
     def filter_log_moves(self):
         return [entry for entry in self.log if entry.name == "mov"]
 
+class TestCheckRegistersExplicitly(BaseTestCheckRegistersExplicitly):
     def test_unused(self):
         ops = '''
         [i0, i1, i2, i3]
@@ -121,22 +123,6 @@
         # i0 and i1, one for the result to the stack
         assert len(self.filter_log_moves()) == 3
 
-    def test_call_use_argument_twice(self):
-        ops = '''
-        [i0, i1, i2, i3]
-        i7 = int_add(i0, i1)
-        i8 = int_add(i2, 13)
-        i9 = call_i(ConstClass(f2ptr), i7, i7, descr=f2_calldescr)
-        i10 = int_is_true(i9)
-        guard_true(i10) [i8]
-        finish(i9)
-        '''
-        self.interpret(ops, [5, 6, 7, 8])
-        # two moves are needed from the stack frame to registers for arguments
-        # i0 and i1, one for the result to the stack
-        # one for the copy to the other argument register
-        assert len(self.filter_log_moves()) == 4
-
     @pytest.mark.skip("later")
     def test_same_stack_entry_many_times(self):
         ops = '''
@@ -227,39 +213,6 @@
         # that would break coalescing between i7 and i9)
         assert op.args[1][0] is add1.args[-1]
 
-    def test_coalescing_float(self):
-        ops = '''
-        [f0, f1, f3]
-        f7 = float_add(f0, f1)
-        f8 = float_add(f7, f3)
-        f9 = call_f(ConstClass(ffptr), f8, 1.0, descr=ff_calldescr)
-        i10 = float_ne(f9, 0.0)
-        guard_true(i10) []
-        finish(f9)
-        '''
-        self.interpret(ops, [5.0, 6.0, 8.0])
-        assert len(self.filter_log_moves()) == 3
-
-    def test_malloc(self, monkeypatch):
-        ops = '''
-        [i0]
-        i1 = int_add(i0, 1) # this is using ecx or edx because it fits
-        i6 = int_add(i0, 6) # this is using ecx or edx because it fits
-        i2 = int_add(i6, i1)
-        p0 = call_malloc_nursery(16)
-        gc_store(p0, 0, 83944, 8)
-        gc_store(p0, 8, i2, 8)
-        i10 = int_is_true(i2)
-        guard_true(i10) [p0, i0]
-        finish(p0)
-        '''
-        monkeypatch.setattr(self.cpu.gc_ll_descr, "get_nursery_top_addr", lambda: 61)
-        monkeypatch.setattr(self.cpu.gc_ll_descr, "get_nursery_free_addr", lambda: 68)
-        self.interpret(ops, [0], run=False)
-        # 2 moves, because the call_malloc_nursery hints prevent using ecx and
-        # edx for any of the integer results
-        assert len(self.filter_log_moves()) == 2
-
     def test_jump_hinting(self):
         self.targettoken._ll_loop_code = 0
         ops = '''
@@ -366,6 +319,60 @@
         # 4 moves, three for args, one for result
         assert len(self.filter_log_moves()) == 4
 
+class TestCheckRegistersExplicitly64(BaseTestCheckRegistersExplicitly):
+    def setup_class(self):
+        if not IS_X86_64:
+            pytest.skip("needs 64 bit")
+
+    def test_call_use_argument_twice(self):
+        ops = '''
+        [i0, i1, i2, i3]
+        i7 = int_add(i0, i1)
+        i8 = int_add(i2, 13)
+        i9 = call_i(ConstClass(f2ptr), i7, i7, descr=f2_calldescr)
+        i10 = int_is_true(i9)
+        guard_true(i10) [i8]
+        finish(i9)
+        '''
+        self.interpret(ops, [5, 6, 7, 8])
+        # two moves are needed from the stack frame to registers for arguments
+        # i0 and i1, one for the result to the stack
+        # one for the copy to the other argument register
+        assert len(self.filter_log_moves()) == 4
+
+    def test_coalescing_float(self):
+        ops = '''
+        [f0, f1, f3]
+        f7 = float_add(f0, f1)
+        f8 = float_add(f7, f3)
+        f9 = call_f(ConstClass(ffptr), f8, 1.0, descr=ff_calldescr)
+        i10 = float_ne(f9, 0.0)
+        guard_true(i10) []
+        finish(f9)
+        '''
+        self.interpret(ops, [5.0, 6.0, 8.0])
+        assert len(self.filter_log_moves()) == 3
+
+    def test_malloc(self, monkeypatch):
+        ops = '''
+        [i0]
+        i1 = int_add(i0, 1) # this is using ecx or edx because it fits
+        i6 = int_add(i0, 6) # this is using ecx or edx because it fits
+        i2 = int_add(i6, i1)
+        p0 = call_malloc_nursery(16)
+        gc_store(p0, 0, 83944, 8)
+        gc_store(p0, 8, i2, 8)
+        i10 = int_is_true(i2)
+        guard_true(i10) [p0, i0]
+        finish(p0)
+        '''
+        monkeypatch.setattr(self.cpu.gc_ll_descr, "get_nursery_top_addr", lambda: 61)
+        monkeypatch.setattr(self.cpu.gc_ll_descr, "get_nursery_free_addr", lambda: 68)
+        self.interpret(ops, [0], run=False)
+        # 2 moves, because the call_malloc_nursery hints prevent using ecx and
+        # edx for any of the integer results
+        assert len(self.filter_log_moves()) == 2
+
     def test_dict_lookup(self, monkeypatch):
         monkeypatch.setattr(self.cpu.gc_ll_descr, "get_nursery_top_addr", lambda: 61)
         monkeypatch.setattr(self.cpu.gc_ll_descr, "get_nursery_free_addr", lambda: 68)