[pypy-commit] pypy default: Painfully insert the same NOPs as gcc does before labels to align them

arigo noreply at buildbot.pypy.org
Sun Feb 22 18:23:01 CET 2015


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r76054:b6b0af53a51d
Date: 2015-02-22 18:22 +0100
http://bitbucket.org/pypy/pypy/changeset/b6b0af53a51d/

Log:	Painfully insert the same NOPs as gcc does before labels to align
	them to 16 bytes.

diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -339,14 +339,21 @@
             self.possibly_free_var(arg)
 
     def flush_loop(self):
+        # Force the code to be aligned to a multiple of 16.  Also,
         # rare case: if the loop is too short, or if we are just after
-        # a GUARD_NOT_INVALIDATED, pad with NOPs.  Important!  This must
-        # be called to ensure that there are enough bytes produced,
-        # because GUARD_NOT_INVALIDATED or redirect_call_assembler()
-        # will maybe overwrite them.
+        # a GUARD_NOT_INVALIDATED, we need to make sure we insert enough
+        # NOPs.  This is important to ensure that there are enough bytes
+        # produced, because GUARD_NOT_INVALIDATED or
+        # redirect_call_assembler() will maybe overwrite them.  (In that
+        # rare case we don't worry too much about alignment.)
         mc = self.assembler.mc
-        while mc.get_relative_pos() < self.min_bytes_before_label:
-            mc.NOP()
+        current_pos = mc.get_relative_pos()
+        target_pos = (current_pos + 15) & ~15
+        target_pos = max(target_pos, self.min_bytes_before_label)
+        insert_nops = target_pos - current_pos
+        assert 0 <= insert_nops <= 15
+        for c in mc.MULTIBYTE_NOPs[insert_nops]:
+            mc.writechar(c)
 
     def loc(self, v):
         if v is None: # xxx kludgy
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -671,11 +671,39 @@
 def invert_condition(cond_num):
     return cond_num ^ 1
 
+
 class X86_32_CodeBuilder(AbstractX86CodeBuilder):
     WORD = 4
 
     PMOVMSKB_rx = xmminsn('\x66', rex_nw, '\x0F\xD7', register(1, 8), register(2), '\xC0')
 
+    # multibyte nops, from 0 to 15 bytes
+    MULTIBYTE_NOPs = [
+        '',
+        '\x90',                          # nop
+        '\x66\x90',                      # xchg ax, ax
+        '\x8d\x76\x00',                  # lea    0x0(%esi),%esi
+        '\x8d\x74\x26\x00',              # lea    0x0(%esi,%eiz,1),%esi
+        '\x90\x8d\x74\x26\x00',          # nop; lea 0x0(%esi,%eiz,1),%esi
+        '\x8d\xb6\x00\x00\x00\x00',      # lea    0x0(%esi),%esi
+        '\x8d\xb4\x26\x00\x00\x00\x00',  # lea    0x0(%esi,%eiz,1),%esi
+        ('\x90'                          # nop
+         '\x8d\xb4\x26\x00\x00\x00\x00'),#   lea    0x0(%esi,%eiz,1),%esi
+        ('\x89\xf6'                      # mov    %esi,%esi
+         '\x8d\xbc\x27\x00\x00\x00\x00'),#   lea    0x0(%edi,%eiz,1),%edi
+        ('\x8d\x76\x00'                  # lea    0x0(%esi),%esi
+         '\x8d\xbc\x27\x00\x00\x00\x00'),#   lea    0x0(%edi,%eiz,1),%edi
+        ('\x8d\x74\x26\x00'              # lea    0x0(%esi,%eiz,1),%esi
+         '\x8d\xbc\x27\x00\x00\x00\x00'),#   lea    0x0(%edi,%eiz,1),%edi
+        ('\x8d\xb6\x00\x00\x00\x00'      # lea    0x0(%esi),%esi
+         '\x8d\xbf\x00\x00\x00\x00'),    #   lea    0x0(%edi),%edi
+        ('\x8d\xb6\x00\x00\x00\x00'      # lea    0x0(%esi),%esi
+         '\x8d\xbc\x27\x00\x00\x00\x00'),#   lea    0x0(%edi,%eiz,1),%edi
+        ('\x8d\xb4\x26\x00\x00\x00\x00'  # lea    0x0(%esi,%eiz,1),%esi
+         '\x8d\xbc\x27\x00\x00\x00\x00'),#   lea    0x0(%edi,%eiz,1),%edi
+        ('\xeb\x0d' + '\x90' * 13)]      # jmp +x0d; a bunch of nops
+
+
 class X86_64_CodeBuilder(AbstractX86CodeBuilder):
     WORD = 8
 
@@ -706,6 +734,24 @@
         else:
             self.MOV_ri64(reg, immed)
 
+    # multibyte nops, from 0 to 15 bytes
+    MULTIBYTE_NOPs = ([
+        '',
+        '\x90',                          # nop
+        '\x66\x90',                      # xchg ax, ax
+        '\x0f\x1f\x00',                  # nopl   (%rax)
+        '\x0f\x1f\x40\x00',              # nopl   0x0(%rax)
+        '\x0f\x1f\x44\x00\x00',          # nopl   0x0(%rax,%rax,1)
+        '\x66\x0f\x1f\x44\x00\x00',      # nopw   0x0(%rax,%rax,1)
+        '\x0f\x1f\x80\x00\x00\x00\x00',  # nopl   0x0(%rax)
+        ('\x0f\x1f\x84\x00\x00\x00\x00'  # nopl   0x0(%rax,%rax,1)
+         '\x00'),
+        ('\x66\x0f\x1f\x84\x00\x00\x00'  # nopw   0x0(%rax,%rax,1)
+         '\x00\x00')] +
+        ['\x66' * _i + '\x2e\x0f\x1f'    # nopw   %cs:0x0(%rax,%rax,1)
+         '\x84\x00\x00\x00\x00\x00' for _i in range(1, 7)])
+
+
 def define_modrm_modes(insnname_template, before_modrm, after_modrm=[], regtype='GPR'):
     def add_insn(code, *modrm):
         args = before_modrm + list(modrm)
diff --git a/rpython/jit/backend/x86/test/test_runner.py b/rpython/jit/backend/x86/test/test_runner.py
--- a/rpython/jit/backend/x86/test/test_runner.py
+++ b/rpython/jit/backend/x86/test/test_runner.py
@@ -30,14 +30,21 @@
     # for the individual tests see
     # ====> ../../test/runner_test.py
 
-    add_loop_instructions = ['mov', 'add', 'test', 'je', 'jmp']
+    add_loop_instructions = ['mov',
+                             'nop',    # for the label
+                             'add', 'test', 'je', 'jmp',
+                             'data32',   # padding
+                             ]
     if WORD == 4:
-        bridge_loop_instructions = ['cmp', 'jge', 'mov', 'mov', 'call', 'jmp']
+        bridge_loop_instructions = ['cmp', 'jge', 'mov', 'mov', 'call', 'jmp',
+                                    'nop']   # padding
     else:
         bridge_loop_instructions = [
-            'cmp', 'jge', 'mov', 'mov', 'mov', 'mov', 'call', 'mov', 'jmp']
+            'cmp', 'jge', 'mov', 'mov', 'mov', 'mov', 'call', 'mov', 'jmp',
+            'nop']      # padding
         bridge_loop_instructions_alternative = [
-            'cmp', 'jge', 'mov', 'mov', 'mov', 'call', 'mov', 'jmp']
+            'cmp', 'jge', 'mov', 'mov', 'mov', 'call', 'mov', 'jmp',
+            'nop']      # padding
 
     def get_cpu(self):
         cpu = CPU(rtyper=None, stats=FakeStats())
diff --git a/rpython/jit/backend/x86/test/test_rx86.py b/rpython/jit/backend/x86/test/test_rx86.py
--- a/rpython/jit/backend/x86/test/test_rx86.py
+++ b/rpython/jit/backend/x86/test/test_rx86.py
@@ -229,3 +229,9 @@
     s = CodeBuilder64()
     s.MOVSD_xj(xmm2, 0x01234567)
     assert s.getvalue() == '\xF2\x0F\x10\x14\x25\x67\x45\x23\x01'
+
+def test_multibyte_nops():
+    for cls in [X86_64_CodeBuilder, X86_32_CodeBuilder]:
+        assert len(cls.MULTIBYTE_NOPs) == 16
+        for i in range(16):
+            assert len(cls.MULTIBYTE_NOPs[i]) == i


More information about the pypy-commit mailing list