[pypy-commit] pypy default: Painfully insert the same NOPs as gcc does before labels to align them
arigo
noreply at buildbot.pypy.org
Sun Feb 22 18:23:01 CET 2015
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r76054:b6b0af53a51d
Date: 2015-02-22 18:22 +0100
http://bitbucket.org/pypy/pypy/changeset/b6b0af53a51d/
Log: Painfully insert the same NOPs as gcc does before labels to align
them to 16 bytes.
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -339,14 +339,21 @@
self.possibly_free_var(arg)
def flush_loop(self):
+ # Force the code to be aligned to a multiple of 16. Also,
# rare case: if the loop is too short, or if we are just after
- # a GUARD_NOT_INVALIDATED, pad with NOPs. Important! This must
- # be called to ensure that there are enough bytes produced,
- # because GUARD_NOT_INVALIDATED or redirect_call_assembler()
- # will maybe overwrite them.
+ # a GUARD_NOT_INVALIDATED, we need to make sure we insert enough
+ # NOPs. This is important to ensure that there are enough bytes
+ # produced, because GUARD_NOT_INVALIDATED or
+ # redirect_call_assembler() may overwrite them. (In that
+ # rare case we don't worry too much about alignment.)
mc = self.assembler.mc
- while mc.get_relative_pos() < self.min_bytes_before_label:
- mc.NOP()
+ current_pos = mc.get_relative_pos()
+ target_pos = (current_pos + 15) & ~15
+ target_pos = max(target_pos, self.min_bytes_before_label)
+ insert_nops = target_pos - current_pos
+ assert 0 <= insert_nops <= 15
+ for c in mc.MULTIBYTE_NOPs[insert_nops]:
+ mc.writechar(c)
def loc(self, v):
if v is None: # xxx kludgy
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -671,11 +671,39 @@
def invert_condition(cond_num):
return cond_num ^ 1
+
class X86_32_CodeBuilder(AbstractX86CodeBuilder):
WORD = 4
PMOVMSKB_rx = xmminsn('\x66', rex_nw, '\x0F\xD7', register(1, 8), register(2), '\xC0')
+ # multibyte nops, from 0 to 15 bytes
+ MULTIBYTE_NOPs = [
+ '',
+ '\x90', # nop
+ '\x66\x90', # xchg ax, ax
+ '\x8d\x76\x00', # lea 0x0(%esi),%esi
+ '\x8d\x74\x26\x00', # lea 0x0(%esi,%eiz,1),%esi
+ '\x90\x8d\x74\x26\x00', # nop; lea 0x0(%esi,%eiz,1),%esi
+ '\x8d\xb6\x00\x00\x00\x00', # lea 0x0(%esi),%esi
+ '\x8d\xb4\x26\x00\x00\x00\x00', # lea 0x0(%esi,%eiz,1),%esi
+ ('\x90' # nop
+ '\x8d\xb4\x26\x00\x00\x00\x00'),# lea 0x0(%esi,%eiz,1),%esi
+ ('\x89\xf6' # mov %esi,%esi
+ '\x8d\xbc\x27\x00\x00\x00\x00'),# lea 0x0(%edi,%eiz,1),%edi
+ ('\x8d\x76\x00' # lea 0x0(%esi),%esi
+ '\x8d\xbc\x27\x00\x00\x00\x00'),# lea 0x0(%edi,%eiz,1),%edi
+ ('\x8d\x74\x26\x00' # lea 0x0(%esi,%eiz,1),%esi
+ '\x8d\xbc\x27\x00\x00\x00\x00'),# lea 0x0(%edi,%eiz,1),%edi
+ ('\x8d\xb6\x00\x00\x00\x00' # lea 0x0(%esi),%esi
+ '\x8d\xbf\x00\x00\x00\x00'), # lea 0x0(%edi),%edi
+ ('\x8d\xb6\x00\x00\x00\x00' # lea 0x0(%esi),%esi
+ '\x8d\xbc\x27\x00\x00\x00\x00'),# lea 0x0(%edi,%eiz,1),%edi
+ ('\x8d\xb4\x26\x00\x00\x00\x00' # lea 0x0(%esi,%eiz,1),%esi
+ '\x8d\xbc\x27\x00\x00\x00\x00'),# lea 0x0(%edi,%eiz,1),%edi
+ ('\xeb\x0d' + '\x90' * 13)] # jmp +0x0d; a bunch of nops
+
+
class X86_64_CodeBuilder(AbstractX86CodeBuilder):
WORD = 8
@@ -706,6 +734,24 @@
else:
self.MOV_ri64(reg, immed)
+ # multibyte nops, from 0 to 15 bytes
+ MULTIBYTE_NOPs = ([
+ '',
+ '\x90', # nop
+ '\x66\x90', # xchg ax, ax
+ '\x0f\x1f\x00', # nopl (%rax)
+ '\x0f\x1f\x40\x00', # nopl 0x0(%rax)
+ '\x0f\x1f\x44\x00\x00', # nopl 0x0(%rax,%rax,1)
+ '\x66\x0f\x1f\x44\x00\x00', # nopw 0x0(%rax,%rax,1)
+ '\x0f\x1f\x80\x00\x00\x00\x00', # nopl 0x0(%rax)
+ ('\x0f\x1f\x84\x00\x00\x00\x00' # nopl 0x0(%rax,%rax,1)
+ '\x00'),
+ ('\x66\x0f\x1f\x84\x00\x00\x00' # nopw 0x0(%rax,%rax,1)
+ '\x00\x00')] +
+ ['\x66' * _i + '\x2e\x0f\x1f' # nopw %cs:0x0(%rax,%rax,1)
+ '\x84\x00\x00\x00\x00\x00' for _i in range(1, 7)])
+
+
def define_modrm_modes(insnname_template, before_modrm, after_modrm=[], regtype='GPR'):
def add_insn(code, *modrm):
args = before_modrm + list(modrm)
diff --git a/rpython/jit/backend/x86/test/test_runner.py b/rpython/jit/backend/x86/test/test_runner.py
--- a/rpython/jit/backend/x86/test/test_runner.py
+++ b/rpython/jit/backend/x86/test/test_runner.py
@@ -30,14 +30,21 @@
# for the individual tests see
# ====> ../../test/runner_test.py
- add_loop_instructions = ['mov', 'add', 'test', 'je', 'jmp']
+ add_loop_instructions = ['mov',
+ 'nop', # for the label
+ 'add', 'test', 'je', 'jmp',
+ 'data32', # padding
+ ]
if WORD == 4:
- bridge_loop_instructions = ['cmp', 'jge', 'mov', 'mov', 'call', 'jmp']
+ bridge_loop_instructions = ['cmp', 'jge', 'mov', 'mov', 'call', 'jmp',
+ 'nop'] # padding
else:
bridge_loop_instructions = [
- 'cmp', 'jge', 'mov', 'mov', 'mov', 'mov', 'call', 'mov', 'jmp']
+ 'cmp', 'jge', 'mov', 'mov', 'mov', 'mov', 'call', 'mov', 'jmp',
+ 'nop'] # padding
bridge_loop_instructions_alternative = [
- 'cmp', 'jge', 'mov', 'mov', 'mov', 'call', 'mov', 'jmp']
+ 'cmp', 'jge', 'mov', 'mov', 'mov', 'call', 'mov', 'jmp',
+ 'nop'] # padding
def get_cpu(self):
cpu = CPU(rtyper=None, stats=FakeStats())
diff --git a/rpython/jit/backend/x86/test/test_rx86.py b/rpython/jit/backend/x86/test/test_rx86.py
--- a/rpython/jit/backend/x86/test/test_rx86.py
+++ b/rpython/jit/backend/x86/test/test_rx86.py
@@ -229,3 +229,9 @@
s = CodeBuilder64()
s.MOVSD_xj(xmm2, 0x01234567)
assert s.getvalue() == '\xF2\x0F\x10\x14\x25\x67\x45\x23\x01'
+
+def test_multibyte_nops():
+ for cls in [X86_64_CodeBuilder, X86_32_CodeBuilder]:
+ assert len(cls.MULTIBYTE_NOPs) == 16
+ for i in range(16):
+ assert len(cls.MULTIBYTE_NOPs[i]) == i
More information about the pypy-commit
mailing list