[pypy-commit] pypy remove-raisingops: Attempt: track writes to r11 in order to generate less "movabs r11, 64-bit constant"
arigo
pypy.commits at gmail.com
Sat May 28 05:32:58 EDT 2016
Author: Armin Rigo <arigo at tunes.org>
Branch: remove-raisingops
Changeset: r84780:cb1508c1e40f
Date: 2016-05-28 11:33 +0200
http://bitbucket.org/pypy/pypy/changeset/cb1508c1e40f/
Log: Attempt: track writes to r11 in order to generate less "movabs r11,
64-bit constant"
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -51,6 +51,8 @@
def fits_in_32bits(value):
return -2147483648 <= value <= 2147483647
+_SCRATCH_REG = R.r11
+
# ____________________________________________________________
# Emit a single char
@@ -74,6 +76,12 @@
return orbyte | (reg_number_3bits(mc, reg) * factor)
@specialize.arg(2)
+def encode_register_out(mc, reg, factor, orbyte):
+ if reg != _SCRATCH_REG:
+ mc._dont_clobber_scratch_reg += 1
+ return encode_register(mc, reg, factor, orbyte)
+
+@specialize.arg(2)
def rex_register(mc, reg, factor):
if reg >= 8:
if factor == 1:
@@ -88,6 +96,12 @@
assert factor in (1, 8)
return encode_register, argnum, factor, rex_register
+def reg_out(argnum, factor=1):
+ # only for instructions that are not jump/calls, and that emit only
+ # their output in this register (plus optionally some flags).
+ assert factor in (1, 8)
+ return encode_register_out, argnum, factor, rex_register
+
@specialize.arg(2)
def rex_byte_register(mc, reg, factor):
assert reg & BYTE_REG_FLAG
@@ -98,10 +112,29 @@
assert reg & BYTE_REG_FLAG
return encode_register(mc, reg & ~BYTE_REG_FLAG, factor, orbyte)
+@specialize.arg(2)
+def encode_byte_register_out(mc, reg, factor, orbyte):
+ if reg != (_SCRATCH_REG | BYTE_REG_FLAG):
+ mc._dont_clobber_scratch_reg += 1
+ return encode_byte_register(mc, reg, factor, orbyte)
+
def byte_register(argnum, factor=1):
assert factor in (1, 8)
return encode_byte_register, argnum, factor, rex_byte_register
+def byte_reg_out(argnum, factor=1):
+ assert factor in (1, 8)
+ return encode_byte_register_out, argnum, factor, rex_byte_register
+
+# ____________________________________________________________
+# Marker for instructions with no registers written and
+# that are not jumps/calls
+
+def encode_no_reg_out(mc, _1, _2, orbyte):
+ mc._dont_clobber_scratch_reg += 1
+ return orbyte
+
+no_reg_out = encode_no_reg_out, None, None, None
# ____________________________________________________________
# Encode a constant in the orbyte
@@ -362,12 +395,18 @@
rexbyte |= rex_step(mc, arg, extra)
args = (rexbyte,) + args
# emit the bytes of the instruction
+ mc._dont_clobber_scratch_reg = 0
orbyte = 0
for encode_step, arg, extra, rex_step in encoding_steps:
if arg is not None:
arg = args[arg]
orbyte = encode_step(mc, arg, extra, orbyte)
assert orbyte == 0
+ if mc.WORD == 8:
+ if mc._dont_clobber_scratch_reg == 0:
+ mc.clobber_scratch_reg()
+ else:
+ assert mc._dont_clobber_scratch_reg == 1
#
encoding_steps = []
@@ -389,18 +428,21 @@
def common_modes(group):
base = group * 8
char = chr(0xC0 | base)
- INSN_ri8 = insn(rex_w, '\x83', register(1), char, immediate(2,'b'))
- INSN_ri32= insn(rex_w, '\x81', register(1), char, immediate(2))
- INSN_rr = insn(rex_w, chr(base+1), register(2,8), register(1,1), '\xC0')
- INSN_br = insn(rex_w, chr(base+1), register(2,8), stack_bp(1))
- INSN_rb = insn(rex_w, chr(base+3), register(1,8), stack_bp(2))
- INSN_rm = insn(rex_w, chr(base+3), register(1,8), mem_reg_plus_const(2))
- INSN_rj = insn(rex_w, chr(base+3), register(1,8), abs_(2))
- INSN_ji8 = insn(rex_w, '\x83', orbyte(base), abs_(1), immediate(2,'b'))
+ INSN_ri8 = insn(rex_w, '\x83', reg_out(1), char, immediate(2,'b'))
+ INSN_ri32= insn(rex_w, '\x81', reg_out(1), char, immediate(2))
+ INSN_rr = insn(rex_w, chr(base+1), register(2,8), reg_out(1,1), '\xC0')
+ INSN_br = insn(rex_w, chr(base+1), register(2,8), stack_bp(1), no_reg_out)
+ INSN_rb = insn(rex_w, chr(base+3), reg_out(1,8), stack_bp(2))
+ INSN_rm = insn(rex_w, chr(base+3), reg_out(1,8), mem_reg_plus_const(2))
+ INSN_rj = insn(rex_w, chr(base+3), reg_out(1,8), abs_(2))
+ INSN_ji8 = insn(rex_w, '\x83', orbyte(base), abs_(1),
+ immediate(2,'b'), no_reg_out)
INSN_mi8 = insn(rex_w, '\x83', orbyte(base), mem_reg_plus_const(1),
- immediate(2,'b'))
- INSN_bi8 = insn(rex_w, '\x83', orbyte(base), stack_bp(1), immediate(2,'b'))
- INSN_bi32= insn(rex_w, '\x81', orbyte(base), stack_bp(1), immediate(2))
+ immediate(2,'b'), no_reg_out)
+ INSN_bi8 = insn(rex_w, '\x83', orbyte(base), stack_bp(1),
+ immediate(2,'b'), no_reg_out)
+ INSN_bi32= insn(rex_w, '\x81', orbyte(base), stack_bp(1),
+ immediate(2), no_reg_out)
def INSN_ri(mc, reg, immed):
if single_byte(immed):
@@ -432,9 +474,9 @@
def shifts(mod_field):
modrm = chr(0xC0 | (mod_field << 3))
- shift_once = insn(rex_w, '\xD1', register(1), modrm)
- shift_r_by_cl = insn(rex_w, '\xD3', register(1), modrm)
- shift_ri8 = insn(rex_w, '\xC1', register(1), modrm, immediate(2, 'b'))
+ shift_once = insn(rex_w, '\xD1', reg_out(1), modrm)
+ shift_r_by_cl = insn(rex_w, '\xD3', reg_out(1), modrm)
+ shift_ri8 = insn(rex_w, '\xC1', reg_out(1), modrm, immediate(2, 'b'))
def shift_ri(mc, reg, immed):
if immed == 1:
@@ -505,13 +547,13 @@
# ------------------------------ MOV ------------------------------
- MOV_ri = insn(register(1), '\xB8', immediate(2))
- MOV8_ri = insn(rex_fw, byte_register(1), '\xB0', immediate(2, 'b'))
+ MOV_ri = insn(reg_out(1), '\xB8', immediate(2))
+ MOV8_ri = insn(rex_fw, byte_reg_out(1), '\xB0', immediate(2, 'b'))
# ------------------------------ Arithmetic ------------------------------
- INC_m = insn(rex_w, '\xFF', orbyte(0), mem_reg_plus_const(1))
- INC_j = insn(rex_w, '\xFF', orbyte(0), abs_(1))
+ INC_m = insn(rex_w, '\xFF', orbyte(0), mem_reg_plus_const(1), no_reg_out)
+ INC_j = insn(rex_w, '\xFF', orbyte(0), abs_(1), no_reg_out)
AD1_ri,ADD_rr,ADD_rb,_,_,ADD_rm,ADD_rj,_,_ = common_modes(0)
OR_ri, OR_rr, OR_rb, _,_,OR_rm, OR_rj, _,_ = common_modes(1)
@@ -531,29 +573,37 @@
if reg == R.esp:
self.stack_frame_size_delta(+immed)
- CMP_mi8 = insn(rex_w, '\x83', orbyte(7<<3), mem_reg_plus_const(1), immediate(2, 'b'))
- CMP_mi32 = insn(rex_w, '\x81', orbyte(7<<3), mem_reg_plus_const(1), immediate(2))
+ CMP_mi8 = insn(rex_w, '\x83', orbyte(7<<3), mem_reg_plus_const(1),
+ immediate(2, 'b'), no_reg_out)
+ CMP_mi32 = insn(rex_w, '\x81', orbyte(7<<3), mem_reg_plus_const(1),
+ immediate(2), no_reg_out)
CMP_mi = select_8_or_32_bit_immed(CMP_mi8, CMP_mi32)
- CMP_mr = insn(rex_w, '\x39', register(2, 8), mem_reg_plus_const(1))
+ CMP_mr = insn(rex_w, '\x39', register(2, 8), mem_reg_plus_const(1),
+ no_reg_out)
- CMP_ji8 = insn(rex_w, '\x83', orbyte(7<<3), abs_(1), immediate(2, 'b'))
- CMP_ji32 = insn(rex_w, '\x81', orbyte(7<<3), abs_(1), immediate(2))
+ CMP_ji8 = insn(rex_w, '\x83', orbyte(7<<3), abs_(1), immediate(2, 'b'),
+ no_reg_out)
+ CMP_ji32 = insn(rex_w, '\x81', orbyte(7<<3), abs_(1), immediate(2),
+ no_reg_out)
CMP_ji = select_8_or_32_bit_immed(CMP_ji8, CMP_ji32)
- CMP_jr = insn(rex_w, '\x39', register(2, 8), abs_(1))
+ CMP_jr = insn(rex_w, '\x39', register(2, 8), abs_(1), no_reg_out)
- CMP32_mi = insn(rex_nw, '\x81', orbyte(7<<3), mem_reg_plus_const(1), immediate(2))
- CMP16_mi = insn('\x66', rex_nw, '\x81', orbyte(7<<3), mem_reg_plus_const(1), immediate(2, 'h'))
- CMP8_ri = insn(rex_fw, '\x80', byte_register(1), '\xF8', immediate(2, 'b'))
+ CMP32_mi = insn(rex_nw, '\x81', orbyte(7<<3), mem_reg_plus_const(1),
+ immediate(2), no_reg_out)
+ CMP16_mi = insn('\x66', rex_nw, '\x81', orbyte(7<<3), mem_reg_plus_const(1),
+ immediate(2, 'h'), no_reg_out)
+ CMP8_ri = insn(rex_fw, '\x80', byte_register(1), '\xF8',
+ immediate(2, 'b'), no_reg_out)
- AND8_rr = insn(rex_fw, '\x20', byte_register(1), byte_register(2,8), '\xC0')
+ AND8_rr = insn(rex_fw, '\x20', byte_reg_out(1), byte_register(2,8), '\xC0')
- OR8_rr = insn(rex_fw, '\x08', byte_register(1), byte_register(2,8), '\xC0')
+ OR8_rr = insn(rex_fw, '\x08', byte_reg_out(1), byte_register(2,8), '\xC0')
OR8_mi = insn(rex_nw, '\x80', orbyte(1<<3), mem_reg_plus_const(1),
- immediate(2, 'b'))
+ immediate(2, 'b'), no_reg_out)
OR8_ji = insn(rex_nw, '\x80', orbyte(1<<3), abs_(1),
- immediate(2, 'b'))
+ immediate(2, 'b'), no_reg_out)
- NEG_r = insn(rex_w, '\xF7', register(1), '\xD8')
+ NEG_r = insn(rex_w, '\xF7', reg_out(1), '\xD8')
DIV_r = insn(rex_w, '\xF7', register(1), '\xF0')
IDIV_r = insn(rex_w, '\xF7', register(1), '\xF8')
@@ -561,11 +611,11 @@
MUL_r = insn(rex_w, '\xF7', orbyte(4<<3), register(1), '\xC0')
MUL_b = insn(rex_w, '\xF7', orbyte(4<<3), stack_bp(1))
- IMUL_rr = insn(rex_w, '\x0F\xAF', register(1, 8), register(2), '\xC0')
- IMUL_rb = insn(rex_w, '\x0F\xAF', register(1, 8), stack_bp(2))
+ IMUL_rr = insn(rex_w, '\x0F\xAF', reg_out(1, 8), register(2), '\xC0')
+ IMUL_rb = insn(rex_w, '\x0F\xAF', reg_out(1, 8), stack_bp(2))
- IMUL_rri8 = insn(rex_w, '\x6B', register(1, 8), register(2), '\xC0', immediate(3, 'b'))
- IMUL_rri32 = insn(rex_w, '\x69', register(1, 8), register(2), '\xC0', immediate(3))
+ IMUL_rri8 = insn(rex_w, '\x6B', reg_out(1, 8), register(2), '\xC0', immediate(3, 'b'))
+ IMUL_rri32 = insn(rex_w, '\x69', reg_out(1, 8), register(2), '\xC0', immediate(3))
IMUL_rri = select_8_or_32_bit_immed(IMUL_rri8, IMUL_rri32)
def IMUL_ri(self, reg, immed):
@@ -575,14 +625,14 @@
SHR_ri, SHR_rr = shifts(5)
SAR_ri, SAR_rr = shifts(7)
- NOT_r = insn(rex_w, '\xF7', register(1), '\xD0')
- NOT_b = insn(rex_w, '\xF7', orbyte(2<<3), stack_bp(1))
+ NOT_r = insn(rex_w, '\xF7', reg_out(1), '\xD0')
+ NOT_b = insn(rex_w, '\xF7', orbyte(2<<3), stack_bp(1), no_reg_out)
- CMOVNS_rr = insn(rex_w, '\x0F\x49', register(1, 8), register(2), '\xC0')
+ CMOVNS_rr = insn(rex_w, '\x0F\x49', reg_out(1, 8), register(2), '\xC0')
# ------------------------------ Misc stuff ------------------------------
- NOP = insn('\x90')
+ NOP = insn('\x90', no_reg_out)
RE1 = insn('\xC3')
RE116_i = insn('\xC2', immediate(1, 'h'))
@@ -594,13 +644,14 @@
self.check_stack_size_at_ret()
self.RE116_i(immed)
- PUS1_r = insn(rex_nw, register(1), '\x50')
- PUS1_b = insn(rex_nw, '\xFF', orbyte(6<<3), stack_bp(1))
- PUS1_m = insn(rex_nw, '\xFF', orbyte(6<<3), mem_reg_plus_const(1))
- PUS1_j = insn(rex_nw, '\xFF', orbyte(6<<3), abs_(1))
- PUS1_p = insn(rex_nw, '\xFF', orbyte(6<<3), rip_offset(1))
- PUS1_i8 = insn('\x6A', immediate(1, 'b'))
- PUS1_i32 = insn('\x68', immediate(1, 'i'))
+ PUS1_r = insn(rex_nw, register(1), '\x50', no_reg_out)
+ PUS1_b = insn(rex_nw, '\xFF', orbyte(6<<3), stack_bp(1), no_reg_out)
+ PUS1_m = insn(rex_nw, '\xFF', orbyte(6<<3), mem_reg_plus_const(1),
+ no_reg_out)
+ PUS1_j = insn(rex_nw, '\xFF', orbyte(6<<3), abs_(1), no_reg_out)
+ PUS1_p = insn(rex_nw, '\xFF', orbyte(6<<3), rip_offset(1), no_reg_out)
+ PUS1_i8 = insn('\x6A', immediate(1, 'b'), no_reg_out)
+ PUS1_i32 = insn('\x68', immediate(1, 'i'), no_reg_out)
def PUSH_r(self, reg):
self.PUS1_r(reg)
@@ -629,8 +680,8 @@
self.PUS1_p(rip_offset)
self.stack_frame_size_delta(+self.WORD)
- PO1_r = insn(rex_nw, register(1), '\x58')
- PO1_b = insn(rex_nw, '\x8F', orbyte(0<<3), stack_bp(1))
+ PO1_r = insn(rex_nw, reg_out(1), '\x58')
+ PO1_b = insn(rex_nw, '\x8F', orbyte(0<<3), stack_bp(1), no_reg_out)
def POP_r(self, reg):
self.PO1_r(reg)
@@ -640,12 +691,12 @@
self.PO1_b(ofs)
self.stack_frame_size_delta(-self.WORD)
- LEA_rb = insn(rex_w, '\x8D', register(1,8), stack_bp(2))
- LE1_rs = insn(rex_w, '\x8D', register(1,8), stack_sp(2))
- LEA32_rb = insn(rex_w, '\x8D', register(1,8),stack_bp(2,force_32bits=True))
- LEA_ra = insn(rex_w, '\x8D', register(1, 8), mem_reg_plus_scaled_reg_plus_const(2))
- LEA_rm = insn(rex_w, '\x8D', register(1, 8), mem_reg_plus_const(2))
- LEA_rj = insn(rex_w, '\x8D', register(1, 8), abs_(2))
+ LEA_rb = insn(rex_w, '\x8D', reg_out(1,8), stack_bp(2))
+ LE1_rs = insn(rex_w, '\x8D', reg_out(1,8), stack_sp(2))
+ LEA32_rb = insn(rex_w, '\x8D', reg_out(1,8),stack_bp(2,force_32bits=True))
+ LEA_ra = insn(rex_w, '\x8D', reg_out(1, 8), mem_reg_plus_scaled_reg_plus_const(2))
+ LEA_rm = insn(rex_w, '\x8D', reg_out(1, 8), mem_reg_plus_const(2))
+ LEA_rj = insn(rex_w, '\x8D', reg_out(1, 8), abs_(2))
def LEA_rs(self, reg, ofs):
self.LE1_rs(reg, ofs)
@@ -686,29 +737,41 @@
if not we_are_translated():
self._frame_size = None
- SET_ir = insn(rex_fw, '\x0F', immediate(1,'o'),'\x90', byte_register(2), '\xC0')
+ SET_ir = insn(rex_fw, '\x0F', immediate(1,'o'),'\x90', byte_reg_out(2), '\xC0')
# The 64-bit version of this, CQO, is defined in X86_64_CodeBuilder
CDQ = insn(rex_nw, '\x99')
- TEST8_mi = insn(rex_nw, '\xF6', orbyte(0<<3), mem_reg_plus_const(1), immediate(2, 'b'))
- TEST8_ai = insn(rex_nw, '\xF6', orbyte(0<<3), mem_reg_plus_scaled_reg_plus_const(1), immediate(2, 'b'))
- TEST8_bi = insn(rex_nw, '\xF6', orbyte(0<<3), stack_bp(1), immediate(2, 'b'))
- TEST8_ji = insn(rex_nw, '\xF6', orbyte(0<<3), abs_(1), immediate(2, 'b'))
- TEST_rr = insn(rex_w, '\x85', register(2,8), register(1), '\xC0')
- TEST_ai = insn(rex_w, '\xF7', orbyte(0<<3), mem_reg_plus_scaled_reg_plus_const(1), immediate(2))
- TEST_mi = insn(rex_w, '\xF7', orbyte(0<<3), mem_reg_plus_const(1), immediate(2))
- TEST_ji = insn(rex_w, '\xF7', orbyte(0<<3), abs_(1), immediate(2))
+ TEST8_mi = insn(rex_nw, '\xF6', orbyte(0<<3), mem_reg_plus_const(1),
+ immediate(2, 'b'), no_reg_out)
+ TEST8_ai = insn(rex_nw, '\xF6', orbyte(0<<3),
+ mem_reg_plus_scaled_reg_plus_const(1),
+ immediate(2, 'b'), no_reg_out)
+ TEST8_bi = insn(rex_nw, '\xF6', orbyte(0<<3), stack_bp(1),
+ immediate(2, 'b'), no_reg_out)
+ TEST8_ji = insn(rex_nw, '\xF6', orbyte(0<<3), abs_(1),
+ immediate(2, 'b'), no_reg_out)
+ TEST_rr = insn(rex_w, '\x85', register(2,8), register(1), '\xC0',
+ no_reg_out)
+ TEST_ai = insn(rex_w, '\xF7', orbyte(0<<3),
+ mem_reg_plus_scaled_reg_plus_const(1),
+ immediate(2), no_reg_out)
+ TEST_mi = insn(rex_w, '\xF7', orbyte(0<<3), mem_reg_plus_const(1),
+ immediate(2), no_reg_out)
+ TEST_ji = insn(rex_w, '\xF7', orbyte(0<<3), abs_(1),
+ immediate(2), no_reg_out)
- BTS_mr = insn(rex_w, '\x0F\xAB', register(2,8), mem_reg_plus_const(1))
- BTS_jr = insn(rex_w, '\x0F\xAB', register(2,8), abs_(1))
+ BTS_mr = insn(rex_w, '\x0F\xAB', register(2,8), mem_reg_plus_const(1),
+ no_reg_out)
+ BTS_jr = insn(rex_w, '\x0F\xAB', register(2,8), abs_(1),
+ no_reg_out)
# x87 instructions
- FSTPL_b = insn('\xDD', orbyte(3<<3), stack_bp(1)) # rffi.DOUBLE ('as' wants L??)
- FSTPL_s = insn('\xDD', orbyte(3<<3), stack_sp(1)) # rffi.DOUBLE ('as' wants L??)
- FSTPS_s = insn('\xD9', orbyte(3<<3), stack_sp(1)) # lltype.SingleFloat
- FLDL_s = insn('\xDD', orbyte(0<<3), stack_sp(1))
- FLDS_s = insn('\xD9', orbyte(0<<3), stack_sp(1))
+ FSTPL_b = insn('\xDD', orbyte(3<<3), stack_bp(1), no_reg_out) # rffi.DOUBLE ('as' wants L??)
+ FSTPL_s = insn('\xDD', orbyte(3<<3), stack_sp(1), no_reg_out) # rffi.DOUBLE ('as' wants L??)
+ FSTPS_s = insn('\xD9', orbyte(3<<3), stack_sp(1), no_reg_out) # lltype.SingleFloat
+ FLDL_s = insn('\xDD', orbyte(0<<3), stack_sp(1), no_reg_out)
+ FLDS_s = insn('\xD9', orbyte(0<<3), stack_sp(1), no_reg_out)
# ------------------------------ Random mess -----------------------
RDTSC = insn('\x0F\x31')
@@ -717,84 +780,84 @@
UD2 = insn('\x0F\x0B')
# a breakpoint
- INT3 = insn('\xCC')
+ INT3 = insn('\xCC', no_reg_out)
# ------------------------------ SSE2 ------------------------------
# Conversion
- CVTSI2SD_xr = xmminsn('\xF2', rex_w, '\x0F\x2A', register(1, 8), register(2), '\xC0')
- CVTSI2SD_xb = xmminsn('\xF2', rex_w, '\x0F\x2A', register(1, 8), stack_bp(2))
+ CVTSI2SD_xr = xmminsn('\xF2', rex_w, '\x0F\x2A', register(1, 8), register(2), '\xC0', no_reg_out)
+ CVTSI2SD_xb = xmminsn('\xF2', rex_w, '\x0F\x2A', register(1, 8), stack_bp(2), no_reg_out)
- CVTTSD2SI_rx = xmminsn('\xF2', rex_w, '\x0F\x2C', register(1, 8), register(2), '\xC0')
- CVTTSD2SI_rb = xmminsn('\xF2', rex_w, '\x0F\x2C', register(1, 8), stack_bp(2))
+ CVTTSD2SI_rx = xmminsn('\xF2', rex_w, '\x0F\x2C', reg_out(1, 8), register(2), '\xC0')
+ CVTTSD2SI_rb = xmminsn('\xF2', rex_w, '\x0F\x2C', reg_out(1, 8), stack_bp(2))
- CVTSD2SS_xx = xmminsn('\xF2', rex_nw, '\x0F\x5A', register(1, 8), register(2), '\xC0')
- CVTSD2SS_xb = xmminsn('\xF2', rex_nw, '\x0F\x5A', register(1, 8), stack_bp(2))
- CVTSS2SD_xx = xmminsn('\xF3', rex_nw, '\x0F\x5A', register(1, 8), register(2), '\xC0')
- CVTSS2SD_xb = xmminsn('\xF3', rex_nw, '\x0F\x5A', register(1, 8), stack_bp(2))
+ CVTSD2SS_xx = xmminsn('\xF2', rex_nw, '\x0F\x5A', register(1, 8), register(2), '\xC0', no_reg_out)
+ CVTSD2SS_xb = xmminsn('\xF2', rex_nw, '\x0F\x5A', register(1, 8), stack_bp(2), no_reg_out)
+ CVTSS2SD_xx = xmminsn('\xF3', rex_nw, '\x0F\x5A', register(1, 8), register(2), '\xC0', no_reg_out)
+ CVTSS2SD_xb = xmminsn('\xF3', rex_nw, '\x0F\x5A', register(1, 8), stack_bp(2), no_reg_out)
- CVTPD2PS_xx = xmminsn('\x66', rex_nw, '\x0F\x5A', register(1, 8), register(2), '\xC0')
- CVTPS2PD_xx = xmminsn(rex_nw, '\x0F\x5A', register(1, 8), register(2), '\xC0')
- CVTDQ2PD_xx = xmminsn('\xF3', rex_nw, '\x0F\xE6', register(1, 8), register(2), '\xC0')
- CVTPD2DQ_xx = xmminsn('\xF2', rex_nw, '\x0F\xE6', register(1, 8), register(2), '\xC0')
+ CVTPD2PS_xx = xmminsn('\x66', rex_nw, '\x0F\x5A', register(1, 8), register(2), '\xC0', no_reg_out)
+ CVTPS2PD_xx = xmminsn(rex_nw, '\x0F\x5A', register(1, 8), register(2), '\xC0', no_reg_out)
+ CVTDQ2PD_xx = xmminsn('\xF3', rex_nw, '\x0F\xE6', register(1, 8), register(2), '\xC0', no_reg_out)
+ CVTPD2DQ_xx = xmminsn('\xF2', rex_nw, '\x0F\xE6', register(1, 8), register(2), '\xC0', no_reg_out)
# These work on machine sized registers, so "MOVDQ" is MOVD when running
# on 32 bits and MOVQ when running on 64 bits. "MOVD32" is always 32-bit.
# Note a bug in the Intel documentation:
# http://lists.gnu.org/archive/html/bug-binutils/2007-07/msg00095.html
- MOVDQ_rx = xmminsn('\x66', rex_w, '\x0F\x7E', register(2, 8), register(1), '\xC0')
- MOVDQ_xr = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), register(2), '\xC0')
- MOVDQ_xb = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), stack_bp(2))
- MOVDQ_xx = xmminsn('\xF3', rex_nw, '\x0F\x7E', register(1, 8), register(2), '\xC0')
+ MOVDQ_rx = xmminsn('\x66', rex_w, '\x0F\x7E', register(2, 8), reg_out(1), '\xC0')
+ MOVDQ_xr = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), register(2), '\xC0', no_reg_out)
+ MOVDQ_xb = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), stack_bp(2), no_reg_out)
+ MOVDQ_xx = xmminsn('\xF3', rex_nw, '\x0F\x7E', register(1, 8), register(2), '\xC0', no_reg_out)
- MOVD32_rx = xmminsn('\x66', rex_nw, '\x0F\x7E', register(2, 8), register(1), '\xC0')
- MOVD32_sx = xmminsn('\x66', rex_nw, '\x0F\x7E', register(2, 8), stack_sp(1))
- MOVD32_xr = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), register(2), '\xC0')
- MOVD32_xb = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_bp(2))
- MOVD32_xs = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_sp(2))
+ MOVD32_rx = xmminsn('\x66', rex_nw, '\x0F\x7E', register(2, 8), reg_out(1), '\xC0')
+ MOVD32_sx = xmminsn('\x66', rex_nw, '\x0F\x7E', register(2, 8), stack_sp(1), no_reg_out)
+ MOVD32_xr = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), register(2), '\xC0', no_reg_out)
+ MOVD32_xb = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_bp(2), no_reg_out)
+ MOVD32_xs = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_sp(2), no_reg_out)
- MOVSS_xx = xmminsn('\xF3', rex_nw, '\x0F\x10', register(1,8), register(2), '\xC0')
+ MOVSS_xx = xmminsn('\xF3', rex_nw, '\x0F\x10', register(1,8), register(2), '\xC0', no_reg_out)
- PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b'))
- PSRLDQ_xi = xmminsn('\x66', rex_nw, '\x0F\x73', register(1),
- orbyte(0x3 << 3), '\xC0', immediate(2, 'b'))
- UNPCKLPD_xx = xmminsn('\x66', rex_nw, '\x0F\x14', register(1, 8), register(2), '\xC0')
- UNPCKHPD_xx = xmminsn('\x66', rex_nw, '\x0F\x15', register(1, 8), register(2), '\xC0')
- UNPCKLPS_xx = xmminsn( rex_nw, '\x0F\x14', register(1, 8), register(2), '\xC0')
- UNPCKHPS_xx = xmminsn( rex_nw, '\x0F\x15', register(1, 8), register(2), '\xC0')
- MOVDDUP_xx = xmminsn('\xF2', rex_nw, '\x0F\x12', register(1, 8), register(2), '\xC0')
- SHUFPS_xxi = xmminsn(rex_nw, '\x0F\xC6', register(1,8), register(2), '\xC0', immediate(3, 'b'))
- SHUFPD_xxi = xmminsn('\x66', rex_nw, '\x0F\xC6', register(1,8), register(2), '\xC0', immediate(3, 'b'))
+ PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b'), no_reg_out)
+ PSRLDQ_xi = xmminsn('\x66', rex_nw, '\x0F\x73', register(1),
+ orbyte(0x3 << 3), '\xC0', immediate(2, 'b'), no_reg_out)
+ UNPCKLPD_xx = xmminsn('\x66', rex_nw, '\x0F\x14', register(1, 8), register(2), '\xC0', no_reg_out)
+ UNPCKHPD_xx = xmminsn('\x66', rex_nw, '\x0F\x15', register(1, 8), register(2), '\xC0', no_reg_out)
+ UNPCKLPS_xx = xmminsn( rex_nw, '\x0F\x14', register(1, 8), register(2), '\xC0', no_reg_out)
+ UNPCKHPS_xx = xmminsn( rex_nw, '\x0F\x15', register(1, 8), register(2), '\xC0', no_reg_out)
+ MOVDDUP_xx = xmminsn('\xF2', rex_nw, '\x0F\x12', register(1, 8), register(2), '\xC0', no_reg_out)
+ SHUFPS_xxi = xmminsn(rex_nw, '\x0F\xC6', register(1,8), register(2), '\xC0', immediate(3, 'b'), no_reg_out)
+ SHUFPD_xxi = xmminsn('\x66', rex_nw, '\x0F\xC6', register(1,8), register(2), '\xC0', immediate(3, 'b'), no_reg_out)
- PSHUFD_xxi = xmminsn('\x66', rex_nw, '\x0F\x70', register(1,8), register(2), '\xC0', immediate(3, 'b'))
- PSHUFHW_xxi = xmminsn('\xF3', rex_nw, '\x0F\x70', register(1,8), register(2), '\xC0', immediate(3, 'b'))
- PSHUFLW_xxi = xmminsn('\xF2', rex_nw, '\x0F\x70', register(1,8), register(2), '\xC0', immediate(3, 'b'))
- PSHUFB_xx = xmminsn('\x66', rex_nw, '\x0F\x38\x00', register(1,8), register(2), '\xC0')
- PSHUFB_xm = xmminsn('\x66', rex_nw, '\x0F\x38\x00', register(1,8), mem_reg_plus_const(2))
- PSHUFB_xj = xmminsn('\x66', rex_nw, '\x0F\x38\x00', register(1,8), abs_(2))
+ PSHUFD_xxi = xmminsn('\x66', rex_nw, '\x0F\x70', register(1,8), register(2), '\xC0', immediate(3, 'b'), no_reg_out)
+ PSHUFHW_xxi = xmminsn('\xF3', rex_nw, '\x0F\x70', register(1,8), register(2), '\xC0', immediate(3, 'b'), no_reg_out)
+ PSHUFLW_xxi = xmminsn('\xF2', rex_nw, '\x0F\x70', register(1,8), register(2), '\xC0', immediate(3, 'b'), no_reg_out)
+ PSHUFB_xx = xmminsn('\x66', rex_nw, '\x0F\x38\x00', register(1,8), register(2), '\xC0', no_reg_out)
+ PSHUFB_xm = xmminsn('\x66', rex_nw, '\x0F\x38\x00', register(1,8), mem_reg_plus_const(2), no_reg_out)
+ PSHUFB_xj = xmminsn('\x66', rex_nw, '\x0F\x38\x00', register(1,8), abs_(2), no_reg_out)
# SSE3
- HADDPD_xx = xmminsn('\x66', rex_nw, '\x0F\x7C', register(1,8), register(2), '\xC0')
- HADDPS_xx = xmminsn('\xF2', rex_nw, '\x0F\x7C', register(1,8), register(2), '\xC0')
- PHADDD_xx = xmminsn('\x66', rex_nw, '\x0F\x38\x02', register(1,8), register(2), '\xC0')
+ HADDPD_xx = xmminsn('\x66', rex_nw, '\x0F\x7C', register(1,8), register(2), '\xC0', no_reg_out)
+ HADDPS_xx = xmminsn('\xF2', rex_nw, '\x0F\x7C', register(1,8), register(2), '\xC0', no_reg_out)
+ PHADDD_xx = xmminsn('\x66', rex_nw, '\x0F\x38\x02', register(1,8), register(2), '\xC0', no_reg_out)
# following require SSE4_1
- PEXTRQ_rxi = xmminsn('\x66', rex_w, '\x0F\x3A\x16', register(1), register(2,8), '\xC0', immediate(3, 'b'))
- PEXTRD_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x16', register(1), register(2,8), '\xC0', immediate(3, 'b'))
- PEXTRW_rxi = xmminsn('\x66', rex_nw, '\x0F\xC5', register(1,8), register(2), '\xC0', immediate(3, 'b'))
- PEXTRB_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x14', register(1), register(2,8), '\xC0', immediate(3, 'b'))
- EXTRACTPS_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x17', register(1), register(2,8), '\xC0', immediate(3, 'b'))
-
- PINSRQ_xri = xmminsn('\x66', rex_w, '\x0F\x3A\x22', register(1,8), register(2), '\xC0', immediate(3, 'b'))
- PINSRD_xri = xmminsn('\x66', rex_nw, '\x0F\x3A\x22', register(1,8), register(2), '\xC0', immediate(3, 'b'))
- PINSRW_xri = xmminsn('\x66', rex_nw, '\x0F\xC4', register(1,8), register(2), '\xC0', immediate(3, 'b'))
- PINSRB_xri = xmminsn('\x66', rex_nw, '\x0F\x3A\x20', register(1,8), register(2), '\xC0', immediate(3, 'b'))
- INSERTPS_xxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x21', register(1,8), register(2), '\xC0', immediate(3, 'b'))
+ PEXTRQ_rxi = xmminsn('\x66', rex_w, '\x0F\x3A\x16', reg_out(1), register(2,8), '\xC0', immediate(3, 'b'))
+ PEXTRD_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x16', reg_out(1), register(2,8), '\xC0', immediate(3, 'b'))
+ PEXTRW_rxi = xmminsn('\x66', rex_nw, '\x0F\xC5', reg_out(1,8), register(2), '\xC0', immediate(3, 'b'))
+ PEXTRB_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x14', reg_out(1), register(2,8), '\xC0', immediate(3, 'b'))
+ EXTRACTPS_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x17', reg_out(1), register(2,8), '\xC0', immediate(3, 'b'))
- PTEST_xx = xmminsn('\x66', rex_nw, '\x0F\x38\x17', register(1,8), register(2), '\xC0')
- PBLENDW_xxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x0E', register(1,8), register(2), '\xC0', immediate(3, 'b'))
- CMPPD_xxi = xmminsn('\x66', rex_nw, '\x0F\xC2', register(1,8), register(2), '\xC0', immediate(3, 'b'))
- CMPPS_xxi = xmminsn( rex_nw, '\x0F\xC2', register(1,8), register(2), '\xC0', immediate(3, 'b'))
+ PINSRQ_xri = xmminsn('\x66', rex_w, '\x0F\x3A\x22', register(1,8), register(2), '\xC0', immediate(3, 'b'), no_reg_out)
+ PINSRD_xri = xmminsn('\x66', rex_nw, '\x0F\x3A\x22', register(1,8), register(2), '\xC0', immediate(3, 'b'), no_reg_out)
+ PINSRW_xri = xmminsn('\x66', rex_nw, '\x0F\xC4', register(1,8), register(2), '\xC0', immediate(3, 'b'), no_reg_out)
+ PINSRB_xri = xmminsn('\x66', rex_nw, '\x0F\x3A\x20', register(1,8), register(2), '\xC0', immediate(3, 'b'), no_reg_out)
+ INSERTPS_xxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x21', register(1,8), register(2), '\xC0', immediate(3, 'b'), no_reg_out)
+
+ PTEST_xx = xmminsn('\x66', rex_nw, '\x0F\x38\x17', register(1,8), register(2), '\xC0', no_reg_out)
+ PBLENDW_xxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x0E', register(1,8), register(2), '\xC0', immediate(3, 'b'), no_reg_out)
+ CMPPD_xxi = xmminsn('\x66', rex_nw, '\x0F\xC2', register(1,8), register(2), '\xC0', immediate(3, 'b'), no_reg_out)
+ CMPPS_xxi = xmminsn( rex_nw, '\x0F\xC2', register(1,8), register(2), '\xC0', immediate(3, 'b'), no_reg_out)
# ------------------------------------------------------------
@@ -825,7 +888,7 @@
class X86_32_CodeBuilder(AbstractX86CodeBuilder):
WORD = 4
- PMOVMSKB_rx = xmminsn('\x66', rex_nw, '\x0F\xD7', register(1, 8), register(2), '\xC0')
+ PMOVMSKB_rx = xmminsn('\x66', rex_nw, '\x0F\xD7', reg_out(1, 8), register(2), '\xC0')
# multibyte nops, from 0 to 15 bytes
MULTIBYTE_NOPs = [
@@ -871,9 +934,9 @@
# Three different encodings... following what gcc does. From the
# shortest encoding to the longest one.
- MOV_riu32 = insn(rex_nw, register(1), '\xB8', immediate(2, 'i'))
- MOV_ri32 = insn(rex_w, '\xC7', register(1), '\xC0', immediate(2, 'i'))
- MOV_ri64 = insn(rex_w, register(1), '\xB8', immediate(2, 'q'))
+ MOV_riu32 = insn(rex_nw, reg_out(1), '\xB8', immediate(2, 'i'))
+ MOV_ri32 = insn(rex_w, '\xC7', reg_out(1), '\xC0', immediate(2, 'i'))
+ MOV_ri64 = insn(rex_w, reg_out(1), '\xB8', immediate(2, 'q'))
def MOV_ri(self, reg, immed):
if 0 <= immed <= 4294967295:
@@ -902,7 +965,8 @@
'\x84\x00\x00\x00\x00\x00' for _i in range(1, 7)])
-def define_modrm_modes(insnname_template, before_modrm, after_modrm=[], regtype='GPR'):
+def define_modrm_modes(insnname_template, before_modrm, after_modrm=[], regtype='GPR',
+ output_star=False):
def add_insn(code, *modrm):
args = before_modrm + list(modrm)
methname = insnname_template.replace('*', code)
@@ -922,89 +986,98 @@
modrm_argnum = insnname_template.split('_')[1].index('*')+1
if regtype == 'GPR':
- add_insn('r', register(modrm_argnum))
+ if output_star:
+ add_insn('r', reg_out(modrm_argnum))
+ else:
+ add_insn('r', register(modrm_argnum))
elif regtype == 'BYTE':
- add_insn('r', byte_register(modrm_argnum))
+ if output_star:
+ add_insn('r', byte_reg_out(modrm_argnum))
+ else:
+ add_insn('r', byte_register(modrm_argnum))
elif regtype == 'XMM':
+ assert not output_star
add_insn('x', register(modrm_argnum))
else:
raise AssertionError("Invalid type")
- add_insn('b', stack_bp(modrm_argnum))
- add_insn('s', stack_sp(modrm_argnum))
- add_insn('m', mem_reg_plus_const(modrm_argnum))
- add_insn('a', mem_reg_plus_scaled_reg_plus_const(modrm_argnum))
- add_insn('j', abs_(modrm_argnum))
- add_insn('p', rip_offset(modrm_argnum))
+ extra = (no_reg_out,) if output_star else ()
+ add_insn('b', stack_bp(modrm_argnum), *extra)
+ add_insn('s', stack_sp(modrm_argnum), *extra)
+ add_insn('m', mem_reg_plus_const(modrm_argnum), *extra)
+ add_insn('a', mem_reg_plus_scaled_reg_plus_const(modrm_argnum), *extra)
+ add_insn('j', abs_(modrm_argnum), *extra)
+ add_insn('p', rip_offset(modrm_argnum), *extra)
# Define a regular MOV, and a variant MOV32 that only uses the low 4 bytes of a
# register
for insnname, rex_type in [('MOV', rex_w), ('MOV32', rex_nw)]:
- define_modrm_modes(insnname + '_*r', [rex_type, '\x89', register(2, 8)])
- define_modrm_modes(insnname + '_r*', [rex_type, '\x8B', register(1, 8)])
- define_modrm_modes(insnname + '_*i', [rex_type, '\xC7', orbyte(0<<3)], [immediate(2)])
+ define_modrm_modes(insnname + '_*r', [rex_type, '\x89', register(2, 8)], output_star=True)
+ define_modrm_modes(insnname + '_r*', [rex_type, '\x8B', reg_out(1, 8)])
+ define_modrm_modes(insnname + '_*i', [rex_type, '\xC7', orbyte(0<<3)], [immediate(2)], output_star=True)
-define_modrm_modes('MOV8_*r', [rex_fw, '\x88', byte_register(2, 8)], regtype='BYTE')
-define_modrm_modes('MOV8_*i', [rex_fw, '\xC6', orbyte(0<<3)], [immediate(2, 'b')], regtype='BYTE')
-define_modrm_modes('MOV16_*r', ['\x66', rex_nw, '\x89', register(2, 8)])
-define_modrm_modes('MOV16_*i', ['\x66', rex_nw, '\xC7', orbyte(0<<3)], [immediate(2, 'h')])
+define_modrm_modes('MOV8_*r', [rex_fw, '\x88', byte_register(2, 8)], regtype='BYTE', output_star=True)
+define_modrm_modes('MOV8_*i', [rex_fw, '\xC6', orbyte(0<<3)], [immediate(2, 'b')], regtype='BYTE', output_star=True)
+define_modrm_modes('MOV16_*r', ['\x66', rex_nw, '\x89', register(2, 8)], output_star=True)
+define_modrm_modes('MOV16_*i', ['\x66', rex_nw, '\xC7', orbyte(0<<3)], [immediate(2, 'h')], output_star=True)
-define_modrm_modes('MOVZX8_r*', [rex_w, '\x0F\xB6', register(1, 8)], regtype='BYTE')
-define_modrm_modes('MOVSX8_r*', [rex_w, '\x0F\xBE', register(1, 8)], regtype='BYTE')
-define_modrm_modes('MOVZX16_r*', [rex_w, '\x0F\xB7', register(1, 8)])
-define_modrm_modes('MOVSX16_r*', [rex_w, '\x0F\xBF', register(1, 8)])
-define_modrm_modes('MOVSX32_r*', [rex_w, '\x63', register(1, 8)])
+define_modrm_modes('MOVZX8_r*', [rex_w, '\x0F\xB6', reg_out(1, 8)], regtype='BYTE')
+define_modrm_modes('MOVSX8_r*', [rex_w, '\x0F\xBE', reg_out(1, 8)], regtype='BYTE')
+define_modrm_modes('MOVZX16_r*', [rex_w, '\x0F\xB7', reg_out(1, 8)])
+define_modrm_modes('MOVSX16_r*', [rex_w, '\x0F\xBF', reg_out(1, 8)])
+define_modrm_modes('MOVSX32_r*', [rex_w, '\x63', reg_out(1, 8)])
-define_modrm_modes('MOVSD_x*', ['\xF2', rex_nw, '\x0F\x10', register(1,8)], regtype='XMM')
-define_modrm_modes('MOVSD_*x', ['\xF2', rex_nw, '\x0F\x11', register(2,8)], regtype='XMM')
-define_modrm_modes('MOVSS_x*', ['\xF3', rex_nw, '\x0F\x10', register(1,8)], regtype='XMM')
-define_modrm_modes('MOVSS_*x', ['\xF3', rex_nw, '\x0F\x11', register(2,8)], regtype='XMM')
-define_modrm_modes('MOVAPD_x*', ['\x66', rex_nw, '\x0F\x28', register(1,8)], regtype='XMM')
-define_modrm_modes('MOVAPD_*x', ['\x66', rex_nw, '\x0F\x29', register(2,8)], regtype='XMM')
-define_modrm_modes('MOVAPS_x*', [ rex_nw, '\x0F\x28', register(1,8)], regtype='XMM')
-define_modrm_modes('MOVAPS_*x', [ rex_nw, '\x0F\x29', register(2,8)], regtype='XMM')
+define_modrm_modes('MOVSD_x*', ['\xF2', rex_nw, '\x0F\x10', register(1,8), no_reg_out], regtype='XMM')
+define_modrm_modes('MOVSD_*x', ['\xF2', rex_nw, '\x0F\x11', register(2,8), no_reg_out], regtype='XMM')
+define_modrm_modes('MOVSS_x*', ['\xF3', rex_nw, '\x0F\x10', register(1,8), no_reg_out], regtype='XMM')
+define_modrm_modes('MOVSS_*x', ['\xF3', rex_nw, '\x0F\x11', register(2,8), no_reg_out], regtype='XMM')
+define_modrm_modes('MOVAPD_x*', ['\x66', rex_nw, '\x0F\x28', register(1,8), no_reg_out], regtype='XMM')
+define_modrm_modes('MOVAPD_*x', ['\x66', rex_nw, '\x0F\x29', register(2,8), no_reg_out], regtype='XMM')
+define_modrm_modes('MOVAPS_x*', [ rex_nw, '\x0F\x28', register(1,8), no_reg_out], regtype='XMM')
+define_modrm_modes('MOVAPS_*x', [ rex_nw, '\x0F\x29', register(2,8), no_reg_out], regtype='XMM')
-define_modrm_modes('MOVDQA_x*', ['\x66', rex_nw, '\x0F\x6F', register(1, 8)], regtype='XMM')
-define_modrm_modes('MOVDQA_*x', ['\x66', rex_nw, '\x0F\x7F', register(2, 8)], regtype='XMM')
-define_modrm_modes('MOVDQU_x*', ['\xF3', rex_nw, '\x0F\x6F', register(1, 8)], regtype='XMM')
-define_modrm_modes('MOVDQU_*x', ['\xF3', rex_nw, '\x0F\x7F', register(2, 8)], regtype='XMM')
-define_modrm_modes('MOVUPS_x*', [ rex_nw, '\x0F\x10', register(1, 8)], regtype='XMM')
-define_modrm_modes('MOVUPS_*x', [ rex_nw, '\x0F\x11', register(2, 8)], regtype='XMM')
-define_modrm_modes('MOVUPD_x*', ['\x66', rex_nw, '\x0F\x10', register(1, 8)], regtype='XMM')
-define_modrm_modes('MOVUPD_*x', ['\x66', rex_nw, '\x0F\x11', register(2, 8)], regtype='XMM')
+define_modrm_modes('MOVDQA_x*', ['\x66', rex_nw, '\x0F\x6F', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('MOVDQA_*x', ['\x66', rex_nw, '\x0F\x7F', register(2, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('MOVDQU_x*', ['\xF3', rex_nw, '\x0F\x6F', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('MOVDQU_*x', ['\xF3', rex_nw, '\x0F\x7F', register(2, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('MOVUPS_x*', [ rex_nw, '\x0F\x10', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('MOVUPS_*x', [ rex_nw, '\x0F\x11', register(2, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('MOVUPD_x*', ['\x66', rex_nw, '\x0F\x10', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('MOVUPD_*x', ['\x66', rex_nw, '\x0F\x11', register(2, 8), no_reg_out], regtype='XMM')
-define_modrm_modes('SQRTSD_x*', ['\xF2', rex_nw, '\x0F\x51', register(1,8)], regtype='XMM')
+define_modrm_modes('SQRTSD_x*', ['\xF2', rex_nw, '\x0F\x51', register(1,8), no_reg_out], regtype='XMM')
define_modrm_modes('XCHG_r*', [rex_w, '\x87', register(1, 8)])
-define_modrm_modes('ADDSD_x*', ['\xF2', rex_nw, '\x0F\x58', register(1, 8)], regtype='XMM')
-define_modrm_modes('ADDPD_x*', ['\x66', rex_nw, '\x0F\x58', register(1, 8)], regtype='XMM')
-define_modrm_modes('SUBSD_x*', ['\xF2', rex_nw, '\x0F\x5C', register(1, 8)], regtype='XMM')
-define_modrm_modes('MULSD_x*', ['\xF2', rex_nw, '\x0F\x59', register(1, 8)], regtype='XMM')
-define_modrm_modes('DIVSD_x*', ['\xF2', rex_nw, '\x0F\x5E', register(1, 8)], regtype='XMM')
-define_modrm_modes('UCOMISD_x*', ['\x66', rex_nw, '\x0F\x2E', register(1, 8)], regtype='XMM')
-define_modrm_modes('XORPD_x*', ['\x66', rex_nw, '\x0F\x57', register(1, 8)], regtype='XMM')
-define_modrm_modes('XORPS_x*', [ rex_nw, '\x0F\x57', register(1, 8)], regtype='XMM')
-define_modrm_modes('ANDPD_x*', ['\x66', rex_nw, '\x0F\x54', register(1, 8)], regtype='XMM')
-define_modrm_modes('ANDPS_x*', [ rex_nw, '\x0F\x54', register(1, 8)], regtype='XMM')
+define_modrm_modes('ADDSD_x*', ['\xF2', rex_nw, '\x0F\x58', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('ADDPD_x*', ['\x66', rex_nw, '\x0F\x58', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('SUBSD_x*', ['\xF2', rex_nw, '\x0F\x5C', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('MULSD_x*', ['\xF2', rex_nw, '\x0F\x59', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('DIVSD_x*', ['\xF2', rex_nw, '\x0F\x5E', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('UCOMISD_x*', ['\x66', rex_nw, '\x0F\x2E', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('XORPD_x*', ['\x66', rex_nw, '\x0F\x57', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('XORPS_x*', [ rex_nw, '\x0F\x57', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('ANDPD_x*', ['\x66', rex_nw, '\x0F\x54', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('ANDPS_x*', [ rex_nw, '\x0F\x54', register(1, 8), no_reg_out], regtype='XMM')
# floating point operations (single & double)
-define_modrm_modes('ADDPD_x*', ['\x66', rex_nw, '\x0F\x58', register(1, 8)], regtype='XMM')
-define_modrm_modes('ADDPS_x*', [ rex_nw, '\x0F\x58', register(1, 8)], regtype='XMM')
-define_modrm_modes('SUBPD_x*', ['\x66', rex_nw, '\x0F\x5C', register(1, 8)], regtype='XMM')
-define_modrm_modes('SUBPS_x*', [ rex_nw, '\x0F\x5C', register(1, 8)], regtype='XMM')
-define_modrm_modes('MULPD_x*', ['\x66', rex_nw, '\x0F\x59', register(1, 8)], regtype='XMM')
-define_modrm_modes('MULPS_x*', [ rex_nw, '\x0F\x59', register(1, 8)], regtype='XMM')
-define_modrm_modes('DIVPD_x*', ['\x66', rex_nw, '\x0F\x5E', register(1, 8)], regtype='XMM')
-define_modrm_modes('DIVPS_x*', [ rex_nw, '\x0F\x5E', register(1, 8)], regtype='XMM')
-define_modrm_modes('DIVPD_x*', ['\x66', rex_nw, '\x0F\x5E', register(1, 8)], regtype='XMM')
-define_modrm_modes('DIVPS_x*', [ rex_nw, '\x0F\x5E', register(1, 8)], regtype='XMM')
+define_modrm_modes('ADDPD_x*', ['\x66', rex_nw, '\x0F\x58', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('ADDPS_x*', [ rex_nw, '\x0F\x58', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('SUBPD_x*', ['\x66', rex_nw, '\x0F\x5C', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('SUBPS_x*', [ rex_nw, '\x0F\x5C', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('MULPD_x*', ['\x66', rex_nw, '\x0F\x59', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('MULPS_x*', [ rex_nw, '\x0F\x59', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('DIVPD_x*', ['\x66', rex_nw, '\x0F\x5E', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('DIVPS_x*', [ rex_nw, '\x0F\x5E', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('DIVPD_x*', ['\x66', rex_nw, '\x0F\x5E', register(1, 8), no_reg_out], regtype='XMM')
+define_modrm_modes('DIVPS_x*', [ rex_nw, '\x0F\x5E', register(1, 8), no_reg_out], regtype='XMM')
def define_pxmm_insn(insnname_template, insn_char):
+ # NOTE: these instructions are all "no_reg_out"
def add_insn(char, *post):
methname = insnname_template.replace('*', char)
insn_func = xmminsn('\x66', rex_nw, '\x0F' + insn_char,
- register(1, 8), *post)
+ register(1, 8), no_reg_out, *post)
assert not hasattr(AbstractX86CodeBuilder, methname)
setattr(AbstractX86CodeBuilder, methname, insn_func)
#
diff --git a/rpython/jit/backend/x86/test/test_rx86.py b/rpython/jit/backend/x86/test/test_rx86.py
--- a/rpython/jit/backend/x86/test/test_rx86.py
+++ b/rpython/jit/backend/x86/test/test_rx86.py
@@ -242,3 +242,21 @@
assert len(cls.MULTIBYTE_NOPs) == 16
for i in range(16):
assert len(cls.MULTIBYTE_NOPs[i]) == i
+
+def test_clobber_scratch_reg():
+ class CodeBuilder64Clobber(CodeBuilder64):
+ called = 0
+ def clobber_scratch_reg(self):
+ self.called += 1
+ s = CodeBuilder64Clobber()
+ for r in [eax, ebx, ecx, edx]:
+ s.MOV_rm(r, (edi, 123))
+ assert s.called == 0
+ s.MOV_rm(r11, (edi, 123))
+ assert s.called == 1
+ s.MOV32_rm(r11, (edi, 123))
+ assert s.called == 2
+ s.MOVSX32_rm(r11, (edi, 123))
+ assert s.called == 3
+ s.MOV_mr((edi, 123), r11)
+ assert s.called == 3
diff --git a/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
--- a/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
+++ b/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
@@ -6,7 +6,11 @@
class TestRx86_64(test_rx86_32_auto_encoding.TestRx86_32):
WORD = 8
TESTDIR = 'rx86_64'
- X86_CodeBuilder = rx86.X86_64_CodeBuilder
+
+ class X86_CodeBuilder(rx86.X86_64_CodeBuilder):
+ def clobber_scratch_reg(self):
+ pass
+
REGNAMES = ['%rax', '%rcx', '%rdx', '%rbx', '%rsp', '%rbp', '%rsi', '%rdi',
'%r8', '%r9', '%r10', '%r11', '%r12', '%r13', '%r14', '%r15']
REGNAMES8 = ['%al', '%cl', '%dl', '%bl', '%spl', '%bpl', '%sil', '%dil',
More information about the pypy-commit mailing list