[pypy-commit] pypy fast-gil: Fixes and tweaks
arigo
noreply at buildbot.pypy.org
Sun Mar 9 12:06:31 CET 2014
Author: Armin Rigo <arigo at tunes.org>
Branch: fast-gil
Changeset: r69820:d1696c4adffb
Date: 2014-03-09 12:05 +0100
http://bitbucket.org/pypy/pypy/changeset/d1696c4adffb/
Log: Fixes and tweaks
diff --git a/rpython/jit/backend/x86/callbuilder.py b/rpython/jit/backend/x86/callbuilder.py
--- a/rpython/jit/backend/x86/callbuilder.py
+++ b/rpython/jit/backend/x86/callbuilder.py
@@ -25,9 +25,6 @@
# arguments, we need to decrease esp temporarily
stack_max = PASS_ON_MY_FRAME
- # set by save_result_value()
- tmpresloc = None
-
def __init__(self, assembler, fnloc, arglocs,
resloc=eax, restype=INT, ressize=WORD):
AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs,
@@ -68,12 +65,10 @@
if self.ressize == 0:
return # void result
# use the code in load_from_mem to do the zero- or sign-extension
- srcloc = self.tmpresloc
- if srcloc is None:
- if self.restype == FLOAT:
- srcloc = xmm0
- else:
- srcloc = eax
+ if self.restype == FLOAT:
+ srcloc = xmm0
+ else:
+ srcloc = eax
if self.ressize >= WORD and self.resloc is srcloc:
return # no need for any MOV
if self.ressize == 1 and isinstance(srcloc, RegLoc):
@@ -139,19 +134,25 @@
from rpython.jit.backend.x86.assembler import heap
from rpython.jit.backend.x86 import rx86
#
- # save the result we just got (in eax/eax+edx/st(0)/xmm0)
- self.save_result_value()
- # call the reopenstack() function (also reacquiring the GIL)
+ # check if we need to call the reopenstack() function
+ # (to acquire the GIL, remove the asmgcc head from
+ # the chained list, etc.)
mc = self.mc
+ restore_edx = False
if not self.asm._is_asmgcc():
+ css = 0
css_value = imm(1)
- old_value = edx
+ old_value = ecx
else:
from rpython.memory.gctransform import asmgcroot
css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
if IS_X86_32:
- css_value = ecx
- old_value = edx
+ assert css >= 16
+ if self.restype == 'L': # long long result: eax/edx
+ mc.MOV_sr(12, edx.value)
+ restore_edx = True
+ css_value = edx
+ old_value = ecx
elif IS_X86_64:
css_value = edi
old_value = esi
@@ -167,29 +168,39 @@
mc.J_il8(rx86.Conditions['E'], 0)
je_location = mc.get_relative_pos()
#
+ # Yes, we need to call the reopenstack() function
+ self.save_result_value_reacq()
if IS_X86_32:
mc.MOV_sr(4, css_value.value)
mc.MOV_sr(0, old_value.value)
mc.CALL(imm(self.asm.reacqgil_addr))
+ self.restore_result_value_reacq()
#
# patch the JE above
offset = mc.get_relative_pos() - je_location
assert 0 < offset <= 127
mc.overwrite(je_location-1, chr(offset))
#
- if not we_are_translated(): # for testing: now we can accesss
- self.mc.SUB(ebp, imm(1)) # ebp again
+ if restore_edx:
+ mc.MOV_rs(edx.value, 12) # restore this
+ #
+ if not we_are_translated(): # for testing: now we can access
+ mc.SUB(ebp, imm(1)) # ebp again
#
# Now that we required the GIL, we can reload a possibly modified ebp
if self.asm._is_asmgcc():
# special-case: reload ebp from the css
from rpython.memory.gctransform import asmgcroot
index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
- self.mc.MOV_rs(ebp.value, index_of_ebp) # MOV EBP, [css.ebp]
+ mc.MOV_rs(ebp.value, index_of_ebp) # MOV EBP, [css.ebp]
#else:
# for shadowstack, done for us by _reload_frame_if_necessary()
- def save_result_value(self):
+ def save_result_value_reacq(self):
+ """Overridden in CallBuilder32 and CallBuilder64"""
+ raise NotImplementedError
+
+ def restore_result_value_reacq(self):
"""Overridden in CallBuilder32 and CallBuilder64"""
raise NotImplementedError
@@ -242,45 +253,40 @@
resloc = self.resloc
if resloc is not None and resloc.is_float():
# a float or a long long return
- if self.tmpresloc is None:
- if self.restype == 'L': # long long
- # move eax/edx -> xmm0
- self.mc.MOVD_xr(resloc.value^1, edx.value)
- self.mc.MOVD_xr(resloc.value, eax.value)
- self.mc.PUNPCKLDQ_xx(resloc.value, resloc.value^1)
- else:
- # float: we have to go via the stack
- self.mc.FSTPL_s(0)
- self.mc.MOVSD_xs(resloc.value, 0)
+ if self.restype == 'L': # long long
+ # move eax/edx -> xmm0
+ self.mc.MOVD_xr(resloc.value^1, edx.value)
+ self.mc.MOVD_xr(resloc.value, eax.value)
+ self.mc.PUNPCKLDQ_xx(resloc.value, resloc.value^1)
else:
- self.mc.MOVSD(resloc, self.tmpresloc)
+ # float: we have to go via the stack
+ self.mc.FSTPL_s(0)
+ self.mc.MOVSD_xs(resloc.value, 0)
#
elif self.restype == 'S':
# singlefloat return: must convert ST(0) to a 32-bit singlefloat
# and load it into self.resloc. mess mess mess
- if self.tmpresloc is None:
- self.mc.FSTPS_s(0)
- self.mc.MOV_rs(resloc.value, 0)
- else:
- self.mc.MOV(resloc, self.tmpresloc)
+ self.mc.FSTPS_s(0)
+ self.mc.MOV_rs(resloc.value, 0)
else:
CallBuilderX86.load_result(self)
- def save_result_value(self):
+ def save_result_value_reacq(self):
# Temporarily save the result value into [ESP+8]. We use "+8"
- # in order to leave the two initial words free, in case it's needed
+ # in order to leave the two initial words free, in case it's needed.
+ # Also note that in this 32-bit case, a long long return value is
+ # in eax/edx, but we already saved the value of edx in
+ # move_real_result_and_call_reacqgil_addr().
if self.ressize == 0: # void return
return
if self.resloc.is_float():
# a float or a long long return
- self.tmpresloc = RawEspLoc(8, FLOAT)
if self.restype == 'L':
self.mc.MOV_sr(8, eax.value) # long long
- self.mc.MOV_sr(12, edx.value)
+ #self.mc.MOV_sr(12, edx.value) -- already done
else:
self.mc.FSTPL_s(8) # float return
else:
- self.tmpresloc = RawEspLoc(8, INT)
if self.restype == 'S':
self.mc.FSTPS_s(8)
else:
@@ -288,6 +294,25 @@
assert self.ressize <= WORD
self.mc.MOV_sr(8, eax.value)
+ def restore_result_value_reacq(self):
+ # Opposite of save_result_value_reacq()
+ if self.ressize == 0: # void return
+ return
+ if self.resloc.is_float():
+ # a float or a long long return
+ if self.restype == 'L':
+ self.mc.MOV_rs(eax.value, 8) # long long
+ #self.mc.MOV_rs(edx.value, 12) -- will be done for us
+ else:
+ self.mc.FLDL_s(8) # float return
+ else:
+ if self.restype == 'S':
+ self.mc.FLDS_s(8)
+ else:
+ assert self.restype == INT
+ assert self.ressize <= WORD
+ self.mc.MOV_rs(eax.value, 8)
+
class CallBuilder64(CallBuilderX86):
@@ -394,35 +419,45 @@
assert 0 # should not occur on 64-bit
def load_result(self):
- if self.restype == 'S' and self.tmpresloc is None:
+ if self.restype == 'S':
# singlefloat return: use MOVD to load the target register
# from the lower 32 bits of XMM0
self.mc.MOVD(self.resloc, xmm0)
else:
CallBuilderX86.load_result(self)
- def save_result_value(self):
+ def save_result_value_reacq(self):
# Temporarily save the result value into [ESP].
if self.ressize == 0: # void return
return
#
if self.restype == FLOAT: # and not 'S'
self.mc.MOVSD_sx(0, xmm0.value)
- self.tmpresloc = RawEspLoc(0, FLOAT)
return
#
- if len(self.free_callee_save_gprs) == 0:
- self.tmpresloc = RawEspLoc(0, INT)
- else:
- self.tmpresloc = self.free_callee_save_gprs[0]
- #
if self.restype == 'S':
# singlefloat return: use MOVD to store the lower 32 bits
- # of XMM0 into the tmpresloc (register or [ESP])
- self.mc.MOVD(self.tmpresloc, xmm0)
+ # of XMM0 into [ESP] (nb. this is actually MOVQ, so will
+ # store 64 bits instead of only 32, but that's fine)
+ self.mc.MOVD_sx(0, xmm0.value)
else:
assert self.restype == INT
- self.mc.MOV(self.tmpresloc, eax)
+ self.mc.MOV_sr(0, eax.value)
+
+ def restore_result_value_reacq(self):
+ # Opposite of save_result_value_reacq()
+ if self.ressize == 0: # void return
+ return
+ #
+ if self.restype == FLOAT: # and not 'S'
+ self.mc.MOVSD_xs(xmm0.value, 0)
+ return
+ #
+ if self.restype == 'S':
+ self.mc.MOVD_xs(xmm0.value, 0)
+ else:
+ assert self.restype == INT
+ self.mc.MOV_rs(eax.value, 0)
if IS_X86_32:
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -586,6 +586,8 @@
FSTPL_b = insn('\xDD', orbyte(3<<3), stack_bp(1)) # rffi.DOUBLE ('as' wants L??)
FSTPL_s = insn('\xDD', orbyte(3<<3), stack_sp(1)) # rffi.DOUBLE ('as' wants L??)
FSTPS_s = insn('\xD9', orbyte(3<<3), stack_sp(1)) # lltype.SingleFloat
+ FLDL_s = insn('\xDD', orbyte(0<<3), stack_sp(1))
+ FLDS_s = insn('\xD9', orbyte(0<<3), stack_sp(1))
# ------------------------------ Random mess -----------------------
RDTSC = insn('\x0F\x31')
@@ -620,6 +622,8 @@
MOVD_rx = xmminsn('\x66', rex_w, '\x0F\x7E', register(2, 8), register(1), '\xC0')
MOVD_xr = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), register(2), '\xC0')
MOVD_xb = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), stack_bp(2))
+ MOVD_sx = xmminsn('\x66', rex_w, '\x0F\x7E', register(2, 8), stack_sp(1))
+ MOVD_xs = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), stack_sp(2))
PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b'))
diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py
--- a/rpython/rlib/rthread.py
+++ b/rpython/rlib/rthread.py
@@ -105,9 +105,9 @@
def get_fastgil_addr_raw(is_asmgcc):
if is_asmgcc: # must be constant!
- return _fetch_fastgil(42)
+ return _fetch_fastgil(42)()
else:
- return _fetch_fastgil(1)
+ return _fetch_fastgil(1)()
def allocate_lock():
More information about the pypy-commit
mailing list