[pypy-commit] pypy ppc-vsx-support: merge default

plan_rich pypy.commits at gmail.com
Tue Jul 19 07:52:19 EDT 2016


Author: Richard Plangger <planrichi at gmail.com>
Branch: ppc-vsx-support
Changeset: r85770:5090a8e44421
Date: 2016-07-19 13:51 +0200
http://bitbucket.org/pypy/pypy/changeset/5090a8e44421/

Log:	merge default

diff --git a/lib_pypy/_pypy_winbase_build.py b/lib_pypy/_pypy_winbase_build.py
new file mode 100644
--- /dev/null
+++ b/lib_pypy/_pypy_winbase_build.py
@@ -0,0 +1,91 @@
+# Note: uses the CFFI out-of-line ABI mode.  We can't use the API
+# mode because ffi.compile() needs to run the compiler, which
+# needs 'subprocess', which needs 'msvcrt' and '_subprocess',
+# which depend on '_pypy_winbase_cffi' already.
+#
+# Note that if you need to regenerate _pypy_winbase_cffi and
+# can't use a preexisting PyPy to do that, then running this
+# file should work as long as 'subprocess' is not imported
+# by cffi.  I had to hack in 'cffi._pycparser' to move an
+# 'import subprocess' to the inside of a function.  (Also,
+# CPython+CFFI should work as well.)
+#
+# This module supports both msvcrt.py and _subprocess.py.
+
+from cffi import FFI
+
+ffi = FFI()
+
+ffi.set_source("_pypy_winbase_cffi", None)
+
+# ---------- MSVCRT ----------
+
+ffi.cdef("""
+typedef unsigned short wint_t;
+
+int _open_osfhandle(intptr_t osfhandle, int flags);
+intptr_t _get_osfhandle(int fd);
+int _setmode(int fd, int mode);
+int _locking(int fd, int mode, long nbytes);
+
+int _kbhit(void);
+int _getch(void);
+wint_t _getwch(void);
+int _getche(void);
+wint_t _getwche(void);
+int _putch(int);
+wint_t _putwch(wchar_t);
+int _ungetch(int);
+wint_t _ungetwch(wint_t);
+""")
+
+# ---------- SUBPROCESS ----------
+
+ffi.cdef("""
+typedef struct {
+    DWORD  cb;
+    char * lpReserved;
+    char * lpDesktop;
+    char * lpTitle;
+    DWORD  dwX;
+    DWORD  dwY;
+    DWORD  dwXSize;
+    DWORD  dwYSize;
+    DWORD  dwXCountChars;
+    DWORD  dwYCountChars;
+    DWORD  dwFillAttribute;
+    DWORD  dwFlags;
+    WORD   wShowWindow;
+    WORD   cbReserved2;
+    LPBYTE lpReserved2;
+    HANDLE hStdInput;
+    HANDLE hStdOutput;
+    HANDLE hStdError;
+} STARTUPINFO, *LPSTARTUPINFO;
+
+typedef struct {
+    HANDLE hProcess;
+    HANDLE hThread;
+    DWORD  dwProcessId;
+    DWORD  dwThreadId;
+} PROCESS_INFORMATION, *LPPROCESS_INFORMATION;
+
+DWORD WINAPI GetVersion(void);
+BOOL WINAPI CreatePipe(PHANDLE, PHANDLE, void *, DWORD);
+BOOL WINAPI CloseHandle(HANDLE);
+HANDLE WINAPI GetCurrentProcess(void);
+BOOL WINAPI DuplicateHandle(HANDLE, HANDLE, HANDLE, LPHANDLE,
+                            DWORD, BOOL, DWORD);
+BOOL WINAPI CreateProcessA(char *, char *, void *,
+                           void *, BOOL, DWORD, char *,
+                           char *, LPSTARTUPINFO, LPPROCESS_INFORMATION);
+DWORD WINAPI WaitForSingleObject(HANDLE, DWORD);
+BOOL WINAPI GetExitCodeProcess(HANDLE, LPDWORD);
+BOOL WINAPI TerminateProcess(HANDLE, UINT);
+HANDLE WINAPI GetStdHandle(DWORD);
+""")
+
+# --------------------
+
+if __name__ == "__main__":
+    ffi.compile()
diff --git a/lib_pypy/_pypy_winbase_cffi.py b/lib_pypy/_pypy_winbase_cffi.py
new file mode 100644
--- /dev/null
+++ b/lib_pypy/_pypy_winbase_cffi.py
@@ -0,0 +1,10 @@
+# auto-generated file
+import _cffi_backend
+
+ffi = _cffi_backend.FFI('_pypy_winbase_cffi',
+    _version = 0x2601,
+    _types = b'\x00\x00\x01\x0D\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x01\x0D\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x01\x0D\x00\x00\x07\x01\x00\x00\x07\x01\x00\x00\x09\x01\x00\x00\x00\x0F\x00\x00\x01\x0D\x00\x00\x19\x01\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x01\x0D\x00\x00\x00\x0F\x00\x00\x01\x0D\x00\x00\x50\x03\x00\x00\x13\x11\x00\x00\x53\x03\x00\x00\x15\x11\x00\x00\x07\x01\x00\x00\x0A\x01\x00\x00\x13\x11\x00\x00\x13\x11\x00\x00\x4F\x03\x00\x00\x4E\x03\x00\x00\x02\x0F\x00\x00\x01\x0D\x00\x00\x15\x03\x00\x00\x1F\x11\x00\x00\x15\x11\x00\x00\x0A\x01\x00\x00\x02\x0F\x00\x00\x01\x0D\x00\x00\x15\x11\x00\x00\x02\x0F\x00\x00\x01\x0D\x00\x00\x15\x11\x00\x00\x08\x01\x00\x00\x02\x0F\x00\x00\x01\x0D\x00\x00\x15\x11\x00\x00\x18\x03\x00\x00\x02\x0F\x00\x00\x01\x0D\x00\x00\x15\x11\x00\x00\x15\x11\x00\x00\x15\x11\x00\x00\x1F\x11\x00\x00\x0A\x01\x00\x00\x07\x01\x00\x00\x0A\x01\x00\x00\x02\x0F\x00\x00\x0D\x0D\x00\x00\x07\x01\x00\x00\x00\x0F\x00\x00\x18\x0D\x00\x00\x15\x11\x00\x00\x0A\x01\x00\x00\x02\x0F\x00\x00\x18\x0D\x00\x00\x02\x0F\x00\x00\x42\x0D\x00\x00\x06\x01\x00\x00\x00\x0F\x00\x00\x42\x0D\x00\x00\x00\x0F\x00\x00\x42\x0D\x00\x00\x10\x01\x00\x00\x00\x0F\x00\x00\x15\x0D\x00\x00\x0A\x01\x00\x00\x02\x0F\x00\x00\x15\x0D\x00\x00\x02\x0F\x00\x00\x00\x09\x00\x00\x01\x09\x00\x00\x02\x01\x00\x00\x52\x03\x00\x00\x04\x01\x00\x00\x00\x01',
+    _globals = (b'\x00\x00\x24\x23CloseHandle',0,b'\x00\x00\x1E\x23CreatePipe',0,b'\x00\x00\x12\x23CreateProcessA',0,b'\x00\x00\x2F\x23DuplicateHandle',0,b'\x00\x00\x4C\x23GetCurrentProcess',0,b'\x00\x00\x2B\x23GetExitCodeProcess',0,b'\x00\x00\x49\x23GetStdHandle',0,b'\x00\x00\x3F\x23GetVersion',0,b'\x00\x00\x27\x23TerminateProcess',0,b'\x00\x00\x3B\x23WaitForSingleObject',0,b'\x00\x00\x38\x23_get_osfhandle',0,b'\x00\x00\x10\x23_getch',0,b'\x00\x00\x10\x23_getche',0,b'\x00\x00\x44\x23_getwch',0,b'\x00\x00\x44\x23_getwche',0,b'\x00\x00\x10\x23_kbhit',0,b'\x00\x00\x07\x23_locking',0,b'\x00\x00\x0C\x23_open_osfhandle',0,b'\x00\x00\x00\x23_putch',0,b'\x00\x00\x46\x23_putwch',0,b'\x00\x00\x03\x23_setmode',0,b'\x00\x00\x00\x23_ungetch',0,b'\x00\x00\x41\x23_ungetwch',0),
+    _struct_unions = ((b'\x00\x00\x00\x4E\x00\x00\x00\x02$PROCESS_INFORMATION',b'\x00\x00\x15\x11hProcess',b'\x00\x00\x15\x11hThread',b'\x00\x00\x18\x11dwProcessId',b'\x00\x00\x18\x11dwThreadId'),(b'\x00\x00\x00\x4F\x00\x00\x00\x02$STARTUPINFO',b'\x00\x00\x18\x11cb',b'\x00\x00\x13\x11lpReserved',b'\x00\x00\x13\x11lpDesktop',b'\x00\x00\x13\x11lpTitle',b'\x00\x00\x18\x11dwX',b'\x00\x00\x18\x11dwY',b'\x00\x00\x18\x11dwXSize',b'\x00\x00\x18\x11dwYSize',b'\x00\x00\x18\x11dwXCountChars',b'\x00\x00\x18\x11dwYCountChars',b'\x00\x00\x18\x11dwFillAttribute',b'\x00\x00\x18\x11dwFlags',b'\x00\x00\x42\x11wShowWindow',b'\x00\x00\x42\x11cbReserved2',b'\x00\x00\x51\x11lpReserved2',b'\x00\x00\x15\x11hStdInput',b'\x00\x00\x15\x11hStdOutput',b'\x00\x00\x15\x11hStdError')),
+    _typenames = (b'\x00\x00\x00\x1CLPPROCESS_INFORMATION',b'\x00\x00\x00\x1BLPSTARTUPINFO',b'\x00\x00\x00\x4EPROCESS_INFORMATION',b'\x00\x00\x00\x4FSTARTUPINFO',b'\x00\x00\x00\x42wint_t'),
+)
diff --git a/lib_pypy/_subprocess.py b/lib_pypy/_subprocess.py
--- a/lib_pypy/_subprocess.py
+++ b/lib_pypy/_subprocess.py
@@ -10,148 +10,99 @@
 
 # Declare external Win32 functions
 
-import ctypes
-
-_kernel32 = ctypes.WinDLL('kernel32')
-
-_CloseHandle = _kernel32.CloseHandle
-_CloseHandle.argtypes = [ctypes.c_int]
-_CloseHandle.restype = ctypes.c_int
-
-_CreatePipe = _kernel32.CreatePipe
-_CreatePipe.argtypes = [ctypes.POINTER(ctypes.c_int), ctypes.POINTER(ctypes.c_int),
-                        ctypes.c_void_p, ctypes.c_int]
-_CreatePipe.restype = ctypes.c_int
-
-_GetCurrentProcess = _kernel32.GetCurrentProcess
-_GetCurrentProcess.argtypes = []
-_GetCurrentProcess.restype = ctypes.c_int
+from _pypy_winbase_cffi import ffi as _ffi
+_kernel32 = _ffi.dlopen('kernel32')
 
 GetVersion = _kernel32.GetVersion
-GetVersion.argtypes = []
-GetVersion.restype = ctypes.c_int
 
-_DuplicateHandle = _kernel32.DuplicateHandle
-_DuplicateHandle.argtypes = [ctypes.c_int, ctypes.c_int, ctypes.c_int,
-                             ctypes.POINTER(ctypes.c_int),
-                             ctypes.c_int, ctypes.c_int, ctypes.c_int]
-_DuplicateHandle.restype = ctypes.c_int
-
-_WaitForSingleObject = _kernel32.WaitForSingleObject
-_WaitForSingleObject.argtypes = [ctypes.c_int, ctypes.c_uint]
-_WaitForSingleObject.restype = ctypes.c_int
-
-_GetExitCodeProcess = _kernel32.GetExitCodeProcess
-_GetExitCodeProcess.argtypes = [ctypes.c_int, ctypes.POINTER(ctypes.c_int)]
-_GetExitCodeProcess.restype = ctypes.c_int
-
-_TerminateProcess = _kernel32.TerminateProcess
-_TerminateProcess.argtypes = [ctypes.c_int, ctypes.c_int]
-_TerminateProcess.restype = ctypes.c_int
-
-_GetStdHandle = _kernel32.GetStdHandle
-_GetStdHandle.argtypes = [ctypes.c_int]
-_GetStdHandle.restype = ctypes.c_int
-
-class _STARTUPINFO(ctypes.Structure):
-    _fields_ = [('cb',         ctypes.c_int),
-                ('lpReserved', ctypes.c_void_p),
-                ('lpDesktop',  ctypes.c_char_p),
-                ('lpTitle',    ctypes.c_char_p),
-                ('dwX',        ctypes.c_int),
-                ('dwY',        ctypes.c_int),
-                ('dwXSize',    ctypes.c_int),
-                ('dwYSize',    ctypes.c_int),
-                ('dwXCountChars', ctypes.c_int),
-                ('dwYCountChars', ctypes.c_int),
-                ("dwFillAttribute", ctypes.c_int),
-                ("dwFlags", ctypes.c_int),
-                ("wShowWindow", ctypes.c_short),
-                ("cbReserved2", ctypes.c_short),
-                ("lpReserved2", ctypes.c_void_p),
-                ("hStdInput", ctypes.c_int),
-                ("hStdOutput", ctypes.c_int),
-                ("hStdError", ctypes.c_int)
-                ]
-
-class _PROCESS_INFORMATION(ctypes.Structure):
-    _fields_ = [("hProcess", ctypes.c_int),
-                ("hThread", ctypes.c_int),
-                ("dwProcessID", ctypes.c_int),
-                ("dwThreadID", ctypes.c_int)]
-
-_CreateProcess = _kernel32.CreateProcessA
-_CreateProcess.argtypes = [ctypes.c_char_p, ctypes.c_char_p, ctypes.c_void_p, ctypes.c_void_p,
-                           ctypes.c_int, ctypes.c_int, ctypes.c_char_p, ctypes.c_char_p,
-                           ctypes.POINTER(_STARTUPINFO), ctypes.POINTER(_PROCESS_INFORMATION)]
-_CreateProcess.restype = ctypes.c_int
-
-del ctypes
 
 # Now the _subprocess module implementation
 
-from ctypes import c_int as _c_int, byref as _byref, WinError as _WinError
+def _WinError():
+    code, message = _ffi.getwinerror()
+    raise WindowsError(code, message)
 
-class _handle:
-    def __init__(self, handle):
-        self.handle = handle
+_INVALID_HANDLE_VALUE = _ffi.cast("HANDLE", -1)
+
+class _handle(object):
+    def __init__(self, c_handle):
+        # 'c_handle' is a cffi cdata of type HANDLE, which is basically 'void *'
+        self.c_handle = c_handle
+        if int(self) != -1:
+            self.c_handle = _ffi.gc(self.c_handle, _kernel32.CloseHandle)
 
     def __int__(self):
-        return self.handle
+        return int(_ffi.cast("intptr_t", self.c_handle))
 
-    def __del__(self):
-        if self.handle is not None:
-            _CloseHandle(self.handle)
+    def __repr__(self):
+        return '<_subprocess.handle %d at 0x%x>' % (int(self), id(self))
 
     def Detach(self):
-        handle, self.handle = self.handle, None
-        return handle
+        h = int(self)
+        if h != -1:
+            c_handle = self.c_handle
+            self.c_handle = _INVALID_HANDLE_VALUE
+            _ffi.gc(c_handle, None)
+        return h
 
     def Close(self):
-        if self.handle not in (-1, None):
-            _CloseHandle(self.handle)
-            self.handle = None
+        if int(self) != -1:
+            c_handle = self.c_handle
+            self.c_handle = _INVALID_HANDLE_VALUE
+            _ffi.gc(c_handle, None)
+            _kernel32.CloseHandle(c_handle)
 
 def CreatePipe(attributes, size):
-    read = _c_int()
-    write = _c_int()
+    handles = _ffi.new("HANDLE[2]")
 
-    res = _CreatePipe(_byref(read), _byref(write), None, size)
+    res = _kernel32.CreatePipe(handles, handles + 1, _ffi.NULL, size)
 
     if not res:
         raise _WinError()
 
-    return _handle(read.value), _handle(write.value)
+    return _handle(handles[0]), _handle(handles[1])
 
 def GetCurrentProcess():
-    return _handle(_GetCurrentProcess())
+    return _handle(_kernel32.GetCurrentProcess())
 
 def DuplicateHandle(source_process, source, target_process, access, inherit, options=0):
-    target = _c_int()
+    # CPython: the first three arguments are expected to be integers
+    target = _ffi.new("HANDLE[1]")
 
-    res = _DuplicateHandle(int(source_process), int(source), int(target_process),
-                           _byref(target),
-                           access, inherit, options)
+    res = _kernel32.DuplicateHandle(
+        _ffi.cast("HANDLE", source_process),
+        _ffi.cast("HANDLE", source),
+        _ffi.cast("HANDLE", target_process),
+        target, access, inherit, options)
 
     if not res:
         raise _WinError()
 
-    return _handle(target.value)
+    return _handle(target[0])
+
+def _z(input):
+    if input is None:
+        return _ffi.NULL
+    if isinstance(input, basestring):
+        return str(input)
+    raise TypeError("string/unicode/None expected, got %r" % (
+        type(input).__name__,))
 
 def CreateProcess(name, command_line, process_attr, thread_attr,
                   inherit, flags, env, start_dir, startup_info):
-    si = _STARTUPINFO()
+    si = _ffi.new("STARTUPINFO *")
     if startup_info is not None:
         si.dwFlags = startup_info.dwFlags
         si.wShowWindow = startup_info.wShowWindow
+        # CPython: these three handles are expected to be _handle objects
         if startup_info.hStdInput:
-            si.hStdInput = int(startup_info.hStdInput)
+            si.hStdInput = startup_info.hStdInput.c_handle
         if startup_info.hStdOutput:
-            si.hStdOutput = int(startup_info.hStdOutput)
+            si.hStdOutput = startup_info.hStdOutput.c_handle
         if startup_info.hStdError:
-            si.hStdError = int(startup_info.hStdError)
+            si.hStdError = startup_info.hStdError.c_handle
 
-    pi = _PROCESS_INFORMATION()
+    pi = _ffi.new("PROCESS_INFORMATION *")
 
     if env is not None:
         envbuf = ""
@@ -159,47 +110,55 @@
             envbuf += "%s=%s\0" % (k, v)
         envbuf += '\0'
     else:
-        envbuf = None
+        envbuf = _ffi.NULL
 
-    res = _CreateProcess(name, command_line, None, None, inherit, flags, envbuf,
-                        start_dir, _byref(si), _byref(pi))
+    res = _kernel32.CreateProcessA(_z(name), _z(command_line), _ffi.NULL,
+                                   _ffi.NULL, inherit, flags, envbuf,
+                                   _z(start_dir), si, pi)
 
     if not res:
         raise _WinError()
 
-    return _handle(pi.hProcess), _handle(pi.hThread), pi.dwProcessID, pi.dwThreadID
+    return _handle(pi.hProcess), _handle(pi.hThread), pi.dwProcessId, pi.dwThreadId
 
 def WaitForSingleObject(handle, milliseconds):
-    res = _WaitForSingleObject(int(handle), milliseconds)
-
-    if res < 0:
+    # CPython: the first argument is expected to be an integer.
+    res = _kernel32.WaitForSingleObject(_ffi.cast("HANDLE", handle),
+                                        milliseconds)
+    if res == 0xFFFFFFFF:   # WAIT_FAILED; the unsigned DWORD result is never < 0
         raise _WinError()
 
     return res
 
 def GetExitCodeProcess(handle):
-    code = _c_int()
+    # CPython: the first argument is expected to be an integer.
+    code = _ffi.new("DWORD[1]")
 
-    res = _GetExitCodeProcess(int(handle), _byref(code))
+    res = _kernel32.GetExitCodeProcess(_ffi.cast("HANDLE", handle), code)
 
     if not res:
         raise _WinError()
 
-    return code.value
+    return code[0]
 
 def TerminateProcess(handle, exitcode):
-    res = _TerminateProcess(int(handle), exitcode)
+    # CPython: the first argument is expected to be an integer.
+    # The second argument is silently wrapped in a UINT.
+    res = _kernel32.TerminateProcess(_ffi.cast("HANDLE", handle),
+                                     _ffi.cast("UINT", exitcode))
 
     if not res:
         raise _WinError()
 
 def GetStdHandle(stdhandle):
-    res = _GetStdHandle(stdhandle)
+    stdhandle = _ffi.cast("DWORD", stdhandle)
+    res = _kernel32.GetStdHandle(stdhandle)
 
     if not res:
         return None
     else:
-        return res
+        # note: returns integer, not handle object
+        return int(_ffi.cast("intptr_t", res))
 
 STD_INPUT_HANDLE = -10
 STD_OUTPUT_HANDLE = -11
diff --git a/lib_pypy/cffi/_pycparser/__init__.py b/lib_pypy/cffi/_pycparser/__init__.py
--- a/lib_pypy/cffi/_pycparser/__init__.py
+++ b/lib_pypy/cffi/_pycparser/__init__.py
@@ -10,7 +10,6 @@
 __all__ = ['c_lexer', 'c_parser', 'c_ast']
 __version__ = '2.14'
 
-from subprocess import Popen, PIPE
 from .c_parser import CParser
 
 
@@ -28,6 +27,7 @@
         When successful, returns the preprocessed file's contents.
         Errors from cpp will be printed out.
     """
+    from subprocess import Popen, PIPE
     path_list = [cpp_path]
     if isinstance(cpp_args, list):
         path_list += cpp_args
diff --git a/lib_pypy/msvcrt.py b/lib_pypy/msvcrt.py
--- a/lib_pypy/msvcrt.py
+++ b/lib_pypy/msvcrt.py
@@ -7,26 +7,39 @@
 # XXX incomplete: implemented only functions needed by subprocess.py
 # PAC: 2010/08 added MS locking for Whoosh
 
-import ctypes
+# 07/2016: rewrote in CFFI
+
+import sys
+if sys.platform != 'win32':
+    raise ImportError("The 'msvcrt' module is only available on Windows")
+
+import _rawffi
+from _pypy_winbase_cffi import ffi as _ffi
+_lib = _ffi.dlopen(_rawffi.get_libc().name)
+
 import errno
-from ctypes_support import standard_c_lib as _c
-from ctypes_support import get_errno
-
-try:
-    open_osfhandle = _c._open_osfhandle
-except AttributeError: # we are not on windows
-    raise ImportError
 
 try: from __pypy__ import builtinify, validate_fd
 except ImportError: builtinify = validate_fd = lambda f: f
 
 
-open_osfhandle.argtypes = [ctypes.c_int, ctypes.c_int]
-open_osfhandle.restype = ctypes.c_int
+def _ioerr():
+    e = _ffi.errno
+    raise IOError(e, errno.errorcode[e])
 
-_get_osfhandle = _c._get_osfhandle
-_get_osfhandle.argtypes = [ctypes.c_int]
-_get_osfhandle.restype = ctypes.c_int
+
+@builtinify
+def open_osfhandle(fd, flags):
+    """open_osfhandle(handle, flags) -> file descriptor
+
+    Create a C runtime file descriptor from the file handle handle. The
+    flags parameter should be a bitwise OR of os.O_APPEND, os.O_RDONLY,
+    and os.O_TEXT. The returned file descriptor may be used as a parameter
+    to os.fdopen() to create a file object."""
+    fd = _lib._open_osfhandle(fd, flags)
+    if fd == -1:
+        _ioerr()
+    return fd
 
 @builtinify
 def get_osfhandle(fd):
@@ -38,62 +51,74 @@
         validate_fd(fd)
     except OSError as e:
         raise IOError(*e.args)
-    return _get_osfhandle(fd)
+    result = _lib._get_osfhandle(fd)
+    if result == -1:
+        _ioerr()
+    return result
 
-setmode = _c._setmode
-setmode.argtypes = [ctypes.c_int, ctypes.c_int]
-setmode.restype = ctypes.c_int
+@builtinify
+def setmode(fd, flags):
+    """setmode(fd, mode) -> Previous mode
+
+    Set the line-end translation mode for the file descriptor fd. To set
+    it to text mode, flags should be os.O_TEXT; for binary, it should be
+    os.O_BINARY."""
+    flags = _lib._setmode(fd, flags)
+    if flags == -1:
+        _ioerr()
+    return flags
 
 LK_UNLCK, LK_LOCK, LK_NBLCK, LK_RLCK, LK_NBRLCK = range(5)
 
-_locking = _c._locking
-_locking.argtypes = [ctypes.c_int, ctypes.c_int, ctypes.c_int]
-_locking.restype = ctypes.c_int
-
 @builtinify
 def locking(fd, mode, nbytes):
-    '''lock or unlock a number of bytes in a file.'''
-    rv = _locking(fd, mode, nbytes)
+    """locking(fd, mode, nbytes) -> None
+
+    Lock part of a file based on file descriptor fd from the C runtime.
+    Raises IOError on failure. The locked region of the file extends from
+    the current file position for nbytes bytes, and may continue beyond
+    the end of the file. mode must be one of the LK_* constants listed
+    below. Multiple regions in a file may be locked at the same time, but
+    may not overlap. Adjacent regions are not merged; they must be unlocked
+    individually."""
+    rv = _lib._locking(fd, mode, nbytes)
     if rv != 0:
-        e = get_errno()
-        raise IOError(e, errno.errorcode[e])
+        _ioerr()
 
 # Console I/O routines
 
-kbhit = _c._kbhit
-kbhit.argtypes = []
-kbhit.restype = ctypes.c_int
+kbhit = _lib._kbhit
 
-getch = _c._getch
-getch.argtypes = []
-getch.restype = ctypes.c_char
+@builtinify
+def getch():
+    return chr(_lib._getch())
 
-getwch = _c._getwch
-getwch.argtypes = []
-getwch.restype = ctypes.c_wchar
+@builtinify
+def getwch():
+    return unichr(_lib._getwch())
 
-getche = _c._getche
-getche.argtypes = []
-getche.restype = ctypes.c_char
+@builtinify
+def getche():
+    return chr(_lib._getche())
 
-getwche = _c._getwche
-getwche.argtypes = []
-getwche.restype = ctypes.c_wchar
+@builtinify
+def getwche():
+    return unichr(_lib._getwche())
 
-putch = _c._putch
-putch.argtypes = [ctypes.c_char]
-putch.restype = None
+@builtinify
+def putch(ch):
+    _lib._putch(ord(ch))
 
-putwch = _c._putwch
-putwch.argtypes = [ctypes.c_wchar]
-putwch.restype = None
+@builtinify
+def putwch(ch):
+    _lib._putwch(ord(ch))
 
-ungetch = _c._ungetch
-ungetch.argtypes = [ctypes.c_char]
-ungetch.restype = None
+@builtinify
+def ungetch(ch):
+    if _lib._ungetch(ord(ch)) == -1:   # EOF
+        _ioerr()
 
-ungetwch = _c._ungetwch
-ungetwch.argtypes = [ctypes.c_wchar]
-ungetwch.restype = None
-
-del ctypes
+@builtinify
+def ungetwch(ch):
+    if _lib._ungetwch(ord(ch)) == -1:   # EOF
+        _ioerr()
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -86,3 +86,10 @@
 
 Copy CPython's logic more closely for handling of ``__instancecheck__()``
 and ``__subclasscheck__()``.  Fixes issue 2343.
+
+.. branch: msvcrt-cffi
+
+Rewrite the Win32 dependencies of 'subprocess' to use cffi instead
+of ctypes. This avoids importing ctypes in many small programs and
+scripts, which in turn avoids enabling threads (because ctypes
+creates callbacks at import time, and callbacks need threads).
diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py
--- a/pypy/module/_rawffi/interp_rawffi.py
+++ b/pypy/module/_rawffi/interp_rawffi.py
@@ -2,6 +2,7 @@
 from pypy.interpreter.baseobjspace import W_Root
 from pypy.interpreter.error import OperationError, oefmt, wrap_oserror
 from pypy.interpreter.gateway import interp2app, unwrap_spec
+from pypy.interpreter.typedef import interp_attrproperty
 from pypy.interpreter.typedef import TypeDef, GetSetProperty
 
 from rpython.rlib.clibffi import *
@@ -237,6 +238,7 @@
     __new__     = interp2app(descr_new_cdll),
     ptr         = interp2app(W_CDLL.ptr),
     getaddressindll = interp2app(W_CDLL.getaddressindll),
+    name        = interp_attrproperty('name', W_CDLL),
     __doc__     = """ C Dynamically loaded library
 use CDLL(libname) to create a handle to a C library (the argument is processed
 the same way as dlopen processes it). On such a library you can call:
diff --git a/pypy/module/_rawffi/test/test__rawffi.py b/pypy/module/_rawffi/test/test__rawffi.py
--- a/pypy/module/_rawffi/test/test__rawffi.py
+++ b/pypy/module/_rawffi/test/test__rawffi.py
@@ -1223,6 +1223,11 @@
         assert z == 43
         arg.free()
 
+    def test_cdll_name(self):
+        import _rawffi
+        lib = _rawffi.CDLL(self.lib_name)
+        assert lib.name == self.lib_name
+
 
 class AppTestAutoFree:
     spaceconfig = dict(usemodules=['_rawffi', 'struct'])
diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py
--- a/pypy/module/posix/interp_posix.py
+++ b/pypy/module/posix/interp_posix.py
@@ -544,6 +544,14 @@
         raise oefmt(space.w_ValueError,
                     "the environment variable is longer than %d bytes",
                     _MAX_ENV)
+    if _WIN32 and not objectmodel.we_are_translated() and value == '':
+        # special case: on Windows, _putenv("NAME=") really means that
+        # we want to delete NAME.  So that's what the os.environ[name]=''
+        # below will do after translation.  But before translation, it
+        # will cache the environment value '' instead of <missing> and
+        # then return that.  We need to avoid that.
+        del os.environ[name]
+        return
     try:
         os.environ[name] = value
     except OSError as e:
diff --git a/pypy/module/pypyjit/test_pypy_c/test_ffi.py b/pypy/module/pypyjit/test_pypy_c/test_ffi.py
--- a/pypy/module/pypyjit/test_pypy_c/test_ffi.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_ffi.py
@@ -424,6 +424,7 @@
         --TICK--
         i123 = arraylen_gc(p67, descr=<ArrayP .>)
         i119 = call_i(ConstClass(_ll_1_raw_malloc_varsize__Signed), 6, descr=<Calli . i EF=5 OS=110>)
+        check_memory_error(i119)
         raw_store(i119, 0, i160, descr=<ArrayS 2>)
         raw_store(i119, 2, i160, descr=<ArrayS 2>)
         raw_store(i119, 4, i160, descr=<ArrayS 2>)
diff --git a/pypy/module/thread/os_lock.py b/pypy/module/thread/os_lock.py
--- a/pypy/module/thread/os_lock.py
+++ b/pypy/module/thread/os_lock.py
@@ -9,12 +9,9 @@
 from pypy.interpreter.gateway import interp2app, unwrap_spec
 from pypy.interpreter.typedef import TypeDef
 from pypy.interpreter.error import oefmt
-from rpython.rlib.rarithmetic import r_longlong
+from rpython.rlib.rarithmetic import r_longlong, ovfcheck_float_to_longlong
 
 
-LONGLONG_MAX = r_longlong(2 ** (r_longlong.BITS-1) - 1)
-TIMEOUT_MAX = LONGLONG_MAX
-
 RPY_LOCK_FAILURE, RPY_LOCK_ACQUIRED, RPY_LOCK_INTR = range(3)
 
 def parse_acquire_args(space, blocking, timeout):
@@ -29,10 +26,12 @@
     elif timeout == -1.0:
         microseconds = -1
     else:
-        timeout *= 1e6
-        if timeout > float(TIMEOUT_MAX):
+        # 0.0 => 0.0, but otherwise tends to round up
+        timeout = timeout * 1e6 + 0.999
+        try:
+            microseconds = ovfcheck_float_to_longlong(timeout)
+        except OverflowError:
             raise oefmt(space.w_OverflowError, "timeout value is too large")
-        microseconds = r_longlong(timeout)
     return microseconds
 
 
@@ -45,7 +44,8 @@
             # Run signal handlers if we were interrupted
             space.getexecutioncontext().checksignals()
             if microseconds >= 0:
-                microseconds = r_longlong(endtime - (time.time() * 1e6))
+                microseconds = r_longlong((endtime - (time.time() * 1e6))
+                                          + 0.999)
                 # Check for negative values, since those mean block
                 # forever
                 if microseconds <= 0:
diff --git a/pypy/module/thread/test/test_lock.py b/pypy/module/thread/test/test_lock.py
--- a/pypy/module/thread/test/test_lock.py
+++ b/pypy/module/thread/test/test_lock.py
@@ -64,6 +64,25 @@
         else:
             assert self.runappdirect, "missing lock._py3k_acquire()"
 
+    def test_py3k_acquire_timeout_overflow(self):
+        import thread
+        lock = thread.allocate_lock()
+        if not hasattr(lock, '_py3k_acquire'):
+            skip("missing lock._py3k_acquire()")
+        maxint = 2**63 - 1
+        boundary = int(maxint * 1e-6)
+        for i in [-100000, -10000, -1000, -100, -10, -1, 0,
+                  1, 10, 100, 1000, 10000, 100000]:
+            timeout = (maxint + i) * 1e-6
+            try:
+                lock._py3k_acquire(True, timeout=timeout)
+            except OverflowError:
+                got_ovf = True
+            else:
+                got_ovf = False
+                lock.release()
+            assert (i, got_ovf) == (i, int(timeout * 1e6 + 0.999) > maxint)
+
     @py.test.mark.xfail(machine()=='s390x', reason='may fail under heavy load')
     def test_ping_pong(self):
         # The purpose of this test is that doing a large number of ping-pongs
diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py
--- a/rpython/jit/backend/arm/assembler.py
+++ b/rpython/jit/backend/arm/assembler.py
@@ -82,9 +82,6 @@
         self.failure_recovery_code = [0, 0, 0, 0]
 
     def _build_propagate_exception_path(self):
-        if not self.cpu.propagate_exception_descr:
-            return      # not supported (for tests, or non-translated)
-        #
         mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
         self._store_and_reset_exception(mc, r.r0)
         ofs = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
@@ -372,9 +369,9 @@
             self._write_barrier_fastpath(mc, wbdescr, [r.fp], array=False,
                                          is_frame=True)
 
-    def propagate_memoryerror_if_r0_is_null(self):
-        # see ../x86/assembler.py:propagate_memoryerror_if_eax_is_null
-        self.mc.CMP_ri(r.r0.value, 0)
+    def propagate_memoryerror_if_reg_is_null(self, reg_loc):
+        # see ../x86/assembler.py:genop_discard_check_memory_error()
+        self.mc.CMP_ri(reg_loc.value, 0)
         self.mc.B(self.propagate_exception_path, c=c.EQ)
 
     def _push_all_regs_to_jitframe(self, mc, ignored_regs, withfloats,
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -1050,9 +1050,8 @@
         regalloc = self._regalloc
         return regalloc.operations[regalloc.rm.position + delta]
 
-    def emit_op_call_malloc_gc(self, op, arglocs, regalloc, fcond):
-        self._emit_call(op, arglocs, fcond=fcond)
-        self.propagate_memoryerror_if_r0_is_null()
+    def emit_op_check_memory_error(self, op, arglocs, regalloc, fcond):
+        self.propagate_memoryerror_if_reg_is_null(arglocs[0])
         self._alignment_check()
         return fcond
 
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -594,8 +594,9 @@
         resloc = self.after_call(op)
         return resloc
 
-    def prepare_op_call_malloc_gc(self, op, fcond):
-        return self._prepare_call(op)
+    def prepare_op_check_memory_error(self, op, fcond):
+        argloc = self.make_sure_var_in_reg(op.getarg(0))
+        return [argloc]
 
     def _prepare_llong_binop_xx(self, op, fcond):
         # arg 0 is the address of the function
diff --git a/rpython/jit/backend/llgraph/runner.py b/rpython/jit/backend/llgraph/runner.py
--- a/rpython/jit/backend/llgraph/runner.py
+++ b/rpython/jit/backend/llgraph/runner.py
@@ -1520,6 +1520,11 @@
             lle = None
         self.last_exception = lle
 
+    def execute_check_memory_error(self, descr, value):
+        if not value:
+            from rpython.jit.backend.llsupport import llmodel
+            raise llmodel.MissingLatestDescrError
+
 
 def _getdescr(op):
     d = op.getdescr()
diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -48,7 +48,10 @@
         anything, it must be an optional MemoryError.
         """
         FUNCPTR = lltype.Ptr(lltype.FuncType(ARGS, RESULT))
-        descr = get_call_descr(self, ARGS, RESULT)
+        # Note: the call may invoke the GC, which may run finalizers.
+        # Finalizers are constrained in what they can do, but we can't
+        # really express that in a useful way here.
+        descr = get_call_descr(self, ARGS, RESULT, EffectInfo.MOST_GENERAL)
         setattr(self, funcname, func)
         setattr(self, funcname + '_FUNCPTR', FUNCPTR)
         setattr(self, funcname + '_descr', descr)
diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -405,6 +405,9 @@
         deadframe = lltype.cast_opaque_ptr(jitframe.JITFRAMEPTR, deadframe)
         descr = deadframe.jf_descr
         res = history.AbstractDescr.show(self, descr)
+        if not we_are_translated():   # tests only: for missing
+            if res is None:           # propagate_exception_descr
+                raise MissingLatestDescrError
         assert isinstance(res, history.AbstractFailDescr)
         return res
 
@@ -813,6 +816,9 @@
         calldescr.call_stub_i(func, args_i, args_r, args_f)
 
 
+class MissingLatestDescrError(Exception):
+    """For propagate_exception_descr in untranslated tests."""
+
 final_descr_rd_locs = [rffi.cast(rffi.USHORT, 0)]
 history.BasicFinalDescr.rd_locs = final_descr_rd_locs
 compile._DoneWithThisFrameDescr.rd_locs = final_descr_rd_locs
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -26,7 +26,8 @@
 class GcRewriterAssembler(object):
     """ This class performs the following rewrites on the list of operations:
 
-     - Turn all NEW_xxx to either a CALL_MALLOC_GC, or a CALL_MALLOC_NURSERY
+     - Turn all NEW_xxx to either a CALL_R/CHECK_MEMORY_ERROR,
+       or a CALL_MALLOC_NURSERY,
        followed by SETFIELDs in order to initialize their GC fields.  The
        two advantages of CALL_MALLOC_NURSERY is that it inlines the common
        path, and we need only one such operation to allocate several blocks
@@ -696,16 +697,17 @@
         self._delayed_zero_setfields.clear()
 
     def _gen_call_malloc_gc(self, args, v_result, descr):
-        """Generate a CALL_MALLOC_GC with the given args."""
+        """Generate a CALL_R/CHECK_MEMORY_ERROR with the given args."""
         self.emitting_an_operation_that_can_collect()
-        op = ResOperation(rop.CALL_MALLOC_GC, args, descr=descr)
+        op = ResOperation(rop.CALL_R, args, descr=descr)
         self.replace_op_with(v_result, op)
         self.emit_op(op)
+        self.emit_op(ResOperation(rop.CHECK_MEMORY_ERROR, [op]))
         # In general, don't add v_result to write_barrier_applied:
         # v_result might be a large young array.
 
     def gen_malloc_fixedsize(self, size, typeid, v_result):
-        """Generate a CALL_MALLOC_GC(malloc_fixedsize_fn, ...).
+        """Generate a CALL_R(malloc_fixedsize_fn, ...).
         Used on Boehm, and on the framework GC for large fixed-size
         mallocs.  (For all I know this latter case never occurs in
         practice, but better safe than sorry.)
@@ -725,7 +727,7 @@
         self.remember_write_barrier(v_result)
 
     def gen_boehm_malloc_array(self, arraydescr, v_num_elem, v_result):
-        """Generate a CALL_MALLOC_GC(malloc_array_fn, ...) for Boehm."""
+        """Generate a CALL_R(malloc_array_fn, ...) for Boehm."""
         addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_array')
         self._gen_call_malloc_gc([ConstInt(addr),
                                   ConstInt(arraydescr.basesize),
@@ -736,7 +738,7 @@
                                  self.gc_ll_descr.malloc_array_descr)
 
     def gen_malloc_array(self, arraydescr, v_num_elem, v_result):
-        """Generate a CALL_MALLOC_GC(malloc_array_fn, ...) going either
+        """Generate a CALL_R(malloc_array_fn, ...) going either
         to the standard or the nonstandard version of the function."""
         #
         if (arraydescr.basesize == self.gc_ll_descr.standard_array_basesize
@@ -763,13 +765,13 @@
         self._gen_call_malloc_gc(args, v_result, calldescr)
 
     def gen_malloc_str(self, v_num_elem, v_result):
-        """Generate a CALL_MALLOC_GC(malloc_str_fn, ...)."""
+        """Generate a CALL_R(malloc_str_fn, ...)."""
         addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_str')
         self._gen_call_malloc_gc([ConstInt(addr), v_num_elem], v_result,
                                  self.gc_ll_descr.malloc_str_descr)
 
     def gen_malloc_unicode(self, v_num_elem, v_result):
-        """Generate a CALL_MALLOC_GC(malloc_unicode_fn, ...)."""
+        """Generate a CALL_R(malloc_unicode_fn, ...)."""
         addr = self.gc_ll_descr.get_malloc_fn_addr('malloc_unicode')
         self._gen_call_malloc_gc([ConstInt(addr), v_num_elem], v_result,
                                  self.gc_ll_descr.malloc_unicode_descr)
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -254,8 +254,9 @@
             jump()
         """, """
             [p1]
-            p0 = call_malloc_gc(ConstClass(malloc_fixedsize), %(sdescr.size)d,\
-                                descr=malloc_fixedsize_descr)
+            p0 = call_r(ConstClass(malloc_fixedsize), %(sdescr.size)d,\
+                        descr=malloc_fixedsize_descr)
+            check_memory_error(p0)
             jump()
         """)
 
@@ -267,10 +268,12 @@
             jump()
         """, """
             []
-            p0 = call_malloc_gc(ConstClass(malloc_fixedsize), %(sdescr.size)d,\
-                                descr=malloc_fixedsize_descr)
-            p1 = call_malloc_gc(ConstClass(malloc_fixedsize), %(sdescr.size)d,\
-                                descr=malloc_fixedsize_descr)
+            p0 = call_r(ConstClass(malloc_fixedsize), %(sdescr.size)d,\
+                        descr=malloc_fixedsize_descr)
+            check_memory_error(p0)
+            p1 = call_r(ConstClass(malloc_fixedsize), %(sdescr.size)d,\
+                        descr=malloc_fixedsize_descr)
+            check_memory_error(p1)
             jump()
         """)
 
@@ -281,16 +284,17 @@
             jump()
         """, """
             []
-            p0 = call_malloc_gc(ConstClass(malloc_array),   \
+            p0 = call_r(ConstClass(malloc_array),           \
                                 %(adescr.basesize)d,        \
                                 10,                         \
                                 %(adescr.itemsize)d,        \
                                 %(adescr.lendescr.offset)d, \
                                 descr=malloc_array_descr)
+            check_memory_error(p0)
             jump()
         """)
 ##      should ideally be:
-##            p0 = call_malloc_gc(ConstClass(malloc_fixedsize), \
+##            p0 = call_r(ConstClass(malloc_fixedsize), \
 ##                                %(adescr.basesize + 10 * adescr.itemsize)d, \
 ##                                descr=malloc_fixedsize_descr)
 ##            setfield_gc(p0, 10, descr=alendescr)
@@ -302,12 +306,13 @@
             jump()
         """, """
             [i1]
-            p0 = call_malloc_gc(ConstClass(malloc_array),   \
+            p0 = call_r(ConstClass(malloc_array),   \
                                 %(adescr.basesize)d,        \
                                 i1,                         \
                                 %(adescr.itemsize)d,        \
                                 %(adescr.lendescr.offset)d, \
                                 descr=malloc_array_descr)
+            check_memory_error(p0)
             jump()
         """)
 
@@ -318,8 +323,9 @@
             jump()
         """, """
             [p1]
-            p0 = call_malloc_gc(ConstClass(malloc_fixedsize), 102, \
+            p0 = call_r(ConstClass(malloc_fixedsize), 102, \
                                 descr=malloc_fixedsize_descr)
+            check_memory_error(p0)
             gc_store(p0, 0, ConstClass(o_vtable), %(vtable_descr.field_size)s)
             jump()
         """)
@@ -331,12 +337,13 @@
             jump()
         """, """
             [i1]
-            p0 = call_malloc_gc(ConstClass(malloc_array), \
+            p0 = call_r(ConstClass(malloc_array),         \
                                 %(strdescr.basesize)d,    \
                                 i1,                       \
                                 %(strdescr.itemsize)d,    \
                                 %(strlendescr.offset)d,   \
                                 descr=malloc_array_descr)
+            check_memory_error(p0)
             jump()
         """)
 
@@ -347,16 +354,17 @@
             jump()
         """, """
             [i1]
-            p0 = call_malloc_gc(ConstClass(malloc_array),   \
+            p0 = call_r(ConstClass(malloc_array),           \
                                 %(unicodedescr.basesize)d,  \
                                 10,                         \
                                 %(unicodedescr.itemsize)d,  \
                                 %(unicodelendescr.offset)d, \
                                 descr=malloc_array_descr)
+            check_memory_error(p0)
             jump()
         """)
 ##      should ideally be:
-##            p0 = call_malloc_gc(ConstClass(malloc_fixedsize),   \
+##            p0 = call_r(ConstClass(malloc_fixedsize),           \
 ##                                %(unicodedescr.basesize +       \
 ##                                  10 * unicodedescr.itemsize)d, \
 ##                                descr=malloc_fixedsize_descr)
@@ -545,11 +553,12 @@
             jump(i0)
         """, """
             [i0, p1]
-            p0 = call_malloc_gc(ConstClass(malloc_array_nonstandard), \
+            p0 = call_r(ConstClass(malloc_array_nonstandard),         \
                                 64, 8,                                \
                                 %(nonstd_descr.lendescr.offset)d,     \
                                 6464, i0,                             \
                                 descr=malloc_array_nonstandard_descr)
+            check_memory_error(p0)
             cond_call_gc_wb_array(p0, i0, descr=wbdescr)
             gc_store_indexed(p0, i0, p1, 8, 64, 8)
             jump(i0)
@@ -563,9 +572,10 @@
             jump()
         """, """
             []
-            p0 = call_malloc_gc(ConstClass(malloc_array), 1,  \
+            p0 = call_r(ConstClass(malloc_array), 1,          \
                                 %(bdescr.tid)d, 103,          \
                                 descr=malloc_array_descr)
+            check_memory_error(p0)
             jump()
         """)
 
@@ -601,9 +611,10 @@
             jump()
         """, """
             []
-            p0 = call_malloc_gc(ConstClass(malloc_array), 1, \
+            p0 = call_r(ConstClass(malloc_array), 1,         \
                                 %(bdescr.tid)d, 20000000,    \
                                 descr=malloc_array_descr)
+            check_memory_error(p0)
             jump()
         """)
 
@@ -628,8 +639,9 @@
             jump()
         """, """
             [p1]
-            p0 = call_malloc_gc(ConstClass(malloc_big_fixedsize), 104, 9315, \
+            p0 = call_r(ConstClass(malloc_big_fixedsize), 104, 9315, \
                                 descr=malloc_big_fixedsize_descr)
+            check_memory_error(p0)
             gc_store(p0, 0,  0, %(vtable_descr.field_size)s)
             jump()
         """)
diff --git a/rpython/jit/backend/llsupport/test/ztranslation_test.py b/rpython/jit/backend/llsupport/test/ztranslation_test.py
--- a/rpython/jit/backend/llsupport/test/ztranslation_test.py
+++ b/rpython/jit/backend/llsupport/test/ztranslation_test.py
@@ -40,7 +40,7 @@
 
         eci = ExternalCompilationInfo(post_include_bits=['''
 #define pypy_my_fabs(x)  fabs(x)
-'''])
+'''], includes=['math.h'])
         myabs1 = rffi.llexternal('pypy_my_fabs', [lltype.Float],
                                  lltype.Float, macro=True, releasegil=False,
                                  compilation_info=eci)
diff --git a/rpython/jit/backend/ppc/opassembler.py b/rpython/jit/backend/ppc/opassembler.py
--- a/rpython/jit/backend/ppc/opassembler.py
+++ b/rpython/jit/backend/ppc/opassembler.py
@@ -1026,9 +1026,8 @@
 
     _mixin_ = True
 
-    def emit_call_malloc_gc(self, op, arglocs, regalloc):
-        self._emit_call(op, arglocs)
-        self.propagate_memoryerror_if_r3_is_null()
+    def emit_check_memory_error(self, op, arglocs, regalloc):
+        self.propagate_memoryerror_if_reg_is_null(arglocs[0])
 
     def emit_call_malloc_nursery(self, op, arglocs, regalloc):
         # registers r.RES and r.RSZ are allocated for this call
diff --git a/rpython/jit/backend/ppc/ppc_assembler.py b/rpython/jit/backend/ppc/ppc_assembler.py
--- a/rpython/jit/backend/ppc/ppc_assembler.py
+++ b/rpython/jit/backend/ppc/ppc_assembler.py
@@ -413,7 +413,7 @@
         # Check that we don't get NULL; if we do, we always interrupt the
         # current loop, as a "good enough" approximation (same as
         # emit_call_malloc_gc()).
-        self.propagate_memoryerror_if_r3_is_null()
+        self.propagate_memoryerror_if_reg_is_null(r.r3)
 
         mc.mtlr(r.RCS1.value)     # restore LR
         self._pop_core_regs_from_jitframe(mc, saved_regs)
@@ -595,9 +595,6 @@
             self.wb_slowpath[withcards + 2 * withfloats] = rawstart
 
     def _build_propagate_exception_path(self):
-        if not self.cpu.propagate_exception_descr:
-            return
-
         self.mc = PPCBuilder()
         #
         # read and reset the current exception
@@ -1340,11 +1337,8 @@
         pmc.b(offset)    # jump always
         pmc.overwrite()
 
-    def propagate_memoryerror_if_r3_is_null(self):
-        # if self.propagate_exception_path == 0 (tests), this may jump to 0
-        # and segfaults.  too bad.  the alternative is to continue anyway
-        # with r3==0, but that will segfault too.
-        self.mc.cmp_op(0, r.r3.value, 0, imm=True)
+    def propagate_memoryerror_if_reg_is_null(self, reg_loc):
+        self.mc.cmp_op(0, reg_loc.value, 0, imm=True)
         self.mc.b_cond_abs(self.propagate_exception_path, c.EQ)
 
     def write_new_force_index(self):
diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -573,8 +573,9 @@
         res = self.rm.force_allocate_reg(op)
         return [res]
 
-    def prepare_call_malloc_gc(self, op):
-        return self._prepare_call(op)
+    def prepare_check_memory_error(self, op):
+        loc = self.ensure_reg(op.getarg(0))
+        return [loc]
 
     def _prepare_guard(self, op, args=None):
         if args is None:
diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -22,6 +22,7 @@
 from rpython.jit.backend.detect_cpu import autodetect
 from rpython.jit.backend.llsupport import jitframe
 from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
+from rpython.jit.backend.llsupport.llmodel import MissingLatestDescrError
 from rpython.jit.backend.llsupport.rewrite import GcRewriterAssembler
 
 
@@ -4391,6 +4392,12 @@
                          'float', descr=calldescr)
             assert longlong.getrealfloat(res) == expected
 
+    def test_check_memory_error(self):
+        self.execute_operation(
+                       rop.CHECK_MEMORY_ERROR, [InputArgInt(12345)], 'void')
+        py.test.raises(MissingLatestDescrError, self.execute_operation,
+                       rop.CHECK_MEMORY_ERROR, [InputArgInt(0)], 'void')
+
     def test_compile_loop_with_target(self):
         looptoken = JitCellToken()
         targettoken1 = TargetToken()
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -292,9 +292,6 @@
         return rawstart
 
     def _build_propagate_exception_path(self):
-        if not self.cpu.propagate_exception_descr:
-            return      # not supported (for tests, or non-translated)
-        #
         self.mc = codebuf.MachineCodeBlockWrapper()
         self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
         #
@@ -1519,15 +1516,9 @@
 
     # ----------
 
-    def genop_call_malloc_gc(self, op, arglocs, result_loc):
-        self._genop_call(op, arglocs, result_loc)
-        self.propagate_memoryerror_if_eax_is_null()
-
-    def propagate_memoryerror_if_eax_is_null(self):
-        # if self.propagate_exception_path == 0 (tests), this may jump to 0
-        # and segfaults.  too bad.  the alternative is to continue anyway
-        # with eax==0, but that will segfault too.
-        self.mc.TEST_rr(eax.value, eax.value)
+    def genop_discard_check_memory_error(self, op, arglocs):
+        reg = arglocs[0]
+        self.mc.TEST(reg, reg)
         if WORD == 4:
             self.mc.J_il(rx86.Conditions['Z'], self.propagate_exception_path)
             self.mc.add_pending_relocation()
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -902,9 +902,10 @@
     consider_call_release_gil_i = _consider_call_release_gil
     consider_call_release_gil_f = _consider_call_release_gil
     consider_call_release_gil_n = _consider_call_release_gil
-    
-    def consider_call_malloc_gc(self, op):
-        self._consider_call(op)
+
+    def consider_check_memory_error(self, op):
+        x = self.rm.make_sure_var_in_reg(op.getarg(0))
+        self.perform_discard(op, [x])
 
     def _consider_call_assembler(self, op):
         locs = self.locs_for_call_assembler(op)
diff --git a/rpython/jit/metainterp/executor.py b/rpython/jit/metainterp/executor.py
--- a/rpython/jit/metainterp/executor.py
+++ b/rpython/jit/metainterp/executor.py
@@ -379,7 +379,7 @@
                          rop.CALL_RELEASE_GIL_F,
                          rop.CALL_RELEASE_GIL_N,
                          rop.QUASIIMMUT_FIELD,
-                         rop.CALL_MALLOC_GC,
+                         rop.CHECK_MEMORY_ERROR,
                          rop.CALL_MALLOC_NURSERY,
                          rop.CALL_MALLOC_NURSERY_VARSIZE,
                          rop.CALL_MALLOC_NURSERY_VARSIZE_FRAME,
diff --git a/rpython/jit/metainterp/optimizeopt/heap.py b/rpython/jit/metainterp/optimizeopt/heap.py
--- a/rpython/jit/metainterp/optimizeopt/heap.py
+++ b/rpython/jit/metainterp/optimizeopt/heap.py
@@ -345,7 +345,8 @@
             opnum == rop.ENTER_PORTAL_FRAME or   # no effect whatsoever
             opnum == rop.LEAVE_PORTAL_FRAME or   # no effect whatsoever
             opnum == rop.COPYSTRCONTENT or       # no effect on GC struct/array
-            opnum == rop.COPYUNICODECONTENT):    # no effect on GC struct/array
+            opnum == rop.COPYUNICODECONTENT or   # no effect on GC struct/array
+            opnum == rop.CHECK_MEMORY_ERROR):    # may only abort the whole loop
             return
         if rop.is_call(op.opnum):
             if rop.is_call_assembler(op.getopnum()):
diff --git a/rpython/jit/metainterp/optimizeopt/info.py b/rpython/jit/metainterp/optimizeopt/info.py
--- a/rpython/jit/metainterp/optimizeopt/info.py
+++ b/rpython/jit/metainterp/optimizeopt/info.py
@@ -400,6 +400,12 @@
 
     def _force_elements(self, op, optforce, descr):
         self.size = -1
+        # at this point we have just written the
+        # 'op = CALL_I(..., OS_RAW_MALLOC_VARSIZE_CHAR)'.
+        # Emit now a CHECK_MEMORY_ERROR resop.
+        check_op = ResOperation(rop.CHECK_MEMORY_ERROR, [op])
+        optforce.emit_operation(check_op)
+        #
         buffer = self._get_buffer()
         for i in range(len(buffer.offsets)):
             # write the value
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -1818,7 +1818,7 @@
         [i1]
         label(i1)
         i2 = call_i('malloc', 20, descr=raw_malloc_descr)
-        #guard_no_exception() []  # XXX should appear
+        check_memory_error(i2)
         raw_store(i2, 0, i1, descr=rawarraydescr_char)
         raw_store(i2, 1, 123, descr=rawarraydescr_char)
         raw_store(i2, 2, 456, descr=rawarraydescr_char)
@@ -1844,7 +1844,7 @@
         [i1]
         label(i1)
         i2 = call_i('malloc', 10, descr=raw_malloc_descr)
-        #guard_no_exception() []  # XXX should appear
+        check_memory_error(i2)
         raw_store(i2, 0, i1, descr=rawarraydescr)
         setarrayitem_raw(i2, 2, 456, descr=rawarraydescr_char)
         call_n('free', i2, descr=raw_free_descr)
@@ -1867,7 +1867,7 @@
         [i1]
         label(i1)
         i2 = call_i('malloc', 10, descr=raw_malloc_descr)
-        #guard_no_exception() []  # XXX should appear
+        check_memory_error(i2)
         raw_store(i2, 0, i1, descr=rawarraydescr)
         i3 = getarrayitem_raw_i(i2, 0, descr=rawarraydescr_char)
         call_n('free', i2, descr=raw_free_descr)
@@ -1930,7 +1930,7 @@
         label(i0, i1)
         # these ops are generated by VirtualRawBufferValue._really_force
         i2 = call_i('malloc', 10, descr=raw_malloc_descr)
-        #guard_no_exception() []  # XXX should appear
+        check_memory_error(i2)
         raw_store(i2, 0, 42, descr=rawarraydescr_char)
         raw_store(i2, 5, 4242, descr=rawarraydescr_char)
         # this is generated by VirtualRawSliceValue._really_force
@@ -1959,7 +1959,7 @@
         call_n('free', i0, descr=raw_free_descr)
         label(i2)
         i3 = call_i('malloc', 10, descr=raw_malloc_descr)
-        #guard_no_exception() []  # XXX should appear
+        check_memory_error(i3)
         raw_store(i3, 0, i2, descr=rawarraydescr)
         jump(i3)
         """
@@ -2032,6 +2032,7 @@
         expected = """
         [f1]
         i0 = call_i('malloc', 16, descr=raw_malloc_descr)
+        check_memory_error(i0)
         escape_n(i0)
         i1 = int_add(i0, 8)
         setarrayitem_raw(i1, 0, f1, descr=rawarraydescr_float)
@@ -8802,14 +8803,22 @@
         ops = """
         [i1]
         i0 = call_i(123, 10, descr=raw_malloc_descr)
+        guard_no_exception() []
         jump(i0)
         """
-        self.optimize_loop(ops, ops)
+        expected = """
+        [i1]
+        i0 = call_i(123, 10, descr=raw_malloc_descr)
+        check_memory_error(i0)
+        jump(i0)
+        """
+        self.optimize_loop(ops, expected)
 
     def test_raw_buffer_int_is_true(self):
         ops = """
         [iinp]
         i0 = call_i(123, 10, descr=raw_malloc_descr)
+        guard_no_exception() []
         i1 = int_is_true(i0)
         guard_true(i1) []
         i2 = int_is_zero(i0)
@@ -8819,6 +8828,7 @@
         expected = """
         [i2]
         i0 = call_i(123, 10, descr=raw_malloc_descr)
+        check_memory_error(i0)
         jump(i0)
         """
         self.optimize_loop(ops, expected)
@@ -8877,6 +8887,7 @@
         ops = """
         [i0]
         i = call_i('malloc', 10, descr=raw_malloc_descr)
+        guard_no_exception() []
         is = int_add(i, 8)
         escape_n(i)
         i1 = int_add(i0, 1)
@@ -8888,6 +8899,7 @@
         expected = """
         [i0]
         i = call_i('malloc', 10, descr=raw_malloc_descr)
+        check_memory_error(i)
         escape_n(i)
         i1 = int_add(i0, 1)
         i2 = int_lt(i1, 100)
@@ -8955,6 +8967,7 @@
         ops = """
         [i0, p0]
         i2 = call_i('malloc', 10, descr=raw_malloc_descr)
+        guard_no_exception() []
         setarrayitem_raw(i2, 0, 13, descr=rawarraydescr)
         setfield_gc(p0, i2, descr=valuedescr)
         i1 = int_add(i0, 1)
@@ -8976,12 +8989,20 @@
         ops = """
         []
         i2 = call_i('malloc', 10, descr=raw_malloc_descr)
+        guard_no_exception() []
         guard_value(i2, 12345) []
         jump()
         """
+        expected = """
+        []
+        i2 = call_i('malloc', 10, descr=raw_malloc_descr)
+        check_memory_error(i2)
+        guard_value(i2, 12345) []
+        jump()
+        """
         # getting InvalidLoop would be a good idea, too.
         # (this test was written to show it would previously crash)
-        self.optimize_loop(ops, ops)
+        self.optimize_loop(ops, expected)
 
     def test_unroll_constant_null_1(self):
         ops = """
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -1154,7 +1154,7 @@
     'CALL_RELEASE_GIL/*d/fin',
     # release the GIL and "close the stack" for asmgcc
     'CALL_PURE/*d/rfin',             # removed before it's passed to the backend
-    'CALL_MALLOC_GC/*d/r',      # like CALL, but NULL => propagate MemoryError
+    'CHECK_MEMORY_ERROR/1/n',   # after a CALL: NULL => propagate MemoryError
     'CALL_MALLOC_NURSERY/1/r',  # nursery malloc, const number of bytes, zeroed
     'CALL_MALLOC_NURSERY_VARSIZE/3d/r',
     'CALL_MALLOC_NURSERY_VARSIZE_FRAME/1/r',
diff --git a/rpython/rlib/rarithmetic.py b/rpython/rlib/rarithmetic.py
--- a/rpython/rlib/rarithmetic.py
+++ b/rpython/rlib/rarithmetic.py
@@ -14,6 +14,8 @@
          integer operation did overflow
 ovfcheck_float_to_int
          convert to an integer or raise OverflowError
+ovfcheck_float_to_longlong
+         convert to a longlong or raise OverflowError
 r_longlong
          like r_int but double word size
 r_ulonglong
@@ -182,6 +184,18 @@
 # int(float(i)) != i  because of rounding issues.
 # These are the minimum and maximum float value that can
 # successfully be casted to an int.
+
+# The following values are not quite +/-sys.maxint.
+# Note the "<= x <" here, as opposed to "< x <" above.
+# This is justified by test_typed in translator/c/test.
+def ovfcheck_float_to_longlong(x):
+    from rpython.rlib.rfloat import isnan
+    if isnan(x):
+        raise OverflowError
+    if -9223372036854776832.0 <= x < 9223372036854775296.0:
+        return r_longlong(x)
+    raise OverflowError
+
 if sys.maxint == 2147483647:
     def ovfcheck_float_to_int(x):
         from rpython.rlib.rfloat import isnan
@@ -191,16 +205,8 @@
             return int(x)
         raise OverflowError
 else:
-    # The following values are not quite +/-sys.maxint.
-    # Note the "<= x <" here, as opposed to "< x <" above.
-    # This is justified by test_typed in translator/c/test.
     def ovfcheck_float_to_int(x):
-        from rpython.rlib.rfloat import isnan
-        if isnan(x):
-            raise OverflowError
-        if -9223372036854776832.0 <= x < 9223372036854775296.0:
-            return int(x)
-        raise OverflowError
+        return int(ovfcheck_float_to_longlong(x))
 
 def compute_restype(self_type, other_type):
     if self_type is other_type:
diff --git a/rpython/translator/c/gc.py b/rpython/translator/c/gc.py
--- a/rpython/translator/c/gc.py
+++ b/rpython/translator/c/gc.py
@@ -225,7 +225,7 @@
         sourcelines.append('\tNULL')
         sourcelines.append('};')
         sourcelines.append('struct boehm_fq_s *boehm_fq_queues[%d];' % (
-            len(gct.finalizer_triggers),))
+            len(gct.finalizer_triggers) or 1,))
         sourcelines.append('')
         eci = eci.merge(ExternalCompilationInfo(
             separate_module_sources=['\n'.join(sourcelines)]))
diff --git a/rpython/translator/c/src/asm_gcc_x86.h b/rpython/translator/c/src/asm_gcc_x86.h
--- a/rpython/translator/c/src/asm_gcc_x86.h
+++ b/rpython/translator/c/src/asm_gcc_x86.h
@@ -106,3 +106,6 @@
 #define PYPY_X86_CHECK_SSE2_DEFINED
 RPY_EXTERN void pypy_x86_check_sse2(void);
 #endif
+
+
+#define RPy_YieldProcessor()  asm("pause")
diff --git a/rpython/translator/c/src/asm_gcc_x86_64.h b/rpython/translator/c/src/asm_gcc_x86_64.h
--- a/rpython/translator/c/src/asm_gcc_x86_64.h
+++ b/rpython/translator/c/src/asm_gcc_x86_64.h
@@ -6,3 +6,6 @@
     asm volatile("rdtsc" : "=a"(_rax), "=d"(_rdx)); \
     val = (_rdx << 32) | _rax;                          \
 } while (0)
+
+
+#define RPy_YieldProcessor()  asm("pause")
diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c
--- a/rpython/translator/c/src/thread_gil.c
+++ b/rpython/translator/c/src/thread_gil.c
@@ -44,6 +44,7 @@
 */
 long rpy_fastgil = 0;
 static long rpy_waiting_threads = -42;    /* GIL not initialized */
+static volatile int rpy_early_poll_n = 0;
 static mutex1_t mutex_gil_stealer;
 static mutex2_t mutex_gil;
 
@@ -66,6 +67,30 @@
     }
 }
 
+static void check_and_save_old_fastgil(long old_fastgil)
+{
+    assert(RPY_FASTGIL_LOCKED(rpy_fastgil));
+
+#ifdef PYPY_USE_ASMGCC
+    if (old_fastgil != 0) {
+        /* this case only occurs from the JIT compiler */
+        struct pypy_ASM_FRAMEDATA_HEAD0 *new =
+            (struct pypy_ASM_FRAMEDATA_HEAD0 *)old_fastgil;
+        struct pypy_ASM_FRAMEDATA_HEAD0 *root = &pypy_g_ASM_FRAMEDATA_HEAD;
+        struct pypy_ASM_FRAMEDATA_HEAD0 *next = root->as_next;
+        new->as_next = next;
+        new->as_prev = root;
+        root->as_next = new;
+        next->as_prev = new;
+    }
+#else
+    assert(old_fastgil == 0);
+#endif
+}
+
+#define RPY_GIL_POKE_MIN   40
+#define RPY_GIL_POKE_MAX  400
+
 void RPyGilAcquireSlowPath(long old_fastgil)
 {
     /* Acquires the GIL.  This assumes that we already did:
@@ -79,6 +104,8 @@
     }
     else {
         /* Otherwise, another thread is busy with the GIL. */
+        int n;
+        long old_waiting_threads;
 
         if (rpy_waiting_threads < 0) {
             /* <arigo> I tried to have RPyGilAllocate() called from
@@ -98,7 +125,56 @@
         /* Register me as one of the threads that is actively waiting
            for the GIL.  The number of such threads is found in
            rpy_waiting_threads. */
-        atomic_increment(&rpy_waiting_threads);
+        old_waiting_threads = atomic_increment(&rpy_waiting_threads);
+
+        /* Early polling: before entering the waiting queue, we check
+           a certain number of times if the GIL becomes free.  The
+           motivation for this is issue #2341.  Note that we do this
+           polling even if there are already other threads in the
+           queue, and one of these threads is the stealer.  This is
+           because the stealer is likely sleeping right now.  There
+           are use cases where the GIL will really be released very
+           soon after RPyGilAcquireSlowPath() is called, so it's worth
+           always doing this check.
+
+           To avoid falling into bad cases, we "randomize" the number
+           of iterations: we loop N times, where N is chosen between
+           RPY_GIL_POKE_MIN and RPY_GIL_POKE_MAX.
+        */
+        n = rpy_early_poll_n * 2 + 1;
+        while (n >= RPY_GIL_POKE_MAX)
+            n -= (RPY_GIL_POKE_MAX - RPY_GIL_POKE_MIN);
+        rpy_early_poll_n = n;
+        while (n >= 0) {
+            n--;
+            if (old_waiting_threads != rpy_waiting_threads) {
+                /* If the number changed, it is because another thread 
+                   entered or left this function.  In that case, stop
+                   this loop: if another thread left it means the GIL
+                   has been acquired by that thread; if another thread 
+                   entered there is no point in running the present
+                   loop twice. */
+                break;
+            }
+            RPy_YieldProcessor();
+            RPy_CompilerMemoryBarrier();
+
+            if (!RPY_FASTGIL_LOCKED(rpy_fastgil)) {
+                old_fastgil = pypy_lock_test_and_set(&rpy_fastgil, 1);
+                if (!RPY_FASTGIL_LOCKED(old_fastgil)) {
+                    /* We got the gil before entering the waiting
+                       queue.  In case there are other threads waiting
+                       for the GIL, wake up the stealer thread now and
+                       go to the waiting queue anyway, for fairness.
+                       This will fall through if there are no other
+                       threads waiting.
+                    */
+                    check_and_save_old_fastgil(old_fastgil);
+                    mutex2_unlock(&mutex_gil);
+                    break;
+                }
+            }
+        }
 
         /* Enter the waiting queue from the end.  Assuming a roughly
            first-in-first-out order, this will nicely give the threads
@@ -109,6 +185,15 @@
 
         /* We are now the stealer thread.  Steals! */
         while (1) {
+            /* Busy-looping here.  Try to look again if 'rpy_fastgil' is
+               released.
+            */
+            if (!RPY_FASTGIL_LOCKED(rpy_fastgil)) {
+                old_fastgil = pypy_lock_test_and_set(&rpy_fastgil, 1);
+                if (!RPY_FASTGIL_LOCKED(old_fastgil))
+                    /* yes, got a non-held value!  Now we hold it. */
+                    break;
+            }
             /* Sleep for one interval of time.  We may be woken up earlier
                if 'mutex_gil' is released.
             */
@@ -119,39 +204,13 @@
                 old_fastgil = 0;
                 break;
             }
-
-            /* Busy-looping here.  Try to look again if 'rpy_fastgil' is
-               released.
-            */
-            if (!RPY_FASTGIL_LOCKED(rpy_fastgil)) {
-                old_fastgil = pypy_lock_test_and_set(&rpy_fastgil, 1);
-                if (!RPY_FASTGIL_LOCKED(old_fastgil))
-                    /* yes, got a non-held value!  Now we hold it. */
-                    break;
-            }
-            /* Otherwise, loop back. */
+            /* Loop back. */
         }
         atomic_decrement(&rpy_waiting_threads);
         mutex2_loop_stop(&mutex_gil);
         mutex1_unlock(&mutex_gil_stealer);
     }
-    assert(RPY_FASTGIL_LOCKED(rpy_fastgil));
-
-#ifdef PYPY_USE_ASMGCC
-    if (old_fastgil != 0) {
-        /* this case only occurs from the JIT compiler */
-        struct pypy_ASM_FRAMEDATA_HEAD0 *new =
-            (struct pypy_ASM_FRAMEDATA_HEAD0 *)old_fastgil;
-        struct pypy_ASM_FRAMEDATA_HEAD0 *root = &pypy_g_ASM_FRAMEDATA_HEAD;
-        struct pypy_ASM_FRAMEDATA_HEAD0 *next = root->as_next;
-        new->as_next = next;
-        new->as_prev = root;
-        root->as_next = new;
-        next->as_prev = new;
-    }
-#else
-    assert(old_fastgil == 0);
-#endif
+    check_and_save_old_fastgil(old_fastgil);
 }
 
 long RPyGilYieldThread(void)
diff --git a/rpython/translator/c/src/thread_nt.c b/rpython/translator/c/src/thread_nt.c
--- a/rpython/translator/c/src/thread_nt.c
+++ b/rpython/translator/c/src/thread_nt.c
@@ -258,5 +258,11 @@
 //#define pypy_lock_test_and_set(ptr, value)  see thread_nt.h
 #define atomic_increment(ptr)          InterlockedIncrement(ptr)
 #define atomic_decrement(ptr)          InterlockedDecrement(ptr)
+#ifdef YieldProcessor
+#  define RPy_YieldProcessor()         YieldProcessor()
+#else
+#  define RPy_YieldProcessor()         __asm { rep nop }
+#endif
+#define RPy_CompilerMemoryBarrier()    _ReadWriteBarrier()
 
 #include "src/thread_gil.c"
diff --git a/rpython/translator/c/src/thread_pthread.c b/rpython/translator/c/src/thread_pthread.c
--- a/rpython/translator/c/src/thread_pthread.c
+++ b/rpython/translator/c/src/thread_pthread.c
@@ -552,8 +552,14 @@
 }
 
 //#define pypy_lock_test_and_set(ptr, value)  see thread_pthread.h
-#define atomic_increment(ptr)          __sync_fetch_and_add(ptr, 1)
-#define atomic_decrement(ptr)          __sync_fetch_and_sub(ptr, 1)
+#define atomic_increment(ptr)          __sync_add_and_fetch(ptr, 1)
+#define atomic_decrement(ptr)          __sync_sub_and_fetch(ptr, 1)
+#define RPy_CompilerMemoryBarrier()    asm("":::"memory")
 #define HAVE_PTHREAD_ATFORK            1
 
+#include "src/asm.h"   /* for RPy_YieldProcessor() */
+#ifndef RPy_YieldProcessor
+#  define RPy_YieldProcessor()   /* nothing */
+#endif
+
 #include "src/thread_gil.c"


More information about the pypy-commit mailing list