[Python-checkins] Revert "bpo-30860: Consolidate stateful runtime globals." (#3379)

Wed Sep 6 00:43:12 EDT 2017

https://github.com/python/cpython/commit/05351c1bd8b70d1878527762174cdaaba3572395
commit: 05351c1bd8b70d1878527762174cdaaba3572395
branch: master
author: Eric Snow <ericsnowcurrently at gmail.com>
committer: GitHub <noreply at github.com>
date: 2017-09-05T21:43:08-07:00
summary:

Revert "bpo-30860: Consolidate stateful runtime globals." (#3379)

Windows buildbots started failing due to include-related errors.

files:
D Include/internal/_Python.h
D Include/internal/_ceval.h
D Include/internal/_condvar.h
D Include/internal/_gil.h
D Include/internal/_mem.h
D Include/internal/_pymalloc.h
D Include/internal/_pystate.h
D Include/internal/_warnings.h
D Misc/NEWS.d/next/Core and Builtins/2017-09-05-13-47-49.bpo-30860.MROpZw.rst
D Tools/c-globals/README
D Tools/c-globals/check-c-globals.py
D Tools/c-globals/ignored-globals.txt
M Include/Python.h
M Include/ceval.h
M Include/object.h
M Include/pylifecycle.h
M Include/pystate.h
M Makefile.pre.in
M Modules/_io/bufferedio.c
M Modules/_threadmodule.c
M Modules/_winapi.c
M Modules/gcmodule.c
M Modules/main.c
M Objects/object.c
M Objects/obmalloc.c
M Objects/setobject.c
M Objects/typeobject.c
M PCbuild/pythoncore.vcxproj
M PCbuild/pythoncore.vcxproj.filters
M Parser/pgenmain.c
M Python/_warnings.c
M Python/ceval.c
M Python/ceval_gil.h
M Python/condvar.h
M Python/pylifecycle.c
M Python/pystate.c
M Python/sysmodule.c
M Python/thread.c
M Python/thread_nt.h
M Python/thread_pthread.h

diff --git a/Include/Python.h b/Include/Python.h
index 3ab9fe914ec..061d693f34b 100644
--- a/Include/Python.h
+++ b/Include/Python.h
@@ -133,8 +133,4 @@
 #include "fileutils.h"
 #include "pyfpe.h"
 
-#ifdef Py_BUILD_CORE
-#include "internal/_Python.h"
-#endif
-
 #endif /* !Py_PYTHON_H */
diff --git a/Include/ceval.h b/Include/ceval.h
index 7cbbf7c5287..b2d57cbd6f7 100644
--- a/Include/ceval.h
+++ b/Include/ceval.h
@@ -93,12 +93,7 @@ PyAPI_FUNC(int) Py_GetRecursionLimit(void);
       PyThreadState_GET()->overflowed = 0;  \
     } while(0)
 PyAPI_FUNC(int) _Py_CheckRecursiveCall(const char *where);
-#ifdef Py_BUILD_CORE
-#define _Py_CheckRecursionLimit _PyRuntime.ceval.check_recursion_limit
-#else
-PyAPI_FUNC(int) _PyEval_CheckRecursionLimit(void);
-#define _Py_CheckRecursionLimit _PyEval_CheckRecursionLimit()
-#endif
+PyAPI_DATA(int) _Py_CheckRecursionLimit;
 
 #ifdef USE_STACKCHECK
 /* With USE_STACKCHECK, we artificially decrement the recursion limit in order
diff --git a/Include/internal/_Python.h b/Include/internal/_Python.h
deleted file mode 100644
index c56e98f740b..00000000000
--- a/Include/internal/_Python.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _Py_PYTHON_H
-#define _Py_PYTHON_H
-/* Since this is a "meta-include" file, no #ifdef __cplusplus / extern "C" { */
-
-/* Include all internal Python header files */
-
-#ifndef Py_BUILD_CORE
-#error "Internal headers are not available externally."
-#endif
-
-#include "_mem.h"
-#include "_ceval.h"
-#include "_warnings.h"
-#include "_pystate.h"
-
-#endif /* !_Py_PYTHON_H */
diff --git a/Include/internal/_ceval.h b/Include/internal/_ceval.h
deleted file mode 100644
index c2343f11323..00000000000
--- a/Include/internal/_ceval.h
+++ /dev/null
@@ -1,71 +0,0 @@
-#ifndef _Py_CEVAL_H
-#define _Py_CEVAL_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "ceval.h"
-#include "compile.h"
-#include "pyatomic.h"
-
-#ifdef WITH_THREAD
-#include "pythread.h"
-#endif
-
-struct _pending_calls {
-    unsigned long main_thread;
-#ifdef WITH_THREAD
-    PyThread_type_lock lock;
-    /* Request for running pending calls. */
-    _Py_atomic_int calls_to_do;
-    /* Request for looking at the `async_exc` field of the current
-       thread state.
-       Guarded by the GIL. */
-    int async_exc;
-#define NPENDINGCALLS 32
-    struct {
-        int (*func)(void *);
-        void *arg;
-    } calls[NPENDINGCALLS];
-    int first;
-    int last;
-#else /* ! WITH_THREAD */
-    _Py_atomic_int calls_to_do;
-#define NPENDINGCALLS 32
-    struct {
-        int (*func)(void *);
-        void *arg;
-    } calls[NPENDINGCALLS];
-    volatile int first;
-    volatile int last;
-#endif /* WITH_THREAD */
-};
-
-#include "_gil.h"
-
-struct _ceval_runtime_state {
-    int recursion_limit;
-    int check_recursion_limit;
-    /* Records whether tracing is on for any thread.  Counts the number
-       of threads for which tstate->c_tracefunc is non-NULL, so if the
-       value is 0, we know we don't have to check this thread's
-       c_tracefunc.  This speeds up the if statement in
-       PyEval_EvalFrameEx() after fast_next_opcode. */
-    int tracing_possible;
-    /* This single variable consolidates all requests to break out of
-       the fast path in the eval loop. */
-    _Py_atomic_int eval_breaker;
-#ifdef WITH_THREAD
-    /* Request for dropping the GIL */
-    _Py_atomic_int gil_drop_request;
-#endif
-    struct _pending_calls pending;
-    struct _gil_runtime_state gil;
-};
-
-PyAPI_FUNC(void) _PyEval_Initialize(struct _ceval_runtime_state *);
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* !_Py_CEVAL_H */
diff --git a/Include/internal/_condvar.h b/Include/internal/_condvar.h
deleted file mode 100644
index 6827db7e0b4..00000000000
--- a/Include/internal/_condvar.h
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef _CONDVAR_H_
-#define _CONDVAR_H_
-
-#ifndef _POSIX_THREADS
-/* This means pthreads are not implemented in libc headers, hence the macro
-   not present in unistd.h. But they still can be implemented as an external
-   library (e.g. gnu pth in pthread emulation) */
-# ifdef HAVE_PTHREAD_H
-#  include <pthread.h> /* _POSIX_THREADS */
-# endif
-#endif
-
-#ifdef _POSIX_THREADS
-/*
- * POSIX support
- */
-#define Py_HAVE_CONDVAR
-
-#include <pthread.h>
-
-#define PyMUTEX_T pthread_mutex_t
-#define PyCOND_T pthread_cond_t
-
-#elif defined(NT_THREADS)
-/*
- * Windows (XP, 2003 server and later, as well as (hopefully) CE) support
- *
- * Emulated condition variables that work with XP and later, plus
- * example native support on VISTA and onwards.
- */
-#define Py_HAVE_CONDVAR
-
-/* include windows if it hasn't been done before */
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-
-/* options */
-/* non-emulated condition variables are provided for those that want
- * to target Windows Vista.  Modify this macro to enable them.
- */
-#ifndef _PY_EMULATED_WIN_CV
-#define _PY_EMULATED_WIN_CV 1  /* use emulated condition variables */
-#endif
-
-/* fall back to emulation if not targeting Vista */
-#if !defined NTDDI_VISTA || NTDDI_VERSION < NTDDI_VISTA
-#undef _PY_EMULATED_WIN_CV
-#define _PY_EMULATED_WIN_CV 1
-#endif
-
-#if _PY_EMULATED_WIN_CV
-
-typedef CRITICAL_SECTION PyMUTEX_T;
-
-/* The ConditionVariable object.  From XP onwards it is easily emulated
-   with a Semaphore.
-   Semaphores are available on Windows XP (2003 server) and later.
-   We use a Semaphore rather than an auto-reset event, because although
-   an auto-reset event might appear to solve the lost-wakeup bug (race
-   condition between releasing the outer lock and waiting) because it
-   maintains state even though a wait hasn't happened, there is still
-   a lost wakeup problem if more than one thread is interrupted in the
-   critical place.  A semaphore solves that, because its state is
-   counted, not Boolean.
-   Because it is ok to signal a condition variable with no one
-   waiting, we need to keep track of the number of
-   waiting threads.  Otherwise, the semaphore's state could rise
-   without bound.  This also helps reduce the number of "spurious wakeups"
-   that would otherwise happen.
- */
-
-typedef struct _PyCOND_T
-{
-    HANDLE sem;
-    int waiting; /* to allow PyCOND_SIGNAL to be a no-op */
-} PyCOND_T;
-
-#else /* !_PY_EMULATED_WIN_CV */
-
-/* Use native Win7 primitives if build target is Win7 or higher */
-
-/* SRWLOCK is faster and better than CriticalSection */
-typedef SRWLOCK PyMUTEX_T;
-
-typedef CONDITION_VARIABLE  PyCOND_T;
-
-#endif /* _PY_EMULATED_WIN_CV */
-
-#endif /* _POSIX_THREADS, NT_THREADS */
-
-#endif /* _CONDVAR_H_ */
diff --git a/Include/internal/_gil.h b/Include/internal/_gil.h
deleted file mode 100644
index 42301bf3fca..00000000000
--- a/Include/internal/_gil.h
+++ /dev/null
@@ -1,48 +0,0 @@
-#ifndef _Py_GIL_H
-#define _Py_GIL_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "pyatomic.h"
-
-#include "internal/_condvar.h"
-#ifndef Py_HAVE_CONDVAR
-#error You need either a POSIX-compatible or a Windows system!
-#endif
-
-/* Enable if you want to force the switching of threads at least
-   every `interval`. */
-#undef FORCE_SWITCHING
-#define FORCE_SWITCHING
-
-struct _gil_runtime_state {
-    /* microseconds (the Python API uses seconds, though) */
-    unsigned long interval;
-    /* Last PyThreadState holding / having held the GIL. This helps us
-       know whether anyone else was scheduled after we dropped the GIL. */
-    _Py_atomic_address last_holder;
-    /* Whether the GIL is already taken (-1 if uninitialized). This is
-       atomic because it can be read without any lock taken in ceval.c. */
-    _Py_atomic_int locked;
-    /* Number of GIL switches since the beginning. */
-    unsigned long switch_number;
-#ifdef WITH_THREAD
-    /* This condition variable allows one or several threads to wait
-       until the GIL is released. In addition, the mutex also protects
-       the above variables. */
-    PyCOND_T cond;
-    PyMUTEX_T mutex;
-#ifdef FORCE_SWITCHING
-    /* This condition variable helps the GIL-releasing thread wait for
-       a GIL-awaiting thread to be scheduled and take the GIL. */
-    PyCOND_T switch_cond;
-    PyMUTEX_T switch_mutex;
-#endif
-#endif /* WITH_THREAD */
-};
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* !_Py_GIL_H */
diff --git a/Include/internal/_mem.h b/Include/internal/_mem.h
deleted file mode 100644
index 2932377148e..00000000000
--- a/Include/internal/_mem.h
+++ /dev/null
@@ -1,197 +0,0 @@
-#ifndef _Py_MEM_H
-#define _Py_MEM_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "objimpl.h"
-#include "pymem.h"
-
-#ifdef WITH_PYMALLOC
-#include "_pymalloc.h"
-#endif
-
-/* Low-level memory runtime state */
-
-struct _pymem_runtime_state {
-    struct _allocator_runtime_state {
-        PyMemAllocatorEx mem;
-        PyMemAllocatorEx obj;
-        PyMemAllocatorEx raw;
-    } allocators;
-#ifdef WITH_PYMALLOC
-    /* Array of objects used to track chunks of memory (arenas). */
-    struct arena_object* arenas;
-    /* The head of the singly-linked, NULL-terminated list of available
-       arena_objects. */
-    struct arena_object* unused_arena_objects;
-    /* The head of the doubly-linked, NULL-terminated at each end,
-       list of arena_objects associated with arenas that have pools
-       available. */
-    struct arena_object* usable_arenas;
-    /* Number of slots currently allocated in the `arenas` vector. */
-    unsigned int maxarenas;
-    /* Number of arenas allocated that haven't been free()'d. */
-    size_t narenas_currently_allocated;
-    /* High water mark (max value ever seen) for
-     * narenas_currently_allocated. */
-    size_t narenas_highwater;
-    /* Total number of times malloc() called to allocate an arena. */
-    size_t ntimes_arena_allocated;
-    poolp usedpools[MAX_POOLS];
-    Py_ssize_t num_allocated_blocks;
-    size_t serialno;     /* incremented on each debug {m,re}alloc */
-#endif /* WITH_PYMALLOC */
-};
-
-PyAPI_FUNC(void) _PyMem_Initialize(struct _pymem_runtime_state *);
-
-
-/* High-level memory runtime state */
-
-struct _pyobj_runtime_state {
-    PyObjectArenaAllocator allocator_arenas;
-};
-
-PyAPI_FUNC(void) _PyObject_Initialize(struct _pyobj_runtime_state *);
-
-
-/* GC runtime state */
-
-/* If we change this, we need to change the default value in the
-   signature of gc.collect. */
-#define NUM_GENERATIONS 3
-
-/*
-   NOTE: about the counting of long-lived objects.
-
-   To limit the cost of garbage collection, there are two strategies;
-     - make each collection faster, e.g. by scanning fewer objects
-     - do fewer collections
-   This heuristic is about the latter strategy.
-
-   In addition to the various configurable thresholds, we only trigger a
-   full collection if the ratio
-    long_lived_pending / long_lived_total
-   is above a given value (hardwired to 25%).
-
-   The reason is that, while "non-full" collections (i.e., collections of
-   the young and middle generations) will always examine roughly the same
-   number of objects -- determined by the aforementioned thresholds --,
-   the cost of a full collection is proportional to the total number of
-   long-lived objects, which is virtually unbounded.
-
-   Indeed, it has been remarked that doing a full collection every
-   <constant number> of object creations entails a dramatic performance
-   degradation in workloads which consist in creating and storing lots of
-   long-lived objects (e.g. building a large list of GC-tracked objects would
-   show quadratic performance, instead of linear as expected: see issue #4074).
-
-   Using the above ratio, instead, yields amortized linear performance in
-   the total number of objects (the effect of which can be summarized
-   thusly: "each full garbage collection is more and more costly as the
-   number of objects grows, but we do fewer and fewer of them").
-
-   This heuristic was suggested by Martin von Löwis on python-dev in
-   June 2008. His original analysis and proposal can be found at:
-    http://mail.python.org/pipermail/python-dev/2008-June/080579.html
-*/
-
-/*
-   NOTE: about untracking of mutable objects.
-
-   Certain types of container cannot participate in a reference cycle, and
-   so do not need to be tracked by the garbage collector. Untracking these
-   objects reduces the cost of garbage collections. However, determining
-   which objects may be untracked is not free, and the costs must be
-   weighed against the benefits for garbage collection.
-
-   There are two possible strategies for when to untrack a container:
-
-   i) When the container is created.
-   ii) When the container is examined by the garbage collector.
-
-   Tuples containing only immutable objects (integers, strings etc, and
-   recursively, tuples of immutable objects) do not need to be tracked.
-   The interpreter creates a large number of tuples, many of which will
-   not survive until garbage collection. It is therefore not worthwhile
-   to untrack eligible tuples at creation time.
-
-   Instead, all tuples except the empty tuple are tracked when created.
-   During garbage collection it is determined whether any surviving tuples
-   can be untracked. A tuple can be untracked if all of its contents are
-   already not tracked. Tuples are examined for untracking in all garbage
-   collection cycles. It may take more than one cycle to untrack a tuple.
-
-   Dictionaries containing only immutable objects also do not need to be
-   tracked. Dictionaries are untracked when created. If a tracked item is
-   inserted into a dictionary (either as a key or value), the dictionary
-   becomes tracked. During a full garbage collection (all generations),
-   the collector will untrack any dictionaries whose contents are not
-   tracked.
-
-   The module provides the python function is_tracked(obj), which returns
-   the CURRENT tracking status of the object. Subsequent garbage
-   collections may change the tracking status of the object.
-
-   Untracking of certain containers was introduced in issue #4688, and
-   the algorithm was refined in response to issue #14775.
-*/
-
-struct gc_generation {
-    PyGC_Head head;
-    int threshold; /* collection threshold */
-    int count; /* count of allocations or collections of younger
-                  generations */
-};
-
-/* Running stats per generation */
-struct gc_generation_stats {
-    /* total number of collections */
-    Py_ssize_t collections;
-    /* total number of collected objects */
-    Py_ssize_t collected;
-    /* total number of uncollectable objects (put into gc.garbage) */
-    Py_ssize_t uncollectable;
-};
-
-struct _gc_runtime_state {
-    /* List of objects that still need to be cleaned up, singly linked
-     * via their gc headers' gc_prev pointers.  */
-    PyObject *trash_delete_later;
-    /* Current call-stack depth of tp_dealloc calls. */
-    int trash_delete_nesting;
-
-    int enabled;
-    int debug;
-    /* linked lists of container objects */
-    struct gc_generation generations[NUM_GENERATIONS];
-    PyGC_Head *generation0;
-    struct gc_generation_stats generation_stats[NUM_GENERATIONS];
-    /* true if we are currently running the collector */
-    int collecting;
-    /* list of uncollectable objects */
-    PyObject *garbage;
-    /* a list of callbacks to be invoked when collection is performed */
-    PyObject *callbacks;
-    /* This is the number of objects that survived the last full
-       collection. It approximates the number of long lived objects
-       tracked by the GC.
-
-       (by "full collection", we mean a collection of the oldest
-       generation). */
-    Py_ssize_t long_lived_total;
-    /* This is the number of objects that survived all "non-full"
-       collections, and are awaiting to undergo a full collection for
-       the first time. */
-    Py_ssize_t long_lived_pending;
-};
-
-PyAPI_FUNC(void) _PyGC_Initialize(struct _gc_runtime_state *);
-
-#define _PyGC_generation0 _PyRuntime.gc.generation0
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* !_Py_MEM_H */
diff --git a/Include/internal/_pymalloc.h b/Include/internal/_pymalloc.h
deleted file mode 100644
index 764edf94ffd..00000000000
--- a/Include/internal/_pymalloc.h
+++ /dev/null
@@ -1,443 +0,0 @@
-
-/* An object allocator for Python.
-
-   Here is an introduction to the layers of the Python memory architecture,
-   showing where the object allocator is actually used (layer +2). It is
-   called for every object allocation and deallocation (PyObject_New/Del),
-   unless the object-specific allocators implement a proprietary allocation
-   scheme (ex.: ints use a simple free list). This is also the place where
-   the cyclic garbage collector operates selectively on container objects.
-
-
-    Object-specific allocators
-    _____   ______   ______       ________
-   [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
-+3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
-    _______________________________       |                           |
-   [   Python's object allocator   ]      |                           |
-+2 | ####### Object memory ####### | <------ Internal buffers ------> |
-    ______________________________________________________________    |
-   [          Python's raw memory allocator (PyMem_ API)          ]   |
-+1 | <----- Python memory (under PyMem manager's control) ------> |   |
-    __________________________________________________________________
-   [    Underlying general-purpose allocator (ex: C library malloc)   ]
- 0 | <------ Virtual memory allocated for the python process -------> |
-
-   =========================================================================
-    _______________________________________________________________________
-   [                OS-specific Virtual Memory Manager (VMM)               ]
--1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
-    __________________________________   __________________________________
-   [                                  ] [                                  ]
--2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
-
-*/
-/*==========================================================================*/
-
-/* A fast, special-purpose memory allocator for small blocks, to be used
-   on top of a general-purpose malloc -- heavily based on previous art. */
-
-/* Vladimir Marangozov -- August 2000 */
-
-/*
- * "Memory management is where the rubber meets the road -- if we do the wrong
- * thing at any level, the results will not be good. And if we don't make the
- * levels work well together, we are in serious trouble." (1)
- *
- * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
- *    "Dynamic Storage Allocation: A Survey and Critical Review",
- *    in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
- */
-
-#ifndef _Py_PYMALLOC_H
-#define _Py_PYMALLOC_H
-
-/* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */
-
-/*==========================================================================*/
-
-/*
- * Allocation strategy abstract:
- *
- * For small requests, the allocator sub-allocates <Big> blocks of memory.
- * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
- * system's allocator.
- *
- * Small requests are grouped in size classes spaced 8 bytes apart, due
- * to the required valid alignment of the returned address. Requests of
- * a particular size are serviced from memory pools of 4K (one VMM page).
- * Pools are fragmented on demand and contain free lists of blocks of one
- * particular size class. In other words, there is a fixed-size allocator
- * for each size class. Free pools are shared by the different allocators
- * thus minimizing the space reserved for a particular size class.
- *
- * This allocation strategy is a variant of what is known as "simple
- * segregated storage based on array of free lists". The main drawback of
- * simple segregated storage is that we might end up with a lot of reserved
- * memory for the different free lists, which degenerate in time. To avoid
- * this, we partition each free list in pools and we share dynamically the
- * reserved space between all free lists. This technique is quite efficient
- * for memory intensive programs which allocate mainly small-sized blocks.
- *
- * For small requests we have the following table:
- *
- * Request in bytes     Size of allocated block      Size class idx
- * ----------------------------------------------------------------
- *        1-8                     8                       0
- *        9-16                   16                       1
- *       17-24                   24                       2
- *       25-32                   32                       3
- *       33-40                   40                       4
- *       41-48                   48                       5
- *       49-56                   56                       6
- *       57-64                   64                       7
- *       65-72                   72                       8
- *        ...                   ...                     ...
- *      497-504                 504                      62
- *      505-512                 512                      63
- *
- *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
- *      allocator.
- */
-
-/*==========================================================================*/
-
-/*
- * -- Main tunable settings section --
- */
-
-/*
- * Alignment of addresses returned to the user. 8-bytes alignment works
- * on most current architectures (with 32-bit or 64-bit address busses).
- * The alignment value is also used for grouping small requests in size
- * classes spaced ALIGNMENT bytes apart.
- *
- * You shouldn't change this unless you know what you are doing.
- */
-#define ALIGNMENT               8               /* must be 2^N */
-#define ALIGNMENT_SHIFT         3
-
-/* Return the number of bytes in size class I, as a uint. */
-#define INDEX2SIZE(I) (((unsigned int)(I) + 1) << ALIGNMENT_SHIFT)
-
-/*
- * Max size threshold below which malloc requests are considered to be
- * small enough in order to use preallocated memory pools. You can tune
- * this value according to your application behaviour and memory needs.
- *
- * Note: a size threshold of 512 guarantees that newly created dictionaries
- * will be allocated from preallocated memory pools on 64-bit.
- *
- * The following invariants must hold:
- *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
- *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
- *
- * Although not required, for better performance and space efficiency,
- * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
- */
-#define SMALL_REQUEST_THRESHOLD 512
-#define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
-
-#if NB_SMALL_SIZE_CLASSES > 64
-#error "NB_SMALL_SIZE_CLASSES should be less than 64"
-#endif /* NB_SMALL_SIZE_CLASSES > 64 */
-
-/*
- * The system's VMM page size can be obtained on most unices with a
- * getpagesize() call or deduced from various header files. To make
- * things simpler, we assume that it is 4K, which is OK for most systems.
- * It is probably better if this is the native page size, but it doesn't
- * have to be.  In theory, if SYSTEM_PAGE_SIZE is larger than the native page
- * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
- * violation fault.  4K is apparently OK for all the platforms that python
- * currently targets.
- */
-#define SYSTEM_PAGE_SIZE        (4 * 1024)
-#define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)
-
-/*
- * Maximum amount of memory managed by the allocator for small requests.
- */
-#ifdef WITH_MEMORY_LIMITS
-#ifndef SMALL_MEMORY_LIMIT
-#define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
-#endif
-#endif
-
-/*
- * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
- * on a page boundary. This is a reserved virtual address space for the
- * current process (obtained through a malloc()/mmap() call). In no way this
- * means that the memory arenas will be used entirely. A malloc(<Big>) is
- * usually an address range reservation for <Big> bytes, unless all pages within
- * this space are referenced subsequently. So malloc'ing big blocks and not
- * using them does not mean "wasting memory". It's an addressable range
- * wastage...
- *
- * Arenas are allocated with mmap() on systems supporting anonymous memory
- * mappings to reduce heap fragmentation.
- */
-#define ARENA_SIZE              (256 << 10)     /* 256KB */
-
-#ifdef WITH_MEMORY_LIMITS
-#define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
-#endif
-
-/*
- * Size of the pools used for small blocks. Should be a power of 2,
- * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
- */
-#define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
-#define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK
-
-/*
- * -- End of tunable settings section --
- */
-
-/*==========================================================================*/
-
-/*
- * Locking
- *
- * To reduce lock contention, it would probably be better to refine the
- * crude function locking with per size class locking. I'm not positive
- * however, whether it's worth switching to such locking policy because
- * of the performance penalty it might introduce.
- *
- * The following macros describe the simplest (should also be the fastest)
- * lock object on a particular platform and the init/fini/lock/unlock
- * operations on it. The locks defined here are not expected to be recursive
- * because it is assumed that they will always be called in the order:
- * INIT, [LOCK, UNLOCK]*, FINI.
- */
-
-/*
- * Python's threads are serialized, so object malloc locking is disabled.
- */
-#define SIMPLELOCK_DECL(lock)   /* simple lock declaration              */
-#define SIMPLELOCK_INIT(lock)   /* allocate (if needed) and initialize  */
-#define SIMPLELOCK_FINI(lock)   /* free/destroy an existing lock        */
-#define SIMPLELOCK_LOCK(lock)   /* acquire released lock */
-#define SIMPLELOCK_UNLOCK(lock) /* release acquired lock */
-
-/* When you say memory, my mind reasons in terms of (pointers to) blocks */
-typedef uint8_t pyblock;
-
-/* Pool for small blocks. */
-struct pool_header {
-    union { pyblock *_padding;
-            unsigned int count; } ref;  /* number of allocated blocks    */
-    pyblock *freeblock;                 /* pool's free list head         */
-    struct pool_header *nextpool;       /* next pool of this size class  */
-    struct pool_header *prevpool;       /* previous pool       ""        */
-    unsigned int arenaindex;            /* index into arenas of base adr */
-    unsigned int szidx;                 /* block size class index        */
-    unsigned int nextoffset;            /* bytes to virgin block         */
-    unsigned int maxnextoffset;         /* largest valid nextoffset      */
-};
-
-typedef struct pool_header *poolp;
-
-/* Record keeping for arenas. */
-struct arena_object {
-    /* The address of the arena, as returned by malloc.  Note that 0
-     * will never be returned by a successful malloc, and is used
-     * here to mark an arena_object that doesn't correspond to an
-     * allocated arena.
-     */
-    uintptr_t address;
-
-    /* Pool-aligned pointer to the next pool to be carved off. */
-    pyblock* pool_address;
-
-    /* The number of available pools in the arena:  free pools + never-
-     * allocated pools.
-     */
-    unsigned int nfreepools;
-
-    /* The total number of pools in the arena, whether or not available. */
-    unsigned int ntotalpools;
-
-    /* Singly-linked list of available pools. */
-    struct pool_header* freepools;
-
-    /* Whenever this arena_object is not associated with an allocated
-     * arena, the nextarena member is used to link all unassociated
-     * arena_objects in the singly-linked `unused_arena_objects` list.
-     * The prevarena member is unused in this case.
-     *
-     * When this arena_object is associated with an allocated arena
-     * with at least one available pool, both members are used in the
-     * doubly-linked `usable_arenas` list, which is maintained in
-     * increasing order of `nfreepools` values.
-     *
-     * Else this arena_object is associated with an allocated arena
-     * all of whose pools are in use.  `nextarena` and `prevarena`
-     * are both meaningless in this case.
-     */
-    struct arena_object* nextarena;
-    struct arena_object* prevarena;
-};
-
-#define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
-
-#define DUMMY_SIZE_IDX          0xffff  /* size class of newly cached pools */
-
-/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
-#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
-
-/* Return total number of blocks in pool of size index I, as a uint. */
-#define NUMBLOCKS(I) \
-    ((unsigned int)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
-
-/*==========================================================================*/
-
-/*
- * This malloc lock
- */
-SIMPLELOCK_DECL(_malloc_lock)
-#define LOCK()          SIMPLELOCK_LOCK(_malloc_lock)
-#define UNLOCK()        SIMPLELOCK_UNLOCK(_malloc_lock)
-#define LOCK_INIT()     SIMPLELOCK_INIT(_malloc_lock)
-#define LOCK_FINI()     SIMPLELOCK_FINI(_malloc_lock)
-
-/*
- * Pool table -- headed, circular, doubly-linked lists of partially used pools.
-
-This is involved.  For an index i, usedpools[i+i] is the header for a list of
-all partially used pools holding small blocks with "size class idx" i. So
-usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
-16, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
-
-Pools are carved off an arena's highwater mark (an arena_object's pool_address
-member) as needed.  Once carved off, a pool is in one of three states forever
-after:
-
-used == partially used, neither empty nor full
-    At least one block in the pool is currently allocated, and at least one
-    block in the pool is not currently allocated (note this implies a pool
-    has room for at least two blocks).
-    This is a pool's initial state, as a pool is created only when malloc
-    needs space.
-    The pool holds blocks of a fixed size, and is in the circular list headed
-    at usedpools[i] (see above).  It's linked to the other used pools of the
-    same size class via the pool_header's nextpool and prevpool members.
-    If all but one block is currently allocated, a malloc can cause a
-    transition to the full state.  If all but one block is not currently
-    allocated, a free can cause a transition to the empty state.
-
-full == all the pool's blocks are currently allocated
-    On transition to full, a pool is unlinked from its usedpools[] list.
-    It's not linked to from anything then anymore, and its nextpool and
-    prevpool members are meaningless until it transitions back to used.
-    A free of a block in a full pool puts the pool back in the used state.
-    Then it's linked in at the front of the appropriate usedpools[] list, so
-    that the next allocation for its size class will reuse the freed block.
-
-empty == all the pool's blocks are currently available for allocation
-    On transition to empty, a pool is unlinked from its usedpools[] list,
-    and linked to the front of its arena_object's singly-linked freepools list,
-    via its nextpool member.  The prevpool member has no meaning in this case.
-    Empty pools have no inherent size class:  the next time a malloc finds
-    an empty list in usedpools[], it takes the first pool off of freepools.
-    If the size class needed happens to be the same as the size class the pool
-    last had, some pool initialization can be skipped.
-
-
-Block Management
-
-Blocks within pools are again carved out as needed.  pool->freeblock points to
-the start of a singly-linked list of free blocks within the pool.  When a
-block is freed, it's inserted at the front of its pool's freeblock list.  Note
-that the available blocks in a pool are *not* linked all together when a pool
-is initialized.  Instead only "the first two" (lowest addresses) blocks are
-set up, returning the first such block, and setting pool->freeblock to a
-one-block list holding the second such block.  This is consistent with that
-pymalloc strives at all levels (arena, pool, and block) never to touch a piece
-of memory until it's actually needed.
-
-So long as a pool is in the used state, we're certain there *is* a block
-available for allocating, and pool->freeblock is not NULL.  If pool->freeblock
-points to the end of the free list before we've carved the entire pool into
-blocks, that means we simply haven't yet gotten to one of the higher-address
-blocks.  The offset from the pool_header to the start of "the next" virgin
-block is stored in the pool_header nextoffset member, and the largest value
-of nextoffset that makes sense is stored in the maxnextoffset member when a
-pool is initialized.  All the blocks in a pool have been passed out at least
-once when and only when nextoffset > maxnextoffset.
-
-
-Major obscurity:  While the usedpools vector is declared to have poolp
-entries, it doesn't really.  It really contains two pointers per (conceptual)
-poolp entry, the nextpool and prevpool members of a pool_header.  The
-excruciating initialization code below fools C so that
-
-    usedpool[i+i]
-
-"acts like" a genuine poolp, but only so long as you only reference its
-nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
-compensating for that a pool_header's nextpool and prevpool members
-immediately follow a pool_header's first two members:
-
-    union { block *_padding;
-            uint count; } ref;
-    block *freeblock;
-
-each of which consume sizeof(block *) bytes.  So what usedpools[i+i] really
-contains is a fudged-up pointer p such that *if* C believes it's a poolp
-pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
-circular list is empty).
-
-It's unclear why the usedpools setup is so convoluted.  It could be to
-minimize the amount of cache required to hold this heavily-referenced table
-(which only *needs* the two interpool pointer members of a pool_header). OTOH,
-referencing code has to remember to "double the index" and doing so isn't
-free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
-on that C doesn't insert any padding anywhere in a pool_header at or before
-the prevpool member.
-**************************************************************************** */
-
-#define MAX_POOLS  (2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8)
-
-/*==========================================================================
-Arena management.
-
-`arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
-which may not be currently used (== they're arena_objects that aren't
-currently associated with an allocated arena).  Note that arenas proper are
-separately malloc'ed.
-
-Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
-we do try to free() arenas, and use some mild heuristic strategies to increase
-the likelihood that arenas eventually can be freed.
-
-unused_arena_objects
-
-    This is a singly-linked list of the arena_objects that are currently not
-    being used (no arena is associated with them).  Objects are taken off the
-    head of the list in new_arena(), and are pushed on the head of the list in
-    PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
-    is on this list if and only if its .address member is 0.
-
-usable_arenas
-
-    This is a doubly-linked list of the arena_objects associated with arenas
-    that have pools available.  These pools are either waiting to be reused,
-    or have not been used before.  The list is sorted to have the most-
-    allocated arenas first (ascending order based on the nfreepools member).
-    This means that the next allocation will come from a heavily used arena,
-    which gives the nearly empty arenas a chance to be returned to the system.
-    In my unscientific tests this dramatically improved the number of arenas
-    that could be freed.
-
-Note that an arena_object associated with an arena all of whose pools are
-currently in use isn't on either list.
-*/
-
-/* How many arena_objects do we initially allocate?
- * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
- * `arenas` vector.
- */
-#define INITIAL_ARENA_OBJECTS 16
-
-#endif /* _Py_PYMALLOC_H */
diff --git a/Include/internal/_pystate.h b/Include/internal/_pystate.h
deleted file mode 100644
index 9f2dea1befa..00000000000
--- a/Include/internal/_pystate.h
+++ /dev/null
@@ -1,93 +0,0 @@
-#ifndef _Py_PYSTATE_H
-#define _Py_PYSTATE_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "pystate.h"
-#include "pyatomic.h"
-
-#ifdef WITH_THREAD
-#include "pythread.h"
-#endif
-
-#include "_mem.h"
-#include "_ceval.h"
-#include "_warnings.h"
-
-
-/* GIL state */
-
-struct _gilstate_runtime_state {
-    int check_enabled;
-    /* Assuming the current thread holds the GIL, this is the
-       PyThreadState for the current thread. */
-    _Py_atomic_address tstate_current;
-    PyThreadFrameGetter getframe;
-#ifdef WITH_THREAD
-    /* The single PyInterpreterState used by this process'
-       GILState implementation
-    */
-    /* TODO: Given interp_main, it may be possible to kill this ref */
-    PyInterpreterState *autoInterpreterState;
-    int autoTLSkey;
-#endif /* WITH_THREAD */
-};
-
-/* hook for PyEval_GetFrame(), requested for Psyco */
-#define _PyThreadState_GetFrame _PyRuntime.gilstate.getframe
-
-/* Issue #26558: Flag to disable PyGILState_Check().
-   If set to non-zero, PyGILState_Check() always return 1. */
-#define _PyGILState_check_enabled _PyRuntime.gilstate.check_enabled
-
-
-/* Full Python runtime state */
-
-typedef struct pyruntimestate {
-    int initialized;
-    int core_initialized;
-    PyThreadState *finalizing;
-
-    struct pyinterpreters {
-#ifdef WITH_THREAD
-        PyThread_type_lock mutex;
-#endif
-        PyInterpreterState *head;
-        PyInterpreterState *main;
-        /* _next_interp_id is an auto-numbered sequence of small
-           integers.  It gets initialized in _PyInterpreterState_Init(),
-           which is called in Py_Initialize(), and used in
-           PyInterpreterState_New().  A negative interpreter ID
-           indicates an error occurred.  The main interpreter will
-           always have an ID of 0.  Overflow results in a RuntimeError.
-           If that becomes a problem later then we can adjust, e.g. by
-           using a Python int. */
-        int64_t next_id;
-    } interpreters;
-
-#define NEXITFUNCS 32
-    void (*exitfuncs[NEXITFUNCS])(void);
-    int nexitfuncs;
-    void (*pyexitfunc)(void);
-
-    struct _pyobj_runtime_state obj;
-    struct _gc_runtime_state gc;
-    struct _pymem_runtime_state mem;
-    struct _warnings_runtime_state warnings;
-    struct _ceval_runtime_state ceval;
-    struct _gilstate_runtime_state gilstate;
-
-    // XXX Consolidate globals found via the check-c-globals script.
-} _PyRuntimeState;
-
-PyAPI_DATA(_PyRuntimeState) _PyRuntime;
-PyAPI_FUNC(void) _PyRuntimeState_Init(_PyRuntimeState *);
-PyAPI_FUNC(void) _PyRuntimeState_Fini(_PyRuntimeState *);
-
-PyAPI_FUNC(void) _PyInterpreterState_Enable(_PyRuntimeState *);
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* !_Py_PYSTATE_H */
diff --git a/Include/internal/_warnings.h b/Include/internal/_warnings.h
deleted file mode 100644
index 2a1abb2d5d2..00000000000
--- a/Include/internal/_warnings.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef _Py_WARNINGS_H
-#define _Py_WARNINGS_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "object.h"
-
-struct _warnings_runtime_state {
-    /* Both 'filters' and 'onceregistry' can be set in warnings.py;
-       get_warnings_attr() will reset these variables accordingly. */
-    PyObject *filters;  /* List */
-    PyObject *once_registry;  /* Dict */
-    PyObject *default_action; /* String */
-    long filters_version;
-};
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* !_Py_WARNINGS_H */
diff --git a/Include/object.h b/Include/object.h
index b46d4c30e1e..f5ed70b1129 100644
--- a/Include/object.h
+++ b/Include/object.h
@@ -1038,6 +1038,8 @@ with the call stack never exceeding a depth of PyTrash_UNWIND_LEVEL.
    Kept for binary compatibility of extensions using the stable ABI. */
 PyAPI_FUNC(void) _PyTrash_deposit_object(PyObject*);
 PyAPI_FUNC(void) _PyTrash_destroy_chain(void);
+PyAPI_DATA(int) _PyTrash_delete_nesting;
+PyAPI_DATA(PyObject *) _PyTrash_delete_later;
 #endif /* !Py_LIMITED_API */
 
 /* The new thread-safe private API, invoked by the macros below. */
diff --git a/Include/pylifecycle.h b/Include/pylifecycle.h
index b02cd4cc543..0d609ec2344 100644
--- a/Include/pylifecycle.h
+++ b/Include/pylifecycle.h
@@ -119,10 +119,7 @@ PyAPI_FUNC(void) _PyType_Fini(void);
 PyAPI_FUNC(void) _Py_HashRandomization_Fini(void);
 PyAPI_FUNC(void) PyAsyncGen_Fini(void);
 
-#define _Py_IS_FINALIZING() \
-    (_PyRuntime.finalizing != NULL)
-#define _Py_CURRENTLY_FINALIZING(tstate) \
-    (_PyRuntime.finalizing == tstate)
+PyAPI_DATA(PyThreadState *) _Py_Finalizing;
 #endif
 
 /* Signals */
diff --git a/Include/pystate.h b/Include/pystate.h
index 90081c51c0e..8a92f3ec3ed 100644
--- a/Include/pystate.h
+++ b/Include/pystate.h
@@ -29,10 +29,9 @@ typedef struct {
     int use_hash_seed;
     unsigned long hash_seed;
     int _disable_importlib; /* Needed by freeze_importlib */
-    char *allocator;
 } _PyCoreConfig;
 
-#define _PyCoreConfig_INIT {0, -1, 0, 0, NULL}
+#define _PyCoreConfig_INIT {0, -1, 0, 0}
 
 /* Placeholders while working on the new configuration API
  *
@@ -58,19 +57,6 @@ typedef struct _is {
     PyObject *builtins;
     PyObject *importlib;
 
-    /* Used in Python/sysmodule.c. */
-    int check_interval;
-    PyObject *warnoptions;
-    PyObject *xoptions;
-
-    /* Used in Modules/_threadmodule.c. */
-    long num_threads;
-    /* Support for runtime thread stack size tuning.
-       A value of 0 means using the platform's default stack size
-       or the size specified by the THREAD_STACK_SIZE macro. */
-    /* Used in Python/thread.c. */
-    size_t pythread_stacksize;
-
     PyObject *codec_search_path;
     PyObject *codec_search_cache;
     PyObject *codec_error_registry;
@@ -199,6 +185,9 @@ typedef struct _ts {
 #endif
 
 
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(void) _PyInterpreterState_Init(void);
+#endif /* !Py_LIMITED_API */
 PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_New(void);
 PyAPI_FUNC(void) PyInterpreterState_Clear(PyInterpreterState *);
 PyAPI_FUNC(void) PyInterpreterState_Delete(PyInterpreterState *);
@@ -257,7 +246,7 @@ PyAPI_FUNC(int) PyThreadState_SetAsyncExc(unsigned long, PyObject *);
 /* Assuming the current thread holds the GIL, this is the
    PyThreadState for the current thread. */
 #ifdef Py_BUILD_CORE
-#  define _PyThreadState_Current _PyRuntime.gilstate.tstate_current
+PyAPI_DATA(_Py_atomic_address) _PyThreadState_Current;
 #  define PyThreadState_GET() \
              ((PyThreadState*)_Py_atomic_load_relaxed(&_PyThreadState_Current))
 #else
@@ -312,6 +301,10 @@ PyAPI_FUNC(void) PyGILState_Release(PyGILState_STATE);
 PyAPI_FUNC(PyThreadState *) PyGILState_GetThisThreadState(void);
 
 #ifndef Py_LIMITED_API
+/* Issue #26558: Flag to disable PyGILState_Check().
+   If set to non-zero, PyGILState_Check() always returns 1. */
+PyAPI_DATA(int) _PyGILState_check_enabled;
+
 /* Helper/diagnostic function - return 1 if the current thread
    currently holds the GIL, 0 otherwise.
 
@@ -347,6 +340,11 @@ PyAPI_FUNC(PyThreadState *) PyThreadState_Next(PyThreadState *);
 typedef struct _frame *(*PyThreadFrameGetter)(PyThreadState *self_);
 #endif
 
+/* hook for PyEval_GetFrame(), requested for Psyco */
+#ifndef Py_LIMITED_API
+PyAPI_DATA(PyThreadFrameGetter) _PyThreadState_GetFrame;
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/Makefile.pre.in b/Makefile.pre.in
index d6ebf854eda..57d2ab72ba9 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -987,13 +987,6 @@ PYTHON_HEADERS= \
 		pyconfig.h \
 		$(PARSER_HEADERS) \
 		$(srcdir)/Include/Python-ast.h \
-		$(srcdir)/Include/internal/_Python.h \
-		$(srcdir)/Include/internal/_ceval.h \
-		$(srcdir)/Include/internal/_gil.h \
-		$(srcdir)/Include/internal/_mem.h \
-		$(srcdir)/Include/internal/_pymalloc.h \
-		$(srcdir)/Include/internal/_pystate.h \
-		$(srcdir)/Include/internal/_warnings.h \
 		$(DTRACE_HEADERS)
 
 $(LIBRARY_OBJS) $(MODOBJS) Programs/python.o: $(PYTHON_HEADERS)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-09-05-13-47-49.bpo-30860.MROpZw.rst b/Misc/NEWS.d/next/Core and Builtins/2017-09-05-13-47-49.bpo-30860.MROpZw.rst
deleted file mode 100644
index d8e9d5eeea1..00000000000
--- a/Misc/NEWS.d/next/Core and Builtins/2017-09-05-13-47-49.bpo-30860.MROpZw.rst	
+++ /dev/null
@@ -1,2 +0,0 @@
-Consolidate CPython's global runtime state under a single struct.  This
-improves discoverability of the runtime state.
diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c
index 3f57041855d..189b1cd8442 100644
--- a/Modules/_io/bufferedio.c
+++ b/Modules/_io/bufferedio.c
@@ -279,7 +279,7 @@ _enter_buffered_busy(buffered *self)
                      "reentrant call inside %R", self);
         return 0;
     }
-    relax_locking = _Py_IS_FINALIZING();
+    relax_locking = (_Py_Finalizing != NULL);
     Py_BEGIN_ALLOW_THREADS
     if (!relax_locking)
         st = PyThread_acquire_lock(self->lock, 1);
diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c
index 89be96c313f..da750c01cd9 100644
--- a/Modules/_threadmodule.c
+++ b/Modules/_threadmodule.c
@@ -14,6 +14,7 @@
 #include "pythread.h"
 
 static PyObject *ThreadError;
+static long nb_threads = 0;
 static PyObject *str_dict;
 
 _Py_IDENTIFIER(stderr);
@@ -992,7 +993,7 @@ t_bootstrap(void *boot_raw)
     tstate->thread_id = PyThread_get_thread_ident();
     _PyThreadState_Init(tstate);
     PyEval_AcquireThread(tstate);
-    tstate->interp->num_threads++;
+    nb_threads++;
     res = PyObject_Call(boot->func, boot->args, boot->keyw);
     if (res == NULL) {
         if (PyErr_ExceptionMatches(PyExc_SystemExit))
@@ -1019,7 +1020,7 @@ t_bootstrap(void *boot_raw)
     Py_DECREF(boot->args);
     Py_XDECREF(boot->keyw);
     PyMem_DEL(boot_raw);
-    tstate->interp->num_threads--;
+    nb_threads--;
     PyThreadState_Clear(tstate);
     PyThreadState_DeleteCurrent();
     PyThread_exit_thread();
@@ -1158,8 +1159,7 @@ A thread's identity may be reused for another thread after it exits.");
 static PyObject *
 thread__count(PyObject *self)
 {
-    PyThreadState *tstate = PyThreadState_Get();
-    return PyLong_FromLong(tstate->interp->num_threads);
+    return PyLong_FromLong(nb_threads);
 }
 
 PyDoc_STRVAR(_count_doc,
@@ -1352,7 +1352,6 @@ PyInit__thread(void)
     PyObject *m, *d, *v;
     double time_max;
     double timeout_max;
-    PyThreadState *tstate = PyThreadState_Get();
 
     /* Initialize types: */
     if (PyType_Ready(&localdummytype) < 0)
@@ -1397,7 +1396,7 @@ PyInit__thread(void)
     if (PyModule_AddObject(m, "_local", (PyObject *)&localtype) < 0)
         return NULL;
 
-    tstate->interp->num_threads = 0;
+    nb_threads = 0;
 
     str_dict = PyUnicode_InternFromString("__dict__");
     if (str_dict == NULL)
diff --git a/Modules/_winapi.c b/Modules/_winapi.c
index 6556d99ea8e..682d0a3cdd8 100644
--- a/Modules/_winapi.c
+++ b/Modules/_winapi.c
@@ -114,7 +114,7 @@ overlapped_dealloc(OverlappedObject *self)
         {
             /* The operation is no longer pending -- nothing to do. */
         }
-        else if _Py_IS_FINALIZING()
+        else if (_Py_Finalizing == NULL)
         {
             /* The operation is still pending -- give a warning.  This
                will probably only happen on Windows XP. */
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index fa67f7f5439..4e5acf305b9 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -39,9 +39,133 @@ module gc
 /* Get the object given the GC head */
 #define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))
 
+/*** Global GC state ***/
+
+struct gc_generation {
+    PyGC_Head head;
+    int threshold; /* collection threshold */
+    int count; /* count of allocations or collections of younger
+                  generations */
+};
+
+/* If we change this, we need to change the default value in the signature of
+   gc.collect. */
+#define NUM_GENERATIONS 3
+#define GEN_HEAD(n) (&generations[n].head)
+
+/* linked lists of container objects */
+static struct gc_generation generations[NUM_GENERATIONS] = {
+    /* PyGC_Head,                               threshold,      count */
+    {{{GEN_HEAD(0), GEN_HEAD(0), 0}},           700,            0},
+    {{{GEN_HEAD(1), GEN_HEAD(1), 0}},           10,             0},
+    {{{GEN_HEAD(2), GEN_HEAD(2), 0}},           10,             0},
+};
+
+PyGC_Head *_PyGC_generation0 = GEN_HEAD(0);
+
+static int enabled = 1; /* automatic collection enabled? */
+
+/* true if we are currently running the collector */
+static int collecting = 0;
+
+/* list of uncollectable objects */
+static PyObject *garbage = NULL;
+
 /* Python string to use if unhandled exception occurs */
 static PyObject *gc_str = NULL;
 
+/* a list of callbacks to be invoked when collection is performed */
+static PyObject *callbacks = NULL;
+
+/* This is the number of objects that survived the last full collection. It
+   approximates the number of long lived objects tracked by the GC.
+
+   (by "full collection", we mean a collection of the oldest generation).
+*/
+static Py_ssize_t long_lived_total = 0;
+
+/* This is the number of objects that survived all "non-full" collections,
+   and are waiting to undergo a full collection for the first time.
+
+*/
+static Py_ssize_t long_lived_pending = 0;
+
+/*
+   NOTE: about the counting of long-lived objects.
+
+   To limit the cost of garbage collection, there are two strategies;
+     - make each collection faster, e.g. by scanning fewer objects
+     - do fewer collections
+   This heuristic is about the latter strategy.
+
+   In addition to the various configurable thresholds, we only trigger a
+   full collection if the ratio
+    long_lived_pending / long_lived_total
+   is above a given value (hardwired to 25%).
+
+   The reason is that, while "non-full" collections (i.e., collections of
+   the young and middle generations) will always examine roughly the same
+   number of objects -- determined by the aforementioned thresholds --,
+   the cost of a full collection is proportional to the total number of
+   long-lived objects, which is virtually unbounded.
+
+   Indeed, it has been remarked that doing a full collection every
+   <constant number> of object creations entails a dramatic performance
+   degradation in workloads which consist in creating and storing lots of
+   long-lived objects (e.g. building a large list of GC-tracked objects would
+   show quadratic performance, instead of linear as expected: see issue #4074).
+
+   Using the above ratio, instead, yields amortized linear performance in
+   the total number of objects (the effect of which can be summarized
+   thusly: "each full garbage collection is more and more costly as the
+   number of objects grows, but we do fewer and fewer of them").
+
+   This heuristic was suggested by Martin von Löwis on python-dev in
+   June 2008. His original analysis and proposal can be found at:
+    http://mail.python.org/pipermail/python-dev/2008-June/080579.html
+*/
+
+/*
+   NOTE: about untracking of mutable objects.
+
+   Certain types of container cannot participate in a reference cycle, and
+   so do not need to be tracked by the garbage collector. Untracking these
+   objects reduces the cost of garbage collections. However, determining
+   which objects may be untracked is not free, and the costs must be
+   weighed against the benefits for garbage collection.
+
+   There are two possible strategies for when to untrack a container:
+
+   i) When the container is created.
+   ii) When the container is examined by the garbage collector.
+
+   Tuples containing only immutable objects (integers, strings etc, and
+   recursively, tuples of immutable objects) do not need to be tracked.
+   The interpreter creates a large number of tuples, many of which will
+   not survive until garbage collection. It is therefore not worthwhile
+   to untrack eligible tuples at creation time.
+
+   Instead, all tuples except the empty tuple are tracked when created.
+   During garbage collection it is determined whether any surviving tuples
+   can be untracked. A tuple can be untracked if all of its contents are
+   already not tracked. Tuples are examined for untracking in all garbage
+   collection cycles. It may take more than one cycle to untrack a tuple.
+
+   Dictionaries containing only immutable objects also do not need to be
+   tracked. Dictionaries are untracked when created. If a tracked item is
+   inserted into a dictionary (either as a key or value), the dictionary
+   becomes tracked. During a full garbage collection (all generations),
+   the collector will untrack any dictionaries whose contents are not
+   tracked.
+
+   The module provides the python function is_tracked(obj), which returns
+   the CURRENT tracking status of the object. Subsequent garbage
+   collections may change the tracking status of the object.
+
+   Untracking of certain containers was introduced in issue #4688, and
+   the algorithm was refined in response to issue #14775.
+*/
+
 /* set for debugging information */
 #define DEBUG_STATS             (1<<0) /* print collection statistics */
 #define DEBUG_COLLECTABLE       (1<<1) /* print collectable objects */
@@ -50,26 +174,19 @@ static PyObject *gc_str = NULL;
 #define DEBUG_LEAK              DEBUG_COLLECTABLE | \
                 DEBUG_UNCOLLECTABLE | \
                 DEBUG_SAVEALL
+static int debug;
+
+/* Running stats per generation */
+struct gc_generation_stats {
+    /* total number of collections */
+    Py_ssize_t collections;
+    /* total number of collected objects */
+    Py_ssize_t collected;
+    /* total number of uncollectable objects (put into gc.garbage) */
+    Py_ssize_t uncollectable;
+};
 
-#define GEN_HEAD(n) (&_PyRuntime.gc.generations[n].head)
-
-void
-_PyGC_Initialize(struct _gc_runtime_state *state)
-{
-    state->enabled = 1; /* automatic collection enabled? */
-
-#define _GEN_HEAD(n) (&state->generations[n].head)
-    struct gc_generation generations[NUM_GENERATIONS] = {
-        /* PyGC_Head,                                 threshold,      count */
-        {{{_GEN_HEAD(0), _GEN_HEAD(0), 0}},           700,            0},
-        {{{_GEN_HEAD(1), _GEN_HEAD(1), 0}},           10,             0},
-        {{{_GEN_HEAD(2), _GEN_HEAD(2), 0}},           10,             0},
-    };
-    for (int i = 0; i < NUM_GENERATIONS; i++) {
-        state->generations[i] = generations[i];
-    };
-    state->generation0 = GEN_HEAD(0);
-}
+static struct gc_generation_stats generation_stats[NUM_GENERATIONS];
 
 /*--------------------------------------------------------------------------
 gc_refs values.
@@ -649,16 +766,16 @@ handle_legacy_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
 {
     PyGC_Head *gc = finalizers->gc.gc_next;
 
-    if (_PyRuntime.gc.garbage == NULL) {
-        _PyRuntime.gc.garbage = PyList_New(0);
-        if (_PyRuntime.gc.garbage == NULL)
+    if (garbage == NULL) {
+        garbage = PyList_New(0);
+        if (garbage == NULL)
             Py_FatalError("gc couldn't create gc.garbage list");
     }
     for (; gc != finalizers; gc = gc->gc.gc_next) {
         PyObject *op = FROM_GC(gc);
 
-        if ((_PyRuntime.gc.debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
-            if (PyList_Append(_PyRuntime.gc.garbage, op) < 0)
+        if ((debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
+            if (PyList_Append(garbage, op) < 0)
                 return -1;
         }
     }
@@ -748,8 +865,8 @@ delete_garbage(PyGC_Head *collectable, PyGC_Head *old)
         PyGC_Head *gc = collectable->gc.gc_next;
         PyObject *op = FROM_GC(gc);
 
-        if (_PyRuntime.gc.debug & DEBUG_SAVEALL) {
-            PyList_Append(_PyRuntime.gc.garbage, op);
+        if (debug & DEBUG_SAVEALL) {
+            PyList_Append(garbage, op);
         }
         else {
             if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
@@ -802,9 +919,9 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
     PyGC_Head *gc;
     _PyTime_t t1 = 0;   /* initialize to prevent a compiler warning */
 
-    struct gc_generation_stats *stats = &_PyRuntime.gc.generation_stats[generation];
+    struct gc_generation_stats *stats = &generation_stats[generation];
 
-    if (_PyRuntime.gc.debug & DEBUG_STATS) {
+    if (debug & DEBUG_STATS) {
         PySys_WriteStderr("gc: collecting generation %d...\n",
                           generation);
         PySys_WriteStderr("gc: objects in each generation:");
@@ -821,9 +938,9 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
 
     /* update collection and allocation counters */
     if (generation+1 < NUM_GENERATIONS)
-        _PyRuntime.gc.generations[generation+1].count += 1;
+        generations[generation+1].count += 1;
     for (i = 0; i <= generation; i++)
-        _PyRuntime.gc.generations[i].count = 0;
+        generations[i].count = 0;
 
     /* merge younger generations with one we are currently collecting */
     for (i = 0; i < generation; i++) {
@@ -857,7 +974,7 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
     /* Move reachable objects to next generation. */
     if (young != old) {
         if (generation == NUM_GENERATIONS - 2) {
-            _PyRuntime.gc.long_lived_pending += gc_list_size(young);
+            long_lived_pending += gc_list_size(young);
         }
         gc_list_merge(young, old);
     }
@@ -865,8 +982,8 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
         /* We only untrack dicts in full collections, to avoid quadratic
            dict build-up. See issue #14775. */
         untrack_dicts(young);
-        _PyRuntime.gc.long_lived_pending = 0;
-        _PyRuntime.gc.long_lived_total = gc_list_size(young);
+        long_lived_pending = 0;
+        long_lived_total = gc_list_size(young);
     }
 
     /* All objects in unreachable are trash, but objects reachable from
@@ -886,7 +1003,7 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
     for (gc = unreachable.gc.gc_next; gc != &unreachable;
                     gc = gc->gc.gc_next) {
         m++;
-        if (_PyRuntime.gc.debug & DEBUG_COLLECTABLE) {
+        if (debug & DEBUG_COLLECTABLE) {
             debug_cycle("collectable", FROM_GC(gc));
         }
     }
@@ -915,10 +1032,10 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
          gc != &finalizers;
          gc = gc->gc.gc_next) {
         n++;
-        if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE)
+        if (debug & DEBUG_UNCOLLECTABLE)
             debug_cycle("uncollectable", FROM_GC(gc));
     }
-    if (_PyRuntime.gc.debug & DEBUG_STATS) {
+    if (debug & DEBUG_STATS) {
         _PyTime_t t2 = _PyTime_GetMonotonicClock();
 
         if (m == 0 && n == 0)
@@ -981,11 +1098,11 @@ invoke_gc_callback(const char *phase, int generation,
     PyObject *info = NULL;
 
     /* we may get called very early */
-    if (_PyRuntime.gc.callbacks == NULL)
+    if (callbacks == NULL)
         return;
     /* The local variable cannot be rebound, check it for sanity */
-    assert(_PyRuntime.gc.callbacks != NULL && PyList_CheckExact(_PyRuntime.gc.callbacks));
-    if (PyList_GET_SIZE(_PyRuntime.gc.callbacks) != 0) {
+    assert(callbacks != NULL && PyList_CheckExact(callbacks));
+    if (PyList_GET_SIZE(callbacks) != 0) {
         info = Py_BuildValue("{sisnsn}",
             "generation", generation,
             "collected", collected,
@@ -995,8 +1112,8 @@ invoke_gc_callback(const char *phase, int generation,
             return;
         }
     }
-    for (i=0; i<PyList_GET_SIZE(_PyRuntime.gc.callbacks); i++) {
-        PyObject *r, *cb = PyList_GET_ITEM(_PyRuntime.gc.callbacks, i);
+    for (i=0; i<PyList_GET_SIZE(callbacks); i++) {
+        PyObject *r, *cb = PyList_GET_ITEM(callbacks, i);
         Py_INCREF(cb); /* make sure cb doesn't go away */
         r = PyObject_CallFunction(cb, "sO", phase, info);
         Py_XDECREF(r);
@@ -1030,13 +1147,13 @@ collect_generations(void)
      * exceeds the threshold.  Objects in the that generation and
      * generations younger than it will be collected. */
     for (i = NUM_GENERATIONS-1; i >= 0; i--) {
-        if (_PyRuntime.gc.generations[i].count > _PyRuntime.gc.generations[i].threshold) {
+        if (generations[i].count > generations[i].threshold) {
             /* Avoid quadratic performance degradation in number
                of tracked objects. See comments at the beginning
                of this file, and issue #4074.
             */
             if (i == NUM_GENERATIONS - 1
-                && _PyRuntime.gc.long_lived_pending < _PyRuntime.gc.long_lived_total / 4)
+                && long_lived_pending < long_lived_total / 4)
                 continue;
             n = collect_with_callback(i);
             break;
@@ -1057,7 +1174,7 @@ static PyObject *
 gc_enable_impl(PyObject *module)
 /*[clinic end generated code: output=45a427e9dce9155c input=81ac4940ca579707]*/
 {
-    _PyRuntime.gc.enabled = 1;
+    enabled = 1;
     Py_RETURN_NONE;
 }
 
@@ -1071,7 +1188,7 @@ static PyObject *
 gc_disable_impl(PyObject *module)
 /*[clinic end generated code: output=97d1030f7aa9d279 input=8c2e5a14e800d83b]*/
 {
-    _PyRuntime.gc.enabled = 0;
+    enabled = 0;
     Py_RETURN_NONE;
 }
 
@@ -1085,7 +1202,7 @@ static int
 gc_isenabled_impl(PyObject *module)
 /*[clinic end generated code: output=1874298331c49130 input=30005e0422373b31]*/
 {
-    return _PyRuntime.gc.enabled;
+    return enabled;
 }
 
 /*[clinic input]
@@ -1113,12 +1230,12 @@ gc_collect_impl(PyObject *module, int generation)
         return -1;
     }
 
-    if (_PyRuntime.gc.collecting)
+    if (collecting)
         n = 0; /* already collecting, don't do anything */
     else {
-        _PyRuntime.gc.collecting = 1;
+        collecting = 1;
         n = collect_with_callback(generation);
-        _PyRuntime.gc.collecting = 0;
+        collecting = 0;
     }
 
     return n;
@@ -1146,7 +1263,7 @@ static PyObject *
 gc_set_debug_impl(PyObject *module, int flags)
 /*[clinic end generated code: output=7c8366575486b228 input=5e5ce15e84fbed15]*/
 {
-    _PyRuntime.gc.debug = flags;
+    debug = flags;
 
     Py_RETURN_NONE;
 }
@@ -1161,7 +1278,7 @@ static int
 gc_get_debug_impl(PyObject *module)
 /*[clinic end generated code: output=91242f3506cd1e50 input=91a101e1c3b98366]*/
 {
-    return _PyRuntime.gc.debug;
+    return debug;
 }
 
 PyDoc_STRVAR(gc_set_thresh__doc__,
@@ -1175,13 +1292,13 @@ gc_set_thresh(PyObject *self, PyObject *args)
 {
     int i;
     if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
-                          &_PyRuntime.gc.generations[0].threshold,
-                          &_PyRuntime.gc.generations[1].threshold,
-                          &_PyRuntime.gc.generations[2].threshold))
+                          &generations[0].threshold,
+                          &generations[1].threshold,
+                          &generations[2].threshold))
         return NULL;
     for (i = 2; i < NUM_GENERATIONS; i++) {
         /* generations higher than 2 get the same threshold */
-        _PyRuntime.gc.generations[i].threshold = _PyRuntime.gc.generations[2].threshold;
+        generations[i].threshold = generations[2].threshold;
     }
 
     Py_RETURN_NONE;
@@ -1198,9 +1315,9 @@ gc_get_threshold_impl(PyObject *module)
 /*[clinic end generated code: output=7902bc9f41ecbbd8 input=286d79918034d6e6]*/
 {
     return Py_BuildValue("(iii)",
-                         _PyRuntime.gc.generations[0].threshold,
-                         _PyRuntime.gc.generations[1].threshold,
-                         _PyRuntime.gc.generations[2].threshold);
+                         generations[0].threshold,
+                         generations[1].threshold,
+                         generations[2].threshold);
 }
 
 /*[clinic input]
@@ -1214,9 +1331,9 @@ gc_get_count_impl(PyObject *module)
 /*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/
 {
     return Py_BuildValue("(iii)",
-                         _PyRuntime.gc.generations[0].count,
-                         _PyRuntime.gc.generations[1].count,
-                         _PyRuntime.gc.generations[2].count);
+                         generations[0].count,
+                         generations[1].count,
+                         generations[2].count);
 }
 
 static int
@@ -1347,7 +1464,7 @@ gc_get_stats_impl(PyObject *module)
     /* To get consistent values despite allocations while constructing
        the result list, we use a snapshot of the running stats. */
     for (i = 0; i < NUM_GENERATIONS; i++) {
-        stats[i] = _PyRuntime.gc.generation_stats[i];
+        stats[i] = generation_stats[i];
     }
 
     result = PyList_New(0);
@@ -1464,22 +1581,22 @@ PyInit_gc(void)
     if (m == NULL)
         return NULL;
 
-    if (_PyRuntime.gc.garbage == NULL) {
-        _PyRuntime.gc.garbage = PyList_New(0);
-        if (_PyRuntime.gc.garbage == NULL)
+    if (garbage == NULL) {
+        garbage = PyList_New(0);
+        if (garbage == NULL)
             return NULL;
     }
-    Py_INCREF(_PyRuntime.gc.garbage);
-    if (PyModule_AddObject(m, "garbage", _PyRuntime.gc.garbage) < 0)
+    Py_INCREF(garbage);
+    if (PyModule_AddObject(m, "garbage", garbage) < 0)
         return NULL;
 
-    if (_PyRuntime.gc.callbacks == NULL) {
-        _PyRuntime.gc.callbacks = PyList_New(0);
-        if (_PyRuntime.gc.callbacks == NULL)
+    if (callbacks == NULL) {
+        callbacks = PyList_New(0);
+        if (callbacks == NULL)
             return NULL;
     }
-    Py_INCREF(_PyRuntime.gc.callbacks);
-    if (PyModule_AddObject(m, "callbacks", _PyRuntime.gc.callbacks) < 0)
+    Py_INCREF(callbacks);
+    if (PyModule_AddObject(m, "callbacks", callbacks) < 0)
         return NULL;
 
 #define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return NULL
@@ -1498,12 +1615,12 @@ PyGC_Collect(void)
 {
     Py_ssize_t n;
 
-    if (_PyRuntime.gc.collecting)
+    if (collecting)
         n = 0; /* already collecting, don't do anything */
     else {
-        _PyRuntime.gc.collecting = 1;
+        collecting = 1;
         n = collect_with_callback(NUM_GENERATIONS - 1);
-        _PyRuntime.gc.collecting = 0;
+        collecting = 0;
     }
 
     return n;
@@ -1512,7 +1629,7 @@ PyGC_Collect(void)
 Py_ssize_t
 _PyGC_CollectIfEnabled(void)
 {
-    if (!_PyRuntime.gc.enabled)
+    if (!enabled)
         return 0;
 
     return PyGC_Collect();
@@ -1529,12 +1646,12 @@ _PyGC_CollectNoFail(void)
        during interpreter shutdown (and then never finish it).
        See http://bugs.python.org/issue8713#msg195178 for an example.
        */
-    if (_PyRuntime.gc.collecting)
+    if (collecting)
         n = 0;
     else {
-        _PyRuntime.gc.collecting = 1;
+        collecting = 1;
         n = collect(NUM_GENERATIONS - 1, NULL, NULL, 1);
-        _PyRuntime.gc.collecting = 0;
+        collecting = 0;
     }
     return n;
 }
@@ -1542,10 +1659,10 @@ _PyGC_CollectNoFail(void)
 void
 _PyGC_DumpShutdownStats(void)
 {
-    if (!(_PyRuntime.gc.debug & DEBUG_SAVEALL)
-        && _PyRuntime.gc.garbage != NULL && PyList_GET_SIZE(_PyRuntime.gc.garbage) > 0) {
+    if (!(debug & DEBUG_SAVEALL)
+        && garbage != NULL && PyList_GET_SIZE(garbage) > 0) {
         char *message;
-        if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE)
+        if (debug & DEBUG_UNCOLLECTABLE)
             message = "gc: %zd uncollectable objects at " \
                 "shutdown";
         else
@@ -1556,13 +1673,13 @@ _PyGC_DumpShutdownStats(void)
            already. */
         if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
                                      "gc", NULL, message,
-                                     PyList_GET_SIZE(_PyRuntime.gc.garbage)))
+                                     PyList_GET_SIZE(garbage)))
             PyErr_WriteUnraisable(NULL);
-        if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE) {
+        if (debug & DEBUG_UNCOLLECTABLE) {
             PyObject *repr = NULL, *bytes = NULL;
-            repr = PyObject_Repr(_PyRuntime.gc.garbage);
+            repr = PyObject_Repr(garbage);
             if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr)))
-                PyErr_WriteUnraisable(_PyRuntime.gc.garbage);
+                PyErr_WriteUnraisable(garbage);
             else {
                 PySys_WriteStderr(
                     "      %s\n",
@@ -1578,7 +1695,7 @@ _PyGC_DumpShutdownStats(void)
 void
 _PyGC_Fini(void)
 {
-    Py_CLEAR(_PyRuntime.gc.callbacks);
+    Py_CLEAR(callbacks);
 }
 
 /* for debugging */
@@ -1629,15 +1746,15 @@ _PyObject_GC_Alloc(int use_calloc, size_t basicsize)
         return PyErr_NoMemory();
     g->gc.gc_refs = 0;
     _PyGCHead_SET_REFS(g, GC_UNTRACKED);
-    _PyRuntime.gc.generations[0].count++; /* number of allocated GC objects */
-    if (_PyRuntime.gc.generations[0].count > _PyRuntime.gc.generations[0].threshold &&
-        _PyRuntime.gc.enabled &&
-        _PyRuntime.gc.generations[0].threshold &&
-        !_PyRuntime.gc.collecting &&
+    generations[0].count++; /* number of allocated GC objects */
+    if (generations[0].count > generations[0].threshold &&
+        enabled &&
+        generations[0].threshold &&
+        !collecting &&
         !PyErr_Occurred()) {
-        _PyRuntime.gc.collecting = 1;
+        collecting = 1;
         collect_generations();
-        _PyRuntime.gc.collecting = 0;
+        collecting = 0;
     }
     op = FROM_GC(g);
     return op;
@@ -1702,8 +1819,8 @@ PyObject_GC_Del(void *op)
     PyGC_Head *g = AS_GC(op);
     if (IS_TRACKED(op))
         gc_list_remove(g);
-    if (_PyRuntime.gc.generations[0].count > 0) {
-        _PyRuntime.gc.generations[0].count--;
+    if (generations[0].count > 0) {
+        generations[0].count--;
     }
     PyObject_FREE(g);
 }
diff --git a/Modules/main.c b/Modules/main.c
index 3e347dc8e24..08b22760de1 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -598,10 +598,16 @@ Py_Main(int argc, wchar_t **argv)
         }
     }
 
+    char *pymalloc = Py_GETENV("PYTHONMALLOC");
+    if (_PyMem_SetupAllocators(pymalloc) < 0) {
+        fprintf(stderr,
+            "Error in PYTHONMALLOC: unknown allocator \"%s\"!\n", pymalloc);
+        exit(1);
+    }
+
     /* Initialize the core language runtime */
     Py_IgnoreEnvironmentFlag = core_config.ignore_environment;
     core_config._disable_importlib = 0;
-    core_config.allocator = Py_GETENV("PYTHONMALLOC");
     _Py_InitializeCore(&core_config);
 
     /* Reprocess the command line with the language runtime available */
diff --git a/Objects/object.c b/Objects/object.c
index 68a90c23107..2ba6e572ea6 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -2028,6 +2028,14 @@ Py_ReprLeave(PyObject *obj)
 
 /* Trashcan support. */
 
+/* Current call-stack depth of tp_dealloc calls. */
+int _PyTrash_delete_nesting = 0;
+
+/* List of objects that still need to be cleaned up, singly linked via their
+ * gc headers' gc_prev pointers.
+ */
+PyObject *_PyTrash_delete_later = NULL;
+
 /* Add op to the _PyTrash_delete_later list.  Called when the current
  * call-stack depth gets large.  op must be a currently untracked gc'ed
  * object, with refcount 0.  Py_DECREF must already have been called on it.
@@ -2038,8 +2046,8 @@ _PyTrash_deposit_object(PyObject *op)
     assert(PyObject_IS_GC(op));
     assert(_PyGC_REFS(op) == _PyGC_REFS_UNTRACKED);
     assert(op->ob_refcnt == 0);
-    _Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *)_PyRuntime.gc.trash_delete_later;
-    _PyRuntime.gc.trash_delete_later = op;
+    _Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *)_PyTrash_delete_later;
+    _PyTrash_delete_later = op;
 }
 
 /* The equivalent API, using per-thread state recursion info */
@@ -2060,11 +2068,11 @@ _PyTrash_thread_deposit_object(PyObject *op)
 void
 _PyTrash_destroy_chain(void)
 {
-    while (_PyRuntime.gc.trash_delete_later) {
-        PyObject *op = _PyRuntime.gc.trash_delete_later;
+    while (_PyTrash_delete_later) {
+        PyObject *op = _PyTrash_delete_later;
         destructor dealloc = Py_TYPE(op)->tp_dealloc;
 
-        _PyRuntime.gc.trash_delete_later =
+        _PyTrash_delete_later =
             (PyObject*) _Py_AS_GC(op)->gc.gc_prev;
 
         /* Call the deallocator directly.  This used to try to
@@ -2074,9 +2082,9 @@ _PyTrash_destroy_chain(void)
          * up distorting allocation statistics.
          */
         assert(op->ob_refcnt == 0);
-        ++_PyRuntime.gc.trash_delete_nesting;
+        ++_PyTrash_delete_nesting;
         (*dealloc)(op);
-        --_PyRuntime.gc.trash_delete_nesting;
+        --_PyTrash_delete_nesting;
     }
 }
 
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 3698cfc260e..32e7ecbe1e0 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -178,9 +178,7 @@ static struct {
 #define PYDBG_FUNCS \
     _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree
 
-
-#define _PyMem_Raw _PyRuntime.mem.allocators.raw
-static const PyMemAllocatorEx _pymem_raw = {
+static PyMemAllocatorEx _PyMem_Raw = {
 #ifdef Py_DEBUG
     &_PyMem_Debug.raw, PYRAWDBG_FUNCS
 #else
@@ -188,8 +186,7 @@ static const PyMemAllocatorEx _pymem_raw = {
 #endif
     };
 
-#define _PyMem _PyRuntime.mem.allocators.mem
-static const PyMemAllocatorEx _pymem = {
+static PyMemAllocatorEx _PyMem = {
 #ifdef Py_DEBUG
     &_PyMem_Debug.mem, PYDBG_FUNCS
 #else
@@ -197,8 +194,7 @@ static const PyMemAllocatorEx _pymem = {
 #endif
     };
 
-#define _PyObject _PyRuntime.mem.allocators.obj
-static const PyMemAllocatorEx _pyobject = {
+static PyMemAllocatorEx _PyObject = {
 #ifdef Py_DEBUG
     &_PyMem_Debug.obj, PYDBG_FUNCS
 #else
@@ -271,7 +267,7 @@ _PyMem_SetupAllocators(const char *opt)
 #undef PYRAWDBG_FUNCS
 #undef PYDBG_FUNCS
 
-static const PyObjectArenaAllocator _PyObject_Arena = {NULL,
+static PyObjectArenaAllocator _PyObject_Arena = {NULL,
 #ifdef MS_WINDOWS
     _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
 #elif defined(ARENAS_USE_MMAP)
@@ -281,34 +277,6 @@ static const PyObjectArenaAllocator _PyObject_Arena = {NULL,
 #endif
     };
 
-void
-_PyObject_Initialize(struct _pyobj_runtime_state *state)
-{
-    state->allocator_arenas = _PyObject_Arena;
-}
-
-void
-_PyMem_Initialize(struct _pymem_runtime_state *state)
-{
-    state->allocators.raw = _pymem_raw;
-    state->allocators.mem = _pymem;
-    state->allocators.obj = _pyobject;
-
-#ifdef WITH_PYMALLOC
-    for (int i = 0; i < 8; i++) {
-        if (NB_SMALL_SIZE_CLASSES <= i * 8)
-            break;
-        for (int j = 0; j < 8; j++) {
-            int x = i * 8 + j;
-            poolp *addr = &(state->usedpools[2*(x)]);
-            poolp val = (poolp)((uint8_t *)addr - 2*sizeof(pyblock *));
-            state->usedpools[x * 2] = val;
-            state->usedpools[x * 2 + 1] = val;
-        };
-    };
-#endif /* WITH_PYMALLOC */
-}
-
 #ifdef WITH_PYMALLOC
 static int
 _PyMem_DebugEnabled(void)
@@ -395,13 +363,13 @@ PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
 void
 PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
 {
-    *allocator = _PyRuntime.obj.allocator_arenas;
+    *allocator = _PyObject_Arena;
 }
 
 void
 PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
 {
-    _PyRuntime.obj.allocator_arenas = *allocator;
+    _PyObject_Arena = *allocator;
 }
 
 void *
@@ -436,8 +404,7 @@ PyMem_RawRealloc(void *ptr, size_t new_size)
     return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
 }
 
-void
-PyMem_RawFree(void *ptr)
+void PyMem_RawFree(void *ptr)
 {
     _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
 }
@@ -554,10 +521,497 @@ PyObject_Free(void *ptr)
 static int running_on_valgrind = -1;
 #endif
 
+/* An object allocator for Python.
+
+   Here is an introduction to the layers of the Python memory architecture,
+   showing where the object allocator is actually used (layer +2). It is
+   called for every object allocation and deallocation (PyObject_New/Del),
+   unless the object-specific allocators implement a proprietary allocation
+   scheme (ex.: ints use a simple free list). This is also the place where
+   the cyclic garbage collector operates selectively on container objects.
+
+
+    Object-specific allocators
+    _____   ______   ______       ________
+   [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
++3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
+    _______________________________       |                           |
+   [   Python's object allocator   ]      |                           |
++2 | ####### Object memory ####### | <------ Internal buffers ------> |
+    ______________________________________________________________    |
+   [          Python's raw memory allocator (PyMem_ API)          ]   |
++1 | <----- Python memory (under PyMem manager's control) ------> |   |
+    __________________________________________________________________
+   [    Underlying general-purpose allocator (ex: C library malloc)   ]
+ 0 | <------ Virtual memory allocated for the python process -------> |
+
+   =========================================================================
+    _______________________________________________________________________
+   [                OS-specific Virtual Memory Manager (VMM)               ]
+-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
+    __________________________________   __________________________________
+   [                                  ] [                                  ]
+-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
+
+*/
+/*==========================================================================*/
+
+/* A fast, special-purpose memory allocator for small blocks, to be used
+   on top of a general-purpose malloc -- heavily based on previous art. */
+
+/* Vladimir Marangozov -- August 2000 */
+
+/*
+ * "Memory management is where the rubber meets the road -- if we do the wrong
+ * thing at any level, the results will not be good. And if we don't make the
+ * levels work well together, we are in serious trouble." (1)
+ *
+ * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
+ *    "Dynamic Storage Allocation: A Survey and Critical Review",
+ *    in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
+ */
+
+/* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */
+
+/*==========================================================================*/
+
+/*
+ * Allocation strategy abstract:
+ *
+ * For small requests, the allocator sub-allocates <Big> blocks of memory.
+ * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
+ * system's allocator.
+ *
+ * Small requests are grouped in size classes spaced 8 bytes apart, due
+ * to the required valid alignment of the returned address. Requests of
+ * a particular size are serviced from memory pools of 4K (one VMM page).
+ * Pools are fragmented on demand and contain free lists of blocks of one
+ * particular size class. In other words, there is a fixed-size allocator
+ * for each size class. Free pools are shared by the different allocators
+ * thus minimizing the space reserved for a particular size class.
+ *
+ * This allocation strategy is a variant of what is known as "simple
+ * segregated storage based on array of free lists". The main drawback of
+ * simple segregated storage is that we might end up with a lot of reserved
+ * memory for the different free lists, which degenerate in time. To avoid
+ * this, we partition each free list in pools and we share dynamically the
+ * reserved space between all free lists. This technique is quite efficient
+ * for memory intensive programs which allocate mainly small-sized blocks.
+ *
+ * For small requests we have the following table:
+ *
+ * Request in bytes     Size of allocated block      Size class idx
+ * ----------------------------------------------------------------
+ *        1-8                     8                       0
+ *        9-16                   16                       1
+ *       17-24                   24                       2
+ *       25-32                   32                       3
+ *       33-40                   40                       4
+ *       41-48                   48                       5
+ *       49-56                   56                       6
+ *       57-64                   64                       7
+ *       65-72                   72                       8
+ *        ...                   ...                     ...
+ *      497-504                 504                      62
+ *      505-512                 512                      63
+ *
+ *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
+ *      allocator.
+ */
+
+/*==========================================================================*/
+
+/*
+ * -- Main tunable settings section --
+ */
+
+/*
+ * Alignment of addresses returned to the user. 8-bytes alignment works
+ * on most current architectures (with 32-bit or 64-bit address busses).
+ * The alignment value is also used for grouping small requests in size
+ * classes spaced ALIGNMENT bytes apart.
+ *
+ * You shouldn't change this unless you know what you are doing.
+ */
+#define ALIGNMENT               8               /* must be 2^N */
+#define ALIGNMENT_SHIFT         3
+
+/* Return the number of bytes in size class I, as a uint. */
+#define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
+
+/*
+ * Max size threshold below which malloc requests are considered to be
+ * small enough in order to use preallocated memory pools. You can tune
+ * this value according to your application behaviour and memory needs.
+ *
+ * Note: a size threshold of 512 guarantees that newly created dictionaries
+ * will be allocated from preallocated memory pools on 64-bit.
+ *
+ * The following invariants must hold:
+ *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
+ *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
+ *
+ * Although not required, for better performance and space efficiency,
+ * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
+ */
+#define SMALL_REQUEST_THRESHOLD 512
+#define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
+
+/*
+ * The system's VMM page size can be obtained on most unices with a
+ * getpagesize() call or deduced from various header files. To make
+ * things simpler, we assume that it is 4K, which is OK for most systems.
+ * It is probably better if this is the native page size, but it doesn't
+ * have to be.  In theory, if SYSTEM_PAGE_SIZE is larger than the native page
+ * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
+ * violation fault.  4K is apparently OK for all the platforms that python
+ * currently targets.
+ */
+#define SYSTEM_PAGE_SIZE        (4 * 1024)
+#define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)
+
+/*
+ * Maximum amount of memory managed by the allocator for small requests.
+ */
+#ifdef WITH_MEMORY_LIMITS
+#ifndef SMALL_MEMORY_LIMIT
+#define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
+#endif
+#endif
+
+/*
+ * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
+ * on a page boundary. This is a reserved virtual address space for the
+ * current process (obtained through a malloc()/mmap() call). This in no way
+ * means that the memory arenas will be used entirely. A malloc(<Big>) is
+ * usually an address range reservation for <Big> bytes, unless all pages within
+ * this space are referenced subsequently. So malloc'ing big blocks and not
+ * using them does not mean "wasting memory". It's an addressable range
+ * wastage...
+ *
+ * Arenas are allocated with mmap() on systems supporting anonymous memory
+ * mappings to reduce heap fragmentation.
+ */
+#define ARENA_SIZE              (256 << 10)     /* 256KB */
+
+#ifdef WITH_MEMORY_LIMITS
+#define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
+#endif
+
+/*
+ * Size of the pools used for small blocks. Should be a power of 2,
+ * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
+ */
+#define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
+#define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK
+
+/*
+ * -- End of tunable settings section --
+ */
+
+/*==========================================================================*/
+
+/*
+ * Locking
+ *
+ * To reduce lock contention, it would probably be better to refine the
+ * crude function locking with per size class locking. I'm not positive
+ * however, whether it's worth switching to such locking policy because
+ * of the performance penalty it might introduce.
+ *
+ * The following macros describe the simplest (should also be the fastest)
+ * lock object on a particular platform and the init/fini/lock/unlock
+ * operations on it. The locks defined here are not expected to be recursive
+ * because it is assumed that they will always be called in the order:
+ * INIT, [LOCK, UNLOCK]*, FINI.
+ */
+
+/*
+ * Python's threads are serialized, so object malloc locking is disabled.
+ */
+#define SIMPLELOCK_DECL(lock)   /* simple lock declaration              */
+#define SIMPLELOCK_INIT(lock)   /* allocate (if needed) and initialize  */
+#define SIMPLELOCK_FINI(lock)   /* free/destroy an existing lock        */
+#define SIMPLELOCK_LOCK(lock)   /* acquire released lock */
+#define SIMPLELOCK_UNLOCK(lock) /* release acquired lock */
+
+/* When you say memory, my mind reasons in terms of (pointers to) blocks */
+typedef uint8_t block;
+
+/* Pool for small blocks. */
+struct pool_header {
+    union { block *_padding;
+            uint count; } ref;          /* number of allocated blocks    */
+    block *freeblock;                   /* pool's free list head         */
+    struct pool_header *nextpool;       /* next pool of this size class  */
+    struct pool_header *prevpool;       /* previous pool       ""        */
+    uint arenaindex;                    /* index into arenas of base adr */
+    uint szidx;                         /* block size class index        */
+    uint nextoffset;                    /* bytes to virgin block         */
+    uint maxnextoffset;                 /* largest valid nextoffset      */
+};
+
+typedef struct pool_header *poolp;
+
+/* Record keeping for arenas. */
+struct arena_object {
+    /* The address of the arena, as returned by malloc.  Note that 0
+     * will never be returned by a successful malloc, and is used
+     * here to mark an arena_object that doesn't correspond to an
+     * allocated arena.
+     */
+    uintptr_t address;
+
+    /* Pool-aligned pointer to the next pool to be carved off. */
+    block* pool_address;
+
+    /* The number of available pools in the arena:  free pools + never-
+     * allocated pools.
+     */
+    uint nfreepools;
+
+    /* The total number of pools in the arena, whether or not available. */
+    uint ntotalpools;
+
+    /* Singly-linked list of available pools. */
+    struct pool_header* freepools;
+
+    /* Whenever this arena_object is not associated with an allocated
+     * arena, the nextarena member is used to link all unassociated
+     * arena_objects in the singly-linked `unused_arena_objects` list.
+     * The prevarena member is unused in this case.
+     *
+     * When this arena_object is associated with an allocated arena
+     * with at least one available pool, both members are used in the
+     * doubly-linked `usable_arenas` list, which is maintained in
+     * increasing order of `nfreepools` values.
+     *
+     * Else this arena_object is associated with an allocated arena
+     * all of whose pools are in use.  `nextarena` and `prevarena`
+     * are both meaningless in this case.
+     */
+    struct arena_object* nextarena;
+    struct arena_object* prevarena;
+};
+
+#define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
+
+#define DUMMY_SIZE_IDX          0xffff  /* size class of newly cached pools */
+
+/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
+#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
+
+/* Return total number of blocks in pool of size index I, as a uint. */
+#define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
+
+/*==========================================================================*/
+
+/*
+ * This malloc lock
+ */
+SIMPLELOCK_DECL(_malloc_lock)
+#define LOCK()          SIMPLELOCK_LOCK(_malloc_lock)
+#define UNLOCK()        SIMPLELOCK_UNLOCK(_malloc_lock)
+#define LOCK_INIT()     SIMPLELOCK_INIT(_malloc_lock)
+#define LOCK_FINI()     SIMPLELOCK_FINI(_malloc_lock)
+
+/*
+ * Pool table -- headed, circular, doubly-linked lists of partially used pools.
+
+This is involved.  For an index i, usedpools[i+i] is the header for a list of
+all partially used pools holding small blocks with "size class idx" i. So
+usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
+16, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
+
+Pools are carved off an arena's highwater mark (an arena_object's pool_address
+member) as needed.  Once carved off, a pool is in one of three states forever
+after:
+
+used == partially used, neither empty nor full
+    At least one block in the pool is currently allocated, and at least one
+    block in the pool is not currently allocated (note this implies a pool
+    has room for at least two blocks).
+    This is a pool's initial state, as a pool is created only when malloc
+    needs space.
+    The pool holds blocks of a fixed size, and is in the circular list headed
+    at usedpools[i] (see above).  It's linked to the other used pools of the
+    same size class via the pool_header's nextpool and prevpool members.
+    If all but one block is currently allocated, a malloc can cause a
+    transition to the full state.  If all but one block is not currently
+    allocated, a free can cause a transition to the empty state.
+
+full == all the pool's blocks are currently allocated
+    On transition to full, a pool is unlinked from its usedpools[] list.
+    It is then no longer linked to from anything, and its nextpool and
+    prevpool members are meaningless until it transitions back to used.
+    A free of a block in a full pool puts the pool back in the used state.
+    Then it's linked in at the front of the appropriate usedpools[] list, so
+    that the next allocation for its size class will reuse the freed block.
+
+empty == all the pool's blocks are currently available for allocation
+    On transition to empty, a pool is unlinked from its usedpools[] list,
+    and linked to the front of its arena_object's singly-linked freepools list,
+    via its nextpool member.  The prevpool member has no meaning in this case.
+    Empty pools have no inherent size class:  the next time a malloc finds
+    an empty list in usedpools[], it takes the first pool off of freepools.
+    If the size class needed happens to be the same as the size class the pool
+    last had, some pool initialization can be skipped.
+
+
+Block Management
+
+Blocks within pools are again carved out as needed.  pool->freeblock points to
+the start of a singly-linked list of free blocks within the pool.  When a
+block is freed, it's inserted at the front of its pool's freeblock list.  Note
+that the available blocks in a pool are *not* linked all together when a pool
+is initialized.  Instead only "the first two" (lowest addresses) blocks are
+set up, returning the first such block, and setting pool->freeblock to a
+one-block list holding the second such block.  This is consistent with how
+pymalloc strives at all levels (arena, pool, and block) never to touch a piece
+of memory until it's actually needed.
+
+So long as a pool is in the used state, we're certain there *is* a block
+available for allocating, and pool->freeblock is not NULL.  If pool->freeblock
+points to the end of the free list before we've carved the entire pool into
+blocks, that means we simply haven't yet gotten to one of the higher-address
+blocks.  The offset from the pool_header to the start of "the next" virgin
+block is stored in the pool_header nextoffset member, and the largest value
+of nextoffset that makes sense is stored in the maxnextoffset member when a
+pool is initialized.  All the blocks in a pool have been passed out at least
+once when and only when nextoffset > maxnextoffset.
+
+
+Major obscurity:  While the usedpools vector is declared to have poolp
+entries, it doesn't really.  It really contains two pointers per (conceptual)
+poolp entry, the nextpool and prevpool members of a pool_header.  The
+excruciating initialization code below fools C so that
+
+    usedpools[i+i]
+
+"acts like" a genuine poolp, but only so long as you only reference its
+nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
+compensating for the fact that a pool_header's nextpool and prevpool members
+immediately follow a pool_header's first two members:
+
+    union { block *_padding;
+            uint count; } ref;
+    block *freeblock;
+
+each of which consumes sizeof(block *) bytes.  So what usedpools[i+i] really
+contains is a fudged-up pointer p such that *if* C believes it's a poolp
+pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
+circular list is empty).
+
+It's unclear why the usedpools setup is so convoluted.  It could be to
+minimize the amount of cache required to hold this heavily-referenced table
+(which only *needs* the two interpool pointer members of a pool_header). OTOH,
+referencing code has to remember to "double the index" and doing so isn't
+free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
+on C not inserting any padding anywhere in a pool_header at or before
+the prevpool member.
+**************************************************************************** */
+
+#define PTA(x)  ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
+#define PT(x)   PTA(x), PTA(x)
+
+static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
+    PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
+#if NB_SMALL_SIZE_CLASSES > 8
+    , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
+#if NB_SMALL_SIZE_CLASSES > 16
+    , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
+#if NB_SMALL_SIZE_CLASSES > 24
+    , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
+#if NB_SMALL_SIZE_CLASSES > 32
+    , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
+#if NB_SMALL_SIZE_CLASSES > 40
+    , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
+#if NB_SMALL_SIZE_CLASSES > 48
+    , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
+#if NB_SMALL_SIZE_CLASSES > 56
+    , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
+#if NB_SMALL_SIZE_CLASSES > 64
+#error "NB_SMALL_SIZE_CLASSES should be less than 64"
+#endif /* NB_SMALL_SIZE_CLASSES > 64 */
+#endif /* NB_SMALL_SIZE_CLASSES > 56 */
+#endif /* NB_SMALL_SIZE_CLASSES > 48 */
+#endif /* NB_SMALL_SIZE_CLASSES > 40 */
+#endif /* NB_SMALL_SIZE_CLASSES > 32 */
+#endif /* NB_SMALL_SIZE_CLASSES > 24 */
+#endif /* NB_SMALL_SIZE_CLASSES > 16 */
+#endif /* NB_SMALL_SIZE_CLASSES >  8 */
+};
+
+/*==========================================================================
+Arena management.
+
+`arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
+which may not be currently used (== they're arena_objects that aren't
+currently associated with an allocated arena).  Note that arenas proper are
+separately malloc'ed.
+
+Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
+we do try to free() arenas, and use some mild heuristic strategies to increase
+the likelihood that arenas eventually can be freed.
+
+unused_arena_objects
+
+    This is a singly-linked list of the arena_objects that are currently not
+    being used (no arena is associated with them).  Objects are taken off the
+    head of the list in new_arena(), and are pushed on the head of the list in
+    PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
+    is on this list if and only if its .address member is 0.
+
+usable_arenas
+
+    This is a doubly-linked list of the arena_objects associated with arenas
+    that have pools available.  These pools are either waiting to be reused,
+    or have not been used before.  The list is sorted to have the most-
+    allocated arenas first (ascending order based on the nfreepools member).
+    This means that the next allocation will come from a heavily used arena,
+    which gives the nearly empty arenas a chance to be returned to the system.
+    In my unscientific tests this dramatically improved the number of arenas
+    that could be freed.
+
+Note that an arena_object associated with an arena all of whose pools are
+currently in use isn't on either list.
+*/
+
+/* Array of objects used to track chunks of memory (arenas). */
+static struct arena_object* arenas = NULL;
+/* Number of slots currently allocated in the `arenas` vector. */
+static uint maxarenas = 0;
+
+/* The head of the singly-linked, NULL-terminated list of available
+ * arena_objects.
+ */
+static struct arena_object* unused_arena_objects = NULL;
+
+/* The head of the doubly-linked, NULL-terminated at each end, list of
+ * arena_objects associated with arenas that have pools available.
+ */
+static struct arena_object* usable_arenas = NULL;
+
+/* How many arena_objects do we initially allocate?
+ * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
+ * `arenas` vector.
+ */
+#define INITIAL_ARENA_OBJECTS 16
+
+/* Number of arenas allocated that haven't been free()'d. */
+static size_t narenas_currently_allocated = 0;
+
+/* Total number of times malloc() called to allocate an arena. */
+static size_t ntimes_arena_allocated = 0;
+/* High water mark (max value ever seen) for narenas_currently_allocated. */
+static size_t narenas_highwater = 0;
+
+static Py_ssize_t _Py_AllocatedBlocks = 0;
+
 Py_ssize_t
 _Py_GetAllocatedBlocks(void)
 {
-    return _PyRuntime.mem.num_allocated_blocks;
+    return _Py_AllocatedBlocks;
 }
 
 
@@ -581,7 +1035,7 @@ new_arena(void)
     if (debug_stats)
         _PyObject_DebugMallocStats(stderr);
 
-    if (_PyRuntime.mem.unused_arena_objects == NULL) {
+    if (unused_arena_objects == NULL) {
         uint i;
         uint numarenas;
         size_t nbytes;
@@ -589,18 +1043,18 @@ new_arena(void)
         /* Double the number of arena objects on each allocation.
          * Note that it's possible for `numarenas` to overflow.
          */
-        numarenas = _PyRuntime.mem.maxarenas ? _PyRuntime.mem.maxarenas << 1 : INITIAL_ARENA_OBJECTS;
-        if (numarenas <= _PyRuntime.mem.maxarenas)
+        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
+        if (numarenas <= maxarenas)
             return NULL;                /* overflow */
 #if SIZEOF_SIZE_T <= SIZEOF_INT
-        if (numarenas > SIZE_MAX / sizeof(*_PyRuntime.mem.arenas))
+        if (numarenas > SIZE_MAX / sizeof(*arenas))
             return NULL;                /* overflow */
 #endif
-        nbytes = numarenas * sizeof(*_PyRuntime.mem.arenas);
-        arenaobj = (struct arena_object *)PyMem_RawRealloc(_PyRuntime.mem.arenas, nbytes);
+        nbytes = numarenas * sizeof(*arenas);
+        arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
         if (arenaobj == NULL)
             return NULL;
-        _PyRuntime.mem.arenas = arenaobj;
+        arenas = arenaobj;
 
         /* We might need to fix pointers that were copied.  However,
          * new_arena only gets called when all the pages in the
@@ -608,45 +1062,45 @@ new_arena(void)
          * into the old array. Thus, we don't have to worry about
          * invalid pointers.  Just to be sure, some asserts:
          */
-        assert(_PyRuntime.mem.usable_arenas == NULL);
-        assert(_PyRuntime.mem.unused_arena_objects == NULL);
+        assert(usable_arenas == NULL);
+        assert(unused_arena_objects == NULL);
 
         /* Put the new arenas on the unused_arena_objects list. */
-        for (i = _PyRuntime.mem.maxarenas; i < numarenas; ++i) {
-            _PyRuntime.mem.arenas[i].address = 0;              /* mark as unassociated */
-            _PyRuntime.mem.arenas[i].nextarena = i < numarenas - 1 ?
-                                   &_PyRuntime.mem.arenas[i+1] : NULL;
+        for (i = maxarenas; i < numarenas; ++i) {
+            arenas[i].address = 0;              /* mark as unassociated */
+            arenas[i].nextarena = i < numarenas - 1 ?
+                                   &arenas[i+1] : NULL;
         }
 
         /* Update globals. */
-        _PyRuntime.mem.unused_arena_objects = &_PyRuntime.mem.arenas[_PyRuntime.mem.maxarenas];
-        _PyRuntime.mem.maxarenas = numarenas;
+        unused_arena_objects = &arenas[maxarenas];
+        maxarenas = numarenas;
     }
 
     /* Take the next available arena object off the head of the list. */
-    assert(_PyRuntime.mem.unused_arena_objects != NULL);
-    arenaobj = _PyRuntime.mem.unused_arena_objects;
-    _PyRuntime.mem.unused_arena_objects = arenaobj->nextarena;
+    assert(unused_arena_objects != NULL);
+    arenaobj = unused_arena_objects;
+    unused_arena_objects = arenaobj->nextarena;
     assert(arenaobj->address == 0);
-    address = _PyRuntime.obj.allocator_arenas.alloc(_PyRuntime.obj.allocator_arenas.ctx, ARENA_SIZE);
+    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
     if (address == NULL) {
         /* The allocation failed: return NULL after putting the
          * arenaobj back.
          */
-        arenaobj->nextarena = _PyRuntime.mem.unused_arena_objects;
-        _PyRuntime.mem.unused_arena_objects = arenaobj;
+        arenaobj->nextarena = unused_arena_objects;
+        unused_arena_objects = arenaobj;
         return NULL;
     }
     arenaobj->address = (uintptr_t)address;
 
-    ++_PyRuntime.mem.narenas_currently_allocated;
-    ++_PyRuntime.mem.ntimes_arena_allocated;
-    if (_PyRuntime.mem.narenas_currently_allocated > _PyRuntime.mem.narenas_highwater)
-        _PyRuntime.mem.narenas_highwater = _PyRuntime.mem.narenas_currently_allocated;
+    ++narenas_currently_allocated;
+    ++ntimes_arena_allocated;
+    if (narenas_currently_allocated > narenas_highwater)
+        narenas_highwater = narenas_currently_allocated;
     arenaobj->freepools = NULL;
     /* pool_address <- first pool-aligned address in the arena
        nfreepools <- number of whole pools that fit after alignment */
-    arenaobj->pool_address = (pyblock*)arenaobj->address;
+    arenaobj->pool_address = (block*)arenaobj->address;
     arenaobj->nfreepools = ARENA_SIZE / POOL_SIZE;
     assert(POOL_SIZE * arenaobj->nfreepools == ARENA_SIZE);
     excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
@@ -743,9 +1197,9 @@ address_in_range(void *p, poolp pool)
     // the GIL. The following dance forces the compiler to read pool->arenaindex
     // only once.
     uint arenaindex = *((volatile uint *)&pool->arenaindex);
-    return arenaindex < _PyRuntime.mem.maxarenas &&
-        (uintptr_t)p - _PyRuntime.mem.arenas[arenaindex].address < ARENA_SIZE &&
-        _PyRuntime.mem.arenas[arenaindex].address != 0;
+    return arenaindex < maxarenas &&
+        (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
+        arenas[arenaindex].address != 0;
 }
 
 /*==========================================================================*/
@@ -766,12 +1220,12 @@ static void *
 _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
 {
     size_t nbytes;
-    pyblock *bp;
+    block *bp;
     poolp pool;
     poolp next;
     uint size;
 
-    _PyRuntime.mem.num_allocated_blocks++;
+    _Py_AllocatedBlocks++;
 
     assert(elsize == 0 || nelem <= PY_SSIZE_T_MAX / elsize);
     nbytes = nelem * elsize;
@@ -792,7 +1246,7 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
          * Most frequent paths first
          */
         size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
-        pool = _PyRuntime.mem.usedpools[size + size];
+        pool = usedpools[size + size];
         if (pool != pool->nextpool) {
             /*
              * There is a used pool for this size class.
@@ -801,7 +1255,7 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
             ++pool->ref.count;
             bp = pool->freeblock;
             assert(bp != NULL);
-            if ((pool->freeblock = *(pyblock **)bp) != NULL) {
+            if ((pool->freeblock = *(block **)bp) != NULL) {
                 UNLOCK();
                 if (use_calloc)
                     memset(bp, 0, nbytes);
@@ -812,10 +1266,10 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
              */
             if (pool->nextoffset <= pool->maxnextoffset) {
                 /* There is room for another block. */
-                pool->freeblock = (pyblock*)pool +
+                pool->freeblock = (block*)pool +
                                   pool->nextoffset;
                 pool->nextoffset += INDEX2SIZE(size);
-                *(pyblock **)(pool->freeblock) = NULL;
+                *(block **)(pool->freeblock) = NULL;
                 UNLOCK();
                 if (use_calloc)
                     memset(bp, 0, nbytes);
@@ -835,29 +1289,29 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
         /* There isn't a pool of the right size class immediately
          * available:  use a free pool.
          */
-        if (_PyRuntime.mem.usable_arenas == NULL) {
+        if (usable_arenas == NULL) {
             /* No arena has a free pool:  allocate a new arena. */
 #ifdef WITH_MEMORY_LIMITS
-            if (_PyRuntime.mem.narenas_currently_allocated >= MAX_ARENAS) {
+            if (narenas_currently_allocated >= MAX_ARENAS) {
                 UNLOCK();
                 goto redirect;
             }
 #endif
-            _PyRuntime.mem.usable_arenas = new_arena();
-            if (_PyRuntime.mem.usable_arenas == NULL) {
+            usable_arenas = new_arena();
+            if (usable_arenas == NULL) {
                 UNLOCK();
                 goto redirect;
             }
-            _PyRuntime.mem.usable_arenas->nextarena =
-                _PyRuntime.mem.usable_arenas->prevarena = NULL;
+            usable_arenas->nextarena =
+                usable_arenas->prevarena = NULL;
         }
-        assert(_PyRuntime.mem.usable_arenas->address != 0);
+        assert(usable_arenas->address != 0);
 
         /* Try to get a cached free pool. */
-        pool = _PyRuntime.mem.usable_arenas->freepools;
+        pool = usable_arenas->freepools;
         if (pool != NULL) {
             /* Unlink from cached pools. */
-            _PyRuntime.mem.usable_arenas->freepools = pool->nextpool;
+            usable_arenas->freepools = pool->nextpool;
 
             /* This arena already had the smallest nfreepools
              * value, so decreasing nfreepools doesn't change
@@ -866,18 +1320,18 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
              * become wholly allocated, we need to remove its
              * arena_object from usable_arenas.
              */
-            --_PyRuntime.mem.usable_arenas->nfreepools;
-            if (_PyRuntime.mem.usable_arenas->nfreepools == 0) {
+            --usable_arenas->nfreepools;
+            if (usable_arenas->nfreepools == 0) {
                 /* Wholly allocated:  remove. */
-                assert(_PyRuntime.mem.usable_arenas->freepools == NULL);
-                assert(_PyRuntime.mem.usable_arenas->nextarena == NULL ||
-                       _PyRuntime.mem.usable_arenas->nextarena->prevarena ==
-                       _PyRuntime.mem.usable_arenas);
-
-                _PyRuntime.mem.usable_arenas = _PyRuntime.mem.usable_arenas->nextarena;
-                if (_PyRuntime.mem.usable_arenas != NULL) {
-                    _PyRuntime.mem.usable_arenas->prevarena = NULL;
-                    assert(_PyRuntime.mem.usable_arenas->address != 0);
+                assert(usable_arenas->freepools == NULL);
+                assert(usable_arenas->nextarena == NULL ||
+                       usable_arenas->nextarena->prevarena ==
+                       usable_arenas);
+
+                usable_arenas = usable_arenas->nextarena;
+                if (usable_arenas != NULL) {
+                    usable_arenas->prevarena = NULL;
+                    assert(usable_arenas->address != 0);
                 }
             }
             else {
@@ -886,14 +1340,14 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
                  * off all the arena's pools for the first
                  * time.
                  */
-                assert(_PyRuntime.mem.usable_arenas->freepools != NULL ||
-                       _PyRuntime.mem.usable_arenas->pool_address <=
-                       (pyblock*)_PyRuntime.mem.usable_arenas->address +
+                assert(usable_arenas->freepools != NULL ||
+                       usable_arenas->pool_address <=
+                       (block*)usable_arenas->address +
                            ARENA_SIZE - POOL_SIZE);
             }
         init_pool:
             /* Frontlink to used pools. */
-            next = _PyRuntime.mem.usedpools[size + size]; /* == prev */
+            next = usedpools[size + size]; /* == prev */
             pool->nextpool = next;
             pool->prevpool = next;
             next->nextpool = pool;
@@ -906,7 +1360,7 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
                  */
                 bp = pool->freeblock;
                 assert(bp != NULL);
-                pool->freeblock = *(pyblock **)bp;
+                pool->freeblock = *(block **)bp;
                 UNLOCK();
                 if (use_calloc)
                     memset(bp, 0, nbytes);
@@ -919,11 +1373,11 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
              */
             pool->szidx = size;
             size = INDEX2SIZE(size);
-            bp = (pyblock *)pool + POOL_OVERHEAD;
+            bp = (block *)pool + POOL_OVERHEAD;
             pool->nextoffset = POOL_OVERHEAD + (size << 1);
             pool->maxnextoffset = POOL_SIZE - size;
             pool->freeblock = bp + size;
-            *(pyblock **)(pool->freeblock) = NULL;
+            *(block **)(pool->freeblock) = NULL;
             UNLOCK();
             if (use_calloc)
                 memset(bp, 0, nbytes);
@@ -931,26 +1385,26 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
         }
 
         /* Carve off a new pool. */
-        assert(_PyRuntime.mem.usable_arenas->nfreepools > 0);
-        assert(_PyRuntime.mem.usable_arenas->freepools == NULL);
-        pool = (poolp)_PyRuntime.mem.usable_arenas->pool_address;
-        assert((pyblock*)pool <= (pyblock*)_PyRuntime.mem.usable_arenas->address +
-                                 ARENA_SIZE - POOL_SIZE);
-        pool->arenaindex = (uint)(_PyRuntime.mem.usable_arenas - _PyRuntime.mem.arenas);
-        assert(&_PyRuntime.mem.arenas[pool->arenaindex] == _PyRuntime.mem.usable_arenas);
+        assert(usable_arenas->nfreepools > 0);
+        assert(usable_arenas->freepools == NULL);
+        pool = (poolp)usable_arenas->pool_address;
+        assert((block*)pool <= (block*)usable_arenas->address +
+                               ARENA_SIZE - POOL_SIZE);
+        pool->arenaindex = (uint)(usable_arenas - arenas);
+        assert(&arenas[pool->arenaindex] == usable_arenas);
         pool->szidx = DUMMY_SIZE_IDX;
-        _PyRuntime.mem.usable_arenas->pool_address += POOL_SIZE;
-        --_PyRuntime.mem.usable_arenas->nfreepools;
+        usable_arenas->pool_address += POOL_SIZE;
+        --usable_arenas->nfreepools;
 
-        if (_PyRuntime.mem.usable_arenas->nfreepools == 0) {
-            assert(_PyRuntime.mem.usable_arenas->nextarena == NULL ||
-                   _PyRuntime.mem.usable_arenas->nextarena->prevarena ==
-                   _PyRuntime.mem.usable_arenas);
+        if (usable_arenas->nfreepools == 0) {
+            assert(usable_arenas->nextarena == NULL ||
+                   usable_arenas->nextarena->prevarena ==
+                   usable_arenas);
             /* Unlink the arena:  it is completely allocated. */
-            _PyRuntime.mem.usable_arenas = _PyRuntime.mem.usable_arenas->nextarena;
-            if (_PyRuntime.mem.usable_arenas != NULL) {
-                _PyRuntime.mem.usable_arenas->prevarena = NULL;
-                assert(_PyRuntime.mem.usable_arenas->address != 0);
+            usable_arenas = usable_arenas->nextarena;
+            if (usable_arenas != NULL) {
+                usable_arenas->prevarena = NULL;
+                assert(usable_arenas->address != 0);
             }
         }
 
@@ -972,7 +1426,7 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
         else
             result = PyMem_RawMalloc(nbytes);
         if (!result)
-            _PyRuntime.mem.num_allocated_blocks--;
+            _Py_AllocatedBlocks--;
         return result;
     }
 }
@@ -995,14 +1449,14 @@ static void
 _PyObject_Free(void *ctx, void *p)
 {
     poolp pool;
-    pyblock *lastfree;
+    block *lastfree;
     poolp next, prev;
     uint size;
 
     if (p == NULL)      /* free(NULL) has no effect */
         return;
 
-    _PyRuntime.mem.num_allocated_blocks--;
+    _Py_AllocatedBlocks--;
 
 #ifdef WITH_VALGRIND
     if (UNLIKELY(running_on_valgrind > 0))
@@ -1020,8 +1474,8 @@ _PyObject_Free(void *ctx, void *p)
          * list in any case).
          */
         assert(pool->ref.count > 0);            /* else it was empty */
-        *(pyblock **)p = lastfree = pool->freeblock;
-        pool->freeblock = (pyblock *)p;
+        *(block **)p = lastfree = pool->freeblock;
+        pool->freeblock = (block *)p;
         if (lastfree) {
             struct arena_object* ao;
             uint nf;  /* ao->nfreepools */
@@ -1047,7 +1501,7 @@ _PyObject_Free(void *ctx, void *p)
             /* Link the pool to freepools.  This is a singly-linked
              * list, and pool->prevpool isn't used there.
              */
-            ao = &_PyRuntime.mem.arenas[pool->arenaindex];
+            ao = &arenas[pool->arenaindex];
             pool->nextpool = ao->freepools;
             ao->freepools = pool;
             nf = ++ao->nfreepools;
@@ -1076,9 +1530,9 @@ _PyObject_Free(void *ctx, void *p)
                  * usable_arenas pointer.
                  */
                 if (ao->prevarena == NULL) {
-                    _PyRuntime.mem.usable_arenas = ao->nextarena;
-                    assert(_PyRuntime.mem.usable_arenas == NULL ||
-                           _PyRuntime.mem.usable_arenas->address != 0);
+                    usable_arenas = ao->nextarena;
+                    assert(usable_arenas == NULL ||
+                           usable_arenas->address != 0);
                 }
                 else {
                     assert(ao->prevarena->nextarena == ao);
@@ -1094,14 +1548,14 @@ _PyObject_Free(void *ctx, void *p)
                 /* Record that this arena_object slot is
                  * available to be reused.
                  */
-                ao->nextarena = _PyRuntime.mem.unused_arena_objects;
-                _PyRuntime.mem.unused_arena_objects = ao;
+                ao->nextarena = unused_arena_objects;
+                unused_arena_objects = ao;
 
                 /* Free the entire arena. */
-                _PyRuntime.obj.allocator_arenas.free(_PyRuntime.obj.allocator_arenas.ctx,
+                _PyObject_Arena.free(_PyObject_Arena.ctx,
                                      (void *)ao->address, ARENA_SIZE);
                 ao->address = 0;                        /* mark unassociated */
-                --_PyRuntime.mem.narenas_currently_allocated;
+                --narenas_currently_allocated;
 
                 UNLOCK();
                 return;
@@ -1112,12 +1566,12 @@ _PyObject_Free(void *ctx, void *p)
                  * ao->nfreepools was 0 before, ao isn't
                  * currently on the usable_arenas list.
                  */
-                ao->nextarena = _PyRuntime.mem.usable_arenas;
+                ao->nextarena = usable_arenas;
                 ao->prevarena = NULL;
-                if (_PyRuntime.mem.usable_arenas)
-                    _PyRuntime.mem.usable_arenas->prevarena = ao;
-                _PyRuntime.mem.usable_arenas = ao;
-                assert(_PyRuntime.mem.usable_arenas->address != 0);
+                if (usable_arenas)
+                    usable_arenas->prevarena = ao;
+                usable_arenas = ao;
+                assert(usable_arenas->address != 0);
 
                 UNLOCK();
                 return;
@@ -1147,8 +1601,8 @@ _PyObject_Free(void *ctx, void *p)
             }
             else {
                 /* ao is at the head of the list */
-                assert(_PyRuntime.mem.usable_arenas == ao);
-                _PyRuntime.mem.usable_arenas = ao->nextarena;
+                assert(usable_arenas == ao);
+                usable_arenas = ao->nextarena;
             }
             ao->nextarena->prevarena = ao->prevarena;
 
@@ -1177,7 +1631,7 @@ _PyObject_Free(void *ctx, void *p)
                       nf > ao->prevarena->nfreepools);
             assert(ao->nextarena == NULL ||
                 ao->nextarena->prevarena == ao);
-            assert((_PyRuntime.mem.usable_arenas == ao &&
+            assert((usable_arenas == ao &&
                 ao->prevarena == NULL) ||
                 ao->prevarena->nextarena == ao);
 
@@ -1193,7 +1647,7 @@ _PyObject_Free(void *ctx, void *p)
         --pool->ref.count;
         assert(pool->ref.count > 0);            /* else the pool is empty */
         size = pool->szidx;
-        next = _PyRuntime.mem.usedpools[size + size];
+        next = usedpools[size + size];
         prev = next->prevpool;
         /* insert pool before next:   prev <-> pool <-> next */
         pool->nextpool = next;
@@ -1315,13 +1769,15 @@ _Py_GetAllocatedBlocks(void)
 #define DEADBYTE       0xDB    /* dead (newly freed) memory */
 #define FORBIDDENBYTE  0xFB    /* untouchable bytes at each end of a block */
 
+static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
+
 /* serialno is always incremented via calling this routine.  The point is
  * to supply a single place to set a breakpoint.
  */
 static void
 bumpserialno(void)
 {
-    ++_PyRuntime.mem.serialno;
+    ++serialno;
 }
 
 #define SST SIZEOF_SIZE_T
@@ -1412,7 +1868,7 @@ _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
     /* at tail, write pad (SST bytes) and serialno (SST bytes) */
     tail = p + 2*SST + nbytes;
     memset(tail, FORBIDDENBYTE, SST);
-    write_size_t(tail + SST, _PyRuntime.mem.serialno);
+    write_size_t(tail + SST, serialno);
 
     return p + 2*SST;
 }
@@ -1497,7 +1953,7 @@ _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
 
     tail = q + nbytes;
     memset(tail, FORBIDDENBYTE, SST);
-    write_size_t(tail + SST, _PyRuntime.mem.serialno);
+    write_size_t(tail + SST, serialno);
 
     if (nbytes > original_nbytes) {
         /* growing:  mark new extra memory clean */
@@ -1829,16 +2285,16 @@ _PyObject_DebugMallocStats(FILE *out)
      * to march over all the arenas.  If we're lucky, most of the memory
      * will be living in full pools -- would be a shame to miss them.
      */
-    for (i = 0; i < _PyRuntime.mem.maxarenas; ++i) {
+    for (i = 0; i < maxarenas; ++i) {
         uint j;
-        uintptr_t base = _PyRuntime.mem.arenas[i].address;
+        uintptr_t base = arenas[i].address;
 
         /* Skip arenas which are not allocated. */
-        if (_PyRuntime.mem.arenas[i].address == (uintptr_t)NULL)
+        if (arenas[i].address == (uintptr_t)NULL)
             continue;
         narenas += 1;
 
-        numfreepools += _PyRuntime.mem.arenas[i].nfreepools;
+        numfreepools += arenas[i].nfreepools;
 
         /* round up to pool alignment */
         if (base & (uintptr_t)POOL_SIZE_MASK) {
@@ -1848,8 +2304,8 @@ _PyObject_DebugMallocStats(FILE *out)
         }
 
         /* visit every pool in the arena */
-        assert(base <= (uintptr_t) _PyRuntime.mem.arenas[i].pool_address);
-        for (j = 0; base < (uintptr_t) _PyRuntime.mem.arenas[i].pool_address;
+        assert(base <= (uintptr_t) arenas[i].pool_address);
+        for (j = 0; base < (uintptr_t) arenas[i].pool_address;
              ++j, base += POOL_SIZE) {
             poolp p = (poolp)base;
             const uint sz = p->szidx;
@@ -1858,7 +2314,7 @@ _PyObject_DebugMallocStats(FILE *out)
             if (p->ref.count == 0) {
                 /* currently unused */
 #ifdef Py_DEBUG
-                assert(pool_is_in_list(p, _PyRuntime.mem.arenas[i].freepools));
+                assert(pool_is_in_list(p, arenas[i].freepools));
 #endif
                 continue;
             }
@@ -1868,11 +2324,11 @@ _PyObject_DebugMallocStats(FILE *out)
             numfreeblocks[sz] += freeblocks;
 #ifdef Py_DEBUG
             if (freeblocks > 0)
-                assert(pool_is_in_list(p, _PyRuntime.mem.usedpools[sz + sz]));
+                assert(pool_is_in_list(p, usedpools[sz + sz]));
 #endif
         }
     }
-    assert(narenas == _PyRuntime.mem.narenas_currently_allocated);
+    assert(narenas == narenas_currently_allocated);
 
     fputc('\n', out);
     fputs("class   size   num pools   blocks in use  avail blocks\n"
@@ -1900,10 +2356,10 @@ _PyObject_DebugMallocStats(FILE *out)
     }
     fputc('\n', out);
     if (_PyMem_DebugEnabled())
-        (void)printone(out, "# times object malloc called", _PyRuntime.mem.serialno);
-    (void)printone(out, "# arenas allocated total", _PyRuntime.mem.ntimes_arena_allocated);
-    (void)printone(out, "# arenas reclaimed", _PyRuntime.mem.ntimes_arena_allocated - narenas);
-    (void)printone(out, "# arenas highwater mark", _PyRuntime.mem.narenas_highwater);
+        (void)printone(out, "# times object malloc called", serialno);
+    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
+    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
+    (void)printone(out, "# arenas highwater mark", narenas_highwater);
     (void)printone(out, "# arenas allocated current", narenas);
 
     PyOS_snprintf(buf, sizeof(buf),
diff --git a/Objects/setobject.c b/Objects/setobject.c
index 6001f7b6f43..219e81d0baf 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -1115,7 +1115,6 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     }
     /* The empty frozenset is a singleton */
     if (emptyfrozenset == NULL)
-        /* There is a possible (relatively harmless) race here. */
         emptyfrozenset = make_new_set(type, NULL);
     Py_XINCREF(emptyfrozenset);
     return emptyfrozenset;
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 6bf474a7d1f..1d963aae3f8 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -1157,10 +1157,10 @@ subtype_dealloc(PyObject *self)
     /* UnTrack and re-Track around the trashcan macro, alas */
     /* See explanation at end of function for full disclosure */
     PyObject_GC_UnTrack(self);
-    ++_PyRuntime.gc.trash_delete_nesting;
+    ++_PyTrash_delete_nesting;
     ++ tstate->trash_delete_nesting;
     Py_TRASHCAN_SAFE_BEGIN(self);
-    --_PyRuntime.gc.trash_delete_nesting;
+    --_PyTrash_delete_nesting;
     -- tstate->trash_delete_nesting;
 
     /* Find the nearest base with a different tp_dealloc */
@@ -1254,10 +1254,10 @@ subtype_dealloc(PyObject *self)
       Py_DECREF(type);
 
   endlabel:
-    ++_PyRuntime.gc.trash_delete_nesting;
+    ++_PyTrash_delete_nesting;
     ++ tstate->trash_delete_nesting;
     Py_TRASHCAN_SAFE_END(self);
-    --_PyRuntime.gc.trash_delete_nesting;
+    --_PyTrash_delete_nesting;
     -- tstate->trash_delete_nesting;
 
     /* Explanation of the weirdness around the trashcan macros:
@@ -1297,7 +1297,7 @@ subtype_dealloc(PyObject *self)
           a subtle disaster.
 
        Q. Why the bizarre (net-zero) manipulation of
-          _PyRuntime.trash_delete_nesting around the trashcan macros?
+          _PyTrash_delete_nesting around the trashcan macros?
 
        A. Some base classes (e.g. list) also use the trashcan mechanism.
           The following scenario used to be possible:
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index 5db80b6cf7c..8ebb22e0e2b 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -106,14 +106,6 @@
     <ClInclude Include="..\Include\graminit.h" />
     <ClInclude Include="..\Include\grammar.h" />
     <ClInclude Include="..\Include\import.h" />
-    <ClInclude Include="..\Include\internal\_Python.h" />
-    <ClInclude Include="..\Include\internal\_ceval.h" />
-    <ClInclude Include="..\Include\internal\_condvar.h" />
-    <ClInclude Include="..\Include\internal\_gil.h" />
-    <ClInclude Include="..\Include\internal\_mem.h" />
-    <ClInclude Include="..\Include\internal\_pymalloc.h" />
-    <ClInclude Include="..\Include\internal\_pystate.h" />
-    <ClInclude Include="..\Include\internal\_warnings.h" />
     <ClInclude Include="..\Include\intrcheck.h" />
     <ClInclude Include="..\Include\iterobject.h" />
     <ClInclude Include="..\Include\listobject.h" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index e5a9b6293c8..cbe1a3943ff 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -129,30 +129,6 @@
     <ClInclude Include="..\Include\import.h">
       <Filter>Include</Filter>
     </ClInclude>
-    <ClInclude Include="..\Include\internal\_Python.h">
-      <Filter>Include</Filter>
-    </ClInclude>
-    <ClInclude Include="..\Include\internal\_ceval.h">
-      <Filter>Include</Filter>
-    </ClInclude>
-    <ClInclude Include="..\Include\internal\_condvar.h">
-      <Filter>Include</Filter>
-    </ClInclude>
-    <ClInclude Include="..\Include\internal\_gil.h">
-      <Filter>Include</Filter>
-    </ClInclude>
-    <ClInclude Include="..\Include\internal\_mem.h">
-      <Filter>Include</Filter>
-    </ClInclude>
-    <ClInclude Include="..\Include\internal\_pymalloc.h">
-      <Filter>Include</Filter>
-    </ClInclude>
-    <ClInclude Include="..\Include\internal\_pystate.h">
-      <Filter>Include</Filter>
-    </ClInclude>
-    <ClInclude Include="..\Include\internal\_warnings.h">
-      <Filter>Include</Filter>
-    </ClInclude>
     <ClInclude Include="..\Include\intrcheck.h">
       <Filter>Include</Filter>
     </ClInclude>
diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c
index fd927c0a96b..e386248c2f8 100644
--- a/Parser/pgenmain.c
+++ b/Parser/pgenmain.c
@@ -21,12 +21,10 @@
 #include "node.h"
 #include "parsetok.h"
 #include "pgen.h"
-#include "internal/_mem.h"
 
 int Py_DebugFlag;
 int Py_VerboseFlag;
 int Py_IgnoreEnvironmentFlag;
-struct pyruntimestate _PyRuntime = {};
 
 /* Forward */
 grammar *getgrammar(const char *filename);
@@ -63,8 +61,6 @@ main(int argc, char **argv)
     filename = argv[1];
     graminit_h = argv[2];
     graminit_c = argv[3];
-    _PyObject_Initialize(&_PyRuntime.obj);
-    _PyMem_Initialize(&_PyRuntime.mem);
     g = getgrammar(filename);
     fp = fopen(graminit_c, "w");
     if (fp == NULL) {
diff --git a/Python/_warnings.c b/Python/_warnings.c
index a5e42a31dc4..8616195c4e3 100644
--- a/Python/_warnings.c
+++ b/Python/_warnings.c
@@ -8,6 +8,13 @@ PyDoc_STRVAR(warnings__doc__,
 MODULE_NAME " provides basic warning filtering support.\n"
 "It is a helper module to speed up interpreter start-up.");
 
+/* Both 'filters' and 'onceregistry' can be set in warnings.py;
+   get_warnings_attr() will reset these variables accordingly. */
+static PyObject *_filters;  /* List */
+static PyObject *_once_registry;  /* Dict */
+static PyObject *_default_action; /* String */
+static long _filters_version;
+
 _Py_IDENTIFIER(argv);
 _Py_IDENTIFIER(stderr);
 
@@ -46,7 +53,7 @@ get_warnings_attr(const char *attr, int try_import)
     }
 
     /* don't try to import after the start of the Python finallization */
-    if (try_import && !_Py_IS_FINALIZING()) {
+    if (try_import && _Py_Finalizing == NULL) {
         warnings_module = PyImport_Import(warnings_str);
         if (warnings_module == NULL) {
             /* Fallback to the C implementation if we cannot get
@@ -83,10 +90,10 @@ get_once_registry(void)
     if (registry == NULL) {
         if (PyErr_Occurred())
             return NULL;
-        return _PyRuntime.warnings.once_registry;
+        return _once_registry;
     }
-    Py_DECREF(_PyRuntime.warnings.once_registry);
-    _PyRuntime.warnings.once_registry = registry;
+    Py_DECREF(_once_registry);
+    _once_registry = registry;
     return registry;
 }
 
@@ -101,11 +108,11 @@ get_default_action(void)
         if (PyErr_Occurred()) {
             return NULL;
         }
-        return _PyRuntime.warnings.default_action;
+        return _default_action;
     }
 
-    Py_DECREF(_PyRuntime.warnings.default_action);
-    _PyRuntime.warnings.default_action = default_action;
+    Py_DECREF(_default_action);
+    _default_action = default_action;
     return default_action;
 }
 
@@ -125,24 +132,23 @@ get_filter(PyObject *category, PyObject *text, Py_ssize_t lineno,
             return NULL;
     }
     else {
-        Py_DECREF(_PyRuntime.warnings.filters);
-        _PyRuntime.warnings.filters = warnings_filters;
+        Py_DECREF(_filters);
+        _filters = warnings_filters;
     }
 
-    PyObject *filters = _PyRuntime.warnings.filters;
-    if (filters == NULL || !PyList_Check(filters)) {
+    if (_filters == NULL || !PyList_Check(_filters)) {
         PyErr_SetString(PyExc_ValueError,
                         MODULE_NAME ".filters must be a list");
         return NULL;
     }
 
-    /* _PyRuntime.warnings.filters could change while we are iterating over it. */
-    for (i = 0; i < PyList_GET_SIZE(filters); i++) {
+    /* _filters could change while we are iterating over it. */
+    for (i = 0; i < PyList_GET_SIZE(_filters); i++) {
         PyObject *tmp_item, *action, *msg, *cat, *mod, *ln_obj;
         Py_ssize_t ln;
         int is_subclass, good_msg, good_mod;
 
-        tmp_item = PyList_GET_ITEM(filters, i);
+        tmp_item = PyList_GET_ITEM(_filters, i);
         if (!PyTuple_Check(tmp_item) || PyTuple_GET_SIZE(tmp_item) != 5) {
             PyErr_Format(PyExc_ValueError,
                          MODULE_NAME ".filters item %zd isn't a 5-tuple", i);
@@ -214,9 +220,9 @@ already_warned(PyObject *registry, PyObject *key, int should_set)
     version_obj = _PyDict_GetItemId(registry, &PyId_version);
     if (version_obj == NULL
         || !PyLong_CheckExact(version_obj)
-        || PyLong_AsLong(version_obj) != _PyRuntime.warnings.filters_version) {
+        || PyLong_AsLong(version_obj) != _filters_version) {
         PyDict_Clear(registry);
-        version_obj = PyLong_FromLong(_PyRuntime.warnings.filters_version);
+        version_obj = PyLong_FromLong(_filters_version);
         if (version_obj == NULL)
             return -1;
         if (_PyDict_SetItemId(registry, &PyId_version, version_obj) < 0) {
@@ -514,7 +520,7 @@ warn_explicit(PyObject *category, PyObject *message,
                 if (registry == NULL)
                     goto cleanup;
             }
-            /* _PyRuntime.warnings.once_registry[(text, category)] = 1 */
+            /* _once_registry[(text, category)] = 1 */
             rc = update_registry(registry, text, category, 0);
         }
         else if (_PyUnicode_EqualToASCIIString(action, "module")) {
@@ -904,7 +910,7 @@ warnings_warn_explicit(PyObject *self, PyObject *args, PyObject *kwds)
 static PyObject *
 warnings_filters_mutated(PyObject *self, PyObject *args)
 {
-    _PyRuntime.warnings.filters_version++;
+    _filters_version++;
     Py_RETURN_NONE;
 }
 
@@ -1154,8 +1160,7 @@ create_filter(PyObject *category, const char *action)
     }
 
     /* This assumes the line number is zero for now. */
-    return PyTuple_Pack(5, action_obj, Py_None,
-                        category, Py_None, _PyLong_Zero);
+    return PyTuple_Pack(5, action_obj, Py_None, category, Py_None, _PyLong_Zero);
 }
 
 static PyObject *
@@ -1223,35 +1228,33 @@ _PyWarnings_Init(void)
     if (m == NULL)
         return NULL;
 
-    if (_PyRuntime.warnings.filters == NULL) {
-        _PyRuntime.warnings.filters = init_filters();
-        if (_PyRuntime.warnings.filters == NULL)
+    if (_filters == NULL) {
+        _filters = init_filters();
+        if (_filters == NULL)
             return NULL;
     }
-    Py_INCREF(_PyRuntime.warnings.filters);
-    if (PyModule_AddObject(m, "filters", _PyRuntime.warnings.filters) < 0)
+    Py_INCREF(_filters);
+    if (PyModule_AddObject(m, "filters", _filters) < 0)
         return NULL;
 
-    if (_PyRuntime.warnings.once_registry == NULL) {
-        _PyRuntime.warnings.once_registry = PyDict_New();
-        if (_PyRuntime.warnings.once_registry == NULL)
+    if (_once_registry == NULL) {
+        _once_registry = PyDict_New();
+        if (_once_registry == NULL)
             return NULL;
     }
-    Py_INCREF(_PyRuntime.warnings.once_registry);
-    if (PyModule_AddObject(m, "_onceregistry",
-                           _PyRuntime.warnings.once_registry) < 0)
+    Py_INCREF(_once_registry);
+    if (PyModule_AddObject(m, "_onceregistry", _once_registry) < 0)
         return NULL;
 
-    if (_PyRuntime.warnings.default_action == NULL) {
-        _PyRuntime.warnings.default_action = PyUnicode_FromString("default");
-        if (_PyRuntime.warnings.default_action == NULL)
+    if (_default_action == NULL) {
+        _default_action = PyUnicode_FromString("default");
+        if (_default_action == NULL)
             return NULL;
     }
-    Py_INCREF(_PyRuntime.warnings.default_action);
-    if (PyModule_AddObject(m, "_defaultaction",
-                           _PyRuntime.warnings.default_action) < 0)
+    Py_INCREF(_default_action);
+    if (PyModule_AddObject(m, "_defaultaction", _default_action) < 0)
         return NULL;
 
-    _PyRuntime.warnings.filters_version = 0;
+    _filters_version = 0;
     return m;
 }
diff --git a/Python/ceval.c b/Python/ceval.c
index 9741c15b892..436e5cad25f 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -36,8 +36,7 @@ extern int _PyObject_GetMethod(PyObject *, PyObject *, PyObject **);
 typedef PyObject *(*callproc)(PyObject *, PyObject *, PyObject *);
 
 /* Forward declarations */
-Py_LOCAL_INLINE(PyObject *) call_function(PyObject ***, Py_ssize_t,
-                                          PyObject *);
+Py_LOCAL_INLINE(PyObject *) call_function(PyObject ***, Py_ssize_t, PyObject *);
 static PyObject * do_call_core(PyObject *, PyObject *, PyObject *);
 
 #ifdef LLTRACE
@@ -53,15 +52,13 @@ static int call_trace_protected(Py_tracefunc, PyObject *,
 static void call_exc_trace(Py_tracefunc, PyObject *,
                            PyThreadState *, PyFrameObject *);
 static int maybe_call_line_trace(Py_tracefunc, PyObject *,
-                                 PyThreadState *, PyFrameObject *,
-                                 int *, int *, int *);
+                                 PyThreadState *, PyFrameObject *, int *, int *, int *);
 static void maybe_dtrace_line(PyFrameObject *, int *, int *, int *);
 static void dtrace_function_entry(PyFrameObject *);
 static void dtrace_function_return(PyFrameObject *);
 
 static PyObject * cmp_outcome(int, PyObject *, PyObject *);
-static PyObject * import_name(PyFrameObject *, PyObject *, PyObject *,
-                              PyObject *);
+static PyObject * import_name(PyFrameObject *, PyObject *, PyObject *, PyObject *);
 static PyObject * import_from(PyObject *, PyObject *);
 static int import_all_from(PyObject *, PyObject *);
 static void format_exc_check_arg(PyObject *, const char *, PyObject *);
@@ -91,7 +88,7 @@ static long dxp[256];
 #endif
 
 #ifdef WITH_THREAD
-#define GIL_REQUEST _Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request)
+#define GIL_REQUEST _Py_atomic_load_relaxed(&gil_drop_request)
 #else
 #define GIL_REQUEST 0
 #endif
@@ -101,22 +98,22 @@ static long dxp[256];
    the GIL eventually anyway. */
 #define COMPUTE_EVAL_BREAKER() \
     _Py_atomic_store_relaxed( \
-        &_PyRuntime.ceval.eval_breaker, \
+        &eval_breaker, \
         GIL_REQUEST | \
-        _Py_atomic_load_relaxed(&_PyRuntime.ceval.pending.calls_to_do) | \
-        _PyRuntime.ceval.pending.async_exc)
+        _Py_atomic_load_relaxed(&pendingcalls_to_do) | \
+        pending_async_exc)
 
 #ifdef WITH_THREAD
 
 #define SET_GIL_DROP_REQUEST() \
     do { \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil_drop_request, 1); \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
+        _Py_atomic_store_relaxed(&gil_drop_request, 1); \
+        _Py_atomic_store_relaxed(&eval_breaker, 1); \
     } while (0)
 
 #define RESET_GIL_DROP_REQUEST() \
     do { \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil_drop_request, 0); \
+        _Py_atomic_store_relaxed(&gil_drop_request, 0); \
         COMPUTE_EVAL_BREAKER(); \
     } while (0)
 
@@ -125,35 +122,47 @@ static long dxp[256];
 /* Pending calls are only modified under pending_lock */
 #define SIGNAL_PENDING_CALLS() \
     do { \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.pending.calls_to_do, 1); \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
+        _Py_atomic_store_relaxed(&pendingcalls_to_do, 1); \
+        _Py_atomic_store_relaxed(&eval_breaker, 1); \
     } while (0)
 
 #define UNSIGNAL_PENDING_CALLS() \
     do { \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.pending.calls_to_do, 0); \
+        _Py_atomic_store_relaxed(&pendingcalls_to_do, 0); \
         COMPUTE_EVAL_BREAKER(); \
     } while (0)
 
 #define SIGNAL_ASYNC_EXC() \
     do { \
-        _PyRuntime.ceval.pending.async_exc = 1; \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
+        pending_async_exc = 1; \
+        _Py_atomic_store_relaxed(&eval_breaker, 1); \
     } while (0)
 
 #define UNSIGNAL_ASYNC_EXC() \
-    do { \
-        _PyRuntime.ceval.pending.async_exc = 0; \
-        COMPUTE_EVAL_BREAKER(); \
-    } while (0)
+    do { pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0)
 
 
+/* This single variable consolidates all requests to break out of the fast path
+   in the eval loop. */
+static _Py_atomic_int eval_breaker = {0};
+/* Request for running pending calls. */
+static _Py_atomic_int pendingcalls_to_do = {0};
+/* Request for looking at the `async_exc` field of the current thread state.
+   Guarded by the GIL. */
+static int pending_async_exc = 0;
+
 #ifdef WITH_THREAD
 
 #ifdef HAVE_ERRNO_H
 #include <errno.h>
 #endif
 #include "pythread.h"
+
+static PyThread_type_lock pending_lock = 0; /* for pending calls */
+static unsigned long main_thread = 0;
+/* Request for dropping the GIL */
+static _Py_atomic_int gil_drop_request = {0};
+
 #include "ceval_gil.h"
 
 int
@@ -169,9 +178,9 @@ PyEval_InitThreads(void)
         return;
     create_gil();
     take_gil(PyThreadState_GET());
-    _PyRuntime.ceval.pending.main_thread = PyThread_get_thread_ident();
-    if (!_PyRuntime.ceval.pending.lock)
-        _PyRuntime.ceval.pending.lock = PyThread_allocate_lock();
+    main_thread = PyThread_get_thread_ident();
+    if (!pending_lock)
+        pending_lock = PyThread_allocate_lock();
 }
 
 void
@@ -239,9 +248,9 @@ PyEval_ReInitThreads(void)
     if (!gil_created())
         return;
     recreate_gil();
-    _PyRuntime.ceval.pending.lock = PyThread_allocate_lock();
+    pending_lock = PyThread_allocate_lock();
     take_gil(current_tstate);
-    _PyRuntime.ceval.pending.main_thread = PyThread_get_thread_ident();
+    main_thread = PyThread_get_thread_ident();
 
     /* Destroy all threads except the current one */
     _PyThreadState_DeleteExcept(current_tstate);
@@ -285,7 +294,7 @@ PyEval_RestoreThread(PyThreadState *tstate)
         int err = errno;
         take_gil(tstate);
         /* _Py_Finalizing is protected by the GIL */
-        if (_Py_IS_FINALIZING() && !_Py_CURRENTLY_FINALIZING(tstate)) {
+        if (_Py_Finalizing && tstate != _Py_Finalizing) {
             drop_gil(tstate);
             PyThread_exit_thread();
             assert(0);  /* unreachable */
@@ -337,11 +346,19 @@ _PyEval_SignalReceived(void)
    callback.
  */
 
+#define NPENDINGCALLS 32
+static struct {
+    int (*func)(void *);
+    void *arg;
+} pendingcalls[NPENDINGCALLS];
+static int pendingfirst = 0;
+static int pendinglast = 0;
+
 int
 Py_AddPendingCall(int (*func)(void *), void *arg)
 {
     int i, j, result=0;
-    PyThread_type_lock lock = _PyRuntime.ceval.pending.lock;
+    PyThread_type_lock lock = pending_lock;
 
     /* try a few times for the lock.  Since this mechanism is used
      * for signal handling (on the main thread), there is a (slim)
@@ -363,14 +380,14 @@ Py_AddPendingCall(int (*func)(void *), void *arg)
             return -1;
     }
 
-    i = _PyRuntime.ceval.pending.last;
+    i = pendinglast;
     j = (i + 1) % NPENDINGCALLS;
-    if (j == _PyRuntime.ceval.pending.first) {
+    if (j == pendingfirst) {
         result = -1; /* Queue full */
     } else {
-        _PyRuntime.ceval.pending.calls[i].func = func;
-        _PyRuntime.ceval.pending.calls[i].arg = arg;
-        _PyRuntime.ceval.pending.last = j;
+        pendingcalls[i].func = func;
+        pendingcalls[i].arg = arg;
+        pendinglast = j;
     }
     /* signal main loop */
     SIGNAL_PENDING_CALLS();
@@ -388,19 +405,16 @@ Py_MakePendingCalls(void)
 
     assert(PyGILState_Check());
 
-    if (!_PyRuntime.ceval.pending.lock) {
+    if (!pending_lock) {
         /* initial allocation of the lock */
-        _PyRuntime.ceval.pending.lock = PyThread_allocate_lock();
-        if (_PyRuntime.ceval.pending.lock == NULL)
+        pending_lock = PyThread_allocate_lock();
+        if (pending_lock == NULL)
             return -1;
     }
 
     /* only service pending calls on main thread */
-    if (_PyRuntime.ceval.pending.main_thread &&
-        PyThread_get_thread_ident() != _PyRuntime.ceval.pending.main_thread)
-    {
+    if (main_thread && PyThread_get_thread_ident() != main_thread)
         return 0;
-    }
     /* don't perform recursive pending calls */
     if (busy)
         return 0;
@@ -422,16 +436,16 @@ Py_MakePendingCalls(void)
         void *arg = NULL;
 
         /* pop one item off the queue while holding the lock */
-        PyThread_acquire_lock(_PyRuntime.ceval.pending.lock, WAIT_LOCK);
-        j = _PyRuntime.ceval.pending.first;
-        if (j == _PyRuntime.ceval.pending.last) {
+        PyThread_acquire_lock(pending_lock, WAIT_LOCK);
+        j = pendingfirst;
+        if (j == pendinglast) {
             func = NULL; /* Queue empty */
         } else {
-            func = _PyRuntime.ceval.pending.calls[j].func;
-            arg = _PyRuntime.ceval.pending.calls[j].arg;
-            _PyRuntime.ceval.pending.first = (j + 1) % NPENDINGCALLS;
+            func = pendingcalls[j].func;
+            arg = pendingcalls[j].arg;
+            pendingfirst = (j + 1) % NPENDINGCALLS;
         }
-        PyThread_release_lock(_PyRuntime.ceval.pending.lock);
+        PyThread_release_lock(pending_lock);
         /* having released the lock, perform the callback */
         if (func == NULL)
             break;
@@ -475,6 +489,14 @@ Py_MakePendingCalls(void)
    The two threads could theoretically wiggle around the "busy" variable.
 */
 
+#define NPENDINGCALLS 32
+static struct {
+    int (*func)(void *);
+    void *arg;
+} pendingcalls[NPENDINGCALLS];
+static volatile int pendingfirst = 0;
+static volatile int pendinglast = 0;
+
 int
 Py_AddPendingCall(int (*func)(void *), void *arg)
 {
@@ -484,15 +506,15 @@ Py_AddPendingCall(int (*func)(void *), void *arg)
     if (busy)
         return -1;
     busy = 1;
-    i = _PyRuntime.ceval.pending.last;
+    i = pendinglast;
     j = (i + 1) % NPENDINGCALLS;
-    if (j == _PyRuntime.ceval.pending.first) {
+    if (j == pendingfirst) {
         busy = 0;
         return -1; /* Queue full */
     }
-    _PyRuntime.ceval.pending.calls[i].func = func;
-    _PyRuntime.ceval.pending.calls[i].arg = arg;
-    _PyRuntime.ceval.pending.last = j;
+    pendingcalls[i].func = func;
+    pendingcalls[i].arg = arg;
+    pendinglast = j;
 
     SIGNAL_PENDING_CALLS();
     busy = 0;
@@ -521,12 +543,12 @@ Py_MakePendingCalls(void)
         int i;
         int (*func)(void *);
         void *arg;
-        i = _PyRuntime.ceval.pending.first;
-        if (i == _PyRuntime.ceval.pending.last)
+        i = pendingfirst;
+        if (i == pendinglast)
             break; /* Queue empty */
-        func = _PyRuntime.ceval.pending.calls[i].func;
-        arg = _PyRuntime.ceval.pending.calls[i].arg;
-        _PyRuntime.ceval.pending.first = (i + 1) % NPENDINGCALLS;
+        func = pendingcalls[i].func;
+        arg = pendingcalls[i].arg;
+        pendingfirst = (i + 1) % NPENDINGCALLS;
         if (func(arg) < 0) {
             goto error;
         }
@@ -548,32 +570,20 @@ Py_MakePendingCalls(void)
 #ifndef Py_DEFAULT_RECURSION_LIMIT
 #define Py_DEFAULT_RECURSION_LIMIT 1000
 #endif
-
-void
-_PyEval_Initialize(struct _ceval_runtime_state *state)
-{
-    state->recursion_limit = Py_DEFAULT_RECURSION_LIMIT;
-    state->check_recursion_limit = Py_DEFAULT_RECURSION_LIMIT;
-    _gil_initialize(&state->gil);
-}
-
-int
-_PyEval_CheckRecursionLimit(void)
-{
-    return _PyRuntime.ceval.check_recursion_limit;
-}
+static int recursion_limit = Py_DEFAULT_RECURSION_LIMIT;
+int _Py_CheckRecursionLimit = Py_DEFAULT_RECURSION_LIMIT;
 
 int
 Py_GetRecursionLimit(void)
 {
-    return _PyRuntime.ceval.recursion_limit;
+    return recursion_limit;
 }
 
 void
 Py_SetRecursionLimit(int new_limit)
 {
-    _PyRuntime.ceval.recursion_limit = new_limit;
-    _PyRuntime.ceval.check_recursion_limit = _PyRuntime.ceval.recursion_limit;
+    recursion_limit = new_limit;
+    _Py_CheckRecursionLimit = recursion_limit;
 }
 
 /* the macro Py_EnterRecursiveCall() only calls _Py_CheckRecursiveCall()
@@ -585,7 +595,6 @@ int
 _Py_CheckRecursiveCall(const char *where)
 {
     PyThreadState *tstate = PyThreadState_GET();
-    int recursion_limit = _PyRuntime.ceval.recursion_limit;
 
 #ifdef USE_STACKCHECK
     if (PyOS_CheckStack()) {
@@ -594,7 +603,7 @@ _Py_CheckRecursiveCall(const char *where)
         return -1;
     }
 #endif
-    _PyRuntime.ceval.check_recursion_limit = recursion_limit;
+    _Py_CheckRecursionLimit = recursion_limit;
     if (tstate->recursion_critical)
         /* Somebody asked that we don't check for recursion. */
         return 0;
@@ -633,7 +642,13 @@ static void restore_and_clear_exc_state(PyThreadState *, PyFrameObject *);
 static int do_raise(PyObject *, PyObject *);
 static int unpack_iterable(PyObject *, int, int, PyObject **);
 
-#define _Py_TracingPossible _PyRuntime.ceval.tracing_possible
+/* Records whether tracing is on for any thread.  Counts the number of
+   threads for which tstate->c_tracefunc is non-NULL, so if the value
+   is 0, we know we don't have to check this thread's c_tracefunc.
+   This speeds up the if statement in PyEval_EvalFrameEx() after
+   fast_next_opcode*/
+static int _Py_TracingPossible = 0;
+
 
 
 PyObject *
@@ -764,7 +779,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
 
 #define DISPATCH() \
     { \
-        if (!_Py_atomic_load_relaxed(&_PyRuntime.ceval.eval_breaker)) { \
+        if (!_Py_atomic_load_relaxed(&eval_breaker)) {      \
                     FAST_DISPATCH(); \
         } \
         continue; \
@@ -812,8 +827,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
 /* Code access macros */
 
 /* The integer overflow is checked by an assertion below. */
-#define INSTR_OFFSET()  \
-    (sizeof(_Py_CODEUNIT) * (int)(next_instr - first_instr))
+#define INSTR_OFFSET()  (sizeof(_Py_CODEUNIT) * (int)(next_instr - first_instr))
 #define NEXTOPARG()  do { \
         _Py_CODEUNIT word = *next_instr; \
         opcode = _Py_OPCODE(word); \
@@ -1066,7 +1080,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
            async I/O handler); see Py_AddPendingCall() and
            Py_MakePendingCalls() above. */
 
-        if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.eval_breaker)) {
+        if (_Py_atomic_load_relaxed(&eval_breaker)) {
             if (_Py_OPCODE(*next_instr) == SETUP_FINALLY ||
                 _Py_OPCODE(*next_instr) == YIELD_FROM) {
                 /* Two cases where we skip running signal handlers and other
@@ -1083,16 +1097,12 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
                 */
                 goto fast_next_opcode;
             }
-            if (_Py_atomic_load_relaxed(
-                        &_PyRuntime.ceval.pending.calls_to_do))
-            {
+            if (_Py_atomic_load_relaxed(&pendingcalls_to_do)) {
                 if (Py_MakePendingCalls() < 0)
                     goto error;
             }
 #ifdef WITH_THREAD
-            if (_Py_atomic_load_relaxed(
-                        &_PyRuntime.ceval.gil_drop_request))
-            {
+            if (_Py_atomic_load_relaxed(&gil_drop_request)) {
                 /* Give another thread a chance */
                 if (PyThreadState_Swap(NULL) != tstate)
                     Py_FatalError("ceval: tstate mix-up");
@@ -1103,9 +1113,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
                 take_gil(tstate);
 
                 /* Check if we should make a quick exit. */
-                if (_Py_IS_FINALIZING() &&
-                    !_Py_CURRENTLY_FINALIZING(tstate))
-                {
+                if (_Py_Finalizing && _Py_Finalizing != tstate) {
                     drop_gil(tstate);
                     PyThread_exit_thread();
                 }
diff --git a/Python/ceval_gil.h b/Python/ceval_gil.h
index ef5189068e0..a3b450bd5c4 100644
--- a/Python/ceval_gil.h
+++ b/Python/ceval_gil.h
@@ -8,13 +8,20 @@
 
 /* First some general settings */
 
-#define INTERVAL (_PyRuntime.ceval.gil.interval >= 1 ? _PyRuntime.ceval.gil.interval : 1)
+/* microseconds (the Python API uses seconds, though) */
+#define DEFAULT_INTERVAL 5000
+static unsigned long gil_interval = DEFAULT_INTERVAL;
+#define INTERVAL (gil_interval >= 1 ? gil_interval : 1)
+
+/* Enable if you want to force the switching of threads at least every `gil_interval` */
+#undef FORCE_SWITCHING
+#define FORCE_SWITCHING
 
 
 /*
    Notes about the implementation:
 
-   - The GIL is just a boolean variable (locked) whose access is protected
+   - The GIL is just a boolean variable (gil_locked) whose access is protected
      by a mutex (gil_mutex), and whose changes are signalled by a condition
      variable (gil_cond). gil_mutex is taken for short periods of time,
      and therefore mostly uncontended.
@@ -41,7 +48,7 @@
    - When a thread releases the GIL and gil_drop_request is set, that thread
      ensures that another GIL-awaiting thread gets scheduled.
      It does so by waiting on a condition variable (switch_cond) until
-     the value of last_holder is changed to something else than its
+     the value of gil_last_holder is changed to something else than its
      own thread state pointer, indicating that another thread was able to
      take the GIL.
 
@@ -53,7 +60,11 @@
 */
 
 #include "condvar.h"
+#ifndef Py_HAVE_CONDVAR
+#error You need either a POSIX-compatible or a Windows system!
+#endif
 
+#define MUTEX_T PyMUTEX_T
 #define MUTEX_INIT(mut) \
     if (PyMUTEX_INIT(&(mut))) { \
         Py_FatalError("PyMUTEX_INIT(" #mut ") failed"); };
@@ -67,6 +78,7 @@
     if (PyMUTEX_UNLOCK(&(mut))) { \
         Py_FatalError("PyMUTEX_UNLOCK(" #mut ") failed"); };
 
+#define COND_T PyCOND_T
 #define COND_INIT(cond) \
     if (PyCOND_INIT(&(cond))) { \
         Py_FatalError("PyCOND_INIT(" #cond ") failed"); };
@@ -91,36 +103,48 @@
     } \
 
 
-#define DEFAULT_INTERVAL 5000
 
-static void _gil_initialize(struct _gil_runtime_state *state)
-{
-    _Py_atomic_int uninitialized = {-1};
-    state->locked = uninitialized;
-    state->interval = DEFAULT_INTERVAL;
-}
+/* Whether the GIL is already taken (-1 if uninitialized). This is atomic
+   because it can be read without any lock taken in ceval.c. */
+static _Py_atomic_int gil_locked = {-1};
+/* Number of GIL switches since the beginning. */
+static unsigned long gil_switch_number = 0;
+/* Last PyThreadState holding / having held the GIL. This helps us know
+   whether anyone else was scheduled after we dropped the GIL. */
+static _Py_atomic_address gil_last_holder = {0};
+
+/* This condition variable allows one or several threads to wait until
+   the GIL is released. In addition, the mutex also protects the above
+   variables. */
+static COND_T gil_cond;
+static MUTEX_T gil_mutex;
+
+#ifdef FORCE_SWITCHING
+/* This condition variable helps the GIL-releasing thread wait for
+   a GIL-awaiting thread to be scheduled and take the GIL. */
+static COND_T switch_cond;
+static MUTEX_T switch_mutex;
+#endif
+
 
 static int gil_created(void)
 {
-    return (_Py_atomic_load_explicit(&_PyRuntime.ceval.gil.locked,
-                                     _Py_memory_order_acquire)
-            ) >= 0;
+    return _Py_atomic_load_explicit(&gil_locked, _Py_memory_order_acquire) >= 0;
 }
 
 static void create_gil(void)
 {
-    MUTEX_INIT(_PyRuntime.ceval.gil.mutex);
+    MUTEX_INIT(gil_mutex);
 #ifdef FORCE_SWITCHING
-    MUTEX_INIT(_PyRuntime.ceval.gil.switch_mutex);
+    MUTEX_INIT(switch_mutex);
 #endif
-    COND_INIT(_PyRuntime.ceval.gil.cond);
+    COND_INIT(gil_cond);
 #ifdef FORCE_SWITCHING
-    COND_INIT(_PyRuntime.ceval.gil.switch_cond);
+    COND_INIT(switch_cond);
 #endif
-    _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.last_holder, 0);
-    _Py_ANNOTATE_RWLOCK_CREATE(&_PyRuntime.ceval.gil.locked);
-    _Py_atomic_store_explicit(&_PyRuntime.ceval.gil.locked, 0,
-                              _Py_memory_order_release);
+    _Py_atomic_store_relaxed(&gil_last_holder, 0);
+    _Py_ANNOTATE_RWLOCK_CREATE(&gil_locked);
+    _Py_atomic_store_explicit(&gil_locked, 0, _Py_memory_order_release);
 }
 
 static void destroy_gil(void)
@@ -128,62 +152,54 @@ static void destroy_gil(void)
     /* some pthread-like implementations tie the mutex to the cond
      * and must have the cond destroyed first.
      */
-    COND_FINI(_PyRuntime.ceval.gil.cond);
-    MUTEX_FINI(_PyRuntime.ceval.gil.mutex);
+    COND_FINI(gil_cond);
+    MUTEX_FINI(gil_mutex);
 #ifdef FORCE_SWITCHING
-    COND_FINI(_PyRuntime.ceval.gil.switch_cond);
-    MUTEX_FINI(_PyRuntime.ceval.gil.switch_mutex);
+    COND_FINI(switch_cond);
+    MUTEX_FINI(switch_mutex);
 #endif
-    _Py_atomic_store_explicit(&_PyRuntime.ceval.gil.locked, -1,
-                              _Py_memory_order_release);
-    _Py_ANNOTATE_RWLOCK_DESTROY(&_PyRuntime.ceval.gil.locked);
+    _Py_atomic_store_explicit(&gil_locked, -1, _Py_memory_order_release);
+    _Py_ANNOTATE_RWLOCK_DESTROY(&gil_locked);
 }
 
 static void recreate_gil(void)
 {
-    _Py_ANNOTATE_RWLOCK_DESTROY(&_PyRuntime.ceval.gil.locked);
+    _Py_ANNOTATE_RWLOCK_DESTROY(&gil_locked);
     /* XXX should we destroy the old OS resources here? */
     create_gil();
 }
 
 static void drop_gil(PyThreadState *tstate)
 {
-    if (!_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked))
+    if (!_Py_atomic_load_relaxed(&gil_locked))
         Py_FatalError("drop_gil: GIL is not locked");
     /* tstate is allowed to be NULL (early interpreter init) */
     if (tstate != NULL) {
         /* Sub-interpreter support: threads might have been switched
            under our feet using PyThreadState_Swap(). Fix the GIL last
            holder variable so that our heuristics work. */
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.last_holder,
-                                 (uintptr_t)tstate);
+        _Py_atomic_store_relaxed(&gil_last_holder, (uintptr_t)tstate);
     }
 
-    MUTEX_LOCK(_PyRuntime.ceval.gil.mutex);
-    _Py_ANNOTATE_RWLOCK_RELEASED(&_PyRuntime.ceval.gil.locked, /*is_write=*/1);
-    _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.locked, 0);
-    COND_SIGNAL(_PyRuntime.ceval.gil.cond);
-    MUTEX_UNLOCK(_PyRuntime.ceval.gil.mutex);
+    MUTEX_LOCK(gil_mutex);
+    _Py_ANNOTATE_RWLOCK_RELEASED(&gil_locked, /*is_write=*/1);
+    _Py_atomic_store_relaxed(&gil_locked, 0);
+    COND_SIGNAL(gil_cond);
+    MUTEX_UNLOCK(gil_mutex);
 
 #ifdef FORCE_SWITCHING
-    if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request) &&
-        tstate != NULL)
-    {
-        MUTEX_LOCK(_PyRuntime.ceval.gil.switch_mutex);
+    if (_Py_atomic_load_relaxed(&gil_drop_request) && tstate != NULL) {
+        MUTEX_LOCK(switch_mutex);
         /* Not switched yet => wait */
-        if (((PyThreadState*)_Py_atomic_load_relaxed(
-                    &_PyRuntime.ceval.gil.last_holder)
-            ) == tstate)
-        {
+        if ((PyThreadState*)_Py_atomic_load_relaxed(&gil_last_holder) == tstate) {
         RESET_GIL_DROP_REQUEST();
             /* NOTE: if COND_WAIT does not atomically start waiting when
                releasing the mutex, another thread can run through, take
                the GIL and drop it again, and reset the condition
                before we even had a chance to wait for it. */
-            COND_WAIT(_PyRuntime.ceval.gil.switch_cond,
-                      _PyRuntime.ceval.gil.switch_mutex);
+            COND_WAIT(switch_cond, switch_mutex);
     }
-        MUTEX_UNLOCK(_PyRuntime.ceval.gil.switch_mutex);
+        MUTEX_UNLOCK(switch_mutex);
     }
 #endif
 }
@@ -195,65 +211,60 @@ static void take_gil(PyThreadState *tstate)
         Py_FatalError("take_gil: NULL tstate");
 
     err = errno;
-    MUTEX_LOCK(_PyRuntime.ceval.gil.mutex);
+    MUTEX_LOCK(gil_mutex);
 
-    if (!_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked))
+    if (!_Py_atomic_load_relaxed(&gil_locked))
         goto _ready;
 
-    while (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked)) {
+    while (_Py_atomic_load_relaxed(&gil_locked)) {
         int timed_out = 0;
         unsigned long saved_switchnum;
 
-        saved_switchnum = _PyRuntime.ceval.gil.switch_number;
-        COND_TIMED_WAIT(_PyRuntime.ceval.gil.cond, _PyRuntime.ceval.gil.mutex,
-                        INTERVAL, timed_out);
+        saved_switchnum = gil_switch_number;
+        COND_TIMED_WAIT(gil_cond, gil_mutex, INTERVAL, timed_out);
         /* If we timed out and no switch occurred in the meantime, it is time
            to ask the GIL-holding thread to drop it. */
         if (timed_out &&
-            _Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked) &&
-            _PyRuntime.ceval.gil.switch_number == saved_switchnum) {
+            _Py_atomic_load_relaxed(&gil_locked) &&
+            gil_switch_number == saved_switchnum) {
             SET_GIL_DROP_REQUEST();
         }
     }
 _ready:
 #ifdef FORCE_SWITCHING
-    /* This mutex must be taken before modifying
-       _PyRuntime.ceval.gil.last_holder (see drop_gil()). */
-    MUTEX_LOCK(_PyRuntime.ceval.gil.switch_mutex);
+    /* This mutex must be taken before modifying gil_last_holder (see drop_gil()). */
+    MUTEX_LOCK(switch_mutex);
 #endif
     /* We now hold the GIL */
-    _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.locked, 1);
-    _Py_ANNOTATE_RWLOCK_ACQUIRED(&_PyRuntime.ceval.gil.locked, /*is_write=*/1);
-
-    if (tstate != (PyThreadState*)_Py_atomic_load_relaxed(
-                    &_PyRuntime.ceval.gil.last_holder))
-    {
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.last_holder,
-                                 (uintptr_t)tstate);
-        ++_PyRuntime.ceval.gil.switch_number;
+    _Py_atomic_store_relaxed(&gil_locked, 1);
+    _Py_ANNOTATE_RWLOCK_ACQUIRED(&gil_locked, /*is_write=*/1);
+
+    if (tstate != (PyThreadState*)_Py_atomic_load_relaxed(&gil_last_holder)) {
+        _Py_atomic_store_relaxed(&gil_last_holder, (uintptr_t)tstate);
+        ++gil_switch_number;
     }
 
 #ifdef FORCE_SWITCHING
-    COND_SIGNAL(_PyRuntime.ceval.gil.switch_cond);
-    MUTEX_UNLOCK(_PyRuntime.ceval.gil.switch_mutex);
+    COND_SIGNAL(switch_cond);
+    MUTEX_UNLOCK(switch_mutex);
 #endif
-    if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request)) {
+    if (_Py_atomic_load_relaxed(&gil_drop_request)) {
         RESET_GIL_DROP_REQUEST();
     }
     if (tstate->async_exc != NULL) {
         _PyEval_SignalAsyncExc();
     }
 
-    MUTEX_UNLOCK(_PyRuntime.ceval.gil.mutex);
+    MUTEX_UNLOCK(gil_mutex);
     errno = err;
 }
 
 void _PyEval_SetSwitchInterval(unsigned long microseconds)
 {
-    _PyRuntime.ceval.gil.interval = microseconds;
+    gil_interval = microseconds;
 }
 
 unsigned long _PyEval_GetSwitchInterval()
 {
-    return _PyRuntime.ceval.gil.interval;
+    return gil_interval;
 }
diff --git a/Python/condvar.h b/Python/condvar.h
index aaa8043585f..9a71b17738f 100644
--- a/Python/condvar.h
+++ b/Python/condvar.h
@@ -37,16 +37,27 @@
  *    Condition Variable.
  */
 
-#ifndef _CONDVAR_IMPL_H_
-#define _CONDVAR_IMPL_H_
+#ifndef _CONDVAR_H_
+#define _CONDVAR_H_
 
 #include "Python.h"
-#include "internal/_condvar.h"
+
+#ifndef _POSIX_THREADS
+/* This means pthreads are not implemented in libc headers, hence the macro
+   not present in unistd.h. But they still can be implemented as an external
+   library (e.g. gnu pth in pthread emulation) */
+# ifdef HAVE_PTHREAD_H
+#  include <pthread.h> /* _POSIX_THREADS */
+# endif
+#endif
 
 #ifdef _POSIX_THREADS
 /*
  * POSIX support
  */
+#define Py_HAVE_CONDVAR
+
+#include <pthread.h>
 
 #define PyCOND_ADD_MICROSECONDS(tv, interval) \
 do { /* TODO: add overflow and truncation checks */ \
@@ -63,11 +74,13 @@ do { /* TODO: add overflow and truncation checks */ \
 #endif
 
 /* The following functions return 0 on success, nonzero on error */
+#define PyMUTEX_T pthread_mutex_t
 #define PyMUTEX_INIT(mut)       pthread_mutex_init((mut), NULL)
 #define PyMUTEX_FINI(mut)       pthread_mutex_destroy(mut)
 #define PyMUTEX_LOCK(mut)       pthread_mutex_lock(mut)
 #define PyMUTEX_UNLOCK(mut)     pthread_mutex_unlock(mut)
 
+#define PyCOND_T pthread_cond_t
 #define PyCOND_INIT(cond)       pthread_cond_init((cond), NULL)
 #define PyCOND_FINI(cond)       pthread_cond_destroy(cond)
 #define PyCOND_SIGNAL(cond)     pthread_cond_signal(cond)
@@ -103,11 +116,45 @@ PyCOND_TIMEDWAIT(PyCOND_T *cond, PyMUTEX_T *mut, long long us)
  * Emulated condition variables ones that work with XP and later, plus
  * example native support on VISTA and onwards.
  */
+#define Py_HAVE_CONDVAR
+
+
+/* include windows if it hasn't been done before */
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+/* options */
+/* non-emulated condition variables are provided for those that want
+ * to target Windows Vista.  Modify this macro to enable them.
+ */
+#ifndef _PY_EMULATED_WIN_CV
+#define _PY_EMULATED_WIN_CV 1  /* use emulated condition variables */
+#endif
+
+/* fall back to emulation if not targeting Vista */
+#if !defined NTDDI_VISTA || NTDDI_VERSION < NTDDI_VISTA
+#undef _PY_EMULATED_WIN_CV
+#define _PY_EMULATED_WIN_CV 1
+#endif
+
 
 #if _PY_EMULATED_WIN_CV
 
 /* The mutex is a CriticalSection object and
    The condition variable is emulated with the help of a semaphore.
+   Semaphores are available on Windows XP (2003 server) and later.
+   We use a Semaphore rather than an auto-reset event, because although
+   an auto-reset event might appear to solve the lost-wakeup bug (race
+   condition between releasing the outer lock and waiting) because it
+   maintains state even though a wait hasn't happened, there is still
+   a lost wakeup problem if more than one thread are interrupted in the
+   critical place.  A semaphore solves that, because its state is counted,
+   not Boolean.
+   Because it is ok to signal a condition variable with no one
+   waiting, we need to keep track of the number of
+   waiting threads.  Otherwise, the semaphore's state could rise
+   without bound.  This also helps reduce the number of "spurious wakeups"
+   that would otherwise happen.
 
    This implementation still has the problem that the threads woken
    with a "signal" aren't necessarily those that are already
@@ -121,6 +168,8 @@ PyCOND_TIMEDWAIT(PyCOND_T *cond, PyMUTEX_T *mut, long long us)
    http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
 */
 
+typedef CRITICAL_SECTION PyMUTEX_T;
+
 Py_LOCAL_INLINE(int)
 PyMUTEX_INIT(PyMUTEX_T *cs)
 {
@@ -149,6 +198,15 @@ PyMUTEX_UNLOCK(PyMUTEX_T *cs)
     return 0;
 }
 
+/* The ConditionVariable object.  From XP onwards it is easily emulated with
+ * a Semaphore
+ */
+
+typedef struct _PyCOND_T
+{
+    HANDLE sem;
+    int waiting; /* to allow PyCOND_SIGNAL to be a no-op */
+} PyCOND_T;
 
 Py_LOCAL_INLINE(int)
 PyCOND_INIT(PyCOND_T *cv)
@@ -246,7 +304,12 @@ PyCOND_BROADCAST(PyCOND_T *cv)
     return 0;
 }
 
-#else /* !_PY_EMULATED_WIN_CV */
+#else
+
+/* Use native Win7 primitives if build target is Win7 or higher */
+
+/* SRWLOCK is faster and better than CriticalSection */
+typedef SRWLOCK PyMUTEX_T;
 
 Py_LOCAL_INLINE(int)
 PyMUTEX_INIT(PyMUTEX_T *cs)
@@ -276,6 +339,8 @@ PyMUTEX_UNLOCK(PyMUTEX_T *cs)
 }
 
 
+typedef CONDITION_VARIABLE  PyCOND_T;
+
 Py_LOCAL_INLINE(int)
 PyCOND_INIT(PyCOND_T *cv)
 {
@@ -322,4 +387,4 @@ PyCOND_BROADCAST(PyCOND_T *cv)
 
 #endif /* _POSIX_THREADS, NT_THREADS */
 
-#endif /* _CONDVAR_IMPL_H_ */
+#endif /* _CONDVAR_H_ */
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 3f405b1225a..662405bdeb3 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -77,30 +77,6 @@ extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *);
 extern void _PyGILState_Fini(void);
 #endif /* WITH_THREAD */
 
-_PyRuntimeState _PyRuntime = {0, 0};
-
-void
-_PyRuntime_Initialize(void)
-{
-    /* XXX We only initialize once in the process, which aligns with
-       the static initialization of the former globals now found in
-       _PyRuntime.  However, _PyRuntime *should* be initialized with
-       every Py_Initialize() call, but doing so breaks the runtime.
-       This is because the runtime state is not properly finalized
-       currently. */
-    static int initialized = 0;
-    if (initialized)
-        return;
-    initialized = 1;
-    _PyRuntimeState_Init(&_PyRuntime);
-}
-
-void
-_PyRuntime_Finalize(void)
-{
-    _PyRuntimeState_Fini(&_PyRuntime);
-}
-
 /* Global configuration variable declarations are in pydebug.h */
 /* XXX (ncoghlan): move those declarations to pylifecycle.h? */
 int Py_DebugFlag; /* Needed by parser.c */
@@ -124,6 +100,8 @@ int Py_LegacyWindowsFSEncodingFlag = 0; /* Uses mbcs instead of utf-8 */
 int Py_LegacyWindowsStdioFlag = 0; /* Uses FileIO instead of WindowsConsoleIO */
 #endif
 
+PyThreadState *_Py_Finalizing = NULL;
+
 /* Hack to force loading of object files */
 int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t) = \
     PyOS_mystrnicmp; /* Python/pystrcmp.o */
@@ -141,17 +119,19 @@ PyModule_GetWarningsModule(void)
  *
  * Can be called prior to Py_Initialize.
  */
+int _Py_CoreInitialized = 0;
+int _Py_Initialized = 0;
 
 int
 _Py_IsCoreInitialized(void)
 {
-    return _PyRuntime.core_initialized;
+    return _Py_CoreInitialized;
 }
 
 int
 Py_IsInitialized(void)
 {
-    return _PyRuntime.initialized;
+    return _Py_Initialized;
 }
 
 /* Helper to allow an embedding application to override the normal
@@ -564,16 +544,14 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
     _PyCoreConfig core_config = _PyCoreConfig_INIT;
     _PyMainInterpreterConfig preinit_config = _PyMainInterpreterConfig_INIT;
 
-    _PyRuntime_Initialize();
-
     if (config != NULL) {
         core_config = *config;
     }
 
-    if (_PyRuntime.initialized) {
+    if (_Py_Initialized) {
         Py_FatalError("Py_InitializeCore: main interpreter already initialized");
     }
-    if (_PyRuntime.core_initialized) {
+    if (_Py_CoreInitialized) {
         Py_FatalError("Py_InitializeCore: runtime core already initialized");
     }
 
@@ -586,14 +564,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
      * threads still hanging around from a previous Py_Initialize/Finalize
      * pair :(
      */
-    _PyRuntime.finalizing = NULL;
-
-    if (_PyMem_SetupAllocators(core_config.allocator) < 0) {
-        fprintf(stderr,
-            "Error in PYTHONMALLOC: unknown allocator \"%s\"!\n",
-            core_config.allocator);
-        exit(1);
-    }
+    _Py_Finalizing = NULL;
 
 #ifdef __ANDROID__
     /* Passing "" to setlocale() on Android requests the C locale rather
@@ -635,7 +606,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
         Py_HashRandomizationFlag = 1;
     }
 
-    _PyInterpreterState_Enable(&_PyRuntime);
+    _PyInterpreterState_Init();
     interp = PyInterpreterState_New();
     if (interp == NULL)
         Py_FatalError("Py_InitializeCore: can't make main interpreter");
@@ -727,7 +698,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
     }
 
     /* Only when we get here is the runtime core fully initialized */
-    _PyRuntime.core_initialized = 1;
+    _Py_CoreInitialized = 1;
 }
 
 /* Read configuration settings from standard locations
@@ -768,10 +739,10 @@ int _Py_InitializeMainInterpreter(const _PyMainInterpreterConfig *config)
     PyInterpreterState *interp;
     PyThreadState *tstate;
 
-    if (!_PyRuntime.core_initialized) {
+    if (!_Py_CoreInitialized) {
         Py_FatalError("Py_InitializeMainInterpreter: runtime core not initialized");
     }
-    if (_PyRuntime.initialized) {
+    if (_Py_Initialized) {
         Py_FatalError("Py_InitializeMainInterpreter: main interpreter already initialized");
     }
 
@@ -792,7 +763,7 @@ int _Py_InitializeMainInterpreter(const _PyMainInterpreterConfig *config)
          * This means anything which needs support from extension modules
          * or pure Python code in the standard library won't work.
          */
-        _PyRuntime.initialized = 1;
+        _Py_Initialized = 1;
         return 0;
     }
     /* TODO: Report exceptions rather than fatal errors below here */
@@ -837,7 +808,7 @@ int _Py_InitializeMainInterpreter(const _PyMainInterpreterConfig *config)
         Py_XDECREF(warnings_module);
     }
 
-    _PyRuntime.initialized = 1;
+    _Py_Initialized = 1;
 
     if (!Py_NoSiteFlag)
         initsite(); /* Module site */
@@ -953,7 +924,7 @@ Py_FinalizeEx(void)
     PyThreadState *tstate;
     int status = 0;
 
-    if (!_PyRuntime.initialized)
+    if (!_Py_Initialized)
         return status;
 
     wait_for_thread_shutdown();
@@ -975,9 +946,9 @@ Py_FinalizeEx(void)
 
     /* Remaining threads (e.g. daemon threads) will automatically exit
        after taking the GIL (in PyEval_RestoreThread()). */
-    _PyRuntime.finalizing = tstate;
-    _PyRuntime.initialized = 0;
-    _PyRuntime.core_initialized = 0;
+    _Py_Finalizing = tstate;
+    _Py_Initialized = 0;
+    _Py_CoreInitialized = 0;
 
     /* Flush sys.stdout and sys.stderr */
     if (flush_std_files() < 0) {
@@ -1139,7 +1110,6 @@ Py_FinalizeEx(void)
 #endif
 
     call_ll_exitfuncs();
-    _PyRuntime_Finalize();
     return status;
 }
 
@@ -1169,7 +1139,7 @@ Py_NewInterpreter(void)
     PyThreadState *tstate, *save_tstate;
     PyObject *bimod, *sysmod;
 
-    if (!_PyRuntime.initialized)
+    if (!_Py_Initialized)
         Py_FatalError("Py_NewInterpreter: call Py_Initialize first");
 
 #ifdef WITH_THREAD
@@ -1884,19 +1854,20 @@ Py_FatalError(const char *msg)
 #  include "pythread.h"
 #endif
 
+static void (*pyexitfunc)(void) = NULL;
 /* For the atexit module. */
 void _Py_PyAtExit(void (*func)(void))
 {
-    _PyRuntime.pyexitfunc = func;
+    pyexitfunc = func;
 }
 
 static void
 call_py_exitfuncs(void)
 {
-    if (_PyRuntime.pyexitfunc == NULL)
+    if (pyexitfunc == NULL)
         return;
 
-    (*_PyRuntime.pyexitfunc)();
+    (*pyexitfunc)();
     PyErr_Clear();
 }
 
@@ -1929,19 +1900,22 @@ wait_for_thread_shutdown(void)
 }
 
 #define NEXITFUNCS 32
+static void (*exitfuncs[NEXITFUNCS])(void);
+static int nexitfuncs = 0;
+
 int Py_AtExit(void (*func)(void))
 {
-    if (_PyRuntime.nexitfuncs >= NEXITFUNCS)
+    if (nexitfuncs >= NEXITFUNCS)
         return -1;
-    _PyRuntime.exitfuncs[_PyRuntime.nexitfuncs++] = func;
+    exitfuncs[nexitfuncs++] = func;
     return 0;
 }
 
 static void
 call_ll_exitfuncs(void)
 {
-    while (_PyRuntime.nexitfuncs > 0)
-        (*_PyRuntime.exitfuncs[--_PyRuntime.nexitfuncs])();
+    while (nexitfuncs > 0)
+        (*exitfuncs[--nexitfuncs])();
 
     fflush(stdout);
     fflush(stderr);
diff --git a/Python/pystate.c b/Python/pystate.c
index 2d926372fd6..30a372212ed 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -34,65 +34,55 @@ to avoid the expense of doing their own locking).
 extern "C" {
 #endif
 
-void
-_PyRuntimeState_Init(_PyRuntimeState *runtime)
-{
-    memset(runtime, 0, sizeof(*runtime));
-
-    _PyObject_Initialize(&runtime->obj);
-    _PyMem_Initialize(&runtime->mem);
-    _PyGC_Initialize(&runtime->gc);
-    _PyEval_Initialize(&runtime->ceval);
-
-    runtime->gilstate.check_enabled = 1;
-    runtime->gilstate.autoTLSkey = -1;
+int _PyGILState_check_enabled = 1;
 
 #ifdef WITH_THREAD
-    runtime->interpreters.mutex = PyThread_allocate_lock();
-    if (runtime->interpreters.mutex == NULL)
-        Py_FatalError("Can't initialize threads for interpreter");
-#endif
-    runtime->interpreters.next_id = -1;
-}
-
-void
-_PyRuntimeState_Fini(_PyRuntimeState *runtime)
-{
-#ifdef WITH_THREAD
-    if (runtime->interpreters.mutex != NULL) {
-        PyThread_free_lock(runtime->interpreters.mutex);
-        runtime->interpreters.mutex = NULL;
-    }
-#endif
-}
-
-#ifdef WITH_THREAD
-#define HEAD_LOCK() PyThread_acquire_lock(_PyRuntime.interpreters.mutex, \
-                                          WAIT_LOCK)
-#define HEAD_UNLOCK() PyThread_release_lock(_PyRuntime.interpreters.mutex)
+#include "pythread.h"
+static PyThread_type_lock head_mutex = NULL; /* Protects interp->tstate_head */
+#define HEAD_INIT() (void)(head_mutex || (head_mutex = PyThread_allocate_lock()))
+#define HEAD_LOCK() PyThread_acquire_lock(head_mutex, WAIT_LOCK)
+#define HEAD_UNLOCK() PyThread_release_lock(head_mutex)
+
+/* The single PyInterpreterState used by this process'
+   GILState implementation
+*/
+/* TODO: Given interp_main, it may be possible to kill this ref */
+static PyInterpreterState *autoInterpreterState = NULL;
+static int autoTLSkey = -1;
 #else
+#define HEAD_INIT() /* Nothing */
 #define HEAD_LOCK() /* Nothing */
 #define HEAD_UNLOCK() /* Nothing */
 #endif
 
+static PyInterpreterState *interp_head = NULL;
+static PyInterpreterState *interp_main = NULL;
+
+/* Assuming the current thread holds the GIL, this is the
+   PyThreadState for the current thread. */
+_Py_atomic_address _PyThreadState_Current = {0};
+PyThreadFrameGetter _PyThreadState_GetFrame = NULL;
+
 #ifdef WITH_THREAD
 static void _PyGILState_NoteThreadState(PyThreadState* tstate);
 #endif
 
+/* _next_interp_id is an auto-numbered sequence of small integers.
+   It gets initialized in _PyInterpreterState_Init(), which is called
+   in Py_Initialize(), and used in PyInterpreterState_New().  A negative
+   interpreter ID indicates an error occurred.  The main interpreter
+   will always have an ID of 0.  Overflow results in a RuntimeError.
+   If that becomes a problem later then we can adjust, e.g. by using
+   a Python int.
+
+   We initialize this to -1 so that the pre-Py_Initialize() value
+   results in an error. */
+static int64_t _next_interp_id = -1;
+
 void
-_PyInterpreterState_Enable(_PyRuntimeState *runtime)
+_PyInterpreterState_Init(void)
 {
-    runtime->interpreters.next_id = 0;
-#ifdef WITH_THREAD
-    /* Since we only call _PyRuntimeState_Init() once per process
-       (see _PyRuntime_Initialize()), we make sure the mutex is
-       initialized here. */
-    if (runtime->interpreters.mutex == NULL) {
-        runtime->interpreters.mutex = PyThread_allocate_lock();
-        if (runtime->interpreters.mutex == NULL)
-            Py_FatalError("Can't initialize threads for interpreter");
-    }
-#endif
+    _next_interp_id = 0;
 }
 
 PyInterpreterState *
@@ -102,16 +92,16 @@ PyInterpreterState_New(void)
                                  PyMem_RawMalloc(sizeof(PyInterpreterState));
 
     if (interp != NULL) {
+        HEAD_INIT();
+#ifdef WITH_THREAD
+        if (head_mutex == NULL)
+            Py_FatalError("Can't initialize threads for interpreter");
+#endif
         interp->modules_by_index = NULL;
         interp->sysdict = NULL;
         interp->builtins = NULL;
         interp->builtins_copy = NULL;
         interp->tstate_head = NULL;
-        interp->check_interval = 100;
-        interp->warnoptions = NULL;
-        interp->xoptions = NULL;
-        interp->num_threads = 0;
-        interp->pythread_stacksize = 0;
         interp->codec_search_path = NULL;
         interp->codec_search_cache = NULL;
         interp->codec_error_registry = NULL;
@@ -135,19 +125,19 @@ PyInterpreterState_New(void)
 #endif
 
         HEAD_LOCK();
-        interp->next = _PyRuntime.interpreters.head;
-        if (_PyRuntime.interpreters.main == NULL) {
-            _PyRuntime.interpreters.main = interp;
+        interp->next = interp_head;
+        if (interp_main == NULL) {
+            interp_main = interp;
         }
-        _PyRuntime.interpreters.head = interp;
-        if (_PyRuntime.interpreters.next_id < 0) {
+        interp_head = interp;
+        if (_next_interp_id < 0) {
             /* overflow or Py_Initialize() not called! */
             PyErr_SetString(PyExc_RuntimeError,
                             "failed to get an interpreter ID");
             interp = NULL;
         } else {
-            interp->id = _PyRuntime.interpreters.next_id;
-            _PyRuntime.interpreters.next_id += 1;
+            interp->id = _next_interp_id;
+            _next_interp_id += 1;
         }
         HEAD_UNLOCK();
     }
@@ -199,7 +189,7 @@ PyInterpreterState_Delete(PyInterpreterState *interp)
     PyInterpreterState **p;
     zapthreads(interp);
     HEAD_LOCK();
-    for (p = &_PyRuntime.interpreters.head; ; p = &(*p)->next) {
+    for (p = &interp_head; ; p = &(*p)->next) {
         if (*p == NULL)
             Py_FatalError(
                 "PyInterpreterState_Delete: invalid interp");
@@ -209,13 +199,19 @@ PyInterpreterState_Delete(PyInterpreterState *interp)
     if (interp->tstate_head != NULL)
         Py_FatalError("PyInterpreterState_Delete: remaining threads");
     *p = interp->next;
-    if (_PyRuntime.interpreters.main == interp) {
-        _PyRuntime.interpreters.main = NULL;
-        if (_PyRuntime.interpreters.head != NULL)
+    if (interp_main == interp) {
+        interp_main = NULL;
+        if (interp_head != NULL)
             Py_FatalError("PyInterpreterState_Delete: remaining subinterpreters");
     }
     HEAD_UNLOCK();
     PyMem_RawFree(interp);
+#ifdef WITH_THREAD
+    if (interp_head == NULL && head_mutex != NULL) {
+        PyThread_free_lock(head_mutex);
+        head_mutex = NULL;
+    }
+#endif
 }
 
 
@@ -503,11 +499,8 @@ PyThreadState_Delete(PyThreadState *tstate)
     if (tstate == GET_TSTATE())
         Py_FatalError("PyThreadState_Delete: tstate is still current");
 #ifdef WITH_THREAD
-    if (_PyRuntime.gilstate.autoInterpreterState &&
-        PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == tstate)
-    {
-        PyThread_delete_key_value(_PyRuntime.gilstate.autoTLSkey);
-    }
+    if (autoInterpreterState && PyThread_get_key_value(autoTLSkey) == tstate)
+        PyThread_delete_key_value(autoTLSkey);
 #endif /* WITH_THREAD */
     tstate_delete_common(tstate);
 }
@@ -522,11 +515,8 @@ PyThreadState_DeleteCurrent()
         Py_FatalError(
             "PyThreadState_DeleteCurrent: no current tstate");
     tstate_delete_common(tstate);
-    if (_PyRuntime.gilstate.autoInterpreterState &&
-        PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == tstate)
-    {
-        PyThread_delete_key_value(_PyRuntime.gilstate.autoTLSkey);
-    }
+    if (autoInterpreterState && PyThread_get_key_value(autoTLSkey) == tstate)
+        PyThread_delete_key_value(autoTLSkey);
     SET_TSTATE(NULL);
     PyEval_ReleaseLock();
 }
@@ -686,13 +676,13 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc)
 PyInterpreterState *
 PyInterpreterState_Head(void)
 {
-    return _PyRuntime.interpreters.head;
+    return interp_head;
 }
 
 PyInterpreterState *
 PyInterpreterState_Main(void)
 {
-    return _PyRuntime.interpreters.main;
+    return interp_main;
 }
 
 PyInterpreterState *
@@ -732,7 +722,7 @@ _PyThread_CurrentFrames(void)
      * need to grab head_mutex for the duration.
      */
     HEAD_LOCK();
-    for (i = _PyRuntime.interpreters.head; i != NULL; i = i->next) {
+    for (i = interp_head; i != NULL; i = i->next) {
         PyThreadState *t;
         for (t = i->tstate_head; t != NULL; t = t->next) {
             PyObject *id;
@@ -784,11 +774,11 @@ void
 _PyGILState_Init(PyInterpreterState *i, PyThreadState *t)
 {
     assert(i && t); /* must init with valid states */
-    _PyRuntime.gilstate.autoTLSkey = PyThread_create_key();
-    if (_PyRuntime.gilstate.autoTLSkey == -1)
+    autoTLSkey = PyThread_create_key();
+    if (autoTLSkey == -1)
         Py_FatalError("Could not allocate TLS entry");
-    _PyRuntime.gilstate.autoInterpreterState = i;
-    assert(PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == NULL);
+    autoInterpreterState = i;
+    assert(PyThread_get_key_value(autoTLSkey) == NULL);
     assert(t->gilstate_counter == 0);
 
     _PyGILState_NoteThreadState(t);
@@ -797,15 +787,15 @@ _PyGILState_Init(PyInterpreterState *i, PyThreadState *t)
 PyInterpreterState *
 _PyGILState_GetInterpreterStateUnsafe(void)
 {
-    return _PyRuntime.gilstate.autoInterpreterState;
+    return autoInterpreterState;
 }
 
 void
 _PyGILState_Fini(void)
 {
-    PyThread_delete_key(_PyRuntime.gilstate.autoTLSkey);
-    _PyRuntime.gilstate.autoTLSkey = -1;
-    _PyRuntime.gilstate.autoInterpreterState = NULL;
+    PyThread_delete_key(autoTLSkey);
+    autoTLSkey = -1;
+    autoInterpreterState = NULL;
 }
 
 /* Reset the TLS key - called by PyOS_AfterFork_Child().
@@ -816,19 +806,17 @@ void
 _PyGILState_Reinit(void)
 {
 #ifdef WITH_THREAD
-    _PyRuntime.interpreters.mutex = PyThread_allocate_lock();
-    if (_PyRuntime.interpreters.mutex == NULL)
-        Py_FatalError("Can't initialize threads for interpreter");
+    head_mutex = NULL;
+    HEAD_INIT();
 #endif
     PyThreadState *tstate = PyGILState_GetThisThreadState();
-    PyThread_delete_key(_PyRuntime.gilstate.autoTLSkey);
-    if ((_PyRuntime.gilstate.autoTLSkey = PyThread_create_key()) == -1)
+    PyThread_delete_key(autoTLSkey);
+    if ((autoTLSkey = PyThread_create_key()) == -1)
         Py_FatalError("Could not allocate TLS entry");
 
     /* If the thread had an associated auto thread state, reassociate it with
      * the new key. */
-    if (tstate && PyThread_set_key_value(_PyRuntime.gilstate.autoTLSkey,
-                                         (void *)tstate) < 0)
+    if (tstate && PyThread_set_key_value(autoTLSkey, (void *)tstate) < 0)
         Py_FatalError("Couldn't create autoTLSkey mapping");
 }
 
@@ -843,7 +831,7 @@ _PyGILState_NoteThreadState(PyThreadState* tstate)
     /* If autoTLSkey isn't initialized, this must be the very first
        threadstate created in Py_Initialize().  Don't do anything for now
        (we'll be back here when _PyGILState_Init is called). */
-    if (!_PyRuntime.gilstate.autoInterpreterState)
+    if (!autoInterpreterState)
         return;
 
     /* Stick the thread state for this thread in thread local storage.
@@ -858,13 +846,9 @@ _PyGILState_NoteThreadState(PyThreadState* tstate)
        The first thread state created for that given OS level thread will
        "win", which seems reasonable behaviour.
     */
-    if (PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == NULL) {
-        if ((PyThread_set_key_value(_PyRuntime.gilstate.autoTLSkey,
-                                    (void *)tstate)
-             ) < 0)
-        {
+    if (PyThread_get_key_value(autoTLSkey) == NULL) {
+        if (PyThread_set_key_value(autoTLSkey, (void *)tstate) < 0)
             Py_FatalError("Couldn't create autoTLSkey mapping");
-        }
     }
 
     /* PyGILState_Release must not try to delete this thread state. */
@@ -875,10 +859,9 @@ _PyGILState_NoteThreadState(PyThreadState* tstate)
 PyThreadState *
 PyGILState_GetThisThreadState(void)
 {
-    if (_PyRuntime.gilstate.autoInterpreterState == NULL)
+    if (autoInterpreterState == NULL)
         return NULL;
-    return (PyThreadState *)PyThread_get_key_value(
-                _PyRuntime.gilstate.autoTLSkey);
+    return (PyThreadState *)PyThread_get_key_value(autoTLSkey);
 }
 
 int
@@ -889,7 +872,7 @@ PyGILState_Check(void)
     if (!_PyGILState_check_enabled)
         return 1;
 
-    if (_PyRuntime.gilstate.autoTLSkey == -1)
+    if (autoTLSkey == -1)
         return 1;
 
     tstate = GET_TSTATE();
@@ -909,10 +892,8 @@ PyGILState_Ensure(void)
        spells out other issues.  Embedders are expected to have
        called Py_Initialize() and usually PyEval_InitThreads().
     */
-    /* Py_Initialize() hasn't been called! */
-    assert(_PyRuntime.gilstate.autoInterpreterState);
-    tcur = (PyThreadState *)PyThread_get_key_value(
-                _PyRuntime.gilstate.autoTLSkey);
+    assert(autoInterpreterState); /* Py_Initialize() hasn't been called! */
+    tcur = (PyThreadState *)PyThread_get_key_value(autoTLSkey);
     if (tcur == NULL) {
         /* At startup, Python has no concrete GIL. If PyGILState_Ensure() is
            called from a new thread for the first time, we need to create the
@@ -920,7 +901,7 @@ PyGILState_Ensure(void)
         PyEval_InitThreads();
 
         /* Create a new thread state for this thread */
-        tcur = PyThreadState_New(_PyRuntime.gilstate.autoInterpreterState);
+        tcur = PyThreadState_New(autoInterpreterState);
         if (tcur == NULL)
             Py_FatalError("Couldn't create thread-state for new thread");
         /* This is our thread state!  We'll need to delete it in the
@@ -945,7 +926,7 @@ void
 PyGILState_Release(PyGILState_STATE oldstate)
 {
     PyThreadState *tcur = (PyThreadState *)PyThread_get_key_value(
-                                _PyRuntime.gilstate.autoTLSkey);
+                                                            autoTLSkey);
     if (tcur == NULL)
         Py_FatalError("auto-releasing thread-state, "
                       "but no thread-state for this thread");
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 080c541c6df..852babbed78 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -519,6 +519,8 @@ Return the profiling function set with sys.setprofile.\n\
 See the profiler chapter in the library manual."
 );
 
+static int _check_interval = 100;
+
 static PyObject *
 sys_setcheckinterval(PyObject *self, PyObject *args)
 {
@@ -527,8 +529,7 @@ sys_setcheckinterval(PyObject *self, PyObject *args)
                      "are deprecated.  Use sys.setswitchinterval() "
                      "instead.", 1) < 0)
         return NULL;
-    PyInterpreterState *interp = PyThreadState_GET()->interp;
-    if (!PyArg_ParseTuple(args, "i:setcheckinterval", &interp->check_interval))
+    if (!PyArg_ParseTuple(args, "i:setcheckinterval", &_check_interval))
         return NULL;
     Py_RETURN_NONE;
 }
@@ -548,8 +549,7 @@ sys_getcheckinterval(PyObject *self, PyObject *args)
                      "are deprecated.  Use sys.getswitchinterval() "
                      "instead.", 1) < 0)
         return NULL;
-    PyInterpreterState *interp = PyThreadState_GET()->interp;
-    return PyLong_FromLong(interp->check_interval);
+    return PyLong_FromLong(_check_interval);
 }
 
 PyDoc_STRVAR(getcheckinterval_doc,
@@ -1339,7 +1339,7 @@ Clear the internal type lookup cache.");
 static PyObject *
 sys_is_finalizing(PyObject* self, PyObject* args)
 {
-    return PyBool_FromLong(_Py_IS_FINALIZING());
+    return PyBool_FromLong(_Py_Finalizing != NULL);
 }
 
 PyDoc_STRVAR(is_finalizing_doc,
@@ -1479,24 +1479,11 @@ list_builtin_module_names(void)
     return list;
 }
 
-static PyObject *
-get_warnoptions(void)
-{
-    PyObject *warnoptions = PyThreadState_GET()->interp->warnoptions;
-    if (warnoptions == NULL || !PyList_Check(warnoptions)) {
-        Py_XDECREF(warnoptions);
-        warnoptions = PyList_New(0);
-        if (warnoptions == NULL)
-            return NULL;
-        PyThreadState_GET()->interp->warnoptions = warnoptions;
-    }
-    return warnoptions;
-}
+static PyObject *warnoptions = NULL;
 
 void
 PySys_ResetWarnOptions(void)
 {
-    PyObject *warnoptions = PyThreadState_GET()->interp->warnoptions;
     if (warnoptions == NULL || !PyList_Check(warnoptions))
         return;
     PyList_SetSlice(warnoptions, 0, PyList_GET_SIZE(warnoptions), NULL);
@@ -1505,9 +1492,12 @@ PySys_ResetWarnOptions(void)
 void
 PySys_AddWarnOptionUnicode(PyObject *unicode)
 {
-    PyObject *warnoptions = get_warnoptions();
-    if (warnoptions == NULL)
-        return;
+    if (warnoptions == NULL || !PyList_Check(warnoptions)) {
+        Py_XDECREF(warnoptions);
+        warnoptions = PyList_New(0);
+        if (warnoptions == NULL)
+            return;
+    }
     PyList_Append(warnoptions, unicode);
 }
 
@@ -1525,20 +1515,17 @@ PySys_AddWarnOption(const wchar_t *s)
 int
 PySys_HasWarnOptions(void)
 {
-    PyObject *warnoptions = PyThreadState_GET()->interp->warnoptions;
     return (warnoptions != NULL && (PyList_Size(warnoptions) > 0)) ? 1 : 0;
 }
 
+static PyObject *xoptions = NULL;
+
 static PyObject *
 get_xoptions(void)
 {
-    PyObject *xoptions = PyThreadState_GET()->interp->xoptions;
     if (xoptions == NULL || !PyDict_Check(xoptions)) {
         Py_XDECREF(xoptions);
         xoptions = PyDict_New();
-        if (xoptions == NULL)
-            return NULL;
-        PyThreadState_GET()->interp->xoptions = xoptions;
     }
     return xoptions;
 }
@@ -2143,15 +2130,17 @@ _PySys_EndInit(PyObject *sysdict)
     SET_SYS_FROM_STRING_INT_RESULT("base_exec_prefix",
                         PyUnicode_FromWideChar(Py_GetExecPrefix(), -1));
 
-    PyObject *warnoptions = get_warnoptions();
-    if (warnoptions == NULL)
-        return -1;
-    SET_SYS_FROM_STRING_BORROW_INT_RESULT("warnoptions", warnoptions);
+    if (warnoptions == NULL) {
+        warnoptions = PyList_New(0);
+        if (warnoptions == NULL)
+            return -1;
+    }
 
-    PyObject *xoptions = get_xoptions();
-    if (xoptions == NULL)
-        return -1;
-    SET_SYS_FROM_STRING_BORROW_INT_RESULT("_xoptions", xoptions);
+    SET_SYS_FROM_STRING_INT_RESULT("warnoptions",
+                                   PyList_GetSlice(warnoptions,
+                                                   0, Py_SIZE(warnoptions)));
+
+    SET_SYS_FROM_STRING_BORROW_INT_RESULT("_xoptions", get_xoptions());
 
     if (PyErr_Occurred())
         return -1;
diff --git a/Python/thread.c b/Python/thread.c
index 6fd594fd301..4d2f2c32a19 100644
--- a/Python/thread.c
+++ b/Python/thread.c
@@ -76,6 +76,11 @@ PyThread_init_thread(void)
     PyThread__init_thread();
 }
 
+/* Support for runtime thread stack size tuning.
+   A value of 0 means using the platform's default stack size
+   or the size specified by the THREAD_STACK_SIZE macro. */
+static size_t _pythread_stacksize = 0;
+
 #if defined(_POSIX_THREADS)
 #   define PYTHREAD_NAME "pthread"
 #   include "thread_pthread.h"
@@ -91,7 +96,7 @@ PyThread_init_thread(void)
 size_t
 PyThread_get_stacksize(void)
 {
-    return PyThreadState_GET()->interp->pythread_stacksize;
+    return _pythread_stacksize;
 }
 
 /* Only platforms defining a THREAD_SET_STACKSIZE() macro
diff --git a/Python/thread_nt.h b/Python/thread_nt.h
index 2f3a71b86ad..47eb4b6e94c 100644
--- a/Python/thread_nt.h
+++ b/Python/thread_nt.h
@@ -189,10 +189,9 @@ PyThread_start_new_thread(void (*func)(void *), void *arg)
         return PYTHREAD_INVALID_THREAD_ID;
     obj->func = func;
     obj->arg = arg;
-    PyThreadState *tstate = PyThreadState_GET();
-    size_t stacksize = tstate ? tstate->interp->pythread_stacksize : 0;
     hThread = (HANDLE)_beginthreadex(0,
-                      Py_SAFE_DOWNCAST(stacksize, Py_ssize_t, unsigned int),
+                      Py_SAFE_DOWNCAST(_pythread_stacksize,
+                                       Py_ssize_t, unsigned int),
                       bootstrap, obj,
                       0, &threadID);
     if (hThread == 0) {
@@ -333,13 +332,13 @@ _pythread_nt_set_stacksize(size_t size)
 {
     /* set to default */
     if (size == 0) {
-        PyThreadState_GET()->interp->pythread_stacksize = 0;
+        _pythread_stacksize = 0;
         return 0;
     }
 
     /* valid range? */
     if (size >= THREAD_MIN_STACKSIZE && size < THREAD_MAX_STACKSIZE) {
-        PyThreadState_GET()->interp->pythread_stacksize = size;
+        _pythread_stacksize = size;
         return 0;
     }
 
diff --git a/Python/thread_pthread.h b/Python/thread_pthread.h
index ea05b6fbcfe..268dec41168 100644
--- a/Python/thread_pthread.h
+++ b/Python/thread_pthread.h
@@ -205,9 +205,8 @@ PyThread_start_new_thread(void (*func)(void *), void *arg)
         return PYTHREAD_INVALID_THREAD_ID;
 #endif
 #if defined(THREAD_STACK_SIZE)
-    PyThreadState *tstate = PyThreadState_GET();
-    size_t stacksize = tstate ? tstate->interp->pythread_stacksize : 0;
-    tss = (stacksize != 0) ? stacksize : THREAD_STACK_SIZE;
+    tss = (_pythread_stacksize != 0) ? _pythread_stacksize
+                                     : THREAD_STACK_SIZE;
     if (tss != 0) {
         if (pthread_attr_setstacksize(&attrs, tss) != 0) {
             pthread_attr_destroy(&attrs);
@@ -579,7 +578,7 @@ _pythread_pthread_set_stacksize(size_t size)
 
     /* set to default */
     if (size == 0) {
-        PyThreadState_GET()->interp->pythread_stacksize = 0;
+        _pythread_stacksize = 0;
         return 0;
     }
 
@@ -596,7 +595,7 @@ _pythread_pthread_set_stacksize(size_t size)
             rc = pthread_attr_setstacksize(&attrs, size);
             pthread_attr_destroy(&attrs);
             if (rc == 0) {
-                PyThreadState_GET()->interp->pythread_stacksize = size;
+                _pythread_stacksize = size;
                 return 0;
             }
         }
diff --git a/Tools/c-globals/README b/Tools/c-globals/README
deleted file mode 100644
index d0e6e8eba06..00000000000
--- a/Tools/c-globals/README
+++ /dev/null
@@ -1,41 +0,0 @@
-#######################################
-# C Globals and CPython Runtime State.
-
-CPython's C code makes extensive use of global variables.  Each global
-falls into one of several categories:
-
-* (effectively) constants (incl. static types)
-* globals used exclusively in main or in the REPL
-* freelists, caches, and counters
-* process-global state
-* module state
-* Python runtime state
-
-The ignored-globals.txt file is organized similarly.  Of the different
-categories, the last two are problematic and generally should not exist
-in the codebase.
-
-Globals that hold module state (i.e. in Modules/*.c) cause problems
-when multiple interpreters are in use.  For more info, see PEP 3121,
-which addresses the situation for extension modules in general.
-
-Globals in the last category should be avoided as well.  The problem
-isn't with the Python runtime having state.  Rather, the problem is with
-that state being spread throughout the codebase in dozens of individual
-globals.  Unlike the other globals, the runtime state represents a set
-of values that are constantly shifting in a complex way.  When they are
-spread out it's harder to get a clear picture of what the runtime
-involves.  Furthermore, when they are spread out it complicates efforts
-that change the runtime.
-
-Consequently, the globals for Python's runtime state have been
-consolidated under a single top-level _PyRuntime global. No new globals
-should be added for runtime state.  Instead, they should be added to
-_PyRuntimeState or one of its sub-structs.  The check-c-globals script
-should be run to ensure that no new globals have been added:
-
-  python3 Tools/c-globals/check-c-globals.py
-
-If it reports any globals then they should be resolved.  If the globals
-are runtime state then they should be folded into _PyRuntimeState.
-Otherwise they should be added to ignored-globals.txt.
diff --git a/Tools/c-globals/check-c-globals.py b/Tools/c-globals/check-c-globals.py
deleted file mode 100644
index 1de69a8751c..00000000000
--- a/Tools/c-globals/check-c-globals.py
+++ /dev/null
@@ -1,446 +0,0 @@
-
-from collections import namedtuple
-import glob
-import os.path
-import re
-import shutil
-import sys
-import subprocess
-
-
-VERBOSITY = 2
-
-C_GLOBALS_DIR = os.path.abspath(os.path.dirname(__file__))
-TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR)
-ROOT_DIR = os.path.dirname(TOOLS_DIR)
-GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt')
-
-SOURCE_DIRS = ['Include', 'Objects', 'Modules', 'Parser', 'Python']
-
-CAPI_REGEX = re.compile(r'^ *PyAPI_DATA\([^)]*\) \W*(_?Py\w+(?:, \w+)*\w).*;.*$')
-
-
-IGNORED_VARS = {
-        '_DYNAMIC',
-        '_GLOBAL_OFFSET_TABLE_',
-        '__JCR_LIST__',
-        '__JCR_END__',
-        '__TMC_END__',
-        '__bss_start',
-        '__data_start',
-        '__dso_handle',
-        '_edata',
-        '_end',
-        }
-
-
-def find_capi_vars(root):
-    capi_vars = {}
-    for dirname in SOURCE_DIRS:
-        for filename in glob.glob(os.path.join(ROOT_DIR, dirname, '**/*.[hc]'),
-                                  recursive=True):
-            with open(filename) as file:
-                for name in _find_capi_vars(file):
-                    if name in capi_vars:
-                        assert not filename.endswith('.c')
-                        assert capi_vars[name].endswith('.c')
-                    capi_vars[name] = filename
-    return capi_vars
-
-
-def _find_capi_vars(lines):
-    for line in lines:
-        if not line.startswith('PyAPI_DATA'):
-            continue
-        assert '{' not in line
-        match = CAPI_REGEX.match(line)
-        assert match
-        names, = match.groups()
-        for name in names.split(', '):
-            yield name
-
-
-def _read_global_names(filename):
-    # These variables are shared between all interpreters in the process.
-    with open(filename) as file:
-        return {line.partition('#')[0].strip()
-                for line in file
-                if line.strip() and not line.startswith('#')}
-
-
-def _is_global_var(name, globalnames):
-    if _is_autogen_var(name):
-        return True
-    if _is_type_var(name):
-        return True
-    if _is_module(name):
-        return True
-    if _is_exception(name):
-        return True
-    if _is_compiler(name):
-        return True
-    return name in globalnames
-
-
-def _is_autogen_var(name):
-    return (
-        name.startswith('PyId_') or
-        '.' in name or
-        # Objects/typeobject.c
-        name.startswith('op_id.') or
-        name.startswith('rop_id.') or
-        # Python/graminit.c
-        name.startswith('arcs_') or
-        name.startswith('states_')
-        )
-
-
-def _is_type_var(name):
-    if name.endswith(('Type', '_Type', '_type')):  # XXX Always a static type?
-        return True
-    if name.endswith('_desc'):  # for structseq types
-        return True
-    return (
-        name.startswith('doc_') or
-        name.endswith(('_doc', '__doc__', '_docstring')) or
-        name.endswith('_methods') or
-        name.endswith('_fields') or
-        name.endswith(('_memberlist', '_members')) or
-        name.endswith('_slots') or
-        name.endswith(('_getset', '_getsets', '_getsetlist')) or
-        name.endswith('_as_mapping') or
-        name.endswith('_as_number') or
-        name.endswith('_as_sequence') or
-        name.endswith('_as_buffer') or
-        name.endswith('_as_async')
-        )
-
-
-def _is_module(name):
-    if name.endswith(('_functions', 'Methods', '_Methods')):
-        return True
-    if name == 'module_def':
-        return True
-    if name == 'initialized':
-        return True
-    return name.endswith(('module', '_Module'))
-
-
-def _is_exception(name):
-    # Other vars are enumerated in ignored-globals.txt.
-    if not name.startswith(('PyExc_', '_PyExc_')):
-        return False
-    return name.endswith(('Error', 'Warning'))
-
-
-def _is_compiler(name):
-    return (
-        # Python/Python-ast.c
-        name.endswith('_type') or
-        name.endswith('_singleton') or
-        name.endswith('_attributes')
-        )
-
-
-class Var(namedtuple('Var', 'name kind scope capi filename')):
-
-    @classmethod
-    def parse_nm(cls, line, expected, ignored, capi_vars, globalnames):
-        _, _, line = line.partition(' ')  # strip off the address
-        line = line.strip()
-        kind, _, line = line.partition(' ')
-        if kind in ignored or ():
-            return None
-        elif kind not in expected or ():
-            raise RuntimeError('unsupported NM type {!r}'.format(kind))
-
-        name, _, filename = line.partition('\t')
-        name = name.strip()
-        if _is_autogen_var(name):
-            return None
-        if _is_global_var(name, globalnames):
-            scope = 'global'
-        else:
-            scope = None
-        capi = (name in capi_vars or ())
-        if filename:
-            filename = os.path.relpath(filename.partition(':')[0])
-        return cls(name, kind, scope, capi, filename or '~???~')
-
-    @property
-    def external(self):
-        return self.kind.isupper()
-
-
-def find_vars(root, globals_filename=GLOBALS_FILE):
-    python = os.path.join(root, 'python')
-    if not os.path.exists(python):
-        raise RuntimeError('python binary missing (need to build it first?)')
-    capi_vars = find_capi_vars(root)
-    globalnames = _read_global_names(globals_filename)
-
-    nm = shutil.which('nm')
-    if nm is None:
-        # XXX Use dumpbin.exe /SYMBOLS on Windows.
-        raise NotImplementedError
-    else:
-        yield from (var
-                    for var in _find_var_symbols(python, nm, capi_vars,
-                                                 globalnames)
-                    if var.name not in IGNORED_VARS)
-
-
-NM_FUNCS = set('Tt')
-NM_PUBLIC_VARS = set('BD')
-NM_PRIVATE_VARS = set('bd')
-NM_VARS = NM_PUBLIC_VARS | NM_PRIVATE_VARS
-NM_DATA = set('Rr')
-NM_OTHER = set('ACGgiINpSsuUVvWw-?')
-NM_IGNORED = NM_FUNCS | NM_DATA | NM_OTHER
-
-
-def _find_var_symbols(python, nm, capi_vars, globalnames):
-    args = [nm,
-            '--line-numbers',
-            python]
-    out = subprocess.check_output(args)
-    for line in out.decode('utf-8').splitlines():
-        var = Var.parse_nm(line, NM_VARS, NM_IGNORED, capi_vars, globalnames)
-        if var is None:
-            continue
-        yield var
-
-
-#######################################
-
-class Filter(namedtuple('Filter', 'name op value action')):
-
-    @classmethod
-    def parse(cls, raw):
-        action = '+'
-        if raw.startswith(('+', '-')):
-            action = raw[0]
-            raw = raw[1:]
-        # XXX Support < and >?
-        name, op, value = raw.partition('=')
-        return cls(name, op, value, action)
-
-    def check(self, var):
-        value = getattr(var, self.name, None)
-        if not self.op:
-            matched = bool(value)
-        elif self.op == '=':
-            matched = (value == self.value)
-        else:
-            raise NotImplementedError
-
-        if self.action == '+':
-            return matched
-        elif self.action == '-':
-            return not matched
-        else:
-            raise NotImplementedError
-
-
-def filter_var(var, filters):
-    for filter in filters:
-        if not filter.check(var):
-            return False
-    return True
-
-
-def make_sort_key(spec):
-    columns = [(col.strip('_'), '_' if col.startswith('_') else '')
-               for col in spec]
-    def sort_key(var):
-        return tuple(getattr(var, col).lstrip(prefix)
-                     for col, prefix in columns)
-    return sort_key
-
-
-def make_groups(allvars, spec):
-    group = spec
-    groups = {}
-    for var in allvars:
-        value = getattr(var, group)
-        key = '{}: {}'.format(group, value)
-        try:
-            groupvars = groups[key]
-        except KeyError:
-            groupvars = groups[key] = []
-        groupvars.append(var)
-    return groups
-
-
-def format_groups(groups, columns, fmts, widths):
-    for group in sorted(groups):
-        groupvars = groups[group]
-        yield '', 0
-        yield '  # {}'.format(group), 0
-        yield from format_vars(groupvars, columns, fmts, widths)
-
-
-def format_vars(allvars, columns, fmts, widths):
-    fmt = ' '.join(fmts[col] for col in columns)
-    fmt = ' ' + fmt.replace(' ', '   ') + ' '  # for div margin
-    header = fmt.replace(':', ':^').format(*(col.upper() for col in columns))
-    yield header, 0
-    div = ' '.join('-'*(widths[col]+2) for col in columns)
-    yield div, 0
-    for var in allvars:
-        values = (getattr(var, col) for col in columns)
-        row = fmt.format(*('X' if val is True else val or ''
-                           for val in values))
-        yield row, 1
-    yield div, 0
-
-
-#######################################
-
-COLUMNS = 'name,external,capi,scope,filename'
-COLUMN_NAMES = COLUMNS.split(',')
-
-COLUMN_WIDTHS = {col: len(col)
-                 for col in COLUMN_NAMES}
-COLUMN_WIDTHS.update({
-        'name': 50,
-        'scope': 7,
-        'filename': 40,
-        })
-COLUMN_FORMATS = {col: '{:%s}' % width
-                  for col, width in COLUMN_WIDTHS.items()}
-for col in COLUMN_FORMATS:
-    if COLUMN_WIDTHS[col] == len(col):
-        COLUMN_FORMATS[col] = COLUMN_FORMATS[col].replace(':', ':^')
-
-
-def _parse_filters_arg(raw, error):
-    filters = []
-    for value in raw.split(','):
-        value=value.strip()
-        if not value:
-            continue
-        try:
-            filter = Filter.parse(value)
-            if filter.name not in COLUMN_NAMES:
-                raise Exception('unsupported column {!r}'.format(filter.name))
-        except Exception as e:
-            error('bad filter {!r}: {}'.format(raw, e))
-        filters.append(filter)
-    return filters
-
-
-def _parse_columns_arg(raw, error):
-    columns = raw.split(',')
-    for column in columns:
-        if column not in COLUMN_NAMES:
-            error('unsupported column {!r}'.format(column))
-    return columns
-
-
-def _parse_sort_arg(raw, error):
-    sort = raw.split(',')
-    for column in sort:
-        if column.lstrip('_') not in COLUMN_NAMES:
-            error('unsupported column {!r}'.format(column))
-    return sort
-
-
-def _parse_group_arg(raw, error):
-    if not raw:
-        return raw
-    group = raw
-    if group not in COLUMN_NAMES:
-        error('unsupported column {!r}'.format(group))
-    if group != 'filename':
-        error('unsupported group {!r}'.format(group))
-    return group
-
-
-def parse_args(argv=None):
-    if argv is None:
-        argv = sys.argv[1:]
-
-    import argparse
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument('-v', '--verbose', action='count', default=0)
-    parser.add_argument('-q', '--quiet', action='count', default=0)
-
-    parser.add_argument('--filters', default='-scope',
-                        help='[[-]<COLUMN>[=<GLOB>]] ...')
-
-    parser.add_argument('--columns', default=COLUMNS,
-                        help='a comma-separated list of columns to show')
-    parser.add_argument('--sort', default='filename,_name',
-                        help='a comma-separated list of columns to sort')
-    parser.add_argument('--group',
-                        help='group by the given column name (- to not group)')
-
-    parser.add_argument('--rc-on-match', dest='rc', type=int)
-
-    parser.add_argument('filename', nargs='?', default=GLOBALS_FILE)
-
-    args = parser.parse_args(argv)
-
-    verbose = vars(args).pop('verbose', 0)
-    quiet = vars(args).pop('quiet', 0)
-    args.verbosity = max(0, VERBOSITY + verbose - quiet)
-
-    if args.sort.startswith('filename') and not args.group:
-        args.group = 'filename'
-
-    if args.rc is None:
-        if '-scope=core' in args.filters or 'core' not in args.filters:
-            args.rc = 0
-        else:
-            args.rc = 1
-
-    args.filters = _parse_filters_arg(args.filters, parser.error)
-    args.columns = _parse_columns_arg(args.columns, parser.error)
-    args.sort = _parse_sort_arg(args.sort, parser.error)
-    args.group = _parse_group_arg(args.group, parser.error)
-
-    return args
-
-
-def main(root=ROOT_DIR, filename=GLOBALS_FILE,
-         filters=None, columns=COLUMN_NAMES, sort=None, group=None,
-         verbosity=VERBOSITY, rc=1):
-
-    log = lambda msg: ...
-    if verbosity >= 2:
-        log = lambda msg: print(msg)
-
-    allvars = (var
-               for var in find_vars(root, filename)
-               if filter_var(var, filters))
-    if sort:
-        allvars = sorted(allvars, key=make_sort_key(sort))
-
-    if group:
-        try:
-            columns.remove(group)
-        except ValueError:
-            pass
-        grouped = make_groups(allvars, group)
-        lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
-    else:
-        lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
-
-    total = 0
-    for line, count in lines:
-        total += count
-        log(line)
-    log('\ntotal: {}'.format(total))
-
-    if total and rc:
-        print('ERROR: found unsafe globals', file=sys.stderr)
-        return rc
-    return 0
-
-
-if __name__ == '__main__':
-    args = parse_args()
-    sys.exit(
-            main(**vars(args)))
diff --git a/Tools/c-globals/ignored-globals.txt b/Tools/c-globals/ignored-globals.txt
deleted file mode 100644
index 4fafba6eefa..00000000000
--- a/Tools/c-globals/ignored-globals.txt
+++ /dev/null
@@ -1,494 +0,0 @@
-# All variables declared here are shared between all interpreters
-# in a single process.  That means that they must not be changed
-# unless that change should apply to all interpreters.
-#
-# See check-c-globals.py.
-#
-# Many generic names are handled via the script:
-#
-# * most exceptions and all warnings handled via _is_exception()
-# * for builtin modules, generic names are handled via _is_module()
-# * generic names for static types handled via _is_type_var()
-# * AST vars handled via _is_compiler()
-
-
-#######################################
-# main
-
-# Modules/getpath.c
-exec_prefix
-module_search_path
-prefix
-progpath
-
-# Modules/main.c
-orig_argc
-orig_argv
-
-# Python/getopt.c
-opt_ptr
-_PyOS_optarg
-_PyOS_opterr
-_PyOS_optind
-
-
-#######################################
-# REPL
-
-# Parser/myreadline.c
-PyOS_InputHook
-PyOS_ReadlineFunctionPointer
-_PyOS_ReadlineLock
-_PyOS_ReadlineTState
-
-
-#######################################
-# state
-
-# Python/dtoa.c
-p5s
-pmem_next  # very slight race
-private_mem  # very slight race
-
-# Python/import.c
-# For the moment the import lock stays global.  Ultimately there should
-# be a global lock for extension modules and a per-interpreter lock.
-import_lock
-import_lock_level
-import_lock_thread
-
-# Python/pylifecycle.c
-_PyRuntime
-
-
-#---------------------------------
-# module globals (PyObject)
-
-# Modules/_functoolsmodule.c
-kwd_mark
-
-# Modules/_localemodule.c
-Error
-
-# Modules/_threadmodule.c
-ThreadError
-
-# Modules/_tracemalloc.c
-unknown_filename
-
-# Modules/gcmodule.c
-gc_str
-
-# Modules/posixmodule.c
-billion
-posix_putenv_garbage
-
-# Modules/signalmodule.c
-DefaultHandler
-IgnoreHandler
-IntHandler
-ItimerError
-
-# Modules/zipimport.c
-ZipImportError
-zip_directory_cache
-
-
-#---------------------------------
-# module globals (other)
-
-# Modules/_tracemalloc.c
-allocators
-tables_lock
-tracemalloc_config
-tracemalloc_empty_traceback
-tracemalloc_filenames
-tracemalloc_peak_traced_memory
-tracemalloc_reentrant_key
-tracemalloc_traceback
-tracemalloc_tracebacks
-tracemalloc_traced_memory
-tracemalloc_traces
-
-# Modules/faulthandler.c
-fatal_error
-faulthandler_handlers
-old_stack
-stack
-thread
-user_signals
-
-# Modules/posixmodule.c
-posix_constants_confstr
-posix_constants_pathconf
-posix_constants_sysconf
-_stat_float_times  # deprecated, __main__-only
-structseq_new
-ticks_per_second
-
-# Modules/signalmodule.c
-Handlers  # main thread only
-is_tripped  # main thread only
-main_pid
-main_thread
-old_siginthandler
-wakeup_fd  # main thread only
-
-# Modules/zipimport.c
-zip_searchorder
-
-# Python/bltinmodule.c
-Py_FileSystemDefaultEncodeErrors
-Py_FileSystemDefaultEncoding
-Py_HasFileSystemDefaultEncoding
-
-# Python/sysmodule.c
-_PySys_ImplCacheTag
-_PySys_ImplName
-
-
-#---------------------------------
-# freelists
-
-# Modules/_collectionsmodule.c
-freeblocks
-numfreeblocks
-
-# Objects/classobject.c
-free_list
-numfree
-
-# Objects/dictobject.c
-free_list
-keys_free_list
-numfree
-numfreekeys
-
-# Objects/exceptions.c
-memerrors_freelist
-memerrors_numfree
-
-# Objects/floatobject.c
-free_list
-numfree
-
-# Objects/frameobject.c
-free_list
-numfree
-
-# Objects/genobject.c
-ag_asend_freelist
-ag_asend_freelist_free
-ag_value_freelist
-ag_value_freelist_free
-
-# Objects/listobject.c
-free_list
-numfree
-
-# Objects/methodobject.c
-free_list
-numfree
-
-# Objects/sliceobject.c
-slice_cache  # slight race
-
-# Objects/tupleobject.c
-free_list
-numfree
-
-# Python/dtoa.c
-freelist  # very slight race
-
-
-#---------------------------------
-# caches (PyObject)
-
-# Objects/typeobject.c
-method_cache  # only for static types
-next_version_tag  # only for static types
-
-# Python/dynload_shlib.c
-handles  # slight race during import
-nhandles  # slight race during import
-
-# Python/import.c
-extensions  # slight race on init during import
-
-
-#---------------------------------
-# caches (other)
-
-# Python/bootstrap_hash.c
-urandom_cache
-
-# Python/modsupport.c
-_Py_PackageContext  # Slight race during import!  Move to PyThreadState?
-
-
-#---------------------------------
-# counters
-
-# Objects/bytesobject.c
-null_strings
-one_strings
-
-# Objects/dictobject.c
-pydict_global_version
-
-# Objects/moduleobject.c
-max_module_number  # slight race during import
-
-
-#######################################
-# constants
-
-#---------------------------------
-# singletons
-
-# Objects/boolobject.c
-_Py_FalseStruct
-_Py_TrueStruct
-
-# Objects/object.c
-_Py_NoneStruct
-_Py_NotImplementedStruct
-
-# Objects/sliceobject.c
-_Py_EllipsisObject
-
-
-#---------------------------------
-# constants (other)
-
-# Modules/config.c
-_PyImport_Inittab
-
-# Objects/bytearrayobject.c
-_PyByteArray_empty_string
-
-# Objects/dictobject.c
-empty_keys_struct
-empty_values
-
-# Objects/floatobject.c
-detected_double_format
-detected_float_format
-double_format
-float_format
-
-# Objects/longobject.c
-_PyLong_DigitValue
-
-# Objects/object.c
-_Py_SwappedOp
-
-# Objects/obmalloc.c
-_PyMem_Debug
-
-# Objects/setobject.c
-_dummy_struct
-
-# Objects/structseq.c
-PyStructSequence_UnnamedField
-
-# Objects/typeobject.c
-name_op
-slotdefs  # almost
-slotdefs_initialized  # almost
-subtype_getsets_dict_only
-subtype_getsets_full
-subtype_getsets_weakref_only
-tp_new_methoddef
-
-# Objects/unicodeobject.c
-bloom_linebreak
-static_strings  # slight race
-
-# Parser/tokenizer.c
-_PyParser_TokenNames
-
-# Python/Python-ast.c
-alias_fields
-
-# Python/codecs.c
-Py_hexdigits
-ucnhash_CAPI  # slight performance-only race
-
-# Python/dynload_shlib.c
-_PyImport_DynLoadFiletab
-
-# Python/fileutils.c
-_Py_open_cloexec_works
-force_ascii
-
-# Python/frozen.c
-M___hello__
-PyImport_FrozenModules
-
-# Python/graminit.c
-_PyParser_Grammar
-dfas
-labels
-
-# Python/import.c
-PyImport_Inittab
-
-# Python/pylifecycle.c
-_TARGET_LOCALES
-
-
-#---------------------------------
-# initialized (PyObject)
-
-# Objects/bytesobject.c
-characters
-nullstring
-
-# Objects/exceptions.c
-PyExc_RecursionErrorInst
-errnomap
-
-# Objects/longobject.c
-_PyLong_One
-_PyLong_Zero
-small_ints
-
-# Objects/setobject.c
-emptyfrozenset
-
-# Objects/unicodeobject.c
-interned  # slight race on init in PyUnicode_InternInPlace()
-unicode_empty
-unicode_latin1
-
-
-#---------------------------------
-# initialized (other)
-
-# Python/getargs.c
-static_arg_parsers
-
-# Python/pyhash.c
-PyHash_Func
-_Py_HashSecret
-_Py_HashSecret_Initialized
-
-# Python/pylifecycle.c
-_Py_StandardStreamEncoding
-_Py_StandardStreamErrors
-default_home
-env_home
-progname
-Py_BytesWarningFlag
-Py_DebugFlag
-Py_DontWriteBytecodeFlag
-Py_FrozenFlag
-Py_HashRandomizationFlag
-Py_IgnoreEnvironmentFlag
-Py_InspectFlag
-Py_InteractiveFlag
-Py_IsolatedFlag
-Py_NoSiteFlag
-Py_NoUserSiteDirectory
-Py_OptimizeFlag
-Py_QuietFlag
-Py_UnbufferedStdioFlag
-Py_UseClassExceptionsFlag
-Py_VerboseFlag
-
-
-#---------------------------------
-# types
-
-# Modules/_threadmodule.c
-Locktype
-RLocktype
-localdummytype
-localtype
-
-# Objects/exceptions.c
-PyExc_BaseException
-PyExc_Exception
-PyExc_GeneratorExit
-PyExc_KeyboardInterrupt
-PyExc_StopAsyncIteration
-PyExc_StopIteration
-PyExc_SystemExit
-_PyExc_BaseException
-_PyExc_Exception
-_PyExc_GeneratorExit
-_PyExc_KeyboardInterrupt
-_PyExc_StopAsyncIteration
-_PyExc_StopIteration
-_PyExc_SystemExit
-
-# Objects/structseq.c
-_struct_sequence_template
-
-
-#---------------------------------
-# interned strings/bytes
-
-# Modules/_io/_iomodule.c
-_PyIO_empty_bytes
-_PyIO_empty_str
-_PyIO_str_close
-_PyIO_str_closed
-_PyIO_str_decode
-_PyIO_str_encode
-_PyIO_str_fileno
-_PyIO_str_flush
-_PyIO_str_getstate
-_PyIO_str_isatty
-_PyIO_str_newlines
-_PyIO_str_nl
-_PyIO_str_read
-_PyIO_str_read1
-_PyIO_str_readable
-_PyIO_str_readall
-_PyIO_str_readinto
-_PyIO_str_readline
-_PyIO_str_reset
-_PyIO_str_seek
-_PyIO_str_seekable
-_PyIO_str_setstate
-_PyIO_str_tell
-_PyIO_str_truncate
-_PyIO_str_writable
-_PyIO_str_write
-
-# Modules/_threadmodule.c
-str_dict
-
-# Objects/boolobject.c
-false_str
-true_str
-
-# Objects/listobject.c
-indexerr
-
-# Python/symtable.c
-__class__
-dictcomp
-genexpr
-lambda
-listcomp
-setcomp
-top
-
-# Python/sysmodule.c
-whatstrings
-
-
-#######################################
-# hacks
-
-# Objects/object.c
-_Py_abstract_hack
-
-# Objects/setobject.c
-_PySet_Dummy
-
-# Python/pylifecycle.c
-_PyOS_mystrnicmp_hack