[pypy-commit] pypy remove-getfield-pure: Merge with default

sbauman pypy.commits at gmail.com
Fri Jan 8 11:25:01 EST 2016


Author: Spenser Andrew Bauman <sabauma at gmail.com>
Branch: remove-getfield-pure
Changeset: r81626:a699f3807ffd
Date: 2016-01-05 13:35 -0500
http://bitbucket.org/pypy/pypy/changeset/a699f3807ffd/

Log:	Merge with default

diff --git a/LICENSE b/LICENSE
--- a/LICENSE
+++ b/LICENSE
@@ -28,7 +28,7 @@
     DEALINGS IN THE SOFTWARE.
 
 
-PyPy Copyright holders 2003-2015
+PyPy Copyright holders 2003-2016
 ----------------------------------- 
 
 Except when otherwise stated (look for LICENSE files or information at
diff --git a/lib-python/2.7/pickle.py b/lib-python/2.7/pickle.py
--- a/lib-python/2.7/pickle.py
+++ b/lib-python/2.7/pickle.py
@@ -1376,6 +1376,7 @@
 
 def decode_long(data):
     r"""Decode a long from a two's complement little-endian binary string.
+    This is overriden on PyPy by a RPython version that has linear complexity.
 
     >>> decode_long('')
     0L
@@ -1402,6 +1403,11 @@
         n -= 1L << (nbytes * 8)
     return n
 
+try:
+    from __pypy__ import decode_long
+except ImportError:
+    pass
+
 # Shorthands
 
 try:
diff --git a/lib-python/2.7/sysconfig.py b/lib-python/2.7/sysconfig.py
--- a/lib-python/2.7/sysconfig.py
+++ b/lib-python/2.7/sysconfig.py
@@ -524,6 +524,13 @@
             import _osx_support
             _osx_support.customize_config_vars(_CONFIG_VARS)
 
+        # PyPy:
+        import imp
+        for suffix, mode, type_ in imp.get_suffixes():
+            if type_ == imp.C_EXTENSION:
+                _CONFIG_VARS['SOABI'] = suffix.split('.')[1]
+                break
+
     if args:
         vals = []
         for name in args:
diff --git a/lib_pypy/cPickle.py b/lib_pypy/cPickle.py
--- a/lib_pypy/cPickle.py
+++ b/lib_pypy/cPickle.py
@@ -559,6 +559,7 @@
 
 def decode_long(data):
     r"""Decode a long from a two's complement little-endian binary string.
+    This is overriden on PyPy by a RPython version that has linear complexity.
 
     >>> decode_long('')
     0L
@@ -592,6 +593,11 @@
         n -= 1L << (nbytes << 3)
     return n
 
+try:
+    from __pypy__ import decode_long
+except ImportError:
+    pass
+
 def load(f):
     return Unpickler(f).load()
 
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -5,6 +5,8 @@
 .. this is a revision shortly after release-4.0.1
 .. startrev: 4b5c840d0da2
 
+Fixed ``_PyLong_FromByteArray()``, which was buggy.
+
 .. branch: numpy-1.10
 
 Fix tests to run cleanly with -A and start to fix micronumpy for upstream numpy
@@ -101,3 +103,10 @@
 
 Fix the cryptic exception message when attempting to use extended slicing
 in rpython. Was issue #2211.
+
+.. branch: ec-keepalive
+
+Optimize the case where, in a new C-created thread, we keep invoking
+short-running Python callbacks.  (CFFI on CPython has a hack to achieve
+the same result.)  This can also be seen as a bug fix: previously,
+thread-local objects would be reset between two such calls.
diff --git a/pypy/module/__pypy__/__init__.py b/pypy/module/__pypy__/__init__.py
--- a/pypy/module/__pypy__/__init__.py
+++ b/pypy/module/__pypy__/__init__.py
@@ -89,6 +89,7 @@
         'set_code_callback'         : 'interp_magic.set_code_callback',
         'save_module_content_for_future_reload':
                           'interp_magic.save_module_content_for_future_reload',
+        'decode_long'               : 'interp_magic.decode_long',
     }
     if sys.platform == 'win32':
         interpleveldefs['get_console_cp'] = 'interp_magic.get_console_cp'
diff --git a/pypy/module/__pypy__/interp_magic.py b/pypy/module/__pypy__/interp_magic.py
--- a/pypy/module/__pypy__/interp_magic.py
+++ b/pypy/module/__pypy__/interp_magic.py
@@ -1,4 +1,4 @@
-from pypy.interpreter.error import OperationError, wrap_oserror
+from pypy.interpreter.error import OperationError, oefmt, wrap_oserror
 from pypy.interpreter.gateway import unwrap_spec
 from pypy.interpreter.pycode import CodeHookCache
 from pypy.interpreter.pyframe import PyFrame
@@ -158,4 +158,13 @@
     if space.is_none(w_callable):
         cache._code_hook = None
     else:
-        cache._code_hook = w_callable
\ No newline at end of file
+        cache._code_hook = w_callable
+
+@unwrap_spec(string=str, byteorder=str, signed=int)
+def decode_long(space, string, byteorder='little', signed=1):
+    from rpython.rlib.rbigint import rbigint, InvalidEndiannessError
+    try:
+        result = rbigint.frombytes(string, byteorder, bool(signed))
+    except InvalidEndiannessError:
+        raise oefmt(space.w_ValueError, "invalid byteorder argument")
+    return space.newlong_from_rbigint(result)
diff --git a/pypy/module/__pypy__/test/test_magic.py b/pypy/module/__pypy__/test/test_magic.py
--- a/pypy/module/__pypy__/test/test_magic.py
+++ b/pypy/module/__pypy__/test/test_magic.py
@@ -30,4 +30,20 @@
 """ in d
         finally:
             __pypy__.set_code_callback(None)
-        assert d['f'].__code__ in l
\ No newline at end of file
+        assert d['f'].__code__ in l
+
+    def test_decode_long(self):
+        from __pypy__ import decode_long
+        assert decode_long('') == 0
+        assert decode_long('\xff\x00') == 255
+        assert decode_long('\xff\x7f') == 32767
+        assert decode_long('\x00\xff') == -256
+        assert decode_long('\x00\x80') == -32768
+        assert decode_long('\x80') == -128
+        assert decode_long('\x7f') == 127
+        assert decode_long('\x55' * 97) == (1 << (97 * 8)) // 3
+        assert decode_long('\x00\x80', 'big') == 128
+        assert decode_long('\xff\x7f', 'little', False) == 32767
+        assert decode_long('\x00\x80', 'little', False) == 32768
+        assert decode_long('\x00\x80', 'little', True) == -32768
+        raises(ValueError, decode_long, '', 'foo')
diff --git a/pypy/module/cpyext/longobject.py b/pypy/module/cpyext/longobject.py
--- a/pypy/module/cpyext/longobject.py
+++ b/pypy/module/cpyext/longobject.py
@@ -228,26 +228,11 @@
 def _PyLong_FromByteArray(space, bytes, n, little_endian, signed):
     little_endian = rffi.cast(lltype.Signed, little_endian)
     signed = rffi.cast(lltype.Signed, signed)
-
-    result = rbigint()
-    negative = False
-
-    for i in range(0, n):
-        if little_endian:
-            c = intmask(bytes[i])
-        else:
-            c = intmask(bytes[n - i - 1])
-        if i == 0 and signed and c & 0x80:
-            negative = True
-        if negative:
-            c = c ^ 0xFF
-        digit = rbigint.fromint(c)
-
-        result = result.lshift(8)
-        result = result.add(digit)
-
-    if negative:
-        result = result.neg()
-
+    s = rffi.charpsize2str(rffi.cast(rffi.CCHARP, bytes),
+                           rffi.cast(lltype.Signed, n))
+    if little_endian:
+        byteorder = 'little'
+    else:
+        byteorder = 'big'
+    result = rbigint.frombytes(s, byteorder, signed != 0)
     return space.newlong_from_rbigint(result)
-
diff --git a/pypy/module/cpyext/test/test_longobject.py b/pypy/module/cpyext/test/test_longobject.py
--- a/pypy/module/cpyext/test/test_longobject.py
+++ b/pypy/module/cpyext/test/test_longobject.py
@@ -175,10 +175,26 @@
                                               little_endian, is_signed);
              """),
             ])
-        assert module.from_bytearray(True, False) == 0x9ABC
-        assert module.from_bytearray(True, True) == -0x6543
-        assert module.from_bytearray(False, False) == 0xBC9A
-        assert module.from_bytearray(False, True) == -0x4365
+        assert module.from_bytearray(True, False) == 0xBC9A
+        assert module.from_bytearray(True, True) == -0x4366
+        assert module.from_bytearray(False, False) == 0x9ABC
+        assert module.from_bytearray(False, True) == -0x6544
+
+    def test_frombytearray_2(self):
+        module = self.import_extension('foo', [
+            ("from_bytearray", "METH_VARARGS",
+             """
+                 int little_endian, is_signed;
+                 if (!PyArg_ParseTuple(args, "ii", &little_endian, &is_signed))
+                     return NULL;
+                 return _PyLong_FromByteArray("\x9A\xBC\x41", 3,
+                                              little_endian, is_signed);
+             """),
+            ])
+        assert module.from_bytearray(True, False) == 0x41BC9A
+        assert module.from_bytearray(True, True) == 0x41BC9A
+        assert module.from_bytearray(False, False) == 0x9ABC41
+        assert module.from_bytearray(False, True) == -0x6543BF
 
     def test_fromunicode(self):
         module = self.import_extension('foo', [
diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py
--- a/pypy/module/posix/interp_posix.py
+++ b/pypy/module/posix/interp_posix.py
@@ -299,7 +299,7 @@
         return build_stat_result(space, st)
 
 def lstat(space, w_path):
-    "Like stat(path), but do no follow symbolic links."
+    "Like stat(path), but do not follow symbolic links."
     try:
         st = dispatch_filename(rposix_stat.lstat)(space, w_path)
     except OSError, e:
diff --git a/pypy/module/thread/__init__.py b/pypy/module/thread/__init__.py
--- a/pypy/module/thread/__init__.py
+++ b/pypy/module/thread/__init__.py
@@ -27,7 +27,7 @@
         from pypy.module.thread import gil
         MixedModule.__init__(self, space, *args)
         prev_ec = space.threadlocals.get_ec()
-        space.threadlocals = gil.GILThreadLocals()
+        space.threadlocals = gil.GILThreadLocals(space)
         space.threadlocals.initialize(space)
         if prev_ec is not None:
             space.threadlocals._set_ec(prev_ec)
diff --git a/pypy/module/thread/test/test_gil.py b/pypy/module/thread/test/test_gil.py
--- a/pypy/module/thread/test/test_gil.py
+++ b/pypy/module/thread/test/test_gil.py
@@ -65,7 +65,7 @@
             except Exception, e:
                 assert 0
             thread.gc_thread_die()
-        my_gil_threadlocals = gil.GILThreadLocals()
+        my_gil_threadlocals = gil.GILThreadLocals(space)
         def f():
             state.data = []
             state.datalen1 = 0
diff --git a/pypy/module/thread/threadlocals.py b/pypy/module/thread/threadlocals.py
--- a/pypy/module/thread/threadlocals.py
+++ b/pypy/module/thread/threadlocals.py
@@ -1,5 +1,7 @@
-from rpython.rlib import rthread
+import weakref
+from rpython.rlib import rthread, rshrinklist
 from rpython.rlib.objectmodel import we_are_translated
+from rpython.rlib.rarithmetic import r_ulonglong
 from pypy.module.thread.error import wrap_thread_error
 from pypy.interpreter.executioncontext import ExecutionContext
 
@@ -13,15 +15,51 @@
     a thread finishes.  This works as long as the thread was started by
     os_thread.bootstrap()."""
 
-    def __init__(self):
+    def __init__(self, space):
         "NOT_RPYTHON"
-        self._valuedict = {}   # {thread_ident: ExecutionContext()}
+        #
+        # This object tracks code that enters and leaves threads.
+        # There are two APIs.  For Python-level threads, we know when
+        # the thread starts and ends, and we call enter_thread() and
+        # leave_thread().  In a few other cases, like callbacks, we
+        # might be running in some never-seen-before thread: in this
+        # case, the callback logic needs to call try_enter_thread() at
+        # the start, and if this returns True it needs to call
+        # leave_thread() at the end.
+        #
+        # We implement an optimization for the second case (which only
+        # works if we translate with a framework GC and with
+        # rweakref).  If try_enter_thread() is called in a
+        # never-seen-before thread, it still returns False and
+        # remembers the ExecutionContext with 'self._weaklist'.  The
+        # next time we call try_enter_thread() again in the same
+        # thread, the ExecutionContext is reused.  The optimization is
+        # not completely invisible to the user: 'thread._local()'
+        # values will remain.  We can argue that it is the correct
+        # behavior to do that, and the behavior we get if the
+        # optimization is disabled is buggy (but hard to do better
+        # then).
+        #
+        # 'self._valuedict' is a dict mapping the thread idents to
+        # ExecutionContexts; it does not list the ExecutionContexts
+        # which are in 'self._weaklist'.  (The latter is more precisely
+        # a list of AutoFreeECWrapper objects, defined below, which
+        # each references the ExecutionContext.)
+        #
+        self.space = space
+        self._valuedict = {}
         self._cleanup_()
         self.raw_thread_local = rthread.ThreadLocalReference(ExecutionContext,
                                                             loop_invariant=True)
 
+    def can_optimize_with_weaklist(self):
+        config = self.space.config
+        return (config.translation.rweakref and
+                rthread.ThreadLocalReference.automatic_keepalive(config))
+
     def _cleanup_(self):
         self._valuedict.clear()
+        self._weaklist = None
         self._mainthreadident = 0
 
     def enter_thread(self, space):
@@ -29,19 +67,35 @@
         self._set_ec(space.createexecutioncontext())
 
     def try_enter_thread(self, space):
-        if rthread.get_ident() in self._valuedict:
+        # common case: the thread-local has already got a value
+        if self.raw_thread_local.get() is not None:
             return False
-        self.enter_thread(space)
-        return True
 
-    def _set_ec(self, ec):
+        # Else, make and attach a new ExecutionContext
+        ec = space.createexecutioncontext()
+        if not self.can_optimize_with_weaklist():
+            self._set_ec(ec)
+            return True
+
+        # If can_optimize_with_weaklist(), then 'rthread' keeps the
+        # thread-local values alive until the end of the thread.  Use
+        # AutoFreeECWrapper as an object with a __del__; when this
+        # __del__ is called, it means the thread was really finished.
+        # In this case we don't want leave_thread() to be called
+        # explicitly, so we return False.
+        if self._weaklist is None:
+            self._weaklist = ListECWrappers()
+        self._weaklist.append(weakref.ref(AutoFreeECWrapper(ec)))
+        self._set_ec(ec, register_in_valuedict=False)
+        return False
+
+    def _set_ec(self, ec, register_in_valuedict=True):
         ident = rthread.get_ident()
         if self._mainthreadident == 0 or self._mainthreadident == ident:
             ec._signals_enabled = 1    # the main thread is enabled
             self._mainthreadident = ident
-        self._valuedict[ident] = ec
-        # This logic relies on hacks and _make_sure_does_not_move().
-        # It only works because we keep the 'ec' alive in '_valuedict' too.
+        if register_in_valuedict:
+            self._valuedict[ident] = ec
         self.raw_thread_local.set(ec)
 
     def leave_thread(self, space):
@@ -84,7 +138,23 @@
         ec._signals_enabled = new
 
     def getallvalues(self):
-        return self._valuedict
+        if self._weaklist is None:
+            return self._valuedict
+        # This logic walks the 'self._weaklist' list and adds the
+        # ExecutionContexts to 'result'.  We are careful in case there
+        # are two AutoFreeECWrappers in the list which have the same
+        # 'ident'; in this case we must keep the most recent one (the
+        # older one should be deleted soon).  Moreover, entries in
+        # self._valuedict have priority because they are never
+        # outdated.
+        result = {}
+        for h in self._weaklist.items():
+            wrapper = h()
+            if wrapper is not None and not wrapper.deleted:
+                result[wrapper.ident] = wrapper.ec
+                # ^^ this possibly overwrites an older ec
+        result.update(self._valuedict)
+        return result
 
     def reinit_threads(self, space):
         "Called in the child process after a fork()"
@@ -94,7 +164,31 @@
         old_sig = ec._signals_enabled
         if ident != self._mainthreadident:
             old_sig += 1
-        self._cleanup_()
+        self._cleanup_()      # clears self._valuedict
         self._mainthreadident = ident
         self._set_ec(ec)
         ec._signals_enabled = old_sig
+
+
+class AutoFreeECWrapper(object):
+    deleted = False
+
+    def __init__(self, ec):
+        # this makes a loop between 'self' and 'ec'.  It should not prevent
+        # the __del__ method here from being called.
+        self.ec = ec
+        ec._threadlocals_auto_free = self
+        self.ident = rthread.get_ident()
+
+    def __del__(self):
+        from pypy.module.thread.os_local import thread_is_stopping
+        # this is always called in another thread: the thread
+        # referenced by 'self.ec' has finished at that point, and
+        # we're just after the GC which finds no more references to
+        # 'ec' (and thus to 'self').
+        self.deleted = True
+        thread_is_stopping(self.ec)
+
+class ListECWrappers(rshrinklist.AbstractShrinkList):
+    def must_keep(self, wref):
+        return wref() is not None
diff --git a/pypy/objspace/std/test/test_longobject.py b/pypy/objspace/std/test/test_longobject.py
--- a/pypy/objspace/std/test/test_longobject.py
+++ b/pypy/objspace/std/test/test_longobject.py
@@ -358,3 +358,10 @@
         assert 3L.__coerce__(4L) == (3L, 4L)
         assert 3L.__coerce__(4) == (3, 4)
         assert 3L.__coerce__(object()) == NotImplemented
+
+    def test_linear_long_base_16(self):
+        # never finishes if long(_, 16) is not linear-time
+        size = 100000
+        n = "a" * size
+        expected = (2 << (size * 4)) // 3
+        assert long(n, 16) == expected
diff --git a/rpython/rlib/buffer.py b/rpython/rlib/buffer.py
--- a/rpython/rlib/buffer.py
+++ b/rpython/rlib/buffer.py
@@ -97,6 +97,18 @@
 
     def __init__(self, buffer, offset, size):
         self.readonly = buffer.readonly
+        if isinstance(buffer, SubBuffer):     # don't nest them
+            # we want a view (offset, size) over a view
+            # (buffer.offset, buffer.size) over buffer.buffer.
+            # Note that either '.size' can be -1 to mean 'up to the end'.
+            at_most = buffer.getlength() - offset
+            if size > at_most or size < 0:
+                if at_most < 0:
+                    at_most = 0
+                size = at_most
+            offset += buffer.offset
+            buffer = buffer.buffer
+        #
         self.buffer = buffer
         self.offset = offset
         self.size = size
diff --git a/rpython/rlib/entrypoint.py b/rpython/rlib/entrypoint.py
--- a/rpython/rlib/entrypoint.py
+++ b/rpython/rlib/entrypoint.py
@@ -1,4 +1,4 @@
-secondary_entrypoints = {}
+secondary_entrypoints = {"main": []}
 
 import py
 from rpython.rtyper.lltypesystem import lltype, rffi
@@ -109,20 +109,3 @@
                     "you.  Another difference is that entrypoint_highlevel() "
                     "returns the normal Python function, which can be safely "
                     "called from more Python code.")
-
-
-# the point of dance below is so the call to rpython_startup_code actually
-# does call asm_stack_bottom. It's here because there is no other good place.
-# This thing is imported by any target which has any API, so it'll get
-# registered
-
-RPython_StartupCode = rffi.llexternal('RPython_StartupCode', [], lltype.Void,
-                                      _nowrapper=True,
-                                      random_effects_on_gcobjs=True)
-
-@entrypoint_lowlevel('main', [], c_name='rpython_startup_code')
-def rpython_startup_code():
-    rffi.stackcounter.stacks_counter += 1
-    llop.gc_stack_bottom(lltype.Void)   # marker for trackgcroot.py
-    RPython_StartupCode()
-    rffi.stackcounter.stacks_counter -= 1
diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py
--- a/rpython/rlib/objectmodel.py
+++ b/rpython/rlib/objectmodel.py
@@ -335,6 +335,25 @@
     # XXX this can be made more efficient in the future
     return bytearray(str(i))
 
+def fetch_translated_config():
+    """Returns the config that is current when translating.
+    Returns None if not translated.
+    """
+    return None
+
+class Entry(ExtRegistryEntry):
+    _about_ = fetch_translated_config
+
+    def compute_result_annotation(self):
+        config = self.bookkeeper.annotator.translator.config
+        return self.bookkeeper.immutablevalue(config)
+
+    def specialize_call(self, hop):
+        from rpython.rtyper.lltypesystem import lltype
+        translator = hop.rtyper.annotator.translator
+        hop.exception_cannot_occur()
+        return hop.inputconst(lltype.Void, translator.config)
+
 # ____________________________________________________________
 
 class FREED_OBJECT(object):
diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py
--- a/rpython/rlib/rbigint.py
+++ b/rpython/rlib/rbigint.py
@@ -2794,8 +2794,10 @@
 
 def parse_digit_string(parser):
     # helper for fromstr
+    base = parser.base
+    if (base & (base - 1)) == 0:
+        return parse_string_from_binary_base(parser)
     a = rbigint()
-    base = parser.base
     digitmax = BASE_MAX[base]
     tens, dig = 1, 0
     while True:
@@ -2811,3 +2813,52 @@
             tens *= base
     a.sign *= parser.sign
     return a
+
+def parse_string_from_binary_base(parser):
+    # The point to this routine is that it takes time linear in the number of
+    # string characters.
+    from rpython.rlib.rstring import ParseStringError
+
+    base = parser.base
+    if   base ==  2: bits_per_char = 1
+    elif base ==  4: bits_per_char = 2
+    elif base ==  8: bits_per_char = 3
+    elif base == 16: bits_per_char = 4
+    elif base == 32: bits_per_char = 5
+    else:
+        raise AssertionError
+
+    # n <- total number of bits needed, while moving 'parser' to the end
+    n = 0
+    while parser.next_digit() >= 0:
+        n += 1
+
+    # b <- number of Python digits needed, = ceiling(n/SHIFT). */
+    try:
+        b = ovfcheck(n * bits_per_char)
+        b = ovfcheck(b + (SHIFT - 1))
+    except OverflowError:
+        raise ParseStringError("long string too large to convert")
+    b = (b // SHIFT) or 1
+    z = rbigint([NULLDIGIT] * b, sign=parser.sign)
+
+    # Read string from right, and fill in long from left; i.e.,
+    # from least to most significant in both.
+    accum = _widen_digit(0)
+    bits_in_accum = 0
+    pdigit = 0
+    for _ in range(n):
+        k = parser.prev_digit()
+        accum |= _widen_digit(k) << bits_in_accum
+        bits_in_accum += bits_per_char
+        if bits_in_accum >= SHIFT:
+            z.setdigit(pdigit, accum)
+            pdigit += 1
+            assert pdigit <= b
+            accum >>= SHIFT
+            bits_in_accum -= SHIFT
+
+    if bits_in_accum:
+        z.setdigit(pdigit, accum)
+    z._normalize()
+    return z
diff --git a/rpython/rlib/rshrinklist.py b/rpython/rlib/rshrinklist.py
--- a/rpython/rlib/rshrinklist.py
+++ b/rpython/rlib/rshrinklist.py
@@ -6,6 +6,8 @@
     The twist is that occasionally append() will throw away the
     items for which must_keep() returns False.  (It does so without
     changing the order.)
+
+    See also rpython.rlib.rweaklist.
     """
     _mixin_ = True
 
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -485,6 +485,24 @@
         else:
             return -1
 
+    def prev_digit(self):
+        # After exhausting all n digits in next_digit(), you can walk them
+        # again in reverse order by calling prev_digit() exactly n times
+        i = self.i - 1
+        assert i >= 0
+        self.i = i
+        c = self.s[i]
+        digit = ord(c)
+        if '0' <= c <= '9':
+            digit -= ord('0')
+        elif 'A' <= c <= 'Z':
+            digit = (digit - ord('A')) + 10
+        elif 'a' <= c <= 'z':
+            digit = (digit - ord('a')) + 10
+        else:
+            raise AssertionError
+        return digit
+
 # -------------- public API ---------------------------------
 
 INIT_SIZE = 100 # XXX tweak
diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py
--- a/rpython/rlib/rthread.py
+++ b/rpython/rlib/rthread.py
@@ -291,8 +291,6 @@
 # ____________________________________________________________
 #
 # Thread-locals.
-# KEEP THE REFERENCE ALIVE, THE GC DOES NOT FOLLOW THEM SO FAR!
-# We use _make_sure_does_not_move() to make sure the pointer will not move.
 
 
 class ThreadLocalField(object):
@@ -351,6 +349,11 @@
 
 
 class ThreadLocalReference(ThreadLocalField):
+    # A thread-local that points to an object.  The object stored in such
+    # a thread-local is kept alive as long as the thread is not finished
+    # (but only with our own GCs!  it seems not to work with Boehm...)
+    # (also, on Windows, if you're not making a DLL but an EXE, it will
+    # leak the objects when a thread finishes; see threadlocal.c.)
     _COUNT = 1
 
     def __init__(self, Cls, loop_invariant=False):
@@ -378,20 +381,41 @@
             assert isinstance(value, Cls) or value is None
             if we_are_translated():
                 from rpython.rtyper.annlowlevel import cast_instance_to_gcref
-                from rpython.rlib.rgc import _make_sure_does_not_move
-                from rpython.rlib.objectmodel import running_on_llinterp
                 gcref = cast_instance_to_gcref(value)
-                if not running_on_llinterp:
-                    if gcref:
-                        _make_sure_does_not_move(gcref)
                 value = lltype.cast_ptr_to_int(gcref)
                 setraw(value)
+                rgc.register_custom_trace_hook(TRACETLREF, _lambda_trace_tlref)
+                rgc.ll_writebarrier(_tracetlref_obj)
             else:
                 self.local.value = value
 
         self.get = get
         self.set = set
 
+        def _trace_tlref(gc, obj, callback, arg):
+            p = llmemory.NULL
+            llop.threadlocalref_acquire(lltype.Void)
+            while True:
+                p = llop.threadlocalref_enum(llmemory.Address, p)
+                if not p:
+                    break
+                gc._trace_callback(callback, arg, p + offset)
+            llop.threadlocalref_release(lltype.Void)
+        _lambda_trace_tlref = lambda: _trace_tlref
+        TRACETLREF = lltype.GcStruct('TRACETLREF')
+        _tracetlref_obj = lltype.malloc(TRACETLREF, immortal=True)
+
+    @staticmethod
+    def automatic_keepalive(config):
+        """Returns True if translated with a GC that keeps alive
+        the set() value until the end of the thread.  Returns False
+        if you need to keep it alive yourself (but in that case, you
+        should also reset it to None before the thread finishes).
+        """
+        return (config.translation.gctransformer == "framework" and
+                # see translator/c/src/threadlocal.c for the following line
+                (not _win32 or config.translation.shared))
+
 
 tlfield_thread_ident = ThreadLocalField(lltype.Signed, "thread_ident",
                                         loop_invariant=True)
@@ -399,7 +423,8 @@
                                    loop_invariant=True)
 tlfield_rpy_errno = ThreadLocalField(rffi.INT, "rpy_errno")
 tlfield_alt_errno = ThreadLocalField(rffi.INT, "alt_errno")
-if sys.platform == "win32":
+_win32 = (sys.platform == "win32")
+if _win32:
     from rpython.rlib import rwin32
     tlfield_rpy_lasterror = ThreadLocalField(rwin32.DWORD, "rpy_lasterror")
     tlfield_alt_lasterror = ThreadLocalField(rwin32.DWORD, "alt_lasterror")
diff --git a/rpython/rlib/rweaklist.py b/rpython/rlib/rweaklist.py
--- a/rpython/rlib/rweaklist.py
+++ b/rpython/rlib/rweaklist.py
@@ -5,6 +5,13 @@
 
 
 class RWeakListMixin(object):
+    """A mixin base class.  A collection that weakly maps indexes to objects.
+    After an object goes away, its index is marked free and will be reused
+    by some following add_handle() call.  So add_handle() might not append
+    the object at the end of the list, but can put it anywhere.
+
+    See also rpython.rlib.rshrinklist.
+    """
     _mixin_ = True
 
     def initialize(self):
diff --git a/rpython/rlib/test/test_buffer.py b/rpython/rlib/test/test_buffer.py
--- a/rpython/rlib/test/test_buffer.py
+++ b/rpython/rlib/test/test_buffer.py
@@ -45,3 +45,22 @@
     ssbuf = SubBuffer(sbuf, 3, 2)
     assert ssbuf.getslice(0, 2, 1, 2) == 'ld'
     assert ssbuf.as_str_and_offset_maybe() == ('hello world', 9)
+    #
+    ss2buf = SubBuffer(sbuf, 1, -1)
+    assert ss2buf.as_str() == 'orld'
+    assert ss2buf.getlength() == 4
+    ss3buf = SubBuffer(ss2buf, 1, -1)
+    assert ss3buf.as_str() == 'rld'
+    assert ss3buf.getlength() == 3
+    #
+    ss4buf = SubBuffer(buf, 3, 4)
+    assert ss4buf.as_str() == 'lo w'
+    ss5buf = SubBuffer(ss4buf, 1, -1)
+    assert ss5buf.as_str() == 'o w'
+    assert ss5buf.getlength() == 3
+
+def test_repeated_subbuffer():
+    buf = StringBuffer('x' * 10000)
+    for i in range(9999, 9, -1):
+        buf = SubBuffer(buf, 1, i)
+    assert buf.getlength() == 10
diff --git a/rpython/rlib/test/test_objectmodel.py b/rpython/rlib/test/test_objectmodel.py
--- a/rpython/rlib/test/test_objectmodel.py
+++ b/rpython/rlib/test/test_objectmodel.py
@@ -6,7 +6,8 @@
     prepare_dict_update, reversed_dict, specialize, enforceargs, newlist_hint,
     resizelist_hint, is_annotation_constant, always_inline, NOT_CONSTANT,
     iterkeys_with_hash, iteritems_with_hash, contains_with_hash,
-    setitem_with_hash, getitem_with_hash, delitem_with_hash, import_from_mixin)
+    setitem_with_hash, getitem_with_hash, delitem_with_hash, import_from_mixin,
+    fetch_translated_config)
 from rpython.translator.translator import TranslationContext, graphof
 from rpython.rtyper.test.tool import BaseRtypingTest
 from rpython.rtyper.test.test_llinterp import interpret
@@ -439,6 +440,13 @@
         res = self.interpret(f, [42])
         assert res == 84
 
+    def test_fetch_translated_config(self):
+        assert fetch_translated_config() is None
+        def f():
+            return fetch_translated_config().translation.continuation
+        res = self.interpret(f, [])
+        assert res is False
+
 
 def test_specialize_decorator():
     def f():
diff --git a/rpython/rlib/test/test_rbigint.py b/rpython/rlib/test/test_rbigint.py
--- a/rpython/rlib/test/test_rbigint.py
+++ b/rpython/rlib/test/test_rbigint.py
@@ -825,7 +825,19 @@
             def __init__(self, base, sign, digits):
                 self.base = base
                 self.sign = sign
-                self.next_digit = iter(digits + [-1]).next
+                self.i = 0
+                self._digits = digits
+            def next_digit(self):
+                i = self.i
+                if i == len(self._digits):
+                    return -1
+                self.i = i + 1
+                return self._digits[i]
+            def prev_digit(self):
+                i = self.i - 1
+                assert i >= 0
+                self.i = i
+                return self._digits[i]
         x = parse_digit_string(Parser(10, 1, [6]))
         assert x.eq(rbigint.fromint(6))
         x = parse_digit_string(Parser(10, 1, [6, 2, 3]))
@@ -847,6 +859,16 @@
         x = parse_digit_string(Parser(7, -1, [0, 0, 0]))
         assert x.tobool() is False
 
+        for base in [2, 4, 8, 16, 32]:
+            for inp in [[0], [1], [1, 0], [0, 1], [1, 0, 1], [1, 0, 0, 1],
+                        [1, 0, 0, base-1, 0, 1], [base-1, 1, 0, 0, 0, 1, 0],
+                        [base-1]]:
+                inp = inp * 97
+                x = parse_digit_string(Parser(base, -1, inp))
+                num = sum(inp[i] * (base ** (len(inp)-1-i))
+                          for i in range(len(inp)))
+                assert x.eq(rbigint.fromlong(-num))
+
 
 BASE = 2 ** SHIFT
 
diff --git a/rpython/rlib/test/test_rthread.py b/rpython/rlib/test/test_rthread.py
--- a/rpython/rlib/test/test_rthread.py
+++ b/rpython/rlib/test/test_rthread.py
@@ -1,6 +1,7 @@
 import gc, time
 from rpython.rlib.rthread import *
 from rpython.rlib.rarithmetic import r_longlong
+from rpython.rlib import objectmodel
 from rpython.translator.c.test.test_boehm import AbstractGCTestClass
 from rpython.rtyper.lltypesystem import lltype, rffi
 import py
@@ -240,3 +241,60 @@
 
 class TestUsingFramework(AbstractThreadTests):
     gcpolicy = 'minimark'
+
+    def test_tlref_keepalive(self, no__thread=True):
+        import weakref
+        from rpython.config.translationoption import SUPPORT__THREAD
+
+        if not (SUPPORT__THREAD or no__thread):
+            py.test.skip("no __thread support here")
+
+        class FooBar(object):
+            pass
+        t = ThreadLocalReference(FooBar)
+
+        def tset():
+            x1 = FooBar()
+            t.set(x1)
+            return weakref.ref(x1)
+        tset._dont_inline_ = True
+
+        class WrFromThread:
+            pass
+        wr_from_thread = WrFromThread()
+
+        def f():
+            config = objectmodel.fetch_translated_config()
+            assert t.automatic_keepalive(config) is True
+            wr = tset()
+            import gc; gc.collect()   # 'x1' should not be collected
+            x2 = t.get()
+            assert x2 is not None
+            assert wr() is not None
+            assert wr() is x2
+            return wr
+
+        def thread_entry_point():
+            wr = f()
+            wr_from_thread.wr = wr
+            wr_from_thread.seen = True
+
+        def main():
+            wr_from_thread.seen = False
+            start_new_thread(thread_entry_point, ())
+            wr1 = f()
+            time.sleep(0.5)
+            assert wr_from_thread.seen is True
+            wr2 = wr_from_thread.wr
+            import gc; gc.collect()      # wr2() should be collected here
+            assert wr1() is not None     # this thread, still running
+            assert wr2() is None         # other thread, not running any more
+            return 42
+
+        extra_options = {'no__thread': no__thread, 'shared': True}
+        fn = self.getcompiled(main, [], extra_options=extra_options)
+        res = fn()
+        assert res == 42
+
+    def test_tlref_keepalive__thread(self):
+        self.test_tlref_keepalive(no__thread=False)
diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py
--- a/rpython/rtyper/llinterp.py
+++ b/rpython/rtyper/llinterp.py
@@ -950,6 +950,13 @@
         return self.op_raw_load(RESTYPE, _address_of_thread_local(), offset)
     op_threadlocalref_get.need_result_type = True
 
+    def op_threadlocalref_acquire(self, prev):
+        raise NotImplementedError
+    def op_threadlocalref_release(self, prev):
+        raise NotImplementedError
+    def op_threadlocalref_enum(self, prev):
+        raise NotImplementedError
+
     # __________________________________________________________
     # operations on addresses
 
diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py
--- a/rpython/rtyper/lltypesystem/lloperation.py
+++ b/rpython/rtyper/lltypesystem/lloperation.py
@@ -545,8 +545,11 @@
     'getslice':             LLOp(canraise=(Exception,)),
     'check_and_clear_exc':  LLOp(),
 
-    'threadlocalref_addr':  LLOp(sideeffects=False),  # get (or make) addr of tl
+    'threadlocalref_addr':  LLOp(),                   # get (or make) addr of tl
     'threadlocalref_get':   LLOp(sideeffects=False),  # read field (no check)
+    'threadlocalref_acquire':  LLOp(),                # lock for enum
+    'threadlocalref_release':  LLOp(),                # lock for enum
+    'threadlocalref_enum':  LLOp(sideeffects=False),  # enum all threadlocalrefs
 
     # __________ debugging __________
     'debug_view':           LLOp(),
diff --git a/rpython/translator/c/genc.py b/rpython/translator/c/genc.py
--- a/rpython/translator/c/genc.py
+++ b/rpython/translator/c/genc.py
@@ -733,6 +733,9 @@
     print >> f, 'struct pypy_threadlocal_s {'
     print >> f, '\tint ready;'
     print >> f, '\tchar *stack_end;'
+    print >> f, '\tstruct pypy_threadlocal_s *prev, *next;'
+    # note: if the four fixed fields above are changed, you need
+    # to adapt threadlocal.c's linkedlist_head declaration too
     for field in fields:
         typename = database.gettype(field.FIELDTYPE)
         print >> f, '\t%s;' % cdecl(typename, field.fieldname)
diff --git a/rpython/translator/c/src/entrypoint.c b/rpython/translator/c/src/entrypoint.c
--- a/rpython/translator/c/src/entrypoint.c
+++ b/rpython/translator/c/src/entrypoint.c
@@ -37,6 +37,24 @@
 # include <src/thread.h>
 #endif
 
+void rpython_startup_code(void)
+{
+#ifdef RPY_WITH_GIL
+    RPyGilAcquire();
+#endif
+#ifdef PYPY_USE_ASMGCC
+    pypy_g_rpython_rtyper_lltypesystem_rffi_StackCounter.sc_inst_stacks_counter++;
+#endif
+    pypy_asm_stack_bottom();
+    RPython_StartupCode();
+#ifdef PYPY_USE_ASMGCC
+    pypy_g_rpython_rtyper_lltypesystem_rffi_StackCounter.sc_inst_stacks_counter--;
+#endif
+#ifdef RPY_WITH_GIL
+    RPyGilRelease();
+#endif
+}
+
 
 RPY_EXTERN
 int pypy_main_function(int argc, char *argv[])
diff --git a/rpython/translator/c/src/thread.h b/rpython/translator/c/src/thread.h
--- a/rpython/translator/c/src/thread.h
+++ b/rpython/translator/c/src/thread.h
@@ -48,7 +48,7 @@
 }
 static inline void _RPyGilRelease(void) {
     assert(RPY_FASTGIL_LOCKED(rpy_fastgil));
-    rpy_fastgil = 0;
+    lock_release(&rpy_fastgil);
 }
 static inline long *_RPyFetchFastGil(void) {
     return &rpy_fastgil;
diff --git a/rpython/translator/c/src/thread_nt.c b/rpython/translator/c/src/thread_nt.c
--- a/rpython/translator/c/src/thread_nt.c
+++ b/rpython/translator/c/src/thread_nt.c
@@ -231,10 +231,19 @@
     return (result != WAIT_TIMEOUT);
 }
 
-#define mutex1_t      mutex2_t
-#define mutex1_init   mutex2_init
-#define mutex1_lock   mutex2_lock
-#define mutex1_unlock mutex2_unlock
+typedef CRITICAL_SECTION mutex1_t;
+
+static inline void mutex1_init(mutex1_t *mutex) {
+    InitializeCriticalSection(mutex);
+}
+
+static inline void mutex1_lock(mutex1_t *mutex) {
+    EnterCriticalSection(mutex);
+}
+
+static inline void mutex1_unlock(mutex1_t *mutex) {
+    LeaveCriticalSection(mutex);
+}
 
 //#define lock_test_and_set(ptr, value)  see thread_nt.h
 #define atomic_increment(ptr)          InterlockedIncrement(ptr)
diff --git a/rpython/translator/c/src/thread_nt.h b/rpython/translator/c/src/thread_nt.h
--- a/rpython/translator/c/src/thread_nt.h
+++ b/rpython/translator/c/src/thread_nt.h
@@ -38,3 +38,4 @@
 #else
 #define lock_test_and_set(ptr, value)  InterlockedExchange(ptr, value)
 #endif
+#define lock_release(ptr)              (*((volatile long *)(ptr)) = 0)
diff --git a/rpython/translator/c/src/thread_pthread.h b/rpython/translator/c/src/thread_pthread.h
--- a/rpython/translator/c/src/thread_pthread.h
+++ b/rpython/translator/c/src/thread_pthread.h
@@ -81,3 +81,4 @@
 
 
 #define lock_test_and_set(ptr, value)  __sync_lock_test_and_set(ptr, value)
+#define lock_release(ptr)              __sync_lock_release(ptr)
diff --git a/rpython/translator/c/src/threadlocal.c b/rpython/translator/c/src/threadlocal.c
--- a/rpython/translator/c/src/threadlocal.c
+++ b/rpython/translator/c/src/threadlocal.c
@@ -3,20 +3,99 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#ifndef _WIN32
-# include <pthread.h>
+#include <assert.h>
+#include "src/threadlocal.h"
+#include "src/thread.h"
+
+
+/* this is a spin-lock that must be acquired around each doubly-linked-list
+   manipulation (because such manipulations can occur without the GIL) */
+static long pypy_threadlocal_lock = 0;
+
+static int check_valid(void);
+
+void _RPython_ThreadLocals_Acquire(void) {
+    while (lock_test_and_set(&pypy_threadlocal_lock, 1) != 0) {
+        /* spin: a nonzero old value means another thread holds the lock */
+    }
+    assert(check_valid());
+}
+void _RPython_ThreadLocals_Release(void) {
+    assert(check_valid());
+    lock_release(&pypy_threadlocal_lock);
+}
+
+
+pthread_key_t pypy_threadlocal_key
+#ifdef _WIN32
+= TLS_OUT_OF_INDEXES
 #endif
-#include "src/threadlocal.h"
+;
 
+static struct pypy_threadlocal_s linkedlist_head = {
+    -1,                     /* ready     */
+    NULL,                   /* stack_end */
+    &linkedlist_head,       /* prev      */
+    &linkedlist_head };     /* next      */
+
+static int check_valid(void)
+{
+    struct pypy_threadlocal_s *prev, *cur;
+    prev = &linkedlist_head;
+    while (1) {
+        cur = prev->next;
+        assert(cur->prev == prev);
+        if (cur == &linkedlist_head)
+            break;
+        assert(cur->ready == 42);
+        assert(cur->next != cur);
+        prev = cur;
+    }
+    assert(cur->ready == -1);
+    return 1;
+}
+
+static void cleanup_after_fork(void)
+{
+    /* assume that at most one pypy_threadlocal_s survived, the current one */
+    struct pypy_threadlocal_s *cur;
+#ifdef USE___THREAD
+    cur = &pypy_threadlocal;
+#else
+    cur = (struct pypy_threadlocal_s *)_RPy_ThreadLocals_Get();
+#endif
+    if (cur && cur->ready == 42) {
+        cur->next = cur->prev = &linkedlist_head;
+        linkedlist_head.next = linkedlist_head.prev = cur;
+    }
+    else {
+        linkedlist_head.next = linkedlist_head.prev = &linkedlist_head;
+    }
+    _RPython_ThreadLocals_Release();
+}
+
+
+struct pypy_threadlocal_s *
+_RPython_ThreadLocals_Enum(struct pypy_threadlocal_s *prev)
+{
+    if (prev == NULL)
+        prev = &linkedlist_head;
+    if (prev->next == &linkedlist_head)
+        return NULL;
+    return prev->next;
+}
 
 static void _RPy_ThreadLocals_Init(void *p)
 {
+    struct pypy_threadlocal_s *tls = (struct pypy_threadlocal_s *)p;
+    struct pypy_threadlocal_s *oldnext;
     memset(p, 0, sizeof(struct pypy_threadlocal_s));
+
 #ifdef RPY_TLOFS_p_errno
-    ((struct pypy_threadlocal_s *)p)->p_errno = &errno;
+    tls->p_errno = &errno;
 #endif
 #ifdef RPY_TLOFS_thread_ident
-    ((struct pypy_threadlocal_s *)p)->thread_ident =
+    tls->thread_ident =
 #    ifdef _WIN32
         GetCurrentThreadId();
 #    else
@@ -26,58 +105,80 @@
                   where it is not the case are rather old nowadays. */
 #    endif
 #endif
-    ((struct pypy_threadlocal_s *)p)->ready = 42;
+    _RPython_ThreadLocals_Acquire();
+    oldnext = linkedlist_head.next;
+    tls->prev = &linkedlist_head;
+    tls->next = oldnext;
+    linkedlist_head.next = tls;
+    oldnext->prev = tls;
+    tls->ready = 42;
+    _RPython_ThreadLocals_Release();
 }
 
+static void threadloc_unlink(void *p)
+{
+    /* warning: this can be called at completely random times without
+       the GIL. */
+    struct pypy_threadlocal_s *tls = (struct pypy_threadlocal_s *)p;
+    _RPython_ThreadLocals_Acquire();
+    if (tls->ready == 42) {
+        tls->next->prev = tls->prev;
+        tls->prev->next = tls->next;
+        memset(tls, 0xDD, sizeof(struct pypy_threadlocal_s));  /* debug */
+        tls->ready = 0;
+    }
+    _RPython_ThreadLocals_Release();
+#ifndef USE___THREAD
+    free(p);
+#endif
+}
 
-/* ------------------------------------------------------------ */
-#ifdef USE___THREAD
-/* ------------------------------------------------------------ */
+#ifdef _WIN32
+/* xxx Defines a DllMain() function.  It's horrible imho: it only
+   works if we happen to compile a DLL (not an EXE); and of course you
+   get link-time errors if two files in the same DLL do the same.
+   There are some alternatives known, but they are horrible in other
+   ways (e.g. using undocumented behavior).  This seems to be the
+   simplest, but feel free to fix if you need that.
 
-
-/* in this situation, we always have one full 'struct pypy_threadlocal_s'
-   available, managed by gcc. */
-__thread struct pypy_threadlocal_s pypy_threadlocal;
+   For this reason we have the line 'not _win32 or config.translation.shared'
+   in rpython.rlib.rthread.
+*/
+BOOL WINAPI DllMain(HINSTANCE hinstDLL,
+                    DWORD     reason_for_call,
+                    LPVOID    reserved)
+{
+    LPVOID p;
+    switch (reason_for_call) {
+    case DLL_THREAD_DETACH:
+        if (pypy_threadlocal_key != TLS_OUT_OF_INDEXES) {
+            p = TlsGetValue(pypy_threadlocal_key);
+            if (p != NULL) {
+                TlsSetValue(pypy_threadlocal_key, NULL);
+                threadloc_unlink(p);
+            }
+        }
+        break;
+    default:
+        break;
+    }
+    return TRUE;
+}
+#endif
 
 void RPython_ThreadLocals_ProgramInit(void)
 {
-    _RPy_ThreadLocals_Init(&pypy_threadlocal);
-}
-
-char *_RPython_ThreadLocals_Build(void)
-{
-    RPyAssert(pypy_threadlocal.ready == 0, "corrupted thread-local");
-    _RPy_ThreadLocals_Init(&pypy_threadlocal);
-    return (char *)&pypy_threadlocal;
-}
-
-void RPython_ThreadLocals_ThreadDie(void)
-{
-    memset(&pypy_threadlocal, 0xDD,
-           sizeof(struct pypy_threadlocal_s));  /* debug */
-    pypy_threadlocal.ready = 0;
-}
-
-
-/* ------------------------------------------------------------ */
-#else
-/* ------------------------------------------------------------ */
-
-
-/* this is the case where the 'struct pypy_threadlocal_s' is allocated
-   explicitly, with malloc()/free(), and attached to (a single) thread-
-   local key using the API of Windows or pthread. */
-
-pthread_key_t pypy_threadlocal_key;
-
-
-void RPython_ThreadLocals_ProgramInit(void)
-{
+    /* Initialize the pypy_threadlocal_key, together with a destructor
+       that will be called every time a thread shuts down (if there is
+       a non-null thread-local value).  This is needed even in the
+       case where we use '__thread' below, for the destructor.
+    */
+    assert(pypy_threadlocal_lock == 0);
 #ifdef _WIN32
     pypy_threadlocal_key = TlsAlloc();
     if (pypy_threadlocal_key == TLS_OUT_OF_INDEXES)
 #else
-    if (pthread_key_create(&pypy_threadlocal_key, NULL) != 0)
+    if (pthread_key_create(&pypy_threadlocal_key, threadloc_unlink) != 0)
 #endif
     {
         fprintf(stderr, "Internal RPython error: "
@@ -85,8 +186,53 @@
         abort();
     }
     _RPython_ThreadLocals_Build();
+
+#ifndef _WIN32
+    pthread_atfork(_RPython_ThreadLocals_Acquire,
+                   _RPython_ThreadLocals_Release,
+                   cleanup_after_fork);
+#endif
 }
 
+
+/* ------------------------------------------------------------ */
+#ifdef USE___THREAD
+/* ------------------------------------------------------------ */
+
+
+/* in this situation, we always have one full 'struct pypy_threadlocal_s'
+   available, managed by gcc. */
+__thread struct pypy_threadlocal_s pypy_threadlocal;
+
+char *_RPython_ThreadLocals_Build(void)
+{
+    RPyAssert(pypy_threadlocal.ready == 0, "unclean thread-local");
+    _RPy_ThreadLocals_Init(&pypy_threadlocal);
+
+    /* we also set up &pypy_threadlocal as a POSIX thread-local variable,
+       because we need the destructor behavior. */
+    pthread_setspecific(pypy_threadlocal_key, (void *)&pypy_threadlocal);
+
+    return (char *)&pypy_threadlocal;
+}
+
+void RPython_ThreadLocals_ThreadDie(void)
+{
+    pthread_setspecific(pypy_threadlocal_key, NULL);
+    threadloc_unlink(&pypy_threadlocal);
+}
+
+
+/* ------------------------------------------------------------ */
+#else
+/* ------------------------------------------------------------ */
+
+
+/* this is the case where the 'struct pypy_threadlocal_s' is allocated
+   explicitly, with malloc()/free(), and attached to (a single) thread-
+   local key using the API of Windows or pthread. */
+
+
 char *_RPython_ThreadLocals_Build(void)
 {
     void *p = malloc(sizeof(struct pypy_threadlocal_s));
@@ -105,8 +251,7 @@
     void *p = _RPy_ThreadLocals_Get();
     if (p != NULL) {
         _RPy_ThreadLocals_Set(NULL);
-        memset(p, 0xDD, sizeof(struct pypy_threadlocal_s));  /* debug */
-        free(p);
+        threadloc_unlink(p);   /* includes free(p) */
     }
 }
 
diff --git a/rpython/translator/c/src/threadlocal.h b/rpython/translator/c/src/threadlocal.h
--- a/rpython/translator/c/src/threadlocal.h
+++ b/rpython/translator/c/src/threadlocal.h
@@ -13,14 +13,24 @@
    to die. */
 RPY_EXTERN void RPython_ThreadLocals_ThreadDie(void);
 
-/* There are two llops: 'threadlocalref_addr' and 'threadlocalref_make'.
-   They both return the address of the thread-local structure (of the
-   C type 'struct pypy_threadlocal_s').  The difference is that
-   OP_THREADLOCALREF_MAKE() checks if we have initialized this thread-
-   local structure in the current thread, and if not, calls the following
-   helper. */
+/* 'threadlocalref_addr' returns the address of the thread-local
+   structure (of the C type 'struct pypy_threadlocal_s').  It first
+   checks if we have initialized this thread-local structure in the
+   current thread, and if not, calls the following helper. */
 RPY_EXTERN char *_RPython_ThreadLocals_Build(void);
 
+RPY_EXTERN void _RPython_ThreadLocals_Acquire(void);
+RPY_EXTERN void _RPython_ThreadLocals_Release(void);
+
+/* Must acquire/release the thread-local lock around a series of calls
+   to the following function */
+RPY_EXTERN struct pypy_threadlocal_s *
+_RPython_ThreadLocals_Enum(struct pypy_threadlocal_s *prev);
+
+#define OP_THREADLOCALREF_ACQUIRE(r)   _RPython_ThreadLocals_Acquire()
+#define OP_THREADLOCALREF_RELEASE(r)   _RPython_ThreadLocals_Release()
+#define OP_THREADLOCALREF_ENUM(p, r)   r = _RPython_ThreadLocals_Enum(p)
+
 
 /* ------------------------------------------------------------ */
 #ifdef USE___THREAD
@@ -29,6 +39,8 @@
 
 /* Use the '__thread' specifier, so far only on Linux */
 
+#include <pthread.h>
+
 RPY_EXTERN __thread struct pypy_threadlocal_s pypy_threadlocal;
 
 #define OP_THREADLOCALREF_ADDR(r)               \
@@ -64,8 +76,6 @@
 #  define _RPy_ThreadLocals_Set(x)  pthread_setspecific(pypy_threadlocal_key, x)
 #endif
 
-RPY_EXTERN pthread_key_t pypy_threadlocal_key;
-
 
 #define OP_THREADLOCALREF_ADDR(r)               \
     do {                                        \
@@ -87,6 +97,9 @@
 /* ------------------------------------------------------------ */
 
 
+RPY_EXTERN pthread_key_t pypy_threadlocal_key;
+
+
 /* only for the fall-back path in the JIT */
 #define OP_THREADLOCALREF_GET_NONCONST(RESTYPE, offset, r)      \
     do {                                                        \
diff --git a/rpython/translator/c/test/test_boehm.py b/rpython/translator/c/test/test_boehm.py
--- a/rpython/translator/c/test/test_boehm.py
+++ b/rpython/translator/c/test/test_boehm.py
@@ -23,6 +23,7 @@
 class AbstractGCTestClass(object):
     gcpolicy = "boehm"
     use_threads = False
+    extra_options = {}
 
     # deal with cleanups
     def setup_method(self, meth):
@@ -33,8 +34,10 @@
             #print "CLEANUP"
             self._cleanups.pop()()
 
-    def getcompiled(self, func, argstypelist=[], annotatorpolicy=None):
-        return compile(func, argstypelist, gcpolicy=self.gcpolicy, thread=self.use_threads)
+    def getcompiled(self, func, argstypelist=[], annotatorpolicy=None,
+                    extra_options={}):
+        return compile(func, argstypelist, gcpolicy=self.gcpolicy,
+                       thread=self.use_threads, **extra_options)
 
 
 class TestUsingBoehm(AbstractGCTestClass):
diff --git a/rpython/translator/c/test/test_standalone.py b/rpython/translator/c/test/test_standalone.py
--- a/rpython/translator/c/test/test_standalone.py
+++ b/rpython/translator/c/test/test_standalone.py
@@ -96,6 +96,8 @@
                     continue
                 if name == 'pypy_debug_file':     # ok to export this one
                     continue
+                if name == 'rpython_startup_code':  # ok for this one too
+                    continue
                 if 'pypy' in name.lower() or 'rpy' in name.lower():
                     raise Exception("Unexpected exported name %r.  "
                         "What is likely missing is RPY_EXTERN before the "
diff --git a/rpython/translator/driver.py b/rpython/translator/driver.py
--- a/rpython/translator/driver.py
+++ b/rpython/translator/driver.py
@@ -203,9 +203,8 @@
                 try:
                     points = secondary_entrypoints[key]
                 except KeyError:
-                    raise KeyError(
-                        "Entrypoints not found. I only know the keys %r." %
-                        (", ".join(secondary_entrypoints.keys()), ))
+                    raise KeyError("Entrypoint %r not found (not in %r)" %
+                                   (key, secondary_entrypoints.keys()))
                 self.secondary_entrypoints.extend(points)
 
         self.translator.driver_instrument_result = self.instrument_result


More information about the pypy-commit mailing list