[pypy-commit] pypy py3k: merge default

Sun Nov 30 23:12:47 CET 2014

Author: Philip Jenvey <pjenvey at underboss.org>
Branch: py3k
Changeset: r74769:f682ccbce64f
Date: 2014-11-30 14:12 -0800
http://bitbucket.org/pypy/pypy/changeset/f682ccbce64f/

Log:	merge default

diff too long, truncating to 2000 out of 3147 lines

diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py
--- a/pypy/module/_cffi_backend/ctypeprim.py
+++ b/pypy/module/_cffi_backend/ctypeprim.py
@@ -158,21 +158,14 @@
 
 
 class W_CTypePrimitiveSigned(W_CTypePrimitive):
-    _attrs_            = ['value_fits_long', 'vmin', 'vrangemax']
-    _immutable_fields_ = ['value_fits_long', 'vmin', 'vrangemax']
+    _attrs_            = ['value_fits_long', 'value_smaller_than_long']
+    _immutable_fields_ = ['value_fits_long', 'value_smaller_than_long']
     is_primitive_integer = True
 
     def __init__(self, *args):
         W_CTypePrimitive.__init__(self, *args)
         self.value_fits_long = self.size <= rffi.sizeof(lltype.Signed)
-        if self.size < rffi.sizeof(lltype.Signed):
-            assert self.value_fits_long
-            sh = self.size * 8
-            self.vmin = r_uint(-1) << (sh - 1)
-            self.vrangemax = (r_uint(1) << sh) - 1
-        else:
-            self.vmin = r_uint(0)
-            self.vrangemax = r_uint(-1)
+        self.value_smaller_than_long = self.size < rffi.sizeof(lltype.Signed)
 
     def cast_to_int(self, cdata):
         return self.convert_to_object(cdata)
@@ -192,8 +185,17 @@
     def convert_from_object(self, cdata, w_ob):
         if self.value_fits_long:
             value = misc.as_long(self.space, w_ob)
-            if self.size < rffi.sizeof(lltype.Signed):
-                if r_uint(value) - self.vmin > self.vrangemax:
+            if self.value_smaller_than_long:
+                size = self.size
+                if size == 1:
+                    signextended = misc.signext(value, 1)
+                elif size == 2:
+                    signextended = misc.signext(value, 2)
+                elif size == 4:
+                    signextended = misc.signext(value, 4)
+                else:
+                    raise AssertionError("unsupported size")
+                if value != signextended:
                     self._overflow(w_ob)
             misc.write_raw_signed_data(cdata, value, self.size)
         else:
@@ -221,7 +223,7 @@
             length = w_cdata.get_array_length()
             populate_list_from_raw_array(res, buf, length)
             return res
-        elif self.value_fits_long:
+        elif self.value_smaller_than_long:
             res = [0] * w_cdata.get_array_length()
             misc.unpack_list_from_raw_array(res, w_cdata._cdata, self.size)
             return res
@@ -235,8 +237,8 @@
                 cdata = rffi.cast(rffi.LONGP, cdata)
                 copy_list_to_raw_array(int_list, cdata)
             else:
-                overflowed = misc.pack_list_to_raw_array_bounds(
-                    int_list, cdata, self.size, self.vmin, self.vrangemax)
+                overflowed = misc.pack_list_to_raw_array_bounds_signed(
+                    int_list, cdata, self.size)
                 if overflowed != 0:
                     self._overflow(self.space.wrap(overflowed))
             return True
@@ -314,8 +316,8 @@
     def pack_list_of_items(self, cdata, w_ob):
         int_list = self.space.listview_int(w_ob)
         if int_list is not None:
-            overflowed = misc.pack_list_to_raw_array_bounds(
-                int_list, cdata, self.size, r_uint(0), self.vrangemax)
+            overflowed = misc.pack_list_to_raw_array_bounds_unsigned(
+                int_list, cdata, self.size, self.vrangemax)
             if overflowed != 0:
                 self._overflow(self.space.wrap(overflowed))
             return True
diff --git a/pypy/module/_cffi_backend/misc.py b/pypy/module/_cffi_backend/misc.py
--- a/pypy/module/_cffi_backend/misc.py
+++ b/pypy/module/_cffi_backend/misc.py
@@ -222,6 +222,19 @@
 neg_msg = "can't convert negative number to unsigned"
 ovf_msg = "long too big to convert"
 
+ at specialize.arg(1)
+def signext(value, size):
+    # 'value' is sign-extended from 'size' bytes to a full integer.
+    # 'size' should be a constant smaller than a full integer size.
+    if size == rffi.sizeof(rffi.SIGNEDCHAR):
+        return rffi.cast(lltype.Signed, rffi.cast(rffi.SIGNEDCHAR, value))
+    elif size == rffi.sizeof(rffi.SHORT):
+        return rffi.cast(lltype.Signed, rffi.cast(rffi.SHORT, value))
+    elif size == rffi.sizeof(rffi.INT):
+        return rffi.cast(lltype.Signed, rffi.cast(rffi.INT, value))
+    else:
+        raise AssertionError("unsupported size")
+
 # ____________________________________________________________
 
 class _NotStandardObject(Exception):
@@ -339,13 +352,26 @@
 
 # ____________________________________________________________
 
-def pack_list_to_raw_array_bounds(int_list, target, size, vmin, vrangemax):
+def pack_list_to_raw_array_bounds_signed(int_list, target, size):
     for TP, TPP in _prim_signed_types:
         if size == rffi.sizeof(TP):
             ptr = rffi.cast(TPP, target)
             for i in range(len(int_list)):
                 x = int_list[i]
-                if r_uint(x) - vmin > vrangemax:
+                y = rffi.cast(TP, x)
+                if x != rffi.cast(lltype.Signed, y):
+                    return x      # overflow
+                ptr[i] = y
+            return 0
+    raise NotImplementedError("bad integer size")
+
+def pack_list_to_raw_array_bounds_unsigned(int_list, target, size, vrangemax):
+    for TP, TPP in _prim_signed_types:
+        if size == rffi.sizeof(TP):
+            ptr = rffi.cast(TPP, target)
+            for i in range(len(int_list)):
+                x = int_list[i]
+                if r_uint(x) > vrangemax:
                     return x      # overflow
                 ptr[i] = rffi.cast(TP, x)
             return 0
diff --git a/pypy/module/_ssl/thread_lock.py b/pypy/module/_ssl/thread_lock.py
--- a/pypy/module/_ssl/thread_lock.py
+++ b/pypy/module/_ssl/thread_lock.py
@@ -24,12 +24,19 @@
 
 separate_module_source = """
 #include <openssl/crypto.h>
+#ifndef _WIN32
+# include <pthread.h>
+#endif
 
 static unsigned int _ssl_locks_count = 0;
 static struct RPyOpaque_ThreadLock *_ssl_locks;
 
 static unsigned long _ssl_thread_id_function(void) {
-    return RPyThreadGetIdent();
+#ifdef _WIN32
+    return (unsigned long)GetCurrentThreadId();
+#else
+    return (unsigned long)pthread_self();
+#endif
 }
 
 static void _ssl_thread_locking_function(int mode, int n, const char *file,
diff --git a/pypy/module/cpyext/src/pythread.c b/pypy/module/cpyext/src/pythread.c
--- a/pypy/module/cpyext/src/pythread.c
+++ b/pypy/module/cpyext/src/pythread.c
@@ -1,11 +1,18 @@
 #include <Python.h>
+#ifndef _WIN32
+# include <pthread.h>
+#endif
 #include "pythread.h"
 #include "src/thread.h"
 
 long
 PyThread_get_thread_ident(void)
 {
-    return RPyThreadGetIdent();
+#ifdef _WIN32
+    return (long)GetCurrentThreadId();
+#else
+    return (long)pthread_self();
+#endif
 }
 
 PyThread_type_lock
diff --git a/pypy/module/micronumpy/concrete.py b/pypy/module/micronumpy/concrete.py
--- a/pypy/module/micronumpy/concrete.py
+++ b/pypy/module/micronumpy/concrete.py
@@ -47,6 +47,7 @@
     def setitem(self, index, value):
         self.dtype.itemtype.store(self, index, 0, value)
 
+    @jit.unroll_safe
     def setslice(self, space, arr):
         if len(arr.get_shape()) > 0 and len(self.get_shape()) == 0:
             raise oefmt(space.w_ValueError,
diff --git a/pypy/module/micronumpy/iterators.py b/pypy/module/micronumpy/iterators.py
--- a/pypy/module/micronumpy/iterators.py
+++ b/pypy/module/micronumpy/iterators.py
@@ -154,7 +154,7 @@
         index = state.index
         if self.track_index:
             index += 1
-        indices = state.indices
+        indices = state.indices[:]
         offset = state.offset
         if self.contiguous:
             offset += self.array.dtype.elsize
diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py
--- a/pypy/module/micronumpy/loop.py
+++ b/pypy/module/micronumpy/loop.py
@@ -88,6 +88,21 @@
                                 reds = 'auto')
 
 def setslice(space, shape, target, source):
+    if not shape:
+        # XXX - simplify
+        target_iter, target_state = target.create_iter(shape)
+        source_iter, source_state = source.create_iter(shape)
+        dtype = target.dtype
+        val = source_iter.getitem(source_state)
+        if dtype.is_str_or_unicode():
+            val = dtype.coerce(space, val)
+        else:
+            val = val.convert_to(space, dtype)
+        target_iter.setitem(target_state, val)
+        return target        
+    return _setslice(space, shape, target, source)
+
+def _setslice(space, shape, target, source):
     # note that unlike everything else, target and source here are
     # array implementations, not arrays
     target_iter, target_state = target.create_iter(shape)
diff --git a/pypy/module/micronumpy/nditer.py b/pypy/module/micronumpy/nditer.py
--- a/pypy/module/micronumpy/nditer.py
+++ b/pypy/module/micronumpy/nditer.py
@@ -1,3 +1,4 @@
+from rpython.rlib import jit
 from pypy.interpreter.baseobjspace import W_Root
 from pypy.interpreter.typedef import TypeDef, GetSetProperty
 from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
@@ -264,8 +265,8 @@
         self.index = [0] * len(shape)
         self.backward = backward
 
+    @jit.unroll_safe
     def next(self):
-        # TODO It's probably possible to refactor all the "next" method from each iterator
         for i in range(len(self.shape) - 1, -1, -1):
             if self.index[i] < self.shape[i] - 1:
                 self.index[i] += 1
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -127,12 +127,13 @@
         assert result == 3 ** 2
         self.check_trace_count(1)
         self.check_simple_loop({
-            'call': 3,
+            'call': 1,
             'float_add': 1,
             'float_eq': 3,
             'float_mul': 2,
             'float_ne': 1,
             'getarrayitem_gc': 1,
+            'getarrayitem_raw': 1,     # read the errno
             'guard_false': 4,
             'guard_not_invalidated': 1,
             'guard_true': 3,
@@ -144,6 +145,7 @@
             'raw_load': 2,
             'raw_store': 1,
             'setarrayitem_gc': 1,
+            'setarrayitem_raw': 1,     # write the errno
         })
 
     def define_pow_int():
diff --git a/pypy/module/pypyjit/test_pypy_c/model.py b/pypy/module/pypyjit/test_pypy_c/model.py
--- a/pypy/module/pypyjit/test_pypy_c/model.py
+++ b/pypy/module/pypyjit/test_pypy_c/model.py
@@ -184,10 +184,10 @@
         matcher = OpMatcher(ops)
         return matcher.match(expected_src, **kwds)
 
-    def match_by_id(self, id, expected_src, **kwds):
+    def match_by_id(self, id, expected_src, ignore_ops=[], **kwds):
         ops = list(self.ops_by_id(id, **kwds))
         matcher = OpMatcher(ops, id)
-        return matcher.match(expected_src)
+        return matcher.match(expected_src, ignore_ops=ignore_ops)
 
 class PartialTraceWithIds(TraceWithIds):
     def __init__(self, trace, is_entry_bridge=False):
diff --git a/pypy/module/pypyjit/test_pypy_c/test_call.py b/pypy/module/pypyjit/test_pypy_c/test_call.py
--- a/pypy/module/pypyjit/test_pypy_c/test_call.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_call.py
@@ -82,7 +82,7 @@
         assert log.opnames(ops) == []
         #
         assert entry_bridge.match_by_id('call', """
-            p38 = call(ConstClass(_ll_0_threadlocalref_getter___), descr=<Callr . EF=1 OS=5>)
+            p38 = call(ConstClass(_ll_1_threadlocalref_get__Ptr_GcStruct_objectLlT_Signed), #, descr=<Callr . i EF=1 OS=5>)
             p39 = getfield_gc(p38, descr=<FieldP pypy.interpreter.executioncontext.ExecutionContext.inst_topframeref .*>)
             i40 = force_token()
             p41 = getfield_gc_pure(p38, descr=<FieldP pypy.interpreter.executioncontext.ExecutionContext.inst_w_tracefunc .*>)
@@ -444,7 +444,7 @@
             p26 = getfield_gc(p7, descr=<FieldP pypy.objspace.std.dictmultiobject.W_DictMultiObject.inst_strategy .*>)
             guard_value(p26, ConstPtr(ptr27), descr=...)
             guard_not_invalidated(descr=...)
-            p29 = call(ConstClass(_ll_0_threadlocalref_getter___), descr=<Callr . EF=1 OS=5>)
+            p29 = call(ConstClass(_ll_1_threadlocalref_get__Ptr_GcStruct_objectLlT_Signed), #, descr=<Callr . i EF=1 OS=5>)
             p30 = getfield_gc(p29, descr=<FieldP pypy.interpreter.executioncontext.ExecutionContext.inst_topframeref .*>)
             p31 = force_token()
             p32 = getfield_gc_pure(p29, descr=<FieldP pypy.interpreter.executioncontext.ExecutionContext.inst_w_tracefunc .*>)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_ffi.py b/pypy/module/pypyjit/test_pypy_c/test_ffi.py
--- a/pypy/module/pypyjit/test_pypy_c/test_ffi.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_ffi.py
@@ -199,21 +199,16 @@
         ldexp_addr, res = log.result
         assert res == 8.0 * 300
         loop, = log.loops_by_filename(self.filepath)
-        if 'ConstClass(ldexp)' in repr(loop):   # e.g. OS/X
-            ldexp_addr = 'ConstClass(ldexp)'
         assert loop.match_by_id('cfficall', """
-            ...
-            f1 = call_release_gil(..., descr=<Callf 8 fi EF=6 OS=62>)
-            ...
-        """)
-        ops = loop.ops_by_id('cfficall')
-        for name in ['raw_malloc', 'raw_free']:
-            assert name not in str(ops)
-        for name in ['raw_load', 'raw_store', 'getarrayitem_raw', 'setarrayitem_raw']:
-            assert name not in log.opnames(ops)
-        # so far just check that call_release_gil() is produced.
-        # later, also check that the arguments to call_release_gil()
-        # are constants
+            setarrayitem_raw(i69, 0, i95, descr=<ArrayS 4>)    # write 'errno'
+            p96 = force_token()
+            setfield_gc(p0, p96, descr=<FieldP pypy.interpreter.pyframe.PyFrame.vable_token .>)
+            f97 = call_release_gil(i59, 1.0, 3, descr=<Callf 8 fi EF=6 OS=62>)
+            guard_not_forced(descr=...)
+            guard_no_exception(descr=...)
+            i98 = getarrayitem_raw(i69, 0, descr=<ArrayS 4>)   # read 'errno'
+            setfield_gc(p65, i98, descr=<FieldS pypy.interpreter.executioncontext.ExecutionContext.inst__cffi_saved_errno .>)
+        """, ignore_ops=['guard_not_invalidated'])
 
     def test_cffi_call_guard_not_forced_fails(self):
         # this is the test_pypy_c equivalent of
@@ -340,18 +335,16 @@
         guard_value(p166, ConstPtr(ptr72), descr=...)
         p167 = call(ConstClass(_ll_0_alloc_with_del___), descr=<Callr . EF=4>)
         guard_no_exception(descr=...)
-        i112 = int_sub(i160, -32768)
+        i112 = int_signext(i160, 2)
         setfield_gc(p167, ConstPtr(null), descr=<FieldP pypy.module._cffi_backend.cdataobj.W_CData.inst__lifeline_ .+>)
         setfield_gc(p167, ConstPtr(ptr85), descr=<FieldP pypy.module._cffi_backend.cdataobj.W_CData.inst_ctype .+>)
-        i114 = uint_gt(i112, 65535)
+        i114 = int_ne(i160, i112)
         guard_false(i114, descr=...)
-        i115 = int_and(i112, 65535)
-        i116 = int_add(i115, -32768)
         --TICK--
         i119 = call(ConstClass(_ll_1_raw_malloc_varsize__Signed), 6, descr=<Calli . i EF=4 OS=110>)
-        raw_store(i119, 0, i116, descr=<ArrayS 2>)
-        raw_store(i119, 2, i116, descr=<ArrayS 2>)
-        raw_store(i119, 4, i116, descr=<ArrayS 2>)
+        raw_store(i119, 0, i112, descr=<ArrayS 2>)
+        raw_store(i119, 2, i112, descr=<ArrayS 2>)
+        raw_store(i119, 4, i112, descr=<ArrayS 2>)
         setfield_gc(p167, i119, descr=<FieldU pypy.module._cffi_backend.cdataobj.W_CData.inst__cdata .+>)
         i123 = arraylen_gc(p67, descr=<ArrayP .>)
         jump(..., descr=...)
diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py
--- a/rpython/annotator/bookkeeper.py
+++ b/rpython/annotator/bookkeeper.py
@@ -65,6 +65,7 @@
         self.external_class_cache = {}      # cache of ExternalType classes
 
         self.needs_generic_instantiate = {}
+        self.thread_local_fields = set()
 
         delayed_imports()
 
diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py
--- a/rpython/jit/backend/arm/assembler.py
+++ b/rpython/jit/backend/arm/assembler.py
@@ -497,9 +497,11 @@
         if self.cpu.supports_floats:
             mc.VPOP([reg.value for reg in r.callee_saved_vfp_registers],
                                                                     cond=cond)
-        # pop all callee saved registers and IP to keep the alignment
+        # pop all callee saved registers.  This pops 'pc' last.
+        # It also pops the threadlocal_addr back into 'r1', but it
+        # is not needed any more and will be discarded.
         mc.POP([reg.value for reg in r.callee_restored_registers] +
-                                                       [r.ip.value], cond=cond)
+                                                       [r.r1.value], cond=cond)
         mc.BKPT()
 
     def gen_func_prolog(self):
@@ -508,11 +510,16 @@
         if self.cpu.supports_floats:
             stack_size += len(r.callee_saved_vfp_registers) * 2 * WORD
 
-        # push all callee saved registers and IP to keep the alignment
+        # push all callee saved registers including lr; and push r1 as
+        # well, which contains the threadlocal_addr argument.  Note that
+        # we're pushing a total of 10 words, which keeps the stack aligned.
         self.mc.PUSH([reg.value for reg in r.callee_saved_registers] +
-                                                        [r.ip.value])
+                                                        [r.r1.value])
+        self.saved_threadlocal_addr = 0   # at offset 0 from location 'sp'
         if self.cpu.supports_floats:
             self.mc.VPUSH([reg.value for reg in r.callee_saved_vfp_registers])
+            self.saved_threadlocal_addr += (
+                len(r.callee_saved_vfp_registers) * 2 * WORD)
         assert stack_size % 8 == 0 # ensure we keep alignment
 
         # set fp to point to the JITFRAME
@@ -952,16 +959,11 @@
             regalloc._check_invariants()
         self.mc.mark_op(None)  # end of the loop
 
-    def regalloc_emit_llong(self, op, arglocs, fcond, regalloc):
+    def regalloc_emit_extra(self, op, arglocs, fcond, regalloc):
+        # for calls to a function with a specifically-supported OS_xxx
         effectinfo = op.getdescr().get_extra_info()
         oopspecindex = effectinfo.oopspecindex
-        asm_llong_operations[oopspecindex](self, op, arglocs, regalloc, fcond)
-        return fcond
-
-    def regalloc_emit_math(self, op, arglocs, fcond, regalloc):
-        effectinfo = op.getdescr().get_extra_info()
-        oopspecindex = effectinfo.oopspecindex
-        asm_math_operations[oopspecindex](self, op, arglocs, regalloc, fcond)
+        asm_extra_operations[oopspecindex](self, op, arglocs, regalloc, fcond)
         return fcond
 
     def patch_trace(self, faildescr, looptoken, bridge_addr, regalloc):
@@ -1150,6 +1152,14 @@
         else:
             assert 0, 'unsupported case'
 
+    def _mov_raw_sp_to_loc(self, prev_loc, loc, cond=c.AL):
+        if loc.is_core_reg():
+            # load a value from 'SP + n'
+            assert prev_loc.value <= 0xFFF     # not too far
+            self.load_reg(self.mc, loc, r.sp, prev_loc.value, cond=cond)
+        else:
+            assert 0, 'unsupported case'
+
     def regalloc_mov(self, prev_loc, loc, cond=c.AL):
         """Moves a value from a previous location to some other location"""
         if prev_loc.is_imm():
@@ -1163,7 +1173,7 @@
         elif prev_loc.is_vfp_reg():
             self._mov_vfp_reg_to_loc(prev_loc, loc, cond)
         elif prev_loc.is_raw_sp():
-            assert 0, 'raw sp locs are not supported as source loc'
+            self._mov_raw_sp_to_loc(prev_loc, loc, cond)
         else:
             assert 0, 'unsupported case'
     mov_loc_loc = regalloc_mov
@@ -1509,22 +1519,17 @@
 
 asm_operations = [notimplemented_op] * (rop._LAST + 1)
 asm_operations_with_guard = [notimplemented_op_with_guard] * (rop._LAST + 1)
-asm_llong_operations = {}
-asm_math_operations = {}
+asm_extra_operations = {}
 
 for name, value in ResOpAssembler.__dict__.iteritems():
     if name.startswith('emit_guard_'):
         opname = name[len('emit_guard_'):]
         num = getattr(rop, opname.upper())
         asm_operations_with_guard[num] = value
-    elif name.startswith('emit_op_llong_'):
-        opname = name[len('emit_op_llong_'):]
-        num = getattr(EffectInfo, 'OS_LLONG_' + opname.upper())
-        asm_llong_operations[num] = value
-    elif name.startswith('emit_op_math_'):
-        opname = name[len('emit_op_math_'):]
-        num = getattr(EffectInfo, 'OS_MATH_' + opname.upper())
-        asm_math_operations[num] = value
+    elif name.startswith('emit_opx_'):
+        opname = name[len('emit_opx_'):]
+        num = getattr(EffectInfo, 'OS_' + opname.upper())
+        asm_extra_operations[num] = value
     elif name.startswith('emit_op_'):
         opname = name[len('emit_op_'):]
         num = getattr(rop, opname.upper())
diff --git a/rpython/jit/backend/arm/codebuilder.py b/rpython/jit/backend/arm/codebuilder.py
--- a/rpython/jit/backend/arm/codebuilder.py
+++ b/rpython/jit/backend/arm/codebuilder.py
@@ -318,6 +318,18 @@
                     | (rd & 0xF) << 12
                     | imm16 & 0xFFF)
 
+    def SXTB_rr(self, rd, rm, c=cond.AL):
+        self.write32(c << 28
+                    | 0x06AF0070
+                    | (rd & 0xF) << 12
+                    | (rm & 0xF))
+
+    def SXTH_rr(self, rd, rm, c=cond.AL):
+        self.write32(c << 28
+                    | 0x06BF0070
+                    | (rd & 0xF) << 12
+                    | (rm & 0xF))
+
     def LDREX(self, rt, rn, c=cond.AL):
         self.write32(c << 28
                     | 0x01900f9f
diff --git a/rpython/jit/backend/arm/locations.py b/rpython/jit/backend/arm/locations.py
--- a/rpython/jit/backend/arm/locations.py
+++ b/rpython/jit/backend/arm/locations.py
@@ -46,7 +46,7 @@
     def is_core_reg(self):
         return True
 
-    def as_key(self):
+    def as_key(self):       # 0 <= as_key <= 15
         return self.value
 
 
@@ -64,7 +64,7 @@
     def is_vfp_reg(self):
         return True
 
-    def as_key(self):
+    def as_key(self):            # 20 <= as_key <= 35
         return self.value + 20
 
     def is_float(self):
@@ -115,8 +115,8 @@
     def is_imm_float(self):
         return True
 
-    def as_key(self):
-        return self.value
+    def as_key(self):          # a real address + 1
+        return self.value | 1
 
     def is_float(self):
         return True
@@ -148,7 +148,7 @@
     def is_stack(self):
         return True
 
-    def as_key(self):
+    def as_key(self):                # an aligned word + 10000
         return self.position + 10000
 
     def is_float(self):
@@ -174,6 +174,9 @@
     def is_float(self):
         return self.type == FLOAT
 
+    def as_key(self):            # a word >= 1000, and < 1000 + size of SP frame
+        return self.value + 1000
+
 
 def imm(i):
     return ImmLocation(i)
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -19,7 +19,7 @@
 from rpython.jit.backend.arm.codebuilder import InstrBuilder, OverwritingBuilder
 from rpython.jit.backend.arm.jump import remap_frame_layout
 from rpython.jit.backend.arm.regalloc import TempBox
-from rpython.jit.backend.arm.locations import imm
+from rpython.jit.backend.arm.locations import imm, RawSPStackLocation
 from rpython.jit.backend.llsupport import symbolic
 from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
 from rpython.jit.backend.llsupport.descr import InteriorFieldDescr
@@ -102,6 +102,17 @@
         self.mc.MOV_rr(res.value, arg.value, cond=c.GE)
         return fcond
 
+    def emit_op_int_signext(self, op, arglocs, regalloc, fcond):
+        arg, numbytes, res = arglocs
+        assert numbytes.is_imm()
+        if numbytes.value == 1:
+            self.mc.SXTB_rr(res.value, arg.value)
+        elif numbytes.value == 2:
+            self.mc.SXTH_rr(res.value, arg.value)
+        else:
+            raise AssertionError("bad number of bytes")
+        return fcond
+
     #ref: http://blogs.arm.com/software-enablement/detecting-overflow-from-mul/
     def emit_guard_int_mul_ovf(self, op, guard, arglocs, regalloc, fcond):
         reg1 = arglocs[0]
@@ -971,7 +982,9 @@
         return fcond
 
     def _call_assembler_emit_call(self, addr, argloc, resloc):
-        self.simple_call(addr, [argloc], result_loc=resloc)
+        ofs = self.saved_threadlocal_addr
+        threadlocal_loc = RawSPStackLocation(ofs, INT)
+        self.simple_call(addr, [argloc, threadlocal_loc], result_loc=resloc)
 
     def _call_assembler_emit_helper_call(self, addr, arglocs, resloc):
         self.simple_call(addr, arglocs, result_loc=resloc)
@@ -1097,7 +1110,7 @@
 
     emit_op_float_neg = gen_emit_unary_float_op('float_neg', 'VNEG')
     emit_op_float_abs = gen_emit_unary_float_op('float_abs', 'VABS')
-    emit_op_math_sqrt = gen_emit_unary_float_op('math_sqrt', 'VSQRT')
+    emit_opx_math_sqrt = gen_emit_unary_float_op('math_sqrt', 'VSQRT')
 
     emit_op_float_lt = gen_emit_float_cmp_op('float_lt', c.VFP_LT)
     emit_op_float_le = gen_emit_float_cmp_op('float_le', c.VFP_LE)
@@ -1131,13 +1144,13 @@
 
     # the following five instructions are only ARMv7;
     # regalloc.py won't call them at all on ARMv6
-    emit_op_llong_add = gen_emit_float_op('llong_add', 'VADD_i64')
-    emit_op_llong_sub = gen_emit_float_op('llong_sub', 'VSUB_i64')
-    emit_op_llong_and = gen_emit_float_op('llong_and', 'VAND_i64')
-    emit_op_llong_or = gen_emit_float_op('llong_or', 'VORR_i64')
-    emit_op_llong_xor = gen_emit_float_op('llong_xor', 'VEOR_i64')
+    emit_opx_llong_add = gen_emit_float_op('llong_add', 'VADD_i64')
+    emit_opx_llong_sub = gen_emit_float_op('llong_sub', 'VSUB_i64')
+    emit_opx_llong_and = gen_emit_float_op('llong_and', 'VAND_i64')
+    emit_opx_llong_or = gen_emit_float_op('llong_or', 'VORR_i64')
+    emit_opx_llong_xor = gen_emit_float_op('llong_xor', 'VEOR_i64')
 
-    def emit_op_llong_to_int(self, op, arglocs, regalloc, fcond):
+    def emit_opx_llong_to_int(self, op, arglocs, regalloc, fcond):
         loc = arglocs[0]
         res = arglocs[1]
         assert loc.is_vfp_reg()
@@ -1271,3 +1284,11 @@
             regalloc.rm.possibly_free_var(length_box)
         regalloc.rm.possibly_free_var(dstaddr_box)
         return fcond
+
+    def emit_opx_threadlocalref_get(self, op, arglocs, regalloc, fcond):
+        ofs0, res = arglocs
+        assert ofs0.is_imm()
+        ofs = self.saved_threadlocal_addr
+        self.load_reg(self.mc, res, r.sp, ofs)
+        self.load_reg(self.mc, res, res, ofs0.value)
+        return fcond
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -373,11 +373,8 @@
         return gcmap
 
     # ------------------------------------------------------------
-    def perform_llong(self, op, args, fcond):
-        return self.assembler.regalloc_emit_llong(op, args, fcond, self)
-
-    def perform_math(self, op, args, fcond):
-        return self.assembler.regalloc_emit_math(op, args, self, fcond)
+    def perform_extra(self, op, args, fcond):
+        return self.assembler.regalloc_emit_extra(op, args, fcond, self)
 
     def force_spill_var(self, var):
         if var.type == FLOAT:
@@ -458,6 +455,12 @@
         resloc = self.force_allocate_reg(op.result, [op.getarg(0)])
         return [argloc, resloc]
 
+    def prepare_op_int_signext(self, op, fcond):
+        argloc = self.make_sure_var_in_reg(op.getarg(0))
+        numbytes = op.getarg(1).getint()
+        resloc = self.force_allocate_reg(op.result)
+        return [argloc, imm(numbytes), resloc]
+
     def prepare_guard_int_mul_ovf(self, op, guard, fcond):
         boxes = op.getarglist()
         reg1 = self.make_sure_var_in_reg(boxes[0], forbidden_vars=boxes)
@@ -552,15 +555,19 @@
                             EffectInfo.OS_LLONG_XOR):
                 if self.cpu.cpuinfo.arch_version >= 7:
                     args = self._prepare_llong_binop_xx(op, fcond)
-                    self.perform_llong(op, args, fcond)
+                    self.perform_extra(op, args, fcond)
                     return
             elif oopspecindex == EffectInfo.OS_LLONG_TO_INT:
                 args = self._prepare_llong_to_int(op, fcond)
-                self.perform_llong(op, args, fcond)
+                self.perform_extra(op, args, fcond)
                 return
             elif oopspecindex == EffectInfo.OS_MATH_SQRT:
-                args = self.prepare_op_math_sqrt(op, fcond)
-                self.perform_math(op, args, fcond)
+                args = self._prepare_op_math_sqrt(op, fcond)
+                self.perform_extra(op, args, fcond)
+                return
+            elif oopspecindex == EffectInfo.OS_THREADLOCALREF_GET:
+                args = self._prepare_threadlocalref_get(op, fcond)
+                self.perform_extra(op, args, fcond)
                 return
             #elif oopspecindex == EffectInfo.OS_MATH_READ_TIMESTAMP:
             #    ...
@@ -618,6 +625,11 @@
         res = self.force_allocate_reg(op.result)
         return [loc0, res]
 
+    def _prepare_threadlocalref_get(self, op, fcond):
+        ofs0 = imm(op.getarg(1).getint())
+        res = self.force_allocate_reg(op.result)
+        return [ofs0, res]
+
     def _prepare_guard(self, op, args=None):
         if args is None:
             args = []
@@ -1278,7 +1290,7 @@
     prepare_guard_float_ge = prepare_float_op(guard=True,
                             float_result=False, name='prepare_guard_float_ge')
 
-    def prepare_op_math_sqrt(self, op, fcond):
+    def _prepare_op_math_sqrt(self, op, fcond):
         loc = self.make_sure_var_in_reg(op.getarg(1))
         self.possibly_free_vars_for_op(op)
         self.free_temp_vars()
diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -217,7 +217,13 @@
         return lltype.cast_opaque_ptr(llmemory.GCREF, frame)
 
     def make_execute_token(self, *ARGS):
-        FUNCPTR = lltype.Ptr(lltype.FuncType([llmemory.GCREF],
+        # The JIT backend must generate functions with the following
+        # signature: it takes the jitframe and the threadlocal_addr
+        # as arguments, and it returns the (possibly reallocated) jitframe.
+        # The backend can optimize OS_THREADLOCALREF_GET calls to return a
+        # field of this threadlocal_addr, but only if 'translate_support_code':
+        # in untranslated tests, threadlocal_addr is a dummy NULL.
+        FUNCPTR = lltype.Ptr(lltype.FuncType([llmemory.GCREF, llmemory.Address],
                                              llmemory.GCREF))
 
         lst = [(i, history.getkind(ARG)[0]) for i, ARG in enumerate(ARGS)]
@@ -249,8 +255,13 @@
                     else:
                         assert kind == history.REF
                         self.set_ref_value(ll_frame, num, arg)
+                if self.translate_support_code:
+                    ll_threadlocal_addr = llop.threadlocalref_addr(
+                        llmemory.Address)
+                else:
+                    ll_threadlocal_addr = llmemory.NULL
                 llop.gc_writebarrier(lltype.Void, ll_frame)
-                ll_frame = func(ll_frame)
+                ll_frame = func(ll_frame, ll_threadlocal_addr)
             finally:
                 if not self.translate_support_code:
                     LLInterpreter.current_interpreter = prev_interpreter
diff --git a/rpython/jit/backend/llsupport/test/ztranslation_test.py b/rpython/jit/backend/llsupport/test/ztranslation_test.py
--- a/rpython/jit/backend/llsupport/test/ztranslation_test.py
+++ b/rpython/jit/backend/llsupport/test/ztranslation_test.py
@@ -26,8 +26,6 @@
         # - profiler
         # - full optimizer
         # - floats neg and abs
-        # - threadlocalref_get
-        # - get_errno, set_errno
         # - llexternal with macro=True
 
         class Frame(object):
@@ -36,10 +34,6 @@
             def __init__(self, i):
                 self.i = i
 
-        class Foo(object):
-            pass
-        t = ThreadLocalReference(Foo)
-
         eci = ExternalCompilationInfo(post_include_bits=['''
 #define pypy_my_fabs(x)  fabs(x)
 '''])
@@ -74,9 +68,6 @@
                 k = myabs1(myabs2(j))
                 if k - abs(j):  raise ValueError
                 if k - abs(-j): raise ValueError
-                if t.get().nine != 9: raise ValueError
-                rposix.set_errno(total)
-                if rposix.get_errno() != total: raise ValueError
             return chr(total % 253)
         #
         class Virt2(object):
@@ -104,12 +95,8 @@
             return res
         #
         def main(i, j):
-            foo = Foo()
-            foo.nine = -(i + j)
-            t.set(foo)
             a_char = f(i, j)
             a_float = libffi_stuff(i, j)
-            keepalive_until_here(foo)
             return ord(a_char) * 10 + int(a_float)
         expected = main(40, -49)
         res = self.meta_interp(main, [40, -49])
@@ -121,6 +108,7 @@
 
     def test_direct_assembler_call_translates(self):
         """Test CALL_ASSEMBLER and the recursion limit"""
+        # - also tests threadlocalref_get
         from rpython.rlib.rstackovf import StackOverflow
 
         class Thing(object):
@@ -138,6 +126,10 @@
 
         somewhere_else = SomewhereElse()
 
+        class Foo(object):
+            pass
+        t = ThreadLocalReference(Foo)
+
         def change(newthing):
             somewhere_else.frame.thing = newthing
 
@@ -163,6 +155,7 @@
                     nextval = 13
                 frame.thing = Thing(nextval + 1)
                 i += 1
+                if t.get().nine != 9: raise ValueError
             return frame.thing.val
 
         driver2 = JitDriver(greens = [], reds = ['n'])
@@ -184,13 +177,24 @@
                 n = portal2(n)
         assert portal2(10) == -9
 
+        def setup(value):
+            foo = Foo()
+            foo.nine = value
+            t.set(foo)
+            return foo
+
         def mainall(codeno, bound):
-            return main(codeno) + main2(bound)
+            foo = setup(bound + 8)
+            result = main(codeno) + main2(bound)
+            keepalive_until_here(foo)
+            return result
 
+        tmp_obj = setup(9)
+        expected_1 = main(0)
         res = self.meta_interp(mainall, [0, 1], inline=True,
                                policy=StopAtXPolicy(change))
         print hex(res)
-        assert res & 255 == main(0)
+        assert res & 255 == expected_1
         bound = res & ~255
         assert 1024 <= bound <= 131072
         assert bound & (bound-1) == 0       # a power of two
diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -3890,6 +3890,26 @@
             deadframe = self.cpu.execute_token(looptoken, inp)
             assert outp == self.cpu.get_int_value(deadframe, 0)
 
+    def test_int_signext(self):
+        numbytes_cases = [1, 2] if IS_32_BIT else [1, 2, 4]
+        for numbytes in numbytes_cases:
+            ops = """
+            [i0]
+            i1 = int_signext(i0, %d)
+            finish(i1, descr=descr)
+            """ % numbytes
+            descr = BasicFinalDescr()
+            loop = parse(ops, self.cpu, namespace=locals())
+            looptoken = JitCellToken()
+            self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+            test_cases = [random.randrange(-sys.maxint-1, sys.maxint+1)
+                          for _ in range(100)]
+            for test_case in test_cases:
+                deadframe = self.cpu.execute_token(looptoken, test_case)
+                got = self.cpu.get_int_value(deadframe, 0)
+                expected = heaptracker.int_signext(test_case, numbytes)
+                assert got == expected
+
     def test_compile_asmlen(self):
         from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
         if not isinstance(self.cpu, AbstractLLCPU):
diff --git a/rpython/jit/backend/x86/arch.py b/rpython/jit/backend/x86/arch.py
--- a/rpython/jit/backend/x86/arch.py
+++ b/rpython/jit/backend/x86/arch.py
@@ -34,10 +34,16 @@
     FRAME_FIXED_SIZE = 19
     PASS_ON_MY_FRAME = 15
     JITFRAME_FIXED_SIZE = 6 + 8 * 2 # 6 GPR + 8 XMM * 2 WORDS/float
+    # 'threadlocal_addr' is passed as 2nd argument on the stack,
+    # and it can be left here for when it is needed
+    THREADLOCAL_OFS = (FRAME_FIXED_SIZE + 2) * WORD
 else:
-    # rbp + rbx + r12 + r13 + r14 + r15 + 13 extra words = 19
+    # rbp + rbx + r12 + r13 + r14 + r15 + threadlocal + 12 extra words = 19
     FRAME_FIXED_SIZE = 19
-    PASS_ON_MY_FRAME = 13
+    PASS_ON_MY_FRAME = 12
     JITFRAME_FIXED_SIZE = 28 # 13 GPR + 15 XMM
+    # 'threadlocal_addr' is passed as 2nd argument in %esi,
+    # and is moved into this frame location
+    THREADLOCAL_OFS = (FRAME_FIXED_SIZE - 1) * WORD
 
 assert PASS_ON_MY_FRAME >= 12       # asmgcc needs at least JIT_USE_WORDS + 3
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -18,7 +18,7 @@
 from rpython.jit.backend.llsupport.regalloc import (get_scale, valid_addressing_size)
 from rpython.jit.backend.x86.arch import (FRAME_FIXED_SIZE, WORD, IS_X86_64,
                                        JITFRAME_FIXED_SIZE, IS_X86_32,
-                                       PASS_ON_MY_FRAME)
+                                       PASS_ON_MY_FRAME, THREADLOCAL_OFS)
 from rpython.jit.backend.x86.regloc import (eax, ecx, edx, ebx, esp, ebp, esi,
     xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, r8, r9, r10, r11, edi,
     r12, r13, r14, r15, X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG,
@@ -730,6 +730,7 @@
         self.mc.SUB_ri(esp.value, FRAME_FIXED_SIZE * WORD)
         self.mc.MOV_sr(PASS_ON_MY_FRAME * WORD, ebp.value)
         if IS_X86_64:
+            self.mc.MOV_sr(THREADLOCAL_OFS, esi.value)
             self.mc.MOV_rr(ebp.value, edi.value)
         else:
             self.mc.MOV_rs(ebp.value, (FRAME_FIXED_SIZE + 1) * WORD)
@@ -1143,6 +1144,18 @@
     def genop_math_sqrt(self, op, arglocs, resloc):
         self.mc.SQRTSD(arglocs[0], resloc)
 
+    def genop_int_signext(self, op, arglocs, resloc):
+        argloc, numbytesloc = arglocs
+        assert isinstance(numbytesloc, ImmedLoc)
+        if numbytesloc.value == 1:
+            self.mc.MOVSX8(resloc, argloc)
+        elif numbytesloc.value == 2:
+            self.mc.MOVSX16(resloc, argloc)
+        elif IS_X86_64 and numbytesloc.value == 4:
+            self.mc.MOVSX32(resloc, argloc)
+        else:
+            raise AssertionError("bad number of bytes")
+
     def genop_guard_float_ne(self, op, guard_op, guard_token, arglocs, result_loc):
         guard_opnum = guard_op.getopnum()
         if isinstance(arglocs[0], RegLoc):
@@ -1957,7 +1970,8 @@
         self._emit_guard_not_forced(guard_token)
 
     def _call_assembler_emit_call(self, addr, argloc, _):
-        self.simple_call(addr, [argloc])
+        threadlocal_loc = RawEspLoc(THREADLOCAL_OFS, INT)
+        self.simple_call(addr, [argloc, threadlocal_loc])
 
     def _call_assembler_emit_helper_call(self, addr, arglocs, result_loc):
         self.simple_call(addr, arglocs, result_loc)
@@ -2322,48 +2336,16 @@
         assert isinstance(reg, RegLoc)
         self.mc.MOV_rr(reg.value, ebp.value)
 
-    def threadlocalref_get(self, op, resloc):
-        # this function is only called on Linux
-        from rpython.jit.codewriter.jitcode import ThreadLocalRefDescr
-        from rpython.jit.backend.x86 import stmtlocal
+    def threadlocalref_get(self, offset, resloc):
+        # This loads the stack location THREADLOCAL_OFS into a
+        # register, and then read the word at the given offset.
+        # It is only supported if 'translate_support_code' is
+        # true; otherwise, the original call to the piece of assembler
+        # was done with a dummy NULL value.
+        assert self.cpu.translate_support_code
         assert isinstance(resloc, RegLoc)
-        effectinfo = op.getdescr().get_extra_info()
-        assert effectinfo.extradescrs is not None
-        ed = effectinfo.extradescrs[0]
-        assert isinstance(ed, ThreadLocalRefDescr)
-        addr1 = rffi.cast(lltype.Signed, ed.get_tlref_addr())
-        # 'addr1' is the address is the current thread, but we assume that
-        # it is a thread-local at a constant offset from %fs/%gs.
-        addr0 = stmtlocal.threadlocal_base()
-        addr = addr1 - addr0
-        assert rx86.fits_in_32bits(addr)
-        mc = self.mc
-        mc.writechar(stmtlocal.SEGMENT_TL)     # prefix: %fs or %gs
-        mc.MOV_rj(resloc.value, addr)          # memory read
-
-    def get_set_errno(self, op, loc, issue_a_write):
-        # this function is only called on Linux
-        from rpython.jit.backend.x86 import stmtlocal
-        addr = stmtlocal.get_errno_tl()
-        assert rx86.fits_in_32bits(addr)
-        mc = self.mc
-        mc.writechar(stmtlocal.SEGMENT_TL)     # prefix: %fs or %gs
-        # !!important: the *next* instruction must be the one using 'addr'!!
-        if issue_a_write:
-            if isinstance(loc, RegLoc):
-                mc.MOV32_jr(addr, loc.value)       # memory write from reg
-            else:
-                assert isinstance(loc, ImmedLoc)
-                newvalue = loc.value
-                newvalue = rffi.cast(rffi.INT, newvalue)
-                newvalue = rffi.cast(lltype.Signed, newvalue)
-                mc.MOV32_ji(addr, newvalue)        # memory write immediate
-        else:
-            assert isinstance(loc, RegLoc)
-            if IS_X86_32:
-                mc.MOV_rj(loc.value, addr)         # memory read
-            elif IS_X86_64:
-                mc.MOVSX32_rj(loc.value, addr)     # memory read, sign-extend
+        self.mc.MOV_rs(resloc.value, THREADLOCAL_OFS)
+        self.mc.MOV_rm(resloc.value, (resloc.value, offset))
 
     def genop_discard_zero_array(self, op, arglocs):
         (base_loc, startindex_loc, bytes_loc,
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -474,6 +474,12 @@
 
     consider_int_invert = consider_int_neg
 
+    def consider_int_signext(self, op):
+        argloc = self.loc(op.getarg(0))
+        numbytesloc = self.loc(op.getarg(1))
+        resloc = self.force_allocate_reg(op.result)
+        self.perform(op, [argloc, numbytesloc], resloc)
+
     def consider_int_lshift(self, op):
         if isinstance(op.getarg(1), Const):
             loc2 = self.rm.convert_to_imm(op.getarg(1))
@@ -693,29 +699,11 @@
         loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(1))
         self.perform_math(op, [loc0], loc0)
 
-    TLREF_SUPPORT = sys.platform.startswith('linux')
-    ERRNO_SUPPORT = sys.platform.startswith('linux')
-
     def _consider_threadlocalref_get(self, op):
-        if self.TLREF_SUPPORT:
+        if self.translate_support_code:
+            offset = op.getarg(1).getint()   # getarg(0) == 'threadlocalref_get'
             resloc = self.force_allocate_reg(op.result)
-            self.assembler.threadlocalref_get(op, resloc)
-        else:
-            self._consider_call(op)
-
-    def _consider_get_errno(self, op):
-        if self.ERRNO_SUPPORT:
-            resloc = self.force_allocate_reg(op.result)
-            self.assembler.get_set_errno(op, resloc, issue_a_write=False)
-        else:
-            self._consider_call(op)
-
-    def _consider_set_errno(self, op):
-        if self.ERRNO_SUPPORT:
-            # op.getarg(0) is the function set_errno; op.getarg(1) is
-            # the new errno value
-            loc0 = self.rm.make_sure_var_in_reg(op.getarg(1))
-            self.assembler.get_set_errno(op, loc0, issue_a_write=True)
+            self.assembler.threadlocalref_get(offset, resloc)
         else:
             self._consider_call(op)
 
@@ -798,10 +786,6 @@
                 return self._consider_math_sqrt(op)
             if oopspecindex == EffectInfo.OS_THREADLOCALREF_GET:
                 return self._consider_threadlocalref_get(op)
-            if oopspecindex == EffectInfo.OS_GET_ERRNO:
-                return self._consider_get_errno(op)
-            if oopspecindex == EffectInfo.OS_SET_ERRNO:
-                return self._consider_set_errno(op)
             if oopspecindex == EffectInfo.OS_MATH_READ_TIMESTAMP:
                 return self._consider_math_read_timestamp(op)
         self._consider_call(op)
diff --git a/rpython/jit/backend/x86/stmtlocal.py b/rpython/jit/backend/x86/stmtlocal.py
deleted file mode 100644
--- a/rpython/jit/backend/x86/stmtlocal.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from rpython.rtyper.lltypesystem import lltype, rffi
-from rpython.translator.tool.cbuild import ExternalCompilationInfo
-from rpython.jit.backend.x86.arch import WORD
-
-SEGMENT_FS = '\x64'
-SEGMENT_GS = '\x65'
-
-if WORD == 4:
-    SEGMENT_TL = SEGMENT_GS
-    _instruction = "movl %%gs:0, %0"
-else:
-    SEGMENT_TL = SEGMENT_FS
-    _instruction = "movq %%fs:0, %0"
-
-eci = ExternalCompilationInfo(post_include_bits=['''
-#define RPY_STM_JIT  1
-static long pypy__threadlocal_base(void)
-{
-    /* XXX ONLY LINUX WITH GCC/CLANG FOR NOW XXX */
-    long result;
-    asm("%s" : "=r"(result));
-    return result;
-}
-static long pypy__get_errno_tl(void)
-{
-    return ((long)&errno) - pypy__threadlocal_base();
-}
-''' % _instruction])
-
-
-threadlocal_base = rffi.llexternal(
-    'pypy__threadlocal_base',
-    [], lltype.Signed,
-    compilation_info=eci,
-    _nowrapper=True,
-    ) #transactionsafe=True)
-
-get_errno_tl = rffi.llexternal(
-    'pypy__get_errno_tl',
-    [], lltype.Signed,
-    compilation_info=eci,
-    _nowrapper=True,
-    ) #transactionsafe=True)
diff --git a/rpython/jit/codewriter/assembler.py b/rpython/jit/codewriter/assembler.py
--- a/rpython/jit/codewriter/assembler.py
+++ b/rpython/jit/codewriter/assembler.py
@@ -216,10 +216,11 @@
             self.code[pos  ] = chr(target & 0xFF)
             self.code[pos+1] = chr(target >> 8)
         for descr in self.switchdictdescrs:
-            descr.dict = {}
+            as_dict = {}
             for key, switchlabel in descr._labels:
                 target = self.label_positions[switchlabel.name]
-                descr.dict[key] = target
+                as_dict[key] = target
+            descr.attach(as_dict)
 
     def check_result(self):
         # Limitation of the number of registers, from the single-byte encoding
diff --git a/rpython/jit/codewriter/effectinfo.py b/rpython/jit/codewriter/effectinfo.py
--- a/rpython/jit/codewriter/effectinfo.py
+++ b/rpython/jit/codewriter/effectinfo.py
@@ -23,8 +23,6 @@
     OS_SHRINK_ARRAY             = 3    # rgc.ll_shrink_array
     OS_DICT_LOOKUP              = 4    # ll_dict_lookup
     OS_THREADLOCALREF_GET       = 5    # llop.threadlocalref_get
-    OS_GET_ERRNO                = 6    # rposix.get_errno
-    OS_SET_ERRNO                = 7    # rposix.set_errno
     OS_NOT_IN_TRACE             = 8    # for calls not recorded in the jit trace
     #
     OS_STR_CONCAT               = 22   # "stroruni.concat"
diff --git a/rpython/jit/codewriter/flatten.py b/rpython/jit/codewriter/flatten.py
--- a/rpython/jit/codewriter/flatten.py
+++ b/rpython/jit/codewriter/flatten.py
@@ -243,55 +243,39 @@
         else:
             # A switch.
             #
-            def emitdefaultpath():
-                if block.exits[-1].exitcase == 'default':
-                    self.make_link(block.exits[-1])
-                else:
-                    self.emitline("unreachable")
-                    self.emitline("---")
-            #
-            self.emitline('-live-')
             switches = [link for link in block.exits
                         if link.exitcase != 'default']
             switches.sort(key=lambda link: link.llexitcase)
             kind = getkind(block.exitswitch.concretetype)
-            if len(switches) >= 5 and kind == 'int':
-                # A large switch on an integer, implementable efficiently
-                # with the help of a SwitchDictDescr
-                from rpython.jit.codewriter.jitcode import SwitchDictDescr
-                switchdict = SwitchDictDescr()
-                switchdict._labels = []
-                self.emitline('switch', self.getcolor(block.exitswitch),
-                                        switchdict)
-                emitdefaultpath()
-                #
-                for switch in switches:
-                    key = lltype.cast_primitive(lltype.Signed,
-                                                switch.llexitcase)
-                    switchdict._labels.append((key, TLabel(switch)))
-                    # emit code for that path
-                    self.emitline(Label(switch))
-                    self.make_link(switch)
+            assert kind == 'int'    # XXX
             #
+            # A switch on an integer, implementable efficiently with the
+            # help of a SwitchDictDescr.  We use this even if there are
+            # very few cases: in pyjitpl.py, opimpl_switch() will promote
+            # the int only if it matches one of the cases.
+            from rpython.jit.codewriter.jitcode import SwitchDictDescr
+            switchdict = SwitchDictDescr()
+            switchdict._labels = []
+            self.emitline('-live-')    # for 'guard_value'
+            self.emitline('switch', self.getcolor(block.exitswitch),
+                                    switchdict)
+            # emit the default path
+            if block.exits[-1].exitcase == 'default':
+                self.make_link(block.exits[-1])
             else:
-                # A switch with several possible answers, though not too
-                # many of them -- a chain of int_eq comparisons is fine
-                assert kind == 'int'    # XXX
-                color = self.getcolor(block.exitswitch)
-                self.emitline('int_guard_value', color)
-                for switch in switches:
-                    # make the case described by 'switch'
-                    self.emitline('goto_if_not_int_eq',
-                                  color,
-                                  Constant(switch.llexitcase,
-                                           block.exitswitch.concretetype),
-                                  TLabel(switch))
-                    # emit code for the "taken" path
-                    self.make_link(switch)
-                    # finally, emit the label for the "non-taken" path
-                    self.emitline(Label(switch))
-                #
-                emitdefaultpath()
+                self.emitline("unreachable")
+                self.emitline("---")
+            #
+            for switch in switches:
+                key = lltype.cast_primitive(lltype.Signed,
+                                            switch.llexitcase)
+                switchdict._labels.append((key, TLabel(switch)))
+                # emit code for that path
+                # note: we need a -live- for all the 'guard_false' we produce
+                # if the switched value doesn't match any case.
+                self.emitline(Label(switch))
+                self.emitline('-live-')
+                self.make_link(switch)
 
     def insert_renamings(self, link):
         renamings = {}
diff --git a/rpython/jit/codewriter/heaptracker.py b/rpython/jit/codewriter/heaptracker.py
--- a/rpython/jit/codewriter/heaptracker.py
+++ b/rpython/jit/codewriter/heaptracker.py
@@ -1,6 +1,7 @@
 from rpython.rtyper.lltypesystem import lltype, llmemory
 from rpython.rtyper import rclass
 from rpython.rlib.objectmodel import we_are_translated
+from rpython.rlib.rarithmetic import r_uint, intmask
 
 
 def adr2int(addr):
@@ -11,6 +12,14 @@
 def int2adr(int):
     return llmemory.cast_int_to_adr(int)
 
+def int_signext(value, numbytes):
+    b8 = numbytes * 8
+    a = r_uint(value)
+    a += r_uint(1 << (b8 - 1))     # a += 128
+    a &= r_uint((1 << b8) - 1)     # a &= 255
+    a -= r_uint(1 << (b8 - 1))     # a -= 128
+    return intmask(a)
+
 def count_fields_if_immutable(STRUCT):
     assert isinstance(STRUCT, lltype.GcStruct)
     if STRUCT._hints.get('immutable', False):
diff --git a/rpython/jit/codewriter/jitcode.py b/rpython/jit/codewriter/jitcode.py
--- a/rpython/jit/codewriter/jitcode.py
+++ b/rpython/jit/codewriter/jitcode.py
@@ -1,4 +1,4 @@
-from rpython.jit.metainterp.history import AbstractDescr
+from rpython.jit.metainterp.history import AbstractDescr, ConstInt
 from rpython.jit.codewriter import heaptracker
 from rpython.rlib.objectmodel import we_are_translated
 
@@ -109,6 +109,10 @@
 class SwitchDictDescr(AbstractDescr):
     "Get a 'dict' attribute mapping integer values to bytecode positions."
 
+    def attach(self, as_dict):
+        self.dict = as_dict
+        self.const_keys_in_order = map(ConstInt, sorted(as_dict.keys()))
+
     def __repr__(self):
         dict = getattr(self, 'dict', '?')
         return '<SwitchDictDescr %s>' % (dict,)
@@ -117,26 +121,6 @@
         raise NotImplementedError
 
 
-class ThreadLocalRefDescr(AbstractDescr):
-    # A special descr used as the extradescr in a call to a
-    # threadlocalref_get function.  If the backend supports it,
-    # it can use this 'get_tlref_addr()' to get the address *in the
-    # current thread* of the thread-local variable.  If, on the current
-    # platform, the "__thread" variables are implemented as an offset
-    # from some base register (e.g. %fs on x86-64), then the backend will
-    # immediately substract the current value of the base register.
-    # This gives an offset from the base register, and this can be
-    # written down in an assembler instruction to load the "__thread"
-    # variable from anywhere.
-
-    def __init__(self, opaque_id):
-        from rpython.rtyper.lltypesystem.lloperation import llop
-        from rpython.rtyper.lltypesystem import llmemory
-        def get_tlref_addr():
-            return llop.threadlocalref_getaddr(llmemory.Address, opaque_id)
-        self.get_tlref_addr = get_tlref_addr
-
-
 class LiveVarsInfo(object):
     def __init__(self, live_i, live_r, live_f):
         self.live_i = live_i
diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py
--- a/rpython/jit/codewriter/jtransform.py
+++ b/rpython/jit/codewriter/jtransform.py
@@ -439,8 +439,6 @@
         elif oopspec_name.endswith('dict.lookup'):
             # also ordereddict.lookup
             prepare = self._handle_dict_lookup_call
-        elif oopspec_name.startswith('rposix.'):
-            prepare = self._handle_rposix_call
         else:
             prepare = self.prepare_builtin_call
         try:
@@ -1267,19 +1265,12 @@
 
         result = []
         if min2:
-            c_min2 = Constant(min2, lltype.Signed)
-            v2 = varoftype(lltype.Signed)
-            result.append(SpaceOperation('int_sub', [v_arg, c_min2], v2))
+            c_bytes = Constant(size2, lltype.Signed)
+            result.append(SpaceOperation('int_signext', [v_arg, c_bytes],
+                                         v_result))
         else:
-            v2 = v_arg
-        c_mask = Constant(int((1 << (8 * size2)) - 1), lltype.Signed)
-        if min2:
-            v3 = varoftype(lltype.Signed)
-        else:
-            v3 = v_result
-        result.append(SpaceOperation('int_and', [v2, c_mask], v3))
-        if min2:
-            result.append(SpaceOperation('int_add', [v3, c_min2], v_result))
+            c_mask = Constant(int((1 << (8 * size2)) - 1), lltype.Signed)
+            result.append(SpaceOperation('int_and', [v_arg, c_mask], v_result))
         return result
 
     def _float_to_float_cast(self, v_arg, v_result):
@@ -1986,16 +1977,6 @@
         else:
             raise NotImplementedError(oopspec_name)
 
-    def _handle_rposix_call(self, op, oopspec_name, args):
-        if oopspec_name == 'rposix.get_errno':
-            return self._handle_oopspec_call(op, args, EffectInfo.OS_GET_ERRNO,
-                                             EffectInfo.EF_CANNOT_RAISE)
-        elif oopspec_name == 'rposix.set_errno':
-            return self._handle_oopspec_call(op, args, EffectInfo.OS_SET_ERRNO,
-                                             EffectInfo.EF_CANNOT_RAISE)
-        else:
-            raise NotImplementedError(oopspec_name)
-
     def rewrite_op_ll_read_timestamp(self, op):
         op1 = self.prepare_builtin_call(op, "ll_read_timestamp", [])
         return self.handle_residual_call(op1,
@@ -2012,16 +1993,15 @@
         return [op0, op1]
 
     def rewrite_op_threadlocalref_get(self, op):
-        from rpython.jit.codewriter.jitcode import ThreadLocalRefDescr
-        opaqueid = op.args[0].value
-        op1 = self.prepare_builtin_call(op, 'threadlocalref_getter', [],
-                                        extra=(opaqueid,),
-                                        extrakey=opaqueid._obj)
-        extradescr = ThreadLocalRefDescr(opaqueid)
+        # only supports RESTYPE being exactly one word.
+        RESTYPE = op.result.concretetype
+        assert (RESTYPE in (lltype.Signed, lltype.Unsigned, llmemory.Address)
+                or isinstance(RESTYPE, lltype.Ptr))
+        c_offset, = op.args
+        op1 = self.prepare_builtin_call(op, 'threadlocalref_get', [c_offset])
         return self.handle_residual_call(op1,
             oopspecindex=EffectInfo.OS_THREADLOCALREF_GET,
-            extraeffect=EffectInfo.EF_LOOPINVARIANT,
-            extradescr=[extradescr])
+            extraeffect=EffectInfo.EF_LOOPINVARIANT)
 
 # ____________________________________________________________
 
diff --git a/rpython/jit/codewriter/support.py b/rpython/jit/codewriter/support.py
--- a/rpython/jit/codewriter/support.py
+++ b/rpython/jit/codewriter/support.py
@@ -702,10 +702,9 @@
     build_ll_1_raw_free_no_track_allocation = (
         build_raw_free_builder(track_allocation=False))
 
-    def build_ll_0_threadlocalref_getter(opaqueid):
-        def _ll_0_threadlocalref_getter():
-            return llop.threadlocalref_get(rclass.OBJECTPTR, opaqueid)
-        return _ll_0_threadlocalref_getter
+    def _ll_1_threadlocalref_get(TP, offset):
+        return llop.threadlocalref_get(TP, offset)
+    _ll_1_threadlocalref_get.need_result_type = 'exact'   # don't deref
 
     def _ll_1_weakref_create(obj):
         return llop.weakref_create(llmemory.WeakRefPtr, obj)
@@ -818,8 +817,18 @@
     s_result = lltype_to_annotation(ll_res)
     impl = setup_extra_builtin(rtyper, oopspec_name, len(args_s), extra)
     if getattr(impl, 'need_result_type', False):
-        bk = rtyper.annotator.bookkeeper
-        args_s.insert(0, annmodel.SomePBC([bk.getdesc(deref(ll_res))]))
+        if hasattr(rtyper, 'annotator'):
+            bk = rtyper.annotator.bookkeeper
+            ll_restype = ll_res
+            if impl.need_result_type != 'exact':
+                ll_restype = deref(ll_restype)
+            desc = bk.getdesc(ll_restype)
+        else:
+            class TestingDesc(object):
+                knowntype = int
+                pyobj = None
+            desc = TestingDesc()
+        args_s.insert(0, annmodel.SomePBC([desc]))
     #
     if hasattr(rtyper, 'annotator'):  # regular case
         mixlevelann = MixLevelHelperAnnotator(rtyper)
diff --git a/rpython/jit/codewriter/test/test_flatten.py b/rpython/jit/codewriter/test/test_flatten.py
--- a/rpython/jit/codewriter/test/test_flatten.py
+++ b/rpython/jit/codewriter/test/test_flatten.py
@@ -1,5 +1,6 @@
 import py, sys
 from rpython.jit.codewriter import support
+from rpython.jit.codewriter.heaptracker import int_signext
 from rpython.jit.codewriter.flatten import flatten_graph, reorder_renaming_list
 from rpython.jit.codewriter.flatten import GraphFlattener, ListOfKind, Register
 from rpython.jit.codewriter.format import assert_format
@@ -281,30 +282,6 @@
             foobar hi_there!
         """)
 
-    def test_switch(self):
-        def f(n):
-            if n == -5:  return 12
-            elif n == 2: return 51
-            elif n == 7: return 1212
-            else:        return 42
-        self.encoding_test(f, [65], """
-            -live-
-            int_guard_value %i0
-            goto_if_not_int_eq %i0, $-5, L1
-            int_return $12
-            ---
-            L1:
-            goto_if_not_int_eq %i0, $2, L2
-            int_return $51
-            ---
-            L2:
-            goto_if_not_int_eq %i0, $7, L3
-            int_return $1212
-            ---
-            L3:
-            int_return $42
-        """)
-
     def test_switch_dict(self):
         def f(x):
             if   x == 1: return 61
@@ -320,21 +297,27 @@
             int_return $-1
             ---
             L1:
+            -live-
             int_return $61
             ---
             L2:
+            -live-
             int_return $511
             ---
             L3:
+            -live-
             int_return $-22
             ---
             L4:
+            -live-
             int_return $81
             ---
             L5:
+            -live-
             int_return $17
             ---
             L6:
+            -live-
             int_return $54
         """)
 
@@ -780,53 +763,37 @@
             (rffi.SIGNEDCHAR, rffi.LONG, ""),
             (rffi.SIGNEDCHAR, rffi.ULONG, ""),
 
-            (rffi.UCHAR, rffi.SIGNEDCHAR, """int_sub %i0, $-128 -> %i1
-                                             int_and %i1, $255 -> %i2
-                                             int_add %i2, $-128 -> %i3"""),
+            (rffi.UCHAR, rffi.SIGNEDCHAR, "int_signext %i0, $1 -> %i1"),
             (rffi.UCHAR, rffi.UCHAR, ""),
             (rffi.UCHAR, rffi.SHORT, ""),
             (rffi.UCHAR, rffi.USHORT, ""),
             (rffi.UCHAR, rffi.LONG, ""),
             (rffi.UCHAR, rffi.ULONG, ""),
 
-            (rffi.SHORT, rffi.SIGNEDCHAR, """int_sub %i0, $-128 -> %i1
-                                             int_and %i1, $255 -> %i2
-                                             int_add %i2, $-128 -> %i3"""),
+            (rffi.SHORT, rffi.SIGNEDCHAR, "int_signext %i0, $1 -> %i1"),
             (rffi.SHORT, rffi.UCHAR, "int_and %i0, $255 -> %i1"),
             (rffi.SHORT, rffi.SHORT, ""),
             (rffi.SHORT, rffi.USHORT, "int_and %i0, $65535 -> %i1"),
             (rffi.SHORT, rffi.LONG, ""),
             (rffi.SHORT, rffi.ULONG, ""),
 
-            (rffi.USHORT, rffi.SIGNEDCHAR, """int_sub %i0, $-128 -> %i1
-                                              int_and %i1, $255 -> %i2
-                                              int_add %i2, $-128 -> %i3"""),
+            (rffi.USHORT, rffi.SIGNEDCHAR, "int_signext %i0, $1 -> %i1"),
             (rffi.USHORT, rffi.UCHAR, "int_and %i0, $255 -> %i1"),
-            (rffi.USHORT, rffi.SHORT, """int_sub %i0, $-32768 -> %i1
-                                         int_and %i1, $65535 -> %i2
-                                         int_add %i2, $-32768 -> %i3"""),
+            (rffi.USHORT, rffi.SHORT, "int_signext %i0, $2 -> %i1"),
             (rffi.USHORT, rffi.USHORT, ""),
             (rffi.USHORT, rffi.LONG, ""),
             (rffi.USHORT, rffi.ULONG, ""),
 
-            (rffi.LONG, rffi.SIGNEDCHAR, """int_sub %i0, $-128 -> %i1
-                                            int_and %i1, $255 -> %i2
-                                            int_add %i2, $-128 -> %i3"""),
+            (rffi.LONG, rffi.SIGNEDCHAR, "int_signext %i0, $1 -> %i1"),
             (rffi.LONG, rffi.UCHAR, "int_and %i0, $255 -> %i1"),
-            (rffi.LONG, rffi.SHORT, """int_sub %i0, $-32768 -> %i1
-                                       int_and %i1, $65535 -> %i2
-                                       int_add %i2, $-32768 -> %i3"""),
+            (rffi.LONG, rffi.SHORT, "int_signext %i0, $2 -> %i1"),
             (rffi.LONG, rffi.USHORT, "int_and %i0, $65535 -> %i1"),
             (rffi.LONG, rffi.LONG, ""),
             (rffi.LONG, rffi.ULONG, ""),
 
-            (rffi.ULONG, rffi.SIGNEDCHAR, """int_sub %i0, $-128 -> %i1
-                                             int_and %i1, $255 -> %i2
-                                             int_add %i2, $-128 -> %i3"""),
+            (rffi.ULONG, rffi.SIGNEDCHAR, "int_signext %i0, $1 -> %i1"),
             (rffi.ULONG, rffi.UCHAR, "int_and %i0, $255 -> %i1"),
-            (rffi.ULONG, rffi.SHORT, """int_sub %i0, $-32768 -> %i1
-                                        int_and %i1, $65535 -> %i2
-                                        int_add %i2, $-32768 -> %i3"""),
+            (rffi.ULONG, rffi.SHORT, "int_signext %i0, $2 -> %i1"),
             (rffi.ULONG, rffi.USHORT, "int_and %i0, $65535 -> %i1"),
             (rffi.ULONG, rffi.LONG, ""),
             (rffi.ULONG, rffi.ULONG, ""),
@@ -910,18 +877,14 @@
             return rffi.cast(rffi.SIGNEDCHAR, n)
         self.encoding_test(f, [12.456], """
             cast_float_to_int %f0 -> %i0
-            int_sub %i0, $-128 -> %i1
-            int_and %i1, $255 -> %i2
-            int_add %i2, $-128 -> %i3
-            int_return %i3
+            int_signext %i0, $1 -> %i1
+            int_return %i1
         """, transform=True)
         self.encoding_test(f, [rffi.cast(lltype.SingleFloat, 12.456)], """
             cast_singlefloat_to_float %i0 -> %f0
             cast_float_to_int %f0 -> %i1
-            int_sub %i1, $-128 -> %i2
-            int_and %i2, $255 -> %i3
-            int_add %i3, $-128 -> %i4
-            int_return %i4
+            int_signext %i1, $1 -> %i2
+            int_return %i2
         """, transform=True)
 
         def f(dbl):
@@ -1068,9 +1031,12 @@
         match = r.match(op)
         assert match, "line %r does not match regexp" % (op,)
         opname = match.group(1)
-        if   opname == 'int_add': value += int(match.group(2))
-        elif opname == 'int_sub': value -= int(match.group(2))
-        elif opname == 'int_and': value &= int(match.group(2))
-        else: assert 0, opname
+        if opname == 'int_and':
+            value &= int(match.group(2))
+        elif opname == 'int_signext':
+            numbytes = int(match.group(2))
+            value = int_signext(value, numbytes)
+        else:
+            assert 0, opname
     #
     assert rffi.cast(lltype.Signed, value) == expected_value
diff --git a/rpython/jit/codewriter/test/test_jtransform.py b/rpython/jit/codewriter/test/test_jtransform.py
--- a/rpython/jit/codewriter/test/test_jtransform.py
+++ b/rpython/jit/codewriter/test/test_jtransform.py
@@ -148,9 +148,7 @@
              EI.OS_UNIEQ_LENGTHOK:       ([PUNICODE, PUNICODE], INT),
              EI.OS_RAW_MALLOC_VARSIZE_CHAR: ([INT], ARRAYPTR),
              EI.OS_RAW_FREE:             ([ARRAYPTR], lltype.Void),
-             EI.OS_THREADLOCALREF_GET:   ([], rclass.OBJECTPTR),
-             EI.OS_GET_ERRNO:            ([], INT),
-             EI.OS_SET_ERRNO:            ([INT], lltype.Void),
+             EI.OS_THREADLOCALREF_GET:   ([INT], INT),   # for example
             }
             argtypes = argtypes[oopspecindex]
             assert argtypes[0] == [v.concretetype for v in op.args[1:]]
@@ -159,9 +157,7 @@
                 assert extraeffect == EI.EF_ELIDABLE_CAN_RAISE
             elif oopspecindex == EI.OS_RAW_MALLOC_VARSIZE_CHAR:
                 assert extraeffect == EI.EF_CAN_RAISE
-            elif oopspecindex in (EI.OS_RAW_FREE,
-                                  EI.OS_GET_ERRNO,
-                                  EI.OS_SET_ERRNO):
+            elif oopspecindex == EI.OS_RAW_FREE:
                 assert extraeffect == EI.EF_CANNOT_RAISE
             elif oopspecindex == EI.OS_THREADLOCALREF_GET:
                 assert extraeffect == EI.EF_LOOPINVARIANT
@@ -1347,53 +1343,20 @@
     assert op2 is None
 
 def test_threadlocalref_get():
-    from rpython.rtyper import rclass
-    from rpython.rlib.rthread import ThreadLocalReference
+    from rpython.rlib.rthread import ThreadLocalField
+    tlfield = ThreadLocalField(lltype.Signed, 'foobar_test_')
     OS_THREADLOCALREF_GET = effectinfo.EffectInfo.OS_THREADLOCALREF_GET
-    class Foo: pass
-    t = ThreadLocalReference(Foo)
-    v2 = varoftype(rclass.OBJECTPTR)
-    c_opaqueid = const(t.opaque_id)
-    op = SpaceOperation('threadlocalref_get', [c_opaqueid], v2)
+    c = const(tlfield.offset)
+    v = varoftype(lltype.Signed)
+    op = SpaceOperation('threadlocalref_get', [c], v)
     tr = Transformer(FakeCPU(), FakeBuiltinCallControl())
     op0 = tr.rewrite_operation(op)
-    assert op0.opname == 'residual_call_r_r'
-    assert op0.args[0].value == 'threadlocalref_getter' # pseudo-function as str
-    assert op0.args[1] == ListOfKind("ref", [])
-    assert op0.args[2] == 'calldescr-%d' % OS_THREADLOCALREF_GET
-    assert op0.result == v2
-
-def test_get_errno():
-    # test that the oopspec is present and correctly transformed
-    from rpython.rlib import rposix
-    FUNC = lltype.FuncType([], lltype.Signed)
-    func = lltype.functionptr(FUNC, 'get_errno', _callable=rposix.get_errno)
-    v3 = varoftype(lltype.Signed)
-    op = SpaceOperation('direct_call', [const(func)], v3)
-    tr = Transformer(FakeCPU(), FakeBuiltinCallControl())
-    op1 = tr.rewrite_operation(op)
-    assert op1.opname == 'residual_call_r_i'
-    assert op1.args[0].value == func
-    assert op1.args[1] == ListOfKind('ref', [])
-    assert op1.args[2] == 'calldescr-%d' % effectinfo.EffectInfo.OS_GET_ERRNO
-    assert op1.result == v3
-
-def test_set_errno():
-    # test that the oopspec is present and correctly transformed
-    from rpython.rlib import rposix
-    FUNC = lltype.FuncType([lltype.Signed], lltype.Void)
-    func = lltype.functionptr(FUNC, 'set_errno', _callable=rposix.set_errno)
-    v1 = varoftype(lltype.Signed)
-    v3 = varoftype(lltype.Void)
-    op = SpaceOperation('direct_call', [const(func), v1], v3)
-    tr = Transformer(FakeCPU(), FakeBuiltinCallControl())
-    op1 = tr.rewrite_operation(op)
-    assert op1.opname == 'residual_call_ir_v'
-    assert op1.args[0].value == func
-    assert op1.args[1] == ListOfKind('int', [v1])
-    assert op1.args[2] == ListOfKind('ref', [])
-    assert op1.args[3] == 'calldescr-%d' % effectinfo.EffectInfo.OS_SET_ERRNO
-    assert op1.result == v3
+    assert op0.opname == 'residual_call_ir_i'
+    assert op0.args[0].value == 'threadlocalref_get' # pseudo-function as str
+    assert op0.args[1] == ListOfKind("int", [c])
+    assert op0.args[2] == ListOfKind("ref", [])
+    assert op0.args[3] == 'calldescr-%d' % OS_THREADLOCALREF_GET
+    assert op0.result == v
 
 def test_unknown_operation():
     op = SpaceOperation('foobar', [], varoftype(lltype.Void))
diff --git a/rpython/jit/metainterp/blackhole.py b/rpython/jit/metainterp/blackhole.py
--- a/rpython/jit/metainterp/blackhole.py
+++ b/rpython/jit/metainterp/blackhole.py
@@ -489,6 +489,9 @@
         if i < 0:
             return 0
         return i
+    @arguments("i", "i", returns="i")
+    def bhimpl_int_signext(a, b):
+        return heaptracker.int_signext(a, b)
 
     @arguments("i", "i", returns="i")
     def bhimpl_uint_lt(a, b):
diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py
--- a/rpython/jit/metainterp/optimizeopt/intbounds.py
+++ b/rpython/jit/metainterp/optimizeopt/intbounds.py
@@ -342,6 +342,19 @@
         else:
             self.emit_operation(op)
 
+    def optimize_INT_SIGNEXT(self, op):
+        value = self.getvalue(op.getarg(0))
+        numbits = op.getarg(1).getint() * 8
+        start = -(1 << (numbits - 1))
+        stop = 1 << (numbits - 1)
+        bounds = IntBound(start, stop - 1)
+        if bounds.contains_bound(value.intbound):
+            self.make_equal_to(op.result, value)
+        else:
+            self.emit_operation(op)
+            vres = self.getvalue(op.result)
+            vres.intbound.intersect(bounds)
+
     def optimize_ARRAYLEN_GC(self, op):
         self.emit_operation(op)
         array = self.getvalue(op.getarg(0))
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -5494,6 +5494,41 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_int_signext_already_in_bounds(self):
+        ops = """
+        [i0]
+        i1 = int_signext(i0, 1)
+        i2 = int_signext(i1, 2)
+        jump(i2)
+        """
+        expected = """
+        [i0]
+        i1 = int_signext(i0, 1)
+        jump(i1)
+        """
+        self.optimize_loop(ops, expected)
+        #
+        ops = """
+        [i0]
+        i1 = int_signext(i0, 1)
+        i2 = int_signext(i1, 1)
+        jump(i2)
+        """
+        expected = """
+        [i0]
+        i1 = int_signext(i0, 1)
+        jump(i1)
+        """
+        self.optimize_loop(ops, expected)
+        #
+        ops = """
+        [i0]
+        i1 = int_signext(i0, 2)
+        i2 = int_signext(i1, 1)
+        jump(i2)
+        """
+        self.optimize_loop(ops, ops)
+
 
 class TestLLtype(BaseTestOptimizeBasic, LLtypeMixin):
     pass
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -1,4 +1,4 @@
-import py
+import py, sys
 from rpython.rlib.objectmodel import instantiate
 from rpython.jit.metainterp import compile, resume
 from rpython.jit.metainterp.history import AbstractDescr, ConstInt, BoxInt, TreeLoop
@@ -190,6 +190,11 @@
             args = []
             for _ in range(oparity[opnum]):
                 args.append(random.randrange(1, 20))
+            if opnum == rop.INT_SIGNEXT:
+                # 2nd arg is number of bytes to extend from ---
+                # must not be too random
+                args[-1] = random.choice([1, 2] if sys.maxint < 2**32 else
+                                         [1, 2, 4])
             ops = """
             []
             i1 = %s(%s)
@@ -5607,6 +5612,44 @@
         """
         self.optimize_loop(ops, ops, ops)
 
+    def test_bound_backpropagate_int_signext(self):
+        ops = """
+        []
+        i0 = escape()
+        i1 = int_signext(i0, 1)
+        i2 = int_eq(i0, i1)
+        guard_true(i2) []
+        i3 = int_le(i0, 127)    # implied by equality with int_signext
+        guard_true(i3) []
+        i5 = int_gt(i0, -129)   # implied by equality with int_signext
+        guard_true(i5) []
+        jump()
+        """
+        expected = """
+        []
+        i0 = escape()
+        i1 = int_signext(i0, 1)
+        i2 = int_eq(i0, i1)
+        guard_true(i2) []
+        jump()
+        """
+        self.optimize_loop(ops, expected)
+
+    def test_bound_backpropagate_int_signext_2(self):
+        ops = """
+        []
+        i0 = escape()
+        i1 = int_signext(i0, 1)
+        i2 = int_eq(i0, i1)
+        guard_true(i2) []
+        i3 = int_le(i0, 126)    # false for i1 == 127
+        guard_true(i3) []
+        i5 = int_gt(i0, -128)   # false for i1 == -128
+        guard_true(i5) []
+        jump()
+        """
+        self.optimize_loop(ops, ops)
+
     def test_mul_ovf(self):
         ops = """
         [i0, i1]
diff --git a/rpython/jit/metainterp/pyjitpl.py b/rpython/jit/metainterp/pyjitpl.py
--- a/rpython/jit/metainterp/pyjitpl.py
+++ b/rpython/jit/metainterp/pyjitpl.py
@@ -197,7 +197,7 @@
     # ------------------------------
 
     for _opimpl in ['int_add', 'int_sub', 'int_mul', 'int_floordiv', 'int_mod',
-                    'int_and', 'int_or', 'int_xor',
+                    'int_and', 'int_or', 'int_xor', 'int_signext',
                     'int_rshift', 'int_lshift', 'uint_rshift',
                     'uint_lt', 'uint_le', 'uint_gt', 'uint_ge',
                     'uint_floordiv',
@@ -402,13 +402,26 @@
 
     @arguments("box", "descr", "orgpc")
     def opimpl_switch(self, valuebox, switchdict, orgpc):
-        box = self.implement_guard_value(valuebox, orgpc)
-        search_value = box.getint()
+        search_value = valuebox.getint()
         assert isinstance(switchdict, SwitchDictDescr)
         try:
-            self.pc = switchdict.dict[search_value]
+            target = switchdict.dict[search_value]
         except KeyError:
-            pass
+            # None of the cases match.  Fall back to generating a chain
+            # of 'int_eq'.
+            # xxx as a minor optimization, if that's a bridge, then we would
+            # not need the cases that we already tested (and failed) with
+            # 'guard_value'.  How to do it is not very clear though.
+            for const1 in switchdict.const_keys_in_order:
+                box = self.execute(rop.INT_EQ, valuebox, const1)
+                assert box.getint() == 0
+                target = switchdict.dict[const1.getint()]
+                self.metainterp.generate_guard(rop.GUARD_FALSE, box,
+                                               resumepc=target)
+        else:
+            # found one of the cases
+            self.implement_guard_value(valuebox, orgpc)
+            self.pc = target
 
     @arguments()
     def opimpl_unreachable(self):
@@ -2270,8 +2283,8 @@
         if opnum == rop.GUARD_TRUE:     # a goto_if_not that jumps only now
             if not dont_change_position:
                 frame.pc = frame.jitcode.follow_jump(frame.pc)
-        elif opnum == rop.GUARD_FALSE:     # a goto_if_not that stops jumping
-            pass
+        elif opnum == rop.GUARD_FALSE:     # a goto_if_not that stops jumping;
+            pass                  # or a switch that was in its "default" case
         elif opnum == rop.GUARD_VALUE or opnum == rop.GUARD_CLASS:
             pass        # the pc is already set to the *start* of the opcode
         elif (opnum == rop.GUARD_NONNULL or
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -419,6 +419,7 @@
     'INT_RSHIFT/2',
     'INT_LSHIFT/2',
     'UINT_RSHIFT/2',
+    'INT_SIGNEXT/2',
     'FLOAT_ADD/2',
     'FLOAT_SUB/2',
     'FLOAT_MUL/2',
diff --git a/rpython/jit/metainterp/test/test_ajit.py b/rpython/jit/metainterp/test/test_ajit.py
--- a/rpython/jit/metainterp/test/test_ajit.py
+++ b/rpython/jit/metainterp/test/test_ajit.py
@@ -698,6 +698,40 @@
         res = self.interp_operations(f, [12311])
         assert res == 42
 
+    def test_switch_bridges(self):
+        from rpython.rlib.rarithmetic import intmask
+        myjitdriver = JitDriver(greens = [], reds = 'auto')
+        lsts = [[-5, 2, 20] * 6,
+                [7, 123, 2] * 6,
+                [12311, -5, 7] * 6,
+                [7, 123, 2] * 4 + [-5, -5, -5] * 2,
+                [7, 123, 2] * 4 + [-5, -5, -5] * 2 + [12311, 12311, 12311],
+                ]
+        def f(case):
+            x = 0
+            i = 0
+            lst = lsts[case]
+            while i < len(lst):
+                myjitdriver.jit_merge_point()
+                n = lst[i]
+                if n == -5:  a = 5
+                elif n == 2: a = 4
+                elif n == 7: a = 3
+                else:        a = 2
+                x = intmask(x * 10 + a)
+                i += 1
+            return x
+        res = self.meta_interp(f, [0], backendopt=True)
+        assert res == intmask(542 * 1001001001001001)
+        res = self.meta_interp(f, [1], backendopt=True)
+        assert res == intmask(324 * 1001001001001001)
+        res = self.meta_interp(f, [2], backendopt=True)
+        assert res == intmask(253 * 1001001001001001)
+        res = self.meta_interp(f, [3], backendopt=True)
+        assert res == intmask(324324324324555555)
+        res = self.meta_interp(f, [4], backendopt=True)
+        assert res == intmask(324324324324555555222)
+
     def test_r_uint(self):
         from rpython.rlib.rarithmetic import r_uint
         myjitdriver = JitDriver(greens = [], reds = ['y'])
@@ -833,23 +867,6 @@
         assert type(res) == bool
         assert not res
 
-    def test_switch_dict(self):
-        def f(x):
-            if   x == 1: return 61
-            elif x == 2: return 511
-            elif x == 3: return -22
-            elif x == 4: return 81
-            elif x == 5: return 17
-            elif x == 6: return 54
-            elif x == 7: return 987
-            elif x == 8: return -12
-            elif x == 9: return 321
-            return -1
-        res = self.interp_operations(f, [5])
-        assert res == 17
-        res = self.interp_operations(f, [15])
-        assert res == -1
-
     def test_int_add_ovf(self):
         def f(x, y):
             try:
@@ -3048,6 +3065,16 @@
         res = self.meta_interp(f, [32])
         assert res == f(32)
 
+    def test_int_signext(self):
+        def f(n):
+            return rffi.cast(rffi.SIGNEDCHAR, n)
+        res = self.interp_operations(f, [128])
+        assert res == -128
+        res = self.interp_operations(f, [-35 + 256 * 29])
+        assert res == -35
+        res = self.interp_operations(f, [127 - 256 * 29])
+        assert res == 127
+
 class BaseLLtypeTests(BasicTests):
 
     def test_identityhash(self):
diff --git a/rpython/jit/metainterp/test/test_threadlocal.py b/rpython/jit/metainterp/test/test_threadlocal.py
--- a/rpython/jit/metainterp/test/test_threadlocal.py
+++ b/rpython/jit/metainterp/test/test_threadlocal.py
@@ -1,29 +1,21 @@
 import py
+from rpython.rlib import rthread
 from rpython.jit.metainterp.test.support import LLJitMixin
-from rpython.rlib.rthread import ThreadLocalReference
-from rpython.rlib.jit import dont_look_inside
+from rpython.rtyper.lltypesystem import lltype
+from rpython.rtyper.lltypesystem.lloperation import llop
 
 
 class ThreadLocalTest(object):
 
     def test_threadlocalref_get(self):
-        class Foo:
-            pass
-        t = ThreadLocalReference(Foo)
-        x = Foo()
-
-        @dont_look_inside
-        def setup():
-            t.set(x)