[pypy-commit] pypy default: hg merge portable-threadlocal
arigo
noreply at buildbot.pypy.org
Thu Nov 27 10:56:16 CET 2014
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r74747:adc6ab4ae74d
Date: 2014-11-27 10:56 +0100
http://bitbucket.org/pypy/pypy/changeset/adc6ab4ae74d/
Log: hg merge portable-threadlocal
Change the way thread-locals are read from the JIT: found a way to
do it portably, by passing around the pointer to a thread-local
structure from outside the JIT and all the way inside.
diff too long, truncating to 2000 out of 2070 lines
diff --git a/pypy/module/_ssl/thread_lock.py b/pypy/module/_ssl/thread_lock.py
--- a/pypy/module/_ssl/thread_lock.py
+++ b/pypy/module/_ssl/thread_lock.py
@@ -24,12 +24,19 @@
separate_module_source = """
#include <openssl/crypto.h>
+#ifndef _WIN32
+# include <pthread.h>
+#endif
static unsigned int _ssl_locks_count = 0;
static struct RPyOpaque_ThreadLock *_ssl_locks;
static unsigned long _ssl_thread_id_function(void) {
- return RPyThreadGetIdent();
+#ifdef _WIN32
+ return (unsigned long)GetCurrentThreadId();
+#else
+ return (unsigned long)pthread_self();
+#endif
}
static void _ssl_thread_locking_function(int mode, int n, const char *file,
diff --git a/pypy/module/cpyext/src/pythread.c b/pypy/module/cpyext/src/pythread.c
--- a/pypy/module/cpyext/src/pythread.c
+++ b/pypy/module/cpyext/src/pythread.c
@@ -1,11 +1,18 @@
#include <Python.h>
+#ifndef _WIN32
+# include <pthread.h>
+#endif
#include "pythread.h"
#include "src/thread.h"
long
PyThread_get_thread_ident(void)
{
- return RPyThreadGetIdent();
+#ifdef _WIN32
+ return (long)GetCurrentThreadId();
+#else
+ return (long)pthread_self();
+#endif
}
PyThread_type_lock
diff --git a/pypy/module/pypyjit/test_pypy_c/model.py b/pypy/module/pypyjit/test_pypy_c/model.py
--- a/pypy/module/pypyjit/test_pypy_c/model.py
+++ b/pypy/module/pypyjit/test_pypy_c/model.py
@@ -184,10 +184,10 @@
matcher = OpMatcher(ops)
return matcher.match(expected_src, **kwds)
- def match_by_id(self, id, expected_src, **kwds):
+ def match_by_id(self, id, expected_src, ignore_ops=[], **kwds):
ops = list(self.ops_by_id(id, **kwds))
matcher = OpMatcher(ops, id)
- return matcher.match(expected_src)
+ return matcher.match(expected_src, ignore_ops=ignore_ops)
class PartialTraceWithIds(TraceWithIds):
def __init__(self, trace, is_entry_bridge=False):
diff --git a/pypy/module/pypyjit/test_pypy_c/test_call.py b/pypy/module/pypyjit/test_pypy_c/test_call.py
--- a/pypy/module/pypyjit/test_pypy_c/test_call.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_call.py
@@ -82,7 +82,7 @@
assert log.opnames(ops) == []
#
assert entry_bridge.match_by_id('call', """
- p38 = call(ConstClass(_ll_0_threadlocalref_getter___), descr=<Callr . EF=1 OS=5>)
+ p38 = call(ConstClass(_ll_1_threadlocalref_get__Ptr_GcStruct_objectLlT_Signed), #, descr=<Callr . i EF=1 OS=5>)
p39 = getfield_gc(p38, descr=<FieldP pypy.interpreter.executioncontext.ExecutionContext.inst_topframeref .*>)
i40 = force_token()
p41 = getfield_gc_pure(p38, descr=<FieldP pypy.interpreter.executioncontext.ExecutionContext.inst_w_tracefunc .*>)
@@ -444,7 +444,7 @@
p26 = getfield_gc(p7, descr=<FieldP pypy.objspace.std.dictmultiobject.W_DictMultiObject.inst_strategy .*>)
guard_value(p26, ConstPtr(ptr27), descr=...)
guard_not_invalidated(descr=...)
- p29 = call(ConstClass(_ll_0_threadlocalref_getter___), descr=<Callr . EF=1 OS=5>)
+ p29 = call(ConstClass(_ll_1_threadlocalref_get__Ptr_GcStruct_objectLlT_Signed), #, descr=<Callr . i EF=1 OS=5>)
p30 = getfield_gc(p29, descr=<FieldP pypy.interpreter.executioncontext.ExecutionContext.inst_topframeref .*>)
p31 = force_token()
p32 = getfield_gc_pure(p29, descr=<FieldP pypy.interpreter.executioncontext.ExecutionContext.inst_w_tracefunc .*>)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_ffi.py b/pypy/module/pypyjit/test_pypy_c/test_ffi.py
--- a/pypy/module/pypyjit/test_pypy_c/test_ffi.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_ffi.py
@@ -199,21 +199,16 @@
ldexp_addr, res = log.result
assert res == 8.0 * 300
loop, = log.loops_by_filename(self.filepath)
- if 'ConstClass(ldexp)' in repr(loop): # e.g. OS/X
- ldexp_addr = 'ConstClass(ldexp)'
assert loop.match_by_id('cfficall', """
- ...
- f1 = call_release_gil(..., descr=<Callf 8 fi EF=6 OS=62>)
- ...
- """)
- ops = loop.ops_by_id('cfficall')
- for name in ['raw_malloc', 'raw_free']:
- assert name not in str(ops)
- for name in ['raw_load', 'raw_store', 'getarrayitem_raw', 'setarrayitem_raw']:
- assert name not in log.opnames(ops)
- # so far just check that call_release_gil() is produced.
- # later, also check that the arguments to call_release_gil()
- # are constants
+ setarrayitem_raw(i69, 0, i95, descr=<ArrayS 4>) # write 'errno'
+ p96 = force_token()
+ setfield_gc(p0, p96, descr=<FieldP pypy.interpreter.pyframe.PyFrame.vable_token .>)
+ f97 = call_release_gil(i59, 1.0, 3, descr=<Callf 8 fi EF=6 OS=62>)
+ guard_not_forced(descr=...)
+ guard_no_exception(descr=...)
+ i98 = getarrayitem_raw(i69, 0, descr=<ArrayS 4>) # read 'errno'
+ setfield_gc(p65, i98, descr=<FieldS pypy.interpreter.executioncontext.ExecutionContext.inst__cffi_saved_errno .>)
+ """, ignore_ops=['guard_not_invalidated'])
def test_cffi_call_guard_not_forced_fails(self):
# this is the test_pypy_c equivalent of
diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py
--- a/rpython/annotator/bookkeeper.py
+++ b/rpython/annotator/bookkeeper.py
@@ -65,6 +65,7 @@
self.external_class_cache = {} # cache of ExternalType classes
self.needs_generic_instantiate = {}
+ self.thread_local_fields = set()
delayed_imports()
diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py
--- a/rpython/jit/backend/arm/assembler.py
+++ b/rpython/jit/backend/arm/assembler.py
@@ -497,9 +497,11 @@
if self.cpu.supports_floats:
mc.VPOP([reg.value for reg in r.callee_saved_vfp_registers],
cond=cond)
- # pop all callee saved registers and IP to keep the alignment
+ # pop all callee saved registers. This pops 'pc' last.
+ # It also pops the threadlocal_addr back into 'r1', but it
+ # is not needed any more and will be discarded.
mc.POP([reg.value for reg in r.callee_restored_registers] +
- [r.ip.value], cond=cond)
+ [r.r1.value], cond=cond)
mc.BKPT()
def gen_func_prolog(self):
@@ -508,11 +510,16 @@
if self.cpu.supports_floats:
stack_size += len(r.callee_saved_vfp_registers) * 2 * WORD
- # push all callee saved registers and IP to keep the alignment
+ # push all callee saved registers including lr; and push r1 as
+ # well, which contains the threadlocal_addr argument. Note that
+ # we're pushing a total of 10 words, which keeps the stack aligned.
self.mc.PUSH([reg.value for reg in r.callee_saved_registers] +
- [r.ip.value])
+ [r.r1.value])
+ self.saved_threadlocal_addr = 0 # at offset 0 from location 'sp'
if self.cpu.supports_floats:
self.mc.VPUSH([reg.value for reg in r.callee_saved_vfp_registers])
+ self.saved_threadlocal_addr += (
+ len(r.callee_saved_vfp_registers) * 2 * WORD)
assert stack_size % 8 == 0 # ensure we keep alignment
# set fp to point to the JITFRAME
@@ -952,16 +959,11 @@
regalloc._check_invariants()
self.mc.mark_op(None) # end of the loop
- def regalloc_emit_llong(self, op, arglocs, fcond, regalloc):
+ def regalloc_emit_extra(self, op, arglocs, fcond, regalloc):
+ # for calls to a function with a specifically-supported OS_xxx
effectinfo = op.getdescr().get_extra_info()
oopspecindex = effectinfo.oopspecindex
- asm_llong_operations[oopspecindex](self, op, arglocs, regalloc, fcond)
- return fcond
-
- def regalloc_emit_math(self, op, arglocs, fcond, regalloc):
- effectinfo = op.getdescr().get_extra_info()
- oopspecindex = effectinfo.oopspecindex
- asm_math_operations[oopspecindex](self, op, arglocs, regalloc, fcond)
+ asm_extra_operations[oopspecindex](self, op, arglocs, regalloc, fcond)
return fcond
def patch_trace(self, faildescr, looptoken, bridge_addr, regalloc):
@@ -1150,6 +1152,14 @@
else:
assert 0, 'unsupported case'
+ def _mov_raw_sp_to_loc(self, prev_loc, loc, cond=c.AL):
+ if loc.is_core_reg():
+ # load a value from 'SP + n'
+ assert prev_loc.value <= 0xFFF # not too far
+ self.load_reg(self.mc, loc, r.sp, prev_loc.value, cond=cond)
+ else:
+ assert 0, 'unsupported case'
+
def regalloc_mov(self, prev_loc, loc, cond=c.AL):
"""Moves a value from a previous location to some other location"""
if prev_loc.is_imm():
@@ -1163,7 +1173,7 @@
elif prev_loc.is_vfp_reg():
self._mov_vfp_reg_to_loc(prev_loc, loc, cond)
elif prev_loc.is_raw_sp():
- assert 0, 'raw sp locs are not supported as source loc'
+ self._mov_raw_sp_to_loc(prev_loc, loc, cond)
else:
assert 0, 'unsupported case'
mov_loc_loc = regalloc_mov
@@ -1509,22 +1519,17 @@
asm_operations = [notimplemented_op] * (rop._LAST + 1)
asm_operations_with_guard = [notimplemented_op_with_guard] * (rop._LAST + 1)
-asm_llong_operations = {}
-asm_math_operations = {}
+asm_extra_operations = {}
for name, value in ResOpAssembler.__dict__.iteritems():
if name.startswith('emit_guard_'):
opname = name[len('emit_guard_'):]
num = getattr(rop, opname.upper())
asm_operations_with_guard[num] = value
- elif name.startswith('emit_op_llong_'):
- opname = name[len('emit_op_llong_'):]
- num = getattr(EffectInfo, 'OS_LLONG_' + opname.upper())
- asm_llong_operations[num] = value
- elif name.startswith('emit_op_math_'):
- opname = name[len('emit_op_math_'):]
- num = getattr(EffectInfo, 'OS_MATH_' + opname.upper())
- asm_math_operations[num] = value
+ elif name.startswith('emit_opx_'):
+ opname = name[len('emit_opx_'):]
+ num = getattr(EffectInfo, 'OS_' + opname.upper())
+ asm_extra_operations[num] = value
elif name.startswith('emit_op_'):
opname = name[len('emit_op_'):]
num = getattr(rop, opname.upper())
diff --git a/rpython/jit/backend/arm/locations.py b/rpython/jit/backend/arm/locations.py
--- a/rpython/jit/backend/arm/locations.py
+++ b/rpython/jit/backend/arm/locations.py
@@ -46,7 +46,7 @@
def is_core_reg(self):
return True
- def as_key(self):
+ def as_key(self): # 0 <= as_key <= 15
return self.value
@@ -64,7 +64,7 @@
def is_vfp_reg(self):
return True
- def as_key(self):
+ def as_key(self): # 20 <= as_key <= 35
return self.value + 20
def is_float(self):
@@ -115,8 +115,8 @@
def is_imm_float(self):
return True
- def as_key(self):
- return self.value
+ def as_key(self): # a real address + 1
+ return self.value | 1
def is_float(self):
return True
@@ -148,7 +148,7 @@
def is_stack(self):
return True
- def as_key(self):
+ def as_key(self): # an aligned word + 10000
return self.position + 10000
def is_float(self):
@@ -174,6 +174,9 @@
def is_float(self):
return self.type == FLOAT
+ def as_key(self): # a word >= 1000, and < 1000 + size of SP frame
+ return self.value + 1000
+
def imm(i):
return ImmLocation(i)
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -19,7 +19,7 @@
from rpython.jit.backend.arm.codebuilder import InstrBuilder, OverwritingBuilder
from rpython.jit.backend.arm.jump import remap_frame_layout
from rpython.jit.backend.arm.regalloc import TempBox
-from rpython.jit.backend.arm.locations import imm
+from rpython.jit.backend.arm.locations import imm, RawSPStackLocation
from rpython.jit.backend.llsupport import symbolic
from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
from rpython.jit.backend.llsupport.descr import InteriorFieldDescr
@@ -982,7 +982,9 @@
return fcond
def _call_assembler_emit_call(self, addr, argloc, resloc):
- self.simple_call(addr, [argloc], result_loc=resloc)
+ ofs = self.saved_threadlocal_addr
+ threadlocal_loc = RawSPStackLocation(ofs, INT)
+ self.simple_call(addr, [argloc, threadlocal_loc], result_loc=resloc)
def _call_assembler_emit_helper_call(self, addr, arglocs, resloc):
self.simple_call(addr, arglocs, result_loc=resloc)
@@ -1108,7 +1110,7 @@
emit_op_float_neg = gen_emit_unary_float_op('float_neg', 'VNEG')
emit_op_float_abs = gen_emit_unary_float_op('float_abs', 'VABS')
- emit_op_math_sqrt = gen_emit_unary_float_op('math_sqrt', 'VSQRT')
+ emit_opx_math_sqrt = gen_emit_unary_float_op('math_sqrt', 'VSQRT')
emit_op_float_lt = gen_emit_float_cmp_op('float_lt', c.VFP_LT)
emit_op_float_le = gen_emit_float_cmp_op('float_le', c.VFP_LE)
@@ -1142,13 +1144,13 @@
# the following five instructions are only ARMv7;
# regalloc.py won't call them at all on ARMv6
- emit_op_llong_add = gen_emit_float_op('llong_add', 'VADD_i64')
- emit_op_llong_sub = gen_emit_float_op('llong_sub', 'VSUB_i64')
- emit_op_llong_and = gen_emit_float_op('llong_and', 'VAND_i64')
- emit_op_llong_or = gen_emit_float_op('llong_or', 'VORR_i64')
- emit_op_llong_xor = gen_emit_float_op('llong_xor', 'VEOR_i64')
+ emit_opx_llong_add = gen_emit_float_op('llong_add', 'VADD_i64')
+ emit_opx_llong_sub = gen_emit_float_op('llong_sub', 'VSUB_i64')
+ emit_opx_llong_and = gen_emit_float_op('llong_and', 'VAND_i64')
+ emit_opx_llong_or = gen_emit_float_op('llong_or', 'VORR_i64')
+ emit_opx_llong_xor = gen_emit_float_op('llong_xor', 'VEOR_i64')
- def emit_op_llong_to_int(self, op, arglocs, regalloc, fcond):
+ def emit_opx_llong_to_int(self, op, arglocs, regalloc, fcond):
loc = arglocs[0]
res = arglocs[1]
assert loc.is_vfp_reg()
@@ -1282,3 +1284,11 @@
regalloc.rm.possibly_free_var(length_box)
regalloc.rm.possibly_free_var(dstaddr_box)
return fcond
+
+ def emit_opx_threadlocalref_get(self, op, arglocs, regalloc, fcond):
+ ofs0, res = arglocs
+ assert ofs0.is_imm()
+ ofs = self.saved_threadlocal_addr
+ self.load_reg(self.mc, res, r.sp, ofs)
+ self.load_reg(self.mc, res, res, ofs0.value)
+ return fcond
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -373,11 +373,8 @@
return gcmap
# ------------------------------------------------------------
- def perform_llong(self, op, args, fcond):
- return self.assembler.regalloc_emit_llong(op, args, fcond, self)
-
- def perform_math(self, op, args, fcond):
- return self.assembler.regalloc_emit_math(op, args, self, fcond)
+ def perform_extra(self, op, args, fcond):
+ return self.assembler.regalloc_emit_extra(op, args, fcond, self)
def force_spill_var(self, var):
if var.type == FLOAT:
@@ -558,15 +555,19 @@
EffectInfo.OS_LLONG_XOR):
if self.cpu.cpuinfo.arch_version >= 7:
args = self._prepare_llong_binop_xx(op, fcond)
- self.perform_llong(op, args, fcond)
+ self.perform_extra(op, args, fcond)
return
elif oopspecindex == EffectInfo.OS_LLONG_TO_INT:
args = self._prepare_llong_to_int(op, fcond)
- self.perform_llong(op, args, fcond)
+ self.perform_extra(op, args, fcond)
return
elif oopspecindex == EffectInfo.OS_MATH_SQRT:
- args = self.prepare_op_math_sqrt(op, fcond)
- self.perform_math(op, args, fcond)
+ args = self._prepare_op_math_sqrt(op, fcond)
+ self.perform_extra(op, args, fcond)
+ return
+ elif oopspecindex == EffectInfo.OS_THREADLOCALREF_GET:
+ args = self._prepare_threadlocalref_get(op, fcond)
+ self.perform_extra(op, args, fcond)
return
#elif oopspecindex == EffectInfo.OS_MATH_READ_TIMESTAMP:
# ...
@@ -624,6 +625,11 @@
res = self.force_allocate_reg(op.result)
return [loc0, res]
+ def _prepare_threadlocalref_get(self, op, fcond):
+ ofs0 = imm(op.getarg(1).getint())
+ res = self.force_allocate_reg(op.result)
+ return [ofs0, res]
+
def _prepare_guard(self, op, args=None):
if args is None:
args = []
@@ -1284,7 +1290,7 @@
prepare_guard_float_ge = prepare_float_op(guard=True,
float_result=False, name='prepare_guard_float_ge')
- def prepare_op_math_sqrt(self, op, fcond):
+ def _prepare_op_math_sqrt(self, op, fcond):
loc = self.make_sure_var_in_reg(op.getarg(1))
self.possibly_free_vars_for_op(op)
self.free_temp_vars()
diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -217,7 +217,13 @@
return lltype.cast_opaque_ptr(llmemory.GCREF, frame)
def make_execute_token(self, *ARGS):
- FUNCPTR = lltype.Ptr(lltype.FuncType([llmemory.GCREF],
+ # The JIT backend must generate functions with the following
+ # signature: it takes the jitframe and the threadlocal_addr
+ # as arguments, and it returns the (possibly reallocated) jitframe.
+ # The backend can optimize OS_THREADLOCALREF_GET calls to return a
+ # field of this threadlocal_addr, but only if 'translate_support_code':
+ # in untranslated tests, threadlocal_addr is a dummy NULL.
+ FUNCPTR = lltype.Ptr(lltype.FuncType([llmemory.GCREF, llmemory.Address],
llmemory.GCREF))
lst = [(i, history.getkind(ARG)[0]) for i, ARG in enumerate(ARGS)]
@@ -249,8 +255,13 @@
else:
assert kind == history.REF
self.set_ref_value(ll_frame, num, arg)
+ if self.translate_support_code:
+ ll_threadlocal_addr = llop.threadlocalref_addr(
+ llmemory.Address)
+ else:
+ ll_threadlocal_addr = llmemory.NULL
llop.gc_writebarrier(lltype.Void, ll_frame)
- ll_frame = func(ll_frame)
+ ll_frame = func(ll_frame, ll_threadlocal_addr)
finally:
if not self.translate_support_code:
LLInterpreter.current_interpreter = prev_interpreter
diff --git a/rpython/jit/backend/llsupport/test/ztranslation_test.py b/rpython/jit/backend/llsupport/test/ztranslation_test.py
--- a/rpython/jit/backend/llsupport/test/ztranslation_test.py
+++ b/rpython/jit/backend/llsupport/test/ztranslation_test.py
@@ -26,8 +26,6 @@
# - profiler
# - full optimizer
# - floats neg and abs
- # - threadlocalref_get
- # - get_errno, set_errno
# - llexternal with macro=True
class Frame(object):
@@ -36,10 +34,6 @@
def __init__(self, i):
self.i = i
- class Foo(object):
- pass
- t = ThreadLocalReference(Foo)
-
eci = ExternalCompilationInfo(post_include_bits=['''
#define pypy_my_fabs(x) fabs(x)
'''])
@@ -74,9 +68,6 @@
k = myabs1(myabs2(j))
if k - abs(j): raise ValueError
if k - abs(-j): raise ValueError
- if t.get().nine != 9: raise ValueError
- rposix.set_errno(total)
- if rposix.get_errno() != total: raise ValueError
return chr(total % 253)
#
class Virt2(object):
@@ -104,12 +95,8 @@
return res
#
def main(i, j):
- foo = Foo()
- foo.nine = -(i + j)
- t.set(foo)
a_char = f(i, j)
a_float = libffi_stuff(i, j)
- keepalive_until_here(foo)
return ord(a_char) * 10 + int(a_float)
expected = main(40, -49)
res = self.meta_interp(main, [40, -49])
@@ -121,6 +108,7 @@
def test_direct_assembler_call_translates(self):
"""Test CALL_ASSEMBLER and the recursion limit"""
+ # - also tests threadlocalref_get
from rpython.rlib.rstackovf import StackOverflow
class Thing(object):
@@ -138,6 +126,10 @@
somewhere_else = SomewhereElse()
+ class Foo(object):
+ pass
+ t = ThreadLocalReference(Foo)
+
def change(newthing):
somewhere_else.frame.thing = newthing
@@ -163,6 +155,7 @@
nextval = 13
frame.thing = Thing(nextval + 1)
i += 1
+ if t.get().nine != 9: raise ValueError
return frame.thing.val
driver2 = JitDriver(greens = [], reds = ['n'])
@@ -184,13 +177,24 @@
n = portal2(n)
assert portal2(10) == -9
+ def setup(value):
+ foo = Foo()
+ foo.nine = value
+ t.set(foo)
+ return foo
+
def mainall(codeno, bound):
- return main(codeno) + main2(bound)
+ foo = setup(bound + 8)
+ result = main(codeno) + main2(bound)
+ keepalive_until_here(foo)
+ return result
+ tmp_obj = setup(9)
+ expected_1 = main(0)
res = self.meta_interp(mainall, [0, 1], inline=True,
policy=StopAtXPolicy(change))
print hex(res)
- assert res & 255 == main(0)
+ assert res & 255 == expected_1
bound = res & ~255
assert 1024 <= bound <= 131072
assert bound & (bound-1) == 0 # a power of two
diff --git a/rpython/jit/backend/x86/arch.py b/rpython/jit/backend/x86/arch.py
--- a/rpython/jit/backend/x86/arch.py
+++ b/rpython/jit/backend/x86/arch.py
@@ -34,10 +34,16 @@
FRAME_FIXED_SIZE = 19
PASS_ON_MY_FRAME = 15
JITFRAME_FIXED_SIZE = 6 + 8 * 2 # 6 GPR + 8 XMM * 2 WORDS/float
+ # 'threadlocal_addr' is passed as 2nd argument on the stack,
+ # and it can be left here for when it is needed
+ THREADLOCAL_OFS = (FRAME_FIXED_SIZE + 2) * WORD
else:
- # rbp + rbx + r12 + r13 + r14 + r15 + 13 extra words = 19
+ # rbp + rbx + r12 + r13 + r14 + r15 + threadlocal + 12 extra words = 19
FRAME_FIXED_SIZE = 19
- PASS_ON_MY_FRAME = 13
+ PASS_ON_MY_FRAME = 12
JITFRAME_FIXED_SIZE = 28 # 13 GPR + 15 XMM
+ # 'threadlocal_addr' is passed as 2nd argument in %esi,
+ # and is moved into this frame location
+ THREADLOCAL_OFS = (FRAME_FIXED_SIZE - 1) * WORD
assert PASS_ON_MY_FRAME >= 12 # asmgcc needs at least JIT_USE_WORDS + 3
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -18,7 +18,7 @@
from rpython.jit.backend.llsupport.regalloc import (get_scale, valid_addressing_size)
from rpython.jit.backend.x86.arch import (FRAME_FIXED_SIZE, WORD, IS_X86_64,
JITFRAME_FIXED_SIZE, IS_X86_32,
- PASS_ON_MY_FRAME)
+ PASS_ON_MY_FRAME, THREADLOCAL_OFS)
from rpython.jit.backend.x86.regloc import (eax, ecx, edx, ebx, esp, ebp, esi,
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, r8, r9, r10, r11, edi,
r12, r13, r14, r15, X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG,
@@ -730,6 +730,7 @@
self.mc.SUB_ri(esp.value, FRAME_FIXED_SIZE * WORD)
self.mc.MOV_sr(PASS_ON_MY_FRAME * WORD, ebp.value)
if IS_X86_64:
+ self.mc.MOV_sr(THREADLOCAL_OFS, esi.value)
self.mc.MOV_rr(ebp.value, edi.value)
else:
self.mc.MOV_rs(ebp.value, (FRAME_FIXED_SIZE + 1) * WORD)
@@ -1969,7 +1970,8 @@
self._emit_guard_not_forced(guard_token)
def _call_assembler_emit_call(self, addr, argloc, _):
- self.simple_call(addr, [argloc])
+ threadlocal_loc = RawEspLoc(THREADLOCAL_OFS, INT)
+ self.simple_call(addr, [argloc, threadlocal_loc])
def _call_assembler_emit_helper_call(self, addr, arglocs, result_loc):
self.simple_call(addr, arglocs, result_loc)
@@ -2334,48 +2336,16 @@
assert isinstance(reg, RegLoc)
self.mc.MOV_rr(reg.value, ebp.value)
- def threadlocalref_get(self, op, resloc):
- # this function is only called on Linux
- from rpython.jit.codewriter.jitcode import ThreadLocalRefDescr
- from rpython.jit.backend.x86 import stmtlocal
+ def threadlocalref_get(self, offset, resloc):
+ # This loads the stack location THREADLOCAL_OFS into a
+ # register, and then reads the word at the given offset.
+ # It is only supported if 'translate_support_code' is
+ # true; otherwise, the original call to the piece of assembler
+ # was done with a dummy NULL value.
+ assert self.cpu.translate_support_code
assert isinstance(resloc, RegLoc)
- effectinfo = op.getdescr().get_extra_info()
- assert effectinfo.extradescrs is not None
- ed = effectinfo.extradescrs[0]
- assert isinstance(ed, ThreadLocalRefDescr)
- addr1 = rffi.cast(lltype.Signed, ed.get_tlref_addr())
- # 'addr1' is the address is the current thread, but we assume that
- # it is a thread-local at a constant offset from %fs/%gs.
- addr0 = stmtlocal.threadlocal_base()
- addr = addr1 - addr0
- assert rx86.fits_in_32bits(addr)
- mc = self.mc
- mc.writechar(stmtlocal.SEGMENT_TL) # prefix: %fs or %gs
- mc.MOV_rj(resloc.value, addr) # memory read
-
- def get_set_errno(self, op, loc, issue_a_write):
- # this function is only called on Linux
- from rpython.jit.backend.x86 import stmtlocal
- addr = stmtlocal.get_errno_tl()
- assert rx86.fits_in_32bits(addr)
- mc = self.mc
- mc.writechar(stmtlocal.SEGMENT_TL) # prefix: %fs or %gs
- # !!important: the *next* instruction must be the one using 'addr'!!
- if issue_a_write:
- if isinstance(loc, RegLoc):
- mc.MOV32_jr(addr, loc.value) # memory write from reg
- else:
- assert isinstance(loc, ImmedLoc)
- newvalue = loc.value
- newvalue = rffi.cast(rffi.INT, newvalue)
- newvalue = rffi.cast(lltype.Signed, newvalue)
- mc.MOV32_ji(addr, newvalue) # memory write immediate
- else:
- assert isinstance(loc, RegLoc)
- if IS_X86_32:
- mc.MOV_rj(loc.value, addr) # memory read
- elif IS_X86_64:
- mc.MOVSX32_rj(loc.value, addr) # memory read, sign-extend
+ self.mc.MOV_rs(resloc.value, THREADLOCAL_OFS)
+ self.mc.MOV_rm(resloc.value, (resloc.value, offset))
def genop_discard_zero_array(self, op, arglocs):
(base_loc, startindex_loc, bytes_loc,
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -699,29 +699,11 @@
loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(1))
self.perform_math(op, [loc0], loc0)
- TLREF_SUPPORT = sys.platform.startswith('linux')
- ERRNO_SUPPORT = sys.platform.startswith('linux')
-
def _consider_threadlocalref_get(self, op):
- if self.TLREF_SUPPORT:
+ if self.translate_support_code:
+ offset = op.getarg(1).getint() # getarg(0) == 'threadlocalref_get'
resloc = self.force_allocate_reg(op.result)
- self.assembler.threadlocalref_get(op, resloc)
- else:
- self._consider_call(op)
-
- def _consider_get_errno(self, op):
- if self.ERRNO_SUPPORT:
- resloc = self.force_allocate_reg(op.result)
- self.assembler.get_set_errno(op, resloc, issue_a_write=False)
- else:
- self._consider_call(op)
-
- def _consider_set_errno(self, op):
- if self.ERRNO_SUPPORT:
- # op.getarg(0) is the function set_errno; op.getarg(1) is
- # the new errno value
- loc0 = self.rm.make_sure_var_in_reg(op.getarg(1))
- self.assembler.get_set_errno(op, loc0, issue_a_write=True)
+ self.assembler.threadlocalref_get(offset, resloc)
else:
self._consider_call(op)
@@ -804,10 +786,6 @@
return self._consider_math_sqrt(op)
if oopspecindex == EffectInfo.OS_THREADLOCALREF_GET:
return self._consider_threadlocalref_get(op)
- if oopspecindex == EffectInfo.OS_GET_ERRNO:
- return self._consider_get_errno(op)
- if oopspecindex == EffectInfo.OS_SET_ERRNO:
- return self._consider_set_errno(op)
if oopspecindex == EffectInfo.OS_MATH_READ_TIMESTAMP:
return self._consider_math_read_timestamp(op)
self._consider_call(op)
diff --git a/rpython/jit/backend/x86/stmtlocal.py b/rpython/jit/backend/x86/stmtlocal.py
deleted file mode 100644
--- a/rpython/jit/backend/x86/stmtlocal.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from rpython.rtyper.lltypesystem import lltype, rffi
-from rpython.translator.tool.cbuild import ExternalCompilationInfo
-from rpython.jit.backend.x86.arch import WORD
-
-SEGMENT_FS = '\x64'
-SEGMENT_GS = '\x65'
-
-if WORD == 4:
- SEGMENT_TL = SEGMENT_GS
- _instruction = "movl %%gs:0, %0"
-else:
- SEGMENT_TL = SEGMENT_FS
- _instruction = "movq %%fs:0, %0"
-
-eci = ExternalCompilationInfo(post_include_bits=['''
-#define RPY_STM_JIT 1
-static long pypy__threadlocal_base(void)
-{
- /* XXX ONLY LINUX WITH GCC/CLANG FOR NOW XXX */
- long result;
- asm("%s" : "=r"(result));
- return result;
-}
-static long pypy__get_errno_tl(void)
-{
- return ((long)&errno) - pypy__threadlocal_base();
-}
-''' % _instruction])
-
-
-threadlocal_base = rffi.llexternal(
- 'pypy__threadlocal_base',
- [], lltype.Signed,
- compilation_info=eci,
- _nowrapper=True,
- ) #transactionsafe=True)
-
-get_errno_tl = rffi.llexternal(
- 'pypy__get_errno_tl',
- [], lltype.Signed,
- compilation_info=eci,
- _nowrapper=True,
- ) #transactionsafe=True)
diff --git a/rpython/jit/codewriter/effectinfo.py b/rpython/jit/codewriter/effectinfo.py
--- a/rpython/jit/codewriter/effectinfo.py
+++ b/rpython/jit/codewriter/effectinfo.py
@@ -23,8 +23,6 @@
OS_SHRINK_ARRAY = 3 # rgc.ll_shrink_array
OS_DICT_LOOKUP = 4 # ll_dict_lookup
OS_THREADLOCALREF_GET = 5 # llop.threadlocalref_get
- OS_GET_ERRNO = 6 # rposix.get_errno
- OS_SET_ERRNO = 7 # rposix.set_errno
OS_NOT_IN_TRACE = 8 # for calls not recorded in the jit trace
#
OS_STR_CONCAT = 22 # "stroruni.concat"
diff --git a/rpython/jit/codewriter/jitcode.py b/rpython/jit/codewriter/jitcode.py
--- a/rpython/jit/codewriter/jitcode.py
+++ b/rpython/jit/codewriter/jitcode.py
@@ -117,26 +117,6 @@
raise NotImplementedError
-class ThreadLocalRefDescr(AbstractDescr):
- # A special descr used as the extradescr in a call to a
- # threadlocalref_get function. If the backend supports it,
- # it can use this 'get_tlref_addr()' to get the address *in the
- # current thread* of the thread-local variable. If, on the current
- # platform, the "__thread" variables are implemented as an offset
- # from some base register (e.g. %fs on x86-64), then the backend will
- # immediately substract the current value of the base register.
- # This gives an offset from the base register, and this can be
- # written down in an assembler instruction to load the "__thread"
- # variable from anywhere.
-
- def __init__(self, opaque_id):
- from rpython.rtyper.lltypesystem.lloperation import llop
- from rpython.rtyper.lltypesystem import llmemory
- def get_tlref_addr():
- return llop.threadlocalref_getaddr(llmemory.Address, opaque_id)
- self.get_tlref_addr = get_tlref_addr
-
-
class LiveVarsInfo(object):
def __init__(self, live_i, live_r, live_f):
self.live_i = live_i
diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py
--- a/rpython/jit/codewriter/jtransform.py
+++ b/rpython/jit/codewriter/jtransform.py
@@ -439,8 +439,6 @@
elif oopspec_name.endswith('dict.lookup'):
# also ordereddict.lookup
prepare = self._handle_dict_lookup_call
- elif oopspec_name.startswith('rposix.'):
- prepare = self._handle_rposix_call
else:
prepare = self.prepare_builtin_call
try:
@@ -1979,16 +1977,6 @@
else:
raise NotImplementedError(oopspec_name)
- def _handle_rposix_call(self, op, oopspec_name, args):
- if oopspec_name == 'rposix.get_errno':
- return self._handle_oopspec_call(op, args, EffectInfo.OS_GET_ERRNO,
- EffectInfo.EF_CANNOT_RAISE)
- elif oopspec_name == 'rposix.set_errno':
- return self._handle_oopspec_call(op, args, EffectInfo.OS_SET_ERRNO,
- EffectInfo.EF_CANNOT_RAISE)
- else:
- raise NotImplementedError(oopspec_name)
-
def rewrite_op_ll_read_timestamp(self, op):
op1 = self.prepare_builtin_call(op, "ll_read_timestamp", [])
return self.handle_residual_call(op1,
@@ -2005,16 +1993,15 @@
return [op0, op1]
def rewrite_op_threadlocalref_get(self, op):
- from rpython.jit.codewriter.jitcode import ThreadLocalRefDescr
- opaqueid = op.args[0].value
- op1 = self.prepare_builtin_call(op, 'threadlocalref_getter', [],
- extra=(opaqueid,),
- extrakey=opaqueid._obj)
- extradescr = ThreadLocalRefDescr(opaqueid)
+ # only supports RESTYPE being exactly one word.
+ RESTYPE = op.result.concretetype
+ assert (RESTYPE in (lltype.Signed, lltype.Unsigned, llmemory.Address)
+ or isinstance(RESTYPE, lltype.Ptr))
+ c_offset, = op.args
+ op1 = self.prepare_builtin_call(op, 'threadlocalref_get', [c_offset])
return self.handle_residual_call(op1,
oopspecindex=EffectInfo.OS_THREADLOCALREF_GET,
- extraeffect=EffectInfo.EF_LOOPINVARIANT,
- extradescr=[extradescr])
+ extraeffect=EffectInfo.EF_LOOPINVARIANT)
# ____________________________________________________________
diff --git a/rpython/jit/codewriter/support.py b/rpython/jit/codewriter/support.py
--- a/rpython/jit/codewriter/support.py
+++ b/rpython/jit/codewriter/support.py
@@ -702,10 +702,9 @@
build_ll_1_raw_free_no_track_allocation = (
build_raw_free_builder(track_allocation=False))
- def build_ll_0_threadlocalref_getter(opaqueid):
- def _ll_0_threadlocalref_getter():
- return llop.threadlocalref_get(rclass.OBJECTPTR, opaqueid)
- return _ll_0_threadlocalref_getter
+ def _ll_1_threadlocalref_get(TP, offset):
+ return llop.threadlocalref_get(TP, offset)
+ _ll_1_threadlocalref_get.need_result_type = 'exact' # don't deref
def _ll_1_weakref_create(obj):
return llop.weakref_create(llmemory.WeakRefPtr, obj)
@@ -818,8 +817,18 @@
s_result = lltype_to_annotation(ll_res)
impl = setup_extra_builtin(rtyper, oopspec_name, len(args_s), extra)
if getattr(impl, 'need_result_type', False):
- bk = rtyper.annotator.bookkeeper
- args_s.insert(0, annmodel.SomePBC([bk.getdesc(deref(ll_res))]))
+ if hasattr(rtyper, 'annotator'):
+ bk = rtyper.annotator.bookkeeper
+ ll_restype = ll_res
+ if impl.need_result_type != 'exact':
+ ll_restype = deref(ll_restype)
+ desc = bk.getdesc(ll_restype)
+ else:
+ class TestingDesc(object):
+ knowntype = int
+ pyobj = None
+ desc = TestingDesc()
+ args_s.insert(0, annmodel.SomePBC([desc]))
#
if hasattr(rtyper, 'annotator'): # regular case
mixlevelann = MixLevelHelperAnnotator(rtyper)
diff --git a/rpython/jit/codewriter/test/test_jtransform.py b/rpython/jit/codewriter/test/test_jtransform.py
--- a/rpython/jit/codewriter/test/test_jtransform.py
+++ b/rpython/jit/codewriter/test/test_jtransform.py
@@ -148,9 +148,7 @@
EI.OS_UNIEQ_LENGTHOK: ([PUNICODE, PUNICODE], INT),
EI.OS_RAW_MALLOC_VARSIZE_CHAR: ([INT], ARRAYPTR),
EI.OS_RAW_FREE: ([ARRAYPTR], lltype.Void),
- EI.OS_THREADLOCALREF_GET: ([], rclass.OBJECTPTR),
- EI.OS_GET_ERRNO: ([], INT),
- EI.OS_SET_ERRNO: ([INT], lltype.Void),
+ EI.OS_THREADLOCALREF_GET: ([INT], INT), # for example
}
argtypes = argtypes[oopspecindex]
assert argtypes[0] == [v.concretetype for v in op.args[1:]]
@@ -159,9 +157,7 @@
assert extraeffect == EI.EF_ELIDABLE_CAN_RAISE
elif oopspecindex == EI.OS_RAW_MALLOC_VARSIZE_CHAR:
assert extraeffect == EI.EF_CAN_RAISE
- elif oopspecindex in (EI.OS_RAW_FREE,
- EI.OS_GET_ERRNO,
- EI.OS_SET_ERRNO):
+ elif oopspecindex == EI.OS_RAW_FREE:
assert extraeffect == EI.EF_CANNOT_RAISE
elif oopspecindex == EI.OS_THREADLOCALREF_GET:
assert extraeffect == EI.EF_LOOPINVARIANT
@@ -1347,53 +1343,20 @@
assert op2 is None
def test_threadlocalref_get():
- from rpython.rtyper import rclass
- from rpython.rlib.rthread import ThreadLocalReference
+ from rpython.rlib.rthread import ThreadLocalField
+ tlfield = ThreadLocalField(lltype.Signed, 'foobar_test_')
OS_THREADLOCALREF_GET = effectinfo.EffectInfo.OS_THREADLOCALREF_GET
- class Foo: pass
- t = ThreadLocalReference(Foo)
- v2 = varoftype(rclass.OBJECTPTR)
- c_opaqueid = const(t.opaque_id)
- op = SpaceOperation('threadlocalref_get', [c_opaqueid], v2)
+ c = const(tlfield.offset)
+ v = varoftype(lltype.Signed)
+ op = SpaceOperation('threadlocalref_get', [c], v)
tr = Transformer(FakeCPU(), FakeBuiltinCallControl())
op0 = tr.rewrite_operation(op)
- assert op0.opname == 'residual_call_r_r'
- assert op0.args[0].value == 'threadlocalref_getter' # pseudo-function as str
- assert op0.args[1] == ListOfKind("ref", [])
- assert op0.args[2] == 'calldescr-%d' % OS_THREADLOCALREF_GET
- assert op0.result == v2
-
-def test_get_errno():
- # test that the oopspec is present and correctly transformed
- from rpython.rlib import rposix
- FUNC = lltype.FuncType([], lltype.Signed)
- func = lltype.functionptr(FUNC, 'get_errno', _callable=rposix.get_errno)
- v3 = varoftype(lltype.Signed)
- op = SpaceOperation('direct_call', [const(func)], v3)
- tr = Transformer(FakeCPU(), FakeBuiltinCallControl())
- op1 = tr.rewrite_operation(op)
- assert op1.opname == 'residual_call_r_i'
- assert op1.args[0].value == func
- assert op1.args[1] == ListOfKind('ref', [])
- assert op1.args[2] == 'calldescr-%d' % effectinfo.EffectInfo.OS_GET_ERRNO
- assert op1.result == v3
-
-def test_set_errno():
- # test that the oopspec is present and correctly transformed
- from rpython.rlib import rposix
- FUNC = lltype.FuncType([lltype.Signed], lltype.Void)
- func = lltype.functionptr(FUNC, 'set_errno', _callable=rposix.set_errno)
- v1 = varoftype(lltype.Signed)
- v3 = varoftype(lltype.Void)
- op = SpaceOperation('direct_call', [const(func), v1], v3)
- tr = Transformer(FakeCPU(), FakeBuiltinCallControl())
- op1 = tr.rewrite_operation(op)
- assert op1.opname == 'residual_call_ir_v'
- assert op1.args[0].value == func
- assert op1.args[1] == ListOfKind('int', [v1])
- assert op1.args[2] == ListOfKind('ref', [])
- assert op1.args[3] == 'calldescr-%d' % effectinfo.EffectInfo.OS_SET_ERRNO
- assert op1.result == v3
+ assert op0.opname == 'residual_call_ir_i'
+ assert op0.args[0].value == 'threadlocalref_get' # pseudo-function as str
+ assert op0.args[1] == ListOfKind("int", [c])
+ assert op0.args[2] == ListOfKind("ref", [])
+ assert op0.args[3] == 'calldescr-%d' % OS_THREADLOCALREF_GET
+ assert op0.result == v
def test_unknown_operation():
op = SpaceOperation('foobar', [], varoftype(lltype.Void))
diff --git a/rpython/jit/metainterp/test/test_threadlocal.py b/rpython/jit/metainterp/test/test_threadlocal.py
--- a/rpython/jit/metainterp/test/test_threadlocal.py
+++ b/rpython/jit/metainterp/test/test_threadlocal.py
@@ -1,29 +1,20 @@
import py
+from rpython.rlib import rthread
from rpython.jit.metainterp.test.support import LLJitMixin
-from rpython.rlib.rthread import ThreadLocalReference
-from rpython.rlib.jit import dont_look_inside
+from rpython.rtyper.lltypesystem import lltype
+from rpython.rtyper.lltypesystem.lloperation import llop
class ThreadLocalTest(object):
def test_threadlocalref_get(self):
- class Foo:
- pass
- t = ThreadLocalReference(Foo)
- x = Foo()
-
- @dont_look_inside
- def setup():
- t.set(x)
+ tlfield = rthread.ThreadLocalField(lltype.Signed, 'foobar_test_')
def f():
- setup()
- if t.get() is x:
- return 42
- return -666
+ return tlfield.getraw()
res = self.interp_operations(f, [])
- assert res == 42
+ assert res == 0x544c # magic value returned by llinterp
class TestLLtype(ThreadLocalTest, LLJitMixin):
diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py
--- a/rpython/memory/gctransform/framework.py
+++ b/rpython/memory/gctransform/framework.py
@@ -1080,6 +1080,9 @@
assert not livevars, "live GC var around %s!" % (hop.spaceop,)
hop.genop("direct_call", [self.root_walker.thread_run_ptr])
self.pop_roots(hop, livevars)
+ else:
+ hop.rename("gc_thread_run") # keep it around for c/gc.py,
+ # unless handled specially above
def gct_gc_thread_start(self, hop):
assert self.translator.config.translation.thread
@@ -1095,6 +1098,7 @@
assert not livevars, "live GC var around %s!" % (hop.spaceop,)
hop.genop("direct_call", [self.root_walker.thread_die_ptr])
self.pop_roots(hop, livevars)
+ hop.rename("gc_thread_die") # keep it around for c/gc.py
def gct_gc_thread_before_fork(self, hop):
if (self.translator.config.translation.thread
diff --git a/rpython/memory/gctransform/shadowstack.py b/rpython/memory/gctransform/shadowstack.py
--- a/rpython/memory/gctransform/shadowstack.py
+++ b/rpython/memory/gctransform/shadowstack.py
@@ -132,8 +132,12 @@
gcdata.root_stack_top/root_stack_base is the one corresponding
to the current thread.
No GC operation here, e.g. no mallocs or storing in a dict!
+
+ Note that here specifically we don't call rthread.get_ident(),
+ but rthread.get_or_make_ident(). We are possibly in a fresh
+ new thread, so we need to be careful.
"""
- tid = get_tid()
+ tid = rthread.get_or_make_ident()
if gcdata.active_tid != tid:
switch_shadow_stacks(tid)
diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py
--- a/rpython/rlib/rposix.py
+++ b/rpython/rlib/rposix.py
@@ -95,12 +95,19 @@
# the default wrapper for set_errno is not suitable for use in critical places
# like around GIL handling logic, so we provide our own wrappers.
- at jit.oopspec("rposix.get_errno()")
def get_errno():
+ if jit.we_are_jitted():
+ from rpython.rlib import rthread
+ perrno = rthread.tlfield_p_errno.getraw()
+ return intmask(perrno[0])
return intmask(_get_errno())
- at jit.oopspec("rposix.set_errno(errno)")
def set_errno(errno):
+ if jit.we_are_jitted():
+ from rpython.rlib import rthread
+ perrno = rthread.tlfield_p_errno.getraw()
+ perrno[0] = rffi.cast(INT, errno)
+ return
_set_errno(rffi.cast(INT, errno))
if os.name == 'nt':
diff --git a/rpython/rlib/rstack.py b/rpython/rlib/rstack.py
--- a/rpython/rlib/rstack.py
+++ b/rpython/rlib/rstack.py
@@ -1,6 +1,6 @@
"""
This file defines utilities for manipulating the stack in an
-RPython-compliant way, intended mostly for use by the Stackless PyPy.
+RPython-compliant way. It is mainly about the stack_check() function.
"""
import py
@@ -10,18 +10,11 @@
from rpython.rlib import rgc
from rpython.rtyper.lltypesystem import lltype, rffi
from rpython.rtyper.lltypesystem.lloperation import llop
-from rpython.translator import cdir
-from rpython.translator.tool.cbuild import ExternalCompilationInfo
# ____________________________________________________________
-srcdir = py.path.local(cdir) / 'src'
-compilation_info = ExternalCompilationInfo(
- includes=['src/stack.h'],
- separate_module_files=[srcdir / 'stack.c', srcdir / 'threadlocal.c'])
-
def llexternal(name, args, res, _callable=None):
- return rffi.llexternal(name, args, res, compilation_info=compilation_info,
+ return rffi.llexternal(name, args, res,
sandboxsafe=True, _nowrapper=True,
_callable=_callable)
diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py
--- a/rpython/rlib/rthread.py
+++ b/rpython/rlib/rthread.py
@@ -5,8 +5,10 @@
from rpython.rlib import jit, rgc
from rpython.rlib.debug import ll_assert
from rpython.rlib.objectmodel import we_are_translated, specialize
+from rpython.rlib.objectmodel import CDefinedIntSymbolic
from rpython.rtyper.lltypesystem.lloperation import llop
from rpython.rtyper.tool import rffi_platform
+from rpython.rtyper.extregistry import ExtRegistryEntry
class RThreadError(Exception):
pass
@@ -40,8 +42,6 @@
releasegil=True) # release the GIL, but most
# importantly, reacquire it
# around the callback
-c_thread_get_ident = llexternal('RPyThreadGetIdent', [], rffi.LONG,
- _nowrapper=True) # always call directly
TLOCKP = rffi.COpaquePtr('struct RPyOpaque_ThreadLock',
compilation_info=eci)
@@ -83,9 +83,16 @@
# wrappers...
- at jit.loop_invariant
def get_ident():
- return rffi.cast(lltype.Signed, c_thread_get_ident())
+ if we_are_translated():
+ return tlfield_thread_ident.getraw()
+ else:
+ import thread
+ return thread.get_ident()
+
+def get_or_make_ident():
+ assert we_are_translated()
+ return tlfield_thread_ident.get_or_make_raw()
@specialize.arg(0)
def start_new_thread(x, y):
@@ -265,17 +272,40 @@
# KEEP THE REFERENCE ALIVE, THE GC DOES NOT FOLLOW THEM SO FAR!
# We use _make_sure_does_not_move() to make sure the pointer will not move.
-ecitl = ExternalCompilationInfo(
- includes = ['src/threadlocal.h'],
- separate_module_files = [translator_c_dir / 'src' / 'threadlocal.c'])
-ensure_threadlocal = rffi.llexternal_use_eci(ecitl)
-class ThreadLocalReference(object):
+class ThreadLocalField(object):
+ def __init__(self, FIELDTYPE, fieldname):
+ "NOT_RPYTHON: must be prebuilt"
+ self.FIELDTYPE = FIELDTYPE
+ self.fieldname = fieldname
+ offset = CDefinedIntSymbolic('RPY_TLOFS_%s' % self.fieldname,
+ default='?')
+ self.offset = offset
+
+ def getraw():
+ _threadlocalref_seeme(self)
+ return llop.threadlocalref_get(FIELDTYPE, offset)
+
+ def get_or_make_raw():
+ _threadlocalref_seeme(self)
+ addr = llop.threadlocalref_addr(llmemory.Address)
+ return llop.raw_load(FIELDTYPE, addr, offset)
+
+ def setraw(value):
+ _threadlocalref_seeme(self)
+ addr = llop.threadlocalref_addr(llmemory.Address)
+ llop.raw_store(lltype.Void, addr, offset, value)
+
+ self.getraw = getraw
+ self.get_or_make_raw = get_or_make_raw
+ self.setraw = setraw
+
+ def _freeze_(self):
+ return True
+
+
+class ThreadLocalReference(ThreadLocalField):
_COUNT = 1
- OPAQUEID = lltype.OpaqueType("ThreadLocalRef",
- hints={"threadlocalref": True,
- "external": "C",
- "c_name": "RPyThreadStaticTLS"})
def __init__(self, Cls):
"NOT_RPYTHON: must be prebuilt"
@@ -284,15 +314,16 @@
self.local = thread._local() # <- NOT_RPYTHON
unique_id = ThreadLocalReference._COUNT
ThreadLocalReference._COUNT += 1
- opaque_id = lltype.opaqueptr(ThreadLocalReference.OPAQUEID,
- 'tlref%d' % unique_id)
- self.opaque_id = opaque_id
+ ThreadLocalField.__init__(self, lltype.Signed, 'tlref%d' % unique_id)
+ setraw = self.setraw
+ offset = self.offset
def get():
if we_are_translated():
from rpython.rtyper import rclass
from rpython.rtyper.annlowlevel import cast_base_ptr_to_instance
- ptr = llop.threadlocalref_get(rclass.OBJECTPTR, opaque_id)
+ _threadlocalref_seeme(self)
+ ptr = llop.threadlocalref_get(rclass.OBJECTPTR, offset)
return cast_base_ptr_to_instance(Cls, ptr)
else:
return getattr(self.local, 'value', None)
@@ -301,21 +332,34 @@
def set(value):
assert isinstance(value, Cls) or value is None
if we_are_translated():
- from rpython.rtyper.annlowlevel import cast_instance_to_base_ptr
+ from rpython.rtyper.annlowlevel import cast_instance_to_gcref
from rpython.rlib.rgc import _make_sure_does_not_move
from rpython.rlib.objectmodel import running_on_llinterp
- ptr = cast_instance_to_base_ptr(value)
+ gcref = cast_instance_to_gcref(value)
if not running_on_llinterp:
- gcref = lltype.cast_opaque_ptr(llmemory.GCREF, ptr)
if gcref:
_make_sure_does_not_move(gcref)
- llop.threadlocalref_set(lltype.Void, opaque_id, ptr)
- ensure_threadlocal()
+ value = lltype.cast_ptr_to_int(gcref)
+ setraw(value)
else:
self.local.value = value
self.get = get
self.set = set
- def _freeze_(self):
- return True
+
+tlfield_thread_ident = ThreadLocalField(lltype.Signed, "thread_ident")
+tlfield_p_errno = ThreadLocalField(rffi.CArrayPtr(rffi.INT), "p_errno")
+
+def _threadlocalref_seeme(field):
+ "NOT_RPYTHON"
+
+class _Entry(ExtRegistryEntry):
+ _about_ = _threadlocalref_seeme
+
+ def compute_result_annotation(self, s_field):
+ field = s_field.const
+ self.bookkeeper.thread_local_fields.add(field)
+
+ def specialize_call(self, hop):
+ hop.exception_cannot_occur()
diff --git a/rpython/rlib/test/test_rthread.py b/rpython/rlib/test/test_rthread.py
--- a/rpython/rlib/test/test_rthread.py
+++ b/rpython/rlib/test/test_rthread.py
@@ -47,6 +47,10 @@
time.sleep(0.5)
assert results == [True] * 15
+def test_get_ident():
+ import thread
+ assert get_ident() == thread.get_ident()
+
class AbstractThreadTests(AbstractGCTestClass):
use_threads = True
diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py
--- a/rpython/rtyper/llinterp.py
+++ b/rpython/rtyper/llinterp.py
@@ -919,19 +919,14 @@
def op_stack_current(self):
return 0
- def op_threadlocalref_set(self, key, value):
- try:
- d = self.llinterpreter.tlrefsdict
- except AttributeError:
- d = self.llinterpreter.tlrefsdict = {}
- d[key._obj] = value
+ def op_threadlocalref_addr(self):
+ raise NotImplementedError("threadlocalref_addr")
- def op_threadlocalref_get(self, key):
- d = self.llinterpreter.tlrefsdict
- return d[key._obj]
-
- def op_threadlocalref_getaddr(self, key):
- raise NotImplementedError("threadlocalref_getaddr")
+ def op_threadlocalref_get(self, offset):
+ if (type(offset) is CDefinedIntSymbolic and
+ offset.expr == 'RPY_TLOFS_foobar_test_'): # used in tests
+ return 0x544c
+ raise NotImplementedError("threadlocalref_get")
# __________________________________________________________
# operations on addresses
@@ -978,6 +973,9 @@
ll_p = rffi.cast(rffi.CArrayPtr(RESTYPE),
rffi.ptradd(ll_p, offset))
value = ll_p[0]
+ ## elif getattr(addr, 'is_fake_thread_local_addr', False):
+ ## assert type(offset) is CDefinedIntSymbolic
+ ## value = self.llinterpreter.tlobj[offset.expr]
else:
assert offset.TYPE == RESTYPE
value = getattr(addr, str(RESTYPE).lower())[offset.repeat]
@@ -998,6 +996,9 @@
ll_p = rffi.cast(rffi.CArrayPtr(ARGTYPE),
rffi.ptradd(ll_p, offset))
ll_p[0] = value
+ ## elif getattr(addr, 'is_fake_thread_local_addr', False):
+ ## assert type(offset) is CDefinedIntSymbolic
+ ## self.llinterpreter.tlobj[offset.expr] = value
else:
assert offset.TYPE == ARGTYPE
getattr(addr, str(ARGTYPE).lower())[offset.repeat] = value
diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py
--- a/rpython/rtyper/lltypesystem/lloperation.py
+++ b/rpython/rtyper/lltypesystem/lloperation.py
@@ -546,9 +546,8 @@
'getslice': LLOp(canraise=(Exception,)),
'check_and_clear_exc': LLOp(),
- 'threadlocalref_get': LLOp(sideeffects=False),
- 'threadlocalref_getaddr': LLOp(sideeffects=False),
- 'threadlocalref_set': LLOp(),
+ 'threadlocalref_addr': LLOp(sideeffects=False), # get (or make) addr of tl
+ 'threadlocalref_get': LLOp(sideeffects=False), # read field (no check)
# __________ debugging __________
'debug_view': LLOp(),
diff --git a/rpython/rtyper/lltypesystem/test/test_llmemory.py b/rpython/rtyper/lltypesystem/test/test_llmemory.py
--- a/rpython/rtyper/lltypesystem/test/test_llmemory.py
+++ b/rpython/rtyper/lltypesystem/test/test_llmemory.py
@@ -649,3 +649,13 @@
#assert cast_int_to_adr(i) == adr -- depends on ll2ctypes details
i = cast_adr_to_int(NULL, mode="forced")
assert is_valid_int(i) and i == 0
+
+def test_cast_gcref_to_int():
+ A = lltype.GcArray(Address)
+ def f():
+ ptr = lltype.malloc(A, 10)
+ gcref = lltype.cast_opaque_ptr(GCREF, ptr)
+ adr = lltype.cast_ptr_to_int(gcref)
+ assert adr == lltype.cast_ptr_to_int(ptr)
+ f()
+ interpret(f, [])
diff --git a/rpython/translator/c/funcgen.py b/rpython/translator/c/funcgen.py
--- a/rpython/translator/c/funcgen.py
+++ b/rpython/translator/c/funcgen.py
@@ -13,6 +13,7 @@
from rpython.translator.backendopt.ssa import SSI_to_SSA
from rpython.translator.backendopt.innerloop import find_inner_loops
from rpython.tool.identity_dict import identity_dict
+from rpython.rlib.objectmodel import CDefinedIntSymbolic
LOCALVAR = 'l_%s'
@@ -900,4 +901,21 @@
else:
return None # use the default
+ def OP_THREADLOCALREF_GET(self, op):
+ typename = self.db.gettype(op.result.concretetype)
+ if isinstance(op.args[0], Constant):
+ assert isinstance(op.args[0].value, CDefinedIntSymbolic)
+ fieldname = op.args[0].value.expr
+ assert fieldname.startswith('RPY_TLOFS_')
+ fieldname = fieldname[10:]
+ return '%s = (%s)RPY_THREADLOCALREF_GET(%s);' % (
+ self.expr(op.result),
+ cdecl(typename, ''),
+ fieldname)
+ else:
+ return 'OP_THREADLOCALREF_GET_NONCONST(%s, %s, %s);' % (
+ cdecl(typename, ''),
+ self.expr(op.args[0]),
+ self.expr(op.result))
+
assert not USESLOTS or '__dict__' not in dir(FunctionCodeGenerator)
diff --git a/rpython/translator/c/gc.py b/rpython/translator/c/gc.py
--- a/rpython/translator/c/gc.py
+++ b/rpython/translator/c/gc.py
@@ -71,13 +71,20 @@
return ''
def OP_GC_THREAD_RUN(self, funcgen, op):
- return ''
+ # The gc transformer leaves this operation in the graphs
+ # in all cases except with framework+shadowstack. In that
+ # case the operation is removed because it is redundant with
+ # rthread.get_or_make_ident().
+ return 'RPY_THREADLOCALREF_ENSURE();'
def OP_GC_THREAD_START(self, funcgen, op):
return ''
def OP_GC_THREAD_DIE(self, funcgen, op):
- return ''
+ # The gc transformer leaves this operation in the graphs
+ # (but may insert a call to a gcrootfinder-specific
+ # function just before).
+ return 'RPython_ThreadLocals_ThreadDie();'
def OP_GC_THREAD_BEFORE_FORK(self, funcgen, op):
return '%s = NULL;' % funcgen.expr(op.result)
diff --git a/rpython/translator/c/genc.py b/rpython/translator/c/genc.py
--- a/rpython/translator/c/genc.py
+++ b/rpython/translator/c/genc.py
@@ -703,8 +703,27 @@
for node in structdeflist:
for line in node.definition():
print >> f, line
+ gen_threadlocal_structdef(f, database)
print >> f, "#endif"
+def gen_threadlocal_structdef(f, database):
+ from rpython.translator.c.support import cdecl
+ print >> f
+ bk = database.translator.annotator.bookkeeper
+ fields = list(bk.thread_local_fields)
+ fields.sort(key=lambda field: field.fieldname)
+ for field in fields:
+ print >> f, ('#define RPY_TLOFS_%s offsetof(' % field.fieldname +
+ 'struct pypy_threadlocal_s, %s)' % field.fieldname)
+ print >> f, 'struct pypy_threadlocal_s {'
+ print >> f, '\tint ready;'
+ print >> f, '\tchar *stack_end;'
+ for field in fields:
+ typename = database.gettype(field.FIELDTYPE)
+ print >> f, '\t%s;' % cdecl(typename, field.fieldname)
+ print >> f, '};'
+ print >> f
+
def gen_forwarddecl(f, database):
print >> f, '/***********************************************************/'
print >> f, '/*** Forward declarations ***/'
@@ -730,6 +749,11 @@
# generate the start-up code and put it into a function
print >> f, 'char *RPython_StartupCode(void) {'
print >> f, '\tchar *error = NULL;'
+
+ bk = database.translator.annotator.bookkeeper
+ if bk.thread_local_fields:
+ print >> f, '\tRPython_ThreadLocals_ProgramInit();'
+
for line in database.gcpolicy.gc_startup_code():
print >> f,"\t" + line
@@ -748,6 +772,7 @@
print >> f, '\tif (error) return error;'
for line in lines:
print >> f, '\t'+line
+
print >> f, '\treturn error;'
print >> f, '}'
@@ -770,6 +795,8 @@
srcdir / 'asm.c',
srcdir / 'instrument.c',
srcdir / 'int.c',
+ srcdir / 'stack.c',
+ srcdir / 'threadlocal.c',
]
if _CYGWIN:
files.append(srcdir / 'cygwin_wait.c')
diff --git a/rpython/translator/c/node.py b/rpython/translator/c/node.py
--- a/rpython/translator/c/node.py
+++ b/rpython/translator/c/node.py
@@ -966,30 +966,12 @@
args.append('0')
yield 'RPyOpaque_SETUP_%s(%s);' % (T.tag, ', '.join(args))
-class ThreadLocalRefOpaqueNode(ContainerNode):
- nodekind = 'tlrefopaque'
-
- def basename(self):
- return self.obj._name
-
- def enum_dependencies(self):
- return []
-
- def initializationexpr(self, decoration=''):
- return ['0']
-
- def startupcode(self):
- p = self.getptrname()
- yield 'RPyThreadStaticTLS_Create(%s);' % (p,)
-
def opaquenode_factory(db, T, obj):
if T == RuntimeTypeInfo:
return db.gcpolicy.rtti_node_factory()(db, T, obj)
if T.hints.get("render_structure", False):
return ExtType_OpaqueNode(db, T, obj)
- if T.hints.get("threadlocalref", False):
- return ThreadLocalRefOpaqueNode(db, T, obj)
raise Exception("don't know about %r" % (T,))
diff --git a/rpython/translator/c/src/g_include.h b/rpython/translator/c/src/g_include.h
--- a/rpython/translator/c/src/g_include.h
+++ b/rpython/translator/c/src/g_include.h
@@ -19,6 +19,8 @@
#include "src/address.h"
#include "src/unichar.h"
#include "src/llgroup.h"
+#include "src/stack.h"
+#include "src/threadlocal.h"
#include "src/instrument.h"
#include "src/asm.h"
diff --git a/rpython/translator/c/src/g_prerequisite.h b/rpython/translator/c/src/g_prerequisite.h
--- a/rpython/translator/c/src/g_prerequisite.h
+++ b/rpython/translator/c/src/g_prerequisite.h
@@ -23,6 +23,3 @@
# define RPY_LENGTH0 1 /* array decl [0] are bad */
# define RPY_DUMMY_VARLENGTH /* nothing */
#endif
-
-
-#include "src/threadlocal.h"
diff --git a/rpython/translator/c/src/stack.c b/rpython/translator/c/src/stack.c
--- a/rpython/translator/c/src/stack.c
+++ b/rpython/translator/c/src/stack.c
@@ -1,6 +1,8 @@
/* Stack operation */
+#include "common_header.h"
+#include "structdef.h" /* for struct pypy_threadlocal_s */
#include <src/stack.h>
-#include <src/thread.h>
+#include <src/threadlocal.h>
#include <stdio.h>
@@ -9,7 +11,6 @@
char *_LLstacktoobig_stack_end = NULL;
long _LLstacktoobig_stack_length = MAX_STACK_SIZE;
char _LLstacktoobig_report_error = 1;
-static RPyThreadStaticTLS end_tls_key;
void LL_stack_set_length_fraction(double fraction)
{
@@ -20,6 +21,8 @@
{
long diff, max_stack_size;
char *baseptr, *curptr = (char*)current;
+ char *tl;
+ struct pypy_threadlocal_s *tl1;
/* The stack_end variable is updated to match the current value
if it is still 0 or if we later find a 'curptr' position
@@ -27,15 +30,9 @@
thread-local storage, but we try to minimize its overhead by
keeping a local copy in _LLstacktoobig_stack_end. */
- if (_LLstacktoobig_stack_end == NULL) {
- /* not initialized */
- /* XXX We assume that initialization is performed early,
- when there is still only one thread running. This
- allows us to ignore race conditions here */
- RPyThreadStaticTLS_Create(&end_tls_key);
- }
-
- baseptr = (char *) RPyThreadStaticTLS_Get(end_tls_key);
+ OP_THREADLOCALREF_ADDR(tl);
+ tl1 = (struct pypy_threadlocal_s *)tl;
+ baseptr = tl1->stack_end;
max_stack_size = _LLstacktoobig_stack_length;
if (baseptr == NULL) {
/* first time we see this thread */
@@ -58,7 +55,7 @@
/* update the stack base pointer to the current value */
baseptr = curptr;
- RPyThreadStaticTLS_Set(end_tls_key, baseptr);
+ tl1->stack_end = baseptr;
_LLstacktoobig_stack_end = baseptr;
return 0;
}
diff --git a/rpython/translator/c/src/stack.h b/rpython/translator/c/src/stack.h
--- a/rpython/translator/c/src/stack.h
+++ b/rpython/translator/c/src/stack.h
@@ -2,14 +2,13 @@
/************************************************************/
/*** C header subsection: stack operations ***/
+#include <src/precommondefs.h>
+
+
#ifndef MAX_STACK_SIZE
# define MAX_STACK_SIZE (3 << 18) /* 768 kb */
#endif
-/* This include must be done in any case to initialise
- * the header dependencies early (winsock2, before windows.h).
- * It is needed to have RPyThreadStaticTLS, too. */
-#include "threadlocal.h"
RPY_EXTERN char *_LLstacktoobig_stack_end;
RPY_EXTERN long _LLstacktoobig_stack_length;
diff --git a/rpython/translator/c/src/support.h b/rpython/translator/c/src/support.h
--- a/rpython/translator/c/src/support.h
+++ b/rpython/translator/c/src/support.h
@@ -2,6 +2,9 @@
/************************************************************/
/*** C header subsection: support functions ***/
+#ifndef _SRC_SUPPORT_H
+#define _SRC_SUPPORT_H
+
#define RUNNING_ON_LLINTERP 0
#define OP_JIT_RECORD_KNOWN_CLASS(i, c, r) /* nothing */
@@ -65,3 +68,5 @@
# define RPyNLenItem(array, index) ((array)->items[index])
# define RPyBareItem(array, index) ((array)[index])
#endif
+
+#endif /* _SRC_SUPPORT_H */
diff --git a/rpython/translator/c/src/thread_nt.c b/rpython/translator/c/src/thread_nt.c
--- a/rpython/translator/c/src/thread_nt.c
+++ b/rpython/translator/c/src/thread_nt.c
@@ -26,15 +26,6 @@
static long _pypythread_stacksize = 0;
-/*
- * Return the thread Id instead of an handle. The Id is said to uniquely
- identify the thread in the system
- */
-long RPyThreadGetIdent()
-{
- return GetCurrentThreadId();
-}
-
static void
bootstrap(void *call)
{
@@ -42,7 +33,7 @@
/* copy callobj since other thread might free it before we're done */
void (*func)(void) = obj->func;
- obj->id = RPyThreadGetIdent();
+ obj->id = GetCurrentThreadId();
ReleaseSemaphore(obj->done, 1, NULL);
func();
}
diff --git a/rpython/translator/c/src/thread_nt.h b/rpython/translator/c/src/thread_nt.h
--- a/rpython/translator/c/src/thread_nt.h
+++ b/rpython/translator/c/src/thread_nt.h
@@ -13,8 +13,6 @@
/* prototypes */
RPY_EXTERN
-long RPyThreadGetIdent(void);
-RPY_EXTERN
long RPyThreadStart(void (*func)(void));
RPY_EXTERN
int RPyThreadLockInit(struct RPyOpaque_ThreadLock *lock);
diff --git a/rpython/translator/c/src/thread_pthread.c b/rpython/translator/c/src/thread_pthread.c
--- a/rpython/translator/c/src/thread_pthread.c
+++ b/rpython/translator/c/src/thread_pthread.c
@@ -56,30 +56,6 @@
# endif
#endif
-/* XXX This implementation is considered (to quote Tim Peters) "inherently
- hosed" because:
- - It does not guarantee the promise that a non-zero integer is returned.
- - The cast to long is inherently unsafe.
- - It is not clear that the 'volatile' (for AIX?) and ugly casting in the
- latter return statement (for Alpha OSF/1) are any longer necessary.
-*/
-long RPyThreadGetIdent(void)
-{
- volatile pthread_t threadid;
- /* Jump through some hoops for Alpha OSF/1 */
- threadid = pthread_self();
-
-#ifdef __CYGWIN__
- /* typedef __uint32_t pthread_t; */
- return (long) threadid;
-#else
- if (sizeof(pthread_t) <= sizeof(long))
- return (long) threadid;
- else
- return (long) *(long *) &threadid;
-#endif
-}
-
static long _pypythread_stacksize = 0;
static void *bootstrap_pthread(void *func)
diff --git a/rpython/translator/c/src/thread_pthread.h b/rpython/translator/c/src/thread_pthread.h
--- a/rpython/translator/c/src/thread_pthread.h
+++ b/rpython/translator/c/src/thread_pthread.h
@@ -60,8 +60,6 @@
/* prototypes */
RPY_EXTERN
-long RPyThreadGetIdent(void);
-RPY_EXTERN
long RPyThreadStart(void (*func)(void));
RPY_EXTERN
int RPyThreadLockInit(struct RPyOpaque_ThreadLock *lock);
diff --git a/rpython/translator/c/src/threadlocal.c b/rpython/translator/c/src/threadlocal.c
--- a/rpython/translator/c/src/threadlocal.c
+++ b/rpython/translator/c/src/threadlocal.c
@@ -1,28 +1,117 @@
+#include "common_header.h"
+#include "structdef.h" /* for struct pypy_threadlocal_s */
#include <stdio.h>
#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#ifndef _WIN32
+# include <pthread.h>
+#endif
#include "src/threadlocal.h"
+
+static void _RPy_ThreadLocals_Init(void *p)
+{
+ memset(p, 0, sizeof(struct pypy_threadlocal_s));
+#ifdef RPY_TLOFS_p_errno
+ ((struct pypy_threadlocal_s *)p)->p_errno = &errno;
+#endif
+#ifdef RPY_TLOFS_thread_ident
+ ((struct pypy_threadlocal_s *)p)->thread_ident =
+# ifdef _WIN32
+ GetCurrentThreadId();
+# else
+ (long)pthread_self(); /* xxx This abuses pthread_self() by
+ assuming it just returns an integer. According to
+ comments in CPython's source code, the platforms
+ where it is not the case are rather old nowadays. */
+# endif
+#endif
+ ((struct pypy_threadlocal_s *)p)->ready = 42;
+}
+
+
+/* ------------------------------------------------------------ */
+#ifdef USE___THREAD
+/* ------------------------------------------------------------ */
+
+
+/* in this situation, we always have one full 'struct pypy_threadlocal_s'
+ available, managed by gcc. */
+__thread struct pypy_threadlocal_s pypy_threadlocal;
+
+void RPython_ThreadLocals_ProgramInit(void)
+{
+ _RPy_ThreadLocals_Init(&pypy_threadlocal);
+}
+
+char *_RPython_ThreadLocals_Build(void)
+{
+ RPyAssert(pypy_threadlocal.ready == 0, "corrupted thread-local");
+ _RPy_ThreadLocals_Init(&pypy_threadlocal);
+ return (char *)&pypy_threadlocal;
+}
+
+void RPython_ThreadLocals_ThreadDie(void)
+{
+ memset(&pypy_threadlocal, 0xDD,
+ sizeof(struct pypy_threadlocal_s)); /* debug */
+ pypy_threadlocal.ready = 0;
+}
+
+
+/* ------------------------------------------------------------ */
+#else
+/* ------------------------------------------------------------ */
+
+
+/* this is the case where the 'struct pypy_threadlocal_s' is allocated
+ explicitly, with malloc()/free(), and attached to (a single) thread-
+ local key using the API of Windows or pthread. */
+
+pthread_key_t pypy_threadlocal_key;
+
+
+void RPython_ThreadLocals_ProgramInit(void)
+{
#ifdef _WIN32
-
-void RPyThreadTLS_Create(RPyThreadTLS *result)
-{
- *result = TlsAlloc();
- if (*result == TLS_OUT_OF_INDEXES) {
+ pypy_threadlocal_key = TlsAlloc();
+ if (pypy_threadlocal_key == TLS_OUT_OF_INDEXES)
+#else
+ if (pthread_key_create(&pypy_threadlocal_key, NULL) != 0)
+#endif
+ {
fprintf(stderr, "Internal RPython error: "
"out of thread-local storage indexes");
abort();
}
+ _RPython_ThreadLocals_Build();
}
-#else
+char *_RPython_ThreadLocals_Build(void)
+{
+ void *p = malloc(sizeof(struct pypy_threadlocal_s));
+ if (!p) {
+ fprintf(stderr, "Internal RPython error: "
+ "out of memory for the thread-local storage");
+ abort();
+ }
+ _RPy_ThreadLocals_Init(p);
+ _RPy_ThreadLocals_Set(p);
+ return (char *)p;
+}
-void RPyThreadTLS_Create(RPyThreadTLS *result)
+void RPython_ThreadLocals_ThreadDie(void)
{
- if (pthread_key_create(result, NULL) != 0) {
- fprintf(stderr, "Internal RPython error: "
- "out of thread-local storage keys");
- abort();
+ void *p = _RPy_ThreadLocals_Get();
+ if (p != NULL) {
+ _RPy_ThreadLocals_Set(NULL);
+ memset(p, 0xDD, sizeof(struct pypy_threadlocal_s)); /* debug */
+ free(p);
}
}
+
+/* ------------------------------------------------------------ */
#endif
+/* ------------------------------------------------------------ */
diff --git a/rpython/translator/c/src/threadlocal.h b/rpython/translator/c/src/threadlocal.h
--- a/rpython/translator/c/src/threadlocal.h
+++ b/rpython/translator/c/src/threadlocal.h
@@ -2,51 +2,98 @@
#ifndef _SRC_THREADLOCAL_H
#define _SRC_THREADLOCAL_H
-#include <src/precommondefs.h>
+#include "src/precommondefs.h"
+#include "src/support.h"
+/* RPython_ThreadLocals_ProgramInit() is called once at program start-up. */
+RPY_EXTERN void RPython_ThreadLocals_ProgramInit(void);
+
+/* RPython_ThreadLocals_ThreadDie() is called in a thread that is about
+ to die. */
+RPY_EXTERN void RPython_ThreadLocals_ThreadDie(void);
+
+/* There are two llops: 'threadlocalref_addr' and 'threadlocalref_get'.
+ 'threadlocalref_addr' returns the address of the thread-local structure
+ (of the C type 'struct pypy_threadlocal_s'): OP_THREADLOCALREF_ADDR()
+ checks if we have initialized this thread-local structure in the
+ current thread, and if not, calls the following helper.
+ 'threadlocalref_get' reads one field of the structure without any check. */
+RPY_EXTERN char *_RPython_ThreadLocals_Build(void);
+
+
+/* ------------------------------------------------------------ */
+#ifdef USE___THREAD
+/* ------------------------------------------------------------ */
+
+
+/* Use the '__thread' specifier, so far only on Linux */
+
+RPY_EXTERN __thread struct pypy_threadlocal_s pypy_threadlocal;
+
+#define OP_THREADLOCALREF_ADDR(r) \
+ do { \
+ r = (char *)&pypy_threadlocal; \
+ if (pypy_threadlocal.ready != 42) \
+ r = _RPython_ThreadLocals_Build(); \
+ } while (0)
+
+#define RPY_THREADLOCALREF_ENSURE() \
+ if (pypy_threadlocal.ready != 42) \
+ (void)_RPython_ThreadLocals_Build();
+
+#define RPY_THREADLOCALREF_GET(FIELD) pypy_threadlocal.FIELD
+
+
+/* ------------------------------------------------------------ */
+#else
+/* ------------------------------------------------------------ */
+
+
+/* Don't use '__thread'. */
+
#ifdef _WIN32
-
-#include <WinSock2.h>
-#include <windows.h>
-#define __thread __declspec(thread)
-typedef DWORD RPyThreadTLS;
-#define RPyThreadTLS_Get(key) TlsGetValue(key)
-#define RPyThreadTLS_Set(key, value) TlsSetValue(key, value)
-
+# include <WinSock2.h>
+# include <windows.h>
+# define _RPy_ThreadLocals_Get() TlsGetValue(pypy_threadlocal_key)
+# define _RPy_ThreadLocals_Set(x) TlsSetValue(pypy_threadlocal_key, x)
+typedef DWORD pthread_key_t;
#else
-
-#include <pthread.h>
-typedef pthread_key_t RPyThreadTLS;
-#define RPyThreadTLS_Get(key) pthread_getspecific(key)
-#define RPyThreadTLS_Set(key, value) pthread_setspecific(key, value)
-
+# include <pthread.h>
+# define _RPy_ThreadLocals_Get() pthread_getspecific(pypy_threadlocal_key)
+# define _RPy_ThreadLocals_Set(x) pthread_setspecific(pypy_threadlocal_key, x)
#endif
+RPY_EXTERN pthread_key_t pypy_threadlocal_key;
-#ifdef USE___THREAD
-#define RPyThreadStaticTLS __thread void *
-#define RPyThreadStaticTLS_Create(tls) (void)0
-#define RPyThreadStaticTLS_Get(tls) tls
-#define RPyThreadStaticTLS_Set(tls, value) tls = value
-#define OP_THREADLOCALREF_GETADDR(tlref, ptr) ptr = tlref
+#define OP_THREADLOCALREF_ADDR(r) \
+ do { \
+ r = (char *)_RPy_ThreadLocals_Get(); \
+ if (!r) \
+ r = _RPython_ThreadLocals_Build(); \
+ } while (0)
+#define RPY_THREADLOCALREF_ENSURE() \
+ if (!_RPy_ThreadLocals_Get()) \
+ (void)_RPython_ThreadLocals_Build();
+
+#define RPY_THREADLOCALREF_GET(FIELD) \
+ ((struct pypy_threadlocal_s *)_RPy_ThreadLocals_Get())->FIELD
+
+
+/* ------------------------------------------------------------ */
#endif
+/* ------------------------------------------------------------ */
-#ifndef RPyThreadStaticTLS
-#define RPyThreadStaticTLS RPyThreadTLS
-#define RPyThreadStaticTLS_Create(key) RPyThreadTLS_Create(key)
-#define RPyThreadStaticTLS_Get(key) RPyThreadTLS_Get(key)
-#define RPyThreadStaticTLS_Set(key, value) RPyThreadTLS_Set(key, value)
-RPY_EXTERN void RPyThreadTLS_Create(RPyThreadTLS *result);
-
-#endif
-
-
-#define OP_THREADLOCALREF_SET(tlref, ptr, _) RPyThreadStaticTLS_Set(*tlref, ptr)
-#define OP_THREADLOCALREF_GET(tlref, ptr) ptr = RPyThreadStaticTLS_Get(*tlref)
+/* only for the fall-back path in the JIT */
+#define OP_THREADLOCALREF_GET_NONCONST(RESTYPE, offset, r) \
+ do { \
+ char *a; \
+ OP_THREADLOCALREF_ADDR(a); \
+ r = *(RESTYPE *)(a + offset); \
+ } while (0)
#endif /* _SRC_THREADLOCAL_H */
diff --git a/rpython/translator/c/test/test_standalone.py b/rpython/translator/c/test/test_standalone.py
--- a/rpython/translator/c/test/test_standalone.py
+++ b/rpython/translator/c/test/test_standalone.py
@@ -2,6 +2,7 @@
import sys, os, re
from rpython.config.translationoption import get_combined_translation_config
+from rpython.config.translationoption import SUPPORT__THREAD
from rpython.rlib.objectmodel import keepalive_until_here
from rpython.rlib.rarithmetic import r_longlong
from rpython.rlib.debug import ll_assert, have_debug_prints, debug_flush
@@ -1026,11 +1027,12 @@
gcrootfinder = 'shadowstack'
config = None
More information about the pypy-commit
mailing list