[pypy-commit] pypy portable-threadlocal: Implement OS_THREADLOCALREF_ADDR in the x86 backend.

arigo noreply at buildbot.pypy.org
Sun Nov 23 15:40:02 CET 2014


Author: Armin Rigo <arigo at tunes.org>
Branch: portable-threadlocal
Changeset: r74652:c187d3b63e14
Date: 2014-11-23 15:39 +0100
http://bitbucket.org/pypy/pypy/changeset/c187d3b63e14/

Log:	Implement OS_THREADLOCALREF_ADDR in the x86 backend.

diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -217,7 +217,13 @@
         return lltype.cast_opaque_ptr(llmemory.GCREF, frame)
 
     def make_execute_token(self, *ARGS):
-        FUNCPTR = lltype.Ptr(lltype.FuncType([llmemory.GCREF],
+        # The JIT backend must generate functions with the following
+        # signature: it takes the jitframe and the threadlocal_addr
+        # as arguments, and it returns the (possibly reallocated) jitframe.
+        # The backend can optimize OS_THREADLOCALREF_ADDR calls to return
+        # this threadlocal_addr, but only if 'translate_support_code':
+        # in untranslated tests, threadlocal_addr is a dummy NULL.
+        FUNCPTR = lltype.Ptr(lltype.FuncType([llmemory.GCREF, llmemory.Address],
                                              llmemory.GCREF))
 
         lst = [(i, history.getkind(ARG)[0]) for i, ARG in enumerate(ARGS)]
@@ -249,8 +255,13 @@
                     else:
                         assert kind == history.REF
                         self.set_ref_value(ll_frame, num, arg)
+                if self.translate_support_code:
+                    ll_threadlocal_addr = llop.threadlocalref_addr(
+                        llmemory.Address)
+                else:
+                    ll_threadlocal_addr = llmemory.NULL
                 llop.gc_writebarrier(lltype.Void, ll_frame)
-                ll_frame = func(ll_frame)
+                ll_frame = func(ll_frame, ll_threadlocal_addr)
             finally:
                 if not self.translate_support_code:
                     LLInterpreter.current_interpreter = prev_interpreter
diff --git a/rpython/jit/backend/llsupport/test/ztranslation_test.py b/rpython/jit/backend/llsupport/test/ztranslation_test.py
--- a/rpython/jit/backend/llsupport/test/ztranslation_test.py
+++ b/rpython/jit/backend/llsupport/test/ztranslation_test.py
@@ -26,8 +26,6 @@
         # - profiler
         # - full optimizer
         # - floats neg and abs
-        # - threadlocalref_get
-        # - get_errno, set_errno
         # - llexternal with macro=True
 
         class Frame(object):
@@ -36,10 +34,6 @@
             def __init__(self, i):
                 self.i = i
 
-        class Foo(object):
-            pass
-        t = ThreadLocalReference(Foo)
-
         eci = ExternalCompilationInfo(post_include_bits=['''
 #define pypy_my_fabs(x)  fabs(x)
 '''])
@@ -74,9 +68,6 @@
                 k = myabs1(myabs2(j))
                 if k - abs(j):  raise ValueError
                 if k - abs(-j): raise ValueError
-                if t.get().nine != 9: raise ValueError
-                rposix.set_errno(total)
-                if rposix.get_errno() != total: raise ValueError
             return chr(total % 253)
         #
         class Virt2(object):
@@ -104,12 +95,8 @@
             return res
         #
         def main(i, j):
-            foo = Foo()
-            foo.nine = -(i + j)
-            t.set(foo)
             a_char = f(i, j)
             a_float = libffi_stuff(i, j)
-            keepalive_until_here(foo)
             return ord(a_char) * 10 + int(a_float)
         expected = main(40, -49)
         res = self.meta_interp(main, [40, -49])
@@ -121,6 +108,7 @@
 
     def test_direct_assembler_call_translates(self):
         """Test CALL_ASSEMBLER and the recursion limit"""
+        # - also tests threadlocalref_get
         from rpython.rlib.rstackovf import StackOverflow
 
         class Thing(object):
@@ -138,6 +126,10 @@
 
         somewhere_else = SomewhereElse()
 
+        class Foo(object):
+            pass
+        t = ThreadLocalReference(Foo)
+
         def change(newthing):
             somewhere_else.frame.thing = newthing
 
@@ -163,6 +155,7 @@
                     nextval = 13
                 frame.thing = Thing(nextval + 1)
                 i += 1
+                if t.get().nine != 9: raise ValueError
             return frame.thing.val
 
         driver2 = JitDriver(greens = [], reds = ['n'])
@@ -184,13 +177,24 @@
                 n = portal2(n)
         assert portal2(10) == -9
 
+        def setup(value):
+            foo = Foo()
+            foo.nine = value
+            t.set(foo)
+            return foo
+
         def mainall(codeno, bound):
-            return main(codeno) + main2(bound)
+            foo = setup(bound + 8)
+            result = main(codeno) + main2(bound)
+            keepalive_until_here(foo)
+            return result
 
+        tmp_obj = setup(9)
+        expected_1 = main(0)
         res = self.meta_interp(mainall, [0, 1], inline=True,
                                policy=StopAtXPolicy(change))
         print hex(res)
-        assert res & 255 == main(0)
+        assert res & 255 == expected_1
         bound = res & ~255
         assert 1024 <= bound <= 131072
         assert bound & (bound-1) == 0       # a power of two
diff --git a/rpython/jit/backend/x86/arch.py b/rpython/jit/backend/x86/arch.py
--- a/rpython/jit/backend/x86/arch.py
+++ b/rpython/jit/backend/x86/arch.py
@@ -34,10 +34,16 @@
     FRAME_FIXED_SIZE = 19
     PASS_ON_MY_FRAME = 15
     JITFRAME_FIXED_SIZE = 6 + 8 * 2 # 6 GPR + 8 XMM * 2 WORDS/float
+    # 'threadlocal_addr' is passed as 2nd argument on the stack,
+    # and it can be left here for when it is needed
+    THREADLOCAL_OFS = (FRAME_FIXED_SIZE + 2) * WORD
 else:
-    # rbp + rbx + r12 + r13 + r14 + r15 + 13 extra words = 19
+    # rbp + rbx + r12 + r13 + r14 + r15 + threadlocal + 12 extra words = 19
     FRAME_FIXED_SIZE = 19
-    PASS_ON_MY_FRAME = 13
+    PASS_ON_MY_FRAME = 12
     JITFRAME_FIXED_SIZE = 28 # 13 GPR + 15 XMM
+    # 'threadlocal_addr' is passed as 2nd argument in %rsi,
+    # and is moved into this frame location
+    THREADLOCAL_OFS = (FRAME_FIXED_SIZE - 1) * WORD
 
 assert PASS_ON_MY_FRAME >= 12       # asmgcc needs at least JIT_USE_WORDS + 3
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -18,7 +18,7 @@
 from rpython.jit.backend.llsupport.regalloc import (get_scale, valid_addressing_size)
 from rpython.jit.backend.x86.arch import (FRAME_FIXED_SIZE, WORD, IS_X86_64,
                                        JITFRAME_FIXED_SIZE, IS_X86_32,
-                                       PASS_ON_MY_FRAME)
+                                       PASS_ON_MY_FRAME, THREADLOCAL_OFS)
 from rpython.jit.backend.x86.regloc import (eax, ecx, edx, ebx, esp, ebp, esi,
     xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, r8, r9, r10, r11, edi,
     r12, r13, r14, r15, X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG,
@@ -730,6 +730,7 @@
         self.mc.SUB_ri(esp.value, FRAME_FIXED_SIZE * WORD)
         self.mc.MOV_sr(PASS_ON_MY_FRAME * WORD, ebp.value)
         if IS_X86_64:
+            self.mc.MOV_sr(THREADLOCAL_OFS, esi.value)
             self.mc.MOV_rr(ebp.value, edi.value)
         else:
             self.mc.MOV_rs(ebp.value, (FRAME_FIXED_SIZE + 1) * WORD)
@@ -1955,7 +1956,8 @@
         self._emit_guard_not_forced(guard_token)
 
     def _call_assembler_emit_call(self, addr, argloc, _):
-        self.simple_call(addr, [argloc])
+        threadlocal_loc = RawEspLoc(THREADLOCAL_OFS, INT)
+        self.simple_call(addr, [argloc, threadlocal_loc])
 
     def _call_assembler_emit_helper_call(self, addr, arglocs, result_loc):
         self.simple_call(addr, arglocs, result_loc)
@@ -2321,8 +2323,13 @@
         self.mc.MOV_rr(reg.value, ebp.value)
 
     def threadlocalref_addr(self, resloc):
+        # This simply loads the stack location THREADLOCAL_OFS into a
+        # register.  It is only supported if 'translate_support_code' is
+        # true; otherwise, the original call to the piece of assembler
+        # was done with a dummy NULL value.
+        assert self.cpu.translate_support_code
         assert isinstance(resloc, RegLoc)
-        XXX
+        self.mc.MOV_rs(resloc.value, THREADLOCAL_OFS)
 
     def get_set_errno(self, op, loc, issue_a_write):
         # this function is only called on Linux
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -694,8 +694,11 @@
         self.perform_math(op, [loc0], loc0)
 
     def _consider_threadlocalref_addr(self, op):
-        resloc = self.force_allocate_reg(op.result)
-        self.assembler.threadlocalref_addr(resloc)
+        if self.translate_support_code:
+            resloc = self.force_allocate_reg(op.result)
+            self.assembler.threadlocalref_addr(resloc)
+        else:
+            self._consider_call(op)
 
     def _call(self, op, arglocs, force_store=[], guard_not_forced_op=None):
         # we need to save registers on the stack:
@@ -774,8 +777,8 @@
                         return
             if oopspecindex == EffectInfo.OS_MATH_SQRT:
                 return self._consider_math_sqrt(op)
-            #if oopspecindex == EffectInfo.OS_THREADLOCALREF_ADDR:
-            #    return self._consider_threadlocalref_addr(op)
+            if oopspecindex == EffectInfo.OS_THREADLOCALREF_ADDR:
+                return self._consider_threadlocalref_addr(op)
             if oopspecindex == EffectInfo.OS_MATH_READ_TIMESTAMP:
                 return self._consider_math_read_timestamp(op)
         self._consider_call(op)


More information about the pypy-commit mailing list