[pypy-commit] pypy default: Enable windows support for vmprof

fijal pypy.commits at gmail.com
Sat Feb 13 02:48:20 EST 2016


Author: fijal
Branch: 
Changeset: r82204:9dbad5cf71a6
Date: 2016-02-13 08:47 +0100
http://bitbucket.org/pypy/pypy/changeset/9dbad5cf71a6/

Log:	Enable windows support for vmprof

diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -36,13 +36,13 @@
     "cStringIO", "thread", "itertools", "pyexpat", "_ssl", "cpyext", "array",
     "binascii", "_multiprocessing", '_warnings', "_collections",
     "_multibytecodec", "micronumpy", "_continuation", "_cffi_backend",
-    "_csv", "cppyy", "_pypyjson"
+    "_csv", "cppyy", "_pypyjson", "_vmprof",
 ])
 
-if ((sys.platform.startswith('linux') or sys.platform == 'darwin')
-    and os.uname()[4] == 'x86_64' and sys.maxint > 2**32):
+#if ((sys.platform.startswith('linux') or sys.platform == 'darwin')
+#    and os.uname()[4] == 'x86_64' and sys.maxint > 2**32):
     # it's not enough that we get x86_64
-    working_modules.add('_vmprof')
+#    working_modules.add('_vmprof')
 
 translation_modules = default_modules.copy()
 translation_modules.update([
diff --git a/rpython/jit/backend/llsupport/test/zrpy_vmprof_test.py b/rpython/jit/backend/llsupport/test/zrpy_vmprof_test.py
--- a/rpython/jit/backend/llsupport/test/zrpy_vmprof_test.py
+++ b/rpython/jit/backend/llsupport/test/zrpy_vmprof_test.py
@@ -3,16 +3,13 @@
 from rpython.jit.backend.test.support import CCompiledMixin
 from rpython.rlib.jit import JitDriver
 from rpython.tool.udir import udir
+from rpython.rlib import rthread
 from rpython.translator.translator import TranslationContext
 from rpython.jit.backend.detect_cpu import getcpuclass
 
 class CompiledVmprofTest(CCompiledMixin):
     CPUClass = getcpuclass()
 
-    def setup(self):
-        if self.CPUClass.backend_name != 'x86_64':
-            py.test.skip("vmprof only supports x86-64 CPUs at the moment")
-
     def _get_TranslationContext(self):
         t = TranslationContext()
         t.config.translation.gc = 'incminimark'
@@ -62,6 +59,7 @@
         tmpfilename = str(udir.join('test_rvmprof'))
 
         def f(num):
+            rthread.get_ident() # register TLOFS_thread_ident
             code = MyCode("py:x:foo:3")
             rvmprof.register_code(code, get_name)
             fd = os.open(tmpfilename, os.O_WRONLY | os.O_CREAT, 0666)
diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py
--- a/rpython/rlib/rvmprof/cintf.py
+++ b/rpython/rlib/rvmprof/cintf.py
@@ -30,11 +30,11 @@
 
 
 def setup():
+    compile_extra = ['-DRPYTHON_LL2CTYPES']
     platform.verify_eci(ExternalCompilationInfo(
-        compile_extra=['-DRPYTHON_LL2CTYPES'],
+        compile_extra=compile_extra,
         **eci_kwds))
 
-
     eci = global_eci
     vmprof_init = rffi.llexternal("vmprof_init",
                                   [rffi.INT, rffi.DOUBLE, rffi.CCHARP],
diff --git a/rpython/rlib/rvmprof/src/rvmprof.c b/rpython/rlib/rvmprof/src/rvmprof.c
--- a/rpython/rlib/rvmprof/src/rvmprof.c
+++ b/rpython/rlib/rvmprof/src/rvmprof.c
@@ -1,23 +1,21 @@
 #define _GNU_SOURCE 1
 
-
 #ifdef RPYTHON_LL2CTYPES
    /* only for testing: ll2ctypes sets RPY_EXTERN from the command-line */
-#  ifndef RPY_EXTERN
-#    define RPY_EXTERN RPY_EXPORTED
-#  endif
-#  define RPY_EXPORTED  extern __attribute__((visibility("default")))
-#  define VMPROF_ADDR_OF_TRAMPOLINE(addr)  0
+#ifndef RPY_EXTERN
+#define RPY_EXTERN RPY_EXPORTED
+#endif
+#ifdef _WIN32
+#define RPY_EXPORTED __declspec(dllexport)
+#else
+#define RPY_EXPORTED  extern __attribute__((visibility("default")))
+#endif
 
 #else
-
 #  include "common_header.h"
 #  include "structdef.h"
 #  include "src/threadlocal.h"
 #  include "rvmprof.h"
-/*#  ifndef VMPROF_ADDR_OF_TRAMPOLINE
-#   error "RPython program using rvmprof, but not calling vmprof_execute_code()"
-#  endif*/
 
 #endif
 
diff --git a/rpython/rlib/rvmprof/src/vmprof_common.h b/rpython/rlib/rvmprof/src/vmprof_common.h
--- a/rpython/rlib/rvmprof/src/vmprof_common.h
+++ b/rpython/rlib/rvmprof/src/vmprof_common.h
@@ -7,9 +7,6 @@
 static long profile_interval_usec = 0;
 static int opened_profile(char *interp_name);
 
-#define MAX_STACK_DEPTH   \
-    ((SINGLE_BUF_SIZE - sizeof(struct prof_stacktrace_s)) / sizeof(void *))
-
 #define MARKER_STACKTRACE '\x01'
 #define MARKER_VIRTUAL_IP '\x02'
 #define MARKER_TRAILER '\x03'
@@ -20,6 +17,9 @@
 #define VERSION_THREAD_ID '\x01'
 #define VERSION_TAG '\x02'
 
+#define MAX_STACK_DEPTH   \
+    ((SINGLE_BUF_SIZE - sizeof(struct prof_stacktrace_s)) / sizeof(void *))
+
 typedef struct prof_stacktrace_s {
     char padding[sizeof(long) - 1];
     char marker;
@@ -71,6 +71,43 @@
     return _write_all((char*)&header, 5 * sizeof(long) + 4 + namelen);
 }
 
+/* *************************************************************
+ * functions to dump the stack trace
+ * *************************************************************
+ */
+
+
+static int get_stack_trace(vmprof_stack_t* stack, intptr_t *result, int max_depth, intptr_t pc)
+{
+    int n = 0;
+    intptr_t addr = 0;
+    int bottom_jitted = 0;
+    // check if the pc is in JIT
+#ifdef PYPY_JIT_CODEMAP
+    if (pypy_find_codemap_at_addr((intptr_t)pc, &addr)) {
+        // the bottom part is jitted, means we can fill up the first part
+        // from the JIT
+        n = vmprof_write_header_for_jit_addr(result, n, pc, max_depth);
+        stack = stack->next; // skip the first item as it contains garbage
+    }
+#endif
+    while (n < max_depth - 1 && stack) {
+        if (stack->kind == VMPROF_CODE_TAG) {
+            result[n] = stack->kind;
+            result[n + 1] = stack->value;
+            n += 2;
+        }
+#ifdef PYPY_JIT_CODEMAP
+        else if (stack->kind == VMPROF_JITTED_TAG) {
+            pc = ((intptr_t*)(stack->value - sizeof(intptr_t)))[0];
+            n = vmprof_write_header_for_jit_addr(result, n, pc, max_depth);
+        }
+#endif
+        stack = stack->next;
+    }
+    return n;
+}
+
 #ifndef RPYTHON_LL2CTYPES
 static vmprof_stack_t *get_vmprof_stack(void)
 {
diff --git a/rpython/rlib/rvmprof/src/vmprof_main.h b/rpython/rlib/rvmprof/src/vmprof_main.h
--- a/rpython/rlib/rvmprof/src/vmprof_main.h
+++ b/rpython/rlib/rvmprof/src/vmprof_main.h
@@ -35,6 +35,7 @@
 #include "vmprof_stack.h"
 #include "vmprof_getpc.h"
 #include "vmprof_mt.h"
+#include "vmprof_get_custom_offset.h"
 #include "vmprof_common.h"
 
 /************************************************************/
@@ -78,46 +79,6 @@
 static char atfork_hook_installed = 0;
 
 
-#include "vmprof_get_custom_offset.h"
-
-/* *************************************************************
- * functions to dump the stack trace
- * *************************************************************
- */
-
-
-static int get_stack_trace(intptr_t *result, int max_depth, intptr_t pc, ucontext_t *ucontext)
-{
-    vmprof_stack_t* stack = get_vmprof_stack();
-    int n = 0;
-    intptr_t addr = 0;
-    int bottom_jitted = 0;
-    // check if the pc is in JIT
-#ifdef PYPY_JIT_CODEMAP
-    if (pypy_find_codemap_at_addr((intptr_t)pc, &addr)) {
-        // the bottom part is jitted, means we can fill up the first part
-        // from the JIT
-        n = vmprof_write_header_for_jit_addr(result, n, pc, max_depth);
-        stack = stack->next; // skip the first item as it contains garbage
-    }
-#endif
-    while (n < max_depth - 1 && stack) {
-        if (stack->kind == VMPROF_CODE_TAG) {
-            result[n] = stack->kind;
-            result[n + 1] = stack->value;
-            n += 2;
-        }
-#ifdef PYPY_JIT_CODEMAP
-        else if (stack->kind == VMPROF_JITTED_TAG) {
-            pc = ((intptr_t*)(stack->value - sizeof(intptr_t)))[0];
-            n = vmprof_write_header_for_jit_addr(result, n, pc, max_depth);
-        }
-#endif
-        stack = stack->next;
-    }
-    return n;
-}
-
 static intptr_t get_current_thread_id(void)
 {
     /* xxx This function is a hack on two fronts:
@@ -194,8 +155,8 @@
             struct prof_stacktrace_s *st = (struct prof_stacktrace_s *)p->data;
             st->marker = MARKER_STACKTRACE;
             st->count = 1;
-            depth = get_stack_trace(st->stack,
-                MAX_STACK_DEPTH-2, GetPC((ucontext_t*)ucontext), ucontext);
+            depth = get_stack_trace(get_vmprof_stack(), st->stack,
+                MAX_STACK_DEPTH-2, GetPC((ucontext_t*)ucontext));
             st->depth = depth;
             st->stack[depth++] = get_current_thread_id();
             p->data_offset = offsetof(struct prof_stacktrace_s, marker);
diff --git a/rpython/rlib/rvmprof/src/vmprof_main_win32.h b/rpython/rlib/rvmprof/src/vmprof_main_win32.h
--- a/rpython/rlib/rvmprof/src/vmprof_main_win32.h
+++ b/rpython/rlib/rvmprof/src/vmprof_main_win32.h
@@ -10,13 +10,30 @@
     return 0;
 }
 
+#if defined(_MSC_VER)
+#include <BaseTsd.h>
+typedef SSIZE_T ssize_t;
+#endif
+
+#include <assert.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <stddef.h>
+#include <sys/stat.h>
+#include <fcntl.h>
 #include "vmprof_stack.h"
+#include "vmprof_get_custom_offset.h"
 #include "vmprof_common.h"
 #include <tlhelp32.h>
 
 // This file has been inspired (but not copied from since the LICENSE
 // would not allow it) from verysleepy profiler
 
+#define SINGLE_BUF_SIZE 8192
+
 volatile int thread_started = 0;
 volatile int enabled = 0;
 
@@ -55,52 +72,75 @@
     return 0;
 }
 
-int vmprof_snapshot_thread(DWORD thread_id, PyThreadState *tstate, prof_stacktrace_s *stack)
+int vmprof_snapshot_thread(struct pypy_threadlocal_s *p, prof_stacktrace_s *stack)
 {
-    HRESULT result;
-    HANDLE hThread = OpenThread(THREAD_ALL_ACCESS, FALSE, thread_id);
-    int depth;
+    void *addr;
+    vmprof_stack_t *cur;
+    long tid;
+    HANDLE hThread;
+    long depth;
+    DWORD result;
+    CONTEXT ctx;
+
+#ifdef RPYTHON_LL2CTYPES
+    return 0; // not much we can do
+#else
+#ifndef RPY_TLOFS_thread_ident
+    return 0; // we can't freeze threads, unsafe
+#else
+    hThread = OpenThread(THREAD_ALL_ACCESS, FALSE, p->thread_ident);
     if (!hThread) {
         return -1;
     }
     result = SuspendThread(hThread);
     if(result == 0xffffffff)
         return -1; // possible, e.g. attached debugger or thread alread suspended
-    // find the correct thread
-    depth = read_trace_from_cpy_frame(tstate->frame, stack->stack,
-        MAX_STACK_DEPTH);
+    ctx.ContextFlags = CONTEXT_FULL;
+    if (!GetThreadContext(hThread, &ctx))
+        return -1;
+    depth = get_stack_trace(p->vmprof_tl_stack,
+                     stack->stack, MAX_STACK_DEPTH-2, ctx.Eip);
     stack->depth = depth;
-    stack->stack[depth++] = (void*)thread_id;
+    stack->stack[depth++] = (void*)p->thread_ident;
     stack->count = 1;
     stack->marker = MARKER_STACKTRACE;
     ResumeThread(hThread);
     return depth;
+#endif
+#endif
 }
 
 long __stdcall vmprof_mainloop(void *arg)
 {   
+#ifndef RPYTHON_LL2CTYPES
+    struct pypy_threadlocal_s *p;
     prof_stacktrace_s *stack = (prof_stacktrace_s*)malloc(SINGLE_BUF_SIZE);
-    HANDLE hThreadSnap = INVALID_HANDLE_VALUE; 
     int depth;
-    PyThreadState *tstate;
 
     while (1) {
-        Sleep(profile_interval_usec * 1000);
+        //Sleep(profile_interval_usec * 1000);
+        Sleep(10);
         if (!enabled) {
             continue;
         }
-        tstate = PyInterpreterState_Head()->tstate_head;
-        while (tstate) {
-            depth = vmprof_snapshot_thread(tstate->thread_id, tstate, stack);
-            if (depth > 0) {
-                _write_all((char*)stack + offsetof(prof_stacktrace_s, marker),
-                    depth * sizeof(void *) +
-                        sizeof(struct prof_stacktrace_s) -
-                        offsetof(struct prof_stacktrace_s, marker));
+        _RPython_ThreadLocals_Acquire();
+        p = _RPython_ThreadLocals_Head(); // the first one is one behind head
+        p = _RPython_ThreadLocals_Enum(p);
+        while (p) {
+            if (p->ready == 42) {
+                depth = vmprof_snapshot_thread(p, stack);
+                if (depth > 0) {
+                    _write_all((char*)stack + offsetof(prof_stacktrace_s, marker),
+                         depth * sizeof(void *) +
+                         sizeof(struct prof_stacktrace_s) -
+                         offsetof(struct prof_stacktrace_s, marker));
+                }
             }
-            tstate = tstate->next;
+            p = _RPython_ThreadLocals_Enum(p);
         }
+        _RPython_ThreadLocals_Release();
     }
+#endif
 }
 
 RPY_EXTERN
diff --git a/rpython/rlib/rvmprof/src/vmprof_stack.h b/rpython/rlib/rvmprof/src/vmprof_stack.h
--- a/rpython/rlib/rvmprof/src/vmprof_stack.h
+++ b/rpython/rlib/rvmprof/src/vmprof_stack.h
@@ -1,7 +1,11 @@
 #ifndef _VMPROF_STACK_H_
 #define _VMPROF_STACK_H_
 
+#ifdef _WIN32
+#define intptr_t long // XXX windows VC++ 2008 lacks stdint.h
+#else
 #include <unistd.h>
+#endif
 
 #define VMPROF_CODE_TAG 1        /* <- also in cintf.py */
 #define VMPROF_BLACKHOLE_TAG 2
diff --git a/rpython/rlib/rvmprof/test/test_ztranslation.py b/rpython/rlib/rvmprof/test/test_ztranslation.py
--- a/rpython/rlib/rvmprof/test/test_ztranslation.py
+++ b/rpython/rlib/rvmprof/test/test_ztranslation.py
@@ -3,11 +3,10 @@
     sys.path += ['../../../..']    # for subprocess in test_interpreted
 import py
 from rpython.tool.udir import udir
-from rpython.rlib import rvmprof
+from rpython.rlib import rvmprof, rthread
 from rpython.translator.c.test.test_genc import compile
 from rpython.rlib.nonconst import NonConstant
 
-
 class MyCode:
     def __init__(self, count):
         self.count = count
@@ -39,6 +38,7 @@
 PROF_FILE = str(udir.join('test_ztranslation.prof'))
 
 def main(argv=[]):
+    rthread.get_ident() # force TLOFS_thread_ident
     if NonConstant(False):
         # Hack to give os.open() the correct annotation
         os.open('foo', 1, 1)
diff --git a/rpython/translator/c/src/threadlocal.c b/rpython/translator/c/src/threadlocal.c
--- a/rpython/translator/c/src/threadlocal.c
+++ b/rpython/translator/c/src/threadlocal.c
@@ -85,6 +85,11 @@
     return prev->next;
 }
 
+struct pypy_threadlocal_s *_RPython_ThreadLocals_Head(void)
+{
+    return &linkedlist_head;
+}
+
 static void _RPy_ThreadLocals_Init(void *p)
 {
     struct pypy_threadlocal_s *tls = (struct pypy_threadlocal_s *)p;
diff --git a/rpython/translator/c/src/threadlocal.h b/rpython/translator/c/src/threadlocal.h
--- a/rpython/translator/c/src/threadlocal.h
+++ b/rpython/translator/c/src/threadlocal.h
@@ -27,6 +27,9 @@
 RPY_EXTERN struct pypy_threadlocal_s *
 _RPython_ThreadLocals_Enum(struct pypy_threadlocal_s *prev);
 
+/* will return the head of the list */
+RPY_EXTERN struct pypy_threadlocal_s *_RPython_ThreadLocals_Head();
+
 #define OP_THREADLOCALREF_ACQUIRE(r)   _RPython_ThreadLocals_Acquire()
 #define OP_THREADLOCALREF_RELEASE(r)   _RPython_ThreadLocals_Release()
 #define OP_THREADLOCALREF_ENUM(p, r)   r = _RPython_ThreadLocals_Enum(p)


More information about the pypy-commit mailing list