[pypy-commit] pypy vmprof-native: copy over changes made to vmprof-python
plan_rich
pypy.commits at gmail.com
Wed Mar 15 08:08:42 EDT 2017
Author: Richard Plangger <planrichi at gmail.com>
Branch: vmprof-native
Changeset: r90697:ac30c079910e
Date: 2017-03-14 15:00 +0100
http://bitbucket.org/pypy/pypy/changeset/ac30c079910e/
Log: copy over changes made to vmprof-python
diff --git a/rpython/rlib/rvmprof/src/vmprof_main.h b/rpython/rlib/rvmprof/src/vmprof_main.h
--- a/rpython/rlib/rvmprof/src/vmprof_main.h
+++ b/rpython/rlib/rvmprof/src/vmprof_main.h
@@ -1,3 +1,5 @@
+#pragma once
+
/* VMPROF
*
* statistical sampling profiler specifically designed to profile programs
@@ -10,45 +12,49 @@
*
* Tested only on gcc, linux, x86_64.
*
- * Copyright (C) 2014-2015
+ * Copyright (C) 2014-2017
* Antonio Cuni - anto.cuni at gmail.com
* Maciej Fijalkowski - fijall at gmail.com
* Armin Rigo - arigo at tunes.org
+ * Richard Plangger - planrichi at gmail.com
*
*/
#define _GNU_SOURCE 1
#include <dlfcn.h>
+#include <pthread.h>
+#include <unistd.h>
#include <assert.h>
-#include <pthread.h>
+#include <errno.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
#include <sys/time.h>
-#include <errno.h>
-#include <unistd.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <signal.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include "vmprof_stack.h"
+
+#include "vmprof.h"
+
+#include "vmp_stack.h"
#include "vmprof_getpc.h"
#include "vmprof_mt.h"
-#include "vmprof_get_custom_offset.h"
#include "vmprof_common.h"
+#include "compat.h"
+
+#if defined(__unix__)
+#include "rss_unix.h"
+#elif defined(__APPLE__)
+#include "rss_darwin.h"
+#endif
+
/************************************************************/
-static long prepare_interval_usec;
-static long saved_profile_file;
-static struct profbuf_s *volatile current_codes;
static void *(*mainloop_get_virtual_ip)(char *) = 0;
-
-static int opened_profile(char *interp_name);
+static int opened_profile(const char *interp_name, int memory, int proflines, int native);
static void flush_codes(void);
-
/************************************************************/
/* value: last bit is 1 if signals must be ignored; all other bits
@@ -79,24 +85,26 @@
static char atfork_hook_installed = 0;
-static intptr_t get_current_thread_id(void)
+/* *************************************************************
+ * functions to dump the stack trace
+ * *************************************************************
+ */
+
+int get_stack_trace(PY_THREAD_STATE_T * current, void** result, int max_depth, intptr_t pc)
{
- /* xxx This function is a hack on two fronts:
-
- - It assumes that pthread_self() is async-signal-safe. This
- should be true on Linux. I hope it is also true elsewhere.
-
- - It abuses pthread_self() by assuming it just returns an
- integer. According to comments in CPython's source code, the
- platforms where it is not the case are rare nowadays.
-
- An alternative would be to try to look if the information is
- available in the ucontext_t in the caller.
- */
- return (intptr_t)pthread_self();
+ PY_STACK_FRAME_T * frame;
+#ifdef RPYTHON_VMPROF
+ // do nothing here,
+ frame = (PY_STACK_FRAME_T*)current;
+#else
+ if (!current) {
+ return 0;
+ }
+ frame = current->frame;
+#endif
+ return vmp_walk_and_record_stack(frame, result, max_depth, 1, pc);
}
-
/* *************************************************************
* the signal handler
* *************************************************************
@@ -112,9 +120,67 @@
longjmp(restore_point, SIGSEGV);
}
+int _vmprof_sample_stack(struct profbuf_s *p, PY_THREAD_STATE_T * tstate, ucontext_t * uc)
+{
+ int depth;
+ struct prof_stacktrace_s *st = (struct prof_stacktrace_s *)p->data;
+ st->marker = MARKER_STACKTRACE;
+ st->count = 1;
+#ifdef RPYTHON_VMPROF
+ depth = get_stack_trace(get_vmprof_stack(), st->stack, MAX_STACK_DEPTH-1, (intptr_t)GetPC(uc));
+#else
+ depth = get_stack_trace(tstate, st->stack, MAX_STACK_DEPTH-1, (intptr_t)NULL);
+#endif
+ if (depth == 0) {
+ return 0;
+ }
+ st->depth = depth;
+ st->stack[depth++] = tstate;
+ long rss = get_current_proc_rss();
+ if (rss >= 0)
+ st->stack[depth++] = (void*)rss;
+ p->data_offset = offsetof(struct prof_stacktrace_s, marker);
+ p->data_size = (depth * sizeof(void *) +
+ sizeof(struct prof_stacktrace_s) -
+ offsetof(struct prof_stacktrace_s, marker));
+ return 1;
+}
+
+#ifndef RPYTHON_VMPROF
+static PY_THREAD_STATE_T * _get_pystate_for_this_thread(void) {
+ // see issue 116 on github.com/vmprof/vmprof-python.
+ // PyGILState_GetThisThreadState(); can hang forever
+ //
+ PyInterpreterState * istate;
+ PyThreadState * state;
+ long mythread_id;
+
+ istate = PyInterpreterState_Head();
+ if (istate == NULL) {
+ return NULL;
+ }
+ mythread_id = PyThread_get_thread_ident();
+ // fish fish fish, it will NOT lock the keymutex in pythread
+ do {
+ state = PyInterpreterState_ThreadHead(istate);
+ do {
+ if (state->thread_id == mythread_id) {
+ return state;
+ }
+ } while ((state = PyThreadState_Next(state)) != NULL);
+ } while ((istate = PyInterpreterState_Next(istate)) != NULL);
+
+ // uh? not found?
+ return NULL;
+}
+#endif
+
static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext)
{
-#ifdef __APPLE__
+ int commit;
+ PY_THREAD_STATE_T * tstate = NULL;
+ void (*prevhandler)(int);
+#ifndef RPYTHON_VMPROF
// TERRIBLE HACK AHEAD
// on OS X, the thread local storage is sometimes uninitialized
// when the signal handler runs - it means it's impossible to read errno
@@ -122,48 +188,46 @@
// it seems impossible to read the register gs.
// here we register segfault handler (all guarded by a spinlock) and call
// longjmp in case segfault happens while reading a thread local
+ //
+ // We do the same error detection for linux to ensure that
+ // get_current_thread_state returns a sane result
while (__sync_lock_test_and_set(&spinlock, 1)) {
}
- signal(SIGSEGV, &segfault_handler);
+ prevhandler = signal(SIGSEGV, &segfault_handler);
int fault_code = setjmp(restore_point);
if (fault_code == 0) {
pthread_self();
- get_current_thread_id();
+ tstate = _get_pystate_for_this_thread();
} else {
- signal(SIGSEGV, SIG_DFL);
- __sync_synchronize();
- spinlock = 0;
- return;
+ signal(SIGSEGV, prevhandler);
+ __sync_lock_release(&spinlock);
+ return;
}
- signal(SIGSEGV, SIG_DFL);
- __sync_synchronize();
- spinlock = 0;
+ signal(SIGSEGV, prevhandler);
+ __sync_lock_release(&spinlock);
#endif
+
long val = __sync_fetch_and_add(&signal_handler_value, 2L);
if ((val & 1) == 0) {
int saved_errno = errno;
- int fd = profile_file;
+ int fd = vmp_profile_fileno();
assert(fd >= 0);
struct profbuf_s *p = reserve_buffer(fd);
if (p == NULL) {
/* ignore this signal: there are no free buffers right now */
- }
- else {
- int depth;
- struct prof_stacktrace_s *st = (struct prof_stacktrace_s *)p->data;
- st->marker = MARKER_STACKTRACE;
- st->count = 1;
- depth = get_stack_trace(get_vmprof_stack(), st->stack,
- MAX_STACK_DEPTH-2, GetPC((ucontext_t*)ucontext));
- st->depth = depth;
- st->stack[depth++] = get_current_thread_id();
- p->data_offset = offsetof(struct prof_stacktrace_s, marker);
- p->data_size = (depth * sizeof(void *) +
- sizeof(struct prof_stacktrace_s) -
- offsetof(struct prof_stacktrace_s, marker));
- commit_buffer(fd, p);
+ } else {
+#ifdef RPYTHON_VMPORF
+ commit = _vmprof_sample_stack(p, NULL, (ucontext_t*)ucontext);
+#else
+ commit = _vmprof_sample_stack(p, tstate, (ucontext_t*)ucontext);
+#endif
+ if (commit) {
+ commit_buffer(fd, p);
+ } else {
+ cancel_buffer(p);
+ }
}
errno = saved_errno;
@@ -173,6 +237,7 @@
}
+
/* *************************************************************
* the setup and teardown functions
* *************************************************************
@@ -197,58 +262,53 @@
return 0;
}
-static int itimer_which = ITIMER_PROF;
-
static int install_sigprof_timer(void)
{
- struct itimerval timer;
+ static struct itimerval timer;
timer.it_interval.tv_sec = 0;
timer.it_interval.tv_usec = profile_interval_usec;
timer.it_value = timer.it_interval;
- if (setitimer(itimer_which, &timer, NULL) == 0)
- return 0; /* normal path */
-
- if (errno == EINVAL) {
- /* on WSL, only ITIMER_REAL is supported */
- if (setitimer(ITIMER_REAL, &timer, NULL) == 0) {
- fprintf(stderr, "warning: setitimer(): ITIMER_PROF not "
- "available, using ITIMER_REAL instead. "
- "Multithreaded programs and programs "
- "doing a lot of I/O won't give correct "
- "results.\n");
- itimer_which = ITIMER_REAL;
- return 0;
- }
- }
- return -1;
+ if (setitimer(ITIMER_PROF, &timer, NULL) != 0)
+ return -1;
+ return 0;
}
static int remove_sigprof_timer(void) {
- struct itimerval timer;
+ static struct itimerval timer;
timer.it_interval.tv_sec = 0;
timer.it_interval.tv_usec = 0;
timer.it_value.tv_sec = 0;
timer.it_value.tv_usec = 0;
- if (setitimer(itimer_which, &timer, NULL) != 0)
+ if (setitimer(ITIMER_PROF, &timer, NULL) != 0)
return -1;
return 0;
}
static void atfork_disable_timer(void) {
if (profile_interval_usec > 0) {
- saved_profile_file = profile_file;
- profile_file = -1;
remove_sigprof_timer();
+#ifndef RPYTHON_VMPROF
+ is_enabled = 0;
+#endif
}
}
static void atfork_enable_timer(void) {
if (profile_interval_usec > 0) {
- profile_file = saved_profile_file;
install_sigprof_timer();
+#ifndef RPYTHON_VMPROF
+ is_enabled = 1;
+#endif
}
}
+static void atfork_close_profile_file(void) {
+ int fd = vmp_profile_fileno();
+ if (fd != -1)
+ close(fd);
+ vmp_set_profile_fileno(-1);
+}
+
static int install_pthread_atfork_hooks(void) {
/* this is needed to prevent the problems described there:
- http://code.google.com/p/gperftools/issues/detail?id=278
@@ -262,20 +322,69 @@
*/
if (atfork_hook_installed)
return 0;
- int ret = pthread_atfork(atfork_disable_timer, atfork_enable_timer, NULL);
+ int ret = pthread_atfork(atfork_disable_timer, atfork_enable_timer, atfork_close_profile_file);
if (ret != 0)
return -1;
atfork_hook_installed = 1;
return 0;
}
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+void init_cpyprof(int native)
+{
+ // skip this if native should not be enabled
+ if (!native) {
+ vmp_native_disable();
+ return;
+ }
+#if CPYTHON_HAS_FRAME_EVALUATION
+ PyThreadState *tstate = PyThreadState_GET();
+ tstate->interp->eval_frame = vmprof_eval;
+ _default_eval_loop = _PyEval_EvalFrameDefault;
+#elif defined(RPYTHON_VMPROF)
+ // do nothing here, the stack is maintained by rpython
+ // no need for a trampoline
+#else
+ if (vmp_patch_callee_trampoline(PyEval_EvalFrameEx,
+ vmprof_eval, (void*)&_default_eval_loop) == 0) {
+ } else {
+ fprintf(stderr, "FATAL: could not insert trampline, try with --no-native\n");
+ // TODO dump the first few bytes and tell them to create an issue!
+ exit(-1);
+ }
+#endif
+ vmp_native_enable();
+}
+
+static void disable_cpyprof(void)
+{
+ vmp_native_disable();
+#if CPYTHON_HAS_FRAME_EVALUATION
+ PyThreadState *tstate = PyThreadState_GET();
+ tstate->interp->eval_frame = _PyEval_EvalFrameDefault;
+#elif defined(RPYTHON_VMPROF)
+ // TODO nothing?
+#else
+ if (vmp_unpatch_callee_trampoline(PyEval_EvalFrameEx) > 0) {
+ fprintf(stderr, "FATAL: could not remove trampoline\n");
+ exit(-1);
+ }
+#endif
+ dump_native_symbols(vmp_profile_fileno());
+}
+#endif
+
RPY_EXTERN
-int vmprof_enable(void)
+int vmprof_enable(int memory, int native)
{
- assert(profile_file >= 0);
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+ init_cpyprof(native);
+#endif
+ assert(vmp_profile_fileno() >= 0);
assert(prepare_interval_usec > 0);
profile_interval_usec = prepare_interval_usec;
-
+ if (memory && setup_rss() == -1)
+ goto error;
if (install_pthread_atfork_hooks() == -1)
goto error;
if (install_sigprof_handler() == -1)
@@ -286,32 +395,19 @@
return 0;
error:
- profile_file = -1;
+ vmp_set_profile_fileno(-1);
profile_interval_usec = 0;
return -1;
}
-static int _write_all(const char *buf, size_t bufsize)
+
+int close_profile(void)
{
- while (bufsize > 0) {
- ssize_t count = write(profile_file, buf, bufsize);
- if (count <= 0)
- return -1; /* failed */
- buf += count;
- bufsize -= count;
- }
- return 0;
-}
+ (void)vmp_write_time_now(MARKER_TRAILER);
-static int close_profile(void)
-{
- char marker = MARKER_TRAILER;
-
- if (_write_all(&marker, 1) < 0)
- return -1;
-
+ teardown_rss();
/* don't close() the file descriptor from here */
- profile_file = -1;
+ vmp_set_profile_fileno(-1);
return 0;
}
@@ -320,29 +416,29 @@
{
vmprof_ignore_signals(1);
profile_interval_usec = 0;
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+ disable_cpyprof();
+#endif
if (remove_sigprof_timer() == -1)
return -1;
if (remove_sigprof_handler() == -1)
return -1;
flush_codes();
- if (shutdown_concurrent_bufs(profile_file) < 0)
+ if (shutdown_concurrent_bufs(vmp_profile_fileno()) < 0)
return -1;
return close_profile();
}
RPY_EXTERN
-int vmprof_register_virtual_function(char *code_name, long code_uid,
+int vmprof_register_virtual_function(char *code_name, intptr_t code_uid,
int auto_retry)
{
long namelen = strnlen(code_name, 1023);
- long blocklen = 1 + 2 * sizeof(long) + namelen;
+ long blocklen = 1 + sizeof(intptr_t) + sizeof(long) + namelen;
struct profbuf_s *p;
char *t;
- if (profile_file == -1)
- return 0; // silently don't write it
-
retry:
p = current_codes;
if (p != NULL) {
@@ -352,7 +448,7 @@
size_t freesize = SINGLE_BUF_SIZE - p->data_size;
if (freesize < (size_t)blocklen) {
/* full: flush it */
- commit_buffer(profile_file, p);
+ commit_buffer(vmp_profile_fileno(), p);
p = NULL;
}
}
@@ -363,7 +459,7 @@
}
if (p == NULL) {
- p = reserve_buffer(profile_file);
+ p = reserve_buffer(vmp_profile_fileno());
if (p == NULL) {
/* can't get a free block; should almost never be the
case. Spin loop if allowed, or return a failure code
@@ -381,14 +477,14 @@
p->data_size += blocklen;
assert(p->data_size <= SINGLE_BUF_SIZE);
*t++ = MARKER_VIRTUAL_IP;
- memcpy(t, &code_uid, sizeof(long)); t += sizeof(long);
+ memcpy(t, &code_uid, sizeof(intptr_t)); t += sizeof(intptr_t);
memcpy(t, &namelen, sizeof(long)); t += sizeof(long);
memcpy(t, code_name, namelen);
/* try to reattach 'p' to 'current_codes' */
if (!__sync_bool_compare_and_swap(&current_codes, NULL, p)) {
/* failed, flush it */
- commit_buffer(profile_file, p);
+ commit_buffer(vmp_profile_fileno(), p);
}
return 0;
}
@@ -398,6 +494,6 @@
struct profbuf_s *p = current_codes;
if (p != NULL) {
current_codes = NULL;
- commit_buffer(profile_file, p);
+ commit_buffer(vmp_profile_fileno(), p);
}
}
More information about the pypy-commit
mailing list