[pypy-commit] pypy vmprof: import vmprof from bb:antocuni/vmprof, rev f841327f5621
antocuni
noreply at buildbot.pypy.org
Fri Oct 24 18:15:13 CEST 2014
Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: vmprof
Changeset: r74182:5b2d6fde1ecd
Date: 2014-10-24 17:07 +0100
http://bitbucket.org/pypy/pypy/changeset/5b2d6fde1ecd/
Log: import vmprof from bb:antocuni/vmprof, rev f841327f5621
diff --git a/LICENSE b/LICENSE
--- a/LICENSE
+++ b/LICENSE
@@ -407,3 +407,10 @@
the terms of the GPL license version 2 or any later version. Thus the
gdbm module, provided in the file lib_pypy/gdbm.py, is redistributed
under the terms of the GPL license as well.
+
+License for 'pypy/module/_vmprof/src'
+--------------------------------------
+
+The code is based on gperftools. You may see a copy of the License for it at
+
+ https://code.google.com/p/gperftools/source/browse/COPYING
diff --git a/pypy/module/_vmprof/src/config.h b/pypy/module/_vmprof/src/config.h
new file mode 100644
--- /dev/null
+++ b/pypy/module/_vmprof/src/config.h
@@ -0,0 +1,2 @@
+/* Minimal config.h supplying the two macros that getpc.h reads. */
+/* Makes getpc.h take ucontext_t from <sys/ucontext.h>. */
+#define HAVE_SYS_UCONTEXT_H
+/* Member path, relative to a ucontext_t, that the generic GetPC() in
+ * getpc.h dereferences to obtain the program counter.  REG_RIP is the
+ * x86_64 instruction-pointer slot, so this value is x86_64-specific. */
+#define PC_FROM_UCONTEXT uc_mcontext.gregs[REG_RIP]
diff --git a/pypy/module/_vmprof/src/getpc.h b/pypy/module/_vmprof/src/getpc.h
new file mode 100644
--- /dev/null
+++ b/pypy/module/_vmprof/src/getpc.h
@@ -0,0 +1,187 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Craig Silverstein
+//
+// This is an internal header file used by profiler.cc. It defines
+// the single (inline) function GetPC. GetPC is used in a signal
+// handler to figure out the instruction that was being executed when
+// the signal-handler was triggered.
+//
+// To get this, we use the ucontext_t argument to the signal-handler
+// callback, which holds the full context of what was going on when
+// the signal triggered. How to get from a ucontext_t to a Program
+// Counter is OS-dependent.
+
+#ifndef BASE_GETPC_H_
+#define BASE_GETPC_H_
+
+#include "config.h"
+
+// On many linux systems, we may need _GNU_SOURCE to get access to
+// the defined constants that define the register we want to see (eg
+// REG_EIP). Note this #define must come first!
+#define _GNU_SOURCE 1
+// If #define _GNU_SOURCE causes problems, this might work instead.
+// It will cause problems for FreeBSD, though, because it turns off
+// the needed __BSD_VISIBLE.
+//#define _XOPEN_SOURCE 500
+
+#include <string.h> // for memcmp
+#if defined(HAVE_SYS_UCONTEXT_H)
+#include <sys/ucontext.h>
+#elif defined(HAVE_UCONTEXT_H)
+#include <ucontext.h> // for ucontext_t (and also mcontext_t)
+#elif defined(HAVE_CYGWIN_SIGNAL_H)
+#include <cygwin/signal.h>
+typedef ucontext ucontext_t;
+#endif
+
+
+// Take the example where function Foo() calls function Bar(). For
+// many architectures, Bar() is responsible for setting up and tearing
+// down its own stack frame. In that case, it's possible for the
+// interrupt to happen when execution is in Bar(), but the stack frame
+// is not properly set up (either before it's done being set up, or
+// after it's been torn down but before Bar() returns). In those
+// cases, the stack trace cannot see the caller function anymore.
+//
+// GetPC can try to identify this situation, on architectures where it
+// might occur, and unwind the current function call in that case to
+// avoid false edges in the profile graph (that is, edges that appear
+// to show a call skipping over a function). To do this, we hard-code
+// in the asm instructions we might see when setting up or tearing
+// down a stack frame.
+//
+// This is difficult to get right: the instructions depend on the
+// processor, the compiler ABI, and even the optimization level. This
+// is a best effort patch -- if we fail to detect such a situation, or
+// mess up the PC, nothing happens; the returned PC is not used for
+// any further processing.
+// One instruction pattern used by GetPC's call-unrolling: where to look
+// relative to the program counter, which bytes to expect there, and where
+// on the stack the caller's return address sits when the pattern matches.
+// The callunrollinfo table initializes these fields positionally, so their
+// order must not change.
+struct CallUnrollInfo {
+ // Offset from (e)ip register where this instruction sequence
+ // should be matched. Interpreted as bytes. Offset 0 is the next
+ // instruction to execute. Be extra careful with negative offsets in
+ // architectures of variable instruction length (like x86) - it is
+ // not as easy as taking an offset to step one instruction back!
+ int pc_offset;
+ // The actual instruction bytes. Feel free to make it larger if you
+ // need a longer sequence.
+ unsigned char ins[16];
+ // How many bytes to match from ins array?
+ int ins_size;
+ // The offset from the stack pointer (e)sp where to look for the
+ // call return address. Interpreted as bytes.
+ int return_sp_offset;
+};
+
+
+// The dereferences needed to get the PC from a struct ucontext were
+// determined at configure time, and stored in the macro
+// PC_FROM_UCONTEXT in config.h. The only thing we need to do here,
+// then, is to do the magic call-unrolling for systems that support it.
+
+// -- Special case 1: linux x86, for which we have CallUnrollInfo
+#if defined(__linux) && defined(__i386) && defined(__GNUC__)
+// Prologue/epilogue byte patterns that GetPC() compares against the code
+// around the interrupted program counter.  Each entry is positional:
+// {pc_offset, ins, ins_size, return_sp_offset}.
+// The element type is spelled with the `struct` keyword so the table
+// compiles as C (this header is included from vmprof.c) as well as C++.
+static const struct CallUnrollInfo callunrollinfo[] = {
+  // Entry to a function:  push %ebp;  mov  %esp,%ebp
+  // Top-of-stack contains the caller IP.
+  { 0,
+    {0x55, 0x89, 0xe5}, 3,
+    0
+  },
+  // Entry to a function, second instruction:  push %ebp;  mov  %esp,%ebp
+  // Top-of-stack contains the old frame, caller IP is +4.
+  { -1,
+    {0x55, 0x89, 0xe5}, 3,
+    4
+  },
+  // Return from a function: RET.
+  // Top-of-stack contains the caller IP.
+  { 0,
+    {0xc3}, 1,
+    0
+  }
+};
+
+// Returns the program counter stored in signal_ucontext.  If the bytes
+// around that address match an entry of callunrollinfo (function entry or
+// return, where the frame is not set up), returns instead the caller's
+// return address read from the interrupted stack at
+// esp + return_sp_offset.
+//
+// Uses C-style casts and `static inline` so the function builds both as C
+// and as C++; a plain `inline` definition in C99/C11 provides no
+// out-of-line copy for calls that are not inlined.
+static inline void* GetPC(ucontext_t *signal_ucontext) {
+  // See comment above struct CallUnrollInfo.  Only try instruction
+  // flow matching if both eip and esp looks reasonable.
+  const int eip = signal_ucontext->uc_mcontext.gregs[REG_EIP];
+  const int esp = signal_ucontext->uc_mcontext.gregs[REG_ESP];
+  if ((eip & 0xffff0000) != 0 && (~eip & 0xffff0000) != 0 &&
+      (esp & 0xffff0000) != 0) {
+    const char *eip_char = (const char*)eip;
+    size_t i;  // unsigned, to match the sizeof-based bound below
+    for (i = 0; i < sizeof(callunrollinfo)/sizeof(*callunrollinfo); ++i) {
+      if (!memcmp(eip_char + callunrollinfo[i].pc_offset,
+                  callunrollinfo[i].ins, callunrollinfo[i].ins_size)) {
+        // We have a match.
+        void **retaddr = (void**)(esp + callunrollinfo[i].return_sp_offset);
+        return *retaddr;
+      }
+    }
+  }
+  return (void*)eip;
+}
+
+// Special case #2: Windows, which has to do something totally different.
+#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__)
+// If this is ever implemented, probably the way to do it is to have
+// profiler.cc use a high-precision timer via timeSetEvent:
+// http://msdn2.microsoft.com/en-us/library/ms712713.aspx
+// We'd use it in mode TIME_CALLBACK_FUNCTION/TIME_PERIODIC.
+// The callback function would be something like prof_handler, but
+// alas the arguments are different: no ucontext_t! I don't know
+// how we'd get the PC (using StackWalk64?)
+// http://msdn2.microsoft.com/en-us/library/ms680650.aspx
+
+#include "base/logging.h" // for RAW_LOG
+#ifndef HAVE_CYGWIN_SIGNAL_H
+typedef int ucontext_t;
+#endif
+
+// Windows stub: logs an error through RAW_LOG and returns NULL; the
+// ucontext argument is not consulted.  `static inline` so that no
+// out-of-line definition is required when this header is compiled as C.
+static inline void* GetPC(ucontext_t *signal_ucontext) {
+  (void)signal_ucontext;  // intentionally unused
+  RAW_LOG(ERROR, "GetPC is not yet implemented on Windows\n");
+  return NULL;
+}
+
+// Normal cases. If this doesn't compile, it's probably because
+// PC_FROM_UCONTEXT is the empty string. You need to figure out
+// the right value for your system, and add it to the list in
+// configure.ac (or set it manually in your config.h).
+#else
+// Generic case: read the program counter through the PC_FROM_UCONTEXT
+// member path.  Declared `static inline` because a plain `inline`
+// definition in C99/C11 does not emit an out-of-line copy, so any call the
+// compiler declines to inline would be an undefined reference at link
+// time; `static inline` is also valid C++.
+static inline void* GetPC(ucontext_t *signal_ucontext) {
+  return (void*)signal_ucontext->PC_FROM_UCONTEXT;   // defined in config.h
+}
+
+#endif
+
+#endif // BASE_GETPC_H_
diff --git a/pypy/module/_vmprof/src/vmprof.c b/pypy/module/_vmprof/src/vmprof.c
new file mode 100644
--- /dev/null
+++ b/pypy/module/_vmprof/src/vmprof.c
@@ -0,0 +1,249 @@
+/* VMPROF
+ *
+ * statistical sampling profiler specifically designed to profile programs
+ * which run on a Virtual Machine and/or bytecode interpreter, such as Python,
+ * etc.
+ *
+ * The logic to dump the C stack traces is partly stolen from the code in gperftools.
+ * The file "getpc.h" has been entirely copied from gperftools.
+ *
+ * Tested only on gcc, linux, x86_64.
+ *
+ * Copyright (C) 2014 Antonio Cuni - anto.cuni at gmail.com
+ *
+ */
+
+
+#include "getpc.h" // should be first to get the _GNU_SOURCE dfn
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <stddef.h>
+#include <assert.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#define UNW_LOCAL_ONLY
+#include <libunwind.h>
+
+#include "vmprof.h"
+
+#define _unused(x) ((void)x)
+
+#define MAX_FUNC_NAME 128
+#define MAX_STACK_DEPTH 64
+
+static FILE* profile_file;
+static FILE* symbol_file;
+void* vmprof_mainloop_func;
+static ptrdiff_t mainloop_sp_offset;
+static vmprof_get_virtual_ip_t mainloop_get_virtual_ip;
+
+
+/* *************************************************************
+ * functions to write a profile file compatible with gperftools
+ * *************************************************************
+ */
+/* Append one word to the profile stream: the raw bytes of a long, exactly
+ * as laid out in memory.  A short or failed write is not reported. */
+static void prof_word(FILE* f, long x) {
+    const long word = x;
+    fwrite(&word, sizeof word, 1, f);
+}
+
+/* Write the five-word header that opens a profile file.  The words are
+ * 0, 3, 0, period_usec, 0; only the fourth varies.
+ * NOTE(review): the fixed values presumably follow the gperftools CPU
+ * profile header layout this file says it is compatible with -- confirm
+ * against the gperftools format description. */
+static void prof_header(FILE* f, long period_usec) {
+    const long header[] = { 0, 3, 0, period_usec, 0 };
+    size_t idx;
+    for (idx = 0; idx < sizeof header / sizeof header[0]; idx++)
+        prof_word(f, header[idx]);
+}
+
+/* Write one sample record: the sample count, the number of frames, then
+ * each of the first `depth` entries of `stack` as one word, in array
+ * order.  Nothing beyond the two leading words is written when depth <= 0. */
+static void prof_write_stacktrace(FILE* f, void** stack, int depth, int count) {
+    void **frame = stack;
+    int remaining = depth;
+
+    prof_word(f, count);
+    prof_word(f, depth);
+    while (remaining-- > 0)
+        prof_word(f, (long)*frame++);
+}
+
+/* Write the three-word marker 0, 1, 0 that ends the binary part of the
+ * profile.
+ * NOTE(review): presumably the gperftools "binary trailer" record --
+ * confirm against the gperftools format description. */
+static void prof_binary_trailer(FILE* f) {
+    static const long trailer[] = { 0, 1, 0 };
+    size_t idx;
+    for (idx = 0; idx < sizeof trailer / sizeof trailer[0]; idx++)
+        prof_word(f, trailer[idx]);
+}
+
+
+/* *************************************************************
+ * functions to dump the stack trace
+ * *************************************************************
+ */
+
+// stolen from pprof:
+// Sometimes, we can try to get a stack trace from within a stack
+// trace, because libunwind can call mmap (maybe indirectly via an
+// internal mmap based memory allocator), and that mmap gets trapped
+// and causes a stack-trace request. If we were to try to honor that
+// recursive request, we'd end up with infinite recursion or deadlock.
+// Luckily, it's safe to ignore those subsequent traces. In such
+// cases, we return 0 to indicate the situation.
+static __thread int recursive;
+
+/* Walk the stack described by `ucontext` with libunwind and store up to
+ * `max_depth` instruction pointers into `result`, starting with the frame
+ * the context describes and stepping outwards.
+ *
+ * When a frame (other than the first one visited) belongs to the
+ * registered main-loop function, the pointer found at that frame's
+ * SP + mainloop_sp_offset is handed to mainloop_get_virtual_ip() and the
+ * value it returns is recorded instead of the real instruction pointer.
+ *
+ * Returns the number of entries written.  Returns 0 when invoked while a
+ * trace is already in progress on this thread, or when the unwinder
+ * cannot be initialised from the context.
+ */
+int get_stack_trace(void** result, int max_depth, ucontext_t *ucontext) {
+    int n = 0;
+    unw_cursor_t cursor;
+    unw_context_t uc = *ucontext;
+
+    if (recursive) {
+        return 0;
+    }
+    ++recursive;
+
+    /* Checked at run time rather than with assert(): under NDEBUG an
+       assert disappears and the loop would walk an uninitialised cursor. */
+    if (unw_init_local(&cursor, &uc) < 0) {
+        --recursive;
+        return 0;
+    }
+
+    while (n < max_depth) {
+        unw_word_t ip_word;
+        void *ip;
+
+        if (unw_get_reg(&cursor, UNW_REG_IP, &ip_word) < 0) {
+            break;
+        }
+        ip = (void*)ip_word;
+
+        /* if n==0, it means that the signal handler interrupted us while we
+           were in the trampoline, so we are not executing (yet) the real main
+           loop function; just skip it */
+        if (n > 0 && vmprof_mainloop_func && mainloop_get_virtual_ip) {
+            unw_proc_info_t pip;
+            unw_word_t sp;
+
+            /* Only trust pip and sp when libunwind reports success; both
+               are left unset on failure. */
+            if (unw_get_proc_info(&cursor, &pip) == 0 &&
+                (void*)pip.start_ip == (void*)vmprof_mainloop_func &&
+                unw_get_reg(&cursor, UNW_REG_SP, &sp) == 0) {
+                // found main loop stack frame
+                void **arg_ptr = (void**)((char*)sp + mainloop_sp_offset);
+                ip = mainloop_get_virtual_ip(*arg_ptr);
+            }
+        }
+
+        result[n++] = ip;
+        if (unw_step(&cursor) <= 0) {
+            break;
+        }
+    }
+    --recursive;
+    return n;
+}
+
+
+/* Identity function that the compiler is told not to inline; returns its
+   argument unchanged.  sigprof_handler passes the result of
+   get_stack_trace() through it.
+   NOTE(review): presumably this keeps the get_stack_trace() call from
+   being optimised in a way that drops the caller's stack frame (the
+   technique is used in gperftools) -- confirm before changing. */
+static int __attribute__((noinline)) frame_forcer(int rv) {
+ return rv;
+}
+
+/* SIGPROF handler: record one sample in profile_file.
+ *
+ * stack[0] is the interrupted program counter taken from the signal
+ * context; the remaining slots are filled by get_stack_trace().  The
+ * sample is written with a count of 1.
+ *
+ * sig_nr and info are unused; ucontext is the ucontext_t the kernel
+ * passes as the third argument of an SA_SIGINFO handler.
+ */
+static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext) {
+    /* The interrupted code may sit between a failing call and its read of
+       errno; the unwinder and stdio calls below may overwrite errno, so
+       save it here and put it back before returning. */
+    const int saved_errno = errno;
+    void* stack[MAX_STACK_DEPTH];
+    int depth;
+
+    _unused(sig_nr);
+    _unused(info);
+
+    stack[0] = GetPC((ucontext_t*)ucontext);
+    depth = frame_forcer(get_stack_trace(stack+1, MAX_STACK_DEPTH-1,
+                                         (ucontext_t*)ucontext));
+    depth++;  // To account for pc value in stack[0];
+    prof_write_stacktrace(profile_file, stack, depth, 1);
+
+    errno = saved_errno;
+}
+
+/* *************************************************************
+ * functions to enable/disable the profiler
+ * *************************************************************
+ */
+
+/* Open the two output files: `filename` (binary profile, header written
+ * immediately) and `filename`.sym (text symbol table).
+ *
+ * Failures are reported on stderr and leave the corresponding FILE*
+ * (profile_file / symbol_file) set to NULL; this function has no way to
+ * return an error to its caller.
+ */
+static void open_profile(const char* filename, long period_usec) {
+    char buf[4096];
+    int len;
+
+    profile_file = fopen(filename, "wb");
+    if (profile_file == NULL) {
+        perror(filename);
+    } else {
+        prof_header(profile_file, period_usec);
+    }
+
+    /* Bounded formatting: the ".sym" suffix must fit as well, which an
+       unchecked sprintf() into buf did not guarantee. */
+    symbol_file = NULL;
+    len = snprintf(buf, sizeof buf, "%s.sym", filename);
+    if (len < 0 || (size_t)len >= sizeof buf) {
+        fprintf(stderr, "vmprof: symbol file name too long: %s.sym\n",
+                filename);
+        return;
+    }
+    symbol_file = fopen(buf, "w");
+    if (symbol_file == NULL) {
+        perror(buf);
+    }
+}
+
+/* Finish and close the output files.
+ *
+ * Writes the binary trailer to the profile, appends the contents of
+ * /proc/PID/maps for the current process, then closes both the profile
+ * and the symbol file.  Each FILE* is reset to NULL after closing, and a
+ * stream that is already NULL is skipped, so a repeated call is harmless.
+ * If /proc/PID/maps cannot be opened the error is reported on stderr and
+ * the profile is closed without the mapping table.
+ */
+static void close_profile(void) {
+    char buf[BUFSIZ];
+
+    if (profile_file != NULL) {
+        FILE* src;
+        size_t size;
+
+        prof_binary_trailer(profile_file);
+
+        // copy /proc/PID/maps to the end of the profile file
+        snprintf(buf, sizeof buf, "/proc/%d/maps", (int)getpid());
+        src = fopen(buf, "r");
+        if (src == NULL) {
+            perror(buf);
+        } else {
+            while ((size = fread(buf, 1, sizeof buf, src)) > 0) {
+                fwrite(buf, 1, size, profile_file);
+            }
+            fclose(src);
+        }
+        fclose(profile_file);
+        profile_file = NULL;
+    }
+
+    if (symbol_file != NULL) {
+        fclose(symbol_file);
+        symbol_file = NULL;
+    }
+}
+
+
+/* Route SIGPROF to sigprof_handler using the three-argument (SA_SIGINFO)
+ * handler form, with SA_RESTART set and an empty signal mask.  The result
+ * of sigaction() is not checked. */
+static void install_sigprof_handler(void) {
+    struct sigaction action;
+
+    memset(&action, 0, sizeof action);
+    sigemptyset(&action.sa_mask);
+    action.sa_flags = SA_SIGINFO | SA_RESTART;
+    action.sa_sigaction = sigprof_handler;
+    sigaction(SIGPROF, &action, NULL);
+}
+
+/* Restore the default disposition for SIGPROF. */
+static void remove_sigprof_handler(void) {
+    signal(SIGPROF, SIG_DFL);
+}
+
+/* Arm ITIMER_PROF so that it fires every `period_usec` microseconds,
+ * starting one period from now.
+ *
+ * The period is split into whole seconds and leftover microseconds:
+ * setitimer() rejects a tv_usec of one million or more, so periods of a
+ * second or longer could not be set through tv_usec alone.
+ * Prints a message on stdout if the timer cannot be set.
+ */
+static void install_sigprof_timer(long period_usec) {
+    struct itimerval timer;
+
+    timer.it_interval.tv_sec = period_usec / 1000000;
+    timer.it_interval.tv_usec = period_usec % 1000000;
+    timer.it_value = timer.it_interval;
+    if (setitimer(ITIMER_PROF, &timer, NULL) != 0) {
+        printf("Timer could not be initialized \n");
+    }
+}
+
+/* Disarm ITIMER_PROF by setting both its interval and its current value
+ * to zero.  Prints a message on stdout if setitimer() fails. */
+static void remove_sigprof_timer(void) {
+    struct itimerval disarmed;
+
+    memset(&disarmed, 0, sizeof disarmed);
+    if (setitimer(ITIMER_PROF, &disarmed, NULL) == 0) {
+        return;
+    }
+    printf("Timer could not be deleted \n");
+}
+
+/* *************************************************************
+ * public API
+ * *************************************************************
+ */
+
+/* Register the interpreter main-loop function for virtual-IP lookup.
+ *
+ * func           - start address of the main-loop function; frames whose
+ *                  procedure starts here are treated specially by
+ *                  get_stack_trace().
+ * sp_offset      - byte offset from such a frame's stack pointer to the
+ *                  pointer that is passed to get_virtual_ip.
+ * get_virtual_ip - callback whose return value is recorded in place of
+ *                  the frame's real instruction pointer.
+ */
+void vmprof_set_mainloop(void* func, ptrdiff_t sp_offset,
+                         vmprof_get_virtual_ip_t get_virtual_ip) {
+    mainloop_get_virtual_ip = get_virtual_ip;
+    mainloop_sp_offset = sp_offset;
+    vmprof_mainloop_func = func;
+}
+
+/* Start profiling: open the output files for `filename`, install the
+ * SIGPROF handler, then arm the profiling timer.
+ *
+ * period_usec is the sampling period in microseconds; the value -1
+ * selects the default of 10000 us (100 samples per second).
+ */
+void vmprof_enable(const char* filename, long period_usec) {
+    const long effective_period =
+        (period_usec == -1) ? 1000000 / 100 /* 100hz */ : period_usec;
+
+    open_profile(filename, effective_period);
+    install_sigprof_handler();
+    install_sigprof_timer(effective_period);
+}
+
+/* Stop profiling and finalize the output.
+   The order is deliberate: the timer is cancelled first so no new SIGPROF
+   is scheduled, the handler is removed next, and only then are the output
+   files finished and closed. */
+void vmprof_disable(void) {
+ remove_sigprof_timer();
+ remove_sigprof_handler();
+ close_profile();
+}
+
+/* Append one "ADDRESS: NAME" line for a virtual function to the symbol
+ * file.
+ *
+ * name  - symbol name to record.
+ * start - address written with %p in front of the name.
+ * end   - currently ignored.
+ *
+ * Does nothing when no symbol file is open (profiling not enabled, or the
+ * file could not be opened) instead of passing a NULL stream to fprintf().
+ */
+void vmprof_register_virtual_function(const char* name, void* start, void* end) {
+    // for now *end is simply ignored
+    _unused(end);
+    if (symbol_file == NULL) {
+        return;
+    }
+    fprintf(symbol_file, "%p: %s\n", start, name);
+}
diff --git a/pypy/module/_vmprof/src/vmprof.h b/pypy/module/_vmprof/src/vmprof.h
new file mode 100644
--- /dev/null
+++ b/pypy/module/_vmprof/src/vmprof.h
@@ -0,0 +1,18 @@
+/* Public interface of the vmprof sampling profiler (implemented in
+ * vmprof.c). */
+#ifndef VMPROF_VMPROF_H_
+#define VMPROF_VMPROF_H_
+
+#include <stddef.h>
+
+/* Callback that maps the pointer read from a main-loop stack frame to the
+ * "virtual" instruction pointer recorded in the profile for that frame. */
+typedef void* (*vmprof_get_virtual_ip_t)(void*);
+
+/* Start address of the registered main-loop function, or NULL if none has
+ * been registered; set by vmprof_set_mainloop(). */
+extern void* vmprof_mainloop_func;
+/* Register the main-loop function.  While sampling, a stack frame whose
+ * procedure starts at `func` has the pointer stored at (frame SP +
+ * sp_offset) passed to `get_virtual_ip`, and the returned value is
+ * recorded in place of the frame's real instruction pointer. */
+void vmprof_set_mainloop(void* func, ptrdiff_t sp_offset,
+ vmprof_get_virtual_ip_t get_virtual_ip);
+
+/* Append a "START: NAME" line to the symbol file that vmprof_enable()
+ * opens.  `end` is currently ignored. */
+void vmprof_register_virtual_function(const char* name, void* start, void* end);
+
+
+/* Start sampling.  The profile is written to `filename` and symbols to
+ * `filename`.sym.  `period_usec` is the sampling period in microseconds;
+ * pass -1 for the default of 100 samples per second. */
+void vmprof_enable(const char* filename, long period_usec);
+/* Stop sampling, append /proc/PID/maps to the profile and close the
+ * output files. */
+void vmprof_disable(void);
+
+#endif
More information about the pypy-commit
mailing list