[pypy-commit] pypy vmprof: import vmprof from bb:antocuni/vmprof, rev f841327f5621

antocuni noreply at buildbot.pypy.org
Fri Oct 24 18:15:13 CEST 2014


Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: vmprof
Changeset: r74182:5b2d6fde1ecd
Date: 2014-10-24 17:07 +0100
http://bitbucket.org/pypy/pypy/changeset/5b2d6fde1ecd/

Log:	import vmprof from bb:antocuni/vmprof, rev f841327f5621

diff --git a/LICENSE b/LICENSE
--- a/LICENSE
+++ b/LICENSE
@@ -407,3 +407,10 @@
 the terms of the GPL license version 2 or any later version.  Thus the
 gdbm module, provided in the file lib_pypy/gdbm.py, is redistributed
 under the terms of the GPL license as well.
+
+License for 'pypy/module/_vmprof/src'
+--------------------------------------
+
+The code is based on gperftools. You may see a copy of the License for it at
+
+    https://code.google.com/p/gperftools/source/browse/COPYING
diff --git a/pypy/module/_vmprof/src/config.h b/pypy/module/_vmprof/src/config.h
new file mode 100644
--- /dev/null
+++ b/pypy/module/_vmprof/src/config.h
@@ -0,0 +1,2 @@
+#define HAVE_SYS_UCONTEXT_H
+#define PC_FROM_UCONTEXT uc_mcontext.gregs[REG_RIP]
diff --git a/pypy/module/_vmprof/src/getpc.h b/pypy/module/_vmprof/src/getpc.h
new file mode 100644
--- /dev/null
+++ b/pypy/module/_vmprof/src/getpc.h
@@ -0,0 +1,187 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Craig Silverstein
+//
+// This is an internal header file used by profiler.cc.  It defines
+// the single (inline) function GetPC.  GetPC is used in a signal
+// handler to figure out the instruction that was being executed when
+// the signal-handler was triggered.
+//
+// To get this, we use the ucontext_t argument to the signal-handler
+// callback, which holds the full context of what was going on when
+// the signal triggered.  How to get from a ucontext_t to a Program
+// Counter is OS-dependent.
+
+#ifndef BASE_GETPC_H_
+#define BASE_GETPC_H_
+
+#include "config.h"
+
+// On many linux systems, we may need _GNU_SOURCE to get access to
+// the defined constants that define the register we want to see (eg
+// REG_EIP).  Note this #define must come first!
+#define _GNU_SOURCE 1
+// If #define _GNU_SOURCE causes problems, this might work instead.
+// It will cause problems for FreeBSD though!, because it turns off
+// the needed __BSD_VISIBLE.
+//#define _XOPEN_SOURCE 500
+
+#include <string.h>         // for memcmp
+#if defined(HAVE_SYS_UCONTEXT_H)
+#include <sys/ucontext.h>
+#elif defined(HAVE_UCONTEXT_H)
+#include <ucontext.h>       // for ucontext_t (and also mcontext_t)
+#elif defined(HAVE_CYGWIN_SIGNAL_H)
+#include <cygwin/signal.h>
+typedef ucontext ucontext_t;
+#endif
+
+
+// Take the example where function Foo() calls function Bar().  For
+// many architectures, Bar() is responsible for setting up and tearing
+// down its own stack frame.  In that case, it's possible for the
+// interrupt to happen when execution is in Bar(), but the stack frame
+// is not properly set up (either before it's done being set up, or
+// after it's been torn down but before Bar() returns).  In those
+// cases, the stack trace cannot see the caller function anymore.
+//
+// GetPC can try to identify this situation, on architectures where it
+// might occur, and unwind the current function call in that case to
+// avoid false edges in the profile graph (that is, edges that appear
+// to show a call skipping over a function).  To do this, we hard-code
+// in the asm instructions we might see when setting up or tearing
+// down a stack frame.
+//
+// This is difficult to get right: the instructions depend on the
+// processor, the compiler ABI, and even the optimization level.  This
+// is a best effort patch -- if we fail to detect such a situation, or
+// mess up the PC, nothing happens; the returned PC is not used for
+// any further processing.
+struct CallUnrollInfo {
+  // Offset from (e)ip register where this instruction sequence
+  // should be matched. Interpreted as bytes. Offset 0 is the next
+  // instruction to execute. Be extra careful with negative offsets in
+  // architectures of variable instruction length (like x86) - it is
+  // not that easy as taking an offset to step one instruction back!
+  int pc_offset;
+  // The actual instruction bytes. Feel free to make it larger if you
+  // need a longer sequence.
+  unsigned char ins[16];
+  // How many bytes to match from ins array?
+  int ins_size;
+  // The offset from the stack pointer (e)sp where to look for the
+  // call return address. Interpreted as bytes.
+  int return_sp_offset;
+};
+
+
+// The dereferences needed to get the PC from a struct ucontext were
+// determined at configure time, and stored in the macro
+// PC_FROM_UCONTEXT in config.h.  The only thing we need to do here,
+// then, is to do the magic call-unrolling for systems that support it.
+
+// -- Special case 1: linux x86, for which we have CallUnrollInfo
+#if defined(__linux) && defined(__i386) && defined(__GNUC__)
+static const CallUnrollInfo callunrollinfo[] = {
+  // Entry to a function:  push %ebp;  mov  %esp,%ebp
+  // Top-of-stack contains the caller IP.
+  { 0,
+    {0x55, 0x89, 0xe5}, 3,
+    0
+  },
+  // Entry to a function, second instruction:  push %ebp;  mov  %esp,%ebp
+  // Top-of-stack contains the old frame, caller IP is +4.
+  { -1,
+    {0x55, 0x89, 0xe5}, 3,
+    4
+  },
+  // Return from a function: RET.
+  // Top-of-stack contains the caller IP.
+  { 0,
+    {0xc3}, 1,
+    0
+  }
+};
+
+inline void* GetPC(ucontext_t *signal_ucontext) {
+  // See comment above struct CallUnrollInfo.  Only try instruction
+  // flow matching if both eip and esp looks reasonable.
+  const int eip = signal_ucontext->uc_mcontext.gregs[REG_EIP];
+  const int esp = signal_ucontext->uc_mcontext.gregs[REG_ESP];
+  if ((eip & 0xffff0000) != 0 && (~eip & 0xffff0000) != 0 &&
+      (esp & 0xffff0000) != 0) {
+    char* eip_char = reinterpret_cast<char*>(eip);
+    for (int i = 0; i < sizeof(callunrollinfo)/sizeof(*callunrollinfo); ++i) {
+      if (!memcmp(eip_char + callunrollinfo[i].pc_offset,
+                  callunrollinfo[i].ins, callunrollinfo[i].ins_size)) {
+        // We have a match.
+        void **retaddr = (void**)(esp + callunrollinfo[i].return_sp_offset);
+        return *retaddr;
+      }
+    }
+  }
+  return (void*)eip;
+}
+
+// Special case #2: Windows, which has to do something totally different.
+#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__)
+// If this is ever implemented, probably the way to do it is to have
+// profiler.cc use a high-precision timer via timeSetEvent:
+//    http://msdn2.microsoft.com/en-us/library/ms712713.aspx
+// We'd use it in mode TIME_CALLBACK_FUNCTION/TIME_PERIODIC.
+// The callback function would be something like prof_handler, but
+// alas the arguments are different: no ucontext_t!  I don't know
+// how we'd get the PC (using StackWalk64?)
+//    http://msdn2.microsoft.com/en-us/library/ms680650.aspx
+
+#include "base/logging.h"   // for RAW_LOG
+#ifndef HAVE_CYGWIN_SIGNAL_H
+typedef int ucontext_t;
+#endif
+
+inline void* GetPC(ucontext_t *signal_ucontext) {
+  RAW_LOG(ERROR, "GetPC is not yet implemented on Windows\n");
+  return NULL;
+}
+
+// Normal cases.  If this doesn't compile, it's probably because
+// PC_FROM_UCONTEXT is the empty string.  You need to figure out
+// the right value for your system, and add it to the list in
+// configure.ac (or set it manually in your config.h).
+#else
+inline void* GetPC(ucontext_t *signal_ucontext) {
+  return (void*)signal_ucontext->PC_FROM_UCONTEXT;   // defined in config.h
+}
+
+#endif
+
+#endif  // BASE_GETPC_H_
diff --git a/pypy/module/_vmprof/src/vmprof.c b/pypy/module/_vmprof/src/vmprof.c
new file mode 100644
--- /dev/null
+++ b/pypy/module/_vmprof/src/vmprof.c
@@ -0,0 +1,249 @@
+/* VMPROF
+ *
+ * statistical sampling profiler specifically designed to profile programs
+ * which run on a Virtual Machine and/or bytecode interpreter, such as Python,
+ * etc.
+ *
+ * The logic to dump the C stack traces is partly stolen from the code in gperftools.
+ * The file "getpc.h" has been entirely copied from gperftools.
+ *
+ * Tested only on gcc, linux, x86_64.
+ *
+ * Copyright (C) 2014 Antonio Cuni - anto.cuni at gmail.com
+ *
+ */
+
+
+#include "getpc.h"      // should be first to get the _GNU_SOURCE dfn
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <stddef.h>
+#include <assert.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#define UNW_LOCAL_ONLY
+#include <libunwind.h>
+
+#include "vmprof.h"
+
+#define _unused(x) ((void)x)
+
+#define MAX_FUNC_NAME 128
+#define MAX_STACK_DEPTH 64
+
+static FILE* profile_file;
+static FILE* symbol_file;
+void* vmprof_mainloop_func;
+static ptrdiff_t mainloop_sp_offset;
+static vmprof_get_virtual_ip_t mainloop_get_virtual_ip;
+
+
+/* *************************************************************
+ * functions to write a profile file compatible with gperftools
+ * *************************************************************
+ */
+static void prof_word(FILE* f, long x) {
+    fwrite(&x, sizeof(x), 1, f);
+}
+
+static void prof_header(FILE* f, long period_usec) {
+    prof_word(f, 0);
+    prof_word(f, 3);
+    prof_word(f, 0);
+    prof_word(f, period_usec);
+    prof_word(f, 0);
+}
+
+static void prof_write_stacktrace(FILE* f, void** stack, int depth, int count) {
+    int i;
+    prof_word(f, count);
+    prof_word(f, depth);
+    for(i=0; i<depth; i++)
+        prof_word(f, (long)stack[i]);
+}
+
+static void prof_binary_trailer(FILE* f) {
+    prof_word(f, 0);
+    prof_word(f, 1);
+    prof_word(f, 0);
+}
+
+
+/* *************************************************************
+ * functions to dump the stack trace
+ * *************************************************************
+ */
+
+// stolen from pprof:
+// Sometimes, we can try to get a stack trace from within a stack
+// trace, because libunwind can call mmap (maybe indirectly via an
+// internal mmap based memory allocator), and that mmap gets trapped
+// and causes a stack-trace request.  If were to try to honor that
+// recursive request, we'd end up with infinite recursion or deadlock.
+// Luckily, it's safe to ignore those subsequent traces.  In such
+// cases, we return 0 to indicate the situation.
+static __thread int recursive;
+
+int get_stack_trace(void** result, int max_depth, ucontext_t *ucontext) {
+    void *ip;
+    int n = 0;
+    unw_cursor_t cursor;
+    unw_context_t uc = *ucontext;
+    if (recursive) {
+        return 0;
+    }
+    ++recursive;
+
+    int ret = unw_init_local(&cursor, &uc);
+    assert(ret >= 0);
+    _unused(ret);
+
+    while (n < max_depth) {
+        if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) {
+            break;
+        }
+
+        unw_proc_info_t pip;
+        unw_get_proc_info(&cursor, &pip);
+
+        /* char funcname[4096]; */
+        /* unw_word_t offset; */
+        /* unw_get_proc_name(&cursor, funcname, 4096, &offset); */
+        /* printf("%s+%#lx <%p>\n", funcname, offset, ip); */
+
+        /* if n==0, it means that the signal handler interrupted us while we
+           were in the trampoline, so we are not executing (yet) the real main
+           loop function; just skip it */
+        if (vmprof_mainloop_func && 
+            (void*)pip.start_ip == (void*)vmprof_mainloop_func &&
+            n > 0) {
+          // found main loop stack frame
+          void* sp;
+          unw_get_reg(&cursor, UNW_REG_SP, (unw_word_t *) &sp);
+          void *arg_addr = (char*)sp + mainloop_sp_offset;
+          void **arg_ptr = (void**)arg_addr;
+          // fprintf(stderr, "stacktrace mainloop: rsp %p   &f2 %p   offset %ld\n", 
+          //         sp, arg_addr, mainloop_sp_offset);
+          ip = mainloop_get_virtual_ip(*arg_ptr);
+        }
+
+        result[n++] = ip;
+        if (unw_step(&cursor) <= 0) {
+            break;
+        }
+    }
+    --recursive;
+    return n;
+}
+
+
+static int __attribute__((noinline)) frame_forcer(int rv) {
+    return rv;
+}
+
+static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext) {
+    void* stack[MAX_STACK_DEPTH];
+    stack[0] = GetPC((ucontext_t*)ucontext);
+    int depth = frame_forcer(get_stack_trace(stack+1, MAX_STACK_DEPTH-1, ucontext));
+    depth++;  // To account for pc value in stack[0];
+    prof_write_stacktrace(profile_file, stack, depth, 1);
+}
+
+/* *************************************************************
+ * functions to enable/disable the profiler
+ * *************************************************************
+ */
+
+static void open_profile(const char* filename, long period_usec) {
+    char buf[4096];
+    profile_file = fopen(filename, "wb");
+    prof_header(profile_file, period_usec);
+    assert(strlen(filename) < 4096);
+    sprintf(buf, "%s.sym", filename);
+    symbol_file = fopen(buf, "w");
+}
+
+static void close_profile(void) {
+    FILE* src;
+    char buf[BUFSIZ];
+    size_t size;
+    prof_binary_trailer(profile_file);
+
+    // copy /proc/PID/maps to the end of the profile file
+    sprintf(buf, "/proc/%d/maps", getpid());
+    src = fopen(buf, "r");    
+    while ((size = fread(buf, 1, BUFSIZ, src))) {
+        fwrite(buf, 1, size, profile_file);
+    }
+    fclose(src);
+    fclose(profile_file);
+    fclose(symbol_file);
+}
+
+
+static void install_sigprof_handler(void) {
+    struct sigaction sa;
+    memset(&sa, 0, sizeof(sa));
+    sa.sa_sigaction = sigprof_handler;
+    sa.sa_flags = SA_RESTART | SA_SIGINFO;
+    sigemptyset(&sa.sa_mask);
+    sigaction(SIGPROF, &sa, NULL);
+}
+
+static void remove_sigprof_handler(void) {
+    signal(SIGPROF, SIG_DFL);
+};
+
+static void install_sigprof_timer(long period_usec) {
+    static struct itimerval timer;
+    timer.it_interval.tv_sec = 0;
+    timer.it_interval.tv_usec = period_usec;
+    timer.it_value = timer.it_interval;
+    if (setitimer(ITIMER_PROF, &timer, NULL) != 0) {
+        printf("Timer could not be initialized \n");
+    }
+}
+
+static void remove_sigprof_timer(void) {
+    static struct itimerval timer;
+    timer.it_interval.tv_sec = 0;
+    timer.it_interval.tv_usec = 0;
+    timer.it_value = timer.it_interval;
+    if (setitimer(ITIMER_PROF, &timer, NULL) != 0) {
+        printf("Timer could not be deleted \n");
+    }
+}
+
+/* *************************************************************
+ * public API
+ * *************************************************************
+ */
+
+void vmprof_set_mainloop(void* func, ptrdiff_t sp_offset, 
+                         vmprof_get_virtual_ip_t get_virtual_ip) {
+    vmprof_mainloop_func = func;
+    mainloop_sp_offset = sp_offset;
+    mainloop_get_virtual_ip = get_virtual_ip;
+}
+
+void vmprof_enable(const char* filename, long period_usec) {
+    if (period_usec == -1)
+        period_usec = 1000000 / 100; /* 100hz */
+    open_profile(filename, period_usec);
+    install_sigprof_handler();
+    install_sigprof_timer(period_usec);
+}
+
+void vmprof_disable(void) {
+    remove_sigprof_timer();
+    remove_sigprof_handler();
+    close_profile();
+}
+
+void vmprof_register_virtual_function(const char* name, void* start, void* end) {
+    // for now *end is simply ignored
+    fprintf(symbol_file, "%p: %s\n", start, name);
+}
diff --git a/pypy/module/_vmprof/src/vmprof.h b/pypy/module/_vmprof/src/vmprof.h
new file mode 100644
--- /dev/null
+++ b/pypy/module/_vmprof/src/vmprof.h
@@ -0,0 +1,18 @@
+#ifndef VMPROF_VMPROF_H_
+#define VMPROF_VMPROF_H_
+
+#include <stddef.h>
+
+typedef void* (*vmprof_get_virtual_ip_t)(void*);
+
+extern void* vmprof_mainloop_func;
+void vmprof_set_mainloop(void* func, ptrdiff_t sp_offset, 
+                         vmprof_get_virtual_ip_t get_virtual_ip);
+
+void vmprof_register_virtual_function(const char* name, void* start, void* end);
+
+
+void vmprof_enable(const char* filename, long period_usec);
+void vmprof_disable(void);
+
+#endif


More information about the pypy-commit mailing list