[pypy-commit] pypy vmprof-0.4.4: copy over changes made to vmprof-python (a54ccd2e6f0)

plan_rich pypy.commits at gmail.com
Thu Apr 20 10:02:58 EDT 2017


Author: Richard Plangger <planrichi at gmail.com>
Branch: vmprof-0.4.4
Changeset: r91096:f4b0b92eb6b1
Date: 2017-04-20 09:33 -0400
http://bitbucket.org/pypy/pypy/changeset/f4b0b92eb6b1/

Log:	copy over changes made to vmprof-python (a54ccd2e6f0)

diff --git a/rpython/rlib/rvmprof/src/shared/_vmprof.c b/rpython/rlib/rvmprof/src/shared/_vmprof.c
--- a/rpython/rlib/rvmprof/src/shared/_vmprof.c
+++ b/rpython/rlib/rvmprof/src/shared/_vmprof.c
@@ -1,8 +1,7 @@
-/*[clinic input]
-module _vmprof
-[clinic start generated code]*/
-/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b443489e38f2be7d]*/
-
+/**
+ * This file is the CPython module _vmprof. It does not share code
+ * with PyPy. PyPy's _vmprof module is included in the main repo.
+ */
 #define _GNU_SOURCE 1
 
 #include <Python.h>
@@ -14,7 +13,7 @@
 static volatile int is_enabled = 0;
 static destructor Original_code_dealloc = 0;
 static PyObject* (*_default_eval_loop)(PyFrameObject *, int) = 0;
-void dump_native_symbols(int fileno);
+void vmp_scan_profile(int fileno, int dump_native, void *all_code_uids);
 
 #if VMPROF_UNIX
 #include "trampoline.h"
@@ -109,6 +108,33 @@
     return 0;
 }
 
+static int _look_for_code_object_seen(PyObject *o, void *all_codes)
+{
+    if (PyCode_Check(o) && PySet_GET_SIZE(all_codes)) {
+        Py_ssize_t i;
+        PyCodeObject *co = (PyCodeObject *)o;
+        PyObject *uid_co = PyLong_FromVoidPtr((void*)CODE_ADDR_TO_UID(co));
+        int check = PySet_Discard(all_codes, uid_co);
+
+        Py_CLEAR(uid_co);
+
+        if (check < 0)
+            return -1;
+
+        if (check && emit_code_object(co) < 0)
+            return -1;
+
+        i = PyTuple_Size(co->co_consts);
+        while (i > 0) {
+            --i;
+            if (_look_for_code_object(PyTuple_GET_ITEM(co->co_consts, i),
+                                      all_codes) < 0)
+                return -1;
+        }
+    }
+    return 0;
+}
+
 static void emit_all_code_objects(void)
 {
     PyObject *gc_module = NULL, *lst = NULL, *all_codes = NULL;
@@ -118,6 +144,48 @@
     if (gc_module == NULL)
         goto error;
 
+    // lst contains all objects that are known by the gc
+    lst = PyObject_CallMethod(gc_module, "get_objects", "");
+    if (lst == NULL || !PyList_Check(lst))
+        goto error;
+
+    // the set only includes the code objects found in the profile
+    all_codes = PySet_New(NULL);
+    if (all_codes == NULL)
+        goto error;
+
+    size = PyList_GET_SIZE(lst);
+    for (i = 0; i < size; i++) {
+        PyObject *o = PyList_GET_ITEM(lst, i);
+        if (o->ob_type->tp_traverse &&
+            o->ob_type->tp_traverse(o, _look_for_code_object, (void *)all_codes)
+                < 0)
+            goto error;
+    }
+
+ error:
+    Py_XDECREF(all_codes);
+    Py_XDECREF(lst);
+    Py_XDECREF(gc_module);
+}
+
+static int add_code_addr(void *all_code_uids, void *addr)
+{
+    PyObject *co_uid = PyLong_FromVoidPtr(addr);
+    int check = PySet_Add((PyObject*) all_code_uids, co_uid);
+    Py_CLEAR(co_uid);
+    return check;
+}
+
+static void emit_all_code_objects_seen(int fileno)
+{
+    PyObject *gc_module = NULL, *lst = NULL, *all_codes = NULL;
+    Py_ssize_t i, size;
+
+    gc_module = PyImport_ImportModuleNoBlock("gc");
+    if (gc_module == NULL)
+        goto error;
+
     lst = PyObject_CallMethod(gc_module, "get_objects", "");
     if (lst == NULL || !PyList_Check(lst))
         goto error;
@@ -126,16 +194,21 @@
     if (all_codes == NULL)
         goto error;
 
+    // fill up all_codes with every code object found in the profile
+    vmp_scan_profile(fileno, 0, all_codes);
+
+    // intersect the list with the set and dump only the code objects
+    // found in the set!
     size = PyList_GET_SIZE(lst);
     for (i = 0; i < size; i++) {
         PyObject *o = PyList_GET_ITEM(lst, i);
         if (o->ob_type->tp_traverse &&
-            o->ob_type->tp_traverse(o, _look_for_code_object, (void *)all_codes)
-                < 0)
+                o->ob_type->tp_traverse(o, _look_for_code_object_seen, (void *) all_codes)
+            < 0)
             goto error;
     }
 
- error:
+    error:
     Py_XDECREF(all_codes);
     Py_XDECREF(lst);
     Py_XDECREF(gc_module);
@@ -162,8 +235,16 @@
     if (!PyArg_ParseTuple(args, "id|iii", &fd, &interval, &memory, &lines, &native)) {
         return NULL;
     }
-    assert(fd >= 0 && "file descripter provided to vmprof must not" \
-                      " be less then zero.");
+
+    if (write(fd, NULL, 0) != 0) {
+        PyErr_SetString(PyExc_ValueError, "file descriptor must be writeable");
+        return NULL;
+    }
+
+    if ((read(fd, NULL, 0) != 0) && (native != 0)) {
+        PyErr_SetString(PyExc_ValueError, "file descriptor must be readable for native profiling");
+        return NULL;
+    }
 
     if (is_enabled) {
         PyErr_SetString(PyExc_ValueError, "vmprof is already enabled");
@@ -194,16 +275,53 @@
     return Py_None;
 }
 
+static PyObject * vmp_is_enabled(PyObject *module, PyObject *noargs) {
+    if (is_enabled) {
+        Py_RETURN_TRUE;
+    }
+    Py_RETURN_FALSE;
+}
+
 static PyObject *
-disable_vmprof(PyObject *module, PyObject *noarg)
+disable_vmprof(PyObject *module, PyObject *args)
 {
+    int fd = vmp_profile_fileno();
+    int only_needed = 0;
+
+    if (!PyArg_ParseTuple(args, "|i", &only_needed)) {
+        return NULL;
+    }
+
+#if VMPROF_UNIX
+    if ((read(fd, NULL, 0) != 0) && (only_needed != 0)) {
+        PyErr_SetString(PyExc_ValueError,
+                        "file descriptor must be readable to save only needed code objects");
+        return NULL;
+    }
+#else
+    if (only_needed) {
+        PyErr_SetString(PyExc_ValueError,
+                        "saving only needed code objects is not supported for windows");
+        return NULL;
+    }
+#endif
+
     if (!is_enabled) {
         PyErr_SetString(PyExc_ValueError, "vmprof is not enabled");
         return NULL;
     }
+
     is_enabled = 0;
     vmprof_ignore_signals(1);
+
+#if VMPROF_UNIX
+    if (only_needed)
+        emit_all_code_objects_seen(fd);
+    else
+        emit_all_code_objects();
+#else
     emit_all_code_objects();
+#endif
 
     if (vmprof_disable() < 0) {
         PyErr_SetFromErrno(PyExc_OSError);
@@ -320,15 +438,35 @@
 }
 #endif
 
+#ifdef VMPROF_UNIX
+static PyObject * vmp_get_profile_path(PyObject *module, PyObject *noargs) {
+    PyObject * o;
+    if (is_enabled) {
+        char * buffer[4096];
+        ssize_t buffer_len = vmp_fd_to_path(vmp_profile_fileno(), buffer, 4096);
+        if (buffer_len == -1) {
+            PyErr_Format(PyExc_NotImplementedError, "not implemented platform %s", vmp_machine_os_name());
+            return NULL;
+        }
+        return PyStr_n_NEW(buffer, buffer_len);
+    }
+    Py_RETURN_NONE;
+}
+#endif
+
 static PyMethodDef VMProfMethods[] = {
     {"enable",  enable_vmprof, METH_VARARGS, "Enable profiling."},
-    {"disable", disable_vmprof, METH_NOARGS, "Disable profiling."},
+    {"disable", disable_vmprof, METH_VARARGS, "Disable profiling."},
     {"write_all_code_objects", write_all_code_objects, METH_NOARGS,
      "Write eagerly all the IDs of code objects"},
     {"sample_stack_now", sample_stack_now, METH_VARARGS, "Sample the stack now"},
 #ifdef VMP_SUPPORTS_NATIVE_PROFILING
     {"resolve_addr", resolve_addr, METH_VARARGS, "Return the name of the addr"},
 #endif
+    {"is_enabled", vmp_is_enabled, METH_NOARGS, "Indicates if vmprof is currently sampling."},
+#ifdef VMPROF_UNIX
+    {"get_profile_path", vmp_get_profile_path, METH_NOARGS, "Profile path the profiler logs to."},
+#endif
     {NULL, NULL, 0, NULL}        /* Sentinel */
 };
 
diff --git a/rpython/rlib/rvmprof/src/shared/compat.h b/rpython/rlib/rvmprof/src/shared/compat.h
--- a/rpython/rlib/rvmprof/src/shared/compat.h
+++ b/rpython/rlib/rvmprof/src/shared/compat.h
@@ -7,11 +7,13 @@
       #define PyStr_AS_STRING PyBytes_AS_STRING
       #define PyStr_GET_SIZE PyBytes_GET_SIZE
       #define PyStr_NEW      PyUnicode_FromString
+      #define PyStr_n_NEW      PyUnicode_FromStringAndSize
       #define PyLong_NEW     PyLong_FromSsize_t
 #  else
       #define PyStr_AS_STRING PyString_AS_STRING
       #define PyStr_GET_SIZE PyString_GET_SIZE
       #define PyStr_NEW      PyString_FromString
+      #define PyStr_n_NEW      PyString_FromStringAndSize
       #define PyLong_NEW     PyInt_FromSsize_t
       #define PyLong_AsLong  PyInt_AsLong
 #  endif
diff --git a/rpython/rlib/rvmprof/src/shared/machine.c b/rpython/rlib/rvmprof/src/shared/machine.c
--- a/rpython/rlib/rvmprof/src/shared/machine.c
+++ b/rpython/rlib/rvmprof/src/shared/machine.c
@@ -27,3 +27,12 @@
 #endif
 }
 
+long vmp_fd_to_path(int fd, char * buffer, long buffer_len)
+{
+#ifdef VMPROF_LINUX
+    char proffs[24];
+    (void)snprintf(proffs, 24, "/proc/self/fd/%d", fd);
+    return readlink(proffs, buffer, buffer_len);
+#endif
+    return -1;
+}
diff --git a/rpython/rlib/rvmprof/src/shared/machine.h b/rpython/rlib/rvmprof/src/shared/machine.h
--- a/rpython/rlib/rvmprof/src/shared/machine.h
+++ b/rpython/rlib/rvmprof/src/shared/machine.h
@@ -10,3 +10,9 @@
  */
 const char * vmp_machine_os_name(void);
 
+/**
+ * Writes the filename into buffer. Returns -1 if the platform is not
+ * implemented.
+ */
+long vmp_fd_to_path(int fd, char * buffer, long buffer_len);
+
diff --git a/rpython/rlib/rvmprof/src/shared/symboltable.c b/rpython/rlib/rvmprof/src/shared/symboltable.c
--- a/rpython/rlib/rvmprof/src/shared/symboltable.c
+++ b/rpython/rlib/rvmprof/src/shared/symboltable.c
@@ -3,11 +3,15 @@
 #include "vmprof.h"
 #include "machine.h"
 
+#include "khash.h"
+
 #include <string.h>
 #include <stdio.h>
 #include <stdlib.h>
 
+#include <assert.h>
 #include <dlfcn.h>
+
 #if defined(VMPROF_LINUX)
 #include <link.h>
 #endif
@@ -206,15 +210,20 @@
 struct backtrace_state * bstate = NULL;
 int vmp_resolve_addr(void * addr, char * name, int name_len, int * lineno, char * srcfile, int srcfile_len) {
 #ifdef __APPLE__
-    Dl_info info;
-    if (dladdr((const void*)addr, &info) == 0) {
+    Dl_info dlinfo;
+    if (dladdr((const void*)addr, &dlinfo) == 0) {
         return 1;
     }
-    if (info.dli_sname != NULL) {
-        (void)strncpy(name, info.dli_sname, name_len-1);
+    if (dlinfo.dli_sname != NULL) {
+        (void)strncpy(name, dlinfo.dli_sname, name_len-1);
         name[name_len-1] = 0;
     }
-    lookup_vmprof_debug_info(name, info.dli_fbase, srcfile, srcfile_len, lineno);
+    lookup_vmprof_debug_info(name, dlinfo.dli_fbase, srcfile, srcfile_len, lineno);
+    // copy the shared object name to the source file name if source cannot be determined
+    if (srcfile[0] == 0 && dlinfo.dli_fname != NULL) {
+        (void)strncpy(srcfile, dlinfo.dli_fname, srcfile_len-1);
+        srcfile[srcfile_len-1] = 0;
+    }
 #elif defined(VMPROF_LINUX)
     if (bstate == NULL) {
         bstate = backtrace_create_state (NULL, 1, backtrace_error_cb, NULL);
@@ -238,6 +247,18 @@
             (void)strncpy(info.name, dlinfo.dli_sname, info.name_len-1);
             name[name_len-1] = 0;
         }
+
+    }
+
+    // copy the shared object name to the source file name if source cannot be determined
+    if (srcfile[0] == 0) {
+        Dl_info dlinfo;
+        dlinfo.dli_fname = NULL;
+        (void)dladdr((const void*)addr, &dlinfo);
+        if (dlinfo.dli_fname != NULL) {
+            (void)strncpy(srcfile, dlinfo.dli_fname, srcfile_len-1);
+            srcfile[srcfile_len-1] = 0;
+        }
     }
 #endif
     return 0;
@@ -338,8 +359,9 @@
     return 0;
 }
 
+KHASH_MAP_INIT_INT(ptr, intptr_t)
 
-void dump_native_symbols(int fileno)
+void vmp_scan_profile(int fileno, int dump_nat_sym, void *all_code_uids)
 {
     off_t orig_pos, cur_pos;
     char marker;
@@ -350,6 +372,9 @@
     fsync(fileno);
     orig_pos = lseek(fileno, 0, SEEK_CUR);
 
+    khash_t(ptr) * nat_syms = kh_init(ptr);
+    khiter_t it;
+
     lseek(fileno, 5*WORD_SIZE, SEEK_SET);
 
     while (1) {
@@ -363,11 +388,17 @@
             case MARKER_HEADER: {
                 LOG("header 0x%llx\n", cur_pos);
                 if (_skip_header(fileno, &version, &flags) != 0) {
+                    kh_destroy(ptr, nat_syms);
                     return;
                 }
                 memory = (flags & PROFILE_MEMORY) != 0;
                 native = (flags & PROFILE_NATIVE) != 0;
                 lines = (flags & PROFILE_LINES) != 0;
+                if (!native && dump_nat_sym) {
+                    lseek(fileno, 0, SEEK_END);
+                    kh_destroy(ptr, nat_syms);
+                    return;
+                }
                 break;
             } case MARKER_META: {
                 LOG("meta 0x%llx\n", cur_pos);
@@ -400,19 +431,46 @@
 #else
                 for (i = 0; i < depth; i++) {
                     void * addr = _read_addr(fileno);
+                    if (lines && i % 2 == 0) {
+                        continue;
+                    }
                     if (((intptr_t)addr & 0x1) == 1) {
 #endif
-                        LOG("found kind %p\n", addr);
-                        char name[MAXLEN];
-                        char srcfile[MAXLEN];
-                        name[0] = 0;
-                        srcfile[0] = 0;
-                        int lineno = 0;
-                        if (vmp_resolve_addr(addr, name, MAXLEN, &lineno, srcfile, MAXLEN) == 0) {
-                            LOG("dumping add %p, name %s, %s:%d\n", addr, name, srcfile, lineno);
-                            _dump_native_symbol(fileno, addr, name, lineno, srcfile);
+                        /* dump the native symbol to disk */
+                        if (dump_nat_sym) {
+                            LOG("found kind %p\n", addr);
+
+                            // if the address has already been dumped,
+                            // do not log it again!
+                            it = kh_get(ptr, nat_syms, (intptr_t)addr);
+                            if (it == kh_end(nat_syms)) {
+                                char name[MAXLEN];
+                                char srcfile[MAXLEN];
+                                name[0] = 0;
+                                srcfile[0] = 0;
+                                int lineno = 0;
+                                if (vmp_resolve_addr(addr, name, MAXLEN, &lineno, srcfile, MAXLEN) == 0) {
+                                    LOG("dumping add %p, name %s, %s:%d\n", addr, name, srcfile, lineno);
+                                    _dump_native_symbol(fileno, addr, name, lineno, srcfile);
+                                    int ret;
+                                    it = kh_put(ptr, nat_syms, (intptr_t)addr, &ret);
+                                    kh_value(nat_syms, it) = 1;
+                                }
+                            }
+                        }
+#ifdef RPYTHON_VMPROF
+                    }
+#else
+                    } else {
+                        // cpython adds all addresses into a set to get the intersection
+                        // of all gc known code addresses
+                        if (all_code_uids != NULL) {
+                            PyObject *co_uid = PyLong_FromVoidPtr(addr);
+                            int check = PySet_Add(all_code_uids, co_uid);
+                            Py_CLEAR(co_uid);
                         }
                     }
+#endif
                 }
                 LOG("passed  memory %d \n", memory);
 
@@ -427,6 +485,7 @@
             } default: {
                 fprintf(stderr, "unknown marker 0x%x\n", marker);
                 lseek(fileno, 0, SEEK_END);
+                kh_destroy(ptr, nat_syms);
                 return;
             }
         }
@@ -437,5 +496,11 @@
         }
     }
 
+    kh_destroy(ptr, nat_syms);
     lseek(fileno, 0, SEEK_END);
 }
+
+void dump_native_symbols(int fileno)
+{
+    vmp_scan_profile(fileno, 1, NULL);
+}
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_main.h b/rpython/rlib/rvmprof/src/shared/vmprof_main.h
--- a/rpython/rlib/rvmprof/src/shared/vmprof_main.h
+++ b/rpython/rlib/rvmprof/src/shared/vmprof_main.h
@@ -218,7 +218,7 @@
         if (p == NULL) {
             /* ignore this signal: there are no free buffers right now */
         } else {
-#ifdef RPYTHON_VMPORF
+#ifdef RPYTHON_VMPROF
             commit = _vmprof_sample_stack(p, NULL, (ucontext_t*)ucontext);
 #else
             commit = _vmprof_sample_stack(p, tstate, (ucontext_t*)ucontext);


More information about the pypy-commit mailing list