[pypy-commit] pypy vmprof-native: apply changes made in pypy source repo, and never ported back to vmprof source

Thu Feb 9 06:23:11 EST 2017

Author: Richard Plangger <planrichi at gmail.com>
Branch: vmprof-native
Changeset: r90020:dffb13845738
Date: 2017-02-09 12:22 +0100
http://bitbucket.org/pypy/pypy/changeset/dffb13845738/

Log:	apply changes made in pypy source repo, and never ported back to
	vmprof source

diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py
--- a/pypy/module/_vmprof/test/test__vmprof.py
+++ b/pypy/module/_vmprof/test/test__vmprof.py
@@ -24,10 +24,11 @@
             i += 5 * WORD # header
             assert s[i    ] == '\x05'    # MARKER_HEADER
             assert s[i + 1] == '\x00'    # 0
-            assert s[i + 2] == '\x02'    # VERSION_THREAD_ID
-            assert s[i + 3] == chr(4)    # len('pypy')
-            assert s[i + 4: i + 8] == 'pypy'
-            i += 8
+            assert s[i + 2] == '\x06'    # VERSION_TIMESTAMP
+            assert s[i + 3] == '\x08'    # PROFILE_RPYTHON
+            assert s[i + 4] == chr(4)    # len('pypy')
+            assert s[i + 5: i + 9] == 'pypy'
+            i += 9
             while i < len(s):
                 if s[i] == '\x03':
                     break
@@ -41,6 +42,8 @@
                     _, size = struct.unpack("ll", s[i:i + 2 * WORD])
                     count += 1
                     i += 2 * WORD + size
+                elif s[i] == '\x06':
+                    i += 8+8+8
                 else:
                     raise AssertionError(ord(s[i]))
             return count
diff --git a/pypy/module/_vmprof/test/test_direct.py b/pypy/module/_vmprof/test/test_direct.py
--- a/pypy/module/_vmprof/test/test_direct.py
+++ b/pypy/module/_vmprof/test/test_direct.py
@@ -43,7 +43,7 @@
 }
 
 
-""" + open(str(srcdir.join("vmprof_get_custom_offset.h"))).read(), include_dirs=[str(srcdir)])
+""" + open(str(srcdir.join("shared/vmprof_get_custom_offset.h"))).read(), include_dirs=[str(srcdir)])
 
 class TestDirect(object):
     def test_infrastructure(self):
diff --git a/rpython/rlib/rvmprof/rvmprof.py b/rpython/rlib/rvmprof/rvmprof.py
--- a/rpython/rlib/rvmprof/rvmprof.py
+++ b/rpython/rlib/rvmprof/rvmprof.py
@@ -185,15 +185,16 @@
 
     This function can only be called once during translation.
     It generates a C function called __vmprof_eval_vmprof which is used by
-    the C source code as an extern function. This is necessary while walking
-    the native stack. If you see __vmprof_eval_vmprof defined twice during
+    the vmprof C source code and is bound as an extern function.
+    This is necessary while walking the native stack.
+    If you see __vmprof_eval_vmprof defined twice during
     translation, read on:
 
     To remove this restriction do the following:
 
     *) Extend the macro IS_VMPROF_EVAL in the vmprof source repo to check several
        sybmols.
-    *) Give each function provided to this decorator a unique name in C
+    *) Give each function provided to this decorator a unique symbol name in C
     """
     if _hack_update_stack_untranslated:
         from rpython.rtyper.annlowlevel import llhelper
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.h b/rpython/rlib/rvmprof/src/shared/vmprof_common.h
--- a/rpython/rlib/rvmprof/src/shared/vmprof_common.h
+++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.h
@@ -60,8 +60,9 @@
 char *vmprof_init(int fd, double interval, int memory,
                   int lines, const char *interp_name, int native)
 {
-    if (interval < 1e-6 || interval >= 1.0)
+    if (!(interval >= 1e-6 && interval < 1.0)) {   /* also if it is NaN */
         return "bad value for 'interval'";
+    }
     prepare_interval_usec = (int)(interval * 1000000.0);
 
     if (prepare_concurrent_bufs() < 0)
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_get_custom_offset.h b/rpython/rlib/rvmprof/src/shared/vmprof_get_custom_offset.h
--- a/rpython/rlib/rvmprof/src/shared/vmprof_get_custom_offset.h
+++ b/rpython/rlib/rvmprof/src/shared/vmprof_get_custom_offset.h
@@ -1,120 +1,53 @@
 #pragma once
 
-#ifdef PYPY_JIT_CODEMAP
 void *pypy_find_codemap_at_addr(long addr, long *start_addr);
 long pypy_yield_codemap_at_addr(void *codemap_raw, long addr,
                                 long *current_pos_addr);
-long pypy_jit_stack_depth_at_loc(long loc);
-#endif
 
+#define MAX_INLINE_DEPTH  384
 
-#ifdef CPYTHON_GET_CUSTOM_OFFSET
-static void *tramp_start, *tramp_end;
-#endif
-
-
-static ptrdiff_t vmprof_unw_get_custom_offset(void* ip, void *cp) {
-
-#if defined(PYPY_JIT_CODEMAP)
-
-    intptr_t ip_l = (intptr_t)ip;
-    return pypy_jit_stack_depth_at_loc(ip_l);
-
-#elif defined(CPYTHON_GET_CUSTOM_OFFSET)
-
-    if (ip >= tramp_start && ip <= tramp_end) {
-        // XXX the return value is wrong for all the places before push and
-        //     after pop, fix
-        void *bp;
-        void *sp;
-
-        /* This is a stage2 trampoline created by hotpatch:
-
-               push   %rbx
-               push   %rbp
-               mov    %rsp,%rbp
-               and    $0xfffffffffffffff0,%rsp   // make sure the stack is aligned
-               movabs $0x7ffff687bb10,%rbx
-               callq  *%rbx
-               leaveq 
-               pop    %rbx
-               retq   
-
-           the stack layout is like this:
-
-               +-----------+                      high addresses
-               | ret addr  |
-               +-----------+
-               | saved rbx |   start of the function frame
-               +-----------+
-               | saved rbp |
-               +-----------+
-               | ........  |   <-- rbp
-               +-----------+                      low addresses
-
-           So, the trampoline frame starts at rbp+16, and the return address,
-           is at rbp+24.  The vmprof API requires us to return the offset of
-           the frame relative to sp, hence we have this weird computation.
-
-           XXX (antocuni): I think we could change the API to return directly
-           the frame address instead of the offset; however, this require a
-           change in the PyPy code too
-        */
-
-        unw_get_reg (cp, UNW_REG_SP, (unw_word_t*)&sp);
-        unw_get_reg (cp, UNW_X86_64_RBP, (unw_word_t*)&bp);
-        return bp+16+8-sp;
-    }
-    return -1;
-
-#else
-
-    return -1;
-
-#endif
-}
-
-static long vmprof_write_header_for_jit_addr(void **result, long n,
-                                             void *ip, int max_depth)
+static long vmprof_write_header_for_jit_addr(intptr_t *result, long n,
+                                             intptr_t addr, int max_depth)
 {
 #ifdef PYPY_JIT_CODEMAP
     void *codemap;
     long current_pos = 0;
-    intptr_t id;
+    intptr_t ident, local_stack[MAX_INLINE_DEPTH];
+    long m;
     long start_addr = 0;
-    intptr_t addr = (intptr_t)ip;
-    int start, k;
-    void *tmp;
 
     codemap = pypy_find_codemap_at_addr(addr, &start_addr);
-    if (codemap == NULL)
-        // not a jit code at all
+    if (codemap == NULL || n >= max_depth - 2)
+        // not a jit code at all or almost max depth
         return n;
 
     // modify the last entry to point to start address and not the random one
     // in the middle
-    result[n - 1] = (void*)start_addr;
-    result[n] = (void*)2;
-    n++;
-    start = n;
-    while (n < max_depth) {
-        id = pypy_yield_codemap_at_addr(codemap, addr, &current_pos);
-        if (id == -1)
+    result[n++] = VMPROF_ASSEMBLER_TAG;
+    result[n++] = start_addr;
+
+    // build the list of code idents corresponding to the current
+    // position inside this particular piece of assembler.  If (very
+    // unlikely) we get more than MAX_INLINE_DEPTH recursion levels
+    // all inlined inside this single piece of assembler, then stop:
+    // there will be some missing frames then.  Otherwise, we need to
+    // first collect 'local_stack' and then write it to 'result' in the
+    // opposite order, stopping at 'max_depth'.  Previous versions of
+    // the code would write the oldest calls and then stop---whereas
+    // what we really need is to write the newest calls and then stop.
+    m = 0;
+    while (m < MAX_INLINE_DEPTH) {
+        ident = pypy_yield_codemap_at_addr(codemap, addr, &current_pos);
+        if (ident == -1)
             // finish
             break;
-        if (id == 0)
+        if (ident == 0)
             continue; // not main codemap
-        result[n++] = (void *)id;
+        local_stack[m++] = ident;
     }
-    k = 0;
-    while (k < (n - start) / 2) {
-        tmp = result[start + k];
-        result[start + k] = result[n - k - 1];
-        result[n - k - 1] = tmp;
-        k++;
-    }
-    if (n < max_depth) {
-        result[n++] = (void*)3;
+    while (m > 0 && n < max_depth) {
+        result[n++] = VMPROF_JITTED_TAG;
+        result[n++] = local_stack[--m];
     }
 #endif
     return n;