[pypy-commit] pypy default: merge vmprof

fijal noreply at buildbot.pypy.org
Thu Apr 16 10:17:52 CEST 2015


Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: 
Changeset: r76802:86cfdf3d2620
Date: 2015-04-16 10:17 +0200
http://bitbucket.org/pypy/pypy/changeset/86cfdf3d2620/

Log:	merge vmprof

diff too long, truncating to 2000 out of 2558 lines

diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -9,7 +9,6 @@
 from rpython.rlib.signature import signature
 from rpython.rlib.rarithmetic import r_uint, SHRT_MIN, SHRT_MAX, \
     INT_MIN, INT_MAX, UINT_MAX, USHRT_MAX
-from rpython.rlib.rweaklist import RWeakListMixin
 
 from pypy.interpreter.executioncontext import (ExecutionContext, ActionFlag,
     UserDelAction)
@@ -367,10 +366,6 @@
 
 # ____________________________________________________________
 
-class CodeObjWeakList(RWeakListMixin):
-    def __init__(self):
-        self.initialize()
-
 class ObjSpace(object):
     """Base class for the interpreter-level implementations of object spaces.
     http://pypy.readthedocs.org/en/latest/objspace.html"""
@@ -394,7 +389,6 @@
         self.check_signal_action = None   # changed by the signal module
         self.user_del_action = UserDelAction(self)
         self._code_of_sys_exc_info = None
-        self.all_code_objs = CodeObjWeakList()
 
         # can be overridden to a subclass
         self.initialize()
@@ -672,16 +666,16 @@
             assert ec is not None
             return ec
 
+    def register_code_callback(self, callback):
+        ec = self.getexecutioncontext()
+        ec._code_callback = callback
+
     def register_code_object(self, pycode):
-        callback = self.getexecutioncontext().register_code_callback
-        if callback is not None:
-            callback(self, pycode)
-        self.all_code_objs.add_handle(pycode)
-
-    def set_code_callback(self, callback):
         ec = self.getexecutioncontext()
-        ec.register_code_callback = callback
-        
+        if ec._code_callback is None:
+            return
+        ec._code_callback(self, pycode)
+    
     def _freeze_(self):
         return True
 
diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py
--- a/pypy/interpreter/executioncontext.py
+++ b/pypy/interpreter/executioncontext.py
@@ -2,6 +2,7 @@
 from pypy.interpreter.error import OperationError, get_cleared_operation_error
 from rpython.rlib.unroll import unrolling_iterable
 from rpython.rlib import jit
+from rpython.rlib.objectmodel import we_are_translated
 
 TICK_COUNTER_STEP = 100
 
@@ -33,11 +34,16 @@
         self.profilefunc = None
         self.w_profilefuncarg = None
         self.thread_disappeared = False   # might be set to True after os.fork()
-        self.register_code_callback = None
+
         if sys.maxint == 2147483647:
             self._code_unique_id = 0 # XXX this is wrong, it won't work on 32bit
         else:
-            self._code_unique_id = 0x7000000000000000
+            if we_are_translated():
+                self._code_unique_id = 0x7000000000000000
+            else:
+                self._code_unique_id = 0x7700000000000000
+                # should be enough code objects
+        self._code_callback = None
 
     @staticmethod
     def _mark_thread_disappeared(space):
diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py
--- a/pypy/interpreter/pycode.py
+++ b/pypy/interpreter/pycode.py
@@ -129,7 +129,7 @@
 
         ec = self.space.getexecutioncontext()
         self._unique_id = ec._code_unique_id
-        ec._code_unique_id += 2 # so we have one bit that we can mark stuff
+        ec._code_unique_id += 4 # so we have two bits that we can mark stuff
         # with
 
     def _get_full_name(self):
diff --git a/pypy/module/_vmprof/interp_vmprof.py b/pypy/module/_vmprof/interp_vmprof.py
--- a/pypy/module/_vmprof/interp_vmprof.py
+++ b/pypy/module/_vmprof/interp_vmprof.py
@@ -3,13 +3,15 @@
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
 from rpython.rtyper.annlowlevel import cast_instance_to_gcref, cast_base_ptr_to_instance
 from rpython.rlib.objectmodel import we_are_translated
-from rpython.rlib import jit, rposix, entrypoint
+from rpython.rlib import jit, rposix, rgc
+from rpython.rlib.rarithmetic import ovfcheck_float_to_int
 from rpython.rtyper.tool import rffi_platform as platform
 from rpython.rlib.rstring import StringBuilder
 from pypy.interpreter.baseobjspace import W_Root
 from pypy.interpreter.error import oefmt, wrap_oserror, OperationError
 from pypy.interpreter.gateway import unwrap_spec
 from pypy.interpreter.pyframe import PyFrame
+from pypy.interpreter.pycode import PyCode
 
 ROOT = py.path.local(__file__).join('..')
 SRC = ROOT.join('src')
@@ -28,14 +30,13 @@
     libraries = ['unwind'],
     
     post_include_bits=["""
-        void* pypy_vmprof_get_virtual_ip(void*);
         void pypy_vmprof_init(void);
     """],
     
     separate_module_sources=["""
         void pypy_vmprof_init(void) {
             vmprof_set_mainloop(pypy_execute_frame_trampoline, 0,
-                                pypy_vmprof_get_virtual_ip);
+                                NULL);
         }
     """],
     )
@@ -56,7 +57,7 @@
 
 pypy_execute_frame_trampoline = rffi.llexternal(
     "pypy_execute_frame_trampoline",
-    [llmemory.GCREF, llmemory.GCREF, llmemory.GCREF],
+    [llmemory.GCREF, llmemory.GCREF, llmemory.GCREF, lltype.Signed],
     llmemory.GCREF,
     compilation_info=eci,
     _nowrapper=True, sandboxsafe=True,
@@ -96,23 +97,15 @@
             gc_frame = cast_instance_to_gcref(frame)
             gc_inputvalue = cast_instance_to_gcref(w_inputvalue)
             gc_operr = cast_instance_to_gcref(operr)
-            gc_result = pypy_execute_frame_trampoline(gc_frame, gc_inputvalue, gc_operr)
+            assert frame.pycode._unique_id & 3 == 0
+            unique_id = frame.pycode._unique_id | 1
+            gc_result = pypy_execute_frame_trampoline(gc_frame, gc_inputvalue,
+                                                      gc_operr, unique_id)
             return cast_base_ptr_to_instance(W_Root, gc_result)
         else:
             return original_execute_frame(frame, w_inputvalue, operr)
 
 
- at entrypoint.entrypoint_lowlevel('main', [llmemory.GCREF],
-                                'pypy_vmprof_get_virtual_ip', True)
-def get_virtual_ip(gc_frame):
-    frame = cast_base_ptr_to_instance(PyFrame, gc_frame)
-    if jit._get_virtualizable_token(frame):
-        return rffi.cast(rffi.VOIDP, 0)
-    virtual_ip = do_get_virtual_ip(frame)
-    return rffi.cast(rffi.VOIDP, virtual_ip)
-
-def do_get_virtual_ip(frame):
-    return frame.pycode._unique_id
 
 def write_long_to_string_builder(l, b):
     if sys.maxint == 2147483647:
@@ -130,31 +123,33 @@
         b.append(chr((l >> 48) & 0xff))
         b.append(chr((l >> 56) & 0xff))
 
+def try_cast_to_pycode(gcref):
+    return rgc.try_cast_gcref_to_instance(PyCode, gcref)
+
+MAX_CODES = 1000
+
 class VMProf(object):
     def __init__(self):
         self.is_enabled = False
         self.ever_enabled = False
-        self.mapping_so_far = [] # stored mapping in between runs
         self.fileno = -1
+        self.current_codes = []
 
-    def enable(self, space, fileno, period):
+    def enable(self, space, fileno, period_usec):
         if self.is_enabled:
             raise oefmt(space.w_ValueError, "_vmprof already enabled")
         self.fileno = fileno
         self.is_enabled = True
-        self.write_header(fileno, period)
+        self.write_header(fileno, period_usec)
         if not self.ever_enabled:
             if we_are_translated():
                 pypy_vmprof_init()
             self.ever_enabled = True
-        for weakcode in space.all_code_objs.get_all_handles():
-            code = weakcode()
-            if code:
-                self.register_code(space, code)
-        space.set_code_callback(vmprof_register_code)
+        self.gather_all_code_objs(space)
+        space.register_code_callback(vmprof_register_code)
         if we_are_translated():
             # does not work untranslated
-            res = vmprof_enable(fileno, period, 0,
+            res = vmprof_enable(fileno, period_usec, 0,
                                 lltype.nullptr(rffi.CCHARP.TO), 0)
         else:
             res = 0
@@ -162,42 +157,55 @@
             raise wrap_oserror(space, OSError(rposix.get_saved_errno(),
                                               "_vmprof.enable"))
 
-    def write_header(self, fileno, period):
-        if period == -1:
-            period_usec = 1000000 / 100 #  100hz
-        else:
-            period_usec = period
+    def gather_all_code_objs(self, space):
+        all_code_objs = rgc.do_get_objects(try_cast_to_pycode)
+        for code in all_code_objs:
+            self.register_code(space, code)
+
+    def write_header(self, fileno, period_usec):
+        assert period_usec > 0
         b = StringBuilder()
         write_long_to_string_builder(0, b)
         write_long_to_string_builder(3, b)
         write_long_to_string_builder(0, b)
         write_long_to_string_builder(period_usec, b)
         write_long_to_string_builder(0, b)
+        b.append('\x04') # interp name
+        b.append(chr(len('pypy')))
+        b.append('pypy')
         os.write(fileno, b.build())
 
     def register_code(self, space, code):
         if self.fileno == -1:
             raise OperationError(space.w_RuntimeError,
                                  space.wrap("vmprof not running"))
-        name = code._get_full_name()
+        self.current_codes.append(code)
+        if len(self.current_codes) >= MAX_CODES:
+            self._flush_codes(space)
+
+    def _flush_codes(self, space):
         b = StringBuilder()
-        b.append('\x02')
-        write_long_to_string_builder(code._unique_id, b)
-        write_long_to_string_builder(len(name), b)
-        b.append(name)
+        for code in self.current_codes:
+            name = code._get_full_name()
+            b.append('\x02')
+            write_long_to_string_builder(code._unique_id, b)
+            write_long_to_string_builder(len(name), b)
+            b.append(name)
         os.write(self.fileno, b.build())
+        self.current_codes = []
 
     def disable(self, space):
         if not self.is_enabled:
             raise oefmt(space.w_ValueError, "_vmprof not enabled")
         self.is_enabled = False
+        space.register_code_callback(None)
+        self._flush_codes(space)
         self.fileno = -1
         if we_are_translated():
            # does not work untranslated
             res = vmprof_disable()
         else:
             res = 0
-        space.set_code_callback(None)
         if res == -1:
             raise wrap_oserror(space, OSError(rposix.get_saved_errno(),
                                               "_vmprof.disable"))
@@ -207,13 +215,23 @@
     mod_vmprof = space.getbuiltinmodule('_vmprof')
     assert isinstance(mod_vmprof, Module)
     mod_vmprof.vmprof.register_code(space, code)
-        
- at unwrap_spec(fileno=int, period=int)
-def enable(space, fileno, period=-1):
+
+ at unwrap_spec(fileno=int, period=float)
+def enable(space, fileno, period=0.01):   # default 100 Hz
     from pypy.module._vmprof import Module
     mod_vmprof = space.getbuiltinmodule('_vmprof')
     assert isinstance(mod_vmprof, Module)
-    mod_vmprof.vmprof.enable(space, fileno, period)
+    #
+    try:
+        period_usec = ovfcheck_float_to_int(period * 1000000.0 + 0.5)
+        if period_usec <= 0 or period_usec >= 1e6:
+            # we don't want seconds here at all
+            raise ValueError
+    except (ValueError, OverflowError):
+        raise OperationError(space.w_ValueError,
+                             space.wrap("'period' too large or non positive"))
+    #
+    mod_vmprof.vmprof.enable(space, fileno, period_usec)
 
 def disable(space):
     from pypy.module._vmprof import Module
diff --git a/pypy/module/_vmprof/src/fake_pypy_api.c b/pypy/module/_vmprof/src/fake_pypy_api.c
--- a/pypy/module/_vmprof/src/fake_pypy_api.c
+++ b/pypy/module/_vmprof/src/fake_pypy_api.c
@@ -1,25 +1,15 @@
-
-long pypy_jit_start_addr(void)
-{
-	return 3;
-}
-
-long pypy_jit_end_addr(void)
-{
-	return 3;
-}
 
 long pypy_jit_stack_depth_at_loc(long x)
 {
 	return 0;
 }
 
-long pypy_find_codemap_at_addr(long x)
+void *pypy_find_codemap_at_addr(long x)
 {
-	return 0;
+	return (void *)0;
 }
 
-long pypy_yield_codemap_at_addr(long x, long y, long *a)
+long pypy_yield_codemap_at_addr(void *x, long y, long *a)
 {
 	return 0;
 }
@@ -27,3 +17,5 @@
 void pypy_pyframe_execute_frame(void)
 {
 }
+
+volatile int pypy_codemap_currently_invalid = 0;
diff --git a/pypy/module/_vmprof/src/get_custom_offset.c b/pypy/module/_vmprof/src/get_custom_offset.c
--- a/pypy/module/_vmprof/src/get_custom_offset.c
+++ b/pypy/module/_vmprof/src/get_custom_offset.c
@@ -1,46 +1,65 @@
 
-long pypy_jit_start_addr();
-long pypy_jit_end_addr();
-long pypy_jit_stack_depth_at_loc(long);
-long pypy_find_codemap_at_addr(long);
-long pypy_yield_codemap_at_addr(long, long, long*);
+extern volatile int pypy_codemap_currently_invalid;
+
+void *pypy_find_codemap_at_addr(long addr, long *start_addr);
+long pypy_yield_codemap_at_addr(void *codemap_raw, long addr,
+                                long *current_pos_addr);
+long pypy_jit_stack_depth_at_loc(long loc);
+
 
 void vmprof_set_tramp_range(void* start, void* end)
 {
 }
 
-static ptrdiff_t vmprof_unw_get_custom_offset(void* ip, unw_cursor_t *cp) {
-	intptr_t ip_l = (intptr_t)ip;
+int custom_sanity_check()
+{
+    return !pypy_codemap_currently_invalid;
+}
 
-	if (ip_l < pypy_jit_start_addr() || ip_l > pypy_jit_end_addr()) {
-		return -1;
-	}
-	return (void*)pypy_jit_stack_depth_at_loc(ip_l);
+static ptrdiff_t vmprof_unw_get_custom_offset(void* ip, void *cp) {
+    intptr_t ip_l = (intptr_t)ip;
+    return pypy_jit_stack_depth_at_loc(ip_l);
 }
 
 static long vmprof_write_header_for_jit_addr(void **result, long n,
-											 void *ip, int max_depth)
+                                             void *ip, int max_depth)
 {
-	long codemap_pos;
-	long current_pos = 0;
-	intptr_t id;
-	intptr_t addr = (intptr_t)ip;
+    void *codemap;
+    long current_pos = 0;
+    intptr_t id;
+    long start_addr = 0;
+    intptr_t addr = (intptr_t)ip;
+    int start, k;
+    void *tmp;
 
-	if (addr < pypy_jit_start_addr() || addr > pypy_jit_end_addr()) {
-		return n;
-	}
-	codemap_pos = pypy_find_codemap_at_addr(addr);
-	if (codemap_pos == -1) {
-		return n;
-	}
-	while (1) {
-		id = pypy_yield_codemap_at_addr(codemap_pos, addr, &current_pos);
-		if (id == 0) {
-			return n;
-		}
-		result[n++] = id;
-		if (n >= max_depth) {
-			return n;
-		}
-	}
+    codemap = pypy_find_codemap_at_addr(addr, &start_addr);
+    if (codemap == NULL)
+        // not a jit code at all
+        return n;
+
+    // modify the last entry to point to start address and not the random one
+    // in the middle
+    result[n - 1] = (void*)start_addr;
+    start = n;
+    while (n < max_depth) {
+        id = pypy_yield_codemap_at_addr(codemap, addr, &current_pos);
+        if (id == 0)
+            // finish
+            break;
+        result[n++] = (void *)id;
+    }
+    // we strip the topmost part - the reason is that it's either
+    // represented in the jitted caller or it's not jitted (we have the
+    // same function essentially twice)
+    k = 0;
+    while (k < (n - start) / 2) {
+        tmp = result[start + k];
+        result[start + k] = result[n - k - 1];
+        result[n - k - 1] = tmp;
+        k++;
+    }
+    if (n != max_depth) {
+        n--;
+    }
+    return n;
 }
diff --git a/pypy/module/_vmprof/src/trampoline.asmgcc.s b/pypy/module/_vmprof/src/trampoline.asmgcc.s
--- a/pypy/module/_vmprof/src/trampoline.asmgcc.s
+++ b/pypy/module/_vmprof/src/trampoline.asmgcc.s
@@ -6,11 +6,10 @@
 	.type	pypy_execute_frame_trampoline, @function
 pypy_execute_frame_trampoline:
 	.cfi_startproc
-	pushq	%rdi
+	pushq	%rcx
 	.cfi_def_cfa_offset 16
 	call pypy_pyframe_execute_frame at PLT
-	/* GCROOT 0(%rsp) */
-	popq	%rdi
+	popq	%rcx
 	.cfi_def_cfa_offset 8
 	ret
 	.cfi_endproc
diff --git a/pypy/module/_vmprof/src/trampoline.h b/pypy/module/_vmprof/src/trampoline.h
--- a/pypy/module/_vmprof/src/trampoline.h
+++ b/pypy/module/_vmprof/src/trampoline.h
@@ -1,1 +1,1 @@
-void* pypy_execute_frame_trampoline(void*, void*, void*);
+void* pypy_execute_frame_trampoline(void*, void*, void*, long);
diff --git a/pypy/module/_vmprof/src/vmprof.c b/pypy/module/_vmprof/src/vmprof.c
--- a/pypy/module/_vmprof/src/vmprof.c
+++ b/pypy/module/_vmprof/src/vmprof.c
@@ -25,6 +25,8 @@
 #include <unistd.h>
 #include <sys/time.h>
 #include <sys/types.h>
+#include <errno.h>
+#include <pthread.h>
 
 #define UNW_LOCAL_ONLY
 #include <libunwind.h>
@@ -34,12 +36,18 @@
 #define _unused(x) ((void)x)
 
 #define MAX_FUNC_NAME 128
-#define MAX_STACK_DEPTH 64
+#define MAX_STACK_DEPTH 1024
+#define BUFFER_SIZE 8192
 
-static FILE* profile_file = NULL;
+
+static int profile_file = 0;
+static char profile_write_buffer[BUFFER_SIZE];
+static int profile_buffer_position = 0;
 void* vmprof_mainloop_func;
 static ptrdiff_t mainloop_sp_offset;
 static vmprof_get_virtual_ip_t mainloop_get_virtual_ip;
+static long last_period_usec = 0;
+static int atfork_hook_installed = 0;
 
 
 /* *************************************************************
@@ -51,27 +59,33 @@
 #define MARKER_VIRTUAL_IP '\x02'
 #define MARKER_TRAILER '\x03'
 
-static void prof_word(FILE* f, long x) {
-    fwrite(&x, sizeof(x), 1, f);
+static void prof_word(long x) {
+	((long*)(profile_write_buffer + profile_buffer_position))[0] = x;
+	profile_buffer_position += sizeof(long);
 }
 
-static void prof_header(FILE* f, long period_usec) {
-    prof_word(f, 0);
-    prof_word(f, 3);
-    prof_word(f, 0);
-    prof_word(f, period_usec);
-    prof_word(f, 0);
+static void prof_header(long period_usec) {
+    // XXX never used here?
+    prof_word(0);
+    prof_word(3);
+    prof_word(0);
+    prof_word(period_usec);
+    prof_word(0);
+    write(profile_file, profile_write_buffer, profile_buffer_position);
+    profile_buffer_position = 0;
 }
 
-static void prof_write_stacktrace(FILE* f, void** stack, int depth, int count) {
+static void prof_write_stacktrace(void** stack, int depth, int count) {
     int i;
 	char marker = MARKER_STACKTRACE;
 
-	fwrite(&marker, 1, 1, f);
-    prof_word(f, count);
-    prof_word(f, depth);
+	profile_write_buffer[profile_buffer_position++] = MARKER_STACKTRACE;
+    prof_word(count);
+    prof_word(depth);
     for(i=0; i<depth; i++)
-        prof_word(f, (long)stack[i]);
+        prof_word((long)stack[i]);
+    write(profile_file, profile_write_buffer, profile_buffer_position);
+    profile_buffer_position = 0;
 }
 
 
@@ -90,12 +104,17 @@
     void* _unused3[sizeof(unw_cursor_t)/sizeof(void*) - 4];
 } vmprof_hacked_unw_cursor_t;
 
-static int vmprof_unw_step(unw_cursor_t *cp) {
+static int vmprof_unw_step(unw_cursor_t *cp, int first_run) {
 	void* ip;
     void* sp;
     ptrdiff_t sp_offset;
     unw_get_reg (cp, UNW_REG_IP, (unw_word_t*)&ip);
     unw_get_reg (cp, UNW_REG_SP, (unw_word_t*)&sp);
+	if (!first_run)
+		// make sure we're pointing to the CALL and not to the first
+		// instruction after. If the callee adjusts the stack for us
+		// it's not safe to be at the instruction after
+		ip -= 1;
     sp_offset = vmprof_unw_get_custom_offset(ip, cp);
 
     if (sp_offset == -1) {
@@ -122,30 +141,30 @@
  * *************************************************************
  */
 
-// stolen from pprof:
-// Sometimes, we can try to get a stack trace from within a stack
-// trace, because libunwind can call mmap (maybe indirectly via an
-// internal mmap based memory allocator), and that mmap gets trapped
-// and causes a stack-trace request.  If were to try to honor that
-// recursive request, we'd end up with infinite recursion or deadlock.
-// Luckily, it's safe to ignore those subsequent traces.  In such
-// cases, we return 0 to indicate the situation.
+// The original code here has a comment, "stolen from pprof",
+// about a "__thread int recursive".  But general __thread
+// variables are not really supposed to be accessed from a
+// signal handler.  Moreover, we are using SIGPROF, which
+// should not be recursively called on the same thread.
 //static __thread int recursive;
-static int recursive; // XXX antocuni: removed __thread
 
 int get_stack_trace(void** result, int max_depth, ucontext_t *ucontext) {
     void *ip;
     int n = 0;
     unw_cursor_t cursor;
     unw_context_t uc = *ucontext;
-    if (recursive) {
+    //if (recursive) {
+    //    return 0;
+    //}
+    if (!custom_sanity_check()) {
         return 0;
     }
-    ++recursive;
+    //++recursive;
 
     int ret = unw_init_local(&cursor, &uc);
     assert(ret >= 0);
     _unused(ret);
+	int first_run = 1;
 
     while (n < max_depth) {
         if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) {
@@ -173,16 +192,21 @@
           void **arg_ptr = (void**)arg_addr;
           // fprintf(stderr, "stacktrace mainloop: rsp %p   &f2 %p   offset %ld\n", 
           //         sp, arg_addr, mainloop_sp_offset);
-          ip = mainloop_get_virtual_ip(*arg_ptr);
+		  if (mainloop_get_virtual_ip) {
+			  ip = mainloop_get_virtual_ip(*arg_ptr);
+		  } else {
+			  ip = *arg_ptr;
+		  }
         }
 
         result[n++] = ip;
 		n = vmprof_write_header_for_jit_addr(result, n, ip, max_depth);
-        if (vmprof_unw_step(&cursor) <= 0) {
+        if (vmprof_unw_step(&cursor, first_run) <= 0) {
             break;
         }
+		first_run = 0;
     }
-    --recursive;
+    //--recursive;
     return n;
 }
 
@@ -193,10 +217,12 @@
 
 static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext) {
     void* stack[MAX_STACK_DEPTH];
+    int saved_errno = errno;
     stack[0] = GetPC((ucontext_t*)ucontext);
     int depth = frame_forcer(get_stack_trace(stack+1, MAX_STACK_DEPTH-1, ucontext));
     depth++;  // To account for pc value in stack[0];
-    prof_write_stacktrace(profile_file, stack, depth, 1);
+    prof_write_stacktrace(stack, depth, 1);
+    errno = saved_errno;
 }
 
 /* *************************************************************
@@ -209,14 +235,12 @@
 	if ((fd = dup(fd)) == -1) {
 		return -1;
 	}
-    profile_file = fdopen(fd, "wb");
-	if (!profile_file) {
-		return -1;
-	}
+	profile_buffer_position = 0;
+    profile_file = fd;
 	if (write_header)
-		prof_header(profile_file, period_usec);
+		prof_header(period_usec);
 	if (s)
-		fwrite(s, slen, 1, profile_file);
+		write(profile_file, s, slen);
 	return 0;
 }
 
@@ -226,16 +250,16 @@
     char buf[BUFSIZ];
     size_t size;
 	int marker = MARKER_TRAILER;
-	fwrite(&marker, 1, 1, profile_file);
+	write(profile_file, &marker, 1);
 
     // copy /proc/PID/maps to the end of the profile file
     sprintf(buf, "/proc/%d/maps", getpid());
     src = fopen(buf, "r");    
     while ((size = fread(buf, 1, BUFSIZ, src))) {
-        fwrite(buf, 1, size, profile_file);
+        write(profile_file, buf, size);
     }
     fclose(src);
-    fclose(profile_file);
+    close(profile_file);
 	return 0;
 }
 
@@ -253,15 +277,16 @@
 }
 
 static int remove_sigprof_handler(void) {
-    //sighandler_t res = signal(SIGPROF, SIG_DFL);
-	//if (res == SIG_ERR) {
-	//	return -1;
-	//}
+    sighandler_t res = signal(SIGPROF, SIG_DFL);
+	if (res == SIG_ERR) {
+		return -1;
+	}
 	return 0;
 };
 
 static int install_sigprof_timer(long period_usec) {
     static struct itimerval timer;
+    last_period_usec = period_usec;
     timer.it_interval.tv_sec = 0;
     timer.it_interval.tv_usec = period_usec;
     timer.it_value = timer.it_interval;
@@ -273,15 +298,45 @@
 
 static int remove_sigprof_timer(void) {
     static struct itimerval timer;
+    last_period_usec = 0;
     timer.it_interval.tv_sec = 0;
     timer.it_interval.tv_usec = 0;
-    timer.it_value = timer.it_interval;
+    timer.it_value.tv_sec = 0;
+    timer.it_value.tv_usec = 0;
     if (setitimer(ITIMER_PROF, &timer, NULL) != 0) {
 		return -1;
     }
 	return 0;
 }
 
+static void atfork_disable_timer(void) {
+    remove_sigprof_timer();
+}
+
+static void atfork_enable_timer(void) {
+    install_sigprof_timer(last_period_usec);
+}
+
+static int install_pthread_atfork_hooks(void) {
+    /* this is needed to prevent the problems described here:
+         - http://code.google.com/p/gperftools/issues/detail?id=278
+         - http://lists.debian.org/debian-glibc/2010/03/msg00161.html
+
+        TL;DR: if the RSS of the process is large enough, the clone() syscall
+        will be interrupted by the SIGPROF before it can complete, then
+        retried, interrupted again and so on, in an endless loop.  The
+        solution is to disable the timer around the fork, and re-enable it
+        only inside the parent.
+    */
+    if (atfork_hook_installed)
+        return 0;
+    int ret = pthread_atfork(atfork_disable_timer, atfork_enable_timer, NULL);
+    if (ret != 0)
+        return -1;
+    atfork_hook_installed = 1;
+    return 0;
+}
+
 /* *************************************************************
  * public API
  * *************************************************************
@@ -297,8 +352,7 @@
 int vmprof_enable(int fd, long period_usec, int write_header, char *s,
 				  int slen)
 {
-    if (period_usec == -1)
-        period_usec = 1000000 / 100; /* 100hz */
+    assert(period_usec > 0);
     if (open_profile(fd, period_usec, write_header, s, slen) == -1) {
 		return -1;
 	}
@@ -308,6 +362,9 @@
     if (install_sigprof_timer(period_usec) == -1) {
 		return -1;
 	}
+    if (install_pthread_atfork_hooks() == -1) {
+        return -1;
+    }
 	return 0;
 }
 
@@ -325,6 +382,7 @@
 }
 
 void vmprof_register_virtual_function(const char* name, void* start, void* end) {
+	// XXX unused by pypy
     // for now *end is simply ignored
 	char buf[1024];
 	int lgt = strlen(name) + 2 * sizeof(long) + 1;
@@ -336,5 +394,5 @@
 	((void **)(((void*)buf) + 1))[0] = start;
 	((long *)(((void*)buf) + 1 + sizeof(long)))[0] = lgt - 2 * sizeof(long) - 1;
 	strncpy(buf + 2 * sizeof(long) + 1, name, 1024 - 2 * sizeof(long) - 1);
-	fwrite(buf, lgt, 1, profile_file);
+	write(profile_file, buf, lgt);
 }
diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py
--- a/pypy/module/_vmprof/test/test__vmprof.py
+++ b/pypy/module/_vmprof/test/test__vmprof.py
@@ -21,6 +21,11 @@
             i = 0
             count = 0
             i += 5 * WORD # header
+            assert s[i] == '\x04'
+            i += 1 # marker
+            assert s[i] == '\x04'
+            i += 1 # length
+            i += len('pypy')
             while i < len(s):
                 assert s[i] == '\x02'
                 i += 1
@@ -53,3 +58,11 @@
         assert "py:foo:" in s
         assert "py:foo2:" in s
         assert no_of_codes2 >= no_of_codes + 2 # some extra codes from tests
+
+    def test_enable_ovf(self):
+        import _vmprof
+        raises(ValueError, _vmprof.enable, 999, 0)
+        raises(ValueError, _vmprof.enable, 999, -2.5)
+        raises(ValueError, _vmprof.enable, 999, 1e300)
+        raises(ValueError, _vmprof.enable, 999, 1e300 * 1e300)
+        raises(ValueError, _vmprof.enable, 999, (1e300*1e300) / (1e300*1e300))
diff --git a/pypy/module/_vmprof/test/test_direct.py b/pypy/module/_vmprof/test/test_direct.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_vmprof/test/test_direct.py
@@ -0,0 +1,66 @@
+
+import cffi, py
+
+srcdir = py.path.local(__file__).join("..", "..", "src")
+
+ffi = cffi.FFI()
+ffi.cdef("""
+long vmprof_write_header_for_jit_addr(void **, long, void*, int);
+void *pypy_find_codemap_at_addr(long addr, long *start_addr);
+long pypy_yield_codemap_at_addr(void *codemap_raw, long addr,
+                                long *current_pos_addr);
+long buffer[];
+""")
+
+lib = ffi.verify("""
+volatile int pypy_codemap_currently_invalid = 0;
+
+long buffer[] = {0, 0, 0, 0, 0};
+
+
+
+void *pypy_find_codemap_at_addr(long addr, long *start_addr)
+{
+    return (void*)buffer;
+}
+
+long pypy_yield_codemap_at_addr(void *codemap_raw, long addr,
+                                long *current_pos_addr)
+{
+    long c = *current_pos_addr;
+    if (c >= 5)
+        return 0;
+    *current_pos_addr = c + 1;
+    return *((long*)codemap_raw + c);
+}
+
+
+""" + open(str(srcdir.join("get_custom_offset.c"))).read())
+
+class TestDirect(object):
+    def test_infrastructure(self):
+        cont = ffi.new("long[1]", [0])
+        buf = lib.pypy_find_codemap_at_addr(0, cont)
+        assert buf
+        cont[0] = 0
+        next_addr = lib.pypy_yield_codemap_at_addr(buf, 0, cont)
+        assert cont[0] == 1
+        assert not next_addr
+        lib.buffer[0] = 13
+        cont[0] = 0
+        next_addr = lib.pypy_yield_codemap_at_addr(buf, 0, cont)
+        assert int(ffi.cast("long", next_addr)) == 13
+
+    def test_write_header_for_jit_addr(self):
+        lib.buffer[0] = 4
+        lib.buffer[1] = 8
+        lib.buffer[2] = 12
+        lib.buffer[3] = 16
+        lib.buffer[4] = 0
+        buf = ffi.new("long[5]", [0] * 5)
+        result = ffi.cast("void**", buf)
+        res = lib.vmprof_write_header_for_jit_addr(result, 0, ffi.NULL, 100)
+        assert res == 3
+        assert buf[0] == 16
+        assert buf[1] == 12
+        assert buf[2] == 8
diff --git a/pypy/module/gc/referents.py b/pypy/module/gc/referents.py
--- a/pypy/module/gc/referents.py
+++ b/pypy/module/gc/referents.py
@@ -44,30 +44,6 @@
     return OperationError(space.w_NotImplementedError,
                           space.wrap("operation not implemented by this GC"))
 
-# ____________________________________________________________
-
-def clear_gcflag_extra(fromlist):
-    pending = fromlist[:]
-    while pending:
-        gcref = pending.pop()
-        if rgc.get_gcflag_extra(gcref):
-            rgc.toggle_gcflag_extra(gcref)
-            pending.extend(rgc.get_rpy_referents(gcref))
-
-def do_get_objects():
-    roots = [gcref for gcref in rgc.get_rpy_roots() if gcref]
-    pending = roots[:]
-    result_w = []
-    while pending:
-        gcref = pending.pop()
-        if not rgc.get_gcflag_extra(gcref):
-            rgc.toggle_gcflag_extra(gcref)
-            w_obj = try_cast_gcref_to_w_root(gcref)
-            if w_obj is not None:
-                result_w.append(w_obj)
-            pending.extend(rgc.get_rpy_referents(gcref))
-    clear_gcflag_extra(roots)
-    return result_w
 
 # ____________________________________________________________
 
@@ -116,8 +92,8 @@
                 break
     # done.  Clear flags carefully
     rgc.toggle_gcflag_extra(gcarg)
-    clear_gcflag_extra(roots)
-    clear_gcflag_extra([gcarg])
+    rgc.clear_gcflag_extra(roots)
+    rgc.clear_gcflag_extra([gcarg])
     return result_w
 
 # ____________________________________________________________
@@ -189,8 +165,7 @@
     """Return a list of all app-level objects."""
     if not rgc.has_gcflag_extra():
         raise missing_operation(space)
-    result_w = do_get_objects()
-    rgc.assert_no_more_gcflags()
+    result_w = rgc.do_get_objects(try_cast_gcref_to_w_root)
     return space.newlist(result_w)
 
 def get_referents(space, args_w):
diff --git a/pypy/module/pypyjit/interp_resop.py b/pypy/module/pypyjit/interp_resop.py
--- a/pypy/module/pypyjit/interp_resop.py
+++ b/pypy/module/pypyjit/interp_resop.py
@@ -105,7 +105,7 @@
             ofs = ops_offset.get(op, 0)
         if op.opnum == rop.DEBUG_MERGE_POINT:
             jd_sd = jitdrivers_sd[op.getarg(0).getint()]
-            greenkey = op.getarglist()[3:]
+            greenkey = op.getarglist()[4:]
             repr = jd_sd.warmstate.get_location_str(greenkey)
             w_greenkey = wrap_greenkey(space, jd_sd.jitdriver, greenkey, repr)
             l_w.append(DebugMergePoint(space, jit_hooks._cast_to_gcref(op),
diff --git a/pypy/module/pypyjit/test/test_jit_hook.py b/pypy/module/pypyjit/test/test_jit_hook.py
--- a/pypy/module/pypyjit/test/test_jit_hook.py
+++ b/pypy/module/pypyjit/test/test_jit_hook.py
@@ -55,7 +55,7 @@
         oplist = parse("""
         [i1, i2, p2]
         i3 = int_add(i1, i2)
-        debug_merge_point(0, 0, 0, 0, 0, ConstPtr(ptr0))
+        debug_merge_point(0, 0, 0, 0, 0, 0, ConstPtr(ptr0))
         guard_nonnull(p2) []
         guard_true(i3) []
         """, namespace={'ptr0': code_gcref}).operations
diff --git a/rpython/bin/rpython-vmprof b/rpython/bin/rpython-vmprof
new file mode 100755
--- /dev/null
+++ b/rpython/bin/rpython-vmprof
@@ -0,0 +1,28 @@
+#!/usr/bin/env pypy
+
+"""RPython translation usage:
+
+rpython <translation options> target <targetoptions>
+
+run with --help for more information
+"""
+
+import sys, os
+sys.path.insert(0, os.path.dirname(os.path.dirname(
+                       os.path.dirname(os.path.realpath(__file__)))))
+from rpython.translator.goal.translate import main
+
+# no implicit targets
+if len(sys.argv) == 1:
+    print __doc__
+    sys.exit(1)
+
+import _vmprof, subprocess
+x = subprocess.Popen('gzip > vmprof.log.gz', shell=True, stdin=subprocess.PIPE)
+_vmprof.enable(x.stdin.fileno(), 0.001)
+try:
+    main()
+finally:
+    _vmprof.disable()
+    x.stdin.close()
+    x.wait()
diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py
--- a/rpython/jit/backend/arm/assembler.py
+++ b/rpython/jit/backend/arm/assembler.py
@@ -102,7 +102,7 @@
         self.store_reg(mc, r.r0, r.fp, ofs)
         mc.MOV_rr(r.r0.value, r.fp.value)
         self.gen_func_epilog(mc)
-        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+        rawstart = mc.materialize(self.cpu, [])
         self.propagate_exception_path = rawstart
 
     def _store_and_reset_exception(self, mc, excvalloc=None, exctploc=None,
@@ -198,7 +198,7 @@
         mc.ADD_ri(r.sp.value, r.sp.value, (len(r.argument_regs) + 2) * WORD)
         mc.B(self.propagate_exception_path)
         #
-        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+        rawstart = mc.materialize(self.cpu, [])
         self.stack_check_slowpath = rawstart
 
     def _build_wb_slowpath(self, withcards, withfloats=False, for_frame=False):
@@ -255,7 +255,7 @@
         #
         mc.POP([r.ip.value, r.pc.value])
         #
-        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+        rawstart = mc.materialize(self.cpu, [])
         if for_frame:
             self.wb_slowpath[4] = rawstart
         else:
@@ -276,7 +276,7 @@
                                       callee_only)
         # return
         mc.POP([r.ip.value, r.pc.value])
-        return mc.materialize(self.cpu.asmmemmgr, [])
+        return mc.materialize(self.cpu, [])
 
     def _build_malloc_slowpath(self, kind):
         """ While arriving on slowpath, we have a gcpattern on stack 0.
@@ -352,7 +352,7 @@
         mc.POP([r.ip.value, r.pc.value])
 
         #
-        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+        rawstart = mc.materialize(self.cpu, [])
         return rawstart
 
     def _reload_frame_if_necessary(self, mc):
@@ -473,7 +473,7 @@
         mc.MOV_rr(r.r0.value, r.fp.value)
         #
         self.gen_func_epilog(mc)
-        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+        rawstart = mc.materialize(self.cpu, [])
         self.failure_recovery_code[exc + 2 * withfloats] = rawstart
 
     def generate_quick_failure(self, guardtok):
@@ -851,7 +851,7 @@
         # restore registers
         self._pop_all_regs_from_jitframe(mc, [], self.cpu.supports_floats)
         mc.POP([r.ip.value, r.pc.value])  # return
-        self._frame_realloc_slowpath = mc.materialize(self.cpu.asmmemmgr, [])
+        self._frame_realloc_slowpath = mc.materialize(self.cpu, [])
 
     def _load_shadowstack_top(self, mc, reg, gcrootmap):
         rst = gcrootmap.get_root_stack_top_addr()
@@ -881,7 +881,7 @@
         self.datablockwrapper = None
         allblocks = self.get_asmmemmgr_blocks(looptoken)
         size = self.mc.get_relative_pos() 
-        res = self.mc.materialize(self.cpu.asmmemmgr, allblocks,
+        res = self.mc.materialize(self.cpu, allblocks,
                                    self.cpu.gc_ll_descr.gcrootmap)
         self.cpu.asmmemmgr.register_codemap(
             self.codemap.get_final_bytecode(res, size))
diff --git a/rpython/jit/backend/arm/runner.py b/rpython/jit/backend/arm/runner.py
--- a/rpython/jit/backend/arm/runner.py
+++ b/rpython/jit/backend/arm/runner.py
@@ -50,6 +50,7 @@
     def setup_once(self):
         self.cpuinfo.arch_version = detect_arch_version()
         self.cpuinfo.hf_abi = detect_hardfloat()
+        self.codemap.setup()
         self.assembler.setup_once()
 
     def finish_once(self):
diff --git a/rpython/jit/backend/arm/test/support.py b/rpython/jit/backend/arm/test/support.py
--- a/rpython/jit/backend/arm/test/support.py
+++ b/rpython/jit/backend/arm/test/support.py
@@ -24,7 +24,7 @@
 
 def run_asm(asm):
     BOOTSTRAP_TP = lltype.FuncType([], lltype.Signed)
-    addr = asm.mc.materialize(asm.cpu.asmmemmgr, [], None)
+    addr = asm.mc.materialize(asm.cpu, [], None)
     assert addr % 8 == 0
     func = rffi.cast(lltype.Ptr(BOOTSTRAP_TP), addr)
     asm.mc._dump_trace(addr, 'test.asm')
diff --git a/rpython/jit/backend/arm/test/test_calling_convention.py b/rpython/jit/backend/arm/test/test_calling_convention.py
--- a/rpython/jit/backend/arm/test/test_calling_convention.py
+++ b/rpython/jit/backend/arm/test/test_calling_convention.py
@@ -29,7 +29,7 @@
         mc = InstrBuilder()
         mc.MOV_rr(r.r0.value, r.sp.value)
         mc.MOV_rr(r.pc.value, r.lr.value)
-        return mc.materialize(self.cpu.asmmemmgr, [])
+        return mc.materialize(self.cpu, [])
 
     def get_alignment_requirements(self):
         return 8
diff --git a/rpython/jit/backend/llsupport/asmmemmgr.py b/rpython/jit/backend/llsupport/asmmemmgr.py
--- a/rpython/jit/backend/llsupport/asmmemmgr.py
+++ b/rpython/jit/backend/llsupport/asmmemmgr.py
@@ -5,9 +5,6 @@
 from rpython.rlib.debug import debug_start, debug_print, debug_stop
 from rpython.rlib.debug import have_debug_prints
 from rpython.rtyper.lltypesystem import lltype, rffi
-from rpython.rlib.rbisect import bisect, bisect_tuple
-
-_memmngr = None # global reference so we can use @entrypoint :/
 
 
 class AsmMemoryManager(object):
@@ -27,12 +24,6 @@
         self.free_blocks = {}      # map {start: stop}
         self.free_blocks_end = {}  # map {stop: start}
         self.blocks_by_size = [[] for i in range(self.num_indices)]
-        # two lists of jit addresses (sorted) and the corresponding stack
-        # depths
-        self.jit_addr_map = []
-        self.jit_frame_depth_map = []
-        self.jit_codemap = []
-        # see codemap.py
 
     def malloc(self, minsize, maxsize):
         """Allocate executable memory, between minsize and maxsize bytes,
@@ -54,13 +45,6 @@
         if r_uint is not None:
             self.total_mallocs -= r_uint(stop - start)
         self._add_free_block(start, stop)
-        # fix up jit_addr_map
-        jit_adr_start = bisect(self.jit_addr_map, start)
-        jit_adr_stop = bisect(self.jit_addr_map, stop)
-        self.jit_addr_map = (self.jit_addr_map[:jit_adr_start] +
-                             self.jit_addr_map[jit_adr_stop:])
-        self.jit_frame_depth_map = (self.jit_frame_depth_map[:jit_adr_start] +
-                                    self.jit_frame_depth_map[jit_adr_stop:])
 
     def open_malloc(self, minsize):
         """Allocate at least minsize bytes.  Returns (start, stop)."""
@@ -167,35 +151,6 @@
         del self.free_blocks_end[stop]
         return (start, stop)
 
-    def register_frame_depth_map(self, rawstart, frame_positions,
-                                 frame_assignments):
-        if not frame_positions:
-            return
-        if not self.jit_addr_map or rawstart > self.jit_addr_map[-1]:
-            start = len(self.jit_addr_map)
-            self.jit_addr_map += [0] * len(frame_positions)
-            self.jit_frame_depth_map += [0] * len(frame_positions)
-        else:
-            start = bisect(self.jit_addr_map, rawstart)
-            self.jit_addr_map = (self.jit_addr_map[:start] +
-                                 [0] * len(frame_positions) +
-                                 self.jit_addr_map[start:])
-            self.jit_frame_depth_map = (self.jit_frame_depth_map[:start] +
-                                 [0] * len(frame_positions) +
-                                 self.jit_frame_depth_map[start:])
-        for i, pos in enumerate(frame_positions):
-            self.jit_addr_map[i + start] = pos + rawstart
-            self.jit_frame_depth_map[i + start] = frame_assignments[i]
-
-    def register_codemap(self, codemap):
-        start = codemap[0]
-        pos = bisect_tuple(self.jit_codemap, start)
-        if pos == len(self.jit_codemap): # common case
-            self.jit_codemap.append(codemap)
-        else:
-            self.jit_codemap = (self.jit_codemap[:pos] + [codemap] +
-                                self.jit_codemap[pos:])
-
     def _delete(self):
         "NOT_RPYTHON"
         if self._allocated:
@@ -351,11 +306,11 @@
             #
         debug_stop(logname)
 
-    def materialize(self, asmmemmgr, allblocks, gcrootmap=None):
+    def materialize(self, cpu, allblocks, gcrootmap=None):
         size = self.get_relative_pos()
         align = self.ALIGN_MATERIALIZE
         size += align - 1
-        malloced = asmmemmgr.malloc(size, size)
+        malloced = cpu.asmmemmgr.malloc(size, size)
         allblocks.append(malloced)
         rawstart = malloced[0]
         rawstart = (rawstart + align - 1) & (-align)
@@ -364,8 +319,9 @@
             assert gcrootmap is not None
             for pos, mark in self.gcroot_markers:
                 gcrootmap.register_asm_addr(rawstart + pos, mark)
-        asmmemmgr.register_frame_depth_map(rawstart, self.frame_positions,
-                                           self.frame_assignments)
+        cpu.codemap.register_frame_depth_map(rawstart, rawstart + size,
+                                             self.frame_positions,
+                                             self.frame_assignments)
         self.frame_positions = None
         self.frame_assignments = None
         return rawstart
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -130,7 +130,7 @@
         self.gcmap_for_finish[0] = r_uint(1)
 
     def setup(self, looptoken):
-        self.codemap = CodemapBuilder()
+        self.codemap_builder = CodemapBuilder()
         self._finish_gcmap = lltype.nullptr(jitframe.GCMAP)
 
     def set_debug(self, v):
@@ -200,7 +200,9 @@
         return fail_descr, target
 
     def debug_merge_point(self, op):
-        self.codemap.debug_merge_point(op, self.mc.get_relative_pos())
+        self.codemap_builder.debug_merge_point(op.getarg(1).getint(),
+                                               op.getarg(3).getint(),
+                                               self.mc.get_relative_pos())
 
     def call_assembler(self, op, guard_op, argloc, vloc, result_loc, tmploc):
         self._store_force_index(guard_op)
diff --git a/rpython/jit/backend/llsupport/codemap.py b/rpython/jit/backend/llsupport/codemap.py
--- a/rpython/jit/backend/llsupport/codemap.py
+++ b/rpython/jit/backend/llsupport/codemap.py
@@ -9,79 +9,129 @@
 
 """
 
+import os
 from rpython.rlib import rgc
+from rpython.rlib.objectmodel import specialize, we_are_translated
 from rpython.rlib.entrypoint import jit_entrypoint
-from rpython.jit.backend.llsupport import asmmemmgr
-from rpython.rlib.rbisect import bisect, bisect_tuple
+from rpython.rlib.rbisect import bisect_right, bisect_right_addr
+from rpython.rlib.rbisect import bisect_left, bisect_left_addr
 from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.translator.tool.cbuild import ExternalCompilationInfo
+from rpython.translator import cdir
 
- at jit_entrypoint([lltype.Signed], lltype.Signed,
-                c_name='pypy_jit_stack_depth_at_loc')
- at rgc.no_collect
-def stack_depth_at_loc(loc):
-    _memmngr = asmmemmgr._memmngr
 
-    pos = bisect(_memmngr.jit_addr_map, loc)
-    if pos == 0 or pos == len(_memmngr.jit_addr_map):
-        return -1
-    return _memmngr.jit_frame_depth_map[pos-1]
+INT_LIST_PTR = rffi.CArrayPtr(lltype.Signed)
 
- at jit_entrypoint([], lltype.Signed, c_name='pypy_jit_start_addr')
-def jit_start_addr():
-    _memmngr = asmmemmgr._memmngr
 
-    return _memmngr.jit_addr_map[0]
+srcdir = os.path.join(os.path.dirname(__file__), 'src')
 
- at jit_entrypoint([], lltype.Signed, c_name='pypy_jit_end_addr')
-def jit_end_addr():
-    _memmngr = asmmemmgr._memmngr
+eci = ExternalCompilationInfo(post_include_bits=["""
+#include <stdint.h>
+RPY_EXTERN long pypy_jit_codemap_add(uintptr_t addr,
+                                     unsigned int machine_code_size,
+                                     long *bytecode_info,
+                                     unsigned int bytecode_info_size);
+RPY_EXTERN long *pypy_jit_codemap_del(uintptr_t addr);
+RPY_EXTERN uintptr_t pypy_jit_codemap_firstkey(void);
+RPY_EXTERN void *pypy_find_codemap_at_addr(long addr, long* start_addr);
+RPY_EXTERN long pypy_yield_codemap_at_addr(void *codemap_raw, long addr,
+                                           long *current_pos_addr);
 
-    return _memmngr.jit_addr_map[-1]
+RPY_EXTERN long pypy_jit_depthmap_add(uintptr_t addr, unsigned int size,
+                                      unsigned int stackdepth);
+RPY_EXTERN void pypy_jit_depthmap_clear(uintptr_t addr, unsigned int size);
 
- at jit_entrypoint([lltype.Signed], lltype.Signed,
-                c_name='pypy_find_codemap_at_addr')
-def find_codemap_at_addr(addr):
-    _memmngr = asmmemmgr._memmngr
+"""], separate_module_sources=[
+    open(os.path.join(srcdir, 'skiplist.c'), 'r').read() +
+    open(os.path.join(srcdir, 'codemap.c'), 'r').read()
+], include_dirs=[cdir])
 
-    res = bisect_tuple(_memmngr.jit_codemap, addr) - 1
-    if res == len(_memmngr.jit_codemap):
-        return -1
-    return res
+def llexternal(name, args, res):
+    return rffi.llexternal(name, args, res, compilation_info=eci,
+                           releasegil=False)
 
- at jit_entrypoint([lltype.Signed, lltype.Signed,
-                 rffi.CArrayPtr(lltype.Signed)], lltype.Signed,
-                 c_name='pypy_yield_codemap_at_addr')
-def yield_bytecode_at_addr(codemap_no, addr, current_pos_addr):
-    """ will return consecutive unique_ids from codemap, starting from position
-    `pos` until addr
+pypy_jit_codemap_add = llexternal('pypy_jit_codemap_add',
+                                  [lltype.Signed, lltype.Signed,
+                                   INT_LIST_PTR, lltype.Signed],
+                                  lltype.Signed)
+pypy_jit_codemap_del = llexternal('pypy_jit_codemap_del',
+                                  [lltype.Signed], INT_LIST_PTR)
+pypy_jit_codemap_firstkey = llexternal('pypy_jit_codemap_firstkey',
+                                       [], lltype.Signed)
+
+pypy_jit_depthmap_add = llexternal('pypy_jit_depthmap_add',
+                                   [lltype.Signed, lltype.Signed,
+                                    lltype.Signed], lltype.Signed)
+pypy_jit_depthmap_clear = llexternal('pypy_jit_depthmap_clear',
+                                     [lltype.Signed, lltype.Signed],
+                                     lltype.Void)
+
+stack_depth_at_loc = llexternal('pypy_jit_stack_depth_at_loc',
+                                [lltype.Signed], lltype.Signed)
+find_codemap_at_addr = llexternal('pypy_find_codemap_at_addr',
+                                  [lltype.Signed, rffi.CArrayPtr(lltype.Signed)], lltype.Signed)
+yield_bytecode_at_addr = llexternal('pypy_yield_codemap_at_addr',
+                                    [lltype.Signed, lltype.Signed,
+                                     rffi.CArrayPtr(lltype.Signed)],
+                                     lltype.Signed)
+
+
+class CodemapStorage(object):
+    """ An immortal wrapper around underlying jit codemap data
     """
-    _memmngr = asmmemmgr._memmngr
+    def setup(self):
+        if not we_are_translated():
+             # in case someone failed to call free(), in tests only anyway
+             self.free()
 
-    codemap = _memmngr.jit_codemap[codemap_no]
-    current_pos = current_pos_addr[0]
-    start_addr = codemap[0]
-    rel_addr = addr - start_addr
-    while True:
-        if current_pos >= len(codemap[2]):
-            return 0
-        next_start = codemap[2][current_pos + 1]
-        if next_start > rel_addr:
-            return 0
-        next_stop = codemap[2][current_pos + 2]
-        if next_stop > rel_addr:
-            current_pos_addr[0] = current_pos + 4
-            return codemap[2][current_pos]
-        # we need to skip potentially more than one
-        current_pos = codemap[2][current_pos + 3]
+    def free(self):
+        while True:
+            key = pypy_jit_codemap_firstkey()
+            if not key:
+                break
+            items = pypy_jit_codemap_del(key)
+            lltype.free(items, flavor='raw', track_allocation=False)
+
+    def free_asm_block(self, start, stop):
+        items = pypy_jit_codemap_del(start)
+        if items:
+            lltype.free(items, flavor='raw', track_allocation=False)
+        pypy_jit_depthmap_clear(start, stop - start)
+
+    def register_frame_depth_map(self, rawstart, rawstop, frame_positions,
+                                 frame_assignments):
+        if not frame_positions:
+            return
+        assert len(frame_positions) == len(frame_assignments)
+        for i in range(len(frame_positions)-1, -1, -1):
+            pos = rawstart + frame_positions[i]
+            length = rawstop - pos
+            if length > 0:
+                #print "ADD:", pos, length, frame_assignments[i]
+                pypy_jit_depthmap_add(pos, length, frame_assignments[i])
+            rawstop = pos
+
+    def register_codemap(self, (start, size, l)):
+        items = lltype.malloc(INT_LIST_PTR.TO, len(l), flavor='raw',
+                              track_allocation=False)
+        for i in range(len(l)):
+            items[i] = l[i]
+        if pypy_jit_codemap_add(start, size, items, len(l)) < 0:
+            lltype.free(items, flavor='raw', track_allocation=False)
+
+    def finish_once(self):
+        self.free()
 
 def unpack_traceback(addr):
-    codemap_pos = find_codemap_at_addr(addr)
-    assert codemap_pos >= 0
+    codemap_raw = find_codemap_at_addr(addr,
+                                lltype.nullptr(rffi.CArray(lltype.Signed)))
+    if not codemap_raw:
+        return [] # no codemap for that position
     storage = lltype.malloc(rffi.CArray(lltype.Signed), 1, flavor='raw')
     storage[0] = 0
     res = []
     while True:
-        item = yield_bytecode_at_addr(codemap_pos, addr, storage)
+        item = yield_bytecode_at_addr(codemap_raw, addr, storage)
         if item == 0:
             break
         res.append(item)
@@ -95,14 +145,18 @@
         self.patch_position = []
         self.last_call_depth = -1
 
-    def debug_merge_point(self, op, pos):
-        call_depth = op.getarg(1).getint()
+    def debug_merge_point(self, call_depth, unique_id, pos):
         if call_depth != self.last_call_depth:
-            unique_id = op.getarg(3).getint()
             if unique_id == 0: # uninteresting case
                 return
             assert unique_id & 1 == 0
             if call_depth > self.last_call_depth:
+                assert call_depth == self.last_call_depth + 1
+                # ^^^ It should never be the case that we see
+                # debug_merge_points that suddenly go more than *one*
+                # call deeper than the previous one (unless we're at
+                # the start of a bridge, handled by
+                # inherit_code_from_position()).
                 self.l.append(unique_id)
                 self.l.append(pos) # <- this is a relative pos
                 self.patch_position.append(len(self.l))
@@ -139,4 +193,3 @@
             item = self.l[i * 4 + 3] # end in l
             assert item > 0
         return (addr, size, self.l) # XXX compact self.l
-
diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -16,7 +16,7 @@
     FieldDescr, ArrayDescr, CallDescr, InteriorFieldDescr,
     FLAG_POINTER, FLAG_FLOAT)
 from rpython.jit.backend.llsupport.memcpy import memset_fn
-from rpython.jit.backend.llsupport import asmmemmgr
+from rpython.jit.backend.llsupport import asmmemmgr, codemap
 from rpython.rlib.unroll import unrolling_iterable
 
 
@@ -49,7 +49,7 @@
         else:
             self._setup_exception_handling_untranslated()
         self.asmmemmgr = asmmemmgr.AsmMemoryManager()
-        asmmemmgr._memmngr = self.asmmemmgr
+        self.codemap = codemap.CodemapStorage()
         self._setup_frame_realloc(translate_support_code)
         ad = self.gc_ll_descr.getframedescrs(self).arraydescr
         self.signedarraydescr = ad
@@ -79,6 +79,9 @@
     def setup(self):
         pass
 
+    def finish_once(self):
+        self.codemap.finish_once()
+
     def _setup_frame_realloc(self, translate_support_code):
         FUNC_TP = lltype.Ptr(lltype.FuncType([llmemory.GCREF, lltype.Signed],
                                              llmemory.GCREF))
@@ -213,6 +216,7 @@
             for rawstart, rawstop in blocks:
                 self.gc_ll_descr.freeing_block(rawstart, rawstop)
                 self.asmmemmgr.free(rawstart, rawstop)
+                self.codemap.free_asm_block(rawstart, rawstop)
 
     def force(self, addr_of_force_token):
         frame = rffi.cast(jitframe.JITFRAMEPTR, addr_of_force_token)
diff --git a/rpython/jit/backend/llsupport/src/codemap.c b/rpython/jit/backend/llsupport/src/codemap.c
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/llsupport/src/codemap.c
@@ -0,0 +1,204 @@
+#include "src/precommondefs.h"
+
+#ifndef HAS_SKIPLIST
+# error "skiplist.c needs to be included before"
+#endif
+
+volatile int pypy_codemap_currently_invalid = 0;
+
+void pypy_codemap_invalid_set(int value)
+{
+    if (value)
+        __sync_lock_test_and_set(&pypy_codemap_currently_invalid, 1);
+    else
+        __sync_lock_release(&pypy_codemap_currently_invalid);
+}
+
+
+/************************************************************/
+/***  codemap storage                                     ***/
+/************************************************************/
+
+typedef struct {
+    unsigned int machine_code_size;
+    unsigned int bytecode_info_size;
+    long *bytecode_info;
+} codemap_data_t;
+
+static skipnode_t jit_codemap_head;
+
+/*** interface used from codemap.py ***/
+
+RPY_EXTERN
+long pypy_jit_codemap_add(uintptr_t addr, unsigned int machine_code_size,
+                          long *bytecode_info, unsigned int bytecode_info_size)
+{
+    skipnode_t *new = skiplist_malloc(sizeof(codemap_data_t));
+    codemap_data_t *data;
+    if (new == NULL)
+        return -1;   /* too bad */
+
+    new->key = addr;
+    data = (codemap_data_t *)new->data;
+    data->machine_code_size = machine_code_size;
+    data->bytecode_info = bytecode_info;
+    data->bytecode_info_size = bytecode_info_size;
+
+    pypy_codemap_invalid_set(1);
+    skiplist_insert(&jit_codemap_head, new);
+    pypy_codemap_invalid_set(0);
+    return 0;
+}
+
+RPY_EXTERN
+long *pypy_jit_codemap_del(uintptr_t addr)
+{
+    long *result;
+    skipnode_t *node;
+
+    pypy_codemap_invalid_set(1);
+    node = skiplist_remove(&jit_codemap_head, addr);
+    pypy_codemap_invalid_set(0);
+
+    if (node == NULL)
+        return NULL;
+    result = ((codemap_data_t *)node->data)->bytecode_info;
+    free(node);
+    return result;
+}
+
+RPY_EXTERN
+uintptr_t pypy_jit_codemap_firstkey(void)
+{
+    return skiplist_firstkey(&jit_codemap_head);
+}
+
+/*** interface used from pypy/module/_vmprof ***/
+
+RPY_EXTERN
+void *pypy_find_codemap_at_addr(long addr, long* start_addr)
+{
+    skipnode_t *codemap = skiplist_search(&jit_codemap_head, addr);
+    codemap_data_t *data;
+    uintptr_t rel_addr;
+
+    if (codemap == &jit_codemap_head) {
+        if (start_addr)
+            *start_addr = 0;
+        return NULL;
+    }
+
+    rel_addr = (uintptr_t)addr - codemap->key;
+    data = (codemap_data_t *)codemap->data;
+    if (rel_addr >= data->machine_code_size) {
+        if (start_addr)
+            *start_addr = 0;
+        return NULL;
+    }
+
+    if (start_addr)
+        *start_addr = (long)codemap->key;
+    return (void *)codemap;
+}
+
+RPY_EXTERN
+long pypy_yield_codemap_at_addr(void *codemap_raw, long addr,
+                                long *current_pos_addr)
+{
+    // will return consecutive unique_ids from codemap, starting from position
+    // `*current_pos_addr` until addr
+    skipnode_t *codemap = (skipnode_t *)codemap_raw;
+    long current_pos = *current_pos_addr;
+    long rel_addr = addr - codemap->key;
+    long next_start, next_stop;
+    codemap_data_t *data = (codemap_data_t *)codemap->data;
+
+    while (1) {
+        if (current_pos >= data->bytecode_info_size)
+            return 0;
+        next_start = data->bytecode_info[current_pos + 1];
+        if (next_start > rel_addr)
+            return 0;
+        next_stop = data->bytecode_info[current_pos + 2];
+        if (next_stop > rel_addr) {
+            *current_pos_addr = current_pos + 4;
+            return data->bytecode_info[current_pos];
+        }
+        // we need to skip potentially more than one
+        current_pos = data->bytecode_info[current_pos + 3];
+    }
+}
+
+/************************************************************/
+/***  depthmap storage                                    ***/
+/************************************************************/
+
+typedef struct {
+    unsigned int block_size;
+    unsigned int stack_depth;
+} depthmap_data_t;
+
+static skipnode_t jit_depthmap_head;
+
+/*** interface used from codemap.py ***/
+
+RPY_EXTERN
+long pypy_jit_depthmap_add(uintptr_t addr, unsigned int size,
+                           unsigned int stackdepth)
+{
+    skipnode_t *new = skiplist_malloc(sizeof(depthmap_data_t));
+    depthmap_data_t *data;
+    if (new == NULL)
+        return -1;   /* too bad */
+
+    new->key = addr;
+    data = (depthmap_data_t *)new->data;
+    data->block_size = size;
+    data->stack_depth = stackdepth;
+
+    pypy_codemap_invalid_set(1);
+    skiplist_insert(&jit_depthmap_head, new);
+    pypy_codemap_invalid_set(0);
+    return 0;
+}
+
+RPY_EXTERN
+void pypy_jit_depthmap_clear(uintptr_t addr, unsigned int size)
+{
+    uintptr_t search_key = addr + size - 1;
+    if (size == 0)
+        return;
+
+    pypy_codemap_invalid_set(1);
+    while (1) {
+        /* search for all nodes belonging to the range, and remove them */
+        skipnode_t *node = skiplist_search(&jit_depthmap_head, search_key);
+        if (node->key < addr)
+            break;   /* exhausted */
+        skiplist_remove(&jit_depthmap_head, node->key);
+        free(node);
+    }
+    pypy_codemap_invalid_set(0);
+}
+
+/*** interface used from pypy/module/_vmprof ***/
+
+RPY_EXTERN
+long pypy_jit_stack_depth_at_loc(long loc)
+{
+    skipnode_t *depthmap = skiplist_search(&jit_depthmap_head, (uintptr_t)loc);
+    depthmap_data_t *data;
+    uintptr_t rel_addr;
+
+    if (depthmap == &jit_depthmap_head)
+        return -1;
+
+    rel_addr = (uintptr_t)loc - depthmap->key;
+    data = (codemap_data_t *)depthmap->data;
+    if (rel_addr >= data->block_size)
+        return -1;
+
+    return data->stack_depth;
+}
+
+/************************************************************/
diff --git a/rpython/jit/backend/llsupport/src/skiplist.c b/rpython/jit/backend/llsupport/src/skiplist.c
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/llsupport/src/skiplist.c
@@ -0,0 +1,103 @@
+#include <stdlib.h>
+#include <stdint.h>
+
+#define HAS_SKIPLIST
+#define SKIPLIST_HEIGHT   8
+
+typedef struct skipnode_s {
+    uintptr_t key;
+    char *data;
+    struct skipnode_s *next[SKIPLIST_HEIGHT];   /* may be smaller */
+} skipnode_t;
+
+static skipnode_t *skiplist_malloc(uintptr_t datasize)
+{
+    char *result;
+    uintptr_t basesize;
+    uintptr_t length = 1;
+    while (length < SKIPLIST_HEIGHT && (rand() & 3) == 0)
+        length++;
+    basesize = sizeof(skipnode_t) -
+               (SKIPLIST_HEIGHT - length) * sizeof(skipnode_t *);
+    result = malloc(basesize + datasize);
+    if (result != NULL) {
+        ((skipnode_t *)result)->data = result + basesize;
+    }
+    return (skipnode_t *)result;
+}
+
+static skipnode_t *skiplist_search(skipnode_t *head, uintptr_t searchkey)
+{
+    /* Returns the skipnode with the key closest to (but <=) searchkey.
+       Note that if there is no item with key <= searchkey in the list,
+       this will return the head node. */
+    uintptr_t level = SKIPLIST_HEIGHT - 1;
+    while (1) {
+        skipnode_t *next = head->next[level];
+        if (next != NULL && next->key <= searchkey) {
+            head = next;
+        }
+        else {
+            if (level == 0)
+                break;
+            level -= 1;
+        }
+    }
+    return head;
+}
+
+static void skiplist_insert(skipnode_t *head, skipnode_t *new)
+{
+    uintptr_t size0 = sizeof(skipnode_t) -
+                      SKIPLIST_HEIGHT * sizeof(skipnode_t *);
+    uintptr_t height_of_new = (new->data - ((char *)new + size0)) /
+                              sizeof(skipnode_t *);
+
+    uintptr_t level = SKIPLIST_HEIGHT - 1;
+    uintptr_t searchkey = new->key;
+    while (1) {
+        skipnode_t *next = head->next[level];
+        if (next != NULL && next->key <= searchkey) {
+            head = next;
+        }
+        else {
+            if (level < height_of_new) {
+                new->next[level] = next;
+                head->next[level] = new;
+                if (level == 0)
+                    break;
+            }
+            level -= 1;
+        }
+    }
+}
+
+static skipnode_t *skiplist_remove(skipnode_t *head, uintptr_t exact_key)
+{
+    uintptr_t level = SKIPLIST_HEIGHT - 1;
+    while (1) {
+        skipnode_t *next = head->next[level];
+        if (next != NULL && next->key <= exact_key) {
+            if (next->key == exact_key) {
+                head->next[level] = next->next[level];
+                if (level == 0)
+                    return next;    /* successfully removed */
+                level -= 1;
+            }
+            else
+                head = next;
+        }
+        else {
+            if (level == 0)
+                return NULL;    /* 'exact_key' not found! */
+            level -= 1;
+        }
+    }
+}
+
+static uintptr_t skiplist_firstkey(skipnode_t *head)
+{
+    if (head->next[0] == NULL)
+        return 0;
+    return head->next[0]->key;
+}
diff --git a/rpython/jit/backend/llsupport/test/test_asmmemmgr.py b/rpython/jit/backend/llsupport/test/test_asmmemmgr.py
--- a/rpython/jit/backend/llsupport/test/test_asmmemmgr.py
+++ b/rpython/jit/backend/llsupport/test/test_asmmemmgr.py
@@ -2,7 +2,7 @@
 from rpython.jit.backend.llsupport.asmmemmgr import AsmMemoryManager
 from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
 from rpython.jit.backend.llsupport.asmmemmgr import BlockBuilderMixin
-from rpython.jit.backend.llsupport import asmmemmgr
+from rpython.jit.backend.llsupport.codemap import CodemapStorage
 from rpython.rtyper.lltypesystem import lltype, rffi
 from rpython.rlib import debug
 
@@ -96,20 +96,21 @@
 class TestAsmMemoryManager:
 
     def setup_method(self, _):
-        self.memmgr = AsmMemoryManager(min_fragment=8,
+        self.asmmemmgr = AsmMemoryManager(min_fragment=8,
                                        num_indices=10,
                                        large_alloc_size=8192)
+        self.codemap = CodemapStorage()  # no .setup() call here, unlike test_codemap.py; presumably only needed as an attribute by mc.materialize(self, ...) -- confirm
 
     def teardown_method(self, _):
-        self.memmgr._delete()
+        self.asmmemmgr._delete()  # only the memory manager is torn down; self.codemap gets no cleanup call here
 
     def test_malloc_simple(self):
         for i in range(100):
-            while self.memmgr.total_memory_allocated < 16384:
+            while self.asmmemmgr.total_memory_allocated < 16384:  # 16384 == 2 * large_alloc_size from setup_method
                 reqsize = random.randrange(1, 200)
-                (start, stop) = self.memmgr.malloc(reqsize, reqsize)
+                (start, stop) = self.asmmemmgr.malloc(reqsize, reqsize)  # same value for both size arguments; the "< reqsize + 8" slack checked next presumably reflects min_fragment=8
                 assert reqsize <= stop - start < reqsize + 8
-                assert self.memmgr.total_memory_allocated in [8192, 16384]
+                assert self.asmmemmgr.total_memory_allocated in [8192, 16384]  # i.e. the total is always a multiple of large_alloc_size=8192
             self.teardown_method(None)
             self.setup_method(None)
 
@@ -123,7 +124,7 @@
             if got and (random.random() < 0.4 or len(got) == 1000):
                 # free
                 start, stop = got.pop(random.randrange(0, len(got)))
-                self.memmgr.free(start, stop)
+                self.asmmemmgr.free(start, stop)
                 real_use -= (stop - start)
                 assert real_use >= 0
             #
@@ -134,18 +135,18 @@
                     reqmaxsize = reqsize
                 else:
                     reqmaxsize = reqsize + random.randrange(0, 200)
-                (start, stop) = self.memmgr.malloc(reqsize, reqmaxsize)
+                (start, stop) = self.asmmemmgr.malloc(reqsize, reqmaxsize)
                 assert reqsize <= stop - start < reqmaxsize + 8
                 for otherstart, otherstop in got:           # no overlap
                     assert otherstop <= start or stop <= otherstart
                 got.append((start, stop))
                 real_use += (stop - start)
-                if self.memmgr.total_memory_allocated == prev_total:
+                if self.asmmemmgr.total_memory_allocated == prev_total:
                     iterations_without_allocating_more += 1
                     if iterations_without_allocating_more == 40000:
                         break    # ok
                 else:
-                    new_total = self.memmgr.total_memory_allocated
+                    new_total = self.asmmemmgr.total_memory_allocated
                     iterations_without_allocating_more = 0
                     print real_use, new_total
                     # We seem to never see a printed value greater
@@ -172,7 +173,7 @@
         #
         gcrootmap = FakeGcRootMap()
         allblocks = []
-        rawstart = mc.materialize(self.memmgr, allblocks, gcrootmap)
+        rawstart = mc.materialize(self, allblocks, gcrootmap)
         p = rffi.cast(rffi.CArrayPtr(lltype.Char), rawstart)
         assert p[0] == 'X'
         assert p[1] == 'x'
@@ -268,16 +269,3 @@
     md.done()
     assert allblocks == [(1597, 1697), (1797, 1835)]
     assert ops == [('free', 1835, 1897)]
-
-def test_find_jit_frame_depth():
-    mgr = AsmMemoryManager()
-    mgr.register_frame_depth_map(11, [0, 5, 10], [1, 2, 3])
-    mgr.register_frame_depth_map(30, [0, 5, 10], [4, 5, 6])
-    mgr.register_frame_depth_map(0, [0, 5, 10], [7, 8, 9])
-    asmmemmgr._memmngr = mgr
-    assert asmmemmgr.stack_depth_at_loc(13) == 1
-    assert asmmemmgr.stack_depth_at_loc(-3) == -1
-    assert asmmemmgr.stack_depth_at_loc(41) == -1
-    assert asmmemmgr.stack_depth_at_loc(5) == 8
-    assert asmmemmgr.stack_depth_at_loc(17) == 2
-    assert asmmemmgr.stack_depth_at_loc(38) == 5
diff --git a/rpython/jit/backend/llsupport/test/test_codemap.py b/rpython/jit/backend/llsupport/test/test_codemap.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/llsupport/test/test_codemap.py
@@ -0,0 +1,93 @@
+
+from rpython.rtyper.lltypesystem import rffi, lltype
+from rpython.jit.backend.llsupport.codemap import stack_depth_at_loc
+from rpython.jit.backend.llsupport.codemap import CodemapStorage, \
+     CodemapBuilder, unpack_traceback, find_codemap_at_addr
+
+NULL = lltype.nullptr(rffi.CArray(lltype.Signed))
+     
+def test_register_codemap():  # address lookups across three registered codemaps
+    codemap = CodemapStorage()
+    codemap.setup()
+    codemap.register_codemap((100, 20, [13, 14, 15]))  # judging by the asserts below: (start address, size, payload)
+    codemap.register_codemap((300, 30, [16, 17, 18]))  # note the registration order 100, 300, 200: not sorted by address
+    codemap.register_codemap((200, 100, [19, 20, 21, 22, 23]))
+    #
+    raw100 = find_codemap_at_addr(100, NULL)  # meaning of the second argument is not visible here; a null pointer is passed throughout
+    assert find_codemap_at_addr(119, NULL) == raw100  # 119 is the last address inside [100, 120)
+    assert not find_codemap_at_addr(120, NULL)  # the end is exclusive, and nothing is registered at 120..199
+    #
+    raw200 = find_codemap_at_addr(200, NULL)
+    assert raw200 != raw100
+    assert find_codemap_at_addr(299, NULL) == raw200  # last address inside [200, 300)
+    #
+    raw300 = find_codemap_at_addr(329, NULL)  # last address inside [300, 330)
+    assert raw300 != raw100 and raw300 != raw200
+    assert find_codemap_at_addr(300, NULL) == raw300  # 300 belongs to the third block, not to the one ending at 300
+    #
+    codemap.free()
+
+def test_find_jit_frame_depth():  # stack_depth_at_loc() before and after freeing one block
+    codemap = CodemapStorage()
+    codemap.setup()
+    codemap.register_frame_depth_map(11, 26, [0, 5, 10], [1, 2, 3])  # judging by the asserts: (start, stop, offsets from start, depth valid from each offset on)
+    codemap.register_frame_depth_map(30, 41, [0, 5, 10], [4, 5, 6])
+    codemap.register_frame_depth_map(0, 11, [0, 5, 10], [7, 8, 9])  # lowest address range, registered last
+    assert stack_depth_at_loc(13) == 1  # 11 + 2: first entry of the [11, 26) block
+    assert stack_depth_at_loc(-3) == -1  # -1 is the result when no block covers the address
+    assert stack_depth_at_loc(40) == 6  # 30 + 10: last entry of the [30, 41) block
+    assert stack_depth_at_loc(41) == -1  # stop is exclusive
+    assert stack_depth_at_loc(5) == 8  # an offset equal to a listed boundary selects that entry
+    assert stack_depth_at_loc(17) == 2
+    assert stack_depth_at_loc(38) == 5
+    assert stack_depth_at_loc(25) == 3  # last address inside [11, 26)
+    assert stack_depth_at_loc(26) == -1  # gap between 26 and 30
+    assert stack_depth_at_loc(11) == 1  # 11 belongs to the block starting at 11 ...
+    assert stack_depth_at_loc(10) == 9  # ... and 10 to the block ending at 11
+    codemap.free_asm_block(11, 26)  # drop only the middle block
+    assert stack_depth_at_loc(11) == -1  # addresses of the freed block now miss
+    assert stack_depth_at_loc(13) == -1
+    assert stack_depth_at_loc(-3) == -1
+    assert stack_depth_at_loc(40) == 6  # the two remaining blocks answer as before
+    assert stack_depth_at_loc(41) == -1
+    assert stack_depth_at_loc(5) == 8
+    assert stack_depth_at_loc(38) == 5
+    assert stack_depth_at_loc(10) == 9
+    codemap.free()
+
+def test_codemaps():  # unpack_traceback() on two codemaps produced by CodemapBuilder
+    builder = CodemapBuilder()
+    builder.debug_merge_point(0, 102, 0)  # judging by the asserts: (call depth, id reported in the traceback, offset from block start)
+    builder.debug_merge_point(0, 102, 13)
+    builder.debug_merge_point(1, 104, 15)
+    builder.debug_merge_point(1, 104, 16)
+    builder.debug_merge_point(2, 106, 20)
+    builder.debug_merge_point(2, 106, 25)
+    builder.debug_merge_point(1, 104, 30)
+    builder.debug_merge_point(0, 102, 35)
+    codemap = CodemapStorage()
+    codemap.setup()
+    codemap.register_codemap(builder.get_final_bytecode(100, 40))  # presumably (start address, size); the lookups below use 110..137
+    builder = CodemapBuilder()  # fresh builder for the second, independent block
+    builder.debug_merge_point(0, 202, 0)
+    builder.debug_merge_point(0, 202, 10)
+    builder.debug_merge_point(1, 204, 20)
+    builder.debug_merge_point(1, 204, 30)
+    builder.debug_merge_point(2, 206, 40)
+    builder.debug_merge_point(2, 206, 50)
+    builder.debug_merge_point(1, 204, 60)
+    builder.debug_merge_point(0, 202, 70)
+    codemap.register_codemap(builder.get_final_bytecode(200, 100))
+    assert unpack_traceback(110) == [102]  # offset 10: still at depth 0
+    assert unpack_traceback(117) == [102, 104]  # offset 17: depth 1 was entered at 15
+    assert unpack_traceback(121) == [102, 104, 106]  # offset 21: depth 2 was entered at 20
+    assert unpack_traceback(131) == [102, 104]  # offset 31: back to depth 1 at 30
+    assert unpack_traceback(137) == [102]  # offset 37: back to depth 0 at 35
+    assert unpack_traceback(205) == [202]  # same pattern for the block at 200
+    assert unpack_traceback(225) == [202, 204]
+    assert unpack_traceback(245) == [202, 204, 206]
+    assert unpack_traceback(265) == [202, 204]
+    assert unpack_traceback(275) == [202]
+    codemap.free_asm_block(200, 300)  # (start, stop) of the second block
+    assert unpack_traceback(225) == []  # NOTE(review): no codemap.free() follows, unlike the two tests above, so the block at 100 stays registered -- confirm intended


More information about the pypy-commit mailing list