[pypy-commit] pypy optresult: merge default

fijal noreply at buildbot.pypy.org
Thu Nov 13 14:23:44 CET 2014


Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: optresult
Changeset: r74504:25a2e8817653
Date: 2014-11-13 15:22 +0200
http://bitbucket.org/pypy/pypy/changeset/25a2e8817653/

Log:	merge default

diff --git a/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c b/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c
--- a/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c
+++ b/pypy/module/test_lib_pypy/ctypes_tests/_ctypes_test.c
@@ -316,8 +316,8 @@
 {
 }
 
-EXPORT(LONG_LONG) last_tf_arg_s;
-EXPORT(unsigned LONG_LONG) last_tf_arg_u;
+EXPORT(LONG_LONG) last_tf_arg_s = 0;
+EXPORT(unsigned LONG_LONG) last_tf_arg_u = 0;
 
 struct BITS {
 	int A: 1, B:2, C:3, D:4, E: 5, F: 6, G: 7, H: 8, I: 9;
diff --git a/pypy/objspace/std/strbufobject.py b/pypy/objspace/std/strbufobject.py
--- a/pypy/objspace/std/strbufobject.py
+++ b/pypy/objspace/std/strbufobject.py
@@ -5,6 +5,7 @@
 from pypy.objspace.std.bytesobject import (W_AbstractBytesObject,
     W_BytesObject, StringBuffer)
 from pypy.interpreter.gateway import interp2app, unwrap_spec
+from pypy.interpreter.error import OperationError
 from rpython.rlib.rstring import StringBuilder
 
 
@@ -46,15 +47,18 @@
         return space.wrap(self.length)
 
     def descr_add(self, space, w_other):
+        try:
+            other = W_BytesObject._op_val(space, w_other)
+        except OperationError as e:
+            if e.match(space, space.w_TypeError):
+                return space.w_NotImplemented
+            raise
         if self.builder.getlength() != self.length:
             builder = StringBuilder()
             builder.append(self.force())
         else:
             builder = self.builder
-        if isinstance(w_other, W_StringBufferObject):
-            builder.append(w_other.force())
-        else:
-            builder.append(w_other._value)
+        builder.append(other)
         return W_StringBufferObject(builder)
 
     def descr_str(self, space):
diff --git a/pypy/objspace/std/test/test_strbufobject.py b/pypy/objspace/std/test/test_strbufobject.py
--- a/pypy/objspace/std/test/test_strbufobject.py
+++ b/pypy/objspace/std/test/test_strbufobject.py
@@ -78,3 +78,8 @@
         c = '0'.__add__('1')
         x = c + a
         assert x == '01ab'
+
+    def test_add_non_string(self):
+        a = 'a'
+        a += 'b'
+        raises(TypeError, "a += 5")
diff --git a/rpython/__main__.py b/rpython/__main__.py
new file mode 100644
--- /dev/null
+++ b/rpython/__main__.py
@@ -0,0 +1,16 @@
+"""RPython translation usage:
+
+rpython <translation options> target <targetoptions>
+
+run with --help for more information
+"""
+
+import sys
+
+# no implicit targets
+if len(sys.argv) == 1:
+    print __doc__
+    sys.exit(1)
+
+from rpython.translator.goal.translate import main
+main()
diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -55,7 +55,6 @@
 # ____________________________________________________________
 
 class GcLLDescription(GcCache):
-    malloc_zero_filled = True
 
     def __init__(self, gcdescr, translator=None, rtyper=None):
         GcCache.__init__(self, translator is not None, rtyper)
@@ -246,6 +245,7 @@
 
 class GcLLDescr_boehm(GcLLDescription):
     kind                  = 'boehm'
+    malloc_zero_filled    = True
     moving_gc             = False
     round_up              = False
     write_barrier_descr   = None
diff --git a/rpython/jit/backend/llsupport/jitframe.py b/rpython/jit/backend/llsupport/jitframe.py
--- a/rpython/jit/backend/llsupport/jitframe.py
+++ b/rpython/jit/backend/llsupport/jitframe.py
@@ -3,6 +3,7 @@
 from rpython.rlib.objectmodel import specialize
 from rpython.rlib.debug import ll_assert
 from rpython.rlib.objectmodel import enforceargs
+from rpython.rlib import rgc
 
 SIZEOFSIGNED = rffi.sizeof(lltype.Signed)
 IS_32BIT = (SIZEOFSIGNED == 4)
@@ -45,6 +46,7 @@
 # detailed explanation how it is on your architecture
 
 def jitframe_allocate(frame_info):
+    rgc.register_custom_trace_hook(JITFRAME, lambda_jitframe_trace)
     frame = lltype.malloc(JITFRAME, frame_info.jfi_frame_depth)
     frame.jf_frame_info = frame_info
     frame.jf_extra_stack_depth = 0
@@ -80,8 +82,6 @@
     ('jf_guard_exc', llmemory.GCREF),
     # in case the frame got reallocated, we have to forward it somewhere
     ('jf_forward', lltype.Ptr(JITFRAME)),
-    # absolutely useless field used to make up for tracing hooks inflexibilities
-    ('jf_gc_trace_state', lltype.Signed),
     # the actual frame
     ('jf_frame', lltype.Array(lltype.Signed)),
     # note that we keep length field, because it's crucial to have the data
@@ -105,75 +105,38 @@
 UNSIGN_SIZE = llmemory.sizeof(lltype.Unsigned)
 STACK_DEPTH_OFS = getofs('jf_extra_stack_depth')
 
-def jitframe_trace(obj_addr, prev):
-    if prev == llmemory.NULL:
-        (obj_addr + getofs('jf_gc_trace_state')).signed[0] = -1
-        return obj_addr + getofs('jf_descr')
-    fld = (obj_addr + getofs('jf_gc_trace_state')).signed[0]
-    if fld < 0:
-        if fld == -1:
-            (obj_addr + getofs('jf_gc_trace_state')).signed[0] = -2
-            return obj_addr + getofs('jf_force_descr')
-        elif fld == -2:
-            (obj_addr + getofs('jf_gc_trace_state')).signed[0] = -3
-            return obj_addr + getofs('jf_savedata')
-        elif fld == -3:
-            (obj_addr + getofs('jf_gc_trace_state')).signed[0] = -4
-            return obj_addr + getofs('jf_guard_exc')
-        elif fld == -4:
-            (obj_addr + getofs('jf_gc_trace_state')).signed[0] = -5
-            return obj_addr + getofs('jf_forward')
-        else:
-            if not (obj_addr + getofs('jf_gcmap')).address[0]:
-                return llmemory.NULL    # done
-            else:
-                fld = 0    # fall-through
-    # bit pattern
-    # decode the pattern
+def jitframe_trace(gc, obj_addr, callback, arg):
+    gc._trace_callback(callback, arg, obj_addr + getofs('jf_descr'))
+    gc._trace_callback(callback, arg, obj_addr + getofs('jf_force_descr'))
+    gc._trace_callback(callback, arg, obj_addr + getofs('jf_savedata'))
+    gc._trace_callback(callback, arg, obj_addr + getofs('jf_guard_exc'))
+    gc._trace_callback(callback, arg, obj_addr + getofs('jf_forward'))
+
     if IS_32BIT:
-        # 32 possible bits
-        state = fld & 0x1f
-        no = fld >> 5
         MAX = 32
     else:
-        # 64 possible bits
-        state = fld & 0x3f
-        no = fld >> 6
         MAX = 64
     gcmap = (obj_addr + getofs('jf_gcmap')).address[0]
+    if not gcmap:
+        return      # done
     gcmap_lgt = (gcmap + GCMAPLENGTHOFS).signed[0]
+    no = 0
     while no < gcmap_lgt:
         cur = (gcmap + GCMAPBASEOFS + UNSIGN_SIZE * no).unsigned[0]
-        while not (cur & (1 << state)):
-            state += 1
-            if state == MAX:
-                no += 1
-                state = 0
-                break      # next iteration of the outermost loop
-        else:
-            # found it
-            index = no * SIZEOFSIGNED * 8 + state
-            # save new state
-            state += 1
-            if state == MAX:
-                no += 1
-                state = 0
-            if IS_32BIT:
-                new_state = state | (no << 5)
-            else:
-                new_state = state | (no << 6)
-            (obj_addr + getofs('jf_gc_trace_state')).signed[0] = new_state
-            # sanity check
-            frame_lgt = (obj_addr + getofs('jf_frame') + LENGTHOFS).signed[0]
-            ll_assert(index < frame_lgt, "bogus frame field get")
-            return (obj_addr + getofs('jf_frame') + BASEITEMOFS + SIGN_SIZE *
-                    (index))
-    return llmemory.NULL
-
-CUSTOMTRACEFUNC = lltype.FuncType([llmemory.Address, llmemory.Address],
-                                  llmemory.Address)
-jitframe_trace_ptr = llhelper(lltype.Ptr(CUSTOMTRACEFUNC), jitframe_trace)
-
-lltype.attachRuntimeTypeInfo(JITFRAME, customtraceptr=jitframe_trace_ptr)
+        bitindex = 0
+        while bitindex < MAX:
+            if cur & (1 << bitindex):
+                # the 'bitindex' is set in 'cur'
+                index = no * SIZEOFSIGNED * 8 + bitindex
+                # sanity check
+                frame_lgt = (obj_addr + getofs('jf_frame') + LENGTHOFS) \
+                    .signed[0]
+                ll_assert(index < frame_lgt, "bogus frame field get")
+                gc._trace_callback(callback, arg,
+                                   obj_addr + getofs('jf_frame') +
+                                   BASEITEMOFS + SIGN_SIZE * index)
+            bitindex += 1
+        no += 1
+lambda_jitframe_trace = lambda: jitframe_trace
 
 JITFRAMEPTR = lltype.Ptr(JITFRAME)
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -263,7 +263,7 @@
     def gen_malloc_frame(self, frame_info, frame, size_box):
         descrs = self.gc_ll_descr.getframedescrs(self.cpu)
         if self.gc_ll_descr.kind == 'boehm':
-            op0 = ResOperation(rop.GETFIELD_GC, [history.ConstInt(frame_info)],
+            op0 = ResOperation(rop.GETFIELD_RAW, [history.ConstInt(frame_info)],
                                size_box,
                                descr=descrs.jfi_frame_depth)
             self.newops.append(op0)
@@ -272,7 +272,7 @@
             self.handle_new_array(descrs.arraydescr, op1)
         else:
             # we read size in bytes here, not the length
-            op0 = ResOperation(rop.GETFIELD_GC, [history.ConstInt(frame_info)],
+            op0 = ResOperation(rop.GETFIELD_RAW, [history.ConstInt(frame_info)],
                                size_box,
                                descr=descrs.jfi_frame_size)
             self.newops.append(op0)
@@ -282,7 +282,7 @@
             # we need to explicitely zero all the gc fields, because
             # of the unusal malloc pattern
             extra_ops = [
-                ResOperation(rop.GETFIELD_GC, [history.ConstInt(frame_info)],
+                ResOperation(rop.GETFIELD_RAW, [history.ConstInt(frame_info)],
                              length_box, descr=descrs.jfi_frame_depth),
                 ResOperation(rop.SETFIELD_GC, [frame, self.c_zero],
                              None, descr=descrs.jf_extra_stack_depth),
diff --git a/rpython/jit/backend/llsupport/test/test_gc.py b/rpython/jit/backend/llsupport/test/test_gc.py
--- a/rpython/jit/backend/llsupport/test/test_gc.py
+++ b/rpython/jit/backend/llsupport/test/test_gc.py
@@ -254,11 +254,15 @@
     frame.jf_gcmap[2] = r_uint(2 | 16 | 32 | 128)
     frame.jf_gcmap[3] = r_uint(0)
     frame_adr = llmemory.cast_ptr_to_adr(frame)
+    #
     all_addrs = []
-    next = jitframe.jitframe_trace(frame_adr, llmemory.NULL)
-    while next:
-        all_addrs.append(next)
-        next = jitframe.jitframe_trace(frame_adr, next)
+    class FakeGC:
+        def _trace_callback(self, callback, arg, addr):
+            assert callback == "hello"
+            assert arg == "world"
+            all_addrs.append(addr)
+    jitframe.jitframe_trace(FakeGC(), frame_adr, "hello", "world")
+    #
     counter = 0
     for name in jitframe.JITFRAME._names:
         TP = getattr(jitframe.JITFRAME, name)
@@ -297,12 +301,12 @@
     frame.jf_gcmap[0] = r_uint(18446744073441116160)
     frame.jf_gcmap[1] = r_uint(18446740775107559407)
     frame.jf_gcmap[2] = r_uint(3)
-    all_addrs = []
     frame_adr = llmemory.cast_ptr_to_adr(frame)
-    next = jitframe.jitframe_trace(frame_adr, llmemory.NULL)
-    while next:
-        all_addrs.append(next)
-        next = jitframe.jitframe_trace(frame_adr, next)
+    class FakeGC:
+        def _trace_callback(self, callback, arg, addr):
+            assert callback == "hello"
+            assert arg == "world"
+    jitframe.jitframe_trace(FakeGC(), frame_adr, "hello", "world")
     # assert did not hang
 
     lltype.free(frame_info, flavor='raw')
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -981,10 +981,10 @@
         i2 = call_assembler(i0, f0, descr=casmdescr)
         """, """
         [i0, f0]
-        i1 = getfield_gc(ConstClass(frame_info), descr=jfi_frame_size)
+        i1 = getfield_raw(ConstClass(frame_info), descr=jfi_frame_size)
         p1 = call_malloc_nursery_varsize_frame(i1)
         setfield_gc(p1, 0, descr=tiddescr)
-        i2 = getfield_gc(ConstClass(frame_info), descr=jfi_frame_depth)
+        i2 = getfield_raw(ConstClass(frame_info), descr=jfi_frame_depth)
         setfield_gc(p1, 0, descr=jf_extra_stack_depth)
         setfield_gc(p1, NULL, descr=jf_savedata)
         setfield_gc(p1, NULL, descr=jf_force_descr)
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1398,7 +1398,7 @@
         startindex_loc = self.rm.make_sure_var_in_reg(args[1], args)
         if 0 <= constbytes <= 16 * 8 and (
                 valid_addressing_size(itemsize) or
--               isinstance(startindex_loc, ImmedLoc)):
+                isinstance(startindex_loc, ImmedLoc)):
             if IS_X86_64:
                 null_loc = X86_64_XMM_SCRATCH_REG
             else:
diff --git a/rpython/jit/metainterp/logger.py b/rpython/jit/metainterp/logger.py
--- a/rpython/jit/metainterp/logger.py
+++ b/rpython/jit/metainterp/logger.py
@@ -137,6 +137,14 @@
             s = jd_sd.warmstate.get_location_str(op.getarglist()[3:])
             s = s.replace(',', '.') # we use comma for argument splitting
             return "debug_merge_point(%d, %d, '%s')" % (op.getarg(1).getint(), op.getarg(2).getint(), s)
+        if op.getopnum() == rop.JIT_DEBUG:
+            args = op.getarglist()
+            s = args[0]._get_str()
+            s = s.replace(',', '.') # we use comma for argument splitting
+            s2 = ''
+            for box in args[1:]:
+                s2 += ', %d' % box.getint()
+            return "jit_debug('%s'%s)" % (s, s2)
         if ops_offset is None:
             offset = -1
         else:
diff --git a/rpython/jit/metainterp/optimizeopt/heap.py b/rpython/jit/metainterp/optimizeopt/heap.py
--- a/rpython/jit/metainterp/optimizeopt/heap.py
+++ b/rpython/jit/metainterp/optimizeopt/heap.py
@@ -273,6 +273,7 @@
             opnum == rop.STRSETITEM or           # no effect on GC struct/array
             opnum == rop.UNICODESETITEM or       # no effect on GC struct/array
             opnum == rop.DEBUG_MERGE_POINT or    # no effect whatsoever
+            opnum == rop.JIT_DEBUG or            # no effect whatsoever
             opnum == rop.COPYSTRCONTENT or       # no effect on GC struct/array
             opnum == rop.COPYUNICODECONTENT):    # no effect on GC struct/array
             return
diff --git a/rpython/jit/metainterp/optimizeopt/optimizer.py b/rpython/jit/metainterp/optimizeopt/optimizer.py
--- a/rpython/jit/metainterp/optimizeopt/optimizer.py
+++ b/rpython/jit/metainterp/optimizeopt/optimizer.py
@@ -661,6 +661,9 @@
     def optimize_DEBUG_MERGE_POINT(self, op):
         self.emit_operation(op)
 
+    def optimize_JIT_DEBUG(self, op):
+        self.emit_operation(op)
+
     def optimize_STRGETITEM(self, op):
         indexvalue = self.getvalue(op.getarg(1))
         if indexvalue.is_constant():
diff --git a/rpython/jit/metainterp/optimizeopt/rewrite.py b/rpython/jit/metainterp/optimizeopt/rewrite.py
--- a/rpython/jit/metainterp/optimizeopt/rewrite.py
+++ b/rpython/jit/metainterp/optimizeopt/rewrite.py
@@ -8,8 +8,7 @@
 from rpython.jit.metainterp.optimizeopt.optimizer import (Optimization, REMOVED,
     CONST_0, CONST_1)
 from rpython.jit.metainterp.optimizeopt.util import _findall, make_dispatcher_method
-from rpython.jit.metainterp.resoperation import (opboolinvers, opboolreflex, rop,
-    ResOperation)
+from rpython.jit.metainterp.resoperation import rop, ResOperation, opclasses
 from rpython.rlib.rarithmetic import highest_bit
 import math
 
@@ -26,9 +25,10 @@
             sb.add_potential(op)
 
     def propagate_forward(self, op):
-        args = self.optimizer.make_args_key(op)
-        if self.find_rewritable_bool(op, args):
-            return
+        if op.boolinverse != -1 or op.boolreflex != -1:
+            args = self.optimizer.make_args_key(op)
+            if self.find_rewritable_bool(op, args):
+                return
 
         dispatch_opt(self, op)
 
@@ -48,21 +48,15 @@
 
 
     def find_rewritable_bool(self, op, args):
-        try:
-            oldopnum = opboolinvers[op.getopnum()]
-        except KeyError:
-            pass
-        else:
+        oldopnum = op.boolinverse
+        if oldopnum != -1:
             targs = self.optimizer.make_args_key(ResOperation(oldopnum, [args[0], args[1]],
                                                               None))
             if self.try_boolinvers(op, targs):
                 return True
 
-        try:
-            oldopnum = opboolreflex[op.getopnum()] # FIXME: add INT_ADD, INT_MUL
-        except KeyError:
-            pass
-        else:
+        oldopnum = op.boolreflex # FIXME: add INT_ADD, INT_MUL
+        if oldopnum != -1:
             targs = self.optimizer.make_args_key(ResOperation(oldopnum, [args[1], args[0]],
                                                               None))
             oldop = self.get_pure_result(targs)
@@ -70,13 +64,12 @@
                 self.make_equal_to(op.result, self.getvalue(oldop.result))
                 return True
 
-        try:
-            oldopnum = opboolinvers[opboolreflex[op.getopnum()]]
-        except KeyError:
-            pass
-        else:
-            targs = self.optimizer.make_args_key(ResOperation(oldopnum, [args[1], args[0]],
-                                                              None))
+        if op.boolreflex == -1:
+            return False
+        oldopnum = opclasses[op.boolreflex].boolinverse
+        if oldopnum != -1:
+            targs = self.optimizer.make_args_key(
+                ResOperation(oldopnum, [args[1], args[0]], None))
             if self.try_boolinvers(op, targs):
                 return True
 
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py
@@ -2024,6 +2024,27 @@
         """
         self.optimize_loop(ops, expected)
 
+    def test_virtual_raw_buffer_forced_but_slice_not_forced(self):
+        ops = """
+        [f1]
+        i0 = call('malloc', 16, descr=raw_malloc_descr)
+        guard_no_exception() []
+        i1 = int_add(i0, 8)
+        escape(i0)
+        setarrayitem_raw(i1, 0, f1, descr=rawarraydescr_float)
+        jump(f1)
+        """
+        expected = """
+        [f1]
+        i0 = call('malloc', 16, descr=raw_malloc_descr)
+        #guard_no_exception() []  # XXX should appear
+        escape(i0)
+        i1 = int_add(i0, 8)
+        setarrayitem_raw(i1, 0, f1, descr=rawarraydescr_float)
+        jump(f1)
+        """
+        self.optimize_loop(ops, expected)
+
     def test_duplicate_getfield_1(self):
         ops = """
         [p1, p2]
diff --git a/rpython/jit/metainterp/optimizeopt/virtualize.py b/rpython/jit/metainterp/optimizeopt/virtualize.py
--- a/rpython/jit/metainterp/optimizeopt/virtualize.py
+++ b/rpython/jit/metainterp/optimizeopt/virtualize.py
@@ -443,9 +443,17 @@
         self.buffer.values[i] = newval
 
     def getitem_raw(self, offset, length, descr):
+        if not self.is_virtual():
+            raise InvalidRawOperation
+            # see 'test_virtual_raw_buffer_forced_but_slice_not_forced'
+            # for the test above: it's not enough to check is_virtual()
+            # on the original object, because it might be a VRawSliceValue
+            # instead.  If it is a virtual one, then we'll reach here anyway.
         return self.buffer.read_value(offset, length, descr)
 
     def setitem_raw(self, offset, length, descr, value):
+        if not self.is_virtual():
+            raise InvalidRawOperation
         self.buffer.write_value(offset, length, descr, value)
 
     def _really_force(self, optforce):
@@ -818,12 +826,10 @@
                 try:
                     itemvalue = value.getitem_raw(offset, itemsize, descr)
                 except InvalidRawOperation:
-                    box = value.force_box(self)
-                    op.setarg(0, box)
-                    self.emit_operation(op)
+                    pass
                 else:
                     self.make_equal_to(op.result, itemvalue)
-                return
+                    return
         value.ensure_nonnull()
         self.emit_operation(op)
     optimize_GETARRAYITEM_RAW_F = optimize_GETARRAYITEM_RAW_I
@@ -837,11 +843,9 @@
                 itemvalue = self.getvalue(op.getarg(2))
                 try:
                     value.setitem_raw(offset, itemsize, descr, itemvalue)
+                    return
                 except InvalidRawOperation:
-                    box = value.force_box(self)
-                    op.setarg(0, box)
-                    self.emit_operation(op)
-                return
+                    pass
         value.ensure_nonnull()
         self.emit_operation(op)
 
@@ -861,12 +865,10 @@
                 try:
                     itemvalue = value.getitem_raw(offset, itemsize, descr)
                 except InvalidRawOperation:
-                    box = value.force_box(self)
-                    op.setarg(0, box)
-                    self.emit_operation(op)
+                    pass
                 else:
                     self.make_equal_to(op.result, itemvalue)
-                return
+                    return
         value.ensure_nonnull()
         self.emit_operation(op)
     optimize_RAW_LOAD_F = optimize_RAW_LOAD_I
@@ -880,11 +882,9 @@
                 itemvalue = self.getvalue(op.getarg(2))
                 try:
                     value.setitem_raw(offset, itemsize, descr, itemvalue)
+                    return
                 except InvalidRawOperation:
-                    box = value.force_box(self)
-                    op.setarg(0, box)
-                    self.emit_operation(op)
-                return
+                    pass
         value.ensure_nonnull()
         self.emit_operation(op)
 
diff --git a/rpython/jit/metainterp/pyjitpl.py b/rpython/jit/metainterp/pyjitpl.py
--- a/rpython/jit/metainterp/pyjitpl.py
+++ b/rpython/jit/metainterp/pyjitpl.py
@@ -1179,10 +1179,7 @@
 
     @arguments("box", "box", "box", "box", "box")
     def opimpl_jit_debug(self, stringbox, arg1box, arg2box, arg3box, arg4box):
-        from rpython.rtyper.lltypesystem import rstr
-        from rpython.rtyper.annlowlevel import hlstr
-        msg = stringbox.getref(lltype.Ptr(rstr.STR))
-        debug_print('jit_debug:', hlstr(msg),
+        debug_print('jit_debug:', stringbox._get_str(),
                     arg1box.getint(), arg2box.getint(),
                     arg3box.getint(), arg4box.getint())
         args = [stringbox, arg1box, arg2box, arg3box, arg4box]
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -39,6 +39,8 @@
     opnum = 0
     _cls_has_bool_result = False
     type = 'v'
+    boolreflex = -1
+    boolinverse = -1
 
     _attrs_ = ()
 
@@ -743,7 +745,7 @@
 setup(__name__ == '__main__')   # print out the table when run directly
 del _oplist
 
-opboolinvers = {
+_opboolinverse = {
     rop.INT_EQ: rop.INT_NE,
     rop.INT_NE: rop.INT_EQ,
     rop.INT_LT: rop.INT_GE,
@@ -767,7 +769,7 @@
     rop.PTR_NE: rop.PTR_EQ,
 }
 
-opboolreflex = {
+_opboolreflex = {
     rop.INT_EQ: rop.INT_EQ,
     rop.INT_NE: rop.INT_NE,
     rop.INT_LT: rop.INT_GT,
@@ -791,6 +793,19 @@
     rop.PTR_NE: rop.PTR_NE,
 }
 
+def setup2():
+    for cls in opclasses:
+        if cls is None:
+            continue
+        opnum = cls.opnum
+        if opnum in _opboolreflex:
+            cls.boolreflex = _opboolreflex[opnum]
+        if opnum in _opboolinverse:
+            cls.boolinverse = _opboolinverse[opnum]
+
+setup2()
+del _opboolinverse
+del _opboolreflex
 
 def get_deep_immutable_oplist(operations):
     """
diff --git a/rpython/jit/metainterp/test/test_logger.py b/rpython/jit/metainterp/test/test_logger.py
--- a/rpython/jit/metainterp/test/test_logger.py
+++ b/rpython/jit/metainterp/test/test_logger.py
@@ -137,6 +137,17 @@
         assert loop.operations[0].getarg(2).getint() == 0
         assert oloop.operations[0].getarg(2)._get_str() == "dupa"
 
+    def test_jit_debug(self):
+        inp = '''
+        []
+        jit_debug('foobar', -1, 5)
+        '''
+        _, loop, oloop = self.reparse(inp)
+        assert loop.operations[0].getarg(0)._get_str() == "foobar"
+        assert loop.operations[0].getarg(1).getint() == -1
+        assert oloop.operations[0].getarg(0)._get_str() == "foobar"
+        assert oloop.operations[0].getarg(1).getint() == -1
+
     def test_floats(self):
         inp = '''
         [f0]
diff --git a/rpython/jit/tool/oparser.py b/rpython/jit/tool/oparser.py
--- a/rpython/jit/tool/oparser.py
+++ b/rpython/jit/tool/oparser.py
@@ -321,8 +321,9 @@
         first_comment = None
         for line in lines:
             # for simplicity comments are not allowed on
-            # debug_merge_point lines
-            if '#' in line and 'debug_merge_point(' not in line:
+            # debug_merge_point or jit_debug lines
+            if '#' in line and ('debug_merge_point(' not in line and
+                                'jit_debug(' not in line):
                 if line.lstrip()[0] == '#': # comment only
                     if first_comment is None:
                         first_comment = line
diff --git a/rpython/memory/gc/base.py b/rpython/memory/gc/base.py
--- a/rpython/memory/gc/base.py
+++ b/rpython/memory/gc/base.py
@@ -71,7 +71,6 @@
                             member_index,
                             is_rpython_class,
                             has_custom_trace,
-                            get_custom_trace,
                             fast_path_tracing,
                             has_gcptr,
                             cannot_pin):
@@ -90,7 +89,6 @@
         self.member_index = member_index
         self.is_rpython_class = is_rpython_class
         self.has_custom_trace = has_custom_trace
-        self.get_custom_trace = get_custom_trace
         self.fast_path_tracing = fast_path_tracing
         self.has_gcptr = has_gcptr
         self.cannot_pin = cannot_pin
@@ -235,16 +233,14 @@
                 item += itemlength
                 length -= 1
         if self.has_custom_trace(typeid):
-            generator = self.get_custom_trace(typeid)
-            item = llmemory.NULL
-            while True:
-                item = generator(obj, item)
-                if not item:
-                    break
-                if self.points_to_valid_gc_object(item):
-                    callback(item, arg)
+            self.custom_trace_dispatcher(obj, typeid, callback, arg)
     _trace_slow_path._annspecialcase_ = 'specialize:arg(2)'
 
+    def _trace_callback(self, callback, arg, addr):
+        if self.is_valid_gc_object(addr.address[0]):
+            callback(addr, arg)
+    _trace_callback._annspecialcase_ = 'specialize:arg(1)'
+
     def trace_partial(self, obj, start, stop, callback, arg):
         """Like trace(), but only walk the array part, for indices in
         range(start, stop).  Must only be called if has_gcptr_in_varsize().
diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py
--- a/rpython/memory/gctransform/framework.py
+++ b/rpython/memory/gctransform/framework.py
@@ -1,9 +1,11 @@
 from rpython.annotator import model as annmodel
 from rpython.rtyper.llannotation import SomeAddress, SomePtr
 from rpython.rlib import rgc
+from rpython.rlib.objectmodel import specialize
+from rpython.rlib.unroll import unrolling_iterable
 from rpython.rtyper import rmodel, annlowlevel
 from rpython.rtyper.lltypesystem import lltype, llmemory, rffi, llgroup
-from rpython.rtyper.lltypesystem.lloperation import LL_OPERATIONS
+from rpython.rtyper.lltypesystem.lloperation import LL_OPERATIONS, llop
 from rpython.memory import gctypelayout
 from rpython.memory.gctransform.log import log
 from rpython.memory.gctransform.support import get_rtti, ll_call_destructor
@@ -239,6 +241,7 @@
             root_walker.need_stacklet_support(self, getfn)
 
         self.layoutbuilder.encode_type_shapes_now()
+        self.create_custom_trace_funcs(gcdata.gc, translator.rtyper)
 
         annhelper.finish()   # at this point, annotate all mix-level helpers
         annhelper.backend_optimize()
@@ -502,6 +505,29 @@
                                                    [SomeAddress()],
                                                    annmodel.s_None)
 
+    def create_custom_trace_funcs(self, gc, rtyper):
+        custom_trace_funcs = tuple(rtyper.custom_trace_funcs)
+        rtyper.custom_trace_funcs = custom_trace_funcs
+        # too late to register new custom trace functions afterwards
+
+        custom_trace_funcs_unrolled = unrolling_iterable(
+            [(self.get_type_id(TP), func) for TP, func in custom_trace_funcs])
+
+        @specialize.arg(2)
+        def custom_trace_dispatcher(obj, typeid, callback, arg):
+            for type_id_exp, func in custom_trace_funcs_unrolled:
+                if (llop.combine_ushort(lltype.Signed, typeid, 0) ==
+                    llop.combine_ushort(lltype.Signed, type_id_exp, 0)):
+                    func(gc, obj, callback, arg)
+                    return
+            else:
+                assert False
+
+        gc.custom_trace_dispatcher = custom_trace_dispatcher
+
+        for TP, func in custom_trace_funcs:
+            self.gcdata._has_got_custom_trace(self.get_type_id(TP))
+            specialize.arg(2)(func)
 
     def consider_constant(self, TYPE, value):
         self.layoutbuilder.consider_constant(TYPE, value, self.gcdata.gc)
diff --git a/rpython/memory/gctransform/shadowstack.py b/rpython/memory/gctransform/shadowstack.py
--- a/rpython/memory/gctransform/shadowstack.py
+++ b/rpython/memory/gctransform/shadowstack.py
@@ -73,16 +73,13 @@
             return top
         self.decr_stack = decr_stack
 
-        root_iterator = get_root_iterator(gctransformer)
         def walk_stack_root(callback, start, end):
-            root_iterator.setcontext(NonConstant(llmemory.NULL))
             gc = self.gc
             addr = end
-            while True:
-                addr = root_iterator.nextleft(gc, start, addr)
-                if addr == llmemory.NULL:
-                    return
-                callback(gc, addr)
+            while addr != start:
+                addr -= sizeofaddr
+                if gc.points_to_valid_gc_object(addr):
+                    callback(gc, addr)
         self.rootstackhook = walk_stack_root
 
         self.shadow_stack_pool = ShadowStackPool(gcdata)
@@ -349,25 +346,6 @@
                 raise MemoryError
 
 
-def get_root_iterator(gctransformer):
-    if hasattr(gctransformer, '_root_iterator'):
-        return gctransformer._root_iterator     # if already built
-    class RootIterator(object):
-        def _freeze_(self):
-            return True
-        def setcontext(self, context):
-            pass
-        def nextleft(self, gc, start, addr):
-            while addr != start:
-                addr -= sizeofaddr
-                if gc.points_to_valid_gc_object(addr):
-                    return addr
-            return llmemory.NULL
-    result = RootIterator()
-    gctransformer._root_iterator = result
-    return result
-
-
 def get_shadowstackref(root_walker, gctransformer):
     if hasattr(gctransformer, '_SHADOWSTACKREF'):
         return gctransformer._SHADOWSTACKREF
@@ -381,19 +359,19 @@
                                      rtti=True)
     SHADOWSTACKREFPTR.TO.become(SHADOWSTACKREF)
 
+    def customtrace(gc, obj, callback, arg):
+        obj = llmemory.cast_adr_to_ptr(obj, SHADOWSTACKREFPTR)
+        addr = obj.top
+        start = obj.base
+        while addr != start:
+            addr -= sizeofaddr
+            gc._trace_callback(callback, arg, addr)
+
     gc = gctransformer.gcdata.gc
-    root_iterator = get_root_iterator(gctransformer)
-
-    def customtrace(obj, prev):
-        obj = llmemory.cast_adr_to_ptr(obj, SHADOWSTACKREFPTR)
-        if not prev:
-            root_iterator.setcontext(obj.context)
-            prev = obj.top
-        return root_iterator.nextleft(gc, obj.base, prev)
-
-    CUSTOMTRACEFUNC = lltype.FuncType([llmemory.Address, llmemory.Address],
-                                      llmemory.Address)
-    customtraceptr = llhelper(lltype.Ptr(CUSTOMTRACEFUNC), customtrace)
+    assert not hasattr(gc, 'custom_trace_dispatcher')
+    # ^^^ create_custom_trace_funcs() must not run before this
+    gctransformer.translator.rtyper.custom_trace_funcs.append(
+        (SHADOWSTACKREF, customtrace))
 
     def shadowstack_destructor(shadowstackref):
         if root_walker.stacklet_support:
@@ -414,8 +392,7 @@
     destrptr = gctransformer.annotate_helper(shadowstack_destructor,
                                              [SHADOWSTACKREFPTR], lltype.Void)
 
-    lltype.attachRuntimeTypeInfo(SHADOWSTACKREF, customtraceptr=customtraceptr,
-                                 destrptr=destrptr)
+    lltype.attachRuntimeTypeInfo(SHADOWSTACKREF, destrptr=destrptr)
 
     gctransformer._SHADOWSTACKREF = SHADOWSTACKREF
     return SHADOWSTACKREF
diff --git a/rpython/memory/gctypelayout.py b/rpython/memory/gctypelayout.py
--- a/rpython/memory/gctypelayout.py
+++ b/rpython/memory/gctypelayout.py
@@ -21,18 +21,12 @@
     # It is called with the object as first argument, and the previous
     # returned address (or NULL the first time) as the second argument.
     FINALIZER_FUNC = lltype.FuncType([llmemory.Address], lltype.Void)
-    CUSTOMTRACER_FUNC = lltype.FuncType([llmemory.Address, llmemory.Address],
-                                        llmemory.Address)
     FINALIZER = lltype.Ptr(FINALIZER_FUNC)
-    CUSTOMTRACER = lltype.Ptr(CUSTOMTRACER_FUNC)
-    EXTRA = lltype.Struct("type_info_extra",
-                          ('finalizer', FINALIZER),
-                          ('customtracer', CUSTOMTRACER))
 
     # structure describing the layout of a typeid
     TYPE_INFO = lltype.Struct("type_info",
         ("infobits",       lltype.Signed),    # combination of the T_xxx consts
-        ("extra",          lltype.Ptr(EXTRA)),
+        ("finalizer",      FINALIZER),
         ("fixedsize",      lltype.Signed),
         ("ofstoptrs",      lltype.Ptr(OFFSETS_TO_GC_PTR)),
         hints={'immutable': True},
@@ -84,26 +78,18 @@
         return (infobits & T_IS_GCARRAY_OF_GCPTR) != 0
 
     def q_cannot_pin(self, typeid):
-        infobits = self.get(typeid).infobits
-        ANY = (T_HAS_GCPTR |
-               T_IS_WEAKREF |
-               T_HAS_FINALIZER |
-               T_HAS_LIGHTWEIGHT_FINALIZER)
-        return (infobits & ANY) != 0
+        typeinfo = self.get(typeid)
+        ANY = (T_HAS_GCPTR | T_IS_WEAKREF)
+        return (typeinfo.infobits & ANY) != 0 or bool(typeinfo.finalizer)
 
     def q_finalizer(self, typeid):
-        typeinfo = self.get(typeid)
-        if typeinfo.infobits & T_HAS_FINALIZER:
-            return typeinfo.extra.finalizer
-        else:
-            return lltype.nullptr(GCData.FINALIZER_FUNC)
+        return self.get(typeid).finalizer
 
     def q_light_finalizer(self, typeid):
         typeinfo = self.get(typeid)
         if typeinfo.infobits & T_HAS_LIGHTWEIGHT_FINALIZER:
-            return typeinfo.extra.finalizer
-        else:
-            return lltype.nullptr(GCData.FINALIZER_FUNC)
+            return typeinfo.finalizer
+        return lltype.nullptr(GCData.FINALIZER_FUNC)
 
     def q_offsets_to_gc_pointers(self, typeid):
         return self.get(typeid).ofstoptrs
@@ -141,12 +127,6 @@
         infobits = self.get(typeid).infobits
         return infobits & T_HAS_CUSTOM_TRACE != 0
 
-    def q_get_custom_trace(self, typeid):
-        ll_assert(self.q_has_custom_trace(typeid),
-                  "T_HAS_CUSTOM_TRACE missing")
-        typeinfo = self.get(typeid)
-        return typeinfo.extra.customtracer
-
     def q_fast_path_tracing(self, typeid):
         # return True if none of the flags T_HAS_GCPTR_IN_VARSIZE,
         # T_IS_GCARRAY_OF_GCPTR or T_HAS_CUSTOM_TRACE is set
@@ -173,11 +153,14 @@
             self.q_member_index,
             self.q_is_rpython_class,
             self.q_has_custom_trace,
-            self.q_get_custom_trace,
             self.q_fast_path_tracing,
             self.q_has_gcptr,
             self.q_cannot_pin)
 
+    def _has_got_custom_trace(self, typeid):
+        type_info = self.get(typeid)
+        type_info.infobits |= (T_HAS_CUSTOM_TRACE | T_HAS_GCPTR)
+
 
 # the lowest 16bits are used to store group member index
 T_MEMBER_INDEX              =   0xffff
@@ -186,9 +169,8 @@
 T_IS_GCARRAY_OF_GCPTR       = 0x040000
 T_IS_WEAKREF                = 0x080000
 T_IS_RPYTHON_INSTANCE       = 0x100000 # the type is a subclass of OBJECT
-T_HAS_FINALIZER             = 0x200000
-T_HAS_CUSTOM_TRACE          = 0x400000
-T_HAS_LIGHTWEIGHT_FINALIZER = 0x800000
+T_HAS_CUSTOM_TRACE          = 0x200000
+T_HAS_LIGHTWEIGHT_FINALIZER = 0x400000
 T_HAS_GCPTR                 = 0x1000000
 T_KEY_MASK                  = intmask(0xFE000000) # bug detection only
 T_KEY_VALUE                 = intmask(0x5A000000) # bug detection only
@@ -217,18 +199,11 @@
     #
     fptrs = builder.special_funcptr_for_type(TYPE)
     if fptrs:
-        extra = lltype.malloc(GCData.EXTRA, zero=True, immortal=True,
-                              flavor='raw')
         if "finalizer" in fptrs:
-            extra.finalizer = fptrs["finalizer"]
-            infobits |= T_HAS_FINALIZER
+            info.finalizer = fptrs["finalizer"]
         if "light_finalizer" in fptrs:
-            extra.finalizer = fptrs["light_finalizer"]
-            infobits |= T_HAS_FINALIZER | T_HAS_LIGHTWEIGHT_FINALIZER
-        if "custom_trace" in fptrs:
-            extra.customtracer = fptrs["custom_trace"]
-            infobits |= T_HAS_CUSTOM_TRACE | T_HAS_GCPTR
-        info.extra = extra
+            info.finalizer = fptrs["light_finalizer"]
+            infobits |= T_HAS_LIGHTWEIGHT_FINALIZER
     #
     if not TYPE._is_varsize():
         info.fixedsize = llarena.round_up_for_allocation(
@@ -420,7 +395,9 @@
         return None
 
     def initialize_gc_query_function(self, gc):
-        return GCData(self.type_info_group).set_query_functions(gc)
+        gcdata = GCData(self.type_info_group)
+        gcdata.set_query_functions(gc)
+        return gcdata
 
     def consider_constant(self, TYPE, value, gc):
         if value is not lltype.top_container(value):
diff --git a/rpython/memory/gcwrapper.py b/rpython/memory/gcwrapper.py
--- a/rpython/memory/gcwrapper.py
+++ b/rpython/memory/gcwrapper.py
@@ -29,7 +29,7 @@
                                                lltype2vtable,
                                                self.llinterp)
         self.get_type_id = layoutbuilder.get_type_id
-        layoutbuilder.initialize_gc_query_function(self.gc)
+        gcdata = layoutbuilder.initialize_gc_query_function(self.gc)
 
         constants = collect_constants(flowgraphs)
         for obj in constants:
@@ -38,8 +38,25 @@
 
         self.constantroots = layoutbuilder.addresses_of_static_ptrs
         self.constantrootsnongc = layoutbuilder.addresses_of_static_ptrs_in_nongc
+        self.prepare_custom_trace_funcs(gcdata)
         self._all_prebuilt_gc = layoutbuilder.all_prebuilt_gc
 
+    def prepare_custom_trace_funcs(self, gcdata):
+        custom_trace_funcs = self.llinterp.typer.custom_trace_funcs
+
+        def custom_trace(obj, typeid, callback, arg):
+            for TP, func in custom_trace_funcs:
+                if typeid == self.get_type_id(TP):
+                    func(self.gc, obj, callback, arg)
+                    return
+            else:
+                assert False
+        
+        for TP, func in custom_trace_funcs:
+            gcdata._has_got_custom_trace(self.get_type_id(TP))
+
+        self.gc.custom_trace_dispatcher = custom_trace
+
     # ____________________________________________________________
     #
     # Interface for the llinterp
diff --git a/rpython/memory/test/gc_test_base.py b/rpython/memory/test/gc_test_base.py
--- a/rpython/memory/test/gc_test_base.py
+++ b/rpython/memory/test/gc_test_base.py
@@ -6,7 +6,7 @@
 from rpython.rtyper.test.test_llinterp import get_interpreter
 from rpython.rtyper.lltypesystem import lltype
 from rpython.rtyper.lltypesystem.lloperation import llop
-from rpython.rlib.objectmodel import we_are_translated
+from rpython.rlib.objectmodel import we_are_translated, keepalive_until_here
 from rpython.rlib.objectmodel import compute_unique_id
 from rpython.rlib import rgc
 from rpython.rlib.rstring import StringBuilder
@@ -237,26 +237,20 @@
         assert 160 <= res <= 165
 
     def test_custom_trace(self):
-        from rpython.rtyper.annlowlevel import llhelper
         from rpython.rtyper.lltypesystem import llmemory
         from rpython.rtyper.lltypesystem.llarena import ArenaError
         #
         S = lltype.GcStruct('S', ('x', llmemory.Address),
-                                 ('y', llmemory.Address), rtti=True)
+                                 ('y', llmemory.Address))
         T = lltype.GcStruct('T', ('z', lltype.Signed))
         offset_of_x = llmemory.offsetof(S, 'x')
-        def customtrace(obj, prev):
-            if not prev:
-                return obj + offset_of_x
-            else:
-                return llmemory.NULL
-        CUSTOMTRACEFUNC = lltype.FuncType([llmemory.Address, llmemory.Address],
-                                          llmemory.Address)
-        customtraceptr = llhelper(lltype.Ptr(CUSTOMTRACEFUNC), customtrace)
-        lltype.attachRuntimeTypeInfo(S, customtraceptr=customtraceptr)
+        def customtrace(gc, obj, callback, arg):
+            gc._trace_callback(callback, arg, obj + offset_of_x)
+        lambda_customtrace = lambda: customtrace
         #
         for attrname in ['x', 'y']:
             def setup():
+                rgc.register_custom_trace_hook(S, lambda_customtrace)
                 s1 = lltype.malloc(S)
                 tx = lltype.malloc(T)
                 tx.z = 42
@@ -762,6 +756,23 @@
             assert rgc.get_gcflag_extra(a1) == False
             assert rgc.get_gcflag_extra(a2) == False
         self.interpret(fn, [])
+    
+    def test_register_custom_trace_hook(self):
+        S = lltype.GcStruct('S', ('x', lltype.Signed))
+        called = []
+
+        def trace_hook(gc, obj, callback, arg):
+            called.append("called")
+        lambda_trace_hook = lambda: trace_hook
+
+        def f():
+            rgc.register_custom_trace_hook(S, lambda_trace_hook)
+            s = lltype.malloc(S)
+            rgc.collect()
+            keepalive_until_here(s)
+
+        self.interpret(f, [])
+        assert called # not empty, can contain more than one item
 
     def test_pinning(self):
         def fn(n):
diff --git a/rpython/memory/test/test_transformed_gc.py b/rpython/memory/test/test_transformed_gc.py
--- a/rpython/memory/test/test_transformed_gc.py
+++ b/rpython/memory/test/test_transformed_gc.py
@@ -14,7 +14,7 @@
 from rpython.conftest import option
 from rpython.rlib.rstring import StringBuilder
 from rpython.rlib.rarithmetic import LONG_BIT
-import pdb
+
 
 WORD = LONG_BIT // 8
 
@@ -385,26 +385,20 @@
         assert 160 <= res <= 165
 
     def define_custom_trace(cls):
-        from rpython.rtyper.annlowlevel import llhelper
-        from rpython.rtyper.lltypesystem import llmemory
         #
-        S = lltype.GcStruct('S', ('x', llmemory.Address), rtti=True)
+        S = lltype.GcStruct('S', ('x', llmemory.Address))
         T = lltype.GcStruct('T', ('z', lltype.Signed))
         offset_of_x = llmemory.offsetof(S, 'x')
-        def customtrace(obj, prev):
-            if not prev:
-                return obj + offset_of_x
-            else:
-                return llmemory.NULL
-        CUSTOMTRACEFUNC = lltype.FuncType([llmemory.Address, llmemory.Address],
-                                          llmemory.Address)
-        customtraceptr = llhelper(lltype.Ptr(CUSTOMTRACEFUNC), customtrace)
-        lltype.attachRuntimeTypeInfo(S, customtraceptr=customtraceptr)
+        def customtrace(gc, obj, callback, arg):
+            gc._trace_callback(callback, arg, obj + offset_of_x)
+        lambda_customtrace = lambda: customtrace
+
         #
         def setup():
-            s1 = lltype.malloc(S)
+            rgc.register_custom_trace_hook(S, lambda_customtrace)
             tx = lltype.malloc(T)
             tx.z = 4243
+            s1 = lltype.malloc(S)
             s1.x = llmemory.cast_ptr_to_adr(tx)
             return s1
         def f():
diff --git a/rpython/rlib/_stacklet_asmgcc.py b/rpython/rlib/_stacklet_asmgcc.py
--- a/rpython/rlib/_stacklet_asmgcc.py
+++ b/rpython/rlib/_stacklet_asmgcc.py
@@ -1,4 +1,6 @@
 from rpython.rlib.debug import ll_assert
+from rpython.rlib import rgc
+from rpython.rlib.objectmodel import specialize
 from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
 from rpython.rtyper.lltypesystem.lloperation import llop
 from rpython.rtyper.annlowlevel import llhelper, MixLevelHelperAnnotator
@@ -11,6 +13,10 @@
 _stackletrootwalker = None
 
 def get_stackletrootwalker():
+    # XXX this is too complicated now; we don't need a StackletRootWalker
+    # instance to store global state.  We could rewrite it all in one big
+    # function.  We don't care enough for now.
+
     # lazily called, to make the following imports lazy
     global _stackletrootwalker
     if _stackletrootwalker is not None:
@@ -25,8 +31,6 @@
     class StackletRootWalker(object):
         _alloc_flavor_ = "raw"
 
-        enumerating = False
-
         def setup(self, obj):
             # initialization: read the SUSPSTACK object
             p = llmemory.cast_adr_to_ptr(obj, lltype.Ptr(SUSPSTACK))
@@ -66,7 +70,8 @@
                 self.fill_initial_frame(self.curframe, anchor)
                 return True
 
-        def next(self, obj, prev):
+        @specialize.arg(3)
+        def customtrace(self, gc, obj, callback, arg):
             #
             # Pointers to the stack can be "translated" or not:
             #
@@ -79,29 +84,20 @@
             # Note that 'curframe' contains non-translated pointers, and
             # of course the stack itself is full of non-translated pointers.
             #
+            if not self.setup(obj):
+                return
+
             while True:
-                if not self.enumerating:
-                    if not prev:
-                        if not self.setup(obj):      # one-time initialization
-                            return llmemory.NULL
-                        prev = obj   # random value, but non-NULL
-                    callee = self.curframe
-                    retaddraddr = self.translateptr(callee.frame_address)
-                    retaddr = retaddraddr.address[0]
-                    ebp_in_caller = callee.regs_stored_at[INDEX_OF_EBP]
-                    ebp_in_caller = self.translateptr(ebp_in_caller)
-                    ebp_in_caller = ebp_in_caller.address[0]
-                    basewalker.locate_caller_based_on_retaddr(retaddr,
-                                                              ebp_in_caller)
-                    self.enumerating = True
-                else:
-                    callee = self.curframe
-                    ebp_in_caller = callee.regs_stored_at[INDEX_OF_EBP]
-                    ebp_in_caller = self.translateptr(ebp_in_caller)
-                    ebp_in_caller = ebp_in_caller.address[0]
-                #
-                # not really a loop, but kept this way for similarity
-                # with asmgcroot:
+                callee = self.curframe
+                retaddraddr = self.translateptr(callee.frame_address)
+                retaddr = retaddraddr.address[0]
+                ebp_in_caller = callee.regs_stored_at[INDEX_OF_EBP]
+                ebp_in_caller = self.translateptr(ebp_in_caller)
+                ebp_in_caller = ebp_in_caller.address[0]
+                basewalker.locate_caller_based_on_retaddr(retaddr,
+                                                          ebp_in_caller)
+
+                # see asmgcroot for similarity:
                 while True:
                     location = basewalker._shape_decompressor.next()
                     if location == 0:
@@ -109,9 +105,9 @@
                     addr = basewalker.getlocation(callee, ebp_in_caller,
                                                   location)
                     # yield the translated addr of the next GCREF in the stack
-                    return self.translateptr(addr)
-                #
-                self.enumerating = False
+                    addr = self.translateptr(addr)
+                    gc._trace_callback(callback, arg, addr)
+
                 caller = self.otherframe
                 reg = CALLEE_SAVED_REGS - 1
                 while reg >= 0:
@@ -129,7 +125,7 @@
                 if caller.frame_address == llmemory.NULL:
                     # completely done with this piece of stack
                     if not self.fetch_next_stack_piece():
-                        return llmemory.NULL
+                        return
                     continue
                 #
                 self.otherframe = callee
@@ -154,9 +150,10 @@
     lltype.attachRuntimeTypeInfo(SUSPSTACK, destrptr=destrptr)
 
 
-def customtrace(obj, prev):
+def customtrace(gc, obj, callback, arg):
     stackletrootwalker = get_stackletrootwalker()
-    return stackletrootwalker.next(obj, prev)
+    stackletrootwalker.customtrace(gc, obj, callback, arg)
+lambda_customtrace = lambda: customtrace
 
 def suspstack_destructor(suspstack):
     h = suspstack.handle
@@ -170,10 +167,6 @@
                             ('callback_pieces', llmemory.Address),
                             rtti=True)
 NULL_SUSPSTACK = lltype.nullptr(SUSPSTACK)
-CUSTOMTRACEFUNC = lltype.FuncType([llmemory.Address, llmemory.Address],
-                                  llmemory.Address)
-customtraceptr = llhelper(lltype.Ptr(CUSTOMTRACEFUNC), customtrace)
-lltype.attachRuntimeTypeInfo(SUSPSTACK, customtraceptr=customtraceptr)
 
 ASM_FRAMEDATA_HEAD_PTR = lltype.Ptr(lltype.ForwardReference())
 ASM_FRAMEDATA_HEAD_PTR.TO.become(lltype.Struct('ASM_FRAMEDATA_HEAD',
@@ -263,6 +256,7 @@
         self.runfn = callback
         self.arg = arg
         # make a fresh new clean SUSPSTACK
+        rgc.register_custom_trace_hook(SUSPSTACK, lambda_customtrace)
         newsuspstack = lltype.malloc(SUSPSTACK)
         newsuspstack.handle = _c.null_handle
         self.suspstack = newsuspstack
diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py
--- a/rpython/rlib/rgc.py
+++ b/rpython/rlib/rgc.py
@@ -643,3 +643,22 @@
 
 def lltype_is_gc(TP):
     return getattr(getattr(TP, "TO", None), "_gckind", "?") == 'gc'
+
+def register_custom_trace_hook(TP, lambda_func):
+    """ This function does not do anything, but called from any annotated
+    place, will tell that "func" is used to trace GC roots inside any instance
+    of the type TP.  The func must be specified as "lambda: func" in this
+    call, for internal reasons.
+    """
+
+class RegisterGcTraceEntry(ExtRegistryEntry):
+    _about_ = register_custom_trace_hook
+
+    def compute_result_annotation(self, *args_s):
+        pass
+
+    def specialize_call(self, hop):
+        TP = hop.args_s[0].const
+        lambda_func = hop.args_s[1].const
+        hop.exception_cannot_occur()
+        hop.rtyper.custom_trace_funcs.append((TP, lambda_func()))
diff --git a/rpython/rlib/test/test_libffi.py b/rpython/rlib/test/test_libffi.py
--- a/rpython/rlib/test/test_libffi.py
+++ b/rpython/rlib/test/test_libffi.py
@@ -576,7 +576,9 @@
             }
             """
             libfoo = self.get_libfoo()
-            func = (libfoo, 'std_diff_xy', [types.sint, types.signed], types.sint)
+            # __stdcall without a DEF file decorates the name with the number of bytes
+            # that the callee will remove from the call stack
+            func = (libfoo, '_std_diff_xy@8', [types.sint, types.signed], types.sint)
             try:
                 self.call(func, [50, 8], lltype.Signed)
             except ValueError, e:
@@ -613,7 +615,9 @@
             """
             from rpython.rlib.libffi import WinDLL
             dll = WinDLL(self.libfoo_name)
-            f_by_name = dll.getpointer('BBB_second_ordinal_function' ,[],
+            # __stdcall without a DEF file decorates the name with the number of bytes
+            # that the callee will remove from the call stack
+            f_by_name = dll.getpointer('_BBB_second_ordinal_function@0' ,[],
                                           types.uint)
             f_by_ordinal = dll.getpointer_by_ordinal(2 ,[], types.uint)
             print dir(f_by_name)
diff --git a/rpython/rlib/test/test_rgc.py b/rpython/rlib/test/test_rgc.py
--- a/rpython/rlib/test/test_rgc.py
+++ b/rpython/rlib/test/test_rgc.py
@@ -228,3 +228,17 @@
     x1 = X()
     n = rgc.get_rpy_memory_usage(rgc.cast_instance_to_gcref(x1))
     assert n >= 8 and n <= 64
+
+def test_register_custom_trace_hook():
+    TP = lltype.GcStruct('X')
+
+    def trace_func():
+        xxx # should not be annotated here
+    lambda_trace_func = lambda: trace_func
+    
+    def f():
+        rgc.register_custom_trace_hook(TP, lambda_trace_func)
+    
+    t, typer, graph = gengraph(f, [])
+
+    assert typer.custom_trace_funcs == [(TP, trace_func)]
diff --git a/rpython/rtyper/annlowlevel.py b/rpython/rtyper/annlowlevel.py
--- a/rpython/rtyper/annlowlevel.py
+++ b/rpython/rtyper/annlowlevel.py
@@ -513,6 +513,13 @@
                                   % (ptr, Class))
     return ptr
 
+@specialize.arg(0)
+def cast_gcref_to_instance(Class, ptr):
+    """Reverse the hacking done in cast_instance_to_gcref()."""
+    from rpython.rtyper.rclass import OBJECTPTR
+    ptr = lltype.cast_opaque_ptr(OBJECTPTR, ptr)
+    return cast_base_ptr_to_instance(Class, ptr)
+
 class CastBasePtrToInstanceEntry(extregistry.ExtRegistryEntry):
     _about_ = cast_base_ptr_to_instance
 
diff --git a/rpython/rtyper/lltypesystem/lltype.py b/rpython/rtyper/lltypesystem/lltype.py
--- a/rpython/rtyper/lltypesystem/lltype.py
+++ b/rpython/rtyper/lltypesystem/lltype.py
@@ -383,8 +383,7 @@
                                                 about=self)._obj
         Struct._install_extras(self, **kwds)
 
-    def _attach_runtime_type_info_funcptr(self, funcptr, destrptr,
-                                          customtraceptr):
+    def _attach_runtime_type_info_funcptr(self, funcptr, destrptr):
         if self._runtime_type_info is None:
             raise TypeError("attachRuntimeTypeInfo: %r must have been built "
                             "with the rtti=True argument" % (self,))
@@ -408,18 +407,6 @@
                 raise TypeError("expected a destructor function "
                                 "implementation, got: %s" % destrptr)
             self._runtime_type_info.destructor_funcptr = destrptr
-        if customtraceptr is not None:
-            from rpython.rtyper.lltypesystem import llmemory
-            T = typeOf(customtraceptr)
-            if (not isinstance(T, Ptr) or
-                not isinstance(T.TO, FuncType) or
-                len(T.TO.ARGS) != 2 or
-                T.TO.RESULT != llmemory.Address or
-                T.TO.ARGS[0] != llmemory.Address or
-                T.TO.ARGS[1] != llmemory.Address):
-                raise TypeError("expected a custom trace function "
-                                "implementation, got: %s" % customtraceptr)
-            self._runtime_type_info.custom_trace_funcptr = customtraceptr
 
 class GcStruct(RttiStruct):
     _gckind = 'gc'
@@ -2288,12 +2275,10 @@
     return SomePtr(ll_ptrtype=PtrT.const)
 
 
-def attachRuntimeTypeInfo(GCSTRUCT, funcptr=None, destrptr=None,
-                          customtraceptr=None):
+def attachRuntimeTypeInfo(GCSTRUCT, funcptr=None, destrptr=None):
     if not isinstance(GCSTRUCT, RttiStruct):
         raise TypeError("expected a RttiStruct: %s" % GCSTRUCT)
-    GCSTRUCT._attach_runtime_type_info_funcptr(funcptr, destrptr,
-                                               customtraceptr)
+    GCSTRUCT._attach_runtime_type_info_funcptr(funcptr, destrptr)
     return _ptr(Ptr(RuntimeTypeInfo), GCSTRUCT._runtime_type_info)
 
 def getRuntimeTypeInfo(GCSTRUCT):
diff --git a/rpython/rtyper/lltypesystem/opimpl.py b/rpython/rtyper/lltypesystem/opimpl.py
--- a/rpython/rtyper/lltypesystem/opimpl.py
+++ b/rpython/rtyper/lltypesystem/opimpl.py
@@ -82,13 +82,11 @@
         else:
             def op_function(x, y):
                 if not isinstance(x, argtype):
-                    if not (isinstance(x, AddressAsInt) and argtype is int):
-                        raise TypeError("%r arg 1 must be %s, got %r instead"% (
-                            fullopname, typname, type(x).__name__))
+                    raise TypeError("%r arg 1 must be %s, got %r instead"% (
+                        fullopname, typname, type(x).__name__))
                 if not isinstance(y, argtype):
-                    if not (isinstance(y, AddressAsInt) and argtype is int):
-                        raise TypeError("%r arg 2 must be %s, got %r instead"% (
-                            fullopname, typname, type(y).__name__))
+                    raise TypeError("%r arg 2 must be %s, got %r instead"% (
+                        fullopname, typname, type(y).__name__))
                 return adjust_result(func(x, y))
 
     return func_with_new_name(op_function, 'op_' + fullopname)
@@ -104,6 +102,19 @@
             lltype.typeOf(adr),))
 
 
+def op_int_eq(x, y):
+    if not isinstance(x, (int, long)):
+        from rpython.rtyper.lltypesystem import llgroup
+        assert isinstance(x, llgroup.CombinedSymbolic), (
+            "'int_eq' arg 1 must be int-like, got %r instead" % (
+                type(x).__name__,))
+    if not isinstance(y, (int, long)):
+        from rpython.rtyper.lltypesystem import llgroup
+        assert isinstance(y, llgroup.CombinedSymbolic), (
+            "'int_eq' arg 2 must be int-like, got %r instead" % (
+                type(y).__name__,))
+    return x == y
+
 def op_ptr_eq(ptr1, ptr2):
     checkptr(ptr1)
     checkptr(ptr2)
diff --git a/rpython/rtyper/rtyper.py b/rpython/rtyper/rtyper.py
--- a/rpython/rtyper/rtyper.py
+++ b/rpython/rtyper/rtyper.py
@@ -60,6 +60,7 @@
         # make the primitive_to_repr constant mapping
         self.primitive_to_repr = {}
         self.exceptiondata = ExceptionData(self)
+        self.custom_trace_funcs = []
 
         try:
             self.seed = int(os.getenv('RTYPERSEED'))
@@ -645,7 +646,7 @@
             raise TyperError("runtime type info function %r returns %r, "
                              "excepted Ptr(RuntimeTypeInfo)" % (func, s))
         funcptr = self.getcallable(graph)
-        attachRuntimeTypeInfo(GCSTRUCT, funcptr, destrptr, None)
+        attachRuntimeTypeInfo(GCSTRUCT, funcptr, destrptr)
 
 # register operations from annotation model
 RPythonTyper._registeroperations(unaryop.UNARY_OPERATIONS, binaryop.BINARY_OPERATIONS)
diff --git a/rpython/rtyper/test/test_annlowlevel.py b/rpython/rtyper/test/test_annlowlevel.py
--- a/rpython/rtyper/test/test_annlowlevel.py
+++ b/rpython/rtyper/test/test_annlowlevel.py
@@ -4,7 +4,7 @@
 
 from rpython.rtyper.test.tool import BaseRtypingTest
 from rpython.rtyper.lltypesystem.rstr import mallocstr, mallocunicode
-from rpython.rtyper.lltypesystem import lltype
+from rpython.rtyper.lltypesystem import lltype, llmemory
 from rpython.rtyper.annlowlevel import hlstr, llstr
 from rpython.rtyper.annlowlevel import hlunicode, llunicode
 from rpython.rtyper import annlowlevel
@@ -73,6 +73,15 @@
         y = annlowlevel.cast_base_ptr_to_instance(X, ptr)
         assert y is x
 
+    def test_cast_instance_to_gcref(self):
+        class X(object):
+            pass
+        x = X()
+        ptr = annlowlevel.cast_instance_to_gcref(x)
+        assert lltype.typeOf(ptr) == llmemory.GCREF
+        y = annlowlevel.cast_gcref_to_instance(X, ptr)
+        assert y is x
+
     def test_delayedptr(self):
         FUNCTYPE = lltype.FuncType([], lltype.Signed)
         name = "delayed!myfunc"
diff --git a/rpython/translator/c/test/test_newgc.py b/rpython/translator/c/test/test_newgc.py
--- a/rpython/translator/c/test/test_newgc.py
+++ b/rpython/translator/c/test/test_newgc.py
@@ -443,19 +443,14 @@
     def define_custom_trace(cls):
         from rpython.rtyper.annlowlevel import llhelper
         #
-        S = lltype.GcStruct('S', ('x', llmemory.Address), rtti=True)
+        S = lltype.GcStruct('S', ('x', llmemory.Address))
         offset_of_x = llmemory.offsetof(S, 'x')
-        def customtrace(obj, prev):
-            if not prev:
-                return obj + offset_of_x
-            else:
-                return llmemory.NULL
-        CUSTOMTRACEFUNC = lltype.FuncType([llmemory.Address, llmemory.Address],
-                                          llmemory.Address)
-        customtraceptr = llhelper(lltype.Ptr(CUSTOMTRACEFUNC), customtrace)
-        lltype.attachRuntimeTypeInfo(S, customtraceptr=customtraceptr)
+        def customtrace(gc, obj, callback, arg):
+            gc._trace_callback(callback, arg, obj + offset_of_x)
+        lambda_customtrace = lambda: customtrace
         #
         def setup():
+            rgc.register_custom_trace_hook(S, lambda_customtrace)
             s = lltype.nullptr(S)
             for i in range(10000):
                 t = lltype.malloc(S)


More information about the pypy-commit mailing list