[pypy-commit] pypy reverse-debugger: Copy from stmgc-c8 the logic to disable some fast paths that read or write inside GC objects (like strings) in bulk

arigo pypy.commits at gmail.com
Sun Jun 5 10:10:58 EDT 2016


Author: Armin Rigo <arigo at tunes.org>
Branch: reverse-debugger
Changeset: r84937:bbd0913d7575
Date: 2016-06-05 16:11 +0200
http://bitbucket.org/pypy/pypy/changeset/bbd0913d7575/

Log:	Copy from stmgc-c8 the logic to disable some fast paths that read or
	write inside GC objects (like strings) in bulk. This makes the first
	test in 'reversedb' pass.

diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py
--- a/rpython/config/translationoption.py
+++ b/rpython/config/translationoption.py
@@ -276,6 +276,13 @@
                  suggests={"arm": [("translation.gcrootfinder", "shadowstack"),
                                    ("translation.jit_backend", "arm")]}),
 
+    BoolOption("split_gc_address_space",
+               "Ensure full separation of GC and non-GC pointers", default=False),
+    BoolOption("reversedb",
+               "Give an executable that writes a log file for reverse debugging",
+               default=False, cmdline='--reversedb',
+               requires=[('translation.split_gc_address_space', True),
+                         ('translation.jit', False)]),
 ])
 
 def get_combined_translation_config(other_optdescr=None,
diff --git a/rpython/memory/gctransform/boehm.py b/rpython/memory/gctransform/boehm.py
--- a/rpython/memory/gctransform/boehm.py
+++ b/rpython/memory/gctransform/boehm.py
@@ -17,11 +17,11 @@
         self.finalizer_funcptrs = {}
 
         atomic_mh = mallocHelpers()
-        atomic_mh.allocate = lambda size: llop.boehm_malloc_atomic(llmemory.Address, size)
+        atomic_mh.allocate = lambda size: llop.boehm_malloc_atomic(llmemory.GCREF, size)
         ll_malloc_fixedsize_atomic = atomic_mh._ll_malloc_fixedsize
 
         mh = mallocHelpers()
-        mh.allocate = lambda size: llop.boehm_malloc(llmemory.Address, size)
+        mh.allocate = lambda size: llop.boehm_malloc(llmemory.GCREF, size)
         ll_malloc_fixedsize = mh._ll_malloc_fixedsize
 
         # XXX, do we need/want an atomic version of this function?
@@ -39,13 +39,13 @@
 
         if self.translator:
             self.malloc_fixedsize_ptr = self.inittime_helper(
-                ll_malloc_fixedsize, [lltype.Signed], llmemory.Address)
+                ll_malloc_fixedsize, [lltype.Signed], llmemory.GCREF)
             self.malloc_fixedsize_atomic_ptr = self.inittime_helper(
-                ll_malloc_fixedsize_atomic, [lltype.Signed], llmemory.Address)
+                ll_malloc_fixedsize_atomic, [lltype.Signed], llmemory.GCREF)
             self.malloc_varsize_no_length_ptr = self.inittime_helper(
-                ll_malloc_varsize_no_length, [lltype.Signed]*3, llmemory.Address, inline=False)
+                ll_malloc_varsize_no_length, [lltype.Signed]*3, llmemory.GCREF, inline=False)
             self.malloc_varsize_ptr = self.inittime_helper(
-                ll_malloc_varsize, [lltype.Signed]*4, llmemory.Address, inline=False)
+                ll_malloc_varsize, [lltype.Signed]*4, llmemory.GCREF, inline=False)
             if self.translator.config.translation.rweakref:
                 self.weakref_create_ptr = self.inittime_helper(
                     ll_weakref_create, [llmemory.Address], llmemory.WeakRefPtr,
@@ -66,7 +66,7 @@
             funcptr = self.malloc_fixedsize_ptr
         v_raw = hop.genop("direct_call",
                           [funcptr, c_size],
-                          resulttype=llmemory.Address)
+                          resulttype=llmemory.GCREF)
         finalizer_ptr = self.finalizer_funcptr_for_type(TYPE)
         if finalizer_ptr:
             c_finalizer_ptr = Constant(finalizer_ptr, self.FINALIZER_PTR)
@@ -80,12 +80,12 @@
             v_raw = hop.genop("direct_call",
                                [self.malloc_varsize_no_length_ptr, v_length,
                                 c_const_size, c_item_size],
-                               resulttype=llmemory.Address)
+                               resulttype=llmemory.GCREF)
         else:
             v_raw = hop.genop("direct_call",
                                [self.malloc_varsize_ptr, v_length,
                                 c_const_size, c_item_size, c_offset_to_length],
-                               resulttype=llmemory.Address)
+                               resulttype=llmemory.GCREF)
         return v_raw
 
     def finalizer_funcptr_for_type(self, TYPE):
diff --git a/rpython/memory/gctransform/transform.py b/rpython/memory/gctransform/transform.py
--- a/rpython/memory/gctransform/transform.py
+++ b/rpython/memory/gctransform/transform.py
@@ -444,7 +444,7 @@
 
     def ll_malloc_varsize(length, size, itemsize, lengthoffset):
         result = mh.ll_malloc_varsize_no_length(length, size, itemsize)
-        (result + lengthoffset).signed[0] = length
+        llop.raw_store(lltype.Void, result, lengthoffset, length)
         return result
     mh.ll_malloc_varsize = ll_malloc_varsize
 
@@ -471,9 +471,9 @@
         ll_raw_malloc_varsize = mh.ll_malloc_varsize
         ll_raw_malloc_varsize_no_length_zero  = mh.ll_malloc_varsize_no_length_zero
 
-        stack_mh = mallocHelpers()
-        stack_mh.allocate = lambda size: llop.stack_malloc(llmemory.Address, size)
-        ll_stack_malloc_fixedsize = stack_mh._ll_malloc_fixedsize
+        ## stack_mh = mallocHelpers()
+        ## stack_mh.allocate = lambda size: llop.stack_malloc(llmemory.GCREF, size)
+        ## ll_stack_malloc_fixedsize = stack_mh._ll_malloc_fixedsize
 
         if self.translator:
             self.raw_malloc_fixedsize_ptr = self.inittime_helper(
@@ -485,8 +485,8 @@
             self.raw_malloc_varsize_no_length_zero_ptr = self.inittime_helper(
                 ll_raw_malloc_varsize_no_length_zero, [lltype.Signed]*3, llmemory.Address, inline=False)
 
-            self.stack_malloc_fixedsize_ptr = self.inittime_helper(
-                ll_stack_malloc_fixedsize, [lltype.Signed], llmemory.Address)
+            ## self.stack_malloc_fixedsize_ptr = self.inittime_helper(
+            ##     ll_stack_malloc_fixedsize, [lltype.Signed], llmemory.Address)
 
     def gct_malloc(self, hop, add_flags=None):
         TYPE = hop.spaceop.result.concretetype.TO
@@ -509,11 +509,12 @@
         return v_raw
 
     def gct_fv_stack_malloc(self, hop, flags, TYPE, c_size):
-        v_raw = hop.genop("direct_call", [self.stack_malloc_fixedsize_ptr, c_size],
-                          resulttype=llmemory.Address)
-        if flags.get('zero'):
-            hop.genop("raw_memclear", [v_raw, c_size])
-        return v_raw
+        raise Exception("not supported any more")
+        ## v_raw = hop.genop("direct_call", [self.stack_malloc_fixedsize_ptr, c_size],
+        ##                   resulttype=llmemory.Address)
+        ## if flags.get('zero'):
+        ##     hop.genop("raw_memclear", [v_raw, c_size])
+        ## return v_raw
 
     def gct_malloc_varsize(self, hop, add_flags=None):
         flags = hop.spaceop.args[1].value
diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py
--- a/rpython/rlib/rgc.py
+++ b/rpython/rlib/rgc.py
@@ -19,6 +19,13 @@
     """
     pass
 
+def must_split_gc_address_space():
+    """Returns True if we have a "split GC address space", i.e. if
+    we are translating with an option that doesn't support taking raw
+    addresses inside GC objects and "hacking" at them.  This is
+    notably the case with --reversedb."""
+    return False
+
 # for test purposes we allow objects to be pinned and use
 # the following list to keep track of the pinned objects
 _pinned_objects = []
@@ -147,6 +154,18 @@
     """
     return True
 
+class SplitAddrSpaceEntry(ExtRegistryEntry):
+    _about_ = must_split_gc_address_space
+ 
+    def compute_result_annotation(self):
+        config = self.bookkeeper.annotator.translator.config
+        result = config.translation.split_gc_address_space
+        return self.bookkeeper.immutablevalue(result)
+
+    def specialize_call(self, hop):
+        hop.exception_cannot_occur()
+        return hop.inputconst(lltype.Bool, hop.s_result.const)
+
 class CanMoveEntry(ExtRegistryEntry):
     _about_ = can_move
 
@@ -280,18 +299,25 @@
 
     TP = lltype.typeOf(source).TO
     assert TP == lltype.typeOf(dest).TO
-    if _contains_gcptr(TP.OF):
+
+    slowpath = False
+    if must_split_gc_address_space():
+        slowpath = True
+    elif _contains_gcptr(TP.OF):
         # perform a write barrier that copies necessary flags from
         # source to dest
         if not llop.gc_writebarrier_before_copy(lltype.Bool, source, dest,
                                                 source_start, dest_start,
                                                 length):
-            # if the write barrier is not supported, copy by hand
-            i = 0
-            while i < length:
-                copy_item(source, dest, i + source_start, i + dest_start)
-                i += 1
-            return
+            slowpath = True
+    if slowpath:
+        # if the write barrier is not supported, or if we translate with
+        # the option 'split_gc_address_space', then copy by hand
+        i = 0
+        while i < length:
+            copy_item(source, dest, i + source_start, i + dest_start)
+            i += 1
+        return
     source_addr = llmemory.cast_ptr_to_adr(source)
     dest_addr   = llmemory.cast_ptr_to_adr(dest)
     cp_source_addr = (source_addr + llmemory.itemoffsetof(TP, 0) +
@@ -325,6 +351,14 @@
     field = getattr(p, TP._names[0])
     setattr(newp, TP._names[0], field)
 
+    if must_split_gc_address_space():
+        # do the copying element by element
+        i = 0
+        while i < smallerlength:
+            newp.chars[i] = p.chars[i]
+            i += 1
+        return newp
+
     ARRAY = getattr(TP, TP._arrayfld)
     offset = (llmemory.offsetof(TP, TP._arrayfld) +
               llmemory.itemoffsetof(ARRAY, 0))
@@ -345,9 +379,18 @@
 
     length = len(p)
     ARRAY = lltype.typeOf(p).TO
-    offset = llmemory.itemoffsetof(ARRAY, 0)
-    dest_addr = llmemory.cast_ptr_to_adr(p) + offset
-    llmemory.raw_memclear(dest_addr, llmemory.sizeof(ARRAY.OF) * length)
+    if must_split_gc_address_space():
+        # do the clearing element by element
+        from rpython.rtyper.lltypesystem import rffi
+        ZERO = rffi.cast(ARRAY.OF, 0)
+        i = 0
+        while i < length:
+            p[i] = ZERO
+            i += 1
+    else:
+        offset = llmemory.itemoffsetof(ARRAY, 0)
+        dest_addr = llmemory.cast_ptr_to_adr(p) + offset
+        llmemory.raw_memclear(dest_addr, llmemory.sizeof(ARRAY.OF) * length)
     keepalive_until_here(p)
 
 
diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py
--- a/rpython/rtyper/lltypesystem/rffi.py
+++ b/rpython/rtyper/lltypesystem/rffi.py
@@ -825,14 +825,19 @@
         count = len(data)
 
         pinned = False
-        if rgc.can_move(data):
+        fallback = False
+        if rgc.must_split_gc_address_space():
+            fallback = True
+        elif rgc.can_move(data):
             if rgc.pin(data):
                 pinned = True
             else:
-                buf = lltype.malloc(TYPEP.TO, count, flavor='raw')
-                copy_string_to_raw(lldata, buf, 0, count)
-                return buf, pinned, True
-                # ^^^ raw malloc used to get a nonmovable copy
+                fallback = True
+        if fallback:
+            buf = lltype.malloc(TYPEP.TO, count, flavor='raw')
+            copy_string_to_raw(lldata, buf, 0, count)
+            return buf, pinned, True
+            # ^^^ raw malloc used to get a nonmovable copy
         #
         # following code is executed if:
         # - rgc.can_move(data) and rgc.pin(data) both returned true
@@ -878,12 +883,17 @@
         """
         new_buf = mallocfn(count)
         pinned = 0
-        if rgc.can_move(new_buf):
+        fallback = False
+        if rgc.must_split_gc_address_space():
+            fallback = True
+        elif rgc.can_move(new_buf):
             if rgc.pin(new_buf):
                 pinned = 1
             else:
-                raw_buf = lltype.malloc(TYPEP.TO, count, flavor='raw')
-                return raw_buf, new_buf, 2
+                fallback = True
+        if fallback:
+            raw_buf = lltype.malloc(TYPEP.TO, count, flavor='raw')
+            return raw_buf, new_buf, 2
         #
         # following code is executed if:
         # - rgc.can_move(data) and rgc.pin(data) both returned true
diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py
--- a/rpython/rtyper/lltypesystem/rstr.py
+++ b/rpython/rtyper/lltypesystem/rstr.py
@@ -1,7 +1,7 @@
 from weakref import WeakValueDictionary
 
 from rpython.annotator import model as annmodel
-from rpython.rlib import jit, types
+from rpython.rlib import jit, types, rgc
 from rpython.rlib.objectmodel import (malloc_zero_filled, we_are_translated,
     _hash_string, keepalive_until_here, specialize, enforceargs)
 from rpython.rlib.signature import signature
@@ -88,6 +88,17 @@
         ll_assert(srcstart + length <= len(src.chars), "copystrc: src ovf")
         ll_assert(dststart >= 0, "copystrc: negative dststart")
         ll_assert(dststart + length <= len(dst.chars), "copystrc: dst ovf")
+        #
+        # If the 'split_gc_address_space' option is set, we must copy
+        # manually, character-by-character
+        if rgc.must_split_gc_address_space():
+            i = 0
+            while i < length:
+                dst.chars[dststart + i] = src.chars[srcstart + i]
+                i += 1
+            return
+        #  
+        #
         # from here, no GC operations can happen
         asrc = _get_raw_buf(SRC_TP, src, srcstart)
         adst = _get_raw_buf(DST_TP, dst, dststart)
@@ -108,6 +119,16 @@
         """
         # xxx Warning: same note as above apply: don't do this at home
         assert length >= 0
+        #
+        # If the 'split_gc_address_space' option is set, we must copy
+        # manually, character-by-character
+        if rgc.must_split_gc_address_space():
+            i = 0
+            while i < length:
+                ptrdst[i] = src.chars[srcstart + i]
+                i += 1
+            return
+        #
         # from here, no GC operations can happen
         asrc = _get_raw_buf(SRC_TP, src, srcstart)
         adst = llmemory.cast_ptr_to_adr(ptrdst)
@@ -124,6 +145,16 @@
     def copy_raw_to_string(ptrsrc, dst, dststart, length):
         # xxx Warning: same note as above apply: don't do this at home
         assert length >= 0
+        #
+        # If the 'split_gc_address_space' option is set, we must copy
+        # manually, character-by-character
+        if rgc.must_split_gc_address_space():
+            i = 0
+            while i < length:
+                dst.chars[dststart + i] = ptrsrc[i]
+                i += 1
+            return
+        #
         # from here, no GC operations can happen
         adst = _get_raw_buf(SRC_TP, dst, dststart)
         asrc = llmemory.cast_ptr_to_adr(ptrsrc)
@@ -1221,6 +1252,16 @@
         SRC = typeOf(src).TO     # STR or UNICODE
         DST = typeOf(dst).TO     # GcArray
         assert DST.OF is SRC.chars.OF
+        #
+        # If the 'split_gc_address_space' option is set, we must copy
+        # manually, character-by-character
+        if rgc.must_split_gc_address_space():
+            i = 0
+            while i < length:
+                dst[i] = src.chars[i]
+                i += 1
+            return lst
+        #
         # from here, no GC operations can happen
         asrc = llmemory.cast_ptr_to_adr(src) + (
             llmemory.offsetof(SRC, 'chars') +
diff --git a/rpython/translator/c/database.py b/rpython/translator/c/database.py
--- a/rpython/translator/c/database.py
+++ b/rpython/translator/c/database.py
@@ -31,10 +31,12 @@
                  gcpolicyclass=None,
                  exctransformer=None,
                  thread_enabled=False,
-                 sandbox=False):
+                 sandbox=False,
+                 split_gc_address_space=False):
         self.translator = translator
         self.standalone = standalone
         self.sandbox    = sandbox
+        self.split_gc_address_space = split_gc_address_space
         if gcpolicyclass is None:
             gcpolicyclass = gc.RefcountingGcPolicy
         self.gcpolicy = gcpolicyclass(self, thread_enabled)
diff --git a/rpython/translator/c/funcgen.py b/rpython/translator/c/funcgen.py
--- a/rpython/translator/c/funcgen.py
+++ b/rpython/translator/c/funcgen.py
@@ -606,7 +606,23 @@
                 self.expr(op.args[0]),
                 self.expr(op.args[1]))
 
+    def _check_split_gc_address_space(self, op):
+        if self.db.split_gc_address_space:
+            TYPE = self.lltypemap(op.result)
+            TSRC = self.lltypemap(op.args[0])
+            gcdst = isinstance(TYPE, Ptr) and TYPE.TO._gckind == 'gc'
+            gcsrc = isinstance(TSRC, Ptr) and TSRC.TO._gckind == 'gc'
+            if gcsrc != gcdst:
+                raise Exception(
+                  "cast between pointer types changes the address space,\n"
+                  "but the 'split_gc_address_space' option is enabled:\n"
+                  "  func: %s\n"
+                  "    op: %s\n"
+                  "  from: %s\n"
+                  "    to: %s" % (self.graph, op, TSRC, TYPE))
+
     def OP_CAST_POINTER(self, op):
+        self._check_split_gc_address_space(op)
         TYPE = self.lltypemap(op.result)
         typename = self.db.gettype(TYPE)
         result = []
@@ -625,6 +641,7 @@
             % (self.expr(op.result), self.expr(op.args[0])))
 
     def OP_CAST_INT_TO_PTR(self, op):
+        self._check_split_gc_address_space(op)
         TYPE = self.lltypemap(op.result)
         typename = self.db.gettype(TYPE)
         return "%s = (%s)%s;" % (self.expr(op.result), cdecl(typename, ""),
@@ -690,6 +707,7 @@
           % locals())
 
     def OP_CAST_PRIMITIVE(self, op):
+        self._check_split_gc_address_space(op)
         TYPE = self.lltypemap(op.result)
         val =  self.expr(op.args[0])
         result = self.expr(op.result)
diff --git a/rpython/translator/c/genc.py b/rpython/translator/c/genc.py
--- a/rpython/translator/c/genc.py
+++ b/rpython/translator/c/genc.py
@@ -131,7 +131,9 @@
                               gcpolicyclass=gcpolicyclass,
                               exctransformer=exctransformer,
                               thread_enabled=self.config.translation.thread,
-                              sandbox=self.config.translation.sandbox)
+                              sandbox=self.config.translation.sandbox,
+                              split_gc_address_space=
+                                 self.config.translation.split_gc_address_space)
         self.db = db
 
         # give the gc a chance to register interest in the start-up functions it
diff --git a/rpython/translator/reversedb/__init__.py b/rpython/translator/reversedb/__init__.py
new file mode 100644
diff --git a/rpython/translator/reversedb/test/__init__.py b/rpython/translator/reversedb/test/__init__.py
new file mode 100644
diff --git a/rpython/translator/reversedb/test/test_basic.py b/rpython/translator/reversedb/test/test_basic.py
new file mode 100644
--- /dev/null
+++ b/rpython/translator/reversedb/test/test_basic.py
@@ -0,0 +1,29 @@
+import py
+from rpython.translator.interactive import Translation
+
+
+class TestBasic(object):
+
+    def getcompiled(self, entry_point, argtypes, backendopt=True):
+        t = Translation(entry_point, None, gc="boehm")
+        t.config.translation.reversedb = True
+        t.config.translation.rweakref = False
+        if not backendopt:
+            t.disable(["backendopt_lltype"])
+        t.annotate()
+        t.rtype()
+        if t.backendopt:
+            t.backendopt()
+        t.compile_c()
+
+        def run(*argv):
+            stdout = t.driver.cbuilder.cmdexec(' '.join(argv))
+            return stdout
+        return run
+
+    def test_simple(self):
+        def main(argv):
+            print argv[1:]
+            return 0
+        fn = self.getcompiled(main, [], backendopt=False)
+        assert fn('abc d') == '[abc, d]\n'


More information about the pypy-commit mailing list