[pypy-commit] pypy stm-thread-2: In-progress? Support for hash, partly copied from semispace/hybrid.

arigo noreply at buildbot.pypy.org
Sat Sep 8 15:01:17 CEST 2012


Author: Armin Rigo <arigo at tunes.org>
Branch: stm-thread-2
Changeset: r57248:cb00c408ae8e
Date: 2012-09-08 15:01 +0200
http://bitbucket.org/pypy/pypy/changeset/cb00c408ae8e/

Log:	In-progress? Support for hash, partly copied from semispace/hybrid.

diff --git a/pypy/rpython/memory/gc/stmgc.py b/pypy/rpython/memory/gc/stmgc.py
--- a/pypy/rpython/memory/gc/stmgc.py
+++ b/pypy/rpython/memory/gc/stmgc.py
@@ -2,7 +2,6 @@
 from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rpython.lltypesystem.llmemory import raw_malloc_usage, raw_memcopy
 from pypy.rpython.memory.gc.base import GCBase, MovingGCBase
-from pypy.rpython.memory.support import mangle_hash
 from pypy.rpython.annlowlevel import llhelper
 from pypy.rlib.rarithmetic import LONG_BIT, r_uint
 from pypy.rlib.debug import ll_assert, debug_start, debug_stop, fatalerror
@@ -63,11 +62,7 @@
 #     surviving during a collection.  Between collections, it is set on
 #     the LOCAL COPY objects, but only on them.
 #
-#   - GCFLAG_HAS_SHADOW: set on nursery objects whose id() or identityhash()
-#     was taken.  Means that we already have a corresponding object allocated
-#     outside the nursery.
-#
-#   - GCFLAG_FIXED_HASH: only on some prebuilt objects.  For identityhash().
+#   - GCFLAG_HASH01, GCFLAG_HASH02: together encode the identityhash() state
 #
 # Invariant: between two transactions, all objects visible from the current
 # thread are always GLOBAL.  In particular:
@@ -100,10 +95,6 @@
 #   - for local objects with GCFLAG_LOCAL_COPY, it points to the GLOBAL
 #     original (*).
 #
-#   - if GCFLAG_HAS_SHADOW, it points to the shadow object outside the
-#     nursery (!). (It is not used on other nursery objects before
-#     collection.)
-#
 #   - it contains the 'next' object of the 'sharedarea_tls.chained_list'
 #     list, which describes all LOCAL objects malloced outside the
 #     nursery (!).
@@ -122,12 +113,23 @@
 GCFLAG_NOT_WRITTEN       = first_gcflag << 2     # keep in sync with et.h
 GCFLAG_LOCAL_COPY        = first_gcflag << 3     # keep in sync with et.h
 GCFLAG_VISITED           = first_gcflag << 4     # keep in sync with et.h
-GCFLAG_HAS_SHADOW        = first_gcflag << 5
-GCFLAG_FIXED_HASH        = first_gcflag << 6
+GCFLAG_HASH01            = first_gcflag << 5
+GCFLAG_HASH02            = first_gcflag << 6
+GCFLAG_HASHMASK          = GCFLAG_HASH01 | GCFLAG_HASH02
 
-GCFLAG_PREBUILT          = GCFLAG_GLOBAL | GCFLAG_NOT_WRITTEN
+GCFLAG_PREBUILT          = GCFLAG_GLOBAL | GCFLAG_NOT_WRITTEN | GCFLAG_HASH01
 REV_INITIAL              = r_uint(1)
 
+# the two flags GCFLAG_HASH0n together give one of the following four cases:
+#   - nobody ever asked for the hash of the object
+GC_HASH_NOTTAKEN   = 0
+#   - someone asked, and we gave the address of the object + mangle_hash
+GC_HASH_TAKEN_ADDR = GCFLAG_HASH01
+#   - someone asked, and we gave the address + nursery_hash_base + mangle_hash
+GC_HASH_TAKEN_NURS = GCFLAG_HASH02
+#   - we have our own extra field to store the hash
+GC_HASH_HASFIELD   = GCFLAG_HASH01 | GCFLAG_HASH02
+
 
 def always_inline(fn):
     fn._always_inline_ = True
@@ -147,7 +149,10 @@
     HDR = lltype.Struct('header', ('tid', lltype.Signed),
                                   ('revision', lltype.Unsigned))
     typeid_is_in_field = 'tid'
-    withhash_flag_is_in_field = 'tid', GCFLAG_FIXED_HASH
+    withhash_flag_is_in_field = 'tid', GCFLAG_HASH02
+    # ^^^ prebuilt objects either have GC_HASH_TAKEN_ADDR or they
+    #     have GC_HASH_HASFIELD (and then they are one word longer).
+    #     The difference between the two cases is GCFLAG_HASH02.
 
     TRANSLATION_PARAMS = {
         'stm_operations': 'use_real_one',
@@ -298,6 +303,13 @@
         tid = self.header(obj).tid
         return llop.extract_ushort(llgroup.HALFWORD, tid)
 
+    def get_size_incl_hash(self, obj):
+        size = self.get_size(obj)
+        hdr = self.header(obj)
+        if (hdr.tid & GCFLAG_HASHMASK) == GC_HASH_HASFIELD:
+            size += llmemory.sizeof(lltype.Signed)
+        return size
+
     @always_inline
     def combine(self, typeid16, flags):
         return llop.combine_ushort(lltype.Signed, typeid16, flags)
@@ -320,12 +332,10 @@
         set_hdr_revision(self.header(obj), nrevision)
 
     def stm_duplicate(self, obj):
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        size = self.get_size(obj)
-        totalsize = size_gc_header + size
         tls = self.get_tls()
         try:
-            localobj = tls.malloc_local_copy(totalsize)
+            localobj = tls.duplicate_obj(obj, self.get_size(obj))
+            tls.copied_local_objects.append(localobj)     # XXX KILL
         except MemoryError:
             # should not really let the exception propagate.
             # XXX do something slightly better, like abort the transaction
@@ -333,11 +343,6 @@
             fatalerror("FIXME: MemoryError in stm_duplicate")
             return llmemory.NULL
         #
-        # Initialize the copy by doing a memcpy of the bytes.
-        # The object header of localobj will then be fixed by the C code.
-        llmemory.raw_memcopy(obj - size_gc_header,
-                             localobj - size_gc_header,
-                             totalsize)
         hdr = self.header(localobj)
         hdr.tid &= ~(GCFLAG_GLOBAL | GCFLAG_POSSIBLY_OUTDATED)
         hdr.tid |= (GCFLAG_VISITED | GCFLAG_LOCAL_COPY)
@@ -346,69 +351,23 @@
     # ----------
     # id() and identityhash() support
 
-    def id_or_identityhash(self, gcobj, is_hash):
-        """Implement the common logic of id() and identityhash()
-        of an object, given as a GCREF.
-        """
+    def id(self, gcobj):
+        """NOT IMPLEMENTED! XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"""
+        return self.identityhash(gcobj)
+
+    def identityhash(self, gcobj):
+        stmtls = self.get_tls()
         obj = llmemory.cast_ptr_to_adr(gcobj)
         hdr = self.header(obj)
-        tls = self.get_tls()
-        if tls.is_in_nursery(obj):
-            #
-            # The object is still in the nursery of the current TLS.
-            # (It cannot be in the nursery of a different thread, because
-            # such an object would not be visible to this thread at all.)
-            #
-            ll_assert(hdr.tid & GCFLAG_LOCAL_COPY == 0, "id: LOCAL_COPY?")
-            #
-            if hdr.tid & GCFLAG_HAS_SHADOW == 0:
-                #
-                # We need to allocate a non-movable object here.  We only
-                # allocate it for now; it is left completely uninitialized.
-                size_gc_header = self.gcheaderbuilder.size_gc_header
-                size = self.get_size(obj)
-                totalsize = size_gc_header + size
-                fixedobj = tls.sharedarea_tls.malloc_object(totalsize)
-                tls.sharedarea_tls.add_regular(fixedobj)
-                self.header(fixedobj).tid = 0     # GCFLAG_VISITED is off
-                #
-                # Update the header of the local 'obj'
-                hdr.tid |= GCFLAG_HAS_SHADOW
-                set_hdr_revision(hdr, fixedobj)
-                #
+        if hdr.tid & GCFLAG_HASHMASK == 0:
+            # set one of the GC_HASH_TAKEN_xxx flags.
+            if stmtls.is_in_nursery(obj):
+                hdr.tid |= GC_HASH_TAKEN_NURS
             else:
-                # There is already a corresponding fixedobj
-                fixedobj = hdr_revision(hdr)
-            #
-            obj = fixedobj
-            #
-        elif hdr.tid & GCFLAG_LOCAL_COPY:
-            #
-            # The object is the local copy of a LOCAL-GLOBAL pair.
-            obj = hdr_revision(hdr)
-        #
-        i = llmemory.cast_adr_to_int(obj)
-        if is_hash:
-            # For identityhash(), we need a special case for some
-            # prebuilt objects: their hash must be the same before
-            # and after translation.  It is stored as an extra word
-            # after the object.  But we cannot use it for id()
-            # because the stored value might clash with a real one.
-            if self.header(obj).tid & GCFLAG_FIXED_HASH:
-                size = self.get_size(obj)
-                i = (obj + size).signed[0]
-            else:
-                # mangle the hash value to increase the dispertion
-                # on the trailing bits, but only if !GCFLAG_FIXED_HASH
-                i = mangle_hash(i)
-        return i
-
-    def id(self, gcobj):
-        return self.id_or_identityhash(gcobj, False)
-
-    def identityhash(self, gcobj):
-        return self.id_or_identityhash(gcobj, True)
-
+                hdr.tid |= GC_HASH_TAKEN_ADDR
+        # Compute and return the result
+        objsize = self.get_size(obj)
+        return stmtls._get_object_hash(obj, objsize, hdr.tid)
 
 # ____________________________________________________________
 # helpers
diff --git a/pypy/rpython/memory/gc/stmtls.py b/pypy/rpython/memory/gc/stmtls.py
--- a/pypy/rpython/memory/gc/stmtls.py
+++ b/pypy/rpython/memory/gc/stmtls.py
@@ -4,15 +4,18 @@
 from pypy.rpython.annlowlevel import cast_base_ptr_to_instance, base_ptr_lltype
 from pypy.rlib.objectmodel import we_are_translated, free_non_gc_object
 from pypy.rlib.objectmodel import specialize
-from pypy.rlib.rarithmetic import r_uint
+from pypy.rlib.rarithmetic import r_uint, intmask
 from pypy.rlib.debug import ll_assert, debug_start, debug_stop, fatalerror
+from pypy.rpython.memory.support import mangle_hash
 
 from pypy.rpython.memory.gc.stmgc import WORD, NULL
 from pypy.rpython.memory.gc.stmgc import always_inline, dont_inline
 from pypy.rpython.memory.gc.stmgc import GCFLAG_GLOBAL, GCFLAG_VISITED
-from pypy.rpython.memory.gc.stmgc import GCFLAG_LOCAL_COPY, GCFLAG_HAS_SHADOW
+from pypy.rpython.memory.gc.stmgc import GCFLAG_LOCAL_COPY
 from pypy.rpython.memory.gc.stmgc import GCFLAG_POSSIBLY_OUTDATED
 from pypy.rpython.memory.gc.stmgc import GCFLAG_NOT_WRITTEN
+from pypy.rpython.memory.gc.stmgc import GCFLAG_HASHMASK, GC_HASH_TAKEN_ADDR
+from pypy.rpython.memory.gc.stmgc import GC_HASH_TAKEN_NURS, GC_HASH_HASFIELD
 from pypy.rpython.memory.gc.stmgc import hdr_revision, set_hdr_revision
 
 
@@ -23,6 +26,7 @@
     _alloc_flavor_ = 'raw'
 
     nontranslated_dict = {}
+    nursery_hash_base = -1
 
     def __init__(self, gc):
         self.gc = gc
@@ -209,9 +213,21 @@
         size_used = self.nursery_free - self.nursery_start
         llarena.arena_reset(self.nursery_start, size_used, 2)
         self.nursery_free = self.nursery_start
+        self.change_nursery_hash_base()    # the nursery is empty now
         #
         debug_stop("gc-local")
 
+    def change_nursery_hash_base(self):
+        # The following should be enough to ensure that young objects
+        # tend to always get a different hash.  It also makes sure that
+        # nursery_hash_base is not a multiple of 4, to avoid collisions
+        # with the hash of non-young objects.
+        hash_base = self.nursery_hash_base
+        hash_base += self.nursery_size - 1
+        if (hash_base & 3) == 0:
+            hash_base -= 1
+        self.nursery_hash_base = intmask(hash_base)
+
     # ------------------------------------------------------------
 
     @always_inline
@@ -244,13 +260,6 @@
                   "odd-valued (i.e. tagged) pointer unexpected here")
         return self.nursery_start <= addr < self.nursery_top
 
-    def malloc_local_copy(self, totalsize):
-        """Allocate an object that will be used as a LOCAL COPY of
-        some GLOBAL object."""
-        localobj = self.sharedarea_tls.malloc_object(totalsize)
-        self.copied_local_objects.append(localobj)     # XXX KILL
-        return localobj
-
     def fresh_new_weakref(self, obj):
         self.local_weakrefs.append(obj)
 
@@ -271,12 +280,12 @@
             ll_assert(hdr.tid & GCFLAG_VISITED == 0, "unexpected VISITED [1]")
             ll_assert(hdr.tid & GCFLAG_LOCAL_COPY == 0,"already LOCAL_COPY [1]")
             hdr.tid |= GCFLAG_GLOBAL | GCFLAG_NOT_WRITTEN
-            self._clear_version_for_global_object(hdr)
+            self._clear_revision_for_global_object(hdr)
         #
         self.copied_local_objects.clear()
 
-    def _clear_version_for_global_object(self, hdr):
-        # Reset the 'version' to initialize a newly global object.
+    def _clear_revision_for_global_object(self, hdr):
+        # Reset the 'revision' to initialize a newly global object.
         # When translated with C code, we set it to 1.
         # When non-translated, we reset it instead to '_uninitialized'
         # to simulate the fact that the C code might change it.
@@ -404,50 +413,24 @@
             return
         #
         # If 'obj' was already forwarded, change it to its forwarding address.
-        # If 'obj' has already a shadow but isn't forwarded so far, use it.
-        # The common case is the "else" part, so we use only one test to
-        # know if we are in the common case or not.
-        if hdr.tid & (GCFLAG_VISITED | GCFLAG_HAS_SHADOW):
-            #
-            if hdr.tid & GCFLAG_VISITED:
-                root.address[0] = hdr_revision(hdr)
-                return
-            #
-            # Case of GCFLAG_HAS_SHADOW.  See comments below.
-            size_gc_header = self.gc.gcheaderbuilder.size_gc_header
-            totalsize = size_gc_header + size
-            hdr.tid &= ~GCFLAG_HAS_SHADOW
-            newobj = hdr_revision(hdr)
-            newhdr = self.gc.header(newobj)
-            #
-            saved_version = hdr_revision(newhdr)
-            llmemory.raw_memcopy(obj - size_gc_header,
-                                 newobj - size_gc_header,
-                                 totalsize)
-            set_hdr_revision(newhdr, saved_version)
-            newhdr.tid = hdr.tid | GCFLAG_VISITED
-            #
-        else:
-            #
-            # First visit to 'obj': we must move this YOUNG obj out of the
-            # nursery.
-            size_gc_header = self.gc.gcheaderbuilder.size_gc_header
-            totalsize = size_gc_header + size
-            #
-            # Common case: allocate a new nonmovable location for it.
-            newobj = self._malloc_out_of_nursery(totalsize)
-            #
-            # Copy it.  Note that references to other objects in the
-            # nursery are kept unchanged in this step.
-            llmemory.raw_memcopy(obj - size_gc_header,
-                                 newobj - size_gc_header,
-                                 totalsize)
-            #
-            # Register the object here, not before the memcopy() that would
-            # overwrite its 'version' field
-            self._register_newly_malloced_obj(newobj)
+        if hdr.tid & GCFLAG_VISITED:
+            root.address[0] = hdr_revision(hdr)
+            return
         #
-        # Set the YOUNG copy's GCFLAG_VISITED and set its version to
+        # First visit to 'obj': we must move this YOUNG obj out of the
+        # nursery.  This is the common case.  Allocate a new location
+        # for it outside the nursery.
+        newobj = self.duplicate_obj(obj, size)
+        #
+        # Note that references from 'obj' to other objects in the
+        # nursery are kept unchanged in this step: they are copied
+        # verbatim to 'newobj'.
+        #
+        # Register the object here, not before the memcopy() that would
+        # overwrite its 'revision' field
+        self._register_newly_malloced_obj(newobj)
+        #
+        # Set the YOUNG copy's GCFLAG_VISITED and set its revision to
         # point to the OLD copy.
         hdr.tid |= GCFLAG_VISITED
         set_hdr_revision(hdr, newobj)
@@ -461,8 +444,45 @@
         # walk 'pending_list'.
         self.pending.append(newobj)
 
-    def _malloc_out_of_nursery(self, totalsize):
-        return self.sharedarea_tls.malloc_object(totalsize)
+    def duplicate_obj(self, obj, objsize):
+        size_gc_header = self.gc.gcheaderbuilder.size_gc_header
+        totalsize_without_hash = size_gc_header + objsize
+        if self.gc.header(obj).tid & GCFLAG_HASHMASK:
+            newtotalsize = totalsize_without_hash + (
+                llmemory.sizeof(lltype.Signed))
+        else:
+            newtotalsize = totalsize_without_hash
+        #
+        newobj = self.sharedarea_tls.malloc_object(newtotalsize)
+        #
+        # Initialize the copy by doing a memcpy of the bytes.
+        # The object header of newobj will then be fixed by the C code.
+        llmemory.raw_memcopy(obj - size_gc_header,
+                             newobj - size_gc_header,
+                             totalsize_without_hash)
+        #
+        newhdr = self.gc.header(newobj)
+        if newhdr.tid & GCFLAG_HASHMASK:
+            hash = self._get_object_hash(obj, objsize, newhdr.tid)
+            newaddr = llarena.getfakearenaaddress(newobj)
+            (newaddr + objsize).signed[0] = hash
+            newhdr.tid |= GC_HASH_HASFIELD
+        #
+        return newobj
+
+    def _get_object_hash(self, obj, objsize, tid):
+        # Returns the hash of the object, which must not be GC_HASH_NOTTAKEN.
+        gc_hash = tid & GCFLAG_HASHMASK
+        if gc_hash == GC_HASH_HASFIELD:
+            obj = llarena.getfakearenaaddress(obj)
+            return (obj + objsize).signed[0]
+        elif gc_hash == GC_HASH_TAKEN_ADDR:
+            return mangle_hash(llmemory.cast_adr_to_int(obj))
+        elif gc_hash == GC_HASH_TAKEN_NURS:
+            return mangle_hash(intmask(llmemory.cast_adr_to_int(obj) +
+                                       self.nursery_hash_base))
+        else:
+            assert 0, "gc_hash == GC_HASH_NOTTAKEN"
 
     def _register_newly_malloced_obj(self, obj):
         self.sharedarea_tls.add_regular(obj)
diff --git a/pypy/rpython/memory/gc/test/test_stmgc.py b/pypy/rpython/memory/gc/test/test_stmgc.py
--- a/pypy/rpython/memory/gc/test/test_stmgc.py
+++ b/pypy/rpython/memory/gc/test/test_stmgc.py
@@ -222,10 +222,10 @@
         else:
             L = self.gc._stm_duplicate(R)
             hdr = self.gc.header(L)
-            assert hdr.tid & GCFLAG_GLOBAL
-            hdr.tid &= ~(GCFLAG_GLOBAL | GCFLAG_POSSIBLY_OUTDATED)
+            assert hdr.tid & GCFLAG_GLOBAL == 0
+            assert hdr.tid & GCFLAG_POSSIBLY_OUTDATED == 0
+            assert hdr.tid & GCFLAG_LOCAL_COPY
             assert hdr.tid & GCFLAG_NOT_WRITTEN
-            hdr.tid |= GCFLAG_LOCAL_COPY
             set_hdr_revision(hdr, R)     # back-reference to the original
             self.gc.stm_operations.tldict_add(R, L)
             self.gc.stm_operations._transactional_copies.append((R, L))
@@ -583,10 +583,12 @@
         s, s_adr = self.malloc(S)
         i = self.gc.identityhash(s)
         assert i == mangle_hash(llmemory.cast_adr_to_int(s_adr))
+        self.gc.collect(0)
+        assert self.gc.identityhash(s) == i
 
     def test_hash_of_globallocal(self):
         s, s_adr = self.malloc(S, globl=True)
-        t_adr = self.gc.stm_writebarrier(s_adr)   # make a local copy
+        t_adr = self.stm_writebarrier(s_adr)   # make a local copy
         t = llmemory.cast_adr_to_ptr(t_adr, llmemory.GCREF)
         i = self.gc.identityhash(t)
         assert i == mangle_hash(llmemory.cast_adr_to_int(s_adr))


More information about the pypy-commit mailing list