[pypy-commit] pypy stm-thread-2: In-progress? Support for hash, partly copied from semispace/hybrid.
arigo
noreply at buildbot.pypy.org
Sat Sep 8 15:01:17 CEST 2012
Author: Armin Rigo <arigo at tunes.org>
Branch: stm-thread-2
Changeset: r57248:cb00c408ae8e
Date: 2012-09-08 15:01 +0200
http://bitbucket.org/pypy/pypy/changeset/cb00c408ae8e/
Log: In-progress? Support for hash, partly copied from semispace/hybrid.
diff --git a/pypy/rpython/memory/gc/stmgc.py b/pypy/rpython/memory/gc/stmgc.py
--- a/pypy/rpython/memory/gc/stmgc.py
+++ b/pypy/rpython/memory/gc/stmgc.py
@@ -2,7 +2,6 @@
from pypy.rpython.lltypesystem.lloperation import llop
from pypy.rpython.lltypesystem.llmemory import raw_malloc_usage, raw_memcopy
from pypy.rpython.memory.gc.base import GCBase, MovingGCBase
-from pypy.rpython.memory.support import mangle_hash
from pypy.rpython.annlowlevel import llhelper
from pypy.rlib.rarithmetic import LONG_BIT, r_uint
from pypy.rlib.debug import ll_assert, debug_start, debug_stop, fatalerror
@@ -63,11 +62,7 @@
# surviving during a collection. Between collections, it is set on
# the LOCAL COPY objects, but only on them.
#
-# - GCFLAG_HAS_SHADOW: set on nursery objects whose id() or identityhash()
-# was taken. Means that we already have a corresponding object allocated
-# outside the nursery.
-#
-# - GCFLAG_FIXED_HASH: only on some prebuilt objects. For identityhash().
+# - GCFLAG_HASH01, GCFLAG_HASH02: two bits encoding the identity-hash state
#
# Invariant: between two transactions, all objects visible from the current
# thread are always GLOBAL. In particular:
@@ -100,10 +95,6 @@
# - for local objects with GCFLAG_LOCAL_COPY, it points to the GLOBAL
# original (*).
#
-# - if GCFLAG_HAS_SHADOW, it points to the shadow object outside the
-# nursery (!). (It is not used on other nursery objects before
-# collection.)
-#
# - it contains the 'next' object of the 'sharedarea_tls.chained_list'
# list, which describes all LOCAL objects malloced outside the
# nursery (!).
@@ -122,12 +113,23 @@
GCFLAG_NOT_WRITTEN = first_gcflag << 2 # keep in sync with et.h
GCFLAG_LOCAL_COPY = first_gcflag << 3 # keep in sync with et.h
GCFLAG_VISITED = first_gcflag << 4 # keep in sync with et.h
-GCFLAG_HAS_SHADOW = first_gcflag << 5
-GCFLAG_FIXED_HASH = first_gcflag << 6
+GCFLAG_HASH01 = first_gcflag << 5
+GCFLAG_HASH02 = first_gcflag << 6
+GCFLAG_HASHMASK = GCFLAG_HASH01 | GCFLAG_HASH02
-GCFLAG_PREBUILT = GCFLAG_GLOBAL | GCFLAG_NOT_WRITTEN
+GCFLAG_PREBUILT = GCFLAG_GLOBAL | GCFLAG_NOT_WRITTEN | GCFLAG_HASH01
REV_INITIAL = r_uint(1)
+# the two flags GCFLAG_HASH0n together give one of the following four cases:
+# - nobody ever asked for the hash of the object
+GC_HASH_NOTTAKEN = 0
+# - someone asked, and we gave the address of the object + mangle_hash
+GC_HASH_TAKEN_ADDR = GCFLAG_HASH01
+# - someone asked, and we gave the address + nursery_hash_base + mangle_hash
+GC_HASH_TAKEN_NURS = GCFLAG_HASH02
+# - we have our own extra field to store the hash
+GC_HASH_HASFIELD = GCFLAG_HASH01 | GCFLAG_HASH02
+
def always_inline(fn):
fn._always_inline_ = True
@@ -147,7 +149,10 @@
HDR = lltype.Struct('header', ('tid', lltype.Signed),
('revision', lltype.Unsigned))
typeid_is_in_field = 'tid'
- withhash_flag_is_in_field = 'tid', GCFLAG_FIXED_HASH
+ withhash_flag_is_in_field = 'tid', GCFLAG_HASH02
+ # ^^^ prebuilt objects either have GC_HASH_TAKEN_ADDR or they
+ # have GC_HASH_HASFIELD (and then they are one word longer).
+ # The difference between the two cases is GCFLAG_HASH02.
TRANSLATION_PARAMS = {
'stm_operations': 'use_real_one',
@@ -298,6 +303,13 @@
tid = self.header(obj).tid
return llop.extract_ushort(llgroup.HALFWORD, tid)
+ def get_size_incl_hash(self, obj):
+ size = self.get_size(obj)
+ hdr = self.header(obj)
+ if (hdr.tid & GCFLAG_HASHMASK) == GC_HASH_HASFIELD:
+ size += llmemory.sizeof(lltype.Signed)
+ return size
+
@always_inline
def combine(self, typeid16, flags):
return llop.combine_ushort(lltype.Signed, typeid16, flags)
@@ -320,12 +332,10 @@
set_hdr_revision(self.header(obj), nrevision)
def stm_duplicate(self, obj):
- size_gc_header = self.gcheaderbuilder.size_gc_header
- size = self.get_size(obj)
- totalsize = size_gc_header + size
tls = self.get_tls()
try:
- localobj = tls.malloc_local_copy(totalsize)
+ localobj = tls.duplicate_obj(obj, self.get_size(obj))
+ tls.copied_local_objects.append(localobj) # XXX KILL
except MemoryError:
# should not really let the exception propagate.
# XXX do something slightly better, like abort the transaction
@@ -333,11 +343,6 @@
fatalerror("FIXME: MemoryError in stm_duplicate")
return llmemory.NULL
#
- # Initialize the copy by doing a memcpy of the bytes.
- # The object header of localobj will then be fixed by the C code.
- llmemory.raw_memcopy(obj - size_gc_header,
- localobj - size_gc_header,
- totalsize)
hdr = self.header(localobj)
hdr.tid &= ~(GCFLAG_GLOBAL | GCFLAG_POSSIBLY_OUTDATED)
hdr.tid |= (GCFLAG_VISITED | GCFLAG_LOCAL_COPY)
@@ -346,69 +351,23 @@
# ----------
# id() and identityhash() support
- def id_or_identityhash(self, gcobj, is_hash):
- """Implement the common logic of id() and identityhash()
- of an object, given as a GCREF.
- """
+ def id(self, gcobj):
+ """NOT IMPLEMENTED! XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"""
+ return self.identityhash(gcobj)
+
+ def identityhash(self, gcobj):
+ stmtls = self.get_tls()
obj = llmemory.cast_ptr_to_adr(gcobj)
hdr = self.header(obj)
- tls = self.get_tls()
- if tls.is_in_nursery(obj):
- #
- # The object is still in the nursery of the current TLS.
- # (It cannot be in the nursery of a different thread, because
- # such an object would not be visible to this thread at all.)
- #
- ll_assert(hdr.tid & GCFLAG_LOCAL_COPY == 0, "id: LOCAL_COPY?")
- #
- if hdr.tid & GCFLAG_HAS_SHADOW == 0:
- #
- # We need to allocate a non-movable object here. We only
- # allocate it for now; it is left completely uninitialized.
- size_gc_header = self.gcheaderbuilder.size_gc_header
- size = self.get_size(obj)
- totalsize = size_gc_header + size
- fixedobj = tls.sharedarea_tls.malloc_object(totalsize)
- tls.sharedarea_tls.add_regular(fixedobj)
- self.header(fixedobj).tid = 0 # GCFLAG_VISITED is off
- #
- # Update the header of the local 'obj'
- hdr.tid |= GCFLAG_HAS_SHADOW
- set_hdr_revision(hdr, fixedobj)
- #
+ if hdr.tid & GCFLAG_HASHMASK == 0:
+ # set one of the GC_HASH_TAKEN_xxx flags.
+ if stmtls.is_in_nursery(obj):
+ hdr.tid |= GC_HASH_TAKEN_NURS
else:
- # There is already a corresponding fixedobj
- fixedobj = hdr_revision(hdr)
- #
- obj = fixedobj
- #
- elif hdr.tid & GCFLAG_LOCAL_COPY:
- #
- # The object is the local copy of a LOCAL-GLOBAL pair.
- obj = hdr_revision(hdr)
- #
- i = llmemory.cast_adr_to_int(obj)
- if is_hash:
- # For identityhash(), we need a special case for some
- # prebuilt objects: their hash must be the same before
- # and after translation. It is stored as an extra word
- # after the object. But we cannot use it for id()
- # because the stored value might clash with a real one.
- if self.header(obj).tid & GCFLAG_FIXED_HASH:
- size = self.get_size(obj)
- i = (obj + size).signed[0]
- else:
- # mangle the hash value to increase the dispertion
- # on the trailing bits, but only if !GCFLAG_FIXED_HASH
- i = mangle_hash(i)
- return i
-
- def id(self, gcobj):
- return self.id_or_identityhash(gcobj, False)
-
- def identityhash(self, gcobj):
- return self.id_or_identityhash(gcobj, True)
-
+ hdr.tid |= GC_HASH_TAKEN_ADDR
+ # Compute and return the result
+ objsize = self.get_size(obj)
+ return stmtls._get_object_hash(obj, objsize, hdr.tid)
# ____________________________________________________________
# helpers
diff --git a/pypy/rpython/memory/gc/stmtls.py b/pypy/rpython/memory/gc/stmtls.py
--- a/pypy/rpython/memory/gc/stmtls.py
+++ b/pypy/rpython/memory/gc/stmtls.py
@@ -4,15 +4,18 @@
from pypy.rpython.annlowlevel import cast_base_ptr_to_instance, base_ptr_lltype
from pypy.rlib.objectmodel import we_are_translated, free_non_gc_object
from pypy.rlib.objectmodel import specialize
-from pypy.rlib.rarithmetic import r_uint
+from pypy.rlib.rarithmetic import r_uint, intmask
from pypy.rlib.debug import ll_assert, debug_start, debug_stop, fatalerror
+from pypy.rpython.memory.support import mangle_hash
from pypy.rpython.memory.gc.stmgc import WORD, NULL
from pypy.rpython.memory.gc.stmgc import always_inline, dont_inline
from pypy.rpython.memory.gc.stmgc import GCFLAG_GLOBAL, GCFLAG_VISITED
-from pypy.rpython.memory.gc.stmgc import GCFLAG_LOCAL_COPY, GCFLAG_HAS_SHADOW
+from pypy.rpython.memory.gc.stmgc import GCFLAG_LOCAL_COPY
from pypy.rpython.memory.gc.stmgc import GCFLAG_POSSIBLY_OUTDATED
from pypy.rpython.memory.gc.stmgc import GCFLAG_NOT_WRITTEN
+from pypy.rpython.memory.gc.stmgc import GCFLAG_HASHMASK, GC_HASH_TAKEN_ADDR
+from pypy.rpython.memory.gc.stmgc import GC_HASH_TAKEN_NURS, GC_HASH_HASFIELD
from pypy.rpython.memory.gc.stmgc import hdr_revision, set_hdr_revision
@@ -23,6 +26,7 @@
_alloc_flavor_ = 'raw'
nontranslated_dict = {}
+ nursery_hash_base = -1
def __init__(self, gc):
self.gc = gc
@@ -209,9 +213,21 @@
size_used = self.nursery_free - self.nursery_start
llarena.arena_reset(self.nursery_start, size_used, 2)
self.nursery_free = self.nursery_start
+ self.change_nursery_hash_base() # the nursery is empty now
#
debug_stop("gc-local")
+ def change_nursery_hash_base(self):
+ # The following should be enough to ensure that young objects
+ # tend to always get a different hash. It also makes sure that
+ # nursery_hash_base is not a multiple of 4, to avoid collisions
+ # with the hash of non-young objects.
+ hash_base = self.nursery_hash_base
+ hash_base += self.nursery_size - 1
+ if (hash_base & 3) == 0:
+ hash_base -= 1
+ self.nursery_hash_base = intmask(hash_base)
+
# ------------------------------------------------------------
@always_inline
@@ -244,13 +260,6 @@
"odd-valued (i.e. tagged) pointer unexpected here")
return self.nursery_start <= addr < self.nursery_top
- def malloc_local_copy(self, totalsize):
- """Allocate an object that will be used as a LOCAL COPY of
- some GLOBAL object."""
- localobj = self.sharedarea_tls.malloc_object(totalsize)
- self.copied_local_objects.append(localobj) # XXX KILL
- return localobj
-
def fresh_new_weakref(self, obj):
self.local_weakrefs.append(obj)
@@ -271,12 +280,12 @@
ll_assert(hdr.tid & GCFLAG_VISITED == 0, "unexpected VISITED [1]")
ll_assert(hdr.tid & GCFLAG_LOCAL_COPY == 0,"already LOCAL_COPY [1]")
hdr.tid |= GCFLAG_GLOBAL | GCFLAG_NOT_WRITTEN
- self._clear_version_for_global_object(hdr)
+ self._clear_revision_for_global_object(hdr)
#
self.copied_local_objects.clear()
- def _clear_version_for_global_object(self, hdr):
- # Reset the 'version' to initialize a newly global object.
+ def _clear_revision_for_global_object(self, hdr):
+ # Reset the 'revision' to initialize a newly global object.
# When translated with C code, we set it to 1.
# When non-translated, we reset it instead to '_uninitialized'
# to simulate the fact that the C code might change it.
@@ -404,50 +413,24 @@
return
#
# If 'obj' was already forwarded, change it to its forwarding address.
- # If 'obj' has already a shadow but isn't forwarded so far, use it.
- # The common case is the "else" part, so we use only one test to
- # know if we are in the common case or not.
- if hdr.tid & (GCFLAG_VISITED | GCFLAG_HAS_SHADOW):
- #
- if hdr.tid & GCFLAG_VISITED:
- root.address[0] = hdr_revision(hdr)
- return
- #
- # Case of GCFLAG_HAS_SHADOW. See comments below.
- size_gc_header = self.gc.gcheaderbuilder.size_gc_header
- totalsize = size_gc_header + size
- hdr.tid &= ~GCFLAG_HAS_SHADOW
- newobj = hdr_revision(hdr)
- newhdr = self.gc.header(newobj)
- #
- saved_version = hdr_revision(newhdr)
- llmemory.raw_memcopy(obj - size_gc_header,
- newobj - size_gc_header,
- totalsize)
- set_hdr_revision(newhdr, saved_version)
- newhdr.tid = hdr.tid | GCFLAG_VISITED
- #
- else:
- #
- # First visit to 'obj': we must move this YOUNG obj out of the
- # nursery.
- size_gc_header = self.gc.gcheaderbuilder.size_gc_header
- totalsize = size_gc_header + size
- #
- # Common case: allocate a new nonmovable location for it.
- newobj = self._malloc_out_of_nursery(totalsize)
- #
- # Copy it. Note that references to other objects in the
- # nursery are kept unchanged in this step.
- llmemory.raw_memcopy(obj - size_gc_header,
- newobj - size_gc_header,
- totalsize)
- #
- # Register the object here, not before the memcopy() that would
- # overwrite its 'version' field
- self._register_newly_malloced_obj(newobj)
+ if hdr.tid & GCFLAG_VISITED:
+ root.address[0] = hdr_revision(hdr)
+ return
#
- # Set the YOUNG copy's GCFLAG_VISITED and set its version to
+ # First visit to 'obj': we must move this YOUNG obj out of the
+ # nursery. This is the common case. Allocate a new location
+ # for it outside the nursery.
+ newobj = self.duplicate_obj(obj, size)
+ #
+ # Note that references from 'obj' to other objects in the
+ # nursery are kept unchanged in this step: they are copied
+ # verbatim to 'newobj'.
+ #
+ # Register the object here, not before the memcopy() that would
+ # overwrite its 'revision' field
+ self._register_newly_malloced_obj(newobj)
+ #
+ # Set the YOUNG copy's GCFLAG_VISITED and set its revision to
# point to the OLD copy.
hdr.tid |= GCFLAG_VISITED
set_hdr_revision(hdr, newobj)
@@ -461,8 +444,45 @@
# walk 'pending_list'.
self.pending.append(newobj)
- def _malloc_out_of_nursery(self, totalsize):
- return self.sharedarea_tls.malloc_object(totalsize)
+ def duplicate_obj(self, obj, objsize):
+ size_gc_header = self.gc.gcheaderbuilder.size_gc_header
+ totalsize_without_hash = size_gc_header + objsize
+ if self.gc.header(obj).tid & GCFLAG_HASHMASK:
+ newtotalsize = totalsize_without_hash + (
+ llmemory.sizeof(lltype.Signed))
+ else:
+ newtotalsize = totalsize_without_hash
+ #
+ newobj = self.sharedarea_tls.malloc_object(newtotalsize)
+ #
+ # Initialize the copy by doing a memcpy of the bytes.
+ # The object header of newobj will then be fixed by the C code.
+ llmemory.raw_memcopy(obj - size_gc_header,
+ newobj - size_gc_header,
+ totalsize_without_hash)
+ #
+ newhdr = self.gc.header(newobj)
+ if newhdr.tid & GCFLAG_HASHMASK:
+ hash = self._get_object_hash(obj, objsize, newhdr.tid)
+ newaddr = llarena.getfakearenaaddress(newobj)
+ (newaddr + objsize).signed[0] = hash
+ newhdr.tid |= GC_HASH_HASFIELD
+ #
+ return newobj
+
+ def _get_object_hash(self, obj, objsize, tid):
+ # Returns the hash of the object; 'tid' must not be GC_HASH_NOTTAKEN.
+ gc_hash = tid & GCFLAG_HASHMASK
+ if gc_hash == GC_HASH_HASFIELD:
+ obj = llarena.getfakearenaaddress(obj)
+ return (obj + objsize).signed[0]
+ elif gc_hash == GC_HASH_TAKEN_ADDR:
+ return mangle_hash(llmemory.cast_adr_to_int(obj))
+ elif gc_hash == GC_HASH_TAKEN_NURS:
+ return mangle_hash(intmask(llmemory.cast_adr_to_int(obj) +
+ self.nursery_hash_base))
+ else:
+ assert 0, "gc_hash == GC_HASH_NOTTAKEN"
def _register_newly_malloced_obj(self, obj):
self.sharedarea_tls.add_regular(obj)
diff --git a/pypy/rpython/memory/gc/test/test_stmgc.py b/pypy/rpython/memory/gc/test/test_stmgc.py
--- a/pypy/rpython/memory/gc/test/test_stmgc.py
+++ b/pypy/rpython/memory/gc/test/test_stmgc.py
@@ -222,10 +222,10 @@
else:
L = self.gc._stm_duplicate(R)
hdr = self.gc.header(L)
- assert hdr.tid & GCFLAG_GLOBAL
- hdr.tid &= ~(GCFLAG_GLOBAL | GCFLAG_POSSIBLY_OUTDATED)
+ assert hdr.tid & GCFLAG_GLOBAL == 0
+ assert hdr.tid & GCFLAG_POSSIBLY_OUTDATED == 0
+ assert hdr.tid & GCFLAG_LOCAL_COPY
assert hdr.tid & GCFLAG_NOT_WRITTEN
- hdr.tid |= GCFLAG_LOCAL_COPY
set_hdr_revision(hdr, R) # back-reference to the original
self.gc.stm_operations.tldict_add(R, L)
self.gc.stm_operations._transactional_copies.append((R, L))
@@ -583,10 +583,12 @@
s, s_adr = self.malloc(S)
i = self.gc.identityhash(s)
assert i == mangle_hash(llmemory.cast_adr_to_int(s_adr))
+ self.gc.collect(0)
+ assert self.gc.identityhash(s) == i
def test_hash_of_globallocal(self):
s, s_adr = self.malloc(S, globl=True)
- t_adr = self.gc.stm_writebarrier(s_adr) # make a local copy
+ t_adr = self.stm_writebarrier(s_adr) # make a local copy
t = llmemory.cast_adr_to_ptr(t_adr, llmemory.GCREF)
i = self.gc.identityhash(t)
assert i == mangle_hash(llmemory.cast_adr_to_int(s_adr))
More information about the pypy-commit
mailing list