[pypy-svn] r54611 - in pypy/dist/pypy/rpython: lltypesystem memory memory/gc memory/gc/test memory/gctransform memory/test
arigo at codespeak.net
arigo at codespeak.net
Sat May 10 10:20:50 CEST 2008
Author: arigo
Date: Sat May 10 10:20:48 2008
New Revision: 54611
Added:
pypy/dist/pypy/rpython/memory/gc/test/test_direct.py
- copied unchanged from r54605, pypy/branch/gc-tweak/pypy/rpython/memory/gc/test/test_direct.py
pypy/dist/pypy/rpython/memory/lldict.py
- copied unchanged from r54605, pypy/branch/gc-tweak/pypy/rpython/memory/lldict.py
pypy/dist/pypy/rpython/memory/test/test_lldict.py
- copied unchanged from r54605, pypy/branch/gc-tweak/pypy/rpython/memory/test/test_lldict.py
Modified:
pypy/dist/pypy/rpython/lltypesystem/llmemory.py
pypy/dist/pypy/rpython/lltypesystem/lltype.py
pypy/dist/pypy/rpython/lltypesystem/opimpl.py
pypy/dist/pypy/rpython/lltypesystem/rdict.py
pypy/dist/pypy/rpython/memory/gc/base.py
pypy/dist/pypy/rpython/memory/gc/generation.py
pypy/dist/pypy/rpython/memory/gc/hybrid.py
pypy/dist/pypy/rpython/memory/gc/semispace.py
pypy/dist/pypy/rpython/memory/gctransform/framework.py
pypy/dist/pypy/rpython/memory/gctypelayout.py
pypy/dist/pypy/rpython/memory/gcwrapper.py
pypy/dist/pypy/rpython/memory/support.py
pypy/dist/pypy/rpython/memory/test/test_gc.py
pypy/dist/pypy/rpython/memory/test/test_transformed_gc.py
Log:
Merge the gc-tweak branch.
(svn merge -r54384:54605 svn+ssh://codespeak.net/svn/pypy/branch/gc-tweak)
* Build on top of the GenerationGC's GCFLAG_NO_HEAP_PTRS
flag to make the HybridGC a full 3-generations collector.
This increases the complexity of the HybridGC, but it
seems worth the trouble: it seems to remove the few bad
cases on tuatara's microbenchmarks where the HybridGC was
much slower than the others, and it gives +20% on
"pypy-c translate.py --gc=semispace targetgcbench".
* Add optional heap consistency checks after each collection.
Useful to find GC bugs.
* Add a few direct GC tests that can run without the
llinterpreter at all (gc/test_direct.py).
* Add an AddressDict. Based on lltypesystem/rdict.py
but without using the GC itself.
* Kill the obscure id() logic, and replace it with something
custom but sane in each GC class. Motivated by profiling
showing that 26% of L2 cache read misses came from the
old id() logic in the above pypy-c translate.py run.
Modified: pypy/dist/pypy/rpython/lltypesystem/llmemory.py
==============================================================================
--- pypy/dist/pypy/rpython/lltypesystem/llmemory.py (original)
+++ pypy/dist/pypy/rpython/lltypesystem/llmemory.py Sat May 10 10:20:48 2008
@@ -582,8 +582,10 @@
def cast_adr_to_int(adr):
return adr._cast_to_int()
+_NONGCREF = lltype.Ptr(lltype.OpaqueType('NONGCREF'))
def cast_int_to_adr(int):
- raise NotImplementedError("cast_int_to_adr")
+ ptr = lltype.cast_int_to_ptr(_NONGCREF, int)
+ return cast_ptr_to_adr(ptr)
# ____________________________________________________________
# Weakrefs.
Modified: pypy/dist/pypy/rpython/lltypesystem/lltype.py
==============================================================================
--- pypy/dist/pypy/rpython/lltypesystem/lltype.py (original)
+++ pypy/dist/pypy/rpython/lltypesystem/lltype.py Sat May 10 10:20:48 2008
@@ -905,6 +905,8 @@
return p # primitive
if not p:
return None # null pointer
+ if type(p._obj0) is int:
+ return p # a pointer obtained by cast_int_to_ptr
container = p._obj._normalizedcontainer()
if container is not p._obj:
p = _ptr(Ptr(typeOf(container)), container, p._solid)
@@ -985,7 +987,7 @@
_obj = property(_getobj)
def _was_freed(self):
- return (self._obj0 is not None and
+ return (type(self._obj0) not in (type(None), int) and
self._getobj(check=False)._was_freed())
def __getattr__(self, field_name): # ! can only return basic or ptr !
Modified: pypy/dist/pypy/rpython/lltypesystem/opimpl.py
==============================================================================
--- pypy/dist/pypy/rpython/lltypesystem/opimpl.py (original)
+++ pypy/dist/pypy/rpython/lltypesystem/opimpl.py Sat May 10 10:20:48 2008
@@ -383,7 +383,8 @@
def op_debug_print(*args):
for arg in args:
- print arg
+ print arg,
+ print
# ____________________________________________________________
Modified: pypy/dist/pypy/rpython/lltypesystem/rdict.py
==============================================================================
--- pypy/dist/pypy/rpython/lltypesystem/rdict.py (original)
+++ pypy/dist/pypy/rpython/lltypesystem/rdict.py Sat May 10 10:20:48 2008
@@ -76,6 +76,8 @@
# compute the shape of the DICTENTRY structure
entryfields = []
entrymeths = {
+ 'allocate': lltype.typeMethod(_ll_malloc_entries),
+ 'delete': _ll_free_entries,
'must_clear_key': (isinstance(self.DICTKEY, lltype.Ptr)
and self.DICTKEY._needsgc()),
'must_clear_value': (isinstance(self.DICTVALUE, lltype.Ptr)
@@ -189,6 +191,7 @@
}
adtmeths['KEY'] = self.DICTKEY
adtmeths['VALUE'] = self.DICTVALUE
+ adtmeths['allocate'] = lltype.typeMethod(_ll_malloc_dict)
self.DICT.become(lltype.GcStruct("dicttable", adtmeths=adtmeths,
*fields))
@@ -485,7 +488,7 @@
new_size = old_size * 2
while new_size > DICT_INITSIZE and d.num_items < new_size / 4:
new_size /= 2
- d.entries = lltype.malloc(lltype.typeOf(old_entries).TO, new_size, zero=True)
+ d.entries = lltype.typeOf(old_entries).TO.allocate(new_size)
d.num_items = 0
d.num_pristine_entries = new_size
i = 0
@@ -495,6 +498,7 @@
entry = old_entries[i]
ll_dict_insertclean(d, entry.key, entry.value, hash)
i += 1
+ old_entries.delete()
# ------- a port of CPython's dictobject.c's lookdict implementation -------
PERTURB_SHIFT = 5
@@ -582,8 +586,8 @@
DICT_INITSIZE = 8
def ll_newdict(DICT):
- d = lltype.malloc(DICT)
- d.entries = lltype.malloc(DICT.entries.TO, DICT_INITSIZE, zero=True)
+ d = DICT.allocate()
+ d.entries = DICT.entries.TO.allocate(DICT_INITSIZE)
d.num_items = 0
d.num_pristine_entries = DICT_INITSIZE
return d
@@ -594,13 +598,23 @@
n = DICT_INITSIZE
while n < length_estimate:
n *= 2
- d = lltype.malloc(DICT)
- d.entries = lltype.malloc(DICT.entries.TO, n, zero=True)
+ d = DICT.allocate()
+ d.entries = DICT.entries.TO.allocate(n)
d.num_items = 0
d.num_pristine_entries = n
return d
ll_newdict_size.oopspec = 'newdict()'
+# pypy.rpython.memory.lldict uses a dict based on Struct and Array
+# instead of GcStruct and GcArray, which is done by using different
+# 'allocate' and 'delete' adtmethod implementations than the ones below
+def _ll_malloc_dict(DICT):
+ return lltype.malloc(DICT)
+def _ll_malloc_entries(ENTRIES, n):
+ return lltype.malloc(ENTRIES, n, zero=True)
+def _ll_free_entries(entries):
+ pass
+
def rtype_r_dict(hop):
r_dict = hop.r_result
@@ -691,8 +705,8 @@
def ll_copy(dict):
DICT = lltype.typeOf(dict).TO
dictsize = len(dict.entries)
- d = lltype.malloc(DICT)
- d.entries = lltype.malloc(DICT.entries.TO, dictsize, zero=True)
+ d = DICT.allocate()
+ d.entries = DICT.entries.TO.allocate(dictsize)
d.num_items = dict.num_items
d.num_pristine_entries = dict.num_pristine_entries
if hasattr(DICT, 'fnkeyeq'): d.fnkeyeq = dict.fnkeyeq
@@ -713,10 +727,11 @@
def ll_clear(d):
if len(d.entries) == d.num_pristine_entries == DICT_INITSIZE:
return
- DICT = lltype.typeOf(d).TO
- d.entries = lltype.malloc(DICT.entries.TO, DICT_INITSIZE, zero=True)
+ old_entries = d.entries
+ d.entries = lltype.typeOf(old_entries).TO.allocate(DICT_INITSIZE)
d.num_items = 0
d.num_pristine_entries = DICT_INITSIZE
+ old_entries.delete()
def ll_update(dic1, dic2):
entries = dic2.entries
Modified: pypy/dist/pypy/rpython/memory/gc/base.py
==============================================================================
--- pypy/dist/pypy/rpython/memory/gc/base.py (original)
+++ pypy/dist/pypy/rpython/memory/gc/base.py Sat May 10 10:20:48 2008
@@ -8,6 +8,12 @@
malloc_zero_filled = False
prebuilt_gc_objects_are_static_roots = True
+ # The following flag enables costly consistency checks after each
+ # collection. It is automatically set to True by test_gc.py. The
+ # checking logic is translatable, so the flag can be set to True
+ # here before translation.
+ DEBUG = False
+
def set_query_functions(self, is_varsize, has_gcptr_in_varsize,
is_gcarrayofgcptr,
getfinalizer,
@@ -148,113 +154,49 @@
length -= 1
trace._annspecialcase_ = 'specialize:arg(2)'
+ def debug_check_consistency(self):
+ """To use after a collection. If self.DEBUG is set, this
+ enumerates all roots and traces all objects to check if we didn't
+ accidentally free a reachable object or forget to update a pointer
+ to an object that moved.
+ """
+ if self.DEBUG:
+ from pypy.rlib.objectmodel import we_are_translated
+ from pypy.rpython.memory.support import AddressDict
+ self._debug_seen = AddressDict()
+ self._debug_pending = self.AddressStack()
+ if not we_are_translated():
+ self.root_walker._walk_prebuilt_gc(self._debug_record)
+ callback = GCBase._debug_callback
+ self.root_walker.walk_roots(callback, callback, callback)
+ pending = self._debug_pending
+ while pending.non_empty():
+ obj = pending.pop()
+ self.debug_check_object(obj)
+ self.trace(obj, self._debug_callback2, None)
+ self._debug_seen.delete()
+ self._debug_pending.delete()
+
+ def _debug_record(self, obj):
+ seen = self._debug_seen
+ if not seen.contains(obj):
+ seen.add(obj)
+ self._debug_pending.append(obj)
+ def _debug_callback(self, root):
+ obj = root.address[0]
+ ll_assert(bool(obj), "NULL address from walk_roots()")
+ self._debug_record(obj)
+ def _debug_callback2(self, pointer, ignored):
+ obj = pointer.address[0]
+ if obj:
+ self._debug_record(obj)
-class MovingGCBase(GCBase):
- moving_gc = True
+ def debug_check_object(self, obj):
+ pass
- def __init__(self):
- # WaRnInG! Putting GC objects as fields of the GC itself is
- # basically *not* working in general! When running tests with
- # the gcwrapper, there is no way they can be returned from
- # get_roots_from_llinterp(). When the whole GC goes through the
- # gctransformer, though, it works if the fields are read-only
- # (and thus only ever reference a prebuilt list or dict). These
- # prebuilt lists or dicts themselves can be mutated and point to
- # more non-prebuild GC objects; this is fine because the
- # internal GC ptr in the prebuilt list or dict is found by
- # gctypelayout and listed in addresses_of_static_ptrs.
-
- # XXX I'm not sure any more about the warning above. The fields
- # of 'self' are found by gctypelayout and added to
- # addresses_of_static_ptrs_in_nongc, so in principle they could
- # be mutated and still be found by collect().
-
- self.wr_to_objects_with_id = []
- self.object_id_dict = {}
- self.object_id_dict_ends_at = 0
- def id(self, ptr):
- self.disable_finalizers()
- try:
- return self._compute_id(ptr)
- finally:
- self.enable_finalizers()
-
- def _compute_id(self, ptr):
- # XXX this may explode if --no-translation-rweakref is specified
- # ----------------------------------------------------------------
- # Basic logic: the list item wr_to_objects_with_id[i] contains a
- # weakref to the object whose id is i + 1. The object_id_dict is
- # an optimization that tries to reduce the number of linear
- # searches in this list.
- # ----------------------------------------------------------------
- # Invariant: if object_id_dict_ends_at >= 0, then object_id_dict
- # contains all pairs {address: id}, for the addresses
- # of all objects that are the targets of the weakrefs of the
- # following slice: wr_to_objects_with_id[:object_id_dict_ends_at].
- # ----------------------------------------------------------------
- # Essential: as long as notify_objects_just_moved() is not called,
- # we assume that the objects' addresses did not change. We also
- # assume that the address of a live object cannot be reused for
- # another object without an intervening notify_objects_just_moved()
- # call, but this could be fixed easily if needed.
- # ----------------------------------------------------------------
- # First check the dictionary
- i = self.object_id_dict_ends_at
- if i < 0:
- self.object_id_dict.clear() # dictionary invalid
- self.object_id_dict_ends_at = 0
- i = 0
- else:
- adr = llmemory.cast_ptr_to_adr(ptr)
- try:
- i = self.object_id_dict[adr]
- except KeyError:
- pass
- else:
- # double-check that the answer we got is correct
- lst = self.wr_to_objects_with_id
- target = llmemory.weakref_deref(llmemory.GCREF, lst[i])
- ll_assert(target == ptr, "bogus object_id_dict")
- return i + 1 # found via the dict
- # Walk the tail of the list, where entries are not also in the dict
- lst = self.wr_to_objects_with_id
- end = len(lst)
- freeentry = -1
- while i < end:
- target = llmemory.weakref_deref(llmemory.GCREF, lst[i])
- if not target:
- freeentry = i
- else:
- ll_assert(self.get_type_id(llmemory.cast_ptr_to_adr(target))
- > 0, "bogus weakref in compute_id()")
- # record this entry in the dict
- adr = llmemory.cast_ptr_to_adr(target)
- self.object_id_dict[adr] = i
- if target == ptr:
- break # found
- i += 1
- else:
- # not found
- wr = llmemory.weakref_create(ptr)
- if freeentry < 0:
- ll_assert(end == len(lst), "unexpected lst growth in gc_id")
- i = end
- lst.append(wr)
- else:
- i = freeentry # reuse the id() of a dead object
- lst[i] = wr
- adr = llmemory.cast_ptr_to_adr(ptr)
- self.object_id_dict[adr] = i
- # all entries up to and including index 'i' are now valid in the dict
- # unless a collection occurred while we were working, in which case
- # the object_id_dict is bogus anyway
- if self.object_id_dict_ends_at >= 0:
- self.object_id_dict_ends_at = i + 1
- return i + 1 # this produces id() values 1, 2, 3, 4...
-
- def notify_objects_just_moved(self):
- self.object_id_dict_ends_at = -1
+class MovingGCBase(GCBase):
+ moving_gc = True
def choose_gc_from_config(config):
Modified: pypy/dist/pypy/rpython/memory/gc/generation.py
==============================================================================
--- pypy/dist/pypy/rpython/memory/gc/generation.py (original)
+++ pypy/dist/pypy/rpython/memory/gc/generation.py Sat May 10 10:20:48 2008
@@ -16,8 +16,12 @@
# pointer to a young object.
GCFLAG_NO_YOUNG_PTRS = SemiSpaceGC.first_unused_gcflag << 0
-# The following flag is set for static roots which are not on the list
-# of static roots yet, but will appear with write barrier
+# The following flag is set on some last-generation objects (== prebuilt
+# objects for GenerationGC, but see also HybridGC). The flag is set
+# unless the object is already listed in 'last_generation_root_objects'.
+# When a pointer is written inside an object with GCFLAG_NO_HEAP_PTRS
+# set, the write_barrier clears the flag and adds the object to
+# 'last_generation_root_objects'.
GCFLAG_NO_HEAP_PTRS = SemiSpaceGC.first_unused_gcflag << 1
class GenerationGC(SemiSpaceGC):
@@ -70,6 +74,8 @@
self.lb_young_var_basesize = sz
def setup(self):
+ self.last_generation_root_objects = self.AddressStack()
+ self.young_objects_with_id = self.AddressDict()
SemiSpaceGC.setup(self)
self.set_nursery_size(self.initial_nursery_size)
# the GC is fully setup now. The rest can make use of it.
@@ -244,6 +250,7 @@
def semispace_collect(self, size_changing=False):
self.reset_young_gcflags() # we are doing a full collection anyway
self.weakrefs_grow_older()
+ self.ids_grow_older()
self.reset_nursery()
if DEBUG_PRINT:
llop.debug_print(lltype.Void, "major collect, size changing", size_changing)
@@ -281,6 +288,42 @@
obj = self.young_objects_with_weakrefs.pop()
self.objects_with_weakrefs.append(obj)
+ def collect_roots(self):
+ """GenerationGC: collects all roots.
+ HybridGC: collects all roots, excluding the generation 3 ones.
+ """
+ # Warning! References from static (and possibly gen3) objects
+ # are found by collect_last_generation_roots(), which must be
+ # called *first*! If it is called after walk_roots(), then the
+ # HybridGC explodes if one of the _collect_root causes an object
+ # to be added to self.last_generation_root_objects. Indeed, in
+ # this case, the newly added object is traced twice: once by
+ # collect_last_generation_roots() and once because it was added
+ # in self.rawmalloced_objects_to_trace.
+ self.collect_last_generation_roots()
+ self.root_walker.walk_roots(
+ SemiSpaceGC._collect_root, # stack roots
+ SemiSpaceGC._collect_root, # static in prebuilt non-gc structures
+ None) # we don't need the static in prebuilt gc objects
+
+ def collect_last_generation_roots(self):
+ stack = self.last_generation_root_objects
+ self.last_generation_root_objects = self.AddressStack()
+ while stack.non_empty():
+ obj = stack.pop()
+ self.header(obj).tid |= GCFLAG_NO_HEAP_PTRS
+ # ^^^ the flag we just added will be removed immediately if
+ # the object still contains pointers to younger objects
+ self.trace(obj, self._trace_external_obj, obj)
+ stack.delete()
+
+ def _trace_external_obj(self, pointer, obj):
+ addr = pointer.address[0]
+ if addr != NULL:
+ newaddr = self.copy(addr)
+ pointer.address[0] = newaddr
+ self.write_into_last_generation_obj(obj, newaddr)
+
# ____________________________________________________________
# Implementation of nursery-only collections
@@ -302,11 +345,13 @@
# GCFLAG_NO_YOUNG_PTRS set again by trace_and_drag_out_of_nursery
if self.young_objects_with_weakrefs.non_empty():
self.invalidate_young_weakrefs()
- self.notify_objects_just_moved()
+ if self.young_objects_with_id.length() > 0:
+ self.update_young_objects_with_id()
# mark the nursery as free and fill it with zeroes again
llarena.arena_reset(self.nursery, self.nursery_size, True)
if DEBUG_PRINT:
llop.debug_print(lltype.Void, "percent survived:", float(scan - beginning) / self.nursery_size)
+ #self.debug_check_consistency() # -- quite expensive
else:
# no nursery - this occurs after a full collect, triggered either
# just above or by some previous non-nursery-based allocation.
@@ -390,25 +435,106 @@
if self.header(addr_struct).tid & GCFLAG_NO_YOUNG_PTRS:
self.remember_young_pointer(addr_struct, newvalue)
- def append_to_static_roots(self, pointer, arg):
- self.root_walker.append_static_root(pointer)
-
- def move_to_static_roots(self, addr_struct):
- objhdr = self.header(addr_struct)
- objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
- self.trace(addr_struct, self.append_to_static_roots, None)
-
def remember_young_pointer(self, addr_struct, addr):
ll_assert(not self.is_in_nursery(addr_struct),
"nursery object with GCFLAG_NO_YOUNG_PTRS")
- oldhdr = self.header(addr_struct)
if self.is_in_nursery(addr):
self.old_objects_pointing_to_young.append(addr_struct)
- oldhdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
- if oldhdr.tid & GCFLAG_NO_HEAP_PTRS:
- self.move_to_static_roots(addr_struct)
+ self.header(addr_struct).tid &= ~GCFLAG_NO_YOUNG_PTRS
+ elif addr == NULL:
+ return
+ self.write_into_last_generation_obj(addr_struct, addr)
remember_young_pointer._dont_inline_ = True
+ def write_into_last_generation_obj(self, addr_struct, addr):
+ objhdr = self.header(addr_struct)
+ if objhdr.tid & GCFLAG_NO_HEAP_PTRS:
+ if not self.is_last_generation(addr):
+ objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
+ self.last_generation_root_objects.append(addr_struct)
+
+ def is_last_generation(self, obj):
+ # overridden by HybridGC
+ return (self.header(obj).tid & GCFLAG_EXTERNAL) != 0
+
+ def _compute_id(self, obj):
+ if self.is_in_nursery(obj):
+ result = self.young_objects_with_id.get(obj)
+ if not result:
+ result = self._next_id()
+ self.young_objects_with_id.setitem(obj, result)
+ return result
+ else:
+ return SemiSpaceGC._compute_id(self, obj)
+
+ def update_young_objects_with_id(self):
+ self.young_objects_with_id.foreach(self._update_object_id,
+ self.objects_with_id)
+ self.young_objects_with_id.clear()
+ # NB. the clear() also makes the dictionary shrink back to its
+ # minimal size, which is actually a good idea: a large, mostly-empty
+ # table is bad for the next call to 'foreach'.
+
+ def ids_grow_older(self):
+ self.young_objects_with_id.foreach(self._id_grow_older, None)
+ self.young_objects_with_id.clear()
+
+ def _id_grow_older(self, obj, id, ignored):
+ self.objects_with_id.setitem(obj, id)
+
+ def debug_check_object(self, obj):
+ """Check the invariants about 'obj' that should be true
+ between collections."""
+ SemiSpaceGC.debug_check_object(self, obj)
+ tid = self.header(obj).tid
+ if tid & GCFLAG_NO_YOUNG_PTRS:
+ ll_assert(not self.is_in_nursery(obj),
+ "nursery object with GCFLAG_NO_YOUNG_PTRS")
+ self.trace(obj, self._debug_no_nursery_pointer, None)
+ elif not self.is_in_nursery(obj):
+ ll_assert(self._d_oopty.contains(obj),
+ "missing from old_objects_pointing_to_young")
+ if tid & GCFLAG_NO_HEAP_PTRS:
+ ll_assert(self.is_last_generation(obj),
+ "GCFLAG_NO_HEAP_PTRS on non-3rd-generation object")
+ self.trace(obj, self._debug_no_gen1or2_pointer, None)
+ elif self.is_last_generation(obj):
+ ll_assert(self._d_lgro.contains(obj),
+ "missing from last_generation_root_objects")
+
+ def _debug_no_nursery_pointer(self, root, ignored):
+ ll_assert(not self.is_in_nursery(root.address[0]),
+ "GCFLAG_NO_YOUNG_PTRS but found a young pointer")
+ def _debug_no_gen1or2_pointer(self, root, ignored):
+ target = root.address[0]
+ ll_assert(not target or self.is_last_generation(target),
+ "GCFLAG_NO_HEAP_PTRS but found a pointer to gen1or2")
+
+ def debug_check_consistency(self):
+ if self.DEBUG:
+ self._d_oopty = self.old_objects_pointing_to_young.stack2dict()
+ self._d_lgro = self.last_generation_root_objects.stack2dict()
+ SemiSpaceGC.debug_check_consistency(self)
+ self._d_oopty.delete()
+ self._d_lgro.delete()
+ self.old_objects_pointing_to_young.foreach(
+ self._debug_check_flag_1, None)
+ self.last_generation_root_objects.foreach(
+ self._debug_check_flag_2, None)
+
+ def _debug_check_flag_1(self, obj, ignored):
+ ll_assert(not (self.header(obj).tid & GCFLAG_NO_YOUNG_PTRS),
+ "unexpected GCFLAG_NO_YOUNG_PTRS")
+ def _debug_check_flag_2(self, obj, ignored):
+ ll_assert(not (self.header(obj).tid & GCFLAG_NO_HEAP_PTRS),
+ "unexpected GCFLAG_NO_HEAP_PTRS")
+
+ def debug_check_can_copy(self, obj):
+ if self.is_in_nursery(obj):
+ pass # it's ok to copy an object out of the nursery
+ else:
+ SemiSpaceGC.debug_check_can_copy(self, obj)
+
# ____________________________________________________________
import os
Modified: pypy/dist/pypy/rpython/memory/gc/hybrid.py
==============================================================================
--- pypy/dist/pypy/rpython/memory/gc/hybrid.py (original)
+++ pypy/dist/pypy/rpython/memory/gc/hybrid.py Sat May 10 10:20:48 2008
@@ -2,30 +2,72 @@
from pypy.rpython.memory.gc.semispace import SemiSpaceGC
from pypy.rpython.memory.gc.semispace import DEBUG_PRINT
from pypy.rpython.memory.gc.generation import GenerationGC, GCFLAG_FORWARDED
+from pypy.rpython.memory.gc.semispace import GCFLAG_EXTERNAL
from pypy.rpython.memory.gc.generation import GCFLAG_NO_YOUNG_PTRS
+from pypy.rpython.memory.gc.generation import GCFLAG_NO_HEAP_PTRS
from pypy.rpython.lltypesystem import lltype, llmemory, llarena
from pypy.rpython.lltypesystem.llmemory import raw_malloc_usage
from pypy.rpython.lltypesystem.lloperation import llop
from pypy.rlib.debug import ll_assert
from pypy.rlib.rarithmetic import ovfcheck
-# The "age" of an object is the number of times it is copied between the
-# two semispaces. When an object would reach MAX_SEMISPACE_AGE, it is
-# instead copied to a nonmoving location. For example, a value of 4
+# _______in the semispaces_________ ______external (non-moving)_____
+# / \ / \
+# ___raw_malloc'ed__ _prebuilt_
+# +----------------------------------+ / \ / \
+# | | | | | | | |
+# | | | | | | | | age < max age == max
+# |nur-|o|o|o|o| | | +---+ +---+ +---+
+# |sery|b|b|b|b|free| empty | |obj| |obj| |obj|
+# | |j|j|j|j| | | +---+ +---+ +---+
+# | | | | | | | | +---+ +---+ +---+
+# +-----------------+----------------+ |obj| |obj| |obj|
+# age <= max +---+ +---+ +---+
+#
+# |gen1|------------- generation 2 -----------------|-----generation 3-----|
+#
+# Object lists:
+# * gen2_rawmalloced_objects
+# * gen3_rawmalloced_objects
+# * old_objects_pointing_to_young: gen2or3 objs that point to gen1 objs
+# * last_generation_root_objects: gen3 objs that point to gen1or2 objs
+#
+# How to tell the objects apart:
+# * external: tid & GCFLAG_EXTERNAL
+# * gen1: is_in_nursery(obj)
+# * gen3: (tid & (GCFLAG_EXTERNAL|GCFLAG_AGE_MASK)) ==
+# (GCFLAG_EXTERNAL|GCFLAG_AGE_MAX)
+#
+# Some invariants:
+# * gen3 are either GCFLAG_NO_HEAP_PTRS or in 'last_generation_root_objects'
+# * between collections, GCFLAG_UNVISITED set exactly for gen2_rawmalloced
+#
+# A malloc_varsize() of large objects returns objects that are external
+# but initially of generation 2. Old objects from the semispaces are
+# moved to external objects directly as generation 3.
+
+# The "age" of an object is the number of times it survived a full
+# collection, without counting the step that moved it out of the nursery.
+# When a semispace-based object would grow older than MAX_SEMISPACE_AGE,
+# it is instead copied to a nonmoving location. For example, a value of 3
# ensures that an object is copied at most 5 times in total: from the
# nursery to the semispace, then three times between the two spaces,
# then one last time to a nonmoving location.
-MAX_SEMISPACE_AGE = 4
+MAX_SEMISPACE_AGE = 3
GCFLAG_UNVISITED = GenerationGC.first_unused_gcflag << 0
_gcflag_next_bit = GenerationGC.first_unused_gcflag << 1
GCFLAG_AGE_ONE = _gcflag_next_bit
-GCFLAG_AGE_MAX = _gcflag_next_bit * (MAX_SEMISPACE_AGE-1)
+GCFLAG_AGE_MAX = _gcflag_next_bit * MAX_SEMISPACE_AGE
GCFLAG_AGE_MASK = 0
while GCFLAG_AGE_MASK < GCFLAG_AGE_MAX:
GCFLAG_AGE_MASK |= _gcflag_next_bit
_gcflag_next_bit <<= 1
+# The 3rd generation objects are only collected after the following
+# number of calls to semispace_collect():
+GENERATION3_COLLECT_THRESHOLD = 20
+
class HybridGC(GenerationGC):
"""A two-generations semi-space GC like the GenerationGC,
@@ -33,6 +75,7 @@
they are allocated via raw_malloc/raw_free in a mark-n-sweep fashion.
"""
first_unused_gcflag = _gcflag_next_bit
+ prebuilt_gc_objects_are_static_roots = True
# the following values override the default arguments of __init__ when
# translating to a real backend.
@@ -45,6 +88,8 @@
def __init__(self, *args, **kwds):
large_object = kwds.pop('large_object', 24)
large_object_gcptrs = kwds.pop('large_object_gcptrs', 32)
+ self.generation3_collect_threshold = kwds.pop(
+ 'generation3_collect_threshold', GENERATION3_COLLECT_THRESHOLD)
GenerationGC.__init__(self, *args, **kwds)
# Objects whose total size is at least 'large_object' bytes are
@@ -66,10 +111,12 @@
self.large_objects_collect_trigger = self.space_size
if DEBUG_PRINT:
self._initial_trigger = self.large_objects_collect_trigger
- self.pending_external_object_list = self.AddressDeque()
+ self.rawmalloced_objects_to_trace = self.AddressStack()
+ self.count_semispaceonly_collects = 0
def setup(self):
- self.large_objects_list = self.AddressDeque()
+ self.gen2_rawmalloced_objects = self.AddressStack()
+ self.gen3_rawmalloced_objects = self.AddressStack()
GenerationGC.setup(self)
def set_max_heap_size(self, size):
@@ -184,7 +231,7 @@
# need to follow suit.
llmemory.raw_memclear(result, totalsize)
size_gc_header = self.gcheaderbuilder.size_gc_header
- self.large_objects_list.append(result + size_gc_header)
+ self.gen2_rawmalloced_objects.append(result + size_gc_header)
return result
def allocate_external_object(self, totalsize):
@@ -192,19 +239,62 @@
# If so, we'd also use arena_reset() in malloc_varsize_marknsweep().
return llmemory.raw_malloc(totalsize)
+ def init_gc_object_immortal(self, addr, typeid,
+ flags=(GCFLAG_NO_YOUNG_PTRS |
+ GCFLAG_NO_HEAP_PTRS |
+ GCFLAG_AGE_MAX)):
+ GenerationGC.init_gc_object_immortal(self, addr, typeid, flags)
+
+ # ___________________________________________________________________
+ # collect() and semispace_collect() are not synonyms in this GC: the
+ # former is a complete collect, while the latter is only collecting
+ # the semispaces and not always doing the mark-n-sweep pass over the
+ # external objects of 3rd generation.
+
+ def collect(self):
+ self.count_semispaceonly_collects = self.generation3_collect_threshold
+ GenerationGC.collect(self)
+
+ def is_collecting_gen3(self):
+ count = self.count_semispaceonly_collects
+ return count >= self.generation3_collect_threshold
+
# ___________________________________________________________________
# the following methods are hook into SemiSpaceGC.semispace_collect()
def starting_full_collect(self):
- # At the start of a collection, all raw_malloc'ed objects should
- # have the GCFLAG_UNVISITED bit set. No other object ever has
- # this bit set.
- ll_assert(not self.pending_external_object_list.non_empty(),
- "pending_external_object_list should be empty at start")
+ # At the start of a collection, the GCFLAG_UNVISITED bit is set
+ # exactly on the objects in gen2_rawmalloced_objects. Only
+ # raw_malloc'ed objects can ever have this bit set.
+ self.count_semispaceonly_collects += 1
+ if self.is_collecting_gen3():
+ # set the GCFLAG_UNVISITED on all rawmalloced generation-3 objects
+ # as well, to let them be recorded by visit_external_object()
+ self.gen3_rawmalloced_objects.foreach(self._set_gcflag_unvisited,
+ None)
+ ll_assert(not self.rawmalloced_objects_to_trace.non_empty(),
+ "rawmalloced_objects_to_trace should be empty at start")
if DEBUG_PRINT:
self._nonmoving_copy_count = 0
self._nonmoving_copy_size = 0
+ def _set_gcflag_unvisited(self, obj, ignored):
+ ll_assert(not (self.header(obj).tid & GCFLAG_UNVISITED),
+ "bogus GCFLAG_UNVISITED on gen3 obj")
+ self.header(obj).tid |= GCFLAG_UNVISITED
+
+ def collect_roots(self):
+ if not self.is_collecting_gen3():
+ GenerationGC.collect_roots(self)
+ else:
+ # as we don't record which prebuilt gc objects point to
+ # rawmalloced generation 3 objects, we have to trace all
+ # the prebuilt gc objects.
+ self.root_walker.walk_roots(
+ SemiSpaceGC._collect_root, # stack roots
+ SemiSpaceGC._collect_root, # static in prebuilt non-gc structs
+ SemiSpaceGC._collect_root) # static in prebuilt gc objects
+
def surviving(self, obj):
# To use during a collection. The objects that survive are the
# ones with GCFLAG_FORWARDED set and GCFLAG_UNVISITED not set.
@@ -213,12 +303,16 @@
flags = self.header(obj).tid & (GCFLAG_FORWARDED|GCFLAG_UNVISITED)
return flags == GCFLAG_FORWARDED
+ def is_last_generation(self, obj):
+ return ((self.header(obj).tid & (GCFLAG_EXTERNAL|GCFLAG_AGE_MASK)) ==
+ (GCFLAG_EXTERNAL|GCFLAG_AGE_MAX))
+
def visit_external_object(self, obj):
hdr = self.header(obj)
if hdr.tid & GCFLAG_UNVISITED:
# This is a not-visited-yet raw_malloced object.
hdr.tid -= GCFLAG_UNVISITED
- self.pending_external_object_list.append(obj)
+ self.rawmalloced_objects_to_trace.append(obj)
def make_a_copy(self, obj, objsize):
# During a full collect, all copied objects might implicitly come
@@ -246,20 +340,26 @@
# NB. the object can have a finalizer or be a weakref, but
# it's not an issue.
totalsize = self.size_gc_header() + objsize
- if DEBUG_PRINT:
- self._nonmoving_copy_count += 1
- self._nonmoving_copy_size += raw_malloc_usage(totalsize)
newaddr = self.allocate_external_object(totalsize)
if not newaddr:
return llmemory.NULL # can't raise MemoryError during a collect()
+ if DEBUG_PRINT:
+ self._nonmoving_copy_count += 1
+ self._nonmoving_copy_size += raw_malloc_usage(totalsize)
llmemory.raw_memcopy(obj - self.size_gc_header(), newaddr, totalsize)
newobj = newaddr + self.size_gc_header()
hdr = self.header(newobj)
hdr.tid |= self.GCFLAGS_FOR_NEW_EXTERNAL_OBJECTS
# GCFLAG_UNVISITED is not set
- self.large_objects_list.append(newobj)
- self.pending_external_object_list.append(newobj)
+ # GCFLAG_NO_HEAP_PTRS is not set either, conservatively. It may be
+ # set by the next collection's collect_last_generation_roots().
+ # This old object is immediately put at generation 3.
+ ll_assert(self.is_last_generation(newobj),
+ "make_a_nonmoving_copy: object too young")
+ self.gen3_rawmalloced_objects.append(newobj)
+ self.last_generation_root_objects.append(newobj)
+ self.rawmalloced_objects_to_trace.append(newobj) # visit me
return newobj
def scan_copied(self, scan):
@@ -270,23 +370,62 @@
newscan = GenerationGC.scan_copied(self, scan)
progress = newscan != scan
scan = newscan
- while self.pending_external_object_list.non_empty():
- obj = self.pending_external_object_list.popleft()
+ while self.rawmalloced_objects_to_trace.non_empty():
+ obj = self.rawmalloced_objects_to_trace.pop()
self.trace_and_copy(obj)
progress = True
return scan
def finished_full_collect(self):
- ll_assert(not self.pending_external_object_list.non_empty(),
- "pending_external_object_list should be empty at end")
- # free all mark-n-sweep-managed objects that have not been marked
- large_objects = self.large_objects_list
- remaining_large_objects = self.AddressDeque()
+ ll_assert(not self.rawmalloced_objects_to_trace.non_empty(),
+ "rawmalloced_objects_to_trace should be empty at end")
+ if DEBUG_PRINT:
+ llop.debug_print(lltype.Void,
+ "| [hybrid] made nonmoving: ",
+ self._nonmoving_copy_size, "bytes in",
+ self._nonmoving_copy_count, "objs")
+ # sweep the nonmarked rawmalloced objects
+ if self.is_collecting_gen3():
+ self.sweep_rawmalloced_objects(generation=3)
+ self.sweep_rawmalloced_objects(generation=2)
+ # As we just collected, it's fine to raw_malloc'ate up to space_size
+ # bytes again before we should force another collect.
+ self.large_objects_collect_trigger = self.space_size
+ if self.is_collecting_gen3():
+ self.count_semispaceonly_collects = 0
+ if DEBUG_PRINT:
+ self._initial_trigger = self.large_objects_collect_trigger
+
+ def sweep_rawmalloced_objects(self, generation):
+ # free all the rawmalloced objects of the specified generation
+ # that have not been marked
+ if generation == 2:
+ objects = self.gen2_rawmalloced_objects
+ # generation 2 sweep: if A points to an object B that
+ # moves from gen2 to gen3, it's possible that A no longer points
+ # to any gen2 object. In this case, A remains a bit too long in
+ # last_generation_root_objects, but this will be fixed by the
+ # next collect_last_generation_roots().
+ else:
+ objects = self.gen3_rawmalloced_objects
+ # generation 3 sweep: remove from last_generation_root_objects
+ # all the objects that we are about to free
+ gen3roots = self.last_generation_root_objects
+ newgen3roots = self.AddressStack()
+ while gen3roots.non_empty():
+ obj = gen3roots.pop()
+ if not (self.header(obj).tid & GCFLAG_UNVISITED):
+ newgen3roots.append(obj)
+ gen3roots.delete()
+ self.last_generation_root_objects = newgen3roots
+
+ surviving_objects = self.AddressStack()
if DEBUG_PRINT: alive_count = alive_size = 0
if DEBUG_PRINT: dead_count = dead_size = 0
- while large_objects.non_empty():
- obj = large_objects.popleft()
- if self.header(obj).tid & GCFLAG_UNVISITED:
+ while objects.non_empty():
+ obj = objects.pop()
+ tid = self.header(obj).tid
+ if tid & GCFLAG_UNVISITED:
if DEBUG_PRINT:dead_count+=1
if DEBUG_PRINT:dead_size+=raw_malloc_usage(self.get_size(obj))
addr = obj - self.gcheaderbuilder.size_gc_header
@@ -294,25 +433,85 @@
else:
if DEBUG_PRINT:alive_count+=1
if DEBUG_PRINT:alive_size+=raw_malloc_usage(self.get_size(obj))
- self.header(obj).tid |= GCFLAG_UNVISITED
- remaining_large_objects.append(obj)
- large_objects.delete()
- self.large_objects_list = remaining_large_objects
- # As we just collected, it's fine to raw_malloc'ate up to space_size
- # bytes again before we should force another collect.
- self.large_objects_collect_trigger = self.space_size
-
+ if generation == 3:
+ surviving_objects.append(obj)
+ else:
+ ll_assert((tid & GCFLAG_AGE_MASK) < GCFLAG_AGE_MAX,
+ "wrong age for generation 2 object")
+ tid += GCFLAG_AGE_ONE
+ if (tid & GCFLAG_AGE_MASK) == GCFLAG_AGE_MAX:
+ # the object becomes part of generation 3
+ self.gen3_rawmalloced_objects.append(obj)
+ # GCFLAG_NO_HEAP_PTRS not set yet, conservatively
+ self.last_generation_root_objects.append(obj)
+ else:
+ # the object stays in generation 2
+ tid |= GCFLAG_UNVISITED
+ surviving_objects.append(obj)
+ self.header(obj).tid = tid
+ objects.delete()
+ if generation == 2:
+ self.gen2_rawmalloced_objects = surviving_objects
+ else:
+ self.gen3_rawmalloced_objects = surviving_objects
if DEBUG_PRINT:
- self._initial_trigger = self.large_objects_collect_trigger
llop.debug_print(lltype.Void,
- "| [hybrid] made nonmoving: ",
- self._nonmoving_copy_size, "bytes in",
- self._nonmoving_copy_count, "objs")
- llop.debug_print(lltype.Void,
- "| [hybrid] nonmoving now alive: ",
+ "| [hyb] gen", generation,
+ "nonmoving now alive: ",
alive_size, "bytes in",
alive_count, "objs")
llop.debug_print(lltype.Void,
- "| [hybrid] nonmoving freed: ",
+ "| [hyb] gen", generation,
+ "nonmoving freed: ",
dead_size, "bytes in",
dead_count, "objs")
+
+ def _compute_id_for_external(self, obj):
+ # the base classes make the assumption that all external objects
+ # have an id equal to their address. This is wrong if the object
+ # is a generation 3 rawmalloced object that initially lived in
+ # the semispaces.
+ if self.is_last_generation(obj):
+ # in this case, we still need to check if the object had its
+ # id taken before. If not, we can use its address as its id.
+ return self.objects_with_id.get(obj, obj)
+ else:
+ # a generation 2 external object was never non-external in
+ # the past, so it cannot be listed in self.objects_with_id.
+ return obj
+ # XXX a possible optimization would be to use three dicts, one
+ # for each generation, instead of mixing gen2 and gen3 objects.
+
+ def debug_check_object(self, obj):
+ """Check the invariants about 'obj' that should be true
+ between collections."""
+ GenerationGC.debug_check_object(self, obj)
+ tid = self.header(obj).tid
+ if tid & GCFLAG_UNVISITED:
+ ll_assert(self._d_gen2ro.contains(obj),
+ "GCFLAG_UNVISITED on non-gen2 object")
+
+ def debug_check_consistency(self):
+ if self.DEBUG:
+ self._d_gen2ro = self.gen2_rawmalloced_objects.stack2dict()
+ GenerationGC.debug_check_consistency(self)
+ self._d_gen2ro.delete()
+ self.gen2_rawmalloced_objects.foreach(self._debug_check_gen2, None)
+ self.gen3_rawmalloced_objects.foreach(self._debug_check_gen3, None)
+
+ def _debug_check_gen2(self, obj, ignored):
+ tid = self.header(obj).tid
+ ll_assert(bool(tid & GCFLAG_EXTERNAL),
+ "gen2: missing GCFLAG_EXTERNAL")
+ ll_assert(bool(tid & GCFLAG_UNVISITED),
+ "gen2: missing GCFLAG_UNVISITED")
+ ll_assert((tid & GCFLAG_AGE_MASK) < GCFLAG_AGE_MAX,
+ "gen2: age field too large")
+ def _debug_check_gen3(self, obj, ignored):
+ tid = self.header(obj).tid
+ ll_assert(bool(tid & GCFLAG_EXTERNAL),
+ "gen3: missing GCFLAG_EXTERNAL")
+ ll_assert(not (tid & GCFLAG_UNVISITED),
+ "gen3: unexpected GCFLAG_UNVISITED")
+ ll_assert((tid & GCFLAG_AGE_MASK) == GCFLAG_AGE_MAX,
+ "gen3: wrong age field")
Modified: pypy/dist/pypy/rpython/memory/gc/semispace.py
==============================================================================
--- pypy/dist/pypy/rpython/memory/gc/semispace.py (original)
+++ pypy/dist/pypy/rpython/memory/gc/semispace.py Sat May 10 10:20:48 2008
@@ -3,6 +3,7 @@
from pypy.rpython.lltypesystem.llmemory import NULL, raw_malloc_usage
from pypy.rpython.memory.support import DEFAULT_CHUNK_SIZE
from pypy.rpython.memory.support import get_address_stack, get_address_deque
+from pypy.rpython.memory.support import AddressDict
from pypy.rpython.memory.gcheader import GCHeaderBuilder
from pypy.rpython.lltypesystem import lltype, llmemory, llarena
from pypy.rlib.objectmodel import free_non_gc_object
@@ -50,8 +51,11 @@
self.gcheaderbuilder = GCHeaderBuilder(self.HDR)
self.AddressStack = get_address_stack(chunk_size)
self.AddressDeque = get_address_deque(chunk_size)
+ self.AddressDict = AddressDict
self.finalizer_lock_count = 0
self.red_zone = 0
+ self.id_free_list = self.AddressStack()
+ self.next_free_id = 1
def setup(self):
if DEBUG_PRINT:
@@ -66,6 +70,7 @@
self.objects_with_finalizers = self.AddressDeque()
self.run_finalizers = self.AddressDeque()
self.objects_with_weakrefs = self.AddressStack()
+ self.objects_with_id = self.AddressDict()
def disable_finalizers(self):
self.finalizer_lock_count += 1
@@ -202,9 +207,11 @@
self.max_space_size = size
def collect(self):
+ self.debug_check_consistency()
self.semispace_collect()
# the indirection is required by the fact that collect() is referred
# to by the gc transformer, and the default argument would crash
+ # (this is also a hook for the HybridGC)
def semispace_collect(self, size_changing=False):
if DEBUG_PRINT:
@@ -233,8 +240,9 @@
scan = self.deal_with_objects_with_finalizers(scan)
if self.objects_with_weakrefs.non_empty():
self.invalidate_weakrefs()
+ self.update_objects_with_id()
self.finished_full_collect()
- self.notify_objects_just_moved()
+ self.debug_check_consistency()
if not size_changing:
llarena.arena_reset(fromspace, self.space_size, True)
self.record_red_zone()
@@ -314,6 +322,8 @@
root.address[0] = self.copy(root.address[0])
def copy(self, obj):
+ if self.DEBUG:
+ self.debug_check_can_copy(obj)
if self.is_forwarded(obj):
#llop.debug_print(lltype.Void, obj, "already copied to", self.get_forwarding_address(obj))
return self.get_forwarding_address(obj)
@@ -568,5 +578,80 @@
finally:
self.finalizer_lock_count -= 1
+ def id(self, ptr):
+ obj = llmemory.cast_ptr_to_adr(ptr)
+ if self.header(obj).tid & GCFLAG_EXTERNAL:
+ result = self._compute_id_for_external(obj)
+ else:
+ result = self._compute_id(obj)
+ return llmemory.cast_adr_to_int(result)
+
+ def _next_id(self):
+ # return an id not currently in use (as an address instead of an int)
+ if self.id_free_list.non_empty():
+ result = self.id_free_list.pop() # reuse a dead id
+ else:
+ # make up a fresh id number
+ result = llmemory.cast_int_to_adr(self.next_free_id)
+ self.next_free_id += 2 # only odd numbers, to make lltype
+ # and llmemory happy and to avoid
+ # clashes with real addresses
+ return result
+
+ def _compute_id(self, obj):
+ # look if the object is listed in objects_with_id
+ result = self.objects_with_id.get(obj)
+ if not result:
+ result = self._next_id()
+ self.objects_with_id.setitem(obj, result)
+ return result
+
+ def _compute_id_for_external(self, obj):
+ # For prebuilt objects, we can simply return their address.
+ # This method is overridden by the HybridGC.
+ return obj
+
+ def update_objects_with_id(self):
+ old = self.objects_with_id
+ new_objects_with_id = self.AddressDict(old.length())
+ old.foreach(self._update_object_id_FAST, new_objects_with_id)
+ old.delete()
+ self.objects_with_id = new_objects_with_id
+
+ def _update_object_id(self, obj, id, new_objects_with_id):
+ # safe version (used by subclasses)
+ if self.surviving(obj):
+ newobj = self.get_forwarding_address(obj)
+ new_objects_with_id.setitem(newobj, id)
+ else:
+ self.id_free_list.append(id)
+
+ def _update_object_id_FAST(self, obj, id, new_objects_with_id):
+ # unsafe version, assumes that the new_objects_with_id is large enough
+ if self.surviving(obj):
+ newobj = self.get_forwarding_address(obj)
+ new_objects_with_id.insertclean(newobj, id)
+ else:
+ self.id_free_list.append(id)
+
+ def debug_check_object(self, obj):
+ """Check the invariants about 'obj' that should be true
+ between collections."""
+ tid = self.header(obj).tid
+ if tid & GCFLAG_EXTERNAL:
+ ll_assert(tid & GCFLAG_FORWARDED, "bug: external+!forwarded")
+ ll_assert(not (self.tospace <= obj < self.free),
+ "external flag but object inside the semispaces")
+ else:
+ ll_assert(not (tid & GCFLAG_FORWARDED), "bug: !external+forwarded")
+ ll_assert(self.tospace <= obj < self.free,
+ "!external flag but object outside the semispaces")
+ ll_assert(not (tid & GCFLAG_FINALIZATION_ORDERING),
+ "unexpected GCFLAG_FINALIZATION_ORDERING")
+
+ def debug_check_can_copy(self, obj):
+ ll_assert(not (self.tospace <= obj < self.free),
+ "copy() on already-copied object")
+
STATISTICS_NUMBERS = 0
Modified: pypy/dist/pypy/rpython/memory/gctransform/framework.py
==============================================================================
--- pypy/dist/pypy/rpython/memory/gctransform/framework.py (original)
+++ pypy/dist/pypy/rpython/memory/gctransform/framework.py Sat May 10 10:20:48 2008
@@ -391,11 +391,8 @@
self.layoutbuilder.addresses_of_static_ptrs_in_nongc +
self.layoutbuilder.addresses_of_static_ptrs)
log.info("found %s static roots" % (len(addresses_of_static_ptrs), ))
- additional_ptrs = self.layoutbuilder.additional_roots_sources
- log.info("additional %d potential static roots" % additional_ptrs)
ll_static_roots_inside = lltype.malloc(lltype.Array(llmemory.Address),
- len(addresses_of_static_ptrs) +
- additional_ptrs,
+ len(addresses_of_static_ptrs),
immortal=True)
for i in range(len(addresses_of_static_ptrs)):
ll_static_roots_inside[i] = addresses_of_static_ptrs[i]
@@ -788,10 +785,6 @@
def setup_root_walker(self):
pass
- def append_static_root(self, adr):
- self.gcdata.static_root_end.address[0] = adr
- self.gcdata.static_root_end += sizeofaddr
-
def walk_roots(self, collect_stack_root,
collect_static_in_prebuilt_nongc,
collect_static_in_prebuilt_gc):
Modified: pypy/dist/pypy/rpython/memory/gctypelayout.py
==============================================================================
--- pypy/dist/pypy/rpython/memory/gctypelayout.py (original)
+++ pypy/dist/pypy/rpython/memory/gctypelayout.py Sat May 10 10:20:48 2008
@@ -193,10 +193,9 @@
self.addresses_of_static_ptrs = []
# this list contains pointers in raw Structs and Arrays
self.addresses_of_static_ptrs_in_nongc = []
- # if not gc.prebuilt_gc_objects_are_static_roots, then
- # additional_roots_sources counts the number of locations
- # within prebuilt GC objects that are of type Ptr(Gc)
- self.additional_roots_sources = 0
+ # for debugging, the following list collects all the prebuilt
+ # GcStructs and GcArrays
+ self.all_prebuilt_gc = []
self.finalizer_funcptrs = {}
self.offsettable_cache = {}
self.next_typeid_cache = {}
@@ -284,6 +283,7 @@
hdr = gc.gcheaderbuilder.new_header(value)
adr = llmemory.cast_ptr_to_adr(hdr)
gc.init_gc_object_immortal(adr, typeid)
+ self.all_prebuilt_gc.append(value)
# The following collects the addresses of all the fields that have
# a GC Pointer type, inside the current prebuilt object. All such
@@ -291,12 +291,10 @@
# they could be changed later to point to GC heap objects.
adr = llmemory.cast_ptr_to_adr(value._as_ptr())
if TYPE._gckind == "gc":
- if not gc.prebuilt_gc_objects_are_static_roots:
- for a in gc_pointers_inside(value, adr):
- self.additional_roots_sources += 1
- return
- else:
+ if gc.prebuilt_gc_objects_are_static_roots or gc.DEBUG:
appendto = self.addresses_of_static_ptrs
+ else:
+ return
else:
appendto = self.addresses_of_static_ptrs_in_nongc
for a in gc_pointers_inside(value, adr, mutable_only=True):
Modified: pypy/dist/pypy/rpython/memory/gcwrapper.py
==============================================================================
--- pypy/dist/pypy/rpython/memory/gcwrapper.py (original)
+++ pypy/dist/pypy/rpython/memory/gcwrapper.py Sat May 10 10:20:48 2008
@@ -10,6 +10,7 @@
def __init__(self, llinterp, flowgraphs, gc_class, GC_PARAMS={}):
self.gc = gc_class(chunk_size = 10, **GC_PARAMS)
self.gc.set_root_walker(LLInterpRootWalker(self))
+ self.gc.DEBUG = True
self.llinterp = llinterp
self.prepare_graphs(flowgraphs)
self.gc.setup()
@@ -24,8 +25,9 @@
TYPE = lltype.typeOf(obj)
layoutbuilder.consider_constant(TYPE, obj, self.gc)
- self.constantroots = list(layoutbuilder.addresses_of_static_ptrs)
+ self.constantroots = layoutbuilder.addresses_of_static_ptrs
self.constantrootsnongc = layoutbuilder.addresses_of_static_ptrs_in_nongc
+ self._all_prebuilt_gc = layoutbuilder.all_prebuilt_gc
# ____________________________________________________________
#
@@ -114,9 +116,6 @@
def __init__(self, gcheap):
self.gcheap = gcheap
- def append_static_root(self, pointer):
- self.gcheap.constantroots.append(pointer)
-
def walk_roots(self, collect_stack_root,
collect_static_in_prebuilt_nongc,
collect_static_in_prebuilt_gc):
@@ -135,6 +134,10 @@
if addrofaddr.address[0]:
collect_stack_root(gc, addrofaddr)
+ def _walk_prebuilt_gc(self, collect): # debugging only! not RPython
+ for obj in self.gcheap._all_prebuilt_gc:
+ collect(llmemory.cast_ptr_to_adr(obj._as_ptr()))
+
class DirectRunLayoutBuilder(gctypelayout.TypeLayoutBuilder):
Modified: pypy/dist/pypy/rpython/memory/support.py
==============================================================================
--- pypy/dist/pypy/rpython/memory/support.py (original)
+++ pypy/dist/pypy/rpython/memory/support.py Sat May 10 10:20:48 2008
@@ -1,5 +1,6 @@
from pypy.rpython.lltypesystem import lltype, llmemory
from pypy.rlib.objectmodel import free_non_gc_object, we_are_translated
+from pypy.rlib.rarithmetic import r_uint, LONG_BIT
from pypy.rlib.debug import ll_assert
DEFAULT_CHUNK_SIZE = 1019
@@ -110,6 +111,14 @@
cur = next
free_non_gc_object(self)
+ def length(self):
+ chunk = self.chunk
+ count = self.used_in_last_chunk
+ while chunk:
+ chunk = chunk.next
+ count += chunk_size
+ return count
+
def foreach(self, callback, arg):
"""Invoke 'callback(address, arg)' for all addresses in the stack.
Typically, 'callback' is a bound method and 'arg' can be None.
@@ -124,9 +133,17 @@
count = chunk_size
foreach._annspecialcase_ = 'specialize:arg(1)'
+ def stack2dict(self):
+ result = AddressDict(self.length())
+ self.foreach(_add_in_dict, result)
+ return result
+
cache[chunk_size] = AddressStack
return AddressStack
+def _add_in_dict(item, d):
+ d.add(item)
+
def get_address_deque(chunk_size=DEFAULT_CHUNK_SIZE, cache={}):
try:
@@ -193,3 +210,82 @@
cache[chunk_size] = AddressDeque
return AddressDeque
+
+# ____________________________________________________________
+
+def AddressDict(length_estimate=0):
+ if we_are_translated():
+ from pypy.rpython.memory import lldict
+ return lldict.newdict(length_estimate)
+ else:
+ return BasicAddressDict()
+
+class BasicAddressDict(object):
+
+ def __init__(self):
+ self.data = {}
+
+ def _key(self, addr):
+ "NOT_RPYTHON: prebuilt AddressDicts are not supported"
+ return addr._fixup().ptr._obj
+
+ def _wrapkey(self, obj):
+ return llmemory.cast_ptr_to_adr(obj._as_ptr())
+
+ def delete(self):
+ pass
+
+ def length(self):
+ return len(self.data)
+
+ def contains(self, keyaddr):
+ return self._key(keyaddr) in self.data
+
+ def get(self, keyaddr, default=llmemory.NULL):
+ return self.data.get(self._key(keyaddr), default)
+
+ def setitem(self, keyaddr, valueaddr):
+ assert keyaddr
+ self.data[self._key(keyaddr)] = valueaddr
+
+ def insertclean(self, keyaddr, valueaddr):
+ assert keyaddr
+ key = self._key(keyaddr)
+ assert key not in self.data
+ self.data[key] = valueaddr
+
+ def add(self, keyaddr):
+ self.setitem(keyaddr, llmemory.NULL)
+
+ def clear(self):
+ self.data.clear()
+
+ def foreach(self, callback, arg):
+ """Invoke 'callback(key, value, arg)' for all items in the dict.
+ Typically, 'callback' is a bound method and 'arg' can be None."""
+ for key, value in self.data.iteritems():
+ callback(self._wrapkey(key), value, arg)
+
+
+def copy_and_update(dict, surviving, updated_address):
+ """Make a copy of 'dict' in which the keys are updated as follows:
+ * if surviving(key) returns False, the item is removed
+ * otherwise, updated_address(key) is inserted in the copy.
+ """
+ newdict = AddressDict
+ if not we_are_translated():
+ # when not translated, return a dict of the same kind as 'dict'
+ if not isinstance(dict, BasicAddressDict):
+ from pypy.rpython.memory.lldict import newdict
+ result = newdict(dict.length())
+ dict.foreach(_get_updater(surviving, updated_address), result)
+ return result
+copy_and_update._annspecialcase_ = 'specialize:arg(1,2)'
+
+def _get_updater(surviving, updated_address):
+ def callback(key, value, arg):
+ if surviving(key):
+ newkey = updated_address(key)
+ arg.setitem(newkey, value)
+ return callback
+_get_updater._annspecialcase_ = 'specialize:memo'
Modified: pypy/dist/pypy/rpython/memory/test/test_gc.py
==============================================================================
--- pypy/dist/pypy/rpython/memory/test/test_gc.py (original)
+++ pypy/dist/pypy/rpython/memory/test/test_gc.py Sat May 10 10:20:48 2008
@@ -478,3 +478,46 @@
return b.num_deleted
res = self.interpret(f, [15])
assert res == 16
+
+class TestHybridGCSmallHeap(GCTest):
+ from pypy.rpython.memory.gc.hybrid import HybridGC as GCClass
+ GC_PARAMS = {'space_size': 192,
+ 'min_nursery_size': 48,
+ 'nursery_size': 48,
+ 'large_object': 12,
+ 'large_object_gcptrs': 12,
+ 'generation3_collect_threshold': 5,
+ }
+
+ def test_gen3_to_gen2_refs(self):
+ class A(object):
+ def __init__(self):
+ self.x1 = -1
+ def f(x):
+ loop = A()
+ loop.next = loop
+ loop.prev = loop
+ i = 0
+ while i < x:
+ i += 1
+ a1 = A()
+ a1.x1 = i
+ a2 = A()
+ a2.x1 = i + 1000
+ a1.prev = loop.prev
+ a1.prev.next = a1
+ a1.next = loop
+ loop.prev = a1
+ a2.prev = loop
+ a2.next = loop.next
+ a2.next.prev = a2
+ loop.next = a2
+ i = 0
+ a = loop
+ while True:
+ a = a.next
+ i += 1
+ if a is loop:
+ return i
+ res = self.interpret(f, [200])
+ assert res == 401
Modified: pypy/dist/pypy/rpython/memory/test/test_transformed_gc.py
==============================================================================
--- pypy/dist/pypy/rpython/memory/test/test_transformed_gc.py (original)
+++ pypy/dist/pypy/rpython/memory/test/test_transformed_gc.py Sat May 10 10:20:48 2008
@@ -855,9 +855,11 @@
run, transformer = self.runner(f, nbargs=2, transformer=True)
run([1, 4])
- assert len(transformer.layoutbuilder.addresses_of_static_ptrs) == 0
- assert transformer.layoutbuilder.additional_roots_sources >= 4
- # NB. Remember that additional_roots_sources does not count
+ if not transformer.GCClass.prebuilt_gc_objects_are_static_roots:
+ assert len(transformer.layoutbuilder.addresses_of_static_ptrs) == 0
+ else:
+ assert len(transformer.layoutbuilder.addresses_of_static_ptrs) >= 4
+ # NB. Remember that the number above does not count
# the number of prebuilt GC objects, but the number of locations
# within prebuilt GC objects that are of type Ptr(Gc).
# At the moment we get additional_roots_sources == 6:
@@ -917,3 +919,25 @@
'nursery_size': 128,
'large_object': 32}
root_stack_depth = 200
+
+ def test_ref_from_rawmalloced_to_regular(self):
+ import gc
+ S = lltype.GcStruct('S', ('x', lltype.Signed))
+ A = lltype.GcStruct('A', ('p', lltype.Ptr(S)),
+ ('a', lltype.Array(lltype.Char)))
+ def setup(j):
+ p = lltype.malloc(S)
+ p.x = j*2
+ lst = lltype.malloc(A, j)
+ # the following line generates a write_barrier call at the moment,
+ # which is important because the 'lst' can be allocated directly
+ # in generation 2. This can only occur with varsized mallocs.
+ lst.p = p
+ return lst
+ def f(i, j):
+ lst = setup(j)
+ gc.collect()
+ return lst.p.x
+ run = self.runner(f, nbargs=2)
+ res = run([100, 100])
+ assert res == 200
More information about the Pypy-commit
mailing list