[pypy-commit] pypy gc-del: Remove the semispace, generational, and hybrid GCs, after discussing
arigo
noreply at buildbot.pypy.org
Sat Apr 27 13:34:13 CEST 2013
Author: Armin Rigo <arigo at tunes.org>
Branch: gc-del
Changeset: r63681:24b1a13f0f35
Date: 2013-04-27 12:57 +0200
http://bitbucket.org/pypy/pypy/changeset/24b1a13f0f35/
Log: Remove the semispace, generational, and hybrid GCs, after discussing
on IRC their remaining value. Even the educational value is very
low because of the 3-GCs-in-one approach, and the fact that starting
from a semispace GC is really the wrong approach for PyPy ---
semispace GCs work very badly in this context.
diff too long, truncating to 2000 out of 2477 lines
diff --git a/rpython/memory/gc/base.py b/rpython/memory/gc/base.py
--- a/rpython/memory/gc/base.py
+++ b/rpython/memory/gc/base.py
@@ -425,10 +425,7 @@
if config.translation.gctransformer != "framework":
raise AssertionError("fix this test")
- classes = {"semispace": "semispace.SemiSpaceGC",
- "generation": "generation.GenerationGC",
- "hybrid": "hybrid.HybridGC",
- "minimark" : "minimark.MiniMarkGC",
+ classes = {"minimark" : "minimark.MiniMarkGC",
}
try:
modulename, classname = classes[config.translation.gc].split('.')
diff --git a/rpython/memory/gc/generation.py b/rpython/memory/gc/generation.py
deleted file mode 100644
--- a/rpython/memory/gc/generation.py
+++ /dev/null
@@ -1,639 +0,0 @@
-import sys
-from rpython.memory.gc.semispace import SemiSpaceGC
-from rpython.memory.gc.semispace import GCFLAG_EXTERNAL, GCFLAG_FORWARDED
-from rpython.memory.gc.semispace import GC_HASH_TAKEN_ADDR
-from rpython.memory.gc import env
-from rpython.rtyper.lltypesystem.llmemory import NULL, raw_malloc_usage
-from rpython.rtyper.lltypesystem import lltype, llmemory, llarena
-from rpython.rlib.objectmodel import free_non_gc_object
-from rpython.rlib.debug import ll_assert
-from rpython.rlib.debug import debug_print, debug_start, debug_stop
-from rpython.rlib.rarithmetic import intmask, LONG_BIT
-from rpython.rtyper.lltypesystem.lloperation import llop
-
-WORD = LONG_BIT // 8
-
-# The following flag is never set on young objects, i.e. the ones living
-# in the nursery. It is initially set on all prebuilt and old objects,
-# and gets cleared by the write_barrier() when we write in them a
-# pointer to a young object.
-GCFLAG_NO_YOUNG_PTRS = SemiSpaceGC.first_unused_gcflag << 0
-
-# The following flag is set on some last-generation objects (== prebuilt
-# objects for GenerationGC, but see also HybridGC). The flag is set
-# unless the object is already listed in 'last_generation_root_objects'.
-# When a pointer is written inside an object with GCFLAG_NO_HEAP_PTRS
-# set, the write_barrier clears the flag and adds the object to
-# 'last_generation_root_objects'.
-GCFLAG_NO_HEAP_PTRS = SemiSpaceGC.first_unused_gcflag << 1
-
-class GenerationGC(SemiSpaceGC):
- """A basic generational GC: it's a SemiSpaceGC with an additional
- nursery for young objects. A write barrier is used to ensure that
- old objects that contain pointers to young objects are recorded in
- a list.
- """
- inline_simple_malloc = True
- inline_simple_malloc_varsize = True
- needs_write_barrier = True
- prebuilt_gc_objects_are_static_roots = False
- first_unused_gcflag = SemiSpaceGC.first_unused_gcflag << 2
-
- # the following values override the default arguments of __init__ when
- # translating to a real backend.
- TRANSLATION_PARAMS = {'space_size': 8*1024*1024, # 8 MB
- 'nursery_size': 3*1024*1024, # 3 MB
- 'min_nursery_size': 48*1024,
- 'auto_nursery_size': True}
-
- nursery_hash_base = -1
-
- def __init__(self, config,
- nursery_size=32*WORD,
- min_nursery_size=32*WORD,
- auto_nursery_size=False,
- space_size=1024*WORD,
- max_space_size=sys.maxint//2+1,
- **kwds):
- SemiSpaceGC.__init__(self, config,
- space_size = space_size,
- max_space_size = max_space_size,
- **kwds)
- assert min_nursery_size <= nursery_size <= space_size // 2
- self.initial_nursery_size = nursery_size
- self.auto_nursery_size = auto_nursery_size
- self.min_nursery_size = min_nursery_size
-
- # define nursery fields
- self.reset_nursery()
- self._setup_wb()
-
- # compute the constant lower bounds for the attributes
- # largest_young_fixedsize and largest_young_var_basesize.
- # It is expected that most (or all) objects have a fixedsize
- # that is much lower anyway.
- sz = self.get_young_fixedsize(self.min_nursery_size)
- self.lb_young_fixedsize = sz
- sz = self.get_young_var_basesize(self.min_nursery_size)
- self.lb_young_var_basesize = sz
-
- def setup(self):
- self.old_objects_pointing_to_young = self.AddressStack()
- # ^^^ a list of addresses inside the old objects space; it
- # may contain static prebuilt objects as well. More precisely,
- # it lists exactly the old and static objects whose
- # GCFLAG_NO_YOUNG_PTRS bit is not set.
- self.young_objects_with_weakrefs = self.AddressStack()
-
- self.last_generation_root_objects = self.AddressStack()
- self.young_objects_with_id = self.AddressDict()
- SemiSpaceGC.setup(self)
- self.set_nursery_size(self.initial_nursery_size)
- # the GC is fully setup now. The rest can make use of it.
- if self.auto_nursery_size:
- newsize = nursery_size_from_env()
- #if newsize <= 0:
- # ---disabled--- just use the default value.
- # newsize = env.estimate_best_nursery_size()
- if newsize > 0:
- self.set_nursery_size(newsize)
-
- self.reset_nursery()
-
- def _teardown(self):
- self.collect() # should restore last gen objects flags
- SemiSpaceGC._teardown(self)
-
- def reset_nursery(self):
- self.nursery = NULL
- self.nursery_top = NULL
- self.nursery_free = NULL
-
- def set_nursery_size(self, newsize):
- debug_start("gc-set-nursery-size")
- if newsize < self.min_nursery_size:
- newsize = self.min_nursery_size
- if newsize > self.space_size // 2:
- newsize = self.space_size // 2
-
- # Compute the new bounds for how large young objects can be
- # (larger objects are allocated directly old). XXX adjust
- self.nursery_size = newsize
- self.largest_young_fixedsize = self.get_young_fixedsize(newsize)
- self.largest_young_var_basesize = self.get_young_var_basesize(newsize)
- scale = 0
- while (self.min_nursery_size << (scale+1)) <= newsize:
- scale += 1
- self.nursery_scale = scale
- debug_print("nursery_size =", newsize)
- debug_print("largest_young_fixedsize =",
- self.largest_young_fixedsize)
- debug_print("largest_young_var_basesize =",
- self.largest_young_var_basesize)
- debug_print("nursery_scale =", scale)
- # we get the following invariant:
- assert self.nursery_size >= (self.min_nursery_size << scale)
-
- # Force a full collect to remove the current nursery whose size
- # no longer matches the bounds that we just computed. This must
- # be done after changing the bounds, because it might re-create
- # a new nursery (e.g. if it invokes finalizers).
- self.semispace_collect()
- debug_stop("gc-set-nursery-size")
-
- @staticmethod
- def get_young_fixedsize(nursery_size):
- return nursery_size // 2 - 1
-
- @staticmethod
- def get_young_var_basesize(nursery_size):
- return nursery_size // 4 - 1
-
- @classmethod
- def JIT_max_size_of_young_obj(cls):
- min_nurs_size = cls.TRANSLATION_PARAMS['min_nursery_size']
- return cls.get_young_fixedsize(min_nurs_size)
-
- def is_in_nursery(self, addr):
- ll_assert(llmemory.cast_adr_to_int(addr) & 1 == 0,
- "odd-valued (i.e. tagged) pointer unexpected here")
- return self.nursery <= addr < self.nursery_top
-
- def appears_to_be_in_nursery(self, addr):
- # same as is_in_nursery(), but may return True accidentally if
- # 'addr' is a tagged pointer with just the wrong value.
- if not self.translated_to_c:
- if not self.is_valid_gc_object(addr):
- return False
- return self.nursery <= addr < self.nursery_top
-
- def malloc_fixedsize_clear(self, typeid, size,
- has_finalizer=False,
- is_finalizer_light=False,
- contains_weakptr=False):
- if (has_finalizer or
- (raw_malloc_usage(size) > self.lb_young_fixedsize and
- raw_malloc_usage(size) > self.largest_young_fixedsize)):
- # ^^^ we do two size comparisons; the first one appears redundant,
- # but it can be constant-folded if 'size' is a constant; then
- # it almost always folds down to False, which kills the
- # second comparison as well.
- ll_assert(not contains_weakptr, "wrong case for mallocing weakref")
- # "non-simple" case or object too big: don't use the nursery
- return SemiSpaceGC.malloc_fixedsize_clear(self, typeid, size,
- has_finalizer,
- is_finalizer_light,
- contains_weakptr)
- size_gc_header = self.gcheaderbuilder.size_gc_header
- totalsize = size_gc_header + size
- result = self.nursery_free
- if raw_malloc_usage(totalsize) > self.nursery_top - result:
- result = self.collect_nursery()
- llarena.arena_reserve(result, totalsize)
- # GCFLAG_NO_YOUNG_PTRS is never set on young objs
- self.init_gc_object(result, typeid, flags=0)
- self.nursery_free = result + totalsize
- if contains_weakptr:
- self.young_objects_with_weakrefs.append(result + size_gc_header)
- return llmemory.cast_adr_to_ptr(result+size_gc_header, llmemory.GCREF)
-
- def malloc_varsize_clear(self, typeid, length, size, itemsize,
- offset_to_length):
- # Only use the nursery if there are not too many items.
- if not raw_malloc_usage(itemsize):
- too_many_items = False
- else:
- # The following line is usually constant-folded because both
- # min_nursery_size and itemsize are constants (the latter
- # due to inlining).
- maxlength_for_minimal_nursery = (self.min_nursery_size // 4 //
- raw_malloc_usage(itemsize))
-
- # The actual maximum length for our nursery depends on how
- # many times our nursery is bigger than the minimal size.
- # The computation is done in this roundabout way so that
-            # the only remaining computation is the following
- # shift.
- maxlength = maxlength_for_minimal_nursery << self.nursery_scale
- too_many_items = length > maxlength
-
- if (too_many_items or
- (raw_malloc_usage(size) > self.lb_young_var_basesize and
- raw_malloc_usage(size) > self.largest_young_var_basesize)):
- # ^^^ we do two size comparisons; the first one appears redundant,
- # but it can be constant-folded if 'size' is a constant; then
- # it almost always folds down to False, which kills the
- # second comparison as well.
- return SemiSpaceGC.malloc_varsize_clear(self, typeid, length, size,
- itemsize, offset_to_length)
- # with the above checks we know now that totalsize cannot be more
- # than about half of the nursery size; in particular, the + and *
- # cannot overflow
- size_gc_header = self.gcheaderbuilder.size_gc_header
- totalsize = size_gc_header + size + itemsize * length
- result = self.nursery_free
- if raw_malloc_usage(totalsize) > self.nursery_top - result:
- result = self.collect_nursery()
- llarena.arena_reserve(result, totalsize)
- # GCFLAG_NO_YOUNG_PTRS is never set on young objs
- self.init_gc_object(result, typeid, flags=0)
- (result + size_gc_header + offset_to_length).signed[0] = length
- self.nursery_free = result + llarena.round_up_for_allocation(totalsize)
- return llmemory.cast_adr_to_ptr(result+size_gc_header, llmemory.GCREF)
-
- # override the init_gc_object methods to change the default value of 'flags',
- # used by objects that are directly created outside the nursery by the SemiSpaceGC.
- # These objects must have the GCFLAG_NO_YOUNG_PTRS flag set immediately.
- def init_gc_object(self, addr, typeid, flags=GCFLAG_NO_YOUNG_PTRS):
- SemiSpaceGC.init_gc_object(self, addr, typeid, flags)
-
- def init_gc_object_immortal(self, addr, typeid,
- flags=GCFLAG_NO_YOUNG_PTRS|GCFLAG_NO_HEAP_PTRS):
- SemiSpaceGC.init_gc_object_immortal(self, addr, typeid, flags)
-
- # flags exposed for the HybridGC subclass
- GCFLAGS_FOR_NEW_YOUNG_OBJECTS = 0 # NO_YOUNG_PTRS never set on young objs
- GCFLAGS_FOR_NEW_EXTERNAL_OBJECTS = (GCFLAG_EXTERNAL | GCFLAG_FORWARDED |
- GCFLAG_NO_YOUNG_PTRS |
- GC_HASH_TAKEN_ADDR)
-
- # ____________________________________________________________
- # Support code for full collections
-
- def collect(self, gen=1):
- if gen == 0:
- self.collect_nursery()
- else:
- SemiSpaceGC.collect(self)
-
- def semispace_collect(self, size_changing=False):
- self.reset_young_gcflags() # we are doing a full collection anyway
- self.weakrefs_grow_older()
- self.ids_grow_older()
- self.reset_nursery()
- SemiSpaceGC.semispace_collect(self, size_changing)
-
- def make_a_copy(self, obj, objsize):
- tid = self.header(obj).tid
- # During a full collect, all copied objects might implicitly come
- # from the nursery. In case they do, we must add this flag:
- tid |= GCFLAG_NO_YOUNG_PTRS
- return self._make_a_copy_with_tid(obj, objsize, tid)
- # history: this was missing and caused an object to become old but without the
- # flag set. Such an object is bogus in the sense that the write_barrier doesn't
- # work on it. So it can eventually contain a ptr to a young object but we didn't
- # know about it. That ptr was not updated in the next minor collect... boom at
- # the next usage.
-
- def reset_young_gcflags(self):
- # This empties self.old_objects_pointing_to_young, and puts the
- # GCFLAG_NO_YOUNG_PTRS back on all these objects. We could put
- # the flag back more lazily but we expect this list to be short
- # anyway, and it's much saner to stick to the invariant:
- # non-young objects all have GCFLAG_NO_YOUNG_PTRS set unless
- # they are listed in old_objects_pointing_to_young.
- oldlist = self.old_objects_pointing_to_young
- while oldlist.non_empty():
- obj = oldlist.pop()
- hdr = self.header(obj)
- hdr.tid |= GCFLAG_NO_YOUNG_PTRS
-
- def weakrefs_grow_older(self):
- while self.young_objects_with_weakrefs.non_empty():
- obj = self.young_objects_with_weakrefs.pop()
- self.objects_with_weakrefs.append(obj)
-
- def collect_roots(self):
- """GenerationGC: collects all roots.
- HybridGC: collects all roots, excluding the generation 3 ones.
- """
- # Warning! References from static (and possibly gen3) objects
- # are found by collect_last_generation_roots(), which must be
- # called *first*! If it is called after walk_roots(), then the
- # HybridGC explodes if one of the _collect_root causes an object
- # to be added to self.last_generation_root_objects. Indeed, in
- # this case, the newly added object is traced twice: once by
- # collect_last_generation_roots() and once because it was added
- # in self.rawmalloced_objects_to_trace.
- self.collect_last_generation_roots()
- self.root_walker.walk_roots(
- SemiSpaceGC._collect_root, # stack roots
- SemiSpaceGC._collect_root, # static in prebuilt non-gc structures
- None) # we don't need the static in prebuilt gc objects
-
- def collect_last_generation_roots(self):
- stack = self.last_generation_root_objects
- self.last_generation_root_objects = self.AddressStack()
- while stack.non_empty():
- obj = stack.pop()
- self.header(obj).tid |= GCFLAG_NO_HEAP_PTRS
- # ^^^ the flag we just added will be removed immediately if
- # the object still contains pointers to younger objects
- self.trace(obj, self._trace_external_obj, obj)
- stack.delete()
-
- def _trace_external_obj(self, pointer, obj):
- addr = pointer.address[0]
- newaddr = self.copy(addr)
- pointer.address[0] = newaddr
- self.write_into_last_generation_obj(obj, newaddr)
-
- # ____________________________________________________________
- # Implementation of nursery-only collections
-
- def collect_nursery(self):
- if self.nursery_size > self.top_of_space - self.free:
- # the semispace is running out, do a full collect
- self.obtain_free_space(self.nursery_size)
- ll_assert(self.nursery_size <= self.top_of_space - self.free,
- "obtain_free_space failed to do its job")
- if self.nursery:
- debug_start("gc-minor")
- debug_print("--- minor collect ---")
- debug_print("nursery:", self.nursery, "to", self.nursery_top)
- # a nursery-only collection
- scan = beginning = self.free
- self.collect_oldrefs_to_nursery()
- self.collect_roots_in_nursery()
- scan = self.scan_objects_just_copied_out_of_nursery(scan)
- # at this point, all static and old objects have got their
- # GCFLAG_NO_YOUNG_PTRS set again by trace_and_drag_out_of_nursery
- if self.young_objects_with_weakrefs.non_empty():
- self.invalidate_young_weakrefs()
- if self.young_objects_with_id.length() > 0:
- self.update_young_objects_with_id()
- # mark the nursery as free and fill it with zeroes again
- llarena.arena_reset(self.nursery, self.nursery_size, 2)
- debug_print("survived (fraction of the size):",
- float(scan - beginning) / self.nursery_size)
- debug_stop("gc-minor")
- #self.debug_check_consistency() # -- quite expensive
- else:
- # no nursery - this occurs after a full collect, triggered either
- # just above or by some previous non-nursery-based allocation.
- # Grab a piece of the current space for the nursery.
- self.nursery = self.free
- self.nursery_top = self.nursery + self.nursery_size
- self.free = self.nursery_top
- self.nursery_free = self.nursery
- # at this point we know that the nursery is empty
- self.change_nursery_hash_base()
- return self.nursery_free
-
- def change_nursery_hash_base(self):
- # The following should be enough to ensure that young objects
- # tend to always get a different hash. It also makes sure that
- # nursery_hash_base is not a multiple of 4, to avoid collisions
- # with the hash of non-young objects.
- hash_base = self.nursery_hash_base
- hash_base += self.nursery_size - 1
- if (hash_base & 3) == 0:
- hash_base -= 1
- self.nursery_hash_base = intmask(hash_base)
-
- # NB. we can use self.copy() to move objects out of the nursery,
- # but only if the object was really in the nursery.
-
- def collect_oldrefs_to_nursery(self):
- # Follow the old_objects_pointing_to_young list and move the
- # young objects they point to out of the nursery.
- count = 0
- oldlist = self.old_objects_pointing_to_young
- while oldlist.non_empty():
- count += 1
- obj = oldlist.pop()
- hdr = self.header(obj)
- hdr.tid |= GCFLAG_NO_YOUNG_PTRS
- self.trace_and_drag_out_of_nursery(obj)
- debug_print("collect_oldrefs_to_nursery", count)
-
- def collect_roots_in_nursery(self):
- # we don't need to trace prebuilt GcStructs during a minor collect:
- # if a prebuilt GcStruct contains a pointer to a young object,
- # then the write_barrier must have ensured that the prebuilt
- # GcStruct is in the list self.old_objects_pointing_to_young.
- self.root_walker.walk_roots(
- GenerationGC._collect_root_in_nursery, # stack roots
- GenerationGC._collect_root_in_nursery, # static in prebuilt non-gc
- None) # static in prebuilt gc
-
- def _collect_root_in_nursery(self, root):
- obj = root.address[0]
- if self.is_in_nursery(obj):
- root.address[0] = self.copy(obj)
-
- def scan_objects_just_copied_out_of_nursery(self, scan):
- while scan < self.free:
- curr = scan + self.size_gc_header()
- self.trace_and_drag_out_of_nursery(curr)
- scan += self.size_gc_header() + self.get_size_incl_hash(curr)
- return scan
-
- def trace_and_drag_out_of_nursery(self, obj):
- """obj must not be in the nursery. This copies all the
- young objects it references out of the nursery.
- """
- self.trace(obj, self._trace_drag_out, None)
-
- def _trace_drag_out(self, pointer, ignored):
- if self.is_in_nursery(pointer.address[0]):
- pointer.address[0] = self.copy(pointer.address[0])
-
- # The code relies on the fact that no weakref can be an old object
-    # weakly pointing to a young object.  Indeed, weakrefs are immutable
-    # so they cannot point to an object that was created after them.
- def invalidate_young_weakrefs(self):
- # walk over the list of objects that contain weakrefs and are in the
- # nursery. if the object it references survives then update the
- # weakref; otherwise invalidate the weakref
- while self.young_objects_with_weakrefs.non_empty():
- obj = self.young_objects_with_weakrefs.pop()
- if not self.surviving(obj):
- continue # weakref itself dies
- obj = self.get_forwarding_address(obj)
- offset = self.weakpointer_offset(self.get_type_id(obj))
- pointing_to = (obj + offset).address[0]
- if self.is_in_nursery(pointing_to):
- if self.surviving(pointing_to):
- (obj + offset).address[0] = self.get_forwarding_address(
- pointing_to)
- else:
- (obj + offset).address[0] = NULL
- continue # no need to remember this weakref any longer
- self.objects_with_weakrefs.append(obj)
-
- # for the JIT: a minimal description of the write_barrier() method
- # (the JIT assumes it is of the shape
- # "if addr_struct.int0 & JIT_WB_IF_FLAG: remember_young_pointer()")
- JIT_WB_IF_FLAG = GCFLAG_NO_YOUNG_PTRS
-
- def write_barrier(self, newvalue, addr_struct):
- if self.header(addr_struct).tid & GCFLAG_NO_YOUNG_PTRS:
- self.remember_young_pointer(addr_struct, newvalue)
-
- def _setup_wb(self):
- DEBUG = self.DEBUG
- # The purpose of attaching remember_young_pointer to the instance
- # instead of keeping it as a regular method is to help the JIT call it.
- # Additionally, it makes the code in write_barrier() marginally smaller
- # (which is important because it is inlined *everywhere*).
- # For x86, there is also an extra requirement: when the JIT calls
- # remember_young_pointer(), it assumes that it will not touch the SSE
- # registers, so it does not save and restore them (that's a *hack*!).
- def remember_young_pointer(addr_struct, addr):
- #llop.debug_print(lltype.Void, "\tremember_young_pointer",
- # addr_struct, "<-", addr)
- if DEBUG:
- ll_assert(not self.is_in_nursery(addr_struct),
- "nursery object with GCFLAG_NO_YOUNG_PTRS")
- #
- # What is important in this function is that it *must*
- # clear the flag GCFLAG_NO_YOUNG_PTRS from 'addr_struct'
- # if 'addr' is in the nursery. It is ok if, accidentally,
- # it also clears the flag in some more rare cases, like
- # 'addr' being a tagged pointer whose value happens to be
- # a large integer that fools is_in_nursery().
- if self.appears_to_be_in_nursery(addr):
- self.old_objects_pointing_to_young.append(addr_struct)
- self.header(addr_struct).tid &= ~GCFLAG_NO_YOUNG_PTRS
- self.write_into_last_generation_obj(addr_struct, addr)
- remember_young_pointer._dont_inline_ = True
- self.remember_young_pointer = remember_young_pointer
-
- def write_into_last_generation_obj(self, addr_struct, addr):
- objhdr = self.header(addr_struct)
- if objhdr.tid & GCFLAG_NO_HEAP_PTRS:
- if (self.is_valid_gc_object(addr) and
- not self.is_last_generation(addr)):
- objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
- self.last_generation_root_objects.append(addr_struct)
- write_into_last_generation_obj._always_inline_ = True
-
- def assume_young_pointers(self, addr_struct):
- objhdr = self.header(addr_struct)
- if objhdr.tid & GCFLAG_NO_YOUNG_PTRS:
- self.old_objects_pointing_to_young.append(addr_struct)
- objhdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
- if objhdr.tid & GCFLAG_NO_HEAP_PTRS:
- objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
- self.last_generation_root_objects.append(addr_struct)
-
- def writebarrier_before_copy(self, source_addr, dest_addr,
- source_start, dest_start, length):
- """ This has the same effect as calling writebarrier over
- each element in dest copied from source, except it might reset
- one of the following flags a bit too eagerly, which means we'll have
- a bit more objects to track, but being on the safe side.
- """
- source_hdr = self.header(source_addr)
- dest_hdr = self.header(dest_addr)
- if dest_hdr.tid & GCFLAG_NO_YOUNG_PTRS == 0:
- return True
- # ^^^ a fast path of write-barrier
- if source_hdr.tid & GCFLAG_NO_YOUNG_PTRS == 0:
- # there might be an object in source that is in nursery
- self.old_objects_pointing_to_young.append(dest_addr)
- dest_hdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
- if dest_hdr.tid & GCFLAG_NO_HEAP_PTRS:
- if source_hdr.tid & GCFLAG_NO_HEAP_PTRS == 0:
-            # ^^^ equivalent of addr from source not being in last
- # gen
- dest_hdr.tid &= ~GCFLAG_NO_HEAP_PTRS
- self.last_generation_root_objects.append(dest_addr)
- return True
-
- def is_last_generation(self, obj):
- # overridden by HybridGC
- return (self.header(obj).tid & GCFLAG_EXTERNAL) != 0
-
- def _compute_id(self, obj):
- if self.is_in_nursery(obj):
- result = self.young_objects_with_id.get(obj)
- if not result:
- result = self._next_id()
- self.young_objects_with_id.setitem(obj, result)
- return result
- else:
- return SemiSpaceGC._compute_id(self, obj)
-
- def update_young_objects_with_id(self):
- self.young_objects_with_id.foreach(self._update_object_id,
- self.objects_with_id)
- self.young_objects_with_id.clear()
- # NB. the clear() also makes the dictionary shrink back to its
- # minimal size, which is actually a good idea: a large, mostly-empty
- # table is bad for the next call to 'foreach'.
-
- def ids_grow_older(self):
- self.young_objects_with_id.foreach(self._id_grow_older, None)
- self.young_objects_with_id.clear()
-
- def _id_grow_older(self, obj, id, ignored):
- self.objects_with_id.setitem(obj, id)
-
- def _compute_current_nursery_hash(self, obj):
- return intmask(llmemory.cast_adr_to_int(obj) + self.nursery_hash_base)
-
- def enumerate_all_roots(self, callback, arg):
- self.last_generation_root_objects.foreach(callback, arg)
- SemiSpaceGC.enumerate_all_roots(self, callback, arg)
- enumerate_all_roots._annspecialcase_ = 'specialize:arg(1)'
-
- def debug_check_object(self, obj):
- """Check the invariants about 'obj' that should be true
- between collections."""
- SemiSpaceGC.debug_check_object(self, obj)
- tid = self.header(obj).tid
- if tid & GCFLAG_NO_YOUNG_PTRS:
- ll_assert(not self.is_in_nursery(obj),
- "nursery object with GCFLAG_NO_YOUNG_PTRS")
- self.trace(obj, self._debug_no_nursery_pointer, None)
- elif not self.is_in_nursery(obj):
- ll_assert(self._d_oopty.contains(obj),
- "missing from old_objects_pointing_to_young")
- if tid & GCFLAG_NO_HEAP_PTRS:
- ll_assert(self.is_last_generation(obj),
- "GCFLAG_NO_HEAP_PTRS on non-3rd-generation object")
- self.trace(obj, self._debug_no_gen1or2_pointer, None)
- elif self.is_last_generation(obj):
- ll_assert(self._d_lgro.contains(obj),
- "missing from last_generation_root_objects")
-
- def _debug_no_nursery_pointer(self, root, ignored):
- ll_assert(not self.is_in_nursery(root.address[0]),
- "GCFLAG_NO_YOUNG_PTRS but found a young pointer")
- def _debug_no_gen1or2_pointer(self, root, ignored):
- target = root.address[0]
- ll_assert(not target or self.is_last_generation(target),
- "GCFLAG_NO_HEAP_PTRS but found a pointer to gen1or2")
-
- def debug_check_consistency(self):
- if self.DEBUG:
- self._d_oopty = self.old_objects_pointing_to_young.stack2dict()
- self._d_lgro = self.last_generation_root_objects.stack2dict()
- SemiSpaceGC.debug_check_consistency(self)
- self._d_oopty.delete()
- self._d_lgro.delete()
- self.old_objects_pointing_to_young.foreach(
- self._debug_check_flag_1, None)
- self.last_generation_root_objects.foreach(
- self._debug_check_flag_2, None)
-
- def _debug_check_flag_1(self, obj, ignored):
- ll_assert(not (self.header(obj).tid & GCFLAG_NO_YOUNG_PTRS),
- "unexpected GCFLAG_NO_YOUNG_PTRS")
- def _debug_check_flag_2(self, obj, ignored):
- ll_assert(not (self.header(obj).tid & GCFLAG_NO_HEAP_PTRS),
- "unexpected GCFLAG_NO_HEAP_PTRS")
-
- def debug_check_can_copy(self, obj):
- if self.is_in_nursery(obj):
- pass # it's ok to copy an object out of the nursery
- else:
- SemiSpaceGC.debug_check_can_copy(self, obj)
-
-
-# ____________________________________________________________
-
-def nursery_size_from_env():
- return env.read_from_env('PYPY_GENERATIONGC_NURSERY')
diff --git a/rpython/memory/gc/hybrid.py b/rpython/memory/gc/hybrid.py
deleted file mode 100644
--- a/rpython/memory/gc/hybrid.py
+++ /dev/null
@@ -1,559 +0,0 @@
-import sys
-from rpython.memory.gc.semispace import SemiSpaceGC
-from rpython.memory.gc.generation import GenerationGC, WORD
-from rpython.memory.gc.semispace import GCFLAG_EXTERNAL, GCFLAG_FORWARDED
-from rpython.memory.gc.semispace import GCFLAG_HASHMASK
-from rpython.memory.gc.generation import GCFLAG_NO_YOUNG_PTRS
-from rpython.memory.gc.generation import GCFLAG_NO_HEAP_PTRS
-from rpython.memory.gc.semispace import GC_HASH_TAKEN_ADDR
-from rpython.memory.gc.semispace import GC_HASH_HASFIELD
-from rpython.rtyper.lltypesystem import lltype, llmemory, llarena
-from rpython.rtyper.lltypesystem.llmemory import raw_malloc_usage
-from rpython.rtyper.lltypesystem.lloperation import llop
-from rpython.rlib.debug import ll_assert, have_debug_prints
-from rpython.rlib.debug import debug_print, debug_start, debug_stop
-from rpython.rlib.rarithmetic import ovfcheck
-from rpython.rtyper.lltypesystem import rffi
-
-# _______in the semispaces_________ ______external (non-moving)_____
-# / \ / \
-# ___raw_malloc'ed__ _prebuilt_
-# +----------------------------------+ / \ / \
-# | | | | | | | |
-# | | | | | | | | age < max age == max
-# |nur-|o|o|o|o| | | +---+ +---+ +---+
-# |sery|b|b|b|b|free| empty | |obj| |obj| |obj|
-# | |j|j|j|j| | | +---+ +---+ +---+
-# | | | | | | | | +---+ +---+ +---+
-# +-----------------+----------------+ |obj| |obj| |obj|
-# age <= max +---+ +---+ +---+
-#
-# |gen1|------------- generation 2 -----------------|-----generation 3-----|
-#
-# Object lists:
-# * gen2_rawmalloced_objects
-# * gen3_rawmalloced_objects
-# * old_objects_pointing_to_young: gen2or3 objs that point to gen1 objs
-# * last_generation_root_objects: gen3 objs that point to gen1or2 objs
-#
-# How to tell the objects apart:
-# * external: tid & GCFLAG_EXTERNAL
-# * gen1: is_in_nursery(obj)
-# * gen3: (tid & (GCFLAG_EXTERNAL|GCFLAG_AGE_MASK)) ==
-# (GCFLAG_EXTERNAL|GCFLAG_AGE_MAX)
-#
-# Some invariants:
-# * gen3 are either GCFLAG_NO_HEAP_PTRS or in 'last_generation_root_objects'
-# * between collections, GCFLAG_UNVISITED set exactly for gen2_rawmalloced
-#
-# A malloc_varsize() of large objects returns objects that are external
-# but initially of generation 2. Old objects from the semispaces are
-# moved to external objects directly as generation 3.
-
-# The "age" of an object is the number of times it survived a full
-# collections, without counting the step that moved it out of the nursery.
-# When a semispace-based object would grow older than MAX_SEMISPACE_AGE,
-# it is instead copied to a nonmoving location. For example, a value of 3
-# ensures that an object is copied at most 5 times in total: from the
-# nursery to the semispace, then three times between the two spaces,
-# then one last time to a nonmoving location.
-MAX_SEMISPACE_AGE = 3
-
-GCFLAG_UNVISITED = GenerationGC.first_unused_gcflag << 0
-_gcflag_next_bit = GenerationGC.first_unused_gcflag << 1
-GCFLAG_AGE_ONE = _gcflag_next_bit
-GCFLAG_AGE_MAX = _gcflag_next_bit * MAX_SEMISPACE_AGE
-GCFLAG_AGE_MASK = 0
-while GCFLAG_AGE_MASK < GCFLAG_AGE_MAX:
- GCFLAG_AGE_MASK |= _gcflag_next_bit
- _gcflag_next_bit <<= 1
-
-# The 3rd generation objects are only collected after the following
-# number of calls to semispace_collect():
-GENERATION3_COLLECT_THRESHOLD = 20
-
-class HybridGC(GenerationGC):
- """A two-generations semi-space GC like the GenerationGC,
- except that objects above a certain size are handled separately:
- they are allocated via raw_malloc/raw_free in a mark-n-sweep fashion.
- """
- first_unused_gcflag = _gcflag_next_bit
- prebuilt_gc_objects_are_static_roots = True
-
- # the following values override the default arguments of __init__ when
- # translating to a real backend.
- TRANSLATION_PARAMS = GenerationGC.TRANSLATION_PARAMS.copy()
- TRANSLATION_PARAMS['large_object'] = 6*1024 # XXX adjust
- TRANSLATION_PARAMS['large_object_gcptrs'] = 31*1024 # XXX adjust
- TRANSLATION_PARAMS['min_nursery_size'] = 128*1024
- # condition: large_object <= large_object_gcptrs < min_nursery_size/4
-
- def __init__(self, *args, **kwds):
- large_object = kwds.pop('large_object', 6*WORD)
- large_object_gcptrs = kwds.pop('large_object_gcptrs', 8*WORD)
- self.generation3_collect_threshold = kwds.pop(
- 'generation3_collect_threshold', GENERATION3_COLLECT_THRESHOLD)
- GenerationGC.__init__(self, *args, **kwds)
-
- # Objects whose total size is at least 'large_object' bytes are
- # allocated separately in a mark-n-sweep fashion. If the object
- # has GC pointers in its varsized part, we use instead the
- # higher limit 'large_object_gcptrs'. The idea is that
- # separately allocated objects are allocated immediately "old"
- # and it's not good to have too many pointers from old to young
- # objects.
-
- # In this class, we assume that the 'large_object' limit is not
- # very high, so that all objects that wouldn't easily fit in the
- # nursery are considered large by this limit. This is the
- # meaning of the 'assert' below.
- self.nonlarge_max = large_object - 1
- self.nonlarge_gcptrs_max = large_object_gcptrs - 1
- assert self.nonlarge_gcptrs_max <= self.lb_young_var_basesize
- assert self.nonlarge_max <= self.nonlarge_gcptrs_max
-
- def setup(self):
- self.large_objects_collect_trigger = self.param_space_size
- self._initial_trigger = self.large_objects_collect_trigger
- self.rawmalloced_objects_to_trace = self.AddressStack()
- self.count_semispaceonly_collects = 0
-
- self.gen2_rawmalloced_objects = self.AddressStack()
- self.gen3_rawmalloced_objects = self.AddressStack()
- GenerationGC.setup(self)
-
- def set_max_heap_size(self, size):
- raise NotImplementedError
-
- # NB. to simplify the code, only varsized objects can be considered
- # 'large'.
-
- def malloc_varsize_clear(self, typeid, length, size, itemsize,
- offset_to_length):
- size_gc_header = self.gcheaderbuilder.size_gc_header
- nonvarsize = size_gc_header + size
-
- # Compute the maximal length that makes the object still
- # below 'nonlarge_max'. All the following logic is usually
- # constant-folded because self.nonlarge_max, size and itemsize
- # are all constants (the arguments are constant due to
- # inlining) and self.has_gcptr_in_varsize() is constant-folded.
- if self.has_gcptr_in_varsize(typeid):
- nonlarge_max = self.nonlarge_gcptrs_max
- else:
- nonlarge_max = self.nonlarge_max
-
- if not raw_malloc_usage(itemsize):
- too_many_items = raw_malloc_usage(nonvarsize) > nonlarge_max
- else:
- maxlength = nonlarge_max - raw_malloc_usage(nonvarsize)
- maxlength = maxlength // raw_malloc_usage(itemsize)
- too_many_items = length > maxlength
-
- if not too_many_items:
- # With the above checks we know now that totalsize cannot be more
- # than 'nonlarge_max'; in particular, the + and * cannot overflow.
- # Let's try to fit the object in the nursery.
- totalsize = nonvarsize + itemsize * length
- result = self.nursery_free
- if raw_malloc_usage(totalsize) <= self.nursery_top - result:
- llarena.arena_reserve(result, totalsize)
- # GCFLAG_NO_YOUNG_PTRS is never set on young objs
- self.init_gc_object(result, typeid, flags=0)
- (result + size_gc_header + offset_to_length).signed[0] = length
- self.nursery_free = result + llarena.round_up_for_allocation(
- totalsize)
- return llmemory.cast_adr_to_ptr(result+size_gc_header,
- llmemory.GCREF)
- return self.malloc_varsize_slowpath(typeid, length)
-
- def malloc_varsize_slowpath(self, typeid, length, force_nonmovable=False):
- # For objects that are too large, or when the nursery is exhausted.
- # In order to keep malloc_varsize_clear() as compact as possible,
- # we recompute what we need in this slow path instead of passing
- # it all as function arguments.
- size_gc_header = self.gcheaderbuilder.size_gc_header
- nonvarsize = size_gc_header + self.fixed_size(typeid)
- itemsize = self.varsize_item_sizes(typeid)
- offset_to_length = self.varsize_offset_to_length(typeid)
- try:
- varsize = ovfcheck(itemsize * length)
- totalsize = ovfcheck(nonvarsize + varsize)
- except OverflowError:
- raise MemoryError()
- if self.has_gcptr_in_varsize(typeid):
- nonlarge_max = self.nonlarge_gcptrs_max
- else:
- nonlarge_max = self.nonlarge_max
- if force_nonmovable or raw_malloc_usage(totalsize) > nonlarge_max:
- result = self.malloc_varsize_marknsweep(totalsize)
- flags = self.GCFLAGS_FOR_NEW_EXTERNAL_OBJECTS | GCFLAG_UNVISITED
- else:
- result = self.malloc_varsize_collecting_nursery(totalsize)
- flags = self.GCFLAGS_FOR_NEW_YOUNG_OBJECTS
- self.init_gc_object(result, typeid, flags)
- (result + size_gc_header + offset_to_length).signed[0] = length
- return llmemory.cast_adr_to_ptr(result+size_gc_header, llmemory.GCREF)
-
- malloc_varsize_slowpath._dont_inline_ = True
-
- def malloc_varsize_nonmovable(self, typeid, length):
- return self.malloc_varsize_slowpath(typeid, length, True)
-
- def malloc_nonmovable(self, typeid, length, zero):
- # helper for testing, same as GCBase.malloc
- if self.is_varsize(typeid):
- gcref = self.malloc_varsize_slowpath(typeid, length, True)
- else:
- raise NotImplementedError("Not supported")
- return llmemory.cast_ptr_to_adr(gcref)
-
- def can_move(self, addr):
- tid = self.header(addr).tid
- return not (tid & GCFLAG_EXTERNAL)
-
- def malloc_varsize_collecting_nursery(self, totalsize):
- result = self.collect_nursery()
- ll_assert(raw_malloc_usage(totalsize) <= self.nursery_top - result,
- "not enough room in malloc_varsize_collecting_nursery()")
- llarena.arena_reserve(result, totalsize)
- self.nursery_free = result + llarena.round_up_for_allocation(
- totalsize)
- return result
-
- def _check_rawsize_alloced(self, size_estimate):
- self.large_objects_collect_trigger -= size_estimate
- if self.large_objects_collect_trigger < 0:
- debug_start("gc-rawsize-collect")
- debug_print("allocated", (self._initial_trigger -
- self.large_objects_collect_trigger),
- "bytes, triggering full collection")
- self.semispace_collect()
- debug_stop("gc-rawsize-collect")
-
- def malloc_varsize_marknsweep(self, totalsize):
- # In order to free the large objects from time to time, we
- # arbitrarily force a full collect() if none occurs when we have
- # allocated self.space_size + rawmalloced bytes of large objects.
- self._check_rawsize_alloced(raw_malloc_usage(totalsize))
- result = self.allocate_external_object(totalsize)
- if not result:
- raise MemoryError()
- # The parent classes guarantee zero-filled allocations, so we
- # need to follow suit.
- llmemory.raw_memclear(result, totalsize)
- size_gc_header = self.gcheaderbuilder.size_gc_header
- self.gen2_rawmalloced_objects.append(result + size_gc_header)
- return result
-
- def allocate_external_object(self, totalsize):
- # XXX maybe we should use arena_malloc() above a certain size?
- # If so, we'd also use arena_reset() in malloc_varsize_marknsweep().
- return llmemory.raw_malloc(totalsize)
-
- def init_gc_object_immortal(self, addr, typeid,
- flags=(GCFLAG_NO_YOUNG_PTRS |
- GCFLAG_NO_HEAP_PTRS |
- GCFLAG_AGE_MAX)):
- GenerationGC.init_gc_object_immortal(self, addr, typeid, flags)
-
- # ___________________________________________________________________
- # collect() and semispace_collect() are not synonyms in this GC: the
- # former is a complete collect, while the latter is only collecting
- # the semispaces and not always doing the mark-n-sweep pass over the
- # external objects of 3rd generation.
-
- def collect(self, gen=2):
- if gen > 1:
- self.count_semispaceonly_collects = self.generation3_collect_threshold
- GenerationGC.collect(self, gen)
-
- def is_collecting_gen3(self):
- count = self.count_semispaceonly_collects
- return count >= self.generation3_collect_threshold
-
- # ___________________________________________________________________
- # the following methods are hook into SemiSpaceGC.semispace_collect()
-
- def starting_full_collect(self):
- # At the start of a collection, the GCFLAG_UNVISITED bit is set
- # exactly on the objects in gen2_rawmalloced_objects. Only
- # raw_malloc'ed objects can ever have this bit set.
- self.count_semispaceonly_collects += 1
- if self.is_collecting_gen3():
- # set the GCFLAG_UNVISITED on all rawmalloced generation-3 objects
- # as well, to let them be recorded by visit_external_object()
- self.gen3_rawmalloced_objects.foreach(self._set_gcflag_unvisited,
- None)
- ll_assert(not self.rawmalloced_objects_to_trace.non_empty(),
- "rawmalloced_objects_to_trace should be empty at start")
- self._nonmoving_copy_count = 0
- self._nonmoving_copy_size = 0
-
- def _set_gcflag_unvisited(self, obj, ignored):
- ll_assert(not (self.header(obj).tid & GCFLAG_UNVISITED),
- "bogus GCFLAG_UNVISITED on gen3 obj")
- self.header(obj).tid |= GCFLAG_UNVISITED
-
- def collect_roots(self):
- if not self.is_collecting_gen3():
- GenerationGC.collect_roots(self)
- else:
- # as we don't record which prebuilt gc objects point to
- # rawmalloced generation 3 objects, we have to trace all
- # the prebuilt gc objects.
- self.root_walker.walk_roots(
- SemiSpaceGC._collect_root, # stack roots
- SemiSpaceGC._collect_root, # static in prebuilt non-gc structs
- SemiSpaceGC._collect_root) # static in prebuilt gc objects
-
- def surviving(self, obj):
- # To use during a collection. The objects that survive are the
- # ones with GCFLAG_FORWARDED set and GCFLAG_UNVISITED not set.
- # This is equivalent to self.is_forwarded() for all objects except
- # the ones obtained by raw_malloc.
- flags = self.header(obj).tid & (GCFLAG_FORWARDED|GCFLAG_UNVISITED)
- return flags == GCFLAG_FORWARDED
-
- def is_last_generation(self, obj):
- return ((self.header(obj).tid & (GCFLAG_EXTERNAL|GCFLAG_AGE_MASK)) ==
- (GCFLAG_EXTERNAL|GCFLAG_AGE_MAX))
-
- def visit_external_object(self, obj):
- hdr = self.header(obj)
- if hdr.tid & GCFLAG_UNVISITED:
- # This is a not-visited-yet raw_malloced object.
- hdr.tid &= ~GCFLAG_UNVISITED
- self.rawmalloced_objects_to_trace.append(obj)
-
- def make_a_copy(self, obj, objsize):
- # During a full collect, all copied objects might implicitly come
- # from the nursery. If they do, we must add the GCFLAG_NO_YOUNG_PTRS.
- # If they don't, we count how many times they are copied and when
- # some threshold is reached we make the copy a non-movable "external"
- # object. The threshold is MAX_SEMISPACE_AGE.
- tid = self.header(obj).tid
- # XXX the following logic is not doing exactly what is explained
- # above: any object without GCFLAG_NO_YOUNG_PTRS has its age not
- # incremented. This is accidental: it means that objects that
- # are very often modified to point to young objects don't reach
- # the 3rd generation. For now I'll leave it this way because
- # I'm not sure that it's a bad thing.
- if not (tid & GCFLAG_NO_YOUNG_PTRS):
- tid |= GCFLAG_NO_YOUNG_PTRS # object comes from the nursery
- elif (tid & GCFLAG_AGE_MASK) < GCFLAG_AGE_MAX:
- tid += GCFLAG_AGE_ONE
- else:
- newobj = self.make_a_nonmoving_copy(obj, objsize)
- if newobj:
- return newobj
- tid &= ~GCFLAG_AGE_MASK
- # skip GenerationGC.make_a_copy() as we already did the right
- # thing about GCFLAG_NO_YOUNG_PTRS
- return self._make_a_copy_with_tid(obj, objsize, tid)
-
- def make_a_nonmoving_copy(self, obj, objsize):
- # NB. the object can have a finalizer or be a weakref, but
- # it's not an issue.
- totalsize = self.size_gc_header() + objsize
- tid = self.header(obj).tid
- if tid & GCFLAG_HASHMASK:
- totalsize_incl_hash = totalsize + llmemory.sizeof(lltype.Signed)
- else:
- totalsize_incl_hash = totalsize
- newaddr = self.allocate_external_object(totalsize_incl_hash)
- if not newaddr:
- return llmemory.NULL # can't raise MemoryError during a collect()
- self._nonmoving_copy_count += 1
- self._nonmoving_copy_size += raw_malloc_usage(totalsize)
-
- llmemory.raw_memcopy(obj - self.size_gc_header(), newaddr, totalsize)
- if tid & GCFLAG_HASHMASK:
- hash = self._get_object_hash(obj, objsize, tid)
- (newaddr + totalsize).signed[0] = hash
- tid |= GC_HASH_HASFIELD
- #
- # GCFLAG_UNVISITED is not set
- # GCFLAG_NO_HEAP_PTRS is not set either, conservatively. It may be
- # set by the next collection's collect_last_generation_roots().
- # This old object is immediately put at generation 3.
- newobj = newaddr + self.size_gc_header()
- hdr = self.header(newobj)
- hdr.tid = tid | self.GCFLAGS_FOR_NEW_EXTERNAL_OBJECTS
- ll_assert(self.is_last_generation(newobj),
- "make_a_nonmoving_copy: object too young")
- self.gen3_rawmalloced_objects.append(newobj)
- self.last_generation_root_objects.append(newobj)
- self.rawmalloced_objects_to_trace.append(newobj) # visit me
- return newobj
-
- def scan_copied(self, scan):
- # Alternate between scanning the regular objects we just moved
- # and scanning the raw_malloc'ed object we just visited.
- progress = True
- while progress:
- newscan = GenerationGC.scan_copied(self, scan)
- progress = newscan != scan
- scan = newscan
- while self.rawmalloced_objects_to_trace.non_empty():
- obj = self.rawmalloced_objects_to_trace.pop()
- self.trace_and_copy(obj)
- progress = True
- return scan
-
- def finished_full_collect(self):
- ll_assert(not self.rawmalloced_objects_to_trace.non_empty(),
- "rawmalloced_objects_to_trace should be empty at end")
- debug_print("| [hybrid] made nonmoving: ",
- self._nonmoving_copy_size, "bytes in",
- self._nonmoving_copy_count, "objs")
- rawmalloced_trigger = 0
- # sweep the nonmarked rawmalloced objects
- if self.is_collecting_gen3():
- rawmalloced_trigger += self.sweep_rawmalloced_objects(generation=3)
- rawmalloced_trigger += self.sweep_rawmalloced_objects(generation=2)
- self.large_objects_collect_trigger = (rawmalloced_trigger +
- self.space_size)
- if self.is_collecting_gen3():
- self.count_semispaceonly_collects = 0
- self._initial_trigger = self.large_objects_collect_trigger
-
- def sweep_rawmalloced_objects(self, generation):
- # free all the rawmalloced objects of the specified generation
- # that have not been marked
- if generation == 2:
- objects = self.gen2_rawmalloced_objects
- # generation 2 sweep: if A points to an object object B that
- # moves from gen2 to gen3, it's possible that A no longer points
- # to any gen2 object. In this case, A remains a bit too long in
- # last_generation_root_objects, but this will be fixed by the
- # next collect_last_generation_roots().
- elif generation == 3:
- objects = self.gen3_rawmalloced_objects
- # generation 3 sweep: remove from last_generation_root_objects
- # all the objects that we are about to free
- gen3roots = self.last_generation_root_objects
- newgen3roots = self.AddressStack()
- while gen3roots.non_empty():
- obj = gen3roots.pop()
- if not (self.header(obj).tid & GCFLAG_UNVISITED):
- newgen3roots.append(obj)
- gen3roots.delete()
- self.last_generation_root_objects = newgen3roots
- else:
- ll_assert(False, "bogus 'generation'")
- return 0 # to please the flowspace
-
- surviving_objects = self.AddressStack()
- # Help the flow space
- alive_count = alive_size = dead_count = dead_size = 0
- debug = have_debug_prints()
- while objects.non_empty():
- obj = objects.pop()
- tid = self.header(obj).tid
- if tid & GCFLAG_UNVISITED:
- if debug:
- dead_count+=1
- dead_size+=raw_malloc_usage(self.get_size_incl_hash(obj))
- addr = obj - self.gcheaderbuilder.size_gc_header
- llmemory.raw_free(addr)
- else:
- if debug:
- alive_count+=1
- alive_size+=raw_malloc_usage(self.get_size_incl_hash(obj))
- if generation == 3:
- surviving_objects.append(obj)
- elif generation == 2:
- ll_assert((tid & GCFLAG_AGE_MASK) < GCFLAG_AGE_MAX,
- "wrong age for generation 2 object")
- tid += GCFLAG_AGE_ONE
- if (tid & GCFLAG_AGE_MASK) == GCFLAG_AGE_MAX:
- # the object becomes part of generation 3
- self.gen3_rawmalloced_objects.append(obj)
- # GCFLAG_NO_HEAP_PTRS not set yet, conservatively
- self.last_generation_root_objects.append(obj)
- else:
- # the object stays in generation 2
- tid |= GCFLAG_UNVISITED
- surviving_objects.append(obj)
- self.header(obj).tid = tid
- objects.delete()
- if generation == 2:
- self.gen2_rawmalloced_objects = surviving_objects
- elif generation == 3:
- self.gen3_rawmalloced_objects = surviving_objects
- debug_print("| [hyb] gen", generation,
- "nonmoving now alive: ",
- alive_size, "bytes in",
- alive_count, "objs")
- debug_print("| [hyb] gen", generation,
- "nonmoving freed: ",
- dead_size, "bytes in",
- dead_count, "objs")
- return alive_size
-
- def id(self, ptr):
- obj = llmemory.cast_ptr_to_adr(ptr)
-
- # is it a tagged pointer?
- if not self.is_valid_gc_object(obj):
- return llmemory.cast_adr_to_int(obj)
-
- if self._is_external(obj):
- # a prebuilt or rawmalloced object
- if self.is_last_generation(obj):
- # a generation 3 object may be one that used to live in
- # the semispace. So we still need to check if the object had
- # its id taken before. If not, we can use its address as its
- # id as it is not going to move any more.
- result = self.objects_with_id.get(obj, obj)
- else:
- # a generation 2 external object was never non-external in
- # the past, so it cannot be listed in self.objects_with_id.
- result = obj
- else:
- result = self._compute_id(obj) # common case
- return llmemory.cast_adr_to_int(result) * 2 # see comment in base.py
- # XXX a possible optimization would be to use three dicts, one
- # for each generation, instead of mixing gen2 and gen3 objects.
-
- def debug_check_object(self, obj):
- """Check the invariants about 'obj' that should be true
- between collections."""
- GenerationGC.debug_check_object(self, obj)
- tid = self.header(obj).tid
- if tid & GCFLAG_UNVISITED:
- ll_assert(self._d_gen2ro.contains(obj),
- "GCFLAG_UNVISITED on non-gen2 object")
-
- def debug_check_consistency(self):
- if self.DEBUG:
- self._d_gen2ro = self.gen2_rawmalloced_objects.stack2dict()
- GenerationGC.debug_check_consistency(self)
- self._d_gen2ro.delete()
- self.gen2_rawmalloced_objects.foreach(self._debug_check_gen2, None)
- self.gen3_rawmalloced_objects.foreach(self._debug_check_gen3, None)
-
- def _debug_check_gen2(self, obj, ignored):
- tid = self.header(obj).tid
- ll_assert(bool(tid & GCFLAG_EXTERNAL),
- "gen2: missing GCFLAG_EXTERNAL")
- ll_assert(bool(tid & GC_HASH_TAKEN_ADDR),
- "gen2: missing GC_HASH_TAKEN_ADDR")
- ll_assert(bool(tid & GCFLAG_UNVISITED),
- "gen2: missing GCFLAG_UNVISITED")
- ll_assert((tid & GCFLAG_AGE_MASK) < GCFLAG_AGE_MAX,
- "gen2: age field too large")
- def _debug_check_gen3(self, obj, ignored):
- tid = self.header(obj).tid
- ll_assert(bool(tid & GCFLAG_EXTERNAL),
- "gen3: missing GCFLAG_EXTERNAL")
- ll_assert(bool(tid & GC_HASH_TAKEN_ADDR),
- "gen3: missing GC_HASH_TAKEN_ADDR")
- ll_assert(not (tid & GCFLAG_UNVISITED),
- "gen3: unexpected GCFLAG_UNVISITED")
- ll_assert((tid & GCFLAG_AGE_MASK) == GCFLAG_AGE_MAX,
- "gen3: wrong age field")
-
- def can_malloc_nonmovable(self):
- return True
diff --git a/rpython/memory/gc/semispace.py b/rpython/memory/gc/semispace.py
deleted file mode 100644
--- a/rpython/memory/gc/semispace.py
+++ /dev/null
@@ -1,745 +0,0 @@
-from rpython.rtyper.lltypesystem.llmemory import raw_malloc, raw_free
-from rpython.rtyper.lltypesystem.llmemory import raw_memcopy, raw_memclear
-from rpython.rtyper.lltypesystem.llmemory import NULL, raw_malloc_usage
-from rpython.memory.support import get_address_stack, get_address_deque
-from rpython.memory.support import AddressDict
-from rpython.rtyper.lltypesystem import lltype, llmemory, llarena, rffi, llgroup
-from rpython.rlib.objectmodel import free_non_gc_object
-from rpython.rlib.debug import ll_assert, have_debug_prints
-from rpython.rlib.debug import debug_print, debug_start, debug_stop
-from rpython.rtyper.lltypesystem.lloperation import llop
-from rpython.rlib.rarithmetic import ovfcheck, LONG_BIT
-from rpython.memory.gc.base import MovingGCBase, ARRAY_TYPEID_MAP,\
- TYPEID_MAP
-
-import sys, os
-
-first_gcflag = 1 << (LONG_BIT//2)
-GCFLAG_FORWARDED = first_gcflag
-# GCFLAG_EXTERNAL is set on objects not living in the semispace:
-# either immortal objects or (for HybridGC) externally raw_malloc'ed
-GCFLAG_EXTERNAL = first_gcflag << 1
-GCFLAG_FINALIZATION_ORDERING = first_gcflag << 2
-
-_GCFLAG_HASH_BASE = first_gcflag << 3
-GCFLAG_HASHMASK = _GCFLAG_HASH_BASE * 0x3 # also consumes 'first_gcflag << 4'
-# the two bits in GCFLAG_HASHMASK can have one of the following values:
-# - nobody ever asked for the hash of the object
-GC_HASH_NOTTAKEN = _GCFLAG_HASH_BASE * 0x0
-# - someone asked, and we gave the address of the object
-GC_HASH_TAKEN_ADDR = _GCFLAG_HASH_BASE * 0x1
-# - someone asked, and we gave the address plus 'nursery_hash_base'
-GC_HASH_TAKEN_NURS = _GCFLAG_HASH_BASE * 0x2
-# - we have our own extra field to store the hash
-GC_HASH_HASFIELD = _GCFLAG_HASH_BASE * 0x3
-
-GCFLAG_EXTRA = first_gcflag << 5 # for RPython abuse only
-
-memoryError = MemoryError()
-
-
-class SemiSpaceGC(MovingGCBase):
- _alloc_flavor_ = "raw"
- inline_simple_malloc = True
- inline_simple_malloc_varsize = True
- malloc_zero_filled = True
- first_unused_gcflag = first_gcflag << 6
- gcflag_extra = GCFLAG_EXTRA
-
- HDR = lltype.Struct('header', ('tid', lltype.Signed)) # XXX or rffi.INT?
- typeid_is_in_field = 'tid'
- withhash_flag_is_in_field = 'tid', _GCFLAG_HASH_BASE * 0x2
- # ^^^ prebuilt objects either have GC_HASH_TAKEN_ADDR or they
- # have GC_HASH_HASFIELD (and then they are one word longer).
- FORWARDSTUB = lltype.GcStruct('forwarding_stub',
- ('forw', llmemory.Address))
- FORWARDSTUBPTR = lltype.Ptr(FORWARDSTUB)
-
- object_minimal_size = llmemory.sizeof(FORWARDSTUB)
-
- # the following values override the default arguments of __init__ when
- # translating to a real backend.
- TRANSLATION_PARAMS = {'space_size': 8*1024*1024} # XXX adjust
-
- def __init__(self, config, space_size=4096, max_space_size=sys.maxint//2+1,
- **kwds):
- self.param_space_size = space_size
- self.param_max_space_size = max_space_size
- MovingGCBase.__init__(self, config, **kwds)
-
- def setup(self):
- #self.total_collection_time = 0.0
- self.total_collection_count = 0
-
- self.space_size = self.param_space_size
- self.max_space_size = self.param_max_space_size
- self.red_zone = 0
-
- #self.program_start_time = time.time()
- self.tospace = llarena.arena_malloc(self.space_size, True)
- ll_assert(bool(self.tospace), "couldn't allocate tospace")
- self.top_of_space = self.tospace + self.space_size
- self.fromspace = llarena.arena_malloc(self.space_size, True)
- ll_assert(bool(self.fromspace), "couldn't allocate fromspace")
- self.free = self.tospace
- MovingGCBase.setup(self)
- self.objects_with_finalizers = self.AddressDeque()
- self.objects_with_light_finalizers = self.AddressStack()
- self.objects_with_weakrefs = self.AddressStack()
-
- def _teardown(self):
- debug_print("Teardown")
- llarena.arena_free(self.fromspace)
- llarena.arena_free(self.tospace)
-
- # This class only defines the malloc_{fixed,var}size_clear() methods
- # because the spaces are filled with zeroes in advance.
-
- def malloc_fixedsize_clear(self, typeid16, size,
- has_finalizer=False,
- is_finalizer_light=False,
- contains_weakptr=False):
- size_gc_header = self.gcheaderbuilder.size_gc_header
- totalsize = size_gc_header + size
- result = self.free
- if raw_malloc_usage(totalsize) > self.top_of_space - result:
- result = self.obtain_free_space(totalsize)
- llarena.arena_reserve(result, totalsize)
- self.init_gc_object(result, typeid16)
- self.free = result + totalsize
- #if is_finalizer_light:
- # self.objects_with_light_finalizers.append(result + size_gc_header)
- #else:
- if has_finalizer:
- self.objects_with_finalizers.append(result + size_gc_header)
- if contains_weakptr:
- self.objects_with_weakrefs.append(result + size_gc_header)
- return llmemory.cast_adr_to_ptr(result+size_gc_header, llmemory.GCREF)
-
- def malloc_varsize_clear(self, typeid16, length, size, itemsize,
- offset_to_length):
- size_gc_header = self.gcheaderbuilder.size_gc_header
- nonvarsize = size_gc_header + size
- try:
- varsize = ovfcheck(itemsize * length)
- totalsize = ovfcheck(nonvarsize + varsize)
- except OverflowError:
- raise memoryError
- result = self.free
- if raw_malloc_usage(totalsize) > self.top_of_space - result:
- result = self.obtain_free_space(totalsize)
- llarena.arena_reserve(result, totalsize)
- self.init_gc_object(result, typeid16)
- (result + size_gc_header + offset_to_length).signed[0] = length
- self.free = result + llarena.round_up_for_allocation(totalsize)
- return llmemory.cast_adr_to_ptr(result+size_gc_header, llmemory.GCREF)
-
- def shrink_array(self, addr, smallerlength):
- size_gc_header = self.gcheaderbuilder.size_gc_header
- if self._is_in_the_space(addr - size_gc_header):
- typeid = self.get_type_id(addr)
- totalsmallersize = (
- size_gc_header + self.fixed_size(typeid) +
- self.varsize_item_sizes(typeid) * smallerlength)
- llarena.arena_shrink_obj(addr - size_gc_header, totalsmallersize)
- #
- offset_to_length = self.varsize_offset_to_length(typeid)
- (addr + offset_to_length).signed[0] = smallerlength
- return True
- else:
- return False
-
- def obtain_free_space(self, needed):
- # a bit of tweaking to maximize the performance and minimize the
- # amount of code in an inlined version of malloc_fixedsize_clear()
- if not self.try_obtain_free_space(needed):
- raise memoryError
- return self.free
- obtain_free_space._dont_inline_ = True
-
- def try_obtain_free_space(self, needed):
- # XXX for bonus points do big objects differently
- needed = raw_malloc_usage(needed)
- if (self.red_zone >= 2 and self.space_size < self.max_space_size and
- self.double_space_size()):
- pass # collect was done during double_space_size()
- else:
- self.semispace_collect()
- missing = needed - (self.top_of_space - self.free)
- if missing <= 0:
- return True # success
- else:
- # first check if the object could possibly fit
- proposed_size = self.space_size
- while missing > 0:
- if proposed_size >= self.max_space_size:
- return False # no way
- missing -= proposed_size
- proposed_size *= 2
- # For address space fragmentation reasons, we double the space
- # size possibly several times, moving the objects at each step,
- # instead of going directly for the final size. We assume that
- # it's a rare case anyway.
- while self.space_size < proposed_size:
- if not self.double_space_size():
- return False
- ll_assert(needed <= self.top_of_space - self.free,
- "double_space_size() failed to do its job")
- return True
-
- def double_space_size(self):
- self.red_zone = 0
- old_fromspace = self.fromspace
- newsize = self.space_size * 2
- newspace = llarena.arena_malloc(newsize, True)
- if not newspace:
- return False # out of memory
- llarena.arena_free(old_fromspace)
- self.fromspace = newspace
- # now self.tospace contains the existing objects and
- # self.fromspace is the freshly allocated bigger space
-
- self.semispace_collect(size_changing=True)
- self.top_of_space = self.tospace + newsize
- # now self.tospace is the freshly allocated bigger space,
- # and self.fromspace is the old smaller space, now empty
- llarena.arena_free(self.fromspace)
-
- newspace = llarena.arena_malloc(newsize, True)
- if not newspace:
- # Complex failure case: we have in self.tospace a big chunk
- # of memory, and the two smaller original spaces are already gone.
- # Unsure if it's worth these efforts, but we can artificially
- # split self.tospace in two again...
- self.max_space_size = self.space_size # don't try to grow again,
- # because doing arena_free(self.fromspace) would crash
- self.fromspace = self.tospace + self.space_size
- self.top_of_space = self.fromspace
- ll_assert(self.free <= self.top_of_space,
- "unexpected growth of GC space usage during collect")
- return False # out of memory
-
- self.fromspace = newspace
- self.space_size = newsize
- return True # success
-
- def set_max_heap_size(self, size):
- # Set the maximum semispace size.
- # The size is rounded down to the next power of two. Also, this is
- # the size of one semispace only, so actual usage can be the double
- # during a collection. Finally, note that this will never shrink
- # an already-allocated heap.
- if size < 1:
- size = 1 # actually, the minimum is 8MB in default translations
- self.max_space_size = sys.maxint//2+1
- while self.max_space_size > size:
- self.max_space_size >>= 1
-
- @classmethod
- def JIT_minimal_size_in_nursery(cls):
- return cls.object_minimal_size
-
- def collect(self, gen=0):
- self.debug_check_consistency()
- self.semispace_collect()
- # the indirection is required by the fact that collect() is referred
- # to by the gc transformer, and the default argument would crash
- # (this is also a hook for the HybridGC)
-
- def semispace_collect(self, size_changing=False):
- debug_start("gc-collect")
- debug_print()
- debug_print(".----------- Full collection ------------------")
- start_usage = self.free - self.tospace
- debug_print("| used before collection: ",
- start_usage, "bytes")
- #start_time = time.time()
- #llop.debug_print(lltype.Void, 'semispace_collect', int(size_changing))
-
- # Switch the spaces. We copy everything over to the empty space
- # (self.fromspace at the beginning of the collection), and clear the old
- # one (self.tospace at the beginning). Their purposes will be reversed
- # for the next collection.
- tospace = self.fromspace
- fromspace = self.tospace
- self.fromspace = fromspace
- self.tospace = tospace
- self.top_of_space = tospace + self.space_size
- scan = self.free = tospace
- self.starting_full_collect()
- self.collect_roots()
- if self.run_finalizers.non_empty():
- self.update_run_finalizers()
- scan = self.scan_copied(scan)
- if self.objects_with_light_finalizers.non_empty():
- self.deal_with_objects_with_light_finalizers()
- if self.objects_with_finalizers.non_empty():
- scan = self.deal_with_objects_with_finalizers(scan)
- if self.objects_with_weakrefs.non_empty():
- self.invalidate_weakrefs()
- self.update_objects_with_id()
- self.finished_full_collect()
- self.debug_check_consistency()
- if not size_changing:
- llarena.arena_reset(fromspace, self.space_size, True)
- self.record_red_zone()
- self.execute_finalizers()
- #llop.debug_print(lltype.Void, 'collected', self.space_size, size_changing, self.top_of_space - self.free)
- if have_debug_prints():
- #end_time = time.time()
- #elapsed_time = end_time - start_time
- #self.total_collection_time += elapsed_time
- self.total_collection_count += 1
- #total_program_time = end_time - self.program_start_time
- end_usage = self.free - self.tospace
- debug_print("| used after collection: ",
- end_usage, "bytes")
- debug_print("| freed: ",
- start_usage - end_usage, "bytes")
- debug_print("| size of each semispace: ",
- self.space_size, "bytes")
- debug_print("| fraction of semispace now used: ",
- end_usage * 100.0 / self.space_size, "%")
- #ct = self.total_collection_time
- cc = self.total_collection_count
- debug_print("| number of semispace_collects: ",
- cc)
- #debug_print("| i.e.: ",
- # cc / total_program_time, "per second")
- #debug_print("| total time in semispace_collect: ",
- # ct, "seconds")
- #debug_print("| i.e.: ",
- # ct * 100.0 / total_program_time, "%")
- debug_print("`----------------------------------------------")
- debug_stop("gc-collect")
-
- def starting_full_collect(self):
- pass # hook for the HybridGC
-
- def finished_full_collect(self):
- pass # hook for the HybridGC
-
- def record_red_zone(self):
- # red zone: if the space is more than 80% full, the next collection
- # should double its size. If it is more than 66% full twice in a row,
- # then it should double its size too. (XXX adjust)
- # The goal is to avoid many repeated collection that don't free a lot
- # of memory each, if the size of the live object set is just below the
- # size of the space.
- free_after_collection = self.top_of_space - self.free
- if free_after_collection > self.space_size // 3:
- self.red_zone = 0
- else:
- self.red_zone += 1
- if free_after_collection < self.space_size // 5:
- self.red_zone += 1
-
- def get_size_incl_hash(self, obj):
- size = self.get_size(obj)
- hdr = self.header(obj)
- if (hdr.tid & GCFLAG_HASHMASK) == GC_HASH_HASFIELD:
- size += llmemory.sizeof(lltype.Signed)
- return size
-
- def scan_copied(self, scan):
- while scan < self.free:
- curr = scan + self.size_gc_header()
- self.trace_and_copy(curr)
- scan += self.size_gc_header() + self.get_size_incl_hash(curr)
- return scan
-
- def collect_roots(self):
- self.root_walker.walk_roots(
- SemiSpaceGC._collect_root, # stack roots
- SemiSpaceGC._collect_root, # static in prebuilt non-gc structures
- SemiSpaceGC._collect_root) # static in prebuilt gc objects
-
- def _collect_root(self, root):
- root.address[0] = self.copy(root.address[0])
-
- def copy(self, obj):
- if self.DEBUG:
- self.debug_check_can_copy(obj)
- if self.is_forwarded(obj):
- #llop.debug_print(lltype.Void, obj, "already copied to", self.get_forwarding_address(obj))
- return self.get_forwarding_address(obj)
- else:
- objsize = self.get_size(obj)
- newobj = self.make_a_copy(obj, objsize)
- #llop.debug_print(lltype.Void, obj, "copied to", newobj,
- # "tid", self.header(obj).tid,
- # "size", totalsize)
- self.set_forwarding_address(obj, newobj, objsize)
- return newobj
-
- def _get_object_hash(self, obj, objsize, tid):
- # Returns the hash of the object, which must not be GC_HASH_NOTTAKEN.
- gc_hash = tid & GCFLAG_HASHMASK
- if gc_hash == GC_HASH_HASFIELD:
- obj = llarena.getfakearenaaddress(obj)
- return (obj + objsize).signed[0]
- elif gc_hash == GC_HASH_TAKEN_ADDR:
- return llmemory.cast_adr_to_int(obj)
- elif gc_hash == GC_HASH_TAKEN_NURS:
- return self._compute_current_nursery_hash(obj)
- else:
- assert 0, "gc_hash == GC_HASH_NOTTAKEN"
-
- def _make_a_copy_with_tid(self, obj, objsize, tid):
- totalsize = self.size_gc_header() + objsize
- newaddr = self.free
- llarena.arena_reserve(newaddr, totalsize)
- raw_memcopy(obj - self.size_gc_header(), newaddr, totalsize)
- if tid & GCFLAG_HASHMASK:
- hash = self._get_object_hash(obj, objsize, tid)
- llarena.arena_reserve(newaddr + totalsize,
- llmemory.sizeof(lltype.Signed))
- (newaddr + totalsize).signed[0] = hash
- tid |= GC_HASH_HASFIELD
- totalsize += llmemory.sizeof(lltype.Signed)
- self.free += totalsize
- newhdr = llmemory.cast_adr_to_ptr(newaddr, lltype.Ptr(self.HDR))
- newhdr.tid = tid
- newobj = newaddr + self.size_gc_header()
- return newobj
-
- def make_a_copy(self, obj, objsize):
- tid = self.header(obj).tid
- return self._make_a_copy_with_tid(obj, objsize, tid)
-
- def trace_and_copy(self, obj):
- self.trace(obj, self._trace_copy, None)
-
- def _trace_copy(self, pointer, ignored):
- pointer.address[0] = self.copy(pointer.address[0])
-
- def surviving(self, obj):
- # To use during a collection. Check if the object is currently
- # marked as surviving the collection. This is equivalent to
- # self.is_forwarded() for all objects except the nonmoving objects
- # created by the HybridGC subclass. In all cases, if an object
- # survives, self.get_forwarding_address() returns its new address.
- return self.is_forwarded(obj)
-
- def is_forwarded(self, obj):
- return self.header(obj).tid & GCFLAG_FORWARDED != 0
- # note: all prebuilt objects also have this flag set
-
- def get_forwarding_address(self, obj):
- tid = self.header(obj).tid
- if tid & GCFLAG_EXTERNAL:
- self.visit_external_object(obj)
- return obj # external or prebuilt objects are "forwarded"
- # to themselves
- else:
- stub = llmemory.cast_adr_to_ptr(obj, self.FORWARDSTUBPTR)
- return stub.forw
-
- def visit_external_object(self, obj):
- pass # hook for the HybridGC
-
- def get_possibly_forwarded_type_id(self, obj):
- tid = self.header(obj).tid
- if self.is_forwarded(obj) and not (tid & GCFLAG_EXTERNAL):
- obj = self.get_forwarding_address(obj)
- return self.get_type_id(obj)
-
- def set_forwarding_address(self, obj, newobj, objsize):
- # To mark an object as forwarded, we set the GCFLAG_FORWARDED and
- # overwrite the object with a FORWARDSTUB. Doing so is a bit
- # long-winded on llarena, but it all melts down to two memory
- # writes after translation to C.
- size_gc_header = self.size_gc_header()
- stubsize = llmemory.sizeof(self.FORWARDSTUB)
- tid = self.header(obj).tid
- ll_assert(tid & GCFLAG_EXTERNAL == 0, "unexpected GCFLAG_EXTERNAL")
- ll_assert(tid & GCFLAG_FORWARDED == 0, "unexpected GCFLAG_FORWARDED")
- # replace the object at 'obj' with a FORWARDSTUB.
- hdraddr = obj - size_gc_header
- llarena.arena_reset(hdraddr, size_gc_header + objsize, False)
- llarena.arena_reserve(hdraddr, size_gc_header + stubsize)
- hdr = llmemory.cast_adr_to_ptr(hdraddr, lltype.Ptr(self.HDR))
- hdr.tid = tid | GCFLAG_FORWARDED
- stub = llmemory.cast_adr_to_ptr(obj, self.FORWARDSTUBPTR)
- stub.forw = newobj
-
- def combine(self, typeid16, flags):
- return llop.combine_ushort(lltype.Signed, typeid16, flags)
-
- def get_type_id(self, addr):
- tid = self.header(addr).tid
- ll_assert(tid & (GCFLAG_FORWARDED|GCFLAG_EXTERNAL) != GCFLAG_FORWARDED,
- "get_type_id on forwarded obj")
- # Non-prebuilt forwarded objects are overwritten with a FORWARDSTUB.
- # Although calling get_type_id() on a forwarded object works by itself,
- # we catch it as an error because it's likely that what is then
- # done with the typeid is bogus.
- return llop.extract_ushort(llgroup.HALFWORD, tid)
-
- def init_gc_object(self, addr, typeid16, flags=0):
- hdr = llmemory.cast_adr_to_ptr(addr, lltype.Ptr(self.HDR))
- hdr.tid = self.combine(typeid16, flags)
-
- def init_gc_object_immortal(self, addr, typeid16, flags=0):
- hdr = llmemory.cast_adr_to_ptr(addr, lltype.Ptr(self.HDR))
- flags |= GCFLAG_EXTERNAL | GCFLAG_FORWARDED | GC_HASH_TAKEN_ADDR
- hdr.tid = self.combine(typeid16, flags)
- # immortal objects always have GCFLAG_FORWARDED set;
- # see get_forwarding_address().
-
- def deal_with_objects_with_light_finalizers(self):
- """ This is a much simpler version of dealing with finalizers
- and an optimization - we can reasonably assume that those finalizers
- don't do anything fancy and *just* call them. Among other things
- they won't resurrect objects
- """
- new_objects = self.AddressStack()
- while self.objects_with_light_finalizers.non_empty():
- obj = self.objects_with_light_finalizers.pop()
- if self.surviving(obj):
- new_objects.append(self.get_forwarding_address(obj))
- else:
- finalizer = self.getfinalizer(self.get_type_id(obj))
- finalizer(obj)
- self.objects_with_light_finalizers.delete()
- self.objects_with_light_finalizers = new_objects
-
- def deal_with_objects_with_finalizers(self, scan):
- # walk over list of objects with finalizers
- # if it is not copied, add it to the list of to-be-called finalizers
- # and copy it, to me make the finalizer runnable
- # We try to run the finalizers in a "reasonable" order, like
- # CPython does. The details of this algorithm are in
- # pypy/doc/discussion/finalizer-order.txt.
- new_with_finalizer = self.AddressDeque()
- marked = self.AddressDeque()
- pending = self.AddressStack()
- self.tmpstack = self.AddressStack()
- while self.objects_with_finalizers.non_empty():
- x = self.objects_with_finalizers.popleft()
- ll_assert(self._finalization_state(x) != 1,
- "bad finalization state 1")
- if self.surviving(x):
- new_with_finalizer.append(self.get_forwarding_address(x))
- continue
- marked.append(x)
- pending.append(x)
- while pending.non_empty():
- y = pending.pop()
- state = self._finalization_state(y)
- if state == 0:
- self._bump_finalization_state_from_0_to_1(y)
- self.trace(y, self._append_if_nonnull, pending)
- elif state == 2:
- self._recursively_bump_finalization_state_from_2_to_3(y)
- scan = self._recursively_bump_finalization_state_from_1_to_2(
- x, scan)
-
- while marked.non_empty():
- x = marked.popleft()
- state = self._finalization_state(x)
- ll_assert(state >= 2, "unexpected finalization state < 2")
- newx = self.get_forwarding_address(x)
- if state == 2:
- self.run_finalizers.append(newx)
- # we must also fix the state from 2 to 3 here, otherwise
- # we leave the GCFLAG_FINALIZATION_ORDERING bit behind
- # which will confuse the next collection
- self._recursively_bump_finalization_state_from_2_to_3(x)
- else:
- new_with_finalizer.append(newx)
-
- self.tmpstack.delete()
- pending.delete()
- marked.delete()
- self.objects_with_finalizers.delete()
- self.objects_with_finalizers = new_with_finalizer
- return scan
-
- def _append_if_nonnull(pointer, stack):
- stack.append(pointer.address[0])
- _append_if_nonnull = staticmethod(_append_if_nonnull)
-
- def _finalization_state(self, obj):
- if self.surviving(obj):
- newobj = self.get_forwarding_address(obj)
- hdr = self.header(newobj)
- if hdr.tid & GCFLAG_FINALIZATION_ORDERING:
- return 2
- else:
- return 3
- else:
- hdr = self.header(obj)
- if hdr.tid & GCFLAG_FINALIZATION_ORDERING:
- return 1
- else:
- return 0
-
- def _bump_finalization_state_from_0_to_1(self, obj):
- ll_assert(self._finalization_state(obj) == 0,
- "unexpected finalization state != 0")
- hdr = self.header(obj)
- hdr.tid |= GCFLAG_FINALIZATION_ORDERING
-
- def _recursively_bump_finalization_state_from_2_to_3(self, obj):
- ll_assert(self._finalization_state(obj) == 2,
- "unexpected finalization state != 2")
- newobj = self.get_forwarding_address(obj)
- pending = self.tmpstack
- ll_assert(not pending.non_empty(), "tmpstack not empty")
- pending.append(newobj)
- while pending.non_empty():
- y = pending.pop()
- hdr = self.header(y)
- if hdr.tid & GCFLAG_FINALIZATION_ORDERING: # state 2 ?
- hdr.tid &= ~GCFLAG_FINALIZATION_ORDERING # change to state 3
- self.trace(y, self._append_if_nonnull, pending)
-
- def _recursively_bump_finalization_state_from_1_to_2(self, obj, scan):
- # recursively convert objects from state 1 to state 2.
- # Note that copy() copies all bits, including the
- # GCFLAG_FINALIZATION_ORDERING. The mapping between
- # state numbers and the presence of this bit was designed
- # for the following to work :-)
- self.copy(obj)
- return self.scan_copied(scan)
-
- def invalidate_weakrefs(self):
- # walk over list of objects that contain weakrefs
- # if the object it references survives then update the weakref
- # otherwise invalidate the weakref
- new_with_weakref = self.AddressStack()
- while self.objects_with_weakrefs.non_empty():
- obj = self.objects_with_weakrefs.pop()
- if not self.surviving(obj):
- continue # weakref itself dies
- obj = self.get_forwarding_address(obj)
- offset = self.weakpointer_offset(self.get_type_id(obj))
- pointing_to = (obj + offset).address[0]
- # XXX I think that pointing_to cannot be NULL here
- if pointing_to:
- if self.surviving(pointing_to):
- (obj + offset).address[0] = self.get_forwarding_address(
- pointing_to)
- new_with_weakref.append(obj)
- else:
- (obj + offset).address[0] = NULL
- self.objects_with_weakrefs.delete()
- self.objects_with_weakrefs = new_with_weakref
-
- def update_run_finalizers(self):
- # we are in an inner collection, caused by a finalizer
- # the run_finalizers objects need to be copied
- new_run_finalizer = self.AddressDeque()
- while self.run_finalizers.non_empty():
- obj = self.run_finalizers.popleft()
- new_run_finalizer.append(self.copy(obj))
- self.run_finalizers.delete()
- self.run_finalizers = new_run_finalizer
-
- def _is_external(self, obj):
- return (self.header(obj).tid & GCFLAG_EXTERNAL) != 0
-
- def _is_in_the_space(self, obj):
- return self.tospace <= obj < self.free
-
- def debug_check_object(self, obj):
- """Check the invariants about 'obj' that should be true
- between collections."""
- tid = self.header(obj).tid
- if tid & GCFLAG_EXTERNAL:
- ll_assert(tid & GCFLAG_FORWARDED != 0, "bug: external+!forwarded")
- ll_assert(not (self.tospace <= obj < self.free),
- "external flag but object inside the semispaces")
- else:
- ll_assert(not (tid & GCFLAG_FORWARDED), "bug: !external+forwarded")
- ll_assert(self.tospace <= obj < self.free,
- "!external flag but object outside the semispaces")
- ll_assert(not (tid & GCFLAG_FINALIZATION_ORDERING),
- "unexpected GCFLAG_FINALIZATION_ORDERING")
-
- def debug_check_can_copy(self, obj):
- ll_assert(not (self.tospace <= obj < self.free),
- "copy() on already-copied object")
-
- STATISTICS_NUMBERS = 0
-
- def is_in_nursery(self, addr):
- # overridden in generation.py.
- return False
-
- def _compute_current_nursery_hash(self, obj):
- # overridden in generation.py.
- raise AssertionError("should not be called")
-
- def identityhash(self, gcobj):
- # The following loop should run at most twice.
- while 1:
- obj = llmemory.cast_ptr_to_adr(gcobj)
- hdr = self.header(obj)
- if hdr.tid & GCFLAG_HASHMASK:
- break
- # It's the first time we ask for a hash, and it's not an
- # external object. Shrink the top of space by the extra
- # hash word that will be needed after a collect.
- shrunk_top = self.top_of_space - llmemory.sizeof(lltype.Signed)
- if shrunk_top < self.free:
- # Cannot shrink! Do a collection, asking for at least
- # one word of free space, and try again. May raise
- # MemoryError. Obscure: not called directly, but
- # across an llop, to make sure that there is the
- # correct push_roots/pop_roots around the call...
- llop.gc_obtain_free_space(llmemory.Address,
- llmemory.sizeof(lltype.Signed))
- continue
- else:
- # Now we can have side-effects: lower the top of space
- # and set one of the GC_HASH_TAKEN_xxx flags.
- self.top_of_space = shrunk_top
- if self.is_in_nursery(obj):
- hdr.tid |= GC_HASH_TAKEN_NURS
- else:
- hdr.tid |= GC_HASH_TAKEN_ADDR
- break
- # Now we can return the result
- objsize = self.get_size(obj)
- return self._get_object_hash(obj, objsize, hdr.tid)
-
- def track_heap_parent(self, obj, parent):
- addr = obj.address[0]
- parent_idx = llop.get_member_index(lltype.Signed,
- self.get_type_id(parent))
- idx = llop.get_member_index(lltype.Signed, self.get_type_id(addr))
- self._ll_typeid_map[parent_idx].links[idx] += 1
- self.track_heap(addr)
-
- def track_heap(self, adr):
- if self._tracked_dict.contains(adr):
- return
- self._tracked_dict.add(adr)
- idx = llop.get_member_index(lltype.Signed, self.get_type_id(adr))
- self._ll_typeid_map[idx].count += 1
- totsize = self.get_size(adr) + self.size_gc_header()
- self._ll_typeid_map[idx].size += llmemory.raw_malloc_usage(totsize)
- self.trace(adr, self.track_heap_parent, adr)
-
- @staticmethod
- def _track_heap_root(obj, self):
- self.track_heap(obj)
-
- def heap_stats(self):
- self._tracked_dict = self.AddressDict()
- max_tid = self.root_walker.gcdata.max_type_id
- ll_typeid_map = lltype.malloc(ARRAY_TYPEID_MAP, max_tid, zero=True)
- for i in range(max_tid):
- ll_typeid_map[i] = lltype.malloc(TYPEID_MAP, max_tid, zero=True)
- self._ll_typeid_map = ll_typeid_map
- self._tracked_dict.add(llmemory.cast_ptr_to_adr(ll_typeid_map))
- i = 0
- while i < max_tid:
- self._tracked_dict.add(llmemory.cast_ptr_to_adr(ll_typeid_map[i]))
- i += 1
- self.enumerate_all_roots(SemiSpaceGC._track_heap_root, self)
- self._ll_typeid_map = lltype.nullptr(ARRAY_TYPEID_MAP)
- self._tracked_dict.delete()
- return ll_typeid_map
diff --git a/rpython/memory/gc/test/test_direct.py b/rpython/memory/gc/test/test_direct.py
--- a/rpython/memory/gc/test/test_direct.py
+++ b/rpython/memory/gc/test/test_direct.py
@@ -364,120 +364,6 @@
p = self.stackroots[-50+i]
assert p[i-1] == chr(i)
-class TestSemiSpaceGC(DirectGCTest):
- from rpython.memory.gc.semispace import SemiSpaceGC as GCClass
-
- def test_shrink_array(self):
- S1 = lltype.GcStruct('S1', ('h', lltype.Char),
- ('v', lltype.Array(lltype.Char)))
- p1 = self.malloc(S1, 2)
- p1.h = '?'
- for i in range(2):
- p1.v[i] = chr(50 + i)
- addr = llmemory.cast_ptr_to_adr(p1)
- ok = self.gc.shrink_array(addr, 1)
- assert ok
- assert p1.h == '?'
- assert len(p1.v) == 1
- for i in range(1):
- assert p1.v[i] == chr(50 + i)
-
-
-class TestGenerationGC(TestSemiSpaceGC):
More information about the pypy-commit
mailing list