[pypy-commit] pypy cpyext-gc-cycle: Implemented configurable limit for incremental rrc collection
stevie_92
pypy.commits at gmail.com
Wed Oct 9 03:38:41 EDT 2019
Author: Stefan Beyer <home at sbeyer.at>
Branch: cpyext-gc-cycle
Changeset: r97741:518eb87d054c
Date: 2019-10-09 09:37 +0200
http://bitbucket.org/pypy/pypy/changeset/518eb87d054c/
Log: Implemented configurable limit for incremental rrc collection
Implemented working set to improve overall performance
diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -2415,7 +2415,6 @@
# finalizers/weak references are rare and short which means that
# they do not need a separate state and do not need to be
# made incremental.
- # For now, the same applies to rawrefcount'ed objects.
if rrc_finished:
ll_assert(not (self.probably_young_objects_with_finalizers
.non_empty()),
diff --git a/rpython/memory/gc/rrc/base.py b/rpython/memory/gc/rrc/base.py
--- a/rpython/memory/gc/rrc/base.py
+++ b/rpython/memory/gc/rrc/base.py
@@ -1,6 +1,7 @@
from rpython.rtyper.lltypesystem import lltype, llmemory, llgroup, rffi
from rpython.rtyper.lltypesystem.lloperation import llop
from rpython.rlib.debug import ll_assert, debug_print, debug_start, debug_stop
+from rpython.memory.gc import env
def choose_rrc_gc_from_config(config):
if config.translation.rrcgc:
@@ -118,6 +119,11 @@
self.tuple_maybe_untrack = tuple_maybe_untrack
self.state = self.STATE_DEFAULT
self.cycle_enabled = True
+ inc_limit = env.read_uint_from_env('PYPY_RRC_GC_INCREMENT_STEP')
+ if inc_limit > 0:
+ self.inc_limit = inc_limit
+ else:
+ self.inc_limit = 1000
def create_link_pypy(self, gcobj, pyobject):
obj = llmemory.cast_ptr_to_adr(gcobj)
@@ -566,8 +572,14 @@
pygchdr = self.pyobj_as_gc(pyobj)
if pygchdr <> lltype.nullptr(self.PYOBJ_GC_HDR):
if pygchdr.c_gc_refs != self.RAWREFCOUNT_REFS_UNTRACKED:
- pygchdr.c_gc_refs += self.refcnt_add << \
- self.RAWREFCOUNT_REFS_SHIFT
+ if (self.state != self.STATE_GARBAGE_MARKING and
+ pygchdr.c_gc_refs >> self.RAWREFCOUNT_REFS_SHIFT == 0
+ and self.refcnt_add > 0):
+ addr = llmemory.cast_ptr_to_adr(pygchdr)
+ self.pyobj_to_trace.append(addr)
+ else:
+ pygchdr.c_gc_refs += (self.refcnt_add <<
+ self.RAWREFCOUNT_REFS_SHIFT)
elif pyobj.c_ob_pypy_link != 0:
pyobj.c_ob_refcnt += self.refcnt_add
if self.refcnt_add > 0:
diff --git a/rpython/memory/gc/rrc/incmark.py b/rpython/memory/gc/rrc/incmark.py
--- a/rpython/memory/gc/rrc/incmark.py
+++ b/rpython/memory/gc/rrc/incmark.py
@@ -16,37 +16,46 @@
if self.state == self.STATE_DEFAULT:
# untrack all tuples with only non-gc rrc objects and
# promote all other tuples to the pyobj_list
- self._untrack_tuples()
- # TODO: execute incrementally? (before snapshot!, own phase)
+ self._untrack_tuples() # execute incrementally?
# now take a snapshot
self._take_snapshot()
self._debug_print_snap(print_label="after-snapshot")
- # collect all rawrefcounted roots
- self._collect_roots()
- # TODO: execute incrementally (own phase, save index)
-
- self._debug_print_snap(print_label="roots-marked")
- self._debug_check_consistency(print_label="roots-marked")
-
- self._gc_list_init(self.pyobj_old_list)
self.state = self.STATE_MARKING
+ self.marking_state = 0
return False
if self.state == self.STATE_MARKING:
- # mark all objects reachable from rawrefcounted roots
- all_rrc_marked = self._mark_rawrefcount()
- # TODO: execute incrementally
-
- if (all_rrc_marked and not self.gc.objects_to_trace.non_empty() and
- not self.gc.more_objects_to_trace.non_empty()):
- # all objects have been marked, dead objects will stay dead
- self._debug_print_snap(print_label="before-fin")
- self._debug_check_consistency(print_label="before-fin")
- self.state = self.STATE_GARBAGE_MARKING
+ if self.marking_state == 0:
+ # collect all rawrefcounted roots
+ self._collect_roots() # execute incrementally (save index)?
+ self._debug_print_snap(print_label="roots-marked")
+ self._debug_check_consistency(print_label="roots-marked")
+ self._gc_list_init(self.pyobj_old_list)
+ self.marking_state = 1
+ return False
+ elif self.marking_state == 1:
+ # initialize working set from roots, then pause
+ self.pyobj_to_trace = self.gc.AddressStack()
+ for i in range(0, self.total_objs):
+ obj = self.snapshot_objs[i]
+ self._mark_rawrefcount_obj(obj)
+ self.p_list_old.foreach(self._mark_rawrefcount_linked, None)
+ self.o_list_old.foreach(self._mark_rawrefcount_linked, None)
+ self.marking_state = 2
+ return False
else:
- return False
+ # mark all objects reachable from rawrefcounted roots
+ all_rrc_marked = self._mark_rawrefcount()
+ if (all_rrc_marked and not self.gc.objects_to_trace.non_empty() and
+ not self.gc.more_objects_to_trace.non_empty()):
+ # all objects have been marked, dead objects will stay dead
+ self._debug_print_snap(print_label="before-fin")
+ self._debug_check_consistency(print_label="before-fin")
+ self.state = self.STATE_GARBAGE_MARKING
+ else:
+ return False
# we are finished with marking, now finish things up
ll_assert(self.state == self.STATE_GARBAGE_MARKING, "invalid state")
@@ -91,7 +100,8 @@
# sync p_list_old (except gc-objects)
# simply iterate the snapshot for objects in p_list, as linked objects
- # might not be freed, except by the gc
+ # might not be freed, except by the gc; p_list is always at the
+ # beginning of the snapshot, so break if we reached a different pyobj
free_p_list = self.gc.AddressStack()
for i in range(0, self.total_objs):
snapobj = self.snapshot_objs[i]
@@ -102,6 +112,10 @@
if (pygchdr != lltype.nullptr(self.PYOBJ_GC_HDR) and
pygchdr.c_gc_refs != self.RAWREFCOUNT_REFS_UNTRACKED):
break # only look for non-gc
+ addr = llmemory.cast_int_to_adr(snapobj.pypy_link)
+ if (self.gc.header(addr).tid &
+ (self.GCFLAG_VISITED | self.GCFLAG_NO_HEAP_PTRS)):
+ continue # keep proxy if obj is marked
if snapobj.refcnt == 0:
# check consistency
consistent = pyobj.c_ob_refcnt == snapobj.refcnt_original
@@ -269,17 +283,23 @@
# objects are found, increment the refcount of all referenced objects
# of those newly found objects
reached_limit = False
- found_alive = True
simple_limit = 0
+ first = True # rescan proxies, in case only non-rc have been marked
#
- while found_alive and not reached_limit: # TODO: working set to improve performance?
- found_alive = False
- for i in range(0, self.total_objs):
- obj = self.snapshot_objs[i]
- found_alive |= self._mark_rawrefcount_obj(obj)
- simple_limit += 1
- if simple_limit > 3: # TODO: implement sane limit
- reached_limit
+ while first or (self.pyobj_to_trace.non_empty() and not reached_limit):
+ while self.pyobj_to_trace.non_empty() and not reached_limit:
+ addr = self.pyobj_to_trace.pop()
+ snapobj = llmemory.cast_adr_to_ptr(addr,
+ self.PYOBJ_SNAPSHOT_OBJ_PTR)
+ snapobj.refcnt += 1
+ self._mark_rawrefcount_obj(snapobj)
+ simple_limit += 1
+ if simple_limit > self.inc_limit: # TODO: add test
+ reached_limit = True
+
+ self.p_list_old.foreach(self._mark_rawrefcount_linked, None)
+ self.o_list_old.foreach(self._mark_rawrefcount_linked, None)
+ first = False
return not reached_limit # are there any objects left?
def _mark_rawrefcount_obj(self, snapobj):
@@ -301,17 +321,38 @@
obj_ref = llmemory.cast_adr_to_ptr(addr,
self.PYOBJ_SNAPSHOT_OBJ_PTR)
if obj_ref != lltype.nullptr(self.PYOBJ_SNAPSHOT_OBJ):
- obj_ref.refcnt += 1
+ if obj_ref.refcnt == 0:
+ addr = llmemory.cast_ptr_to_adr(obj_ref)
+ self.pyobj_to_trace.append(addr)
+ else:
+ obj_ref.refcnt += 1
# mark recursively, if it is a pypyobj
if snapobj.pypy_link <> 0:
intobj = snapobj.pypy_link
obj = llmemory.cast_int_to_adr(intobj)
self.gc.objects_to_trace.append(obj)
- self.gc.visit_all_objects() # TODO: remove to improve pause times
+ self.gc.visit_all_objects() # TODO: move to outer loop, implement sane limit (ex. half of normal limit), retrace proxies
# mark as processed
snapobj.status = 0
return alive
+ def _mark_rawrefcount_linked(self, pyobject, ignore):
+ # we only have to take gc-objs into consideration, rc-proxies only
+ # keep their non-rc objs alive (see _major_free)
+ pyobj = self._pyobj(pyobject)
+ addr = llmemory.cast_int_to_adr(pyobj.c_ob_pypy_link)
+ if self.gc.header(addr).tid & (self.GCFLAG_VISITED |
+ self.GCFLAG_NO_HEAP_PTRS):
+ pygchdr = self.pyobj_as_gc(pyobj)
+ if (pygchdr != lltype.nullptr(self.PYOBJ_GC_HDR) and
+ pygchdr.c_gc_refs > 0 and
+ pygchdr.c_gc_refs != self.RAWREFCOUNT_REFS_UNTRACKED):
+ index = pygchdr.c_gc_refs - 1
+ snapobj = self.snapshot_objs[index]
+ if snapobj.refcnt == 0:
+ addr = llmemory.cast_ptr_to_adr(snapobj)
+ self.pyobj_to_trace.append(addr)
+
def _take_snapshot(self):
total_refcnt = 0
total_objs = 0
diff --git a/rpython/memory/gc/rrc/mark.py b/rpython/memory/gc/rrc/mark.py
--- a/rpython/memory/gc/rrc/mark.py
+++ b/rpython/memory/gc/rrc/mark.py
@@ -166,25 +166,36 @@
self._gc_list_init(self.pyobj_old_list)
else:
self._gc_list_move(self.pyobj_list, self.pyobj_old_list)
+ pyobj_old = self.pyobj_list
+
+ # initialize working set
+ self.pyobj_to_trace = self.gc.AddressStack()
+ gchdr = self.pyobj_old_list.c_gc_next
+ while gchdr <> self.pyobj_old_list:
+ next_old = gchdr.c_gc_next
+ self._mark_rawrefcount_obj(gchdr, pyobj_old)
+ gchdr = next_old
+ gchdr = self.pyobj_isolate_old_list.c_gc_next
+ while gchdr <> self.pyobj_isolate_old_list:
+ next_old = gchdr.c_gc_next
+ self._mark_rawrefcount_obj(gchdr, pyobj_old)
+ gchdr = next_old
+ self.p_list_old.foreach(self._mark_rawrefcount_linked, None)
+ self.o_list_old.foreach(self._mark_rawrefcount_linked, None)
+
# as long as new objects with a cyclic refcount > 0 or alive border
# objects are found, increment the refcount of all referenced objects
# of those newly found objects
- found_alive = True
- pyobj_old = self.pyobj_list
- #
- while found_alive: # TODO: working set to improve performance?
- found_alive = False
- gchdr = self.pyobj_old_list.c_gc_next
- while gchdr <> self.pyobj_old_list:
- next_old = gchdr.c_gc_next
- found_alive |= self._mark_rawrefcount_obj(gchdr, pyobj_old)
- gchdr = next_old
- gchdr = self.pyobj_isolate_old_list.c_gc_next
- while gchdr <> self.pyobj_isolate_old_list:
- next_old = gchdr.c_gc_next
- found_alive |= self._mark_rawrefcount_obj(gchdr, pyobj_old)
- gchdr = next_old
- #
+ while self.pyobj_to_trace.non_empty():
+ while self.pyobj_to_trace.non_empty():
+ addr = self.pyobj_to_trace.pop()
+ gchdr = llmemory.cast_adr_to_ptr(addr, self.PYOBJ_GC_HDR_PTR)
+ gchdr.c_gc_refs += 1 << self.RAWREFCOUNT_REFS_SHIFT
+ self._mark_rawrefcount_obj(gchdr, pyobj_old)
+ self.gc.visit_all_objects()
+ self.p_list_old.foreach(self._mark_rawrefcount_linked, None)
+ self.o_list_old.foreach(self._mark_rawrefcount_linked, None)
+
# now all rawrefcounted objects, which are alive, have a cyclic
# refcount > 0 or are marked
@@ -212,5 +223,15 @@
# mark recursively, if it is a pypyobj
if pyobj.c_ob_pypy_link <> 0:
self.gc.objects_to_trace.append(obj)
- self.gc.visit_all_objects()
return alive
+
+ def _mark_rawrefcount_linked(self, pyobject, ignore):
+ pyobj = self._pyobj(pyobject)
+ obj = self.refcnt_dict.get(pyobject)
+ if self.gc.header(obj).tid & (self.GCFLAG_VISITED |
+ self.GCFLAG_NO_HEAP_PTRS):
+ gchdr = self.pyobj_as_gc(pyobj)
+ if gchdr <> lltype.nullptr(self.PYOBJ_GC_HDR):
+ if gchdr.c_gc_refs >> self.RAWREFCOUNT_REFS_SHIFT == 0:
+ addr = llmemory.cast_ptr_to_adr(gchdr)
+ self.pyobj_to_trace.append(addr)
diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py
--- a/rpython/memory/gc/test/test_rawrefcount.py
+++ b/rpython/memory/gc/test/test_rawrefcount.py
@@ -104,6 +104,7 @@
rawrefcount_pyobj_as_gc,
rawrefcount_finalizer_type,
rawrefcount_tuple_maybe_untrack)
+ self.gc.rrc_gc.inc_limit = 2 # low limit to test incremental collection
def _collect(self, major, expected_trigger=0):
if major:
More information about the pypy-commit
mailing list