[pypy-svn] r79399 - in pypy/branch/jit-free/pypy/rpython/memory/gc: . test
arigo at codespeak.net
arigo at codespeak.net
Tue Nov 23 15:48:03 CET 2010
Author: arigo
Date: Tue Nov 23 15:48:01 2010
New Revision: 79399
Added:
pypy/branch/jit-free/pypy/rpython/memory/gc/env.py
- copied unchanged from r79398, pypy/trunk/pypy/rpython/memory/gc/env.py
pypy/branch/jit-free/pypy/rpython/memory/gc/test/test_env.py
- copied unchanged from r79398, pypy/trunk/pypy/rpython/memory/gc/test/test_env.py
Modified:
pypy/branch/jit-free/pypy/rpython/memory/gc/base.py
pypy/branch/jit-free/pypy/rpython/memory/gc/generation.py
pypy/branch/jit-free/pypy/rpython/memory/gc/markcompact.py
pypy/branch/jit-free/pypy/rpython/memory/gc/minimark.py
pypy/branch/jit-free/pypy/rpython/memory/gc/test/test_minimark.py
Log:
Merge from trunk all changes done to pypy/rpython/memory/.
Modified: pypy/branch/jit-free/pypy/rpython/memory/gc/base.py
==============================================================================
--- pypy/branch/jit-free/pypy/rpython/memory/gc/base.py (original)
+++ pypy/branch/jit-free/pypy/rpython/memory/gc/base.py Tue Nov 23 15:48:01 2010
@@ -5,7 +5,6 @@
from pypy.rpython.memory.support import get_address_stack, get_address_deque
from pypy.rpython.memory.support import AddressDict
from pypy.rpython.lltypesystem.llmemory import NULL, raw_malloc_usage
-from pypy.rlib.rarithmetic import r_uint
TYPEID_MAP = lltype.GcStruct('TYPEID_MAP', ('count', lltype.Signed),
('size', lltype.Signed),
@@ -411,42 +410,6 @@
GCClass = getattr(module, classname)
return GCClass, GCClass.TRANSLATION_PARAMS
-def _read_float_and_factor_from_env(varname):
- import os
- value = os.environ.get(varname)
- if value:
- if len(value) > 1 and value[-1] in 'bB':
- value = value[:-1]
- realvalue = value[:-1]
- if value[-1] in 'kK':
- factor = 1024
- elif value[-1] in 'mM':
- factor = 1024*1024
- elif value[-1] in 'gG':
- factor = 1024*1024*1024
- else:
- factor = 1
- realvalue = value
- try:
- return (float(realvalue), factor)
- except ValueError:
- pass
- return (0.0, 0)
-
-def read_from_env(varname):
- value, factor = _read_float_and_factor_from_env(varname)
- return int(value * factor)
-
-def read_uint_from_env(varname):
- value, factor = _read_float_and_factor_from_env(varname)
- return r_uint(value * factor)
-
-def read_float_from_env(varname):
- value, factor = _read_float_and_factor_from_env(varname)
- if factor != 1:
- return 0.0
- return value
-
def _convert_callback_formats(callback):
callback = getattr(callback, 'im_func', callback)
if callback not in _converted_callback_formats:
Modified: pypy/branch/jit-free/pypy/rpython/memory/gc/generation.py
==============================================================================
--- pypy/branch/jit-free/pypy/rpython/memory/gc/generation.py (original)
+++ pypy/branch/jit-free/pypy/rpython/memory/gc/generation.py Tue Nov 23 15:48:01 2010
@@ -2,7 +2,7 @@
from pypy.rpython.memory.gc.semispace import SemiSpaceGC
from pypy.rpython.memory.gc.semispace import GCFLAG_EXTERNAL, GCFLAG_FORWARDED
from pypy.rpython.memory.gc.semispace import GC_HASH_TAKEN_ADDR
-from pypy.rpython.memory.gc.base import read_from_env
+from pypy.rpython.memory.gc import env
from pypy.rpython.lltypesystem.llmemory import NULL, raw_malloc_usage
from pypy.rpython.lltypesystem import lltype, llmemory, llarena
from pypy.rlib.objectmodel import free_non_gc_object
@@ -93,7 +93,7 @@
if self.auto_nursery_size:
newsize = nursery_size_from_env()
if newsize <= 0:
- newsize = estimate_best_nursery_size()
+ newsize = env.estimate_best_nursery_size()
if newsize > 0:
self.set_nursery_size(newsize)
@@ -633,139 +633,5 @@
# ____________________________________________________________
-import os
-
def nursery_size_from_env():
- return read_from_env('PYPY_GENERATIONGC_NURSERY')
-
-def best_nursery_size_for_L2cache(L2cache):
- # Heuristically, the best nursery size to choose is about half
- # of the L2 cache. XXX benchmark some more.
- return L2cache // 2
-
-
-if sys.platform == 'linux2':
- def estimate_best_nursery_size():
- """Try to estimate the best nursery size at run-time, depending
- on the machine we are running on.
- """
- debug_start("gc-L2cache")
- L2cache = sys.maxint
- try:
- fd = os.open('/proc/cpuinfo', os.O_RDONLY, 0644)
- try:
- data = []
- while True:
- buf = os.read(fd, 4096)
- if not buf:
- break
- data.append(buf)
- finally:
- os.close(fd)
- except OSError:
- pass
- else:
- data = ''.join(data)
- linepos = 0
- while True:
- start = findend(data, '\ncache size', linepos)
- if start < 0:
- break # done
- linepos = findend(data, '\n', start)
- if linepos < 0:
- break # no end-of-line??
- # *** data[start:linepos] == " : 2048 KB\n"
- start = skipspace(data, start)
- if data[start] != ':':
- continue
- # *** data[start:linepos] == ": 2048 KB\n"
- start = skipspace(data, start + 1)
- # *** data[start:linepos] == "2048 KB\n"
- end = start
- while '0' <= data[end] <= '9':
- end += 1
- # *** data[start:end] == "2048"
- if start == end:
- continue
- number = int(data[start:end])
- # *** data[end:linepos] == " KB\n"
- end = skipspace(data, end)
- if data[end] not in ('K', 'k'): # assume kilobytes for now
- continue
- number = number * 1024
- # for now we look for the smallest of the L2 caches of the CPUs
- if number < L2cache:
- L2cache = number
-
- debug_print("L2cache =", L2cache)
- debug_stop("gc-L2cache")
-
- if L2cache < sys.maxint:
- return best_nursery_size_for_L2cache(L2cache)
- else:
- # Print a top-level warning even in non-debug builds
- llop.debug_print(lltype.Void,
- "Warning: cannot find your CPU L2 cache size in /proc/cpuinfo")
- return -1
-
- def findend(data, pattern, pos):
- pos = data.find(pattern, pos)
- if pos < 0:
- return -1
- return pos + len(pattern)
-
- def skipspace(data, pos):
- while data[pos] in (' ', '\t'):
- pos += 1
- return pos
-
-elif sys.platform == 'darwin':
- from pypy.rpython.lltypesystem import rffi
-
- sysctlbyname = rffi.llexternal('sysctlbyname',
- [rffi.CCHARP, rffi.VOIDP, rffi.SIZE_TP,
- rffi.VOIDP, rffi.SIZE_T],
- rffi.INT,
- sandboxsafe=True)
-
- def estimate_best_nursery_size():
- """Try to estimate the best nursery size at run-time, depending
- on the machine we are running on.
- """
- debug_start("gc-L2cache")
- L2cache = 0
- l2cache_p = lltype.malloc(rffi.LONGLONGP.TO, 1, flavor='raw')
- try:
- len_p = lltype.malloc(rffi.SIZE_TP.TO, 1, flavor='raw')
- try:
- size = rffi.sizeof(rffi.LONGLONG)
- l2cache_p[0] = rffi.cast(rffi.LONGLONG, 0)
- len_p[0] = rffi.cast(rffi.SIZE_T, size)
- # XXX a hack for llhelper not being robust-enough
- result = sysctlbyname("hw.l2cachesize",
- rffi.cast(rffi.VOIDP, l2cache_p),
- len_p,
- lltype.nullptr(rffi.VOIDP.TO),
- rffi.cast(rffi.SIZE_T, 0))
- if (rffi.cast(lltype.Signed, result) == 0 and
- rffi.cast(lltype.Signed, len_p[0]) == size):
- L2cache = rffi.cast(lltype.Signed, l2cache_p[0])
- if rffi.cast(rffi.LONGLONG, L2cache) != l2cache_p[0]:
- L2cache = 0 # overflow!
- finally:
- lltype.free(len_p, flavor='raw')
- finally:
- lltype.free(l2cache_p, flavor='raw')
- debug_print("L2cache =", L2cache)
- debug_stop("gc-L2cache")
- if L2cache > 0:
- return best_nursery_size_for_L2cache(L2cache)
- else:
- # Print a top-level warning even in non-debug builds
- llop.debug_print(lltype.Void,
- "Warning: cannot find your CPU L2 cache size with sysctl()")
- return -1
-
-else:
- def estimate_best_nursery_size():
- return -1 # XXX implement me for other platforms
+ return env.read_from_env('PYPY_GENERATIONGC_NURSERY')
Modified: pypy/branch/jit-free/pypy/rpython/memory/gc/markcompact.py
==============================================================================
--- pypy/branch/jit-free/pypy/rpython/memory/gc/markcompact.py (original)
+++ pypy/branch/jit-free/pypy/rpython/memory/gc/markcompact.py Tue Nov 23 15:48:01 2010
@@ -1,5 +1,6 @@
from pypy.rpython.lltypesystem import lltype, llmemory, llarena, llgroup
-from pypy.rpython.memory.gc.base import MovingGCBase, read_from_env
+from pypy.rpython.memory.gc.base import MovingGCBase
+from pypy.rpython.memory.gc import env
from pypy.rlib.debug import ll_assert, have_debug_prints
from pypy.rlib.debug import debug_print, debug_start, debug_stop
from pypy.rpython.memory.support import get_address_stack, get_address_deque
@@ -110,10 +111,10 @@
return next
def setup(self):
- envsize = read_from_env('PYPY_MARKCOMPACTGC_MAX')
+ envsize = env.read_from_env('PYPY_MARKCOMPACTGC_MAX')
if envsize >= 4096:
self.space_size = envsize & ~4095
- mincollect = read_from_env('PYPY_MARKCOMPACTGC_MIN')
+ mincollect = env.read_from_env('PYPY_MARKCOMPACTGC_MIN')
if mincollect >= 4096:
self.min_next_collect_after = mincollect
Modified: pypy/branch/jit-free/pypy/rpython/memory/gc/minimark.py
==============================================================================
--- pypy/branch/jit-free/pypy/rpython/memory/gc/minimark.py (original)
+++ pypy/branch/jit-free/pypy/rpython/memory/gc/minimark.py Tue Nov 23 15:48:01 2010
@@ -1,9 +1,48 @@
+""" MiniMark GC.
+
+Environment variables can be used to fine-tune the following parameters:
+
+ PYPY_GC_NURSERY The nursery size. Defaults to half the size of
+ the L2 cache. Try values like '1.2MB'.
+
+ PYPY_GC_MAJOR_COLLECT Major collection memory factor. Default is '1.82',
+ which means trigger a major collection when the
+ memory consumed equals 1.82 times the memory
+ really used at the end of the previous major
+ collection.
+
+ PYPY_GC_GROWTH Major collection threshold's max growth rate.
+ Default is '1.4'. Useful to collect more often
+ than normally on sudden memory growth, e.g. when
+ there is a temporary peak in memory usage.
+
+ PYPY_GC_MAX The max heap size. If coming near this limit, it
+ will first collect more often, then raise an
+ RPython MemoryError, and if that is not enough,
+ crash the program with a fatal error. Try values
+ like '1.6GB'.
+
+ PYPY_GC_MAX_DELTA The major collection threshold will never be set
+ to more than PYPY_GC_MAX_DELTA above the amount really
+ used after a collection. Defaults to 1/8th of the
+ total RAM size (which is constrained to be at most
+ 4GB on 32-bit systems). Try values like '200MB'.
+
+ PYPY_GC_MIN Don't collect while the memory size is below this
+ limit. Useful to avoid spending all the time in
+ the GC in very small programs. Defaults to 8
+ times the nursery.
+"""
+# XXX Should find a way to bound the major collection threshold by the
+# XXX total addressable size. Maybe by keeping some minimarkpage arenas
+# XXX pre-reserved, enough for a few nursery collections? What about
+# XXX raw-malloced memory?
import sys
from pypy.rpython.lltypesystem import lltype, llmemory, llarena, llgroup
from pypy.rpython.lltypesystem.lloperation import llop
from pypy.rpython.lltypesystem.llmemory import raw_malloc_usage
from pypy.rpython.memory.gc.base import GCBase, MovingGCBase
-from pypy.rpython.memory.gc import minimarkpage, base, generation
+from pypy.rpython.memory.gc import minimarkpage, base, env
from pypy.rlib.rarithmetic import ovfcheck, LONG_BIT, intmask, r_uint
from pypy.rlib.rarithmetic import LONG_BIT_SHIFT
from pypy.rlib.debug import ll_assert, debug_print, debug_start, debug_stop
@@ -88,13 +127,8 @@
TRANSLATION_PARAMS = {
# Automatically adjust the size of the nursery and the
- # 'major_collection_threshold' from the environment. For
- # 'nursery_size' it will look it up in the env var
- # PYPY_GC_NURSERY and fall back to half the size of
- # the L2 cache. For 'major_collection_threshold' it will look
- # it up in the env var PYPY_GC_MAJOR_COLLECT. It also sets
- # 'max_heap_size' to PYPY_GC_MAX. Finally, PYPY_GC_MIN sets
- # the minimal value of 'next_major_collection_threshold'.
+ # 'major_collection_threshold' from the environment.
+ # See docstring at the start of the file.
"read_from_env": True,
# The size of the nursery. Note that this is only used as a
@@ -122,6 +156,13 @@
# we trigger the next major collection.
"major_collection_threshold": 1.82,
+ # Threshold to prevent the total heap size from growing by a factor of
+ # major_collection_threshold at every collection: it can grow by at
+ # most the following factor from one collection to the next. Used
+ # e.g. when there is a sudden, temporary peak in memory usage; this
+ # prevents the upper bound from growing too fast.
+ "growth_rate_max": 1.4,
+
# The number of array indices that are mapped to a single bit in
# write_barrier_from_array(). Must be a power of two. The default
# value of 128 means that card pages are 512 bytes (1024 on 64-bits)
@@ -147,6 +188,7 @@
arena_size=64*WORD,
small_request_threshold=5*WORD,
major_collection_threshold=2.5,
+ growth_rate_max=2.5, # for tests
card_page_indices=0,
large_object=8*WORD,
large_object_gcptrs=10*WORD,
@@ -158,10 +200,12 @@
self.nursery_size = nursery_size
self.small_request_threshold = small_request_threshold
self.major_collection_threshold = major_collection_threshold
+ self.growth_rate_max = growth_rate_max
self.num_major_collects = 0
self.min_heap_size = 0.0
self.max_heap_size = 0.0
self.max_heap_size_already_raised = False
+ self.max_delta = 0.0
#
self.card_page_indices = card_page_indices
if self.card_page_indices > 0:
@@ -252,15 +296,19 @@
# handling of the write barrier.
self.debug_always_do_minor_collect = newsize == 1
if newsize <= 0:
- newsize = generation.estimate_best_nursery_size()
+ newsize = env.estimate_best_nursery_size()
if newsize <= 0:
newsize = defaultsize
newsize = max(newsize, minsize)
#
major_coll = base.read_float_from_env('PYPY_GC_MAJOR_COLLECT')
- if major_coll >= 1.0:
+ if major_coll > 1.0:
self.major_collection_threshold = major_coll
#
+ growth = base.read_float_from_env('PYPY_GC_GROWTH')
+ if growth > 1.0:
+ self.growth_rate_max = growth
+ #
min_heap_size = base.read_uint_from_env('PYPY_GC_MIN')
if min_heap_size > 0:
self.min_heap_size = float(min_heap_size)
@@ -272,6 +320,12 @@
if max_heap_size > 0:
self.max_heap_size = float(max_heap_size)
#
+ max_delta = base.read_uint_from_env('PYPY_GC_MAX_DELTA')
+ if max_delta > 0:
+ self.max_delta = float(max_delta)
+ else:
+ self.max_delta = 0.125 * env.get_total_memory()
+ #
self.minor_collection() # to empty the nursery
llarena.arena_free(self.nursery)
self.nursery_size = newsize
@@ -296,11 +350,22 @@
# initialize the threshold
self.min_heap_size = max(self.min_heap_size, self.nursery_size *
self.major_collection_threshold)
+ self.next_major_collection_threshold = self.min_heap_size
self.set_major_threshold_from(0.0)
debug_stop("gc-set-nursery-size")
- def set_major_threshold_from(self, threshold):
+
+ def set_major_threshold_from(self, threshold, reserving_size=0):
# Set the next_major_collection_threshold.
+ threshold_max = (self.next_major_collection_threshold *
+ self.growth_rate_max)
+ if self.max_delta > 0.0:
+ threshold_max = min(threshold_max,
+ self.next_major_collection_threshold + self.max_delta)
+ if threshold > threshold_max:
+ threshold = threshold_max
+ #
+ threshold += reserving_size
if threshold < self.min_heap_size:
threshold = self.min_heap_size
#
@@ -1199,8 +1264,8 @@
# have allocated 'major_collection_threshold' times more than
# we currently have.
bounded = self.set_major_threshold_from(
- (self.get_total_memory_used() * self.major_collection_threshold)
- + reserving_size)
+ self.get_total_memory_used() * self.major_collection_threshold,
+ reserving_size)
#
# Max heap size: gives an upper bound on the threshold. If we
# already have at least this much allocated, raise MemoryError.
Modified: pypy/branch/jit-free/pypy/rpython/memory/gc/test/test_minimark.py
==============================================================================
--- pypy/branch/jit-free/pypy/rpython/memory/gc/test/test_minimark.py (original)
+++ pypy/branch/jit-free/pypy/rpython/memory/gc/test/test_minimark.py Tue Nov 23 15:48:01 2010
@@ -28,3 +28,42 @@
assert gc.card_marking_bytes_for_length(P+P+1) == 3
assert gc.card_marking_bytes_for_length(P+P+P+P+P+P+P+P) == 8
assert gc.card_marking_bytes_for_length(P+P+P+P+P+P+P+P+1) == 9
+
+def test_set_major_threshold():
+ gc = MiniMarkGC(None, major_collection_threshold=2.0,
+ growth_rate_max=1.5)
+ gc.min_heap_size = 100.0
+ gc.max_heap_size = 300.0
+ gc.next_major_collection_threshold = 0.0
+ # first, we don't grow past min_heap_size
+ for i in range(5):
+ gc.set_major_threshold_from(100.0)
+ assert gc.next_major_collection_threshold == 100.0
+ # then we grow a lot
+ b = gc.set_major_threshold_from(100 * 2.0)
+ assert b is False
+ assert gc.next_major_collection_threshold == 150.0
+ b = gc.set_major_threshold_from(150 * 2.0)
+ assert b is False
+ assert gc.next_major_collection_threshold == 225.0
+ b = gc.set_major_threshold_from(225 * 2.0)
+ assert b is True
+ assert gc.next_major_collection_threshold == 300.0 # max reached
+ b = gc.set_major_threshold_from(300 * 2.0)
+ assert b is True
+ assert gc.next_major_collection_threshold == 300.0
+ # then we shrink instantly
+ b = gc.set_major_threshold_from(100.0)
+ assert b is False
+ assert gc.next_major_collection_threshold == 100.0
+ # then we grow a bit
+ b = gc.set_major_threshold_from(100 * 1.25)
+ assert b is False
+ assert gc.next_major_collection_threshold == 125.0
+ b = gc.set_major_threshold_from(125 * 1.25)
+ assert b is False
+ assert gc.next_major_collection_threshold == 156.25
+ # check that we cannot shrink below min_heap_size
+ b = gc.set_major_threshold_from(42.7)
+ assert b is False
+ assert gc.next_major_collection_threshold == 100.0
More information about the Pypy-commit
mailing list