[pypy-svn] r79399 - in pypy/branch/jit-free/pypy/rpython/memory/gc: . test

arigo at codespeak.net arigo at codespeak.net
Tue Nov 23 15:48:03 CET 2010


Author: arigo
Date: Tue Nov 23 15:48:01 2010
New Revision: 79399

Added:
   pypy/branch/jit-free/pypy/rpython/memory/gc/env.py
      - copied unchanged from r79398, pypy/trunk/pypy/rpython/memory/gc/env.py
   pypy/branch/jit-free/pypy/rpython/memory/gc/test/test_env.py
      - copied unchanged from r79398, pypy/trunk/pypy/rpython/memory/gc/test/test_env.py
Modified:
   pypy/branch/jit-free/pypy/rpython/memory/gc/base.py
   pypy/branch/jit-free/pypy/rpython/memory/gc/generation.py
   pypy/branch/jit-free/pypy/rpython/memory/gc/markcompact.py
   pypy/branch/jit-free/pypy/rpython/memory/gc/minimark.py
   pypy/branch/jit-free/pypy/rpython/memory/gc/test/test_minimark.py
Log:
Merge from trunk all changes done to pypy/rpython/memory/.


Modified: pypy/branch/jit-free/pypy/rpython/memory/gc/base.py
==============================================================================
--- pypy/branch/jit-free/pypy/rpython/memory/gc/base.py	(original)
+++ pypy/branch/jit-free/pypy/rpython/memory/gc/base.py	Tue Nov 23 15:48:01 2010
@@ -5,7 +5,6 @@
 from pypy.rpython.memory.support import get_address_stack, get_address_deque
 from pypy.rpython.memory.support import AddressDict
 from pypy.rpython.lltypesystem.llmemory import NULL, raw_malloc_usage
-from pypy.rlib.rarithmetic import r_uint
 
 TYPEID_MAP = lltype.GcStruct('TYPEID_MAP', ('count', lltype.Signed),
                              ('size', lltype.Signed),
@@ -411,42 +410,6 @@
     GCClass = getattr(module, classname)
     return GCClass, GCClass.TRANSLATION_PARAMS
 
-def _read_float_and_factor_from_env(varname):
-    import os
-    value = os.environ.get(varname)
-    if value:
-        if len(value) > 1 and value[-1] in 'bB':
-            value = value[:-1]
-        realvalue = value[:-1]
-        if value[-1] in 'kK':
-            factor = 1024
-        elif value[-1] in 'mM':
-            factor = 1024*1024
-        elif value[-1] in 'gG':
-            factor = 1024*1024*1024
-        else:
-            factor = 1
-            realvalue = value
-        try:
-            return (float(realvalue), factor)
-        except ValueError:
-            pass
-    return (0.0, 0)
-
-def read_from_env(varname):
-    value, factor = _read_float_and_factor_from_env(varname)
-    return int(value * factor)
-
-def read_uint_from_env(varname):
-    value, factor = _read_float_and_factor_from_env(varname)
-    return r_uint(value * factor)
-
-def read_float_from_env(varname):
-    value, factor = _read_float_and_factor_from_env(varname)
-    if factor != 1:
-        return 0.0
-    return value
-
 def _convert_callback_formats(callback):
     callback = getattr(callback, 'im_func', callback)
     if callback not in _converted_callback_formats:

Modified: pypy/branch/jit-free/pypy/rpython/memory/gc/generation.py
==============================================================================
--- pypy/branch/jit-free/pypy/rpython/memory/gc/generation.py	(original)
+++ pypy/branch/jit-free/pypy/rpython/memory/gc/generation.py	Tue Nov 23 15:48:01 2010
@@ -2,7 +2,7 @@
 from pypy.rpython.memory.gc.semispace import SemiSpaceGC
 from pypy.rpython.memory.gc.semispace import GCFLAG_EXTERNAL, GCFLAG_FORWARDED
 from pypy.rpython.memory.gc.semispace import GC_HASH_TAKEN_ADDR
-from pypy.rpython.memory.gc.base import read_from_env
+from pypy.rpython.memory.gc import env
 from pypy.rpython.lltypesystem.llmemory import NULL, raw_malloc_usage
 from pypy.rpython.lltypesystem import lltype, llmemory, llarena
 from pypy.rlib.objectmodel import free_non_gc_object
@@ -93,7 +93,7 @@
         if self.auto_nursery_size:
             newsize = nursery_size_from_env()
             if newsize <= 0:
-                newsize = estimate_best_nursery_size()
+                newsize = env.estimate_best_nursery_size()
             if newsize > 0:
                 self.set_nursery_size(newsize)
 
@@ -633,139 +633,5 @@
 
 # ____________________________________________________________
 
-import os
-
 def nursery_size_from_env():
-    return read_from_env('PYPY_GENERATIONGC_NURSERY')
-
-def best_nursery_size_for_L2cache(L2cache):
-    # Heuristically, the best nursery size to choose is about half
-    # of the L2 cache.  XXX benchmark some more.
-    return L2cache // 2
-
-
-if sys.platform == 'linux2':
-    def estimate_best_nursery_size():
-        """Try to estimate the best nursery size at run-time, depending
-        on the machine we are running on.
-        """
-        debug_start("gc-L2cache")
-        L2cache = sys.maxint
-        try:
-            fd = os.open('/proc/cpuinfo', os.O_RDONLY, 0644)
-            try:
-                data = []
-                while True:
-                    buf = os.read(fd, 4096)
-                    if not buf:
-                        break
-                    data.append(buf)
-            finally:
-                os.close(fd)
-        except OSError:
-            pass
-        else:
-            data = ''.join(data)
-            linepos = 0
-            while True:
-                start = findend(data, '\ncache size', linepos)
-                if start < 0:
-                    break    # done
-                linepos = findend(data, '\n', start)
-                if linepos < 0:
-                    break    # no end-of-line??
-                # *** data[start:linepos] == "   : 2048 KB\n"
-                start = skipspace(data, start)
-                if data[start] != ':':
-                    continue
-                # *** data[start:linepos] == ": 2048 KB\n"
-                start = skipspace(data, start + 1)
-                # *** data[start:linepos] == "2048 KB\n"
-                end = start
-                while '0' <= data[end] <= '9':
-                    end += 1
-                # *** data[start:end] == "2048"
-                if start == end:
-                    continue
-                number = int(data[start:end])
-                # *** data[end:linepos] == " KB\n"
-                end = skipspace(data, end)
-                if data[end] not in ('K', 'k'):    # assume kilobytes for now
-                    continue
-                number = number * 1024
-                # for now we look for the smallest of the L2 caches of the CPUs
-                if number < L2cache:
-                    L2cache = number
-
-        debug_print("L2cache =", L2cache)
-        debug_stop("gc-L2cache")
-
-        if L2cache < sys.maxint:
-            return best_nursery_size_for_L2cache(L2cache)
-        else:
-            # Print a top-level warning even in non-debug builds
-            llop.debug_print(lltype.Void,
-                "Warning: cannot find your CPU L2 cache size in /proc/cpuinfo")
-            return -1
-
-    def findend(data, pattern, pos):
-        pos = data.find(pattern, pos)
-        if pos < 0:
-            return -1
-        return pos + len(pattern)
-
-    def skipspace(data, pos):
-        while data[pos] in (' ', '\t'):
-            pos += 1
-        return pos
-
-elif sys.platform == 'darwin':
-    from pypy.rpython.lltypesystem import rffi
-
-    sysctlbyname = rffi.llexternal('sysctlbyname',
-                                   [rffi.CCHARP, rffi.VOIDP, rffi.SIZE_TP,
-                                    rffi.VOIDP, rffi.SIZE_T],
-                                   rffi.INT,
-                                   sandboxsafe=True)
-
-    def estimate_best_nursery_size():
-        """Try to estimate the best nursery size at run-time, depending
-        on the machine we are running on.
-        """
-        debug_start("gc-L2cache")
-        L2cache = 0
-        l2cache_p = lltype.malloc(rffi.LONGLONGP.TO, 1, flavor='raw')
-        try:
-            len_p = lltype.malloc(rffi.SIZE_TP.TO, 1, flavor='raw')
-            try:
-                size = rffi.sizeof(rffi.LONGLONG)
-                l2cache_p[0] = rffi.cast(rffi.LONGLONG, 0)
-                len_p[0] = rffi.cast(rffi.SIZE_T, size)
-                # XXX a hack for llhelper not being robust-enough
-                result = sysctlbyname("hw.l2cachesize",
-                                      rffi.cast(rffi.VOIDP, l2cache_p),
-                                      len_p,
-                                      lltype.nullptr(rffi.VOIDP.TO), 
-                                      rffi.cast(rffi.SIZE_T, 0))
-                if (rffi.cast(lltype.Signed, result) == 0 and
-                    rffi.cast(lltype.Signed, len_p[0]) == size):
-                    L2cache = rffi.cast(lltype.Signed, l2cache_p[0])
-                    if rffi.cast(rffi.LONGLONG, L2cache) != l2cache_p[0]:
-                        L2cache = 0    # overflow!
-            finally:
-                lltype.free(len_p, flavor='raw')
-        finally:
-            lltype.free(l2cache_p, flavor='raw')
-        debug_print("L2cache =", L2cache)
-        debug_stop("gc-L2cache")
-        if L2cache > 0:
-            return best_nursery_size_for_L2cache(L2cache)
-        else:
-            # Print a top-level warning even in non-debug builds
-            llop.debug_print(lltype.Void,
-                "Warning: cannot find your CPU L2 cache size with sysctl()")
-            return -1
-
-else:
-    def estimate_best_nursery_size():
-        return -1     # XXX implement me for other platforms
+    return env.read_from_env('PYPY_GENERATIONGC_NURSERY')

Modified: pypy/branch/jit-free/pypy/rpython/memory/gc/markcompact.py
==============================================================================
--- pypy/branch/jit-free/pypy/rpython/memory/gc/markcompact.py	(original)
+++ pypy/branch/jit-free/pypy/rpython/memory/gc/markcompact.py	Tue Nov 23 15:48:01 2010
@@ -1,5 +1,6 @@
 from pypy.rpython.lltypesystem import lltype, llmemory, llarena, llgroup
-from pypy.rpython.memory.gc.base import MovingGCBase, read_from_env
+from pypy.rpython.memory.gc.base import MovingGCBase
+from pypy.rpython.memory.gc import env
 from pypy.rlib.debug import ll_assert, have_debug_prints
 from pypy.rlib.debug import debug_print, debug_start, debug_stop
 from pypy.rpython.memory.support import get_address_stack, get_address_deque
@@ -110,10 +111,10 @@
         return next
 
     def setup(self):
-        envsize = read_from_env('PYPY_MARKCOMPACTGC_MAX')
+        envsize = env.read_from_env('PYPY_MARKCOMPACTGC_MAX')
         if envsize >= 4096:
             self.space_size = envsize & ~4095
-        mincollect = read_from_env('PYPY_MARKCOMPACTGC_MIN')
+        mincollect = env.read_from_env('PYPY_MARKCOMPACTGC_MIN')
         if mincollect >= 4096:
             self.min_next_collect_after = mincollect
 

Modified: pypy/branch/jit-free/pypy/rpython/memory/gc/minimark.py
==============================================================================
--- pypy/branch/jit-free/pypy/rpython/memory/gc/minimark.py	(original)
+++ pypy/branch/jit-free/pypy/rpython/memory/gc/minimark.py	Tue Nov 23 15:48:01 2010
@@ -1,9 +1,48 @@
+""" MiniMark GC.
+
+Environment variables can be used to fine-tune the following parameters:
+    
+ PYPY_GC_NURSERY        The nursery size.  Defaults to half the size of
+                        the L2 cache.  Try values like '1.2MB'.
+
+ PYPY_GC_MAJOR_COLLECT  Major collection memory factor.  Default is '1.82',
+                        which means trigger a major collection when the
+                        memory consumed equals 1.82 times the memory
+                        really used at the end of the previous major
+                        collection.
+
+ PYPY_GC_GROWTH         Major collection threshold's max growth rate.
+                        Default is '1.4'.  Useful to collect more often
+                        than normally on sudden memory growth, e.g. when
+                        there is a temporary peak in memory usage.
+
+ PYPY_GC_MAX            The max heap size.  If coming near this limit, it
+                        will first collect more often, then raise an
+                        RPython MemoryError, and if that is not enough,
+                        crash the program with a fatal error.  Try values
+                        like '1.6GB'.
+
+ PYPY_GC_MAX_DELTA      The major collection threshold will never be set
+                        to more than PYPY_GC_MAX_DELTA above the amount really
+                        used after a collection.  Defaults to 1/8th of the
+                        total RAM size (which is constrained to be at most
+                        4GB on 32-bit systems).  Try values like '200MB'.
+
+ PYPY_GC_MIN            Don't collect while the memory size is below this
+                        limit.  Useful to avoid spending all the time in
+                        the GC in very small programs.  Defaults to 8
+                        times the nursery.
+"""
+# XXX Should find a way to bound the major collection threshold by the
+# XXX total addressable size.  Maybe by keeping some minimarkpage arenas
+# XXX pre-reserved, enough for a few nursery collections?  What about
+# XXX raw-malloced memory?
 import sys
 from pypy.rpython.lltypesystem import lltype, llmemory, llarena, llgroup
 from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rpython.lltypesystem.llmemory import raw_malloc_usage
 from pypy.rpython.memory.gc.base import GCBase, MovingGCBase
-from pypy.rpython.memory.gc import minimarkpage, base, generation
+from pypy.rpython.memory.gc import minimarkpage, base, env
 from pypy.rlib.rarithmetic import ovfcheck, LONG_BIT, intmask, r_uint
 from pypy.rlib.rarithmetic import LONG_BIT_SHIFT
 from pypy.rlib.debug import ll_assert, debug_print, debug_start, debug_stop
@@ -88,13 +127,8 @@
 
     TRANSLATION_PARAMS = {
         # Automatically adjust the size of the nursery and the
-        # 'major_collection_threshold' from the environment.  For
-        # 'nursery_size' it will look it up in the env var
-        # PYPY_GC_NURSERY and fall back to half the size of
-        # the L2 cache.  For 'major_collection_threshold' it will look
-        # it up in the env var PYPY_GC_MAJOR_COLLECT.  It also sets
-        # 'max_heap_size' to PYPY_GC_MAX.  Finally, PYPY_GC_MIN sets
-        # the minimal value of 'next_major_collection_threshold'.
+        # 'major_collection_threshold' from the environment.
+        # See docstring at the start of the file.
         "read_from_env": True,
 
         # The size of the nursery.  Note that this is only used as a
@@ -122,6 +156,13 @@
         # we trigger the next major collection.
         "major_collection_threshold": 1.82,
 
+        # Limits how fast the major collection threshold can grow: rather
+        # than growing by a factor of major_collection_threshold at every
+        # collection, it can grow at most by the following factor from one
+        # collection to the next.  Used e.g. when there is a sudden, temporary
+        # peak in memory usage; it keeps the upper bound from growing too fast.
+        "growth_rate_max": 1.4,
+
         # The number of array indices that are mapped to a single bit in
         # write_barrier_from_array().  Must be a power of two.  The default
         # value of 128 means that card pages are 512 bytes (1024 on 64-bits)
@@ -147,6 +188,7 @@
                  arena_size=64*WORD,
                  small_request_threshold=5*WORD,
                  major_collection_threshold=2.5,
+                 growth_rate_max=2.5,   # for tests
                  card_page_indices=0,
                  large_object=8*WORD,
                  large_object_gcptrs=10*WORD,
@@ -158,10 +200,12 @@
         self.nursery_size = nursery_size
         self.small_request_threshold = small_request_threshold
         self.major_collection_threshold = major_collection_threshold
+        self.growth_rate_max = growth_rate_max
         self.num_major_collects = 0
         self.min_heap_size = 0.0
         self.max_heap_size = 0.0
         self.max_heap_size_already_raised = False
+        self.max_delta = 0.0
         #
         self.card_page_indices = card_page_indices
         if self.card_page_indices > 0:
@@ -252,15 +296,19 @@
             # handling of the write barrier.
             self.debug_always_do_minor_collect = newsize == 1
             if newsize <= 0:
-                newsize = generation.estimate_best_nursery_size()
+                newsize = env.estimate_best_nursery_size()
                 if newsize <= 0:
                     newsize = defaultsize
             newsize = max(newsize, minsize)
             #
             major_coll = base.read_float_from_env('PYPY_GC_MAJOR_COLLECT')
-            if major_coll >= 1.0:
+            if major_coll > 1.0:
                 self.major_collection_threshold = major_coll
             #
+            growth = base.read_float_from_env('PYPY_GC_GROWTH')
+            if growth > 1.0:
+                self.growth_rate_max = growth
+            #
             min_heap_size = base.read_uint_from_env('PYPY_GC_MIN')
             if min_heap_size > 0:
                 self.min_heap_size = float(min_heap_size)
@@ -272,6 +320,12 @@
             if max_heap_size > 0:
                 self.max_heap_size = float(max_heap_size)
             #
+            max_delta = base.read_uint_from_env('PYPY_GC_MAX_DELTA')
+            if max_delta > 0:
+                self.max_delta = float(max_delta)
+            else:
+                self.max_delta = 0.125 * env.get_total_memory()
+            #
             self.minor_collection()    # to empty the nursery
             llarena.arena_free(self.nursery)
             self.nursery_size = newsize
@@ -296,11 +350,22 @@
         # initialize the threshold
         self.min_heap_size = max(self.min_heap_size, self.nursery_size *
                                               self.major_collection_threshold)
+        self.next_major_collection_threshold = self.min_heap_size
         self.set_major_threshold_from(0.0)
         debug_stop("gc-set-nursery-size")
 
-    def set_major_threshold_from(self, threshold):
+
+    def set_major_threshold_from(self, threshold, reserving_size=0):
         # Set the next_major_collection_threshold.
+        threshold_max = (self.next_major_collection_threshold *
+                         self.growth_rate_max)
+        if self.max_delta > 0.0:
+            threshold_max = min(threshold_max,
+                         self.next_major_collection_threshold + self.max_delta)
+        if threshold > threshold_max:
+            threshold = threshold_max
+        #
+        threshold += reserving_size
         if threshold < self.min_heap_size:
             threshold = self.min_heap_size
         #
@@ -1199,8 +1264,8 @@
         # have allocated 'major_collection_threshold' times more than
         # we currently have.
         bounded = self.set_major_threshold_from(
-            (self.get_total_memory_used() * self.major_collection_threshold)
-            + reserving_size)
+            self.get_total_memory_used() * self.major_collection_threshold,
+            reserving_size)
         #
         # Max heap size: gives an upper bound on the threshold.  If we
         # already have at least this much allocated, raise MemoryError.

Modified: pypy/branch/jit-free/pypy/rpython/memory/gc/test/test_minimark.py
==============================================================================
--- pypy/branch/jit-free/pypy/rpython/memory/gc/test/test_minimark.py	(original)
+++ pypy/branch/jit-free/pypy/rpython/memory/gc/test/test_minimark.py	Tue Nov 23 15:48:01 2010
@@ -28,3 +28,42 @@
     assert gc.card_marking_bytes_for_length(P+P+1) == 3
     assert gc.card_marking_bytes_for_length(P+P+P+P+P+P+P+P) == 8
     assert gc.card_marking_bytes_for_length(P+P+P+P+P+P+P+P+1) == 9
+
+def test_set_major_threshold():
+    gc = MiniMarkGC(None, major_collection_threshold=2.0,
+                    growth_rate_max=1.5)
+    gc.min_heap_size = 100.0
+    gc.max_heap_size = 300.0
+    gc.next_major_collection_threshold = 0.0
+    # first, we don't grow past min_heap_size
+    for i in range(5):
+        gc.set_major_threshold_from(100.0)
+        assert gc.next_major_collection_threshold == 100.0
+    # then we grow a lot
+    b = gc.set_major_threshold_from(100 * 2.0)
+    assert b is False
+    assert gc.next_major_collection_threshold == 150.0
+    b = gc.set_major_threshold_from(150 * 2.0)
+    assert b is False
+    assert gc.next_major_collection_threshold == 225.0
+    b = gc.set_major_threshold_from(225 * 2.0)
+    assert b is True
+    assert gc.next_major_collection_threshold == 300.0   # max reached
+    b = gc.set_major_threshold_from(300 * 2.0)
+    assert b is True
+    assert gc.next_major_collection_threshold == 300.0
+    # then we shrink instantly
+    b = gc.set_major_threshold_from(100.0)
+    assert b is False
+    assert gc.next_major_collection_threshold == 100.0
+    # then we grow a bit
+    b = gc.set_major_threshold_from(100 * 1.25)
+    assert b is False
+    assert gc.next_major_collection_threshold == 125.0
+    b = gc.set_major_threshold_from(125 * 1.25)
+    assert b is False
+    assert gc.next_major_collection_threshold == 156.25
+    # check that we cannot shrink below min_heap_size
+    b = gc.set_major_threshold_from(42.7)
+    assert b is False
+    assert gc.next_major_collection_threshold == 100.0



More information about the Pypy-commit mailing list