[pypy-commit] stmgc c8-card-marking: Simplify further stm_write_card() by assuming that large objects

arigo noreply at buildbot.pypy.org
Mon Mar 2 21:02:10 CET 2015


Author: Armin Rigo <arigo at tunes.org>
Branch: c8-card-marking
Changeset: r1688:f1272b890ba0
Date: 2015-03-02 21:02 +0100
http://bitbucket.org/pypy/stmgc/changeset/f1272b890ba0/

Log:	Simplify further stm_write_card() by assuming that large objects
	with cards are allocated at addresses that are a multiple of 16.

diff --git a/c8/stm/core.c b/c8/stm/core.c
--- a/c8/stm/core.c
+++ b/c8/stm/core.c
@@ -961,6 +961,9 @@
        a direct way to know the length.  We know that it is smaller
        than the size in bytes. */
     assert(index < size);
+    /* this object was allocated with allocate_outside_nursery_large(),
+       which returns addresses aligned to 16 bytes */
+    assert((((uintptr_t)obj) & 15) == 0);
 #endif
 
     /* Write into the card's lock.  This is used by the next minor
diff --git a/c8/stm/gcpage.c b/c8/stm/gcpage.c
--- a/c8/stm/gcpage.c
+++ b/c8/stm/gcpage.c
@@ -52,10 +52,14 @@
 
 static stm_char *allocate_outside_nursery_large(uint64_t size)
 {
-    /* Allocate the object with largemalloc.c from the lower addresses. */
-    char *addr = _stm_large_malloc(size);
+    /* Allocate the object with largemalloc.c from the lower
+       addresses.  Round up the size to a multiple of 16, rather than
+       8, as a quick way to simplify the code in stm_write_card().
+    */
+    char *addr = _stm_large_malloc((size + 15) & ~15);
     if (addr == NULL)
         stm_fatalerror("not enough memory!");
+    assert((((uintptr_t)addr) & 15) == 0);    /* alignment check */
 
     if (LIKELY(addr + size <= uninitialized_page_start)) {
         dprintf(("allocate_outside_nursery_large(%lu): %p, page=%lu\n",
diff --git a/c8/stmgc.h b/c8/stmgc.h
--- a/c8/stmgc.h
+++ b/c8/stmgc.h
@@ -79,7 +79,8 @@
 
 #define _STM_CARD_MARKED 1      /* should always be 1... */
 #define _STM_GCFLAG_CARDS_SET          0x8
-#define _STM_CARD_SIZE                 32     /* must be >= 32 */
+#define _STM_CARD_BITS                 5   /* must be 5/6/7 for the pypy jit */
+#define _STM_CARD_SIZE                 (1 << _STM_CARD_BITS)
 #define _STM_MIN_CARD_COUNT            17
 #define _STM_MIN_CARD_OBJ_SIZE         (_STM_CARD_SIZE * _STM_MIN_CARD_COUNT)
 
@@ -213,10 +214,22 @@
            array doesn't actually use card marking, the following read
            is a bit nonsensical, but in a way that should never return
            CARD_MARKED by mistake.
+
+           The computation of the card marker is further optimized by
+           assuming that large objects are allocated at addresses that
+           are multiples of 16 (rather than just 8, as all objects
+           are).  Under this assumption the following code is equivalent to:
+
+               (obj >> 4) + (index / _STM_CARD_SIZE) + 1
+
+           The code below however takes only a couple of assembler
+           instructions.  It also assumes that the intermediate value
+           fits in a 64-bit value, which it clearly does (all values
+           are much smaller than 2 ** 60).
         */
-        stm_read_marker_t *card = (stm_read_marker_t *)(((uintptr_t)obj) >> 4);
-        card += (index / _STM_CARD_SIZE) + 1;  /* get_index_to_card_index() */
-        if (card->rm != _STM_CARD_MARKED) {
+        uintptr_t v = (((uintptr_t)obj) << (_STM_CARD_BITS - 4)) + index;
+        stm_read_marker_t *card1 = (stm_read_marker_t *)(v >> _STM_CARD_BITS);
+        if (card1[1].rm != _STM_CARD_MARKED) {
 
             /* slow path. */
             _stm_write_slowpath_card(obj, index);
diff --git a/c8/test/test_gcpage.py b/c8/test/test_gcpage.py
--- a/c8/test/test_gcpage.py
+++ b/c8/test/test_gcpage.py
@@ -124,15 +124,15 @@
 
     def test_major_collection(self):
         self.start_transaction()
-        new = stm_allocate(5000)
+        new = stm_allocate(5008)
         self.push_root(new)
         stm_minor_collect()
-        assert lib._stm_total_allocated() == 5000 + LMO
+        assert lib._stm_total_allocated() == 5008 + LMO
 
         new = self.pop_root()
         assert not is_in_nursery(new)
         stm_minor_collect()
-        assert lib._stm_total_allocated() == 5000 + LMO
+        assert lib._stm_total_allocated() == 5008 + LMO
 
         stm_major_collect()
         assert lib._stm_total_allocated() == 0
@@ -143,12 +143,12 @@
         assert lib._stm_total_allocated() == CLEO
 
         self.start_transaction()
-        o = stm_allocate(5000)
+        o = stm_allocate(5008)
         self.push_root(o)
         self.commit_transaction()
         assert last_commit_log_entry_objs() == []
         # 2 CLEs, 1 old object
-        assert lib._stm_total_allocated() == 2*CLEO + (5000 + LMO)
+        assert lib._stm_total_allocated() == 2*CLEO + (5008 + LMO)
 
         self.start_transaction()
         o = self.pop_root()
@@ -158,13 +158,13 @@
         assert last_commit_log_entry_objs() == [o]*2
         # 3 CLEs, 1 old object
         # also, 2 slices of bk_copy and thus 2 CLE entries
-        assert lib._stm_total_allocated() == 3*CLEO + (5000+LMO) + (5000 + CLEEO*2)
+        assert lib._stm_total_allocated() == 3*CLEO + (5008+LMO) + (5008 + CLEEO*2)
 
         self.start_transaction()
-        assert lib._stm_total_allocated() == 3*CLEO + (5000+LMO) + (5000 + CLEEO*2)
+        assert lib._stm_total_allocated() == 3*CLEO + (5008+LMO) + (5008 + CLEEO*2)
         stm_major_collect()
         # all CLE and CLE entries freed:
-        assert lib._stm_total_allocated() == (5000+LMO)
+        assert lib._stm_total_allocated() == (5008+LMO)
         self.commit_transaction()
 
 
@@ -180,39 +180,39 @@
             return prev
 
         self.start_transaction()
-        self.push_root(make_chain(5000))
-        self.push_root(make_chain(4312))
+        self.push_root(make_chain(5008))
+        self.push_root(make_chain(4304))
         stm_minor_collect()
-        assert lib._stm_total_allocated() == (10 * (5000 + LMO) +
-                                              10 * (4312 + LMO))
+        assert lib._stm_total_allocated() == (10 * (5008 + LMO) +
+                                              10 * (4304 + LMO))
         stm_major_collect()
-        assert lib._stm_total_allocated() == (10 * (5000 + LMO) +
-                                              10 * (4312 + LMO))
+        assert lib._stm_total_allocated() == (10 * (5008 + LMO) +
+                                              10 * (4304 + LMO))
         stm_major_collect()
-        assert lib._stm_total_allocated() == (10 * (5000 + LMO) +
-                                              10 * (4312 + LMO))
+        assert lib._stm_total_allocated() == (10 * (5008 + LMO) +
+                                              10 * (4304 + LMO))
         self.pop_root()
         stm_major_collect()
-        assert lib._stm_total_allocated() == 10 * (5000 + LMO)
+        assert lib._stm_total_allocated() == 10 * (5008 + LMO)
 
     def test_trace_all_versions(self):
         self.start_transaction()
-        x = stm_allocate(5000)
+        x = stm_allocate(5008)
         stm_set_char(x, 'A')
         stm_set_char(x, 'a', 4999)
         self.push_root(x)
         self.commit_transaction()
-        assert lib._stm_total_allocated() == 5000 + LMO + CLEO
+        assert lib._stm_total_allocated() == 5008 + LMO + CLEO
 
         self.start_transaction()
         x = self.pop_root()
         self.push_root(x)
-        assert lib._stm_total_allocated() == 5000 + LMO + CLEO
+        assert lib._stm_total_allocated() == 5008 + LMO + CLEO
         stm_set_char(x, 'B')
         stm_set_char(x, 'b', 4999)
 
         py.test.skip("we don't account for private pages right now")
-        assert lib._stm_total_allocated() == 5000 + LMO + 2 * 4096  # 2 pages
+        assert lib._stm_total_allocated() == 5008 + LMO + 2 * 4096  # 2 pages
         stm_major_collect()
 
         assert stm_get_char(x)       == 'B'
@@ -226,7 +226,7 @@
         self.switch(0)
         assert stm_get_char(x)       == 'B'
         assert stm_get_char(x, 4999) == 'b'
-        assert lib._stm_total_allocated() == 5000 + LMO + 2 * 4096  # 2 pages
+        assert lib._stm_total_allocated() == 5008 + LMO + 2 * 4096  # 2 pages
 
     def test_trace_correct_version_of_overflow_objects_1(self, size=32):
         self.start_transaction()
@@ -245,13 +245,13 @@
         assert stm_get_char(x, size - 1) == 'E'
 
     def test_trace_correct_version_of_overflow_objects_2(self):
-        self.test_trace_correct_version_of_overflow_objects_1(size=5000)
+        self.test_trace_correct_version_of_overflow_objects_1(size=5008)
 
     def test_reshare_if_no_longer_modified_0(self, invert=0):
         if invert:
             self.switch(1)
         self.start_transaction()
-        x = stm_allocate(5000)
+        x = stm_allocate(5008)
         self.push_root(x)
         self.commit_transaction()
         x = self.pop_root()
@@ -264,12 +264,12 @@
         stm_major_collect()
 
         py.test.skip("we don't account for private pages right now")
-        assert lib._stm_total_allocated() == 5000 + LMO + 2 * 4096  # 2 pages
+        assert lib._stm_total_allocated() == 5008 + LMO + 2 * 4096  # 2 pages
         self.commit_transaction()
         #
         self.start_transaction()
         stm_major_collect()
-        assert lib._stm_total_allocated() == 5000 + LMO    # shared again
+        assert lib._stm_total_allocated() == 5008 + LMO    # shared again
 
     def test_reshare_if_no_longer_modified_1(self):
         self.test_reshare_if_no_longer_modified_0(invert=1)
@@ -307,7 +307,7 @@
         stm_set_char(p2, 't')
         self.push_root(p2)
         stm_major_collect()
-        assert lib._stm_total_allocated() == 2 * 616
+        assert lib._stm_total_allocated() == 2 * 624
         #
         p2 = self.pop_root()
         m = self.pop_root()
diff --git a/c8/test/test_nursery.py b/c8/test/test_nursery.py
--- a/c8/test/test_nursery.py
+++ b/c8/test/test_nursery.py
@@ -210,7 +210,7 @@
         stm_set_char(p2, 't')
         self.push_root(p2)
         stm_minor_collect()
-        assert lib._stm_total_allocated() == 2 * 616
+        assert lib._stm_total_allocated() == 2 * 624
         #
         p2 = self.pop_root()
         m = self.pop_root()
@@ -230,7 +230,7 @@
         stm_set_char(p2, 't')
         self.push_root(p2)
         stm_minor_collect()
-        assert lib._stm_total_allocated() == 1 * 616
+        assert lib._stm_total_allocated() == 1 * 624
         #
         p2 = self.pop_root()
         m = self.pop_root()


More information about the pypy-commit mailing list