[pypy-commit] pypy default: Try to follow exactly the logic of CPython about resizing dicts.
arigo
noreply at buildbot.pypy.org
Fri May 11 20:00:46 CEST 2012
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r55047:274128d2747e
Date: 2012-05-11 20:00 +0200
http://bitbucket.org/pypy/pypy/changeset/274128d2747e/
Log: Try to follow exactly the logic of CPython about resizing dicts. In
particular, this means that small and medium dictionaries are now
quadrupled in size instead of just doubled. This might give a
performance boost in code that populates a lot of dictionaries;
whether it actually does remains to be seen.
diff --git a/pypy/rpython/lltypesystem/rdict.py b/pypy/rpython/lltypesystem/rdict.py
--- a/pypy/rpython/lltypesystem/rdict.py
+++ b/pypy/rpython/lltypesystem/rdict.py
@@ -502,8 +502,6 @@
raise KeyError
_ll_dict_del(d, i)
-# XXX: Move the size checking and resize into a single call which is opauqe to
-# the JIT when the dict isn't virtual, to avoid extra branches.
@jit.look_inside_iff(lambda d, i: jit.isvirtual(d) and jit.isconstant(i))
def _ll_dict_del(d, i):
d.entries.mark_deleted(i)
@@ -516,18 +514,32 @@
entry.key = lltype.nullptr(ENTRY.key.TO)
if ENTRIES.must_clear_value:
entry.value = lltype.nullptr(ENTRY.value.TO)
- num_entries = len(d.entries)
- if num_entries > DICT_INITSIZE and d.num_items < num_entries / 4:
- ll_dict_resize(d)
+ #
+ # The rest is commented out: like CPython we no longer shrink the
+ # dictionary here. It may shrink later if we try to append a number
+ # of new items to it. Unsure if this behavior was designed in
+ # CPython or is accidental. A design reason would be that if you
+ # delete all items in a dictionary (e.g. with a series of
+ # popitem()), then CPython avoids shrinking the table several times.
+ #num_entries = len(d.entries)
+ #if num_entries > DICT_INITSIZE and d.num_items <= num_entries / 4:
+ # ll_dict_resize(d)
+ # A previous xxx: move the size checking and resize into a single
+ # call which is opaque to the JIT when the dict isn't virtual, to
+ # avoid extra branches.
def ll_dict_resize(d):
old_entries = d.entries
old_size = len(old_entries)
# make a 'new_size' estimate and shrink it if there are many
- # deleted entry markers
- new_size = old_size * 2
- while new_size > DICT_INITSIZE and d.num_items < new_size / 4:
- new_size /= 2
+ # deleted entry markers. See CPython for why it is a good idea to
+ # quadruple the dictionary size as long as it's not too big.
+ if d.num_items > 50000: new_estimate = d.num_items * 2
+ else: new_estimate = d.num_items * 4
+ new_size = DICT_INITSIZE
+ while new_size <= new_estimate:
+ new_size *= 2
+ #
d.entries = lltype.typeOf(old_entries).TO.allocate(new_size)
d.num_items = 0
d.resize_counter = new_size * 2
diff --git a/pypy/rpython/test/test_rdict.py b/pypy/rpython/test/test_rdict.py
--- a/pypy/rpython/test/test_rdict.py
+++ b/pypy/rpython/test/test_rdict.py
@@ -749,13 +749,20 @@
assert count_frees >= 3
def test_dict_resize(self):
+ # XXX we no longer automatically resize on 'del'. We need to
+ # hack a bit in this test to trigger a resize by continuing to
+ # fill the dict's table while keeping the actual size very low
+ # in order to force a resize to shrink the table back
def func(want_empty):
d = {}
- for i in range(rdict.DICT_INITSIZE):
+ for i in range(rdict.DICT_INITSIZE << 1):
d[chr(ord('a') + i)] = i
if want_empty:
- for i in range(rdict.DICT_INITSIZE):
+ for i in range(rdict.DICT_INITSIZE << 1):
del d[chr(ord('a') + i)]
+ for i in range(rdict.DICT_INITSIZE << 3):
+ d[chr(ord('A') - i)] = i
+ del d[chr(ord('A') - i)]
return d
res = self.interpret(func, [0])
assert len(res.entries) > rdict.DICT_INITSIZE
More information about the pypy-commit mailing list