[pypy-commit] pypy rpython-hash: Revert some changes: now only dicts initially created and dicts frozen
arigo
pypy.commits at gmail.com
Fri Jan 27 05:28:38 EST 2017
Author: Armin Rigo <arigo at tunes.org>
Branch: rpython-hash
Changeset: r89797:a0cc818768af
Date: 2017-01-27 11:27 +0100
http://bitbucket.org/pypy/pypy/changeset/a0cc818768af/
Log: Revert some changes: now only dicts initially created and dicts
frozen by translation have no index. Simplifies a bit the diff with
default---I could find and fix more bugs.
diff --git a/rpython/rtyper/lltypesystem/rordereddict.py b/rpython/rtyper/lltypesystem/rordereddict.py
--- a/rpython/rtyper/lltypesystem/rordereddict.py
+++ b/rpython/rtyper/lltypesystem/rordereddict.py
@@ -5,7 +5,7 @@
from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
from rpython.rlib import objectmodel, jit, rgc, types
from rpython.rlib.signature import signature
-from rpython.rlib.objectmodel import specialize, likely
+from rpython.rlib.objectmodel import specialize, likely, not_rpython
from rpython.rtyper.debug import ll_assert
from rpython.rlib.rarithmetic import r_uint, intmask
from rpython.rtyper import rmodel
@@ -60,11 +60,9 @@
return ll_dict_lookup(d, key, hash, flag, TYPE_INT)
elif fun == FUNC_LONG:
return ll_dict_lookup(d, key, hash, flag, TYPE_LONG)
- elif fun == FUNC_NO_INDEX:
- ll_dict_create_index(d)
+ else:
+ ll_dict_create_initial_index(d)
# then, retry
- else:
- assert False
def get_ll_dict(DICTKEY, DICTVALUE, get_custom_eq_hash=None, DICT=None,
ll_fasthash_function=None, ll_hash_function=None,
@@ -240,6 +238,7 @@
self.setup()
self.setup_final()
l_dict = ll_newdict_size(self.DICT, len(dictobj))
+ ll_no_initial_index(l_dict)
self.dict_cache[key] = l_dict
r_key = self.key_repr
if r_key.lowleveltype == llmemory.Address:
@@ -257,18 +256,15 @@
for dictkeycontainer, dictvalue in dictobj._dict.items():
llkey = r_key.convert_const(dictkeycontainer.key)
llvalue = r_value.convert_const(dictvalue)
- _ll_dict_insertclean(l_dict, llkey, llvalue,
- dictkeycontainer.hash)
+ _ll_dict_insert_no_index(l_dict, llkey, llvalue)
+ return l_dict
else:
for dictkey, dictvalue in dictobj.items():
llkey = r_key.convert_const(dictkey)
llvalue = r_value.convert_const(dictvalue)
- _ll_dict_insertclean(l_dict, llkey, llvalue,
- l_dict.keyhash(llkey))
-
- ll_remove_index(l_dict)
- return l_dict
+ _ll_dict_insert_no_index(l_dict, llkey, llvalue)
+ return l_dict
def rtype_len(self, hop):
v_dict, = hop.inputargs(self)
@@ -467,23 +463,26 @@
if IS_64BIT:
FUNC_SHIFT = 3
FUNC_MASK = 0x07 # three bits
- FUNC_NO_INDEX, FUNC_BYTE, FUNC_SHORT, FUNC_INT, FUNC_LONG = range(5)
+ FUNC_BYTE, FUNC_SHORT, FUNC_INT, FUNC_LONG, FUNC_MUST_REINDEX = range(5)
else:
FUNC_SHIFT = 2
FUNC_MASK = 0x03 # two bits
- FUNC_NO_INDEX, FUNC_BYTE, FUNC_SHORT, FUNC_LONG = range(4)
+ FUNC_BYTE, FUNC_SHORT, FUNC_LONG, FUNC_MUST_REINDEX = range(4)
TYPE_BYTE = rffi.UCHAR
TYPE_SHORT = rffi.USHORT
TYPE_INT = rffi.UINT
TYPE_LONG = lltype.Unsigned
-NULL = lltype.nullptr(llmemory.GCREF.TO)
-
-def ll_remove_index(d):
- # Used in MemoryError situation, or when translating prebuilt dicts.
- # Remove the index completely.
- d.indexes = NULL
- d.lookup_function_no = FUNC_NO_INDEX
+def ll_no_initial_index(d):
+ # Used when making new empty dicts, and when translating prebuilt dicts.
+ # Remove the index completely. A dictionary must always have an
+ # index unless it is freshly created or freshly translated. Most
+ # dict operations start with ll_call_lookup_function(), which will
+ # recompute the hashes and create the index.
+ ll_assert(d.num_live_items == d.num_ever_used_items,
+ "ll_no_initial_index(): dict already in use")
+ d.lookup_function_no = FUNC_MUST_REINDEX
+ d.indexes = lltype.nullptr(llmemory.GCREF.TO)
def ll_malloc_indexes_and_choose_lookup(d, n):
# keep in sync with ll_clear_indexes() below
@@ -519,34 +518,24 @@
rgc.ll_arrayclear(lltype.cast_opaque_ptr(DICTINDEX_INT, d.indexes))
elif fun == FUNC_LONG:
rgc.ll_arrayclear(lltype.cast_opaque_ptr(DICTINDEX_LONG, d.indexes))
- elif fun == FUNC_NO_INDEX:
- # nothing to clear in this case
- ll_assert(not d.indexes,
- "ll_clear_indexes: FUNC_NO_INDEX but d.indexes != NULL")
else:
assert False
@jit.dont_look_inside
def ll_call_insert_clean_function(d, hash, i):
- while True:
- fun = d.lookup_function_no & FUNC_MASK
- if fun == FUNC_BYTE:
- return ll_dict_store_clean(d, hash, i, TYPE_BYTE)
- elif fun == FUNC_SHORT:
- return ll_dict_store_clean(d, hash, i, TYPE_SHORT)
- elif IS_64BIT and fun == FUNC_INT:
- return ll_dict_store_clean(d, hash, i, TYPE_INT)
- elif fun == FUNC_LONG:
- return ll_dict_store_clean(d, hash, i, TYPE_LONG)
- elif fun == FUNC_NO_INDEX:
- # NB. this case might be reachable or not, but I didn't manage
- # to get here. Maybe it is not actually reachable right now
- # but it depends on details. Better keep it written down
- # just in case the details change later.
- ll_dict_create_index(d)
- # then, retry
- else:
- assert False
+ fun = d.lookup_function_no & FUNC_MASK
+ if fun == FUNC_BYTE:
+ ll_dict_store_clean(d, hash, i, TYPE_BYTE)
+ elif fun == FUNC_SHORT:
+ ll_dict_store_clean(d, hash, i, TYPE_SHORT)
+ elif IS_64BIT and fun == FUNC_INT:
+ ll_dict_store_clean(d, hash, i, TYPE_INT)
+ elif fun == FUNC_LONG:
+ ll_dict_store_clean(d, hash, i, TYPE_LONG)
+ else:
+ # can't be still FUNC_MUST_REINDEX here
+ ll_assert(False, "ll_call_insert_clean_function(): invalid lookup_fun")
+ assert False
def ll_call_delete_by_entry_index(d, hash, i):
fun = d.lookup_function_no & FUNC_MASK
@@ -558,9 +547,9 @@
ll_dict_delete_by_entry_index(d, hash, i, TYPE_INT)
elif fun == FUNC_LONG:
ll_dict_delete_by_entry_index(d, hash, i, TYPE_LONG)
- elif fun == FUNC_NO_INDEX:
- pass # there is no 'indexes', so nothing to delete
else:
+ # can't be still FUNC_MUST_REINDEX here
+ ll_assert(False, "ll_call_delete_by_entry_index(): invalid lookup_fun")
assert False
def ll_valid_from_flag(entries, i):
@@ -644,19 +633,21 @@
try:
reindexed = ll_dict_grow(d)
except:
- ll_remove_index(d)
+ _ll_dict_rescue(d)
raise
- if d.resize_counter <= 3:
+ rc = d.resize_counter - 3
+ if rc <= 0:
try:
ll_dict_resize(d)
reindexed = True
except:
- ll_remove_index(d)
+ _ll_dict_rescue(d)
raise
+ rc = d.resize_counter - 3
+ ll_assert(rc > 0, "ll_dict_resize failed?")
if reindexed:
ll_call_insert_clean_function(d, hash, d.num_ever_used_items)
- rc = d.resize_counter - 3
- ll_assert(rc > 0, "ll_dict_resize failed?")
+ #
d.resize_counter = rc
entry = d.entries[d.num_ever_used_items]
entry.key = key
@@ -668,15 +659,22 @@
d.num_ever_used_items += 1
d.num_live_items += 1
-def _ll_dict_insertclean(d, key, value, hash):
+@jit.dont_look_inside
+def _ll_dict_rescue(d):
+ # MemoryError situation! The 'indexes' contains an invalid entry
+ # at this point. But we can call ll_dict_reindex() with the
+ # following arguments, ensuring no further malloc occurs.
+ ll_dict_reindex(d, _ll_len_of_d_indexes(d))
+_ll_dict_rescue._dont_inline_ = True
+
+@not_rpython
+def _ll_dict_insert_no_index(d, key, value):
# never translated
ENTRY = lltype.typeOf(d.entries).TO.OF
- ll_call_insert_clean_function(d, hash, d.num_ever_used_items)
entry = d.entries[d.num_ever_used_items]
entry.key = key
entry.value = value
- if hasattr(ENTRY, 'f_hash'):
- entry.f_hash = hash
+ # note that f_hash is left uninitialized in prebuilt dicts
if hasattr(ENTRY, 'f_valid'):
entry.f_valid = True
d.num_ever_used_items += 1
@@ -795,20 +793,7 @@
else:
d.entries = newitems
- # Estimate the index size we would like to have now. If the new
- # estimate changed from before, we need to throw away the old index
- # anyway, so simply remove it for now. If the size is the same,
- # then maybe it is a better idea to keep it instead of reallocating
- # an identical index very soon. In this case we update the index now.
- if bool(d.indexes):
- new_estimate = d.num_live_items * 2
- new_size = DICT_INITSIZE
- while new_size <= new_estimate:
- new_size *= 2
- if _ll_len_of_d_indexes(d) == new_size:
- ll_dict_reindex(d, new_size)
- else:
- ll_remove_index(d)
+ ll_dict_reindex(d, _ll_len_of_d_indexes(d))
def ll_dict_delitem(d, key):
@@ -852,10 +837,10 @@
break
d.num_ever_used_items = i + 1
- # If the dictionary is more than 75% dead items, then clean up the
- # deleted items and remove the index.
- if d.num_live_items + DICT_INITSIZE < len(d.entries) / 4:
- ll_dict_remove_deleted_items(d)
+ # If the dictionary is at least 87.5% dead items, then consider shrinking
+ # it.
+ if d.num_live_items + DICT_INITSIZE <= len(d.entries) / 8:
+ ll_dict_resize(d)
def ll_dict_resize(d):
# make a 'new_size' estimate and shrink it if there are many
@@ -873,25 +858,48 @@
while new_size <= new_estimate:
new_size *= 2
- if bool(d.indexes) and new_size < _ll_len_of_d_indexes(d):
+ if new_size < _ll_len_of_d_indexes(d):
ll_dict_remove_deleted_items(d)
else:
ll_dict_reindex(d, new_size)
-def ll_dict_create_index(d):
- if d.num_live_items < DICT_INITSIZE * 2 // 3:
- new_size = DICT_INITSIZE # fast path
+def ll_ensure_indexes(d):
+ num = d.lookup_function_no
+ if num == FUNC_MUST_REINDEX:
+ ll_dict_create_initial_index(d)
else:
- # Use a more conservative estimate than _ll_dict_resize_to() here.
- # This function is called when initially creating the index (so,
- # for prebuilt dicts, the chance is that it will never grow);
- # after we got a MemoryError; and when throwing the index away
- # because of heavy shrinking of the dict. The minimum value
- # here is such that 'new_estimate * 2 - num_live_items * 3 > 0'.
- new_estimate = (d.num_live_items * 3) // 2 + 1
- new_size = DICT_INITSIZE
- while new_size < new_estimate:
- new_size *= 2
+ ll_assert((num & FUNC_MASK) != FUNC_MUST_REINDEX,
+ "bad combination in lookup_function_no")
+
+def ll_dict_create_initial_index(d):
+ """Create the initial index for a dictionary. The common case is
+ that 'd' is empty. The uncommon case is that it is a prebuilt
+ dictionary frozen by translation, in which case we must rehash all
+ entries. The common case must be seen by the JIT.
+ """
+ if d.num_live_items == 0:
+ ll_malloc_indexes_and_choose_lookup(d, DICT_INITSIZE)
+ d.resize_counter = DICT_INITSIZE * 2
+ else:
+ ll_dict_rehash_after_translation(d)
+
+ at jit.dont_look_inside
+def ll_dict_rehash_after_translation(d):
+ assert d.num_live_items == d.num_ever_used_items
+ assert not d.indexes
+ #
+ # recompute all hashes, if they are stored in d.entries
+ ENTRY = lltype.typeOf(d.entries).TO.OF
+ if hasattr(ENTRY, 'f_hash'):
+ for i in range(d.num_ever_used_items):
+ assert d.entries.valid(i)
+ d_entry = d.entries[i]
+ d_entry.f_hash = d.keyhash(d_entry.key)
+ #
+ # Use the smallest acceptable size for ll_dict_reindex
+ new_size = DICT_INITSIZE
+ while new_size * 2 - d.num_live_items * 3 <= 0:
+ new_size *= 2
ll_dict_reindex(d, new_size)
def ll_dict_reindex(d, new_size):
@@ -909,9 +917,7 @@
ibound = d.num_ever_used_items
#
# Write four loops, moving the check for the value of 'fun' out of
- # the loops. A small speed-up, it also avoids the (unreachable)
- # recursive call from here to ll_call_insert_clean_function() to
- # ll_dict_create_index() back to here.
+ # the loops. A small speed-up over ll_call_insert_clean_function().
fun = d.lookup_function_no # == lookup_function_no & FUNC_MASK
if fun == FUNC_BYTE:
while i < ibound:
@@ -1088,9 +1094,9 @@
d.entries = _ll_empty_array(DICT)
# Don't allocate an 'indexes' for empty dict. It seems a typical
# program contains tons of empty dicts, so this might be a memory win.
- ll_remove_index(d)
d.num_live_items = 0
d.num_ever_used_items = 0
+ ll_no_initial_index(d)
return d
OrderedDictRepr.ll_newdict = staticmethod(ll_newdict)
@@ -1175,6 +1181,10 @@
# as soon as we do something like ll_dict_reindex().
if index == (dict.lookup_function_no >> FUNC_SHIFT):
dict.lookup_function_no += (1 << FUNC_SHIFT)
+ # note that we can't have modified a FUNC_MUST_REINDEX
+ # dict here because such dicts have no invalid entries
+ ll_assert((dict.lookup_function_no & FUNC_MASK) !=
+ FUNC_MUST_REINDEX, "bad combination in _ll_dictnext")
index = nextindex
# clear the reference to the dict and prevent restarts
iter.dict = lltype.nullptr(lltype.typeOf(iter).TO.dict.TO)
@@ -1220,6 +1230,8 @@
return dict.entries[index].value
def ll_dict_copy(dict):
+ ll_ensure_indexes(dict)
+
DICT = lltype.typeOf(dict).TO
newdict = DICT.allocate()
newdict.entries = DICT.entries.TO.allocate(len(dict.entries))
@@ -1244,7 +1256,7 @@
d_entry.f_hash = entry.f_hash
i += 1
- ll_remove_index(newdict)
+ ll_dict_reindex(newdict, _ll_len_of_d_indexes(dict))
return newdict
ll_dict_copy.oopspec = 'odict.copy(dict)'
@@ -1254,15 +1266,17 @@
DICT = lltype.typeOf(d).TO
old_entries = d.entries
d.entries = _ll_empty_array(DICT)
- ll_remove_index(d)
d.num_live_items = 0
d.num_ever_used_items = 0
+ ll_no_initial_index(d)
+ d.resize_counter = 0
# old_entries.delete() XXX
ll_dict_clear.oopspec = 'odict.clear(d)'
def ll_dict_update(dic1, dic2):
if dic1 == dic2:
return
+ ll_ensure_indexes(dic2)
ll_prepare_dict_update(dic1, dic2.num_live_items)
i = 0
while i < dic2.num_ever_used_items:
@@ -1289,6 +1303,7 @@
# the case where dict.update() actually has a lot of collisions.
# If num_extra is much greater than d.num_live_items the conditional_call
# will trigger anyway, which is really the goal.
+ ll_ensure_indexes(d)
x = num_extra - d.num_live_items
jit.conditional_call(d.resize_counter <= x * 3,
_ll_dict_resize_to, d, num_extra)
@@ -1348,6 +1363,7 @@
if dic.num_live_items == 0:
raise KeyError
+ ll_ensure_indexes(dic)
entries = dic.entries
# find the last entry. It's unclear if the loop below is still
diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py
--- a/rpython/rtyper/test/test_rordereddict.py
+++ b/rpython/rtyper/test/test_rordereddict.py
@@ -415,9 +415,10 @@
self.removed_keys.append(key)
def removeindex(self):
- # remove the index, as done e.g. during translation for prebuilt
- # dicts
- rodct.ll_remove_index(self.l_dict)
+ # remove the index, as done during translation for prebuilt dicts
+ # (but cannot be done if we already removed a key)
+ if not self.removed_keys:
+ rodct.ll_no_initial_index(self.l_dict)
def fullcheck(self):
# overridden to also check key order
@@ -444,7 +445,7 @@
num_lives += 1
assert num_lives == d.num_live_items
fun = d.lookup_function_no & rordereddict.FUNC_MASK
- if fun == rordereddict.FUNC_NO_INDEX:
+ if fun == rordereddict.FUNC_MUST_REINDEX:
assert not d.indexes
else:
assert d.indexes
diff --git a/rpython/translator/c/test/test_newgc.py b/rpython/translator/c/test/test_newgc.py
--- a/rpython/translator/c/test/test_newgc.py
+++ b/rpython/translator/c/test/test_newgc.py
@@ -9,7 +9,7 @@
from rpython.conftest import option
from rpython.rlib import rgc
-from rpython.rlib.objectmodel import keepalive_until_here, compute_hash, compute_identity_hash
+from rpython.rlib.objectmodel import keepalive_until_here, compute_hash, compute_identity_hash, r_dict
from rpython.rlib.rstring import StringBuilder
from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
from rpython.rtyper.lltypesystem.lloperation import llop
@@ -562,6 +562,41 @@
res = self.run('prebuilt_weakref')
assert res == self.run_orig('prebuilt_weakref')
+ def define_prebuilt_dicts_of_all_sizes(self):
+ class X:
+ pass
+ dlist = []
+ keyslist = []
+ def keq(x, y):
+ return x is y
+ def khash(x):
+ return compute_hash(x)
+ for size in ([random.randrange(0, 260) for i in range(10)] +
+ [random.randrange(260, 60000)]):
+ print 'PREBUILT DICTIONARY OF SIZE', size
+ keys = [X() for j in range(size)]
+ d = r_dict(keq, khash)
+ for j in range(size):
+ d[keys[j]] = j
+ dlist.append(d)
+ keyslist.append(keys)
+
+ def fn():
+ for n in range(len(dlist)):
+ d = dlist[n]
+ keys = keyslist[n]
+ assert len(d) == len(keys)
+ i = 0
+ while i < len(keys):
+ assert d[keys[i]] == i
+ i += 1
+ return 42
+ return fn
+
+ def test_prebuilt_dicts_of_all_sizes(self):
+ res = self.run('prebuilt_dicts_of_all_sizes')
+ assert res == 42
+
def define_framework_malloc_raw(cls):
A = lltype.Struct('A', ('value', lltype.Signed))
More information about the pypy-commit
mailing list