[pypy-commit] pypy rpython-hash: in-progress: support dicts with no indexes
arigo
pypy.commits at gmail.com
Thu Jan 26 08:20:25 EST 2017
Author: Armin Rigo <arigo at tunes.org>
Branch: rpython-hash
Changeset: r89786:73f39f5b582e
Date: 2017-01-26 14:19 +0100
http://bitbucket.org/pypy/pypy/changeset/73f39f5b582e/
Log: in-progress: support dicts with no indexes
diff --git a/rpython/rtyper/lltypesystem/rordereddict.py b/rpython/rtyper/lltypesystem/rordereddict.py
--- a/rpython/rtyper/lltypesystem/rordereddict.py
+++ b/rpython/rtyper/lltypesystem/rordereddict.py
@@ -46,20 +46,25 @@
@jit.look_inside_iff(lambda d, key, hash, flag: jit.isvirtual(d))
@jit.oopspec('ordereddict.lookup(d, key, hash, flag)')
def ll_call_lookup_function(d, key, hash, flag):
- fun = d.lookup_function_no & FUNC_MASK
- # This likely() here forces gcc to compile the check for fun == FUNC_BYTE
- # first. Otherwise, this is a regular switch and gcc (at least 4.7)
- # compiles this as a series of checks, with the FUNC_BYTE case last.
- # It sounds minor, but it is worth 6-7% on a PyPy microbenchmark.
- if likely(fun == FUNC_BYTE):
- return ll_dict_lookup(d, key, hash, flag, TYPE_BYTE)
- elif fun == FUNC_SHORT:
- return ll_dict_lookup(d, key, hash, flag, TYPE_SHORT)
- elif IS_64BIT and fun == FUNC_INT:
- return ll_dict_lookup(d, key, hash, flag, TYPE_INT)
- elif fun == FUNC_LONG:
- return ll_dict_lookup(d, key, hash, flag, TYPE_LONG)
- assert False
+ while True:
+ fun = d.lookup_function_no & FUNC_MASK
+ # This likely() here forces gcc to compile the check for fun==FUNC_BYTE
+ # first. Otherwise, this is a regular switch and gcc (at least 4.7)
+ # compiles this as a series of checks, with the FUNC_BYTE case last.
+ # It sounds minor, but it is worth 6-7% on a PyPy microbenchmark.
+ if likely(fun == FUNC_BYTE):
+ return ll_dict_lookup(d, key, hash, flag, TYPE_BYTE)
+ elif fun == FUNC_SHORT:
+ return ll_dict_lookup(d, key, hash, flag, TYPE_SHORT)
+ elif IS_64BIT and fun == FUNC_INT:
+ return ll_dict_lookup(d, key, hash, flag, TYPE_INT)
+ elif fun == FUNC_LONG:
+ return ll_dict_lookup(d, key, hash, flag, TYPE_LONG)
+ elif fun == FUNC_NO_INDEX:
+ ll_dict_create_index(d)
+ # then, retry
+ else:
+ assert False
def get_ll_dict(DICTKEY, DICTVALUE, get_custom_eq_hash=None, DICT=None,
ll_fasthash_function=None, ll_hash_function=None,
@@ -254,7 +259,6 @@
llvalue = r_value.convert_const(dictvalue)
_ll_dict_insertclean(l_dict, llkey, llvalue,
dictkeycontainer.hash)
- return l_dict
else:
for dictkey, dictvalue in dictobj.items():
@@ -262,7 +266,9 @@
llvalue = r_value.convert_const(dictvalue)
_ll_dict_insertclean(l_dict, llkey, llvalue,
l_dict.keyhash(llkey))
- return l_dict
+
+ ll_remove_index(l_dict)
+ return l_dict
def rtype_len(self, hop):
v_dict, = hop.inputargs(self)
@@ -458,17 +464,27 @@
IS_64BIT = sys.maxint != 2 ** 31 - 1
-FUNC_SHIFT = 2
-FUNC_MASK = 0x03 # two bits
if IS_64BIT:
- FUNC_BYTE, FUNC_SHORT, FUNC_INT, FUNC_LONG = range(4)
+ FUNC_SHIFT = 3
+ FUNC_MASK = 0x07 # three bits
+ FUNC_NO_INDEX, FUNC_BYTE, FUNC_SHORT, FUNC_INT, FUNC_LONG = range(5)
else:
- FUNC_BYTE, FUNC_SHORT, FUNC_LONG = range(3)
+ FUNC_SHIFT = 2
+ FUNC_MASK = 0x03 # two bits
+ FUNC_NO_INDEX, FUNC_BYTE, FUNC_SHORT, FUNC_LONG = range(4)
TYPE_BYTE = rffi.UCHAR
TYPE_SHORT = rffi.USHORT
TYPE_INT = rffi.UINT
TYPE_LONG = lltype.Unsigned
+NULL = lltype.nullptr(llmemory.GCREF.TO)
+
+def ll_remove_index(d):
+ # Used in MemoryError situation, or when translating prebuilt dicts.
+ # Remove the index completely.
+ d.indexes = NULL
+ d.lookup_function_no = FUNC_NO_INDEX
+
def ll_malloc_indexes_and_choose_lookup(d, n):
# keep in sync with ll_clear_indexes() below
if n <= 256:
@@ -503,22 +519,30 @@
rgc.ll_arrayclear(lltype.cast_opaque_ptr(DICTINDEX_INT, d.indexes))
elif fun == FUNC_LONG:
rgc.ll_arrayclear(lltype.cast_opaque_ptr(DICTINDEX_LONG, d.indexes))
+ elif fun == FUNC_NO_INDEX:
+ # nothing to clear in this case
+ ll_assert(not d.indexes,
+ "ll_clear_indexes: FUNC_NO_INDEX but d.indexes != NULL")
else:
assert False
@jit.dont_look_inside
def ll_call_insert_clean_function(d, hash, i):
- fun = d.lookup_function_no & FUNC_MASK
- if fun == FUNC_BYTE:
- ll_dict_store_clean(d, hash, i, TYPE_BYTE)
- elif fun == FUNC_SHORT:
- ll_dict_store_clean(d, hash, i, TYPE_SHORT)
- elif IS_64BIT and fun == FUNC_INT:
- ll_dict_store_clean(d, hash, i, TYPE_INT)
- elif fun == FUNC_LONG:
- ll_dict_store_clean(d, hash, i, TYPE_LONG)
- else:
- assert False
+ while True:
+ fun = d.lookup_function_no & FUNC_MASK
+ if fun == FUNC_BYTE:
+ return ll_dict_store_clean(d, hash, i, TYPE_BYTE)
+ elif fun == FUNC_SHORT:
+ return ll_dict_store_clean(d, hash, i, TYPE_SHORT)
+ elif IS_64BIT and fun == FUNC_INT:
+ return ll_dict_store_clean(d, hash, i, TYPE_INT)
+ elif fun == FUNC_LONG:
+ return ll_dict_store_clean(d, hash, i, TYPE_LONG)
+ elif fun == FUNC_NO_INDEX:
+ ll_dict_create_index(d)
+ # then, retry
+ else:
+ assert False
def ll_call_delete_by_entry_index(d, hash, i):
fun = d.lookup_function_no & FUNC_MASK
@@ -530,6 +554,8 @@
ll_dict_delete_by_entry_index(d, hash, i, TYPE_INT)
elif fun == FUNC_LONG:
ll_dict_delete_by_entry_index(d, hash, i, TYPE_LONG)
+ elif fun == FUNC_NO_INDEX:
+ pass # there is no 'indexes', so nothing to delete
else:
assert False
@@ -614,7 +640,7 @@
try:
reindexed = ll_dict_grow(d)
except:
- _ll_dict_rescue(d)
+ ll_remove_index(d)
raise
rc = d.resize_counter - 3
if rc <= 0:
@@ -622,7 +648,7 @@
ll_dict_resize(d)
reindexed = True
except:
- _ll_dict_rescue(d)
+ ll_remove_index(d)
raise
rc = d.resize_counter - 3
ll_assert(rc > 0, "ll_dict_resize failed?")
@@ -640,14 +666,6 @@
d.num_ever_used_items += 1
d.num_live_items += 1
-@jit.dont_look_inside
-def _ll_dict_rescue(d):
- # MemoryError situation! The 'indexes' contains an invalid entry
- # at this point. But we can call ll_dict_reindex() with the
- # following arguments, ensuring no further malloc occurs.
- ll_dict_reindex(d, _ll_len_of_d_indexes(d))
-_ll_dict_rescue._dont_inline_ = True
-
def _ll_dict_insertclean(d, key, value, hash):
# never translated
ENTRY = lltype.typeOf(d.entries).TO.OF
@@ -775,7 +793,20 @@
else:
d.entries = newitems
- ll_dict_reindex(d, _ll_len_of_d_indexes(d))
+ # Estimate the index size we would like to have now. If the new
+ # estimate changed from before, we need to throw away the old index
+ # anyway, so simply remove it for now. If the size is the same,
+ # then maybe it is a better idea to keep it instead of reallocating
+ # an identical index very soon. In this case we update the index now.
+ if bool(d.indexes):
+ new_estimate = d.num_live_items * 2
+ new_size = DICT_INITSIZE
+ while new_size <= new_estimate:
+ new_size *= 2
+ if _ll_len_of_d_indexes(d) == new_size:
+ ll_dict_reindex(d, new_size)
+ else:
+ ll_remove_index(d)
def ll_dict_delitem(d, key):
@@ -818,10 +849,10 @@
assert j >= 0
d.num_ever_used_items = j
- # If the dictionary is at least 87.5% dead items, then consider shrinking
- # it.
- if d.num_live_items + DICT_INITSIZE <= len(d.entries) / 8:
- ll_dict_resize(d)
+ # If the dictionary is more than 75% dead items, then clean up the
+ # deleted items and remove the index.
+ if d.num_live_items + DICT_INITSIZE < len(d.entries) / 4:
+ ll_dict_remove_deleted_items(d)
def ll_dict_resize(d):
# make a 'new_size' estimate and shrink it if there are many
@@ -839,11 +870,21 @@
while new_size <= new_estimate:
new_size *= 2
- if new_size < _ll_len_of_d_indexes(d):
+ if bool(d.indexes) and new_size < _ll_len_of_d_indexes(d):
ll_dict_remove_deleted_items(d)
else:
ll_dict_reindex(d, new_size)
+def ll_dict_create_index(d):
+ if d.num_live_items == 0:
+ new_size = DICT_INITSIZE # fast path
+ else:
+ new_estimate = d.num_live_items * 2
+ new_size = DICT_INITSIZE
+ while new_size <= new_estimate:
+ new_size *= 2
+ ll_dict_reindex(d, new_size)
+
def ll_dict_reindex(d, new_size):
if bool(d.indexes) and _ll_len_of_d_indexes(d) == new_size:
ll_clear_indexes(d, new_size) # hack: we can reuse the same array
@@ -1013,7 +1054,9 @@
def ll_newdict(DICT):
d = DICT.allocate()
d.entries = _ll_empty_array(DICT)
- ll_malloc_indexes_and_choose_lookup(d, DICT_INITSIZE)
+ # Don't allocate an 'indexes' for empty dict. It seems a typical
+ # program contains tons of empty dicts, so this might be a memory win.
+ ll_remove_index(d)
d.num_live_items = 0
d.num_ever_used_items = 0
d.resize_counter = DICT_INITSIZE * 2
@@ -1170,7 +1213,7 @@
d_entry.f_hash = entry.f_hash
i += 1
- ll_dict_reindex(newdict, _ll_len_of_d_indexes(dict))
+ ll_remove_index(newdict)
return newdict
ll_dict_copy.oopspec = 'odict.copy(dict)'
@@ -1180,7 +1223,7 @@
DICT = lltype.typeOf(d).TO
old_entries = d.entries
d.entries = _ll_empty_array(DICT)
- ll_malloc_indexes_and_choose_lookup(d, DICT_INITSIZE)
+ ll_remove_index(d)
d.num_live_items = 0
d.num_ever_used_items = 0
d.resize_counter = DICT_INITSIZE * 2
diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py
--- a/rpython/rtyper/test/test_rordereddict.py
+++ b/rpython/rtyper/test/test_rordereddict.py
@@ -196,10 +196,11 @@
num = rordereddict._ll_dictnext(ll_iter)
ll_key = ll_d.entries[num].key
assert hlstr(ll_key) == "j"
- assert ll_d.lookup_function_no == 4 # 1 free item found at the start
+ assert ll_d.lookup_function_no == ( # 1 free item found at the start
+ (1 << rordereddict.FUNC_SHIFT) | rordereddict.FUNC_BYTE)
rordereddict.ll_dict_delitem(ll_d, llstr("j"))
assert ll_d.num_ever_used_items == 0
- assert ll_d.lookup_function_no == 0 # reset
+ assert ll_d.lookup_function_no == rordereddict.FUNC_BYTE # reset
def test_direct_enter_and_del(self):
def eq(a, b):
More information about the pypy-commit
mailing list