[pypy-commit] pypy unicode-utf8: merge default
cfbolz
pypy.commits at gmail.com
Tue Dec 12 13:53:34 EST 2017
Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch: unicode-utf8
Changeset: r93394:8986b7040a05
Date: 2017-12-12 18:28 +0100
http://bitbucket.org/pypy/pypy/changeset/8986b7040a05/
Log: merge default
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -36,6 +36,10 @@
.. branch: win32-vcvars
+.. branch rdict-fast-hash
+
+Make it possible to declare that the hash function of an r_dict is fast in RPython.
+
.. branch: unicode-utf8-re
.. branch: utf8-io
Utf8 handling for unicode
diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -49,7 +49,7 @@
self.ll_chars = rffi.str2charp(s)
self.end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw')
self.pos = 0
- self.cache = r_dict(slice_eq, slice_hash)
+ self.cache = r_dict(slice_eq, slice_hash, simple_hash_eq=True)
def close(self):
rffi.free_charp(self.ll_chars)
diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py
--- a/rpython/annotator/bookkeeper.py
+++ b/rpython/annotator/bookkeeper.py
@@ -194,13 +194,14 @@
listdef.generalize_range_step(flags['range_step'])
return SomeList(listdef)
- def getdictdef(self, is_r_dict=False, force_non_null=False):
+ def getdictdef(self, is_r_dict=False, force_non_null=False, simple_hash_eq=False):
"""Get the DictDef associated with the current position."""
try:
dictdef = self.dictdefs[self.position_key]
except KeyError:
dictdef = DictDef(self, is_r_dict=is_r_dict,
- force_non_null=force_non_null)
+ force_non_null=force_non_null,
+ simple_hash_eq=simple_hash_eq)
self.dictdefs[self.position_key] = dictdef
return dictdef
diff --git a/rpython/annotator/builtin.py b/rpython/annotator/builtin.py
--- a/rpython/annotator/builtin.py
+++ b/rpython/annotator/builtin.py
@@ -237,22 +237,30 @@
return SomeInstance(clsdef)
@analyzer_for(rpython.rlib.objectmodel.r_dict)
-def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None):
+def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None):
+ return _r_dict_helper(SomeDict, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq)
+
+ at analyzer_for(rpython.rlib.objectmodel.r_ordereddict)
+def robjmodel_r_ordereddict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None):
+ return _r_dict_helper(SomeOrderedDict, s_eqfn, s_hashfn,
+ s_force_non_null, s_simple_hash_eq)
+
+def _r_dict_helper(cls, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq):
if s_force_non_null is None:
force_non_null = False
else:
assert s_force_non_null.is_constant()
force_non_null = s_force_non_null.const
+ if s_simple_hash_eq is None:
+ simple_hash_eq = False
+ else:
+ assert s_simple_hash_eq.is_constant()
+ simple_hash_eq = s_simple_hash_eq.const
dictdef = getbookkeeper().getdictdef(is_r_dict=True,
- force_non_null=force_non_null)
+ force_non_null=force_non_null,
+ simple_hash_eq=simple_hash_eq)
dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn)
- return SomeDict(dictdef)
-
- at analyzer_for(rpython.rlib.objectmodel.r_ordereddict)
-def robjmodel_r_ordereddict(s_eqfn, s_hashfn):
- dictdef = getbookkeeper().getdictdef(is_r_dict=True)
- dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn)
- return SomeOrderedDict(dictdef)
+ return cls(dictdef)
@analyzer_for(rpython.rlib.objectmodel.hlinvoke)
def robjmodel_hlinvoke(s_repr, s_llcallable, *args_s):
diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py
--- a/rpython/annotator/dictdef.py
+++ b/rpython/annotator/dictdef.py
@@ -81,12 +81,14 @@
def __init__(self, bookkeeper, s_key = s_ImpossibleValue,
s_value = s_ImpossibleValue,
is_r_dict = False,
- force_non_null = False):
+ force_non_null = False,
+ simple_hash_eq = False):
self.dictkey = DictKey(bookkeeper, s_key, is_r_dict)
self.dictkey.itemof[self] = True
self.dictvalue = DictValue(bookkeeper, s_value)
self.dictvalue.itemof[self] = True
self.force_non_null = force_non_null
+ self.simple_hash_eq = simple_hash_eq
def read_key(self, position_key):
self.dictkey.read_locations.add(position_key)
diff --git a/rpython/jit/metainterp/typesystem.py b/rpython/jit/metainterp/typesystem.py
--- a/rpython/jit/metainterp/typesystem.py
+++ b/rpython/jit/metainterp/typesystem.py
@@ -106,11 +106,11 @@
# It is an r_dict on lltype. Two copies, to avoid conflicts with
# the value type. Note that NULL is not allowed as a key.
def new_ref_dict(self):
- return r_dict(rd_eq, rd_hash)
+ return r_dict(rd_eq, rd_hash, simple_hash_eq=True)
def new_ref_dict_2(self):
- return r_dict(rd_eq, rd_hash)
+ return r_dict(rd_eq, rd_hash, simple_hash_eq=True)
def new_ref_dict_3(self):
- return r_dict(rd_eq, rd_hash)
+ return r_dict(rd_eq, rd_hash, simple_hash_eq=True)
def cast_vtable_to_hashable(self, cpu, ptr):
adr = llmemory.cast_ptr_to_adr(ptr)
diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py
--- a/rpython/rlib/objectmodel.py
+++ b/rpython/rlib/objectmodel.py
@@ -748,11 +748,19 @@
def _newdict(self):
return {}
- def __init__(self, key_eq, key_hash, force_non_null=False):
+ def __init__(self, key_eq, key_hash, force_non_null=False, simple_hash_eq=False):
+ """ force_non_null=True means that the key can never be None (even if
+ the annotator things it could be)
+
+ simple_hash_eq=True means that the hash function is very fast, meaning it's
+ efficient enough that the dict does not have to store the hash per key.
+ It also implies that neither the hash nor the eq function will mutate
+ the dictionary. """
self._dict = self._newdict()
self.key_eq = key_eq
self.key_hash = key_hash
self.force_non_null = force_non_null
+ self.simple_hash_eq = simple_hash_eq
def __getitem__(self, key):
return self._dict[_r_dictkey(self, key)]
diff --git a/rpython/rlib/test/test_objectmodel.py b/rpython/rlib/test/test_objectmodel.py
--- a/rpython/rlib/test/test_objectmodel.py
+++ b/rpython/rlib/test/test_objectmodel.py
@@ -330,6 +330,13 @@
res = self.interpret(g, [3])
assert res == 77
+ def test_r_dict_fast_functions(self):
+ def fn():
+ d1 = r_dict(strange_key_eq, strange_key_hash, simple_hash_eq=True)
+ return play_with_r_dict(d1)
+ res = self.interpret(fn, [])
+ assert res
+
def test_prepare_dict_update(self):
def g(n):
d = {}
diff --git a/rpython/rtyper/lltypesystem/rdict.py b/rpython/rtyper/lltypesystem/rdict.py
--- a/rpython/rtyper/lltypesystem/rdict.py
+++ b/rpython/rtyper/lltypesystem/rdict.py
@@ -42,7 +42,8 @@
class DictRepr(AbstractDictRepr):
def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue,
- custom_eq_hash=None, force_non_null=False):
+ custom_eq_hash=None, force_non_null=False, fast_hash=False):
+ # fast_hash is ignored (only implemented in rordereddict.py)
self.rtyper = rtyper
self.DICT = lltype.GcForwardReference()
self.lowleveltype = lltype.Ptr(self.DICT)
diff --git a/rpython/rtyper/lltypesystem/rordereddict.py b/rpython/rtyper/lltypesystem/rordereddict.py
--- a/rpython/rtyper/lltypesystem/rordereddict.py
+++ b/rpython/rtyper/lltypesystem/rordereddict.py
@@ -66,7 +66,7 @@
def get_ll_dict(DICTKEY, DICTVALUE, get_custom_eq_hash=None, DICT=None,
ll_fasthash_function=None, ll_hash_function=None,
- ll_eq_function=None, method_cache={},
+ ll_eq_function=None, method_cache={}, simple_hash_eq=False,
dummykeyobj=None, dummyvalueobj=None, rtyper=None):
# get the actual DICT type. if DICT is None, it's created, otherwise
# forward reference is becoming DICT
@@ -114,11 +114,14 @@
# * the value
entryfields.append(("value", DICTVALUE))
- if ll_fasthash_function is None:
+ if simple_hash_eq:
+ assert get_custom_eq_hash is not None
+ entrymeths['entry_hash'] = ll_hash_custom_fast
+ elif ll_fasthash_function is None:
entryfields.append(("f_hash", lltype.Signed))
- entrymeths['hash'] = ll_hash_from_cache
+ entrymeths['entry_hash'] = ll_hash_from_cache
else:
- entrymeths['hash'] = ll_hash_recomputed
+ entrymeths['entry_hash'] = ll_hash_recomputed
entrymeths['fasthashfn'] = ll_fasthash_function
# Build the lltype data structures
@@ -140,7 +143,7 @@
'keyeq': ll_keyeq_custom,
'r_rdict_eqfn': r_rdict_eqfn,
'r_rdict_hashfn': r_rdict_hashfn,
- 'paranoia': True,
+ 'paranoia': not simple_hash_eq,
}
else:
# figure out which functions must be used to hash and compare
@@ -167,13 +170,14 @@
class OrderedDictRepr(AbstractDictRepr):
def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue,
- custom_eq_hash=None, force_non_null=False):
+ custom_eq_hash=None, force_non_null=False, simple_hash_eq=False):
#assert not force_non_null
self.rtyper = rtyper
self.finalized = False
self.DICT = lltype.GcForwardReference()
self.lowleveltype = lltype.Ptr(self.DICT)
self.custom_eq_hash = custom_eq_hash is not None
+ self.simple_hash_eq = simple_hash_eq
if not isinstance(key_repr, rmodel.Repr): # not computed yet, done by setup()
assert callable(key_repr)
self._key_repr_computer = key_repr
@@ -211,6 +215,7 @@
self.r_rdict_eqfn, self.r_rdict_hashfn = (
self._custom_eq_hash_repr())
kwd['get_custom_eq_hash'] = self._custom_eq_hash_repr
+ kwd['simple_hash_eq'] = self.simple_hash_eq
else:
kwd['ll_hash_function'] = self.key_repr.get_ll_hash_function()
kwd['ll_eq_function'] = self.key_repr.get_ll_eq_function()
@@ -600,15 +605,21 @@
dummy = ENTRIES.dummy_obj.ll_dummy_value
entries[i].value = dummy
- at signature(types.any(), types.int(), returns=types.any())
-def ll_hash_from_cache(entries, i):
+ at signature(types.any(), types.any(), types.int(), returns=types.any())
+def ll_hash_from_cache(entries, d, i):
return entries[i].f_hash
- at signature(types.any(), types.int(), returns=types.any())
-def ll_hash_recomputed(entries, i):
+ at signature(types.any(), types.any(), types.int(), returns=types.any())
+def ll_hash_recomputed(entries, d, i):
ENTRIES = lltype.typeOf(entries).TO
return ENTRIES.fasthashfn(entries[i].key)
+ at signature(types.any(), types.any(), types.int(), returns=types.any())
+def ll_hash_custom_fast(entries, d, i):
+ DICT = lltype.typeOf(d).TO
+ key = entries[i].key
+ return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key)
+
def ll_keyhash_custom(d, key):
DICT = lltype.typeOf(d).TO
return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key)
@@ -962,22 +973,22 @@
if fun == FUNC_BYTE:
while i < ibound:
if entries.valid(i):
- ll_dict_store_clean(d, entries.hash(i), i, TYPE_BYTE)
+ ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_BYTE)
i += 1
elif fun == FUNC_SHORT:
while i < ibound:
if entries.valid(i):
- ll_dict_store_clean(d, entries.hash(i), i, TYPE_SHORT)
+ ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_SHORT)
i += 1
elif IS_64BIT and fun == FUNC_INT:
while i < ibound:
if entries.valid(i):
- ll_dict_store_clean(d, entries.hash(i), i, TYPE_INT)
+ ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_INT)
i += 1
elif fun == FUNC_LONG:
while i < ibound:
if entries.valid(i):
- ll_dict_store_clean(d, entries.hash(i), i, TYPE_LONG)
+ ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_LONG)
i += 1
else:
assert False
@@ -1015,7 +1026,7 @@
checkingkey = entries[index - VALID_OFFSET].key
if direct_compare and checkingkey == key:
return index - VALID_OFFSET # found the entry
- if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash:
+ if d.keyeq is not None and entries.entry_hash(d, index - VALID_OFFSET) == hash:
# correct hash, maybe the key is e.g. a different pointer to
# an equal object
found = d.keyeq(checkingkey, key)
@@ -1056,7 +1067,7 @@
checkingkey = entries[index - VALID_OFFSET].key
if direct_compare and checkingkey == key:
return index - VALID_OFFSET # found the entry
- if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash:
+ if d.keyeq is not None and entries.entry_hash(d, index - VALID_OFFSET) == hash:
# correct hash, maybe the key is e.g. a different pointer to
# an equal object
found = d.keyeq(checkingkey, key)
@@ -1305,14 +1316,14 @@
def ll_dict_update(dic1, dic2):
if dic1 == dic2:
return
- ll_ensure_indexes(dic2) # needed for entries.hash() below
+ ll_ensure_indexes(dic2) # needed for entries.entry_hash() below
ll_prepare_dict_update(dic1, dic2.num_live_items)
i = 0
while i < dic2.num_ever_used_items:
entries = dic2.entries
if entries.valid(i):
entry = entries[i]
- hash = entries.hash(i)
+ hash = entries.entry_hash(dic2, i)
key = entry.key
value = entry.value
index = dic1.lookup_function(dic1, key, hash, FLAG_STORE)
@@ -1413,7 +1424,7 @@
r = lltype.malloc(ELEM.TO)
r.item0 = recast(ELEM.TO.item0, entry.key)
r.item1 = recast(ELEM.TO.item1, entry.value)
- _ll_dict_del(dic, dic.entries.hash(i), i)
+ _ll_dict_del(dic, dic.entries.entry_hash(dic, i), i)
return r
def ll_dict_pop(dic, key):
diff --git a/rpython/rtyper/rbuiltin.py b/rpython/rtyper/rbuiltin.py
--- a/rpython/rtyper/rbuiltin.py
+++ b/rpython/rtyper/rbuiltin.py
@@ -717,9 +717,9 @@
@typer_for(OrderedDict)
@typer_for(objectmodel.r_dict)
@typer_for(objectmodel.r_ordereddict)
-def rtype_dict_constructor(hop, i_force_non_null=None):
- # 'i_force_non_null' is ignored here; if it has any effect, it
- # has already been applied to 'hop.r_result'
+def rtype_dict_constructor(hop, i_force_non_null=None, i_simple_hash_eq=None):
+ # 'i_force_non_null' and 'i_simple_hash_eq' are ignored here; if they have any
+ # effect, it has already been applied to 'hop.r_result'
hop.exception_cannot_occur()
r_dict = hop.r_result
cDICT = hop.inputconst(lltype.Void, r_dict.DICT)
diff --git a/rpython/rtyper/rdict.py b/rpython/rtyper/rdict.py
--- a/rpython/rtyper/rdict.py
+++ b/rpython/rtyper/rdict.py
@@ -15,6 +15,7 @@
s_key = dictkey.s_value
s_value = dictvalue.s_value
force_non_null = self.dictdef.force_non_null
+ simple_hash_eq = self.dictdef.simple_hash_eq
if dictkey.custom_eq_hash:
custom_eq_hash = lambda: (rtyper.getrepr(dictkey.s_rdict_eqfn),
rtyper.getrepr(dictkey.s_rdict_hashfn))
@@ -22,7 +23,7 @@
custom_eq_hash = None
return self.get_dict_repr()(rtyper, lambda: rtyper.getrepr(s_key),
lambda: rtyper.getrepr(s_value), dictkey, dictvalue,
- custom_eq_hash, force_non_null)
+ custom_eq_hash, force_non_null, simple_hash_eq)
def rtyper_makekey(self):
self.dictdef.dictkey .dont_change_any_more = True
@@ -89,7 +90,7 @@
resulttype=ENTRIES)
# call the correct variant_*() method
method = getattr(self, 'variant_' + self.variant)
- return method(hop, ENTRIES, v_entries, v_index)
+ return method(hop, ENTRIES, v_entries, v_dict, v_index)
def get_tuple_result(self, hop, items_v):
# this allocates the tuple for the result, directly in the function
@@ -109,7 +110,7 @@
hop.genop('setfield', [v_result, c_item, v_item])
return v_result
- def variant_keys(self, hop, ENTRIES, v_entries, v_index):
+ def variant_keys(self, hop, ENTRIES, v_entries, v_dict, v_index):
KEY = ENTRIES.TO.OF.key
c_key = hop.inputconst(lltype.Void, 'key')
v_key = hop.genop('getinteriorfield', [v_entries, v_index, c_key],
@@ -118,30 +119,30 @@
variant_reversed = variant_keys
- def variant_values(self, hop, ENTRIES, v_entries, v_index):
+ def variant_values(self, hop, ENTRIES, v_entries, v_dict, v_index):
VALUE = ENTRIES.TO.OF.value
c_value = hop.inputconst(lltype.Void, 'value')
v_value = hop.genop('getinteriorfield', [v_entries,v_index,c_value],
resulttype=VALUE)
return self.r_dict.recast_value(hop.llops, v_value)
- def variant_items(self, hop, ENTRIES, v_entries, v_index):
- v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index)
- v_value = self.variant_values(hop, ENTRIES, v_entries, v_index)
+ def variant_items(self, hop, ENTRIES, v_entries, v_dict, v_index):
+ v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index)
+ v_value = self.variant_values(hop, ENTRIES, v_entries, v_dict, v_index)
return self.get_tuple_result(hop, (v_key, v_value))
- def variant_hashes(self, hop, ENTRIES, v_entries, v_index):
+ def variant_hashes(self, hop, ENTRIES, v_entries, v_dict, v_index):
# there is not really a variant 'hashes', but this method is
# convenient for the following variants
- return hop.gendirectcall(ENTRIES.TO.hash, v_entries, v_index)
+ return hop.gendirectcall(ENTRIES.TO.entry_hash, v_entries, v_dict, v_index)
- def variant_keys_with_hash(self, hop, ENTRIES, v_entries, v_index):
- v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index)
- v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_index)
+ def variant_keys_with_hash(self, hop, ENTRIES, v_entries, v_dict, v_index):
+ v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index)
+ v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_dict, v_index)
return self.get_tuple_result(hop, (v_key, v_hash))
- def variant_items_with_hash(self, hop, ENTRIES, v_entries, v_index):
- v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index)
- v_value = self.variant_values(hop, ENTRIES, v_entries, v_index)
- v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_index)
+ def variant_items_with_hash(self, hop, ENTRIES, v_entries, v_dict, v_index):
+ v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index)
+ v_value = self.variant_values(hop, ENTRIES, v_entries, v_dict, v_index)
+ v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_dict, v_index)
return self.get_tuple_result(hop, (v_key, v_value, v_hash))
diff --git a/rpython/rtyper/test/test_rdict.py b/rpython/rtyper/test/test_rdict.py
--- a/rpython/rtyper/test/test_rdict.py
+++ b/rpython/rtyper/test/test_rdict.py
@@ -538,6 +538,25 @@
r_dict = rtyper.getrepr(s)
assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash")
+ def test_r_dict_can_be_fast(self):
+ def myeq(n, m):
+ return n == m
+ def myhash(n):
+ return ~n
+ def f():
+ d = self.new_r_dict(myeq, myhash, simple_hash_eq=True)
+ d[5] = 7
+ d[12] = 19
+ return d
+
+ t = TranslationContext()
+ s = t.buildannotator().build_types(f, [])
+ rtyper = t.buildrtyper()
+ rtyper.specialize()
+
+ r_dict = rtyper.getrepr(s)
+ assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash")
+
def test_tuple_dict(self):
def f(i):
d = self.newdict()
@@ -1000,8 +1019,8 @@
return {}
@staticmethod
- def new_r_dict(myeq, myhash):
- return r_dict(myeq, myhash)
+ def new_r_dict(myeq, myhash, force_non_null=False, simple_hash_eq=False):
+ return r_dict(myeq, myhash, force_non_null=force_non_null, simple_hash_eq=simple_hash_eq)
def test_two_dicts_with_different_value_types(self):
def func(i):
diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py
--- a/rpython/rtyper/test/test_rordereddict.py
+++ b/rpython/rtyper/test/test_rordereddict.py
@@ -386,8 +386,10 @@
return OrderedDict()
@staticmethod
- def new_r_dict(myeq, myhash):
- return objectmodel.r_ordereddict(myeq, myhash)
+ def new_r_dict(myeq, myhash, force_non_null=False, simple_hash_eq=False):
+ return objectmodel.r_ordereddict(
+ myeq, myhash, force_non_null=force_non_null,
+ simple_hash_eq=simple_hash_eq)
def test_two_dicts_with_different_value_types(self):
def func(i):
More information about the pypy-commit
mailing list