[Python-ideas] Contiguous-array-based ordering for OrderedDict
Sven R. Kunze
srkunze at mail.de
Tue Dec 15 11:45:43 EST 2015
Wow. Thanks for that. :)
Well, is there some performance case scenario? Something which tests all
possible interactions with OrderedDict which could be used as the
authoritative Benchmark on this.
Otherwise, I fear, it's just talking about possible possibilities nobody
really evaluated properly.
Best,
Sven
On 15.12.2015 16:00, Franklin? Lee wrote:
> On Tue, Dec 15, 2015 at 9:53 AM, Laura Creighton <lac at openend.se> wrote:
>> In a message of Tue, 15 Dec 2015 04:09:55 -0500, "Franklin? Lee" writes:
>>> I did. It is in a pastebin link in my original message, based on the
>>> 3.5.0 Python (not C) version of OrderedDict. I was hoping for guidance
>>> on evaluating it. Maybe it wasn't seen because I used '-'*n to
>>> separate it from my intro, or maybe pastebin is so disliked that
>>> people couldn't see it. Here it is again: http://pastebin.com/LESRktJw
>> That is the problem. We absolutely do not want links to things
>> like pastebin. We want the code here, as part of the text. 5 years
>> from now, when other people are scratching their heads saying,
>> I wonder why Guido decided things the way he did, and whether that
>> decision can and should be revisited, the first thing we will do is
>> to go back and read all this discussion. And if the discussion is
>> about code we can no longer see, because the pastebin has expired,
>> then we won't be able to learn much.
>>
>> Anything that matters needs to be part of the archived discussion.
>>
>> Laura
> I feel similarly about information history, which is why I always set
> "Expire: Never" when I use pastebin :D.
>
> But alright. The rest of this message is the code as I had it when I
> sent the original message.
>
>
> from collections.abc import *
> from reprlib import recursive_repr as _recursive_repr
> from collections import OrderedDict
>
> class _ListDictKeysView(KeysView):
>
> def __reversed__(self):
> yield from reversed(self._mapping)
>
> class _ListDictItemsView(ItemsView):
>
> def __reversed__(self):
> for key in reversed(self._mapping):
> yield (key, self._mapping[key])
>
> class _ListDictValuesView(ValuesView):
>
> def __reversed__(self):
> for key in reversed(self._mapping):
> yield self._mapping[key]
>
> _sentinel = object()
>
>
> class ListDict(dict):
> 'Dictionary that remembers insertion order'
> # An inherited dict maps keys to values.
> # The inherited dict provides __getitem__, __len__, __contains__, and get.
> # The remaining methods are order-aware.
> # Big-O running times for all methods are the same as regular dictionaries.
>
> def __init__(*args, **kwds):
> '''Initialize an ordered dictionary. The signature is the same as
> regular dictionaries, but keyword arguments are not recommended because
> their insertion order is arbitrary.
>
> '''
> if not args:
> raise TypeError("descriptor '__init__' of 'ListDict' object "
> "needs an argument")
> self, *args = args
> if len(args) > 1:
> raise TypeError('expected at most 1 arguments, got %d' % len(args))
> try:
> # self.__root
> self.__list
> except AttributeError:
> self.__map = {}
> self.__list = []
> self.__size = 0
> self.__update(*args, **kwds)
>
> def __setitem__(self, key, value,
> dict_setitem=dict.__setitem__, len=len):
> 'od.__setitem__(i, y) <==> od[i]=y'
> # If it's a new key, we need to track it.
> if key not in self:
> self.__map[key] = len(self.__list)
> self.__list.append(key)
> self.__size += 1
>
> dict_setitem(self, key, value)
>
> def __delitem__(self, key, dict_delitem=dict.__delitem__,
> sentinel=_sentinel):
> 'od.__delitem__(y) <==> del od[y]'
> dict_delitem(self, key)
>
> # Remove the tracking for this item
> index = self.__map.pop(key)
> self.__list[index] = sentinel
> self.__size -= 1
>
> self.__compact()
>
> def __iter__(self, sentinel=_sentinel):
> 'od.__iter__() <==> iter(od)'
> for key in self.__list:
> if key is not sentinel:
> yield key
>
> def __reversed__(self, sentinel=_sentinel, reversed=reversed):
> 'od.__reversed__() <==> reversed(od)'
> for key in reversed(self.__list):
> if key is not sentinel:
> yield key
>
> def clear(self):
> 'od.clear() -> None. Remove all items from od.'
> self.__list.clear()
> self.__map.clear()
> self.__size = 0
> dict.clear(self) # dict.clear isn't cached?
>
> def popitem(self, last=True, sentinel=_sentinel,
> reversed=reversed, next=next):
> '''od.popitem() -> (k, v), return and remove a (key, value) pair.
> Pairs are returned in LIFO order if last is true or FIFO order if false.
>
> '''
> if not self:
> raise KeyError('dictionary is empty')
>
> if last:
> lst = reversed(self.__list)
> else:
> lst = self.__list
>
> # Could use the key lookup to find this, but... meh.
> # Note that attempting to compact first might have helped.
> index, key = next((i, k)
> for i, k in enumerate(lst)
> if k is not sentinel)
>
> # We're calling dict.pop later, which won't handle
> # the metadata.
> del self.__map[key]
> self.__list[index] = sentinel
> self.__size -= 1
>
> self.__compact()
>
> value = dict.pop(self, key) # dict.pop isn't cached?
> return key, value
>
> def __compact(self, sentinel=_sentinel,
> enumerate=enumerate, reversed=reversed):
> ''' Compact the order __list if necessary.
> '''
> # May need to use this a lot in the upcoming `else`.
> lst = self.__list
> if not lst:
> return
>
> if self.__size / len(lst) <= 0.5: #chosen at non-random
> # Implementation 1: list comprehension
> self.__list = [k for k in lst if k is not sentinel]
>
> # Implementation 2:
> # If only `list` had a `remove_all` method...
> pass
>
> ''' Update all indices after a reordering.
>
> Should only be done when full (because it shouldn't be
> necessary otherwise).
> '''
> inner_map = self.__map
> for index, key in enumerate(self.__list):
> inner_map[key] = index
>
> else:
> # Even if the list isn't mostly empty,
> # we can try to clear the back.
> # TODO: How can this be more efficient?
>
> # Note: There exists a non-sentinel because
> # otherwise, .__size/positive == 0 < positive.
>
> # # Implementation 1: Pop until it drops.
> # while lst[-1] is sentinel:
> # lst.pop()
>
> # Implementation 2: Count the number of sentinels at the end.
> emptys = next(i for i, k in enumerate(reversed(lst))
> if k is not sentinel)
> # guaranteed not to StopIteration since .__size > 0
> del lst[:-emptys] #safe even if 0
>
> def move_to_end(self, key, last=True, sentinel=_sentinel,
> enumerate=enumerate, len=len):
> '''Move an existing element to the end (or beginning if last==False).
>
> Raises KeyError if the element does not exist.
> When last=True, acts like a fast version of self[key]=self.pop(key).
> '''
>
> index = self.__map[key]
> lst = self.__list
> if last:
> if index + 1 == len(lst):
> # already last
> # Not sure if this is the right path to optimize.
> # But I think redundant move_to_ends shouldn't
> # blow up the __list.
> return
>
> lst[index] = sentinel
> if lst[-1] is sentinel:
> # can just swap with last
> lst[-1] = key
> self.__map[key] = len(lst) - 1
> else:
> # append and maybe compact
> lst[index] = sentinel
> lst.append(key)
> self.__map[key] = len(lst) - 1
> self.__compact()
> else:
> # This is costly. But this shouldn't
> # be a common case anyway, right?
> # I mean, who repeatedly adds to the front
> # of an OrderedDict?
> # And this is basically the only costly
> # operation I'm adding.
>
> lst[index] = sentinel
>
> # Propagate forward from the front.
> for i, newkey in enumerate(lst):
> self.__map[key] = i
> lst[i], key = key, newkey
> if key is sentinel:
> break
>
> def __sizeof__(self):
> sizeof = _sys.getsizeof
> size = sizeof(self.__dict__) # instance dictionary
> size += sizeof(self.__map) * 2 # internal dict and
> inherited dict
>
> size += sizeof(self.__list)
> size += sizeof(self.__size)
> return size
>
> update = __update = MutableMapping.update
>
> def keys(self):
> "D.keys() -> a set-like object providing a view on D's keys"
> return _ListDictKeysView(self)
>
> def items(self):
> "D.items() -> a set-like object providing a view on D's items"
> return _ListDictItemsView(self)
>
> def values(self):
> "D.values() -> an object providing a view on D's values"
> return _ListDictValuesView(self)
>
> __ne__ = MutableMapping.__ne__
>
> __marker = object()
>
> def pop(self, key, default=__marker):
> '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
> value. If key is not found, d is returned if given, otherwise KeyError
> is raised.
>
> '''
> if key in self:
> result = self[key]
> del self[key]
> return result
> if default is self.__marker:
> raise KeyError(key)
> return default
>
> def setdefault(self, key, default=None):
> 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
> if key in self:
> return self[key]
> self[key] = default
> return default
>
> @_recursive_repr()
> def __repr__(self):
> 'od.__repr__() <==> repr(od)'
> if not self:
> return '%s()' % (self.__class__.__name__,)
> return '%s(%r)' % (self.__class__.__name__, list(self.items()))
>
> def __reduce__(self):
> 'Return state information for pickling'
> inst_dict = vars(self).copy()
> for k in vars(ListDict()):
> inst_dict.pop(k, None)
> return self.__class__, (), inst_dict or None, None, iter(self.items())
>
> def copy(self):
> 'od.copy() -> a shallow copy of od'
> return self.__class__(self)
>
> @classmethod
> def fromkeys(cls, iterable, value=None):
> '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
> If not specified, the value defaults to None.
>
> '''
> self = cls()
> for key in iterable:
> self[key] = value
> return self
>
> def __eq__(self, other):
> '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
> while comparison to a regular mapping is order-insensitive.
>
> '''
> if isinstance(other, ListDict):
> return dict.__eq__(self, other) and all(map(_eq, self, other))
> return dict.__eq__(self, other)
More information about the Python-ideas
mailing list