Ordered Sets

Alex_Gaynor alex.gaynor at gmail.com
Mon Mar 30 22:57:39 EDT 2009


On Mar 30, 12:27 pm, pataphor <patap... at gmail.com> wrote:
> On Mon, 30 Mar 2009 03:30:04 -0500
>
> Nick Craig-Wood <n... at craig-wood.com> wrote:
> > >>> class Node(object):
> > ...     __slots__ = ["prev", "next", "this"]
> > ...     def __init__(self, prev, next, this):
> > ...         self.prev = prev
> > ...         self.next = next
> > ...         self.this = this
>
> [...]
>
> > So the Node class actually takes less memory 38 Mbytes vs 53 Mbytes
> > for the list.
>
> Well OK, but if we're going to start optimizing, it seems we don't need
> to store the key itself in the Node or the list. Here's a version of my
> script that stores only prev and next. Another twist is that it is now
> possible to arbitrarily set the starting point for the iteration. (Just
> in case someone needed a cue for further ranting)
>
> P.
>
> import collections
>
> class OrderedSet(collections.MutableSet):
>     'Set that remembers the order elements were added'
>     # Big-O running times for all methods are the same as for regular
> sets.
>     def __init__(self, iterable=None):
>         self.links = {}                     # key --> [prev,next]
>         if iterable is not None:
>             self |= iterable
>
>     def __len__(self):
>         return len(self.links)
>
>     def __contains__(self, key):
>         return key in self.links
>
>     def add(self, key):
>         if not self:
>             self.start = key
>             self.links = {key: [key,key]}
>         elif key not in self.links:
>             links = self.links
>             start = self.start
>             prev, next  = links[start]
>             links[prev][1] = key
>             links[start][0] = key
>             links[key] = [prev,start]
>
>     def discard(self, key):
>         links = self.links
>         if key in links:        
>             prev,next = links.pop(key)
>             if self.links:
>                 links[prev][1] = next
>                 links[next][0] = prev
>                 if key  == self.start:
>                     self.start = next
>             else:
>                 del self.start
>
>     def __iter__(self):
>         links = self.links
>         start = self.start
>         if links:
>             yield start
>             prev,next = links[start]
>             while next != start:
>                 yield next
>                 prev,next = links[next]
>
>     def __reversed__(self):
>         links = self.links
>         start = self.start
>         if links:
>             prev,next = links[start]
>             while prev != start:
>                 yield prev
>                 prev,next = self.links[prev]
>             yield start
>
>     def pop(self, last=True):
>         if not self:
>             raise KeyError('set is empty')
>         key = next(reversed(self)) if last else next(iter(self))
>         self.discard(key)
>         return key
>
>     def __repr__(self):
>         if not self:
>             return '%s()' % (self.__class__.__name__,)
>         return '%s(%r)' % (self.__class__.__name__, list(self))
>
>     def __eq__(self, other):
>         if isinstance(other, OrderedSet):
>             return len(self) == len(other) and list(self) == list(other)
>         return not self.isdisjoint(other)
>
> def test():
>     D = OrderedSet('abcd')
>     R = OrderedSet()
>     for x in list(D):
>         print(D,R)
>         R.add(D.pop(last = False))
>     print(D,R)
>     print()
>     R.start = 'c'
>     print(list(reversed(R)))
>
> if __name__ == '__main__':
>     test()

I really like the Ordered Set class(I've been thinking about one ever
since ordered dict hit the std lib), is there any argument against
adding one to the collections module?  I'd be willing to write a PEP
up for it.

Alex



More information about the Python-list mailing list