Best way to make a list unique?

Michael Spencer mahs at telcopartners.com
Wed Mar 9 01:55:04 EST 2005


Delaney, Timothy C (Timothy) wrote:
> Michael Hoffman wrote:
> 
> 
>>>>For those who don't know, these implement a hash set/map which
>>>>iterates in the order that the keys were first added to the set/map.
>>
>>I would love to see such a thing.
> 
> 
> I've proposed this on python-dev, but the general feeling so far is
> against it. So far the only use case is to remove duplicates without
> changing order, and there are iterator-based solutions which would
> normally be preferable.
> 
> It's pretty simple to roll your own, and I'll probably put together a
> Cookbook recipe for it.
> 
> Tim Delaney

Here's something to work with:

class OrdSet(object):
     def __init__(self, iterable):
         """Build an ordered, unique collection of hashable items"""
         self._data = {None:[None, None]} # None is the pointer to the first 		 
                                         # element.  This is unsatisfactory
                                          # because it cannot then be a member
                                          # of the collection
         self._last = None
         self.update(iterable)

     def add(self, obj):
         """Add an element to the collection"""
         data = self._data
         if not obj in data:
             last = self._last
             data[last][1] = obj
             data[obj] = [last, None]
             self._last = obj

     def update(self, iterable):
         """Update the collection with the union of itself and another"""
         obj = self._last
         data = self._data
         last = data[obj][0]
         for item in iterable:
             if item not in data:
                 data[obj] = [last, item]
                 last, obj = obj, item
         data[obj] = [last, None]
         self._last = obj

     def remove(self, item):
         """Remove an element from a set; it must be a member.

             If the element is not a member, raise a KeyError."""
         data = self._data
         prev, next = data[item]
         data[prev][1] = next
         data[next][0] = prev

     def discard(self, item):
         """Remove an element from a set if it is a member.
             If the element is not a member, do nothing."""
         try:
             self.remove(item)
         except KeyError:
             pass

     def __contains__(self, item):
         return item in self._data

     def pop(self):
         """Remove and the return the oldest element"""
         data = self._data
         prev, first =  data[None]
         data[None] = [None,data[first][1]]
         return first

     def clear(self):
         self.__init__([])

     def __iter__(self):
         """Iterate over the collection in order"""
         data = self._data
         prev, next = data[None]
         while next is not None:
             yield next
             prev, next = data[next]

     def __len__(self):
         return len(self._data)-1

     def __repr__(self):
         return "%s(%s)" % (self.__class__.__name__,list(self))

  >>> a= OrdSet(range(10))
  >>> a
  OrdSet([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
  >>> a.update(range(5,15))
  >>> a
  OrdSet([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
  >>> a.discard(8)
  >>> a
  OrdSet([0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14])
  >>>


Michael




More information about the Python-list mailing list