Best way to make a list unique?
Michael Spencer
mahs at telcopartners.com
Wed Mar 9 01:55:04 EST 2005
Delaney, Timothy C (Timothy) wrote:
> Michael Hoffman wrote:
>
>
>>>>For those who don't know, these implement a hash set/map which
>>>>iterates in the order that the keys were first added to the set/map.
>>
>>I would love to see such a thing.
>
>
> I've proposed this on python-dev, but the general feeling so far is
> against it. So far the only use case is to remove duplicates without
> changing order, and there are iterator-based solutions which would
> normally be preferable.
>
> It's pretty simple to roll your own, and I'll probably put together a
> Cookbook recipe for it.
>
> Tim Delaney
Here's something to work with:
class OrdSet(object):
def __init__(self, iterable):
"""Build an ordered, unique collection of hashable items"""
self._data = {None:[None, None]} # None is the pointer to the first
# element. This is unsatisfactory
# because it cannot then be a member
# of the collection
self._last = None
self.update(iterable)
def add(self, obj):
"""Add an element to the collection"""
data = self._data
if not obj in data:
last = self._last
data[last][1] = obj
data[obj] = [last, None]
self._last = obj
def update(self, iterable):
"""Update the collection with the union of itself and another"""
obj = self._last
data = self._data
last = data[obj][0]
for item in iterable:
if item not in data:
data[obj] = [last, item]
last, obj = obj, item
data[obj] = [last, None]
self._last = obj
def remove(self, item):
"""Remove an element from a set; it must be a member.
If the element is not a member, raise a KeyError."""
data = self._data
prev, next = data[item]
data[prev][1] = next
data[next][0] = prev
def discard(self, item):
"""Remove an element from a set if it is a member.
If the element is not a member, do nothing."""
try:
self.remove(item)
except KeyError:
pass
def __contains__(self, item):
return item in self._data
def pop(self):
"""Remove and the return the oldest element"""
data = self._data
prev, first = data[None]
data[None] = [None,data[first][1]]
return first
def clear(self):
self.__init__([])
def __iter__(self):
"""Iterate over the collection in order"""
data = self._data
prev, next = data[None]
while next is not None:
yield next
prev, next = data[next]
def __len__(self):
return len(self._data)-1
def __repr__(self):
return "%s(%s)" % (self.__class__.__name__,list(self))
>>> a= OrdSet(range(10))
>>> a
OrdSet([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> a.update(range(5,15))
>>> a
OrdSet([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
>>> a.discard(8)
>>> a
OrdSet([0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14])
>>>
Michael
More information about the Python-list
mailing list