[Python-checkins] python/nondist/sandbox/sets set.py,1.3,1.4
gvanrossum@users.sourceforge.net
gvanrossum@users.sourceforge.net
Thu, 15 Aug 2002 13:36:53 -0700
Update of /cvsroot/python/python/nondist/sandbox/sets
In directory usw-pr-cvs1:/tmp/cvs-serv32323
Modified Files:
set.py
Log Message:
New version. I started with Alex Martelli's version from SF patch
580995, and then did a major rewrite (e.g. getting rid of list
comprehensions). I'll work on the test suite tomorrow.
Index: set.py
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/sets/set.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** set.py 28 Aug 2001 16:19:35 -0000 1.3
--- set.py 15 Aug 2002 20:36:51 -0000 1.4
***************
*** 1,171 ****
! """A class to represent sets in Python.
! This class implements sets as dictionaries whose values are ignored.
! The usual operations (union, intersection, deletion, etc.) are
! provided as both methods and operators. The only unusual feature of
! this class is that once a set's hash code has been calculated (for
! example, once it has been used as a dictionary key, or as an element
! in another set), that set 'freezes', and becomes immutable. See
! PEP-0218 for a full discussion.
"""
! __version__ = "$Revision$"
! __author__ = "$Author$"
! __date__ = "$Date$"
- from copy import deepcopy
! class Set(dictionary):
- # Displayed when operation forbidden because set has been frozen
- _Frozen_Msg = "Set is frozen: %s not permitted"
! #----------------------------------------
! def __init__(self, seq=None, sort_repr=0):
! """Construct a set, optionally initializing it with elements
! drawn from a sequence. If 'sort_repr' is true, the set's
! elements are displayed in sorted order. This slows down
! conversion, but simplifies comparison during testing. The
! 'hashcode' element is given a non-None value the first time
! the set's hashcode is calculated; the set is frozen
! thereafter."""
! self.sort_repr = sort_repr
if seq is not None:
! for x in seq:
! self[x] = None
! self.hashcode = None
! #----------------------------------------
! def __str__(self):
! """Convert set to string."""
! content = self.keys()
! if self.sort_repr:
! content.sort()
! return 'Set(' + `content` + ')'
! #----------------------------------------
! # '__repr__' returns the same thing as '__str__'
! __repr__ = __str__
- #----------------------------------------
def __iter__(self):
! """Return iterator for enumerating set elements. This is a
! keys iterator for the underlying dictionary."""
! return self.iterkeys()
! #----------------------------------------
! # Comparisons
def __lt__(self, other):
! return self._generic_cmp(other, dictionary.__lt__)
def __le__(self, other):
! return self._generic_cmp(other, dictionary.__le__)
def __eq__(self, other):
! return self._generic_cmp(other, dictionary.__eq__)
def __ne__(self, other):
! return self._generic_cmp(other, dictionary.__ne__)
def __gt__(self, other):
! return self._generic_cmp(other, dictionary.__gt__)
def __ge__(self, other):
! return self._generic_cmp(other, dictionary.__ge__)
!
! #----------------------------------------
! def __hash__(self):
!
! """Calculate hash code for set by xor'ing hash codes of set
! elements. This algorithm ensures that the hash code does not
! depend on the order in which elements are added to the
! code."""
!
! # If set already has hashcode, the set has been frozen, so
! # code is still valid.
! if self.hashcode is not None:
! return self.hashcode
!
! # Combine hash codes of set elements to produce set's hash code.
! self.hashcode = 0
! for elt in self:
! self.hashcode ^= hash(elt)
! return self.hashcode
!
! #----------------------------------------
! def is_frozen(self):
!
! """Report whether set is frozen or not. A frozen set is one
! whose hash code has already been calculated. Frozen sets may
! not be mutated, but unfrozen sets can be."""
! return self.hashcode is not None
! #----------------------------------------
! def __copy__(self):
! """Return a shallow copy of the set."""
! result = Set()
! for elt in self:
! result[elt] = None
! return result
! #----------------------------------------
! # Define 'copy' method as readable alias for '__copy__'.
! copy = __copy__
- #----------------------------------------
def __deepcopy__(self, memo):
! result = Set()
! memo[id(self)] = result
for elt in self:
! result[deepcopy(elt, memo)] = None
return result
! #----------------------------------------
! def clear(self):
! """Remove all elements of unfrozen set."""
! if self.hashcode is not None:
! raise ValueError, Set._Frozen_Msg % "clearing"
! dictionary.clear(self)
!
! #----------------------------------------
! def union_update(self, other):
! """Update set with union of its own elements and the elements
! in another set."""
!
! self._binary_sanity_check(other, "updating union")
! self.update(other)
! return self
- #----------------------------------------
def union(self, other):
! """Create new set whose elements are the union of this set's
! and another's."""
self._binary_sanity_check(other)
! result = self.__copy__()
! result.union_update(other)
return result
! #----------------------------------------
! def intersect_update(self, other):
! """Update set with intersection of its own elements and the
! elements in another set."""
!
! self._binary_sanity_check(other, "updating intersection")
! old_elements = self.keys()
! self.clear()
! for elt in old_elements:
! if elt in other:
! self[elt] = None
! return self
! #----------------------------------------
! def intersect(self, other):
! """Create new set whose elements are the intersection of this
! set's and another's."""
self._binary_sanity_check(other)
if len(self) <= len(other):
--- 1,172 ----
! """Classes to represent arbitrary sets (including sets of sets).
!
! This module implements sets using dictionaries whose values are
! ignored. The usual operations (union, intersection, deletion, etc.)
! are provided as both methods and operators.
!
! The following classes are provided:
!
! BaseSet -- All the operations common to both mutable and immutable
! sets. This is an abstract class, not meant to be directly
! instantiated.
!
! Set -- Mutable sets, subclass of BaseSet; not hashable.
!
! ImmutableSet -- Immutable sets, subclass of BaseSet; hashable.
! An iterable argument is mandatory to create an ImmutableSet.
!
! _TemporarilyImmutableSet -- Not a subclass of BaseSet: just a wrapper
! around a Set, hashable, giving the same hash value as the
! immutable set equivalent would have. Do not use this class
! directly.
!
! Only hashable objects can be added to a Set. In particular, you cannot
! really add a Set as an element to another Set; if you try, what is
! actuallly added is an ImmutableSet built from it (it compares equal to
! the one you tried adding).
!
! When you ask if `x in y' where x is a Set and y is a Set or
! ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and
! what's tested is actually `z in y'.
!
"""
! # Code history:
! #
! # - Greg V. Wilson wrote the first version, using a different approach
! # to the mutable/immutable problem, and inheriting from dict.
! #
! # - Alex Martelli modified Greg's version to implement the current
! # Set/ImmutableSet approach, and make the data an attribute.
! #
! # - Guido van Rossum rewrote much of the code, made some API changes,
! # and cleaned up the docstrings.
! __all__ = ['BaseSet', 'Set', 'ImmutableSet']
! class BaseSet(object):
! """Common base class for mutable and immutable sets."""
! # Constructor
! def __init__(self, seq=None, sort_repr=False):
! """Construct a set, optionally initializing it from a sequence.
!
! If the optional keyword argument sort_repr is True, the set's
! elements are displayed in sorted order. This slows down
! conversion to string, but simplifies comparison during testing.
! """
! self._sort_repr = sort_repr
! self._data = {}
if seq is not None:
! # I don't know a faster way to do this in pure Python.
! # Custom code written in C only did it 65% faster,
! # preallocating the dict to len(seq); without
! # preallocation it was only 25% faster. So the speed of
! # this Python code is respectable. Just copying True into
! # a local variable is responsible for a 7-8% speedup.
! data = self._data
! value = True
! for key in seq:
! data[key] = value
! # Standard protocols: __len__, __repr__, __str__, __iter__
! def __len__(self):
! """Return the number of elements of a set."""
! return len(self._data)
!
! def __repr__(self):
! """Return string representation of a set.
!
! This looks like 'Set([<list of elements>])'.
! """
! elements = self._data.keys()
! if self._sort_repr:
! elements.sort()
! return '%s(%r)' % (self.__class__.__name__, elements)
!
! # __str__ is the same as __repr__
! __str__ = __repr__
def __iter__(self):
! """Return an iterator over the elements or a set.
! This is the keys iterator for the underlying dict.
! """
! return self._data.iterkeys()
!
! # Comparisons. Ordering is determined by the ordering of the
! # underlying dicts (which is consistent though unpredictable).
def __lt__(self, other):
! self._binary_sanity_check(other)
! return self._data < other._data
def __le__(self, other):
! self._binary_sanity_check(other)
! return self._data <= other._data
def __eq__(self, other):
! self._binary_sanity_check(other)
! return self._data == other._data
def __ne__(self, other):
! self._binary_sanity_check(other)
! return self._data != other._data
def __gt__(self, other):
! self._binary_sanity_check(other)
! return self._data > other._data
def __ge__(self, other):
! self._binary_sanity_check(other)
! return self._data >= other._data
! # Copying operations
! def copy(self):
! """Return a shallow copy of a set."""
! return self.__class__(self, sort_repr=self._sort_repr)
! __copy__ = copy # For the copy module
def __deepcopy__(self, memo):
! """Return a deep copy of a set; used by copy module."""
! # This pre-creates the result and inserts it in the memo
! # early, in case the deep copy recurses into another reference
! # to this same set. A set can't be an element of itself, but
! # it can certainly contain an object that has a reference to
! # itself.
! from copy import deepcopy
! result = self.__class__([], self._sort_repr)
! memo[id(self)] = result
! data = result._data
! value = True
for elt in self:
! data[deepcopy(elt, memo)] = value
return result
! # Standard set operations: union, intersection, both differences
def union(self, other):
! """Return the union of two sets as a new set.
+ (I.e. all elements that are in either set.)
+ """
self._binary_sanity_check(other)
! result = self.__class__(self._data, self._sort_repr)
! result._data.update(other._data)
return result
! __or__ = union
! def intersection(self, other):
! """Return the intersection of two sets as a new set.
+ (I.e. all elements that are in both sets.)
+ """
self._binary_sanity_check(other)
if len(self) <= len(other):
***************
*** 173,340 ****
else:
little, big = other, self
! result = Set()
for elt in little:
if elt in big:
! result[elt] = None
return result
! #----------------------------------------
! def sym_difference_update(self, other):
! """Update set with symmetric difference of its own elements
! and the elements in another set. A value 'x' is in the result
! if it was originally present in one or the other set, but not
! in both."""
!
! self._binary_sanity_check(other, "updating symmetric difference")
! self = self._raw_sym_difference(self, other)
! return self
! #----------------------------------------
! def sym_difference(self, other):
! """Create new set with symmetric difference of this set's own
! elements and the elements in another set. A value 'x' is in
! the result if it was originally present in one or the other
! set, but not in both."""
self._binary_sanity_check(other)
! result = Set()
! result = self._raw_sym_difference(self, other)
return result
! #----------------------------------------
! def difference_update(self, other):
! """Remove all elements of another set from this set."""
! self._binary_sanity_check(other, "updating difference")
! old_elements = self.keys()
! self.clear()
! for elt in old_elements:
if elt not in other:
! self[elt] = None
! return self
! #----------------------------------------
! def difference(self, other):
! """Create new set containing elements of this set that are not
! present in another set."""
self._binary_sanity_check(other)
- result = Set()
for elt in self:
if elt not in other:
! result[elt] = None
return result
- #----------------------------------------
- # Arithmetic forms of operations
- __or__ = union
- __ror__ = union
- __ior__ = union_update
- __and__ = intersect
- __rand__ = intersect
- __iand__ = intersect_update
- __xor__ = sym_difference
- __rxor__ = sym_difference
- __ixor__ = sym_difference_update
- __sub__ = difference
- __rsub__ = difference
- __isub__ = difference_update
! #----------------------------------------
! def add(self, item):
! """Add an item to a set. This has no effect if the item is
! already present."""
! if self.hashcode is not None:
! raise ValueError, Set._Frozen_Msg % "adding an element"
! self[item] = None
- #----------------------------------------
def update(self, iterable):
! """Add all values from an iteratable (such as a tuple, list,
! or file) to this set."""
! if self.hashcode is not None:
! raise ValueError, Set._Frozen_Msg % "adding an element"
! for item in iterable:
! self[item] = None
! #----------------------------------------
! def remove(self, item):
! """Remove an element from a set if it is present, or raise a
! LookupError if it is not."""
! if self.hashcode is not None:
! raise ValueError, Set._Frozen_Msg % "removing an element"
! del self[item]
! #----------------------------------------
! def discard(self, item):
! """Remove an element from a set if it is present, or do
! nothing if it is not."""
! if self.hashcode is not None:
! raise ValueError, Set._Frozen_Msg % "removing an element"
try:
! del self[item]
except KeyError:
pass
- #----------------------------------------
def popitem(self):
"""Remove and return a randomly-chosen set element."""
! if self.hashcode is not None:
! raise ValueError, Set._Frozen_Msg % "removing an element"
! return dictionary.popitem(self)[0]
! #----------------------------------------
! def is_subset_of(self, other):
! """Reports whether other set contains this set."""
! if not isinstance(other, Set):
! raise ValueError, "Subset tests only permitted between sets"
! for element in self:
! if element not in other:
! return 0
! return 1
- #----------------------------------------
- # Generic comparison
- def _generic_cmp(self, other, method):
- """Compare one set with another using a dictionary method.
- Sets may only be compared with sets; ordering is determined
- by the keys in the underlying dictionary."""
- if not isinstance(other, Set):
- raise ValueError, "Sets can only be compared to sets"
- return method(self, other)
! #----------------------------------------
! # Check that the other argument to a binary operation is also a
! # set, and that this set is still mutable (if appropriate),
! # raising a ValueError if either condition is not met.
! def _binary_sanity_check(self, other, updating_op=''):
! if updating_op and (self.hashcode is not None):
! raise ValueError, Set._Frozen_Msg % updating_op
! if not isinstance(other, Set):
! raise ValueError, "Binary operation only permitted between sets"
- #----------------------------------------
- # Calculate the symmetric difference between the keys in two
- # dictionaries with don't-care values.
- def _raw_sym_difference(self, left, right):
- result = {}
- for elt in left:
- if elt not in right:
- result[elt] = None
- for elt in right:
- if elt not in left:
- result[elt] = None
- return result
- #----------------------------------------------------------------------
# Rudimentary self-tests
- #----------------------------------------------------------------------
! if __name__ == "__main__":
# Empty set
--- 174,442 ----
else:
little, big = other, self
! result = self.__class__([], self._sort_repr)
! data = result._data
! value = True
for elt in little:
if elt in big:
! data[elt] = value
return result
! __and__ = intersection
! def symmetric_difference(self, other):
! """Return the symmetric difference of two sets as a new set.
+ (I.e. all elements that are in exactly one of the sets.)
+ """
self._binary_sanity_check(other)
! result = self.__class__([], self._sort_repr)
! data = result._data
! value = True
! for elt in self:
! if elt not in other:
! data[elt] = value
! for elt in other:
! if elt not in self:
! data[elt] = value
return result
! __xor__ = symmetric_difference
! def difference(self, other):
! """Return the difference of two sets as a new Set.
!
! (I.e. all elements that are in this set and not in the other.)
! """
! self._binary_sanity_check(other)
! result = self.__class__([], self._sort_repr)
! data = result._data
! value = True
! for elt in self:
if elt not in other:
! data[elt] = value
! return result
! __sub__ = difference
!
! # Membership test
+ def __contains__(self, element):
+ """Report whether an element is a member of a set.
+
+ (Called in response to the expression `element in self'.)
+ """
+ try:
+ transform = element._as_temporarily_immutable
+ except AttributeError:
+ pass
+ else:
+ element = transform()
+ return element in self._data
+
+ # Subset and superset test
+
+ def issubset(self, other):
+ """Report whether another set contains this set."""
self._binary_sanity_check(other)
for elt in self:
if elt not in other:
! return False
! return True
!
! def issuperset(self, other):
! """Report whether this set contains another set."""
! self._binary_sanity_check(other)
! for elt in other:
! if elt not in self:
! return False
! return True
!
! # Assorted helpers
!
! def _binary_sanity_check(self, other):
! # Check that the other argument to a binary operation is also
! # a set, raising a TypeError otherwise.
! if not isinstance(other, BaseSet):
! raise TypeError, "Binary operation only permitted between sets"
!
! def _compute_hash(self):
! # Calculate hash code for a set by xor'ing the hash codes of
! # the elements. This algorithm ensures that the hash code
! # does not depend on the order in which elements are added to
! # the code. This is not called __hash__ because a BaseSet
! # should not be hashable; only an ImmutableSet is hashable.
! result = 0
! for elt in self:
! result ^= hash(elt)
return result
! class ImmutableSet(BaseSet):
! """Immutable set class."""
! # BaseSet + hashing
!
! _hashcode = None
!
! def __init__(self, seq, sort_repr=0):
! """Construct an immutable set from a sequence.
!
! If the optional keyword argument sort_repr is True, the set's
! elements are displayed in sorted order. This slows down
! conversion to string, but simplifies comparison during testing.
! """
! # Override the constructor to make 'seq' a required argument
! BaseSet.__init__(self, seq, sort_repr)
!
! def __hash__(self):
! if self._hashcode is None:
! self._hashcode = self._compute_hash()
! return self._hashcode
!
!
! class Set(BaseSet):
! """ Mutable set class."""
!
! # BaseSet + operations requiring mutability; no hashing
!
! # In-place union, intersection, differences
!
! def union_update(self, other):
! """Update a set with the union of itself and another."""
! self._binary_sanity_check(other)
! self._data.update(other._data)
! return self
!
! __ior__ = union_update
!
! def intersection_update(self, other):
! """Update a set with the intersection of itself and another."""
! self._binary_sanity_check(other)
! for elt in self._data.keys():
! if elt not in other:
! del self._data[elt]
! return self
!
! __iand__ = intersection_update
!
! def symmetric_difference_update(self, other):
! """Update a set with the symmetric difference of itself and another."""
! self._binary_sanity_check(other)
! data = self._data
! value = True
! for elt in other:
! if elt in data:
! del data[elt]
! else:
! data[elt] = value
! return self
!
! __ixor__ = symmetric_difference_update
!
! def difference_update(self, other):
! """Remove all elements of another set from this set."""
! self._binary_sanity_check(other)
! data = self._data
! value = True
! for elt in other:
! if elt in data:
! del data[elt]
! return self
!
! __isub__ = difference_update
!
! # Python dict-like mass mutations: update, clear
def update(self, iterable):
! """Add all values from an iterable (such as a list or file)."""
! value = True
! for elt in iterable:
! try:
! transform = elt._as_immutable
! except AttributeError:
! pass
! else:
! elt = transform()
! self._data[elt] = elt
! def clear(self):
! """Remove all elements from this set."""
! self._data.clear()
! # Single-element mutations: add, remove, discard
! def add(self, element):
! """Add an element to a set.
! This has no effect if the element is already present.
! """
! try:
! transform = element._as_immutable
! except AttributeError:
! pass
! else:
! element = transform()
! self._data[element] = True
! def remove(self, element):
! """Remove an element from a set; it must be a member.
!
! If the element is not a member, raise a KeyError.
! """
try:
! transform = element._as_temporarily_immutable
! except AttributeError:
! pass
! else:
! element = transform()
! del self._data[element]
!
! def discard(self, element):
! """Remove an element from a set if it is a member.
!
! If the element is not a member, do nothing.
! """
! try:
! del self._data[element]
except KeyError:
pass
def popitem(self):
"""Remove and return a randomly-chosen set element."""
+ return self._data.popitem()[0]
! def _as_immutable(self):
! # Return a copy of self as an immutable set
! return ImmutableSet(self)
! def _as_temporarily_immutable(self):
! # Return self wrapped in a temporarily immutable set
! return _TemporarilyImmutableSet(self)
! class _TemporarilyImmutableSet(object):
! # Wrap a mutable set as if it was temporarily immutable.
! # This only supplies hashing and equality comparisons.
!
! _hashcode = None
!
! def __init__(self, set):
! self._set = _set
!
! def __hash__(self):
! if self._hashcode is None:
! self._hashcode = self._set._compute_hash()
! return self._hashcode
!
! def __eq__(self, other):
! return self._set == other
!
! def __ne__(self, other):
! return self._set != other
# Rudimentary self-tests
! def _test():
# Empty set
***************
*** 425,426 ****
--- 527,532 ----
print "All tests passed"
+
+
+ if __name__ == "__main__":
+ _test()