[Python-checkins] python/nondist/sandbox/sets set.py,1.3,1.4

gvanrossum@users.sourceforge.net gvanrossum@users.sourceforge.net
Thu, 15 Aug 2002 13:36:53 -0700


Update of /cvsroot/python/python/nondist/sandbox/sets
In directory usw-pr-cvs1:/tmp/cvs-serv32323

Modified Files:
	set.py 
Log Message:
New version.  I started with Alex Martelli's version from SF patch
580995, and then did a major rewrite (e.g. getting rid of list
comprehensions).  I'll work on the test suite tomorrow.


Index: set.py
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/sets/set.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** set.py	28 Aug 2001 16:19:35 -0000	1.3
--- set.py	15 Aug 2002 20:36:51 -0000	1.4
***************
*** 1,171 ****
! """A class to represent sets in Python.
! This class implements sets as dictionaries whose values are ignored.
! The usual operations (union, intersection, deletion, etc.) are
! provided as both methods and operators.  The only unusual feature of
! this class is that once a set's hash code has been calculated (for
! example, once it has been used as a dictionary key, or as an element
! in another set), that set 'freezes', and becomes immutable.  See
! PEP-0218 for a full discussion.
  """
  
! __version__ = "$Revision$"
! __author__  = "$Author$"
! __date__    = "$Date$"
  
- from copy import deepcopy
  
! class Set(dictionary):
  
-     # Displayed when operation forbidden because set has been frozen
-     _Frozen_Msg = "Set is frozen: %s not permitted"
  
!     #----------------------------------------
!     def __init__(self, seq=None, sort_repr=0):
  
!         """Construct a set, optionally initializing it with elements
!         drawn from a sequence.  If 'sort_repr' is true, the set's
!         elements are displayed in sorted order.  This slows down
!         conversion, but simplifies comparison during testing.  The
!         'hashcode' element is given a non-None value the first time
!         the set's hashcode is calculated; the set is frozen
!         thereafter."""
  
!         self.sort_repr = sort_repr
          if seq is not None:
!             for x in seq:
!                 self[x] = None
!         self.hashcode = None
  
!     #----------------------------------------
!     def __str__(self):
!         """Convert set to string."""
!         content = self.keys()
!         if self.sort_repr:
!             content.sort()
!         return 'Set(' + `content` + ')'
  
!     #----------------------------------------
!     # '__repr__' returns the same thing as '__str__'
!     __repr__ = __str__
  
-     #----------------------------------------
      def __iter__(self):
!         """Return iterator for enumerating set elements.  This is a
!         keys iterator for the underlying dictionary."""
!         return self.iterkeys()
  
!     #----------------------------------------
!     # Comparisons
  
      def __lt__(self, other):
!         return self._generic_cmp(other, dictionary.__lt__)
  
      def __le__(self, other):
!         return self._generic_cmp(other, dictionary.__le__)
  
      def __eq__(self, other):
!         return self._generic_cmp(other, dictionary.__eq__)
  
      def __ne__(self, other):
!         return self._generic_cmp(other, dictionary.__ne__)
  
      def __gt__(self, other):
!         return self._generic_cmp(other, dictionary.__gt__)
  
      def __ge__(self, other):
!         return self._generic_cmp(other, dictionary.__ge__)
! 
!     #----------------------------------------
!     def __hash__(self):
! 
!         """Calculate hash code for set by xor'ing hash codes of set
!         elements.  This algorithm ensures that the hash code does not
!         depend on the order in which elements are added to the
!         code."""
! 
!         # If set already has hashcode, the set has been frozen, so
!         # code is still valid.
!         if self.hashcode is not None:
!             return self.hashcode
! 
!         # Combine hash codes of set elements to produce set's hash code.
!         self.hashcode = 0
!         for elt in self:
!             self.hashcode ^= hash(elt)
!         return self.hashcode
! 
!     #----------------------------------------
!     def is_frozen(self):
! 
!         """Report whether set is frozen or not.  A frozen set is one
!         whose hash code has already been calculated.  Frozen sets may
!         not be mutated, but unfrozen sets can be."""
  
!         return self.hashcode is not None
  
!     #----------------------------------------
!     def __copy__(self):
!         """Return a shallow copy of the set."""
!         result = Set()
!         for elt in self:
!             result[elt] = None
!         return result
  
!     #----------------------------------------
!     # Define 'copy' method as readable alias for '__copy__'.
!     copy = __copy__
  
-     #----------------------------------------
      def __deepcopy__(self, memo):
!         result          = Set()
!         memo[id(self)]  = result
          for elt in self:
!             result[deepcopy(elt, memo)] = None
          return result
  
!     #----------------------------------------
!     def clear(self):
!         """Remove all elements of unfrozen set."""
!         if self.hashcode is not None:
!             raise ValueError, Set._Frozen_Msg % "clearing"
!         dictionary.clear(self)
! 
!     #----------------------------------------
!     def union_update(self, other):
!         """Update set with union of its own elements and the elements
!         in another set."""
! 
!         self._binary_sanity_check(other, "updating union")
!         self.update(other)
!         return self
  
-     #----------------------------------------
      def union(self, other):
!         """Create new set whose elements are the union of this set's
!         and another's."""
  
          self._binary_sanity_check(other)
!         result = self.__copy__()
!         result.union_update(other)
          return result
  
!     #----------------------------------------
!     def intersect_update(self, other):
!         """Update set with intersection of its own elements and the
!         elements in another set."""
! 
!         self._binary_sanity_check(other, "updating intersection")
!         old_elements = self.keys()
!         self.clear()
!         for elt in old_elements:
!             if elt in other:
!                 self[elt] = None
!         return self
  
!     #----------------------------------------
!     def intersect(self, other):
!         """Create new set whose elements are the intersection of this
!         set's and another's."""
  
          self._binary_sanity_check(other)
          if len(self) <= len(other):
--- 1,172 ----
! """Classes to represent arbitrary sets (including sets of sets).
! 
! This module implements sets using dictionaries whose values are
! ignored.  The usual operations (union, intersection, deletion, etc.)
! are provided as both methods and operators.
! 
! The following classes are provided:
! 
! BaseSet -- All the operations common to both mutable and immutable
!     sets. This is an abstract class, not meant to be directly
!     instantiated.
! 
! Set -- Mutable sets, subclass of BaseSet; not hashable.
! 
! ImmutableSet -- Immutable sets, subclass of BaseSet; hashable.
!     An iterable argument is mandatory to create an ImmutableSet.
! 
! _TemporarilyImmutableSet -- Not a subclass of BaseSet: just a wrapper
!     around a Set, hashable, giving the same hash value as the
!     immutable set equivalent would have.  Do not use this class
!     directly.
! 
! Only hashable objects can be added to a Set. In particular, you cannot
! really add a Set as an element to another Set; if you try, what is
! actuallly added is an ImmutableSet built from it (it compares equal to
! the one you tried adding).
! 
! When you ask if `x in y' where x is a Set and y is a Set or
! ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and
! what's tested is actually `z in y'.
! 
  """
  
! # Code history:
! #
! # - Greg V. Wilson wrote the first version, using a different approach
! #   to the mutable/immutable problem, and inheriting from dict.
! #
! # - Alex Martelli modified Greg's version to implement the current
! #   Set/ImmutableSet approach, and make the data an attribute.
! #
! # - Guido van Rossum rewrote much of the code, made some API changes,
! #   and cleaned up the docstrings.
  
  
! __all__ = ['BaseSet', 'Set', 'ImmutableSet']
  
  
! class BaseSet(object):
!     """Common base class for mutable and immutable sets."""
  
!     # Constructor
  
!     def __init__(self, seq=None, sort_repr=False):
!         """Construct a set, optionally initializing it from a sequence.
! 
!         If the optional keyword argument sort_repr is True, the set's
!         elements are displayed in sorted order.  This slows down
!         conversion to string, but simplifies comparison during testing.
!         """
!         self._sort_repr = sort_repr
!         self._data = {}
          if seq is not None:
!             # I don't know a faster way to do this in pure Python.
!             # Custom code written in C only did it 65% faster,
!             # preallocating the dict to len(seq); without
!             # preallocation it was only 25% faster.  So the speed of
!             # this Python code is respectable.  Just copying True into
!             # a local variable is responsible for a 7-8% speedup.
!             data = self._data
!             value = True
!             for key in seq:
!                 data[key] = value
  
!     # Standard protocols: __len__, __repr__, __str__, __iter__
  
!     def __len__(self):
!         """Return the number of elements of a set."""
!         return len(self._data)
! 
!     def __repr__(self):
!         """Return string representation of a set.
! 
!         This looks like 'Set([<list of elements>])'.
!         """
!         elements = self._data.keys()
!         if self._sort_repr:
!             elements.sort()
!         return '%s(%r)' % (self.__class__.__name__, elements)
! 
!     # __str__ is the same as __repr__
!     __str__ = __repr__
  
      def __iter__(self):
!         """Return an iterator over the elements or a set.
  
!         This is the keys iterator for the underlying dict.
!         """
!         return self._data.iterkeys()
! 
!     # Comparisons.  Ordering is determined by the ordering of the
!     # underlying dicts (which is consistent though unpredictable).
  
      def __lt__(self, other):
!         self._binary_sanity_check(other)
!         return self._data < other._data
  
      def __le__(self, other):
!         self._binary_sanity_check(other)
!         return self._data <= other._data
  
      def __eq__(self, other):
!         self._binary_sanity_check(other)
!         return self._data == other._data
  
      def __ne__(self, other):
!         self._binary_sanity_check(other)
!         return self._data != other._data
  
      def __gt__(self, other):
!         self._binary_sanity_check(other)
!         return self._data > other._data
  
      def __ge__(self, other):
!         self._binary_sanity_check(other)
!         return self._data >= other._data
  
!     # Copying operations
  
!     def copy(self):
!         """Return a shallow copy of a set."""
!         return self.__class__(self, sort_repr=self._sort_repr)
  
!     __copy__ = copy # For the copy module
  
      def __deepcopy__(self, memo):
!         """Return a deep copy of a set; used by copy module."""
!         # This pre-creates the result and inserts it in the memo
!         # early, in case the deep copy recurses into another reference
!         # to this same set.  A set can't be an element of itself, but
!         # it can certainly contain an object that has a reference to
!         # itself.
!         from copy import deepcopy
!         result = self.__class__([], self._sort_repr)
!         memo[id(self)] = result
!         data = result._data
!         value = True
          for elt in self:
!             data[deepcopy(elt, memo)] = value
          return result
  
!     # Standard set operations: union, intersection, both differences
  
      def union(self, other):
!         """Return the union of two sets as a new set.
  
+         (I.e. all elements that are in either set.)
+         """
          self._binary_sanity_check(other)
!         result = self.__class__(self._data, self._sort_repr)
!         result._data.update(other._data)
          return result
  
!     __or__ = union
  
!     def intersection(self, other):
!         """Return the intersection of two sets as a new set.
  
+         (I.e. all elements that are in both sets.)
+         """
          self._binary_sanity_check(other)
          if len(self) <= len(other):
***************
*** 173,340 ****
          else:
              little, big = other, self
!         result = Set()
          for elt in little:
              if elt in big:
!                 result[elt] = None
          return result
  
!     #----------------------------------------
!     def sym_difference_update(self, other):
!         """Update set with symmetric difference of its own elements
!         and the elements in another set.  A value 'x' is in the result
!         if it was originally present in one or the other set, but not
!         in both."""
! 
!         self._binary_sanity_check(other, "updating symmetric difference")
!         self = self._raw_sym_difference(self, other)
!         return self
  
!     #----------------------------------------
!     def sym_difference(self, other):
!         """Create new set with symmetric difference of this set's own
!         elements and the elements in another set.  A value 'x' is in
!         the result if it was originally present in one or the other
!         set, but not in both."""
  
          self._binary_sanity_check(other)
!         result = Set()
!         result = self._raw_sym_difference(self, other)
          return result
  
!     #----------------------------------------
!     def difference_update(self, other):
!         """Remove all elements of another set from this set."""
  
!         self._binary_sanity_check(other, "updating difference")
!         old_elements = self.keys()
!         self.clear()
!         for elt in old_elements:
              if elt not in other:
!                 self[elt] = None
!         return self
  
!     #----------------------------------------
!     def difference(self, other):
!         """Create new set containing elements of this set that are not
!         present in another set."""
  
          self._binary_sanity_check(other)
-         result = Set()
          for elt in self:
              if elt not in other:
!                 result[elt] = None
          return result
  
-     #----------------------------------------
-     # Arithmetic forms of operations
-     __or__      = union
-     __ror__     = union
-     __ior__     = union_update
-     __and__     = intersect
-     __rand__    = intersect
-     __iand__    = intersect_update
-     __xor__     = sym_difference
-     __rxor__    = sym_difference
-     __ixor__    = sym_difference_update
-     __sub__     = difference
-     __rsub__    = difference
-     __isub__    = difference_update
  
!     #----------------------------------------
!     def add(self, item):
!         """Add an item to a set.  This has no effect if the item is
!         already present."""
  
!         if self.hashcode is not None:
!             raise ValueError, Set._Frozen_Msg % "adding an element"
!         self[item] = None
  
-     #----------------------------------------
      def update(self, iterable):
!         """Add all values from an iteratable (such as a tuple, list,
!         or file) to this set."""
  
!         if self.hashcode is not None:
!             raise ValueError, Set._Frozen_Msg % "adding an element"
!         for item in iterable:
!             self[item] = None
  
!     #----------------------------------------
!     def remove(self, item):
!         """Remove an element from a set if it is present, or raise a
!         LookupError if it is not."""
  
!         if self.hashcode is not None:
!             raise ValueError, Set._Frozen_Msg % "removing an element"
!         del self[item]
  
!     #----------------------------------------
!     def discard(self, item):
!         """Remove an element from a set if it is present, or do
!         nothing if it is not."""
  
!         if self.hashcode is not None:
!             raise ValueError, Set._Frozen_Msg % "removing an element"
          try:
!             del self[item]
          except KeyError:
              pass
  
-     #----------------------------------------
      def popitem(self):
          """Remove and return a randomly-chosen set element."""
  
!         if self.hashcode is not None:
!             raise ValueError, Set._Frozen_Msg % "removing an element"
!         return dictionary.popitem(self)[0]
  
!     #----------------------------------------
!     def is_subset_of(self, other):
!         """Reports whether other set contains this set."""
!         if not isinstance(other, Set):
!             raise ValueError, "Subset tests only permitted between sets"
!         for element in self:
!             if element not in other:
!                 return 0
!         return 1
  
-     #----------------------------------------
-     # Generic comparison
-     def _generic_cmp(self, other, method):
-         """Compare one set with another using a dictionary method.
-         Sets may only be compared with sets; ordering is determined
-         by the keys in the underlying dictionary."""
-         if not isinstance(other, Set):
-             raise ValueError, "Sets can only be compared to sets"
-         return method(self, other)
  
!     #----------------------------------------
!     # Check that the other argument to a binary operation is also a
!     # set, and that this set is still mutable (if appropriate),
!     # raising a ValueError if either condition is not met.
!     def _binary_sanity_check(self, other, updating_op=''):
!         if updating_op and (self.hashcode is not None):
!             raise ValueError, Set._Frozen_Msg % updating_op
!         if not isinstance(other, Set):
!             raise ValueError, "Binary operation only permitted between sets"
  
-     #----------------------------------------
-     # Calculate the symmetric difference between the keys in two
-     # dictionaries with don't-care values.
-     def _raw_sym_difference(self, left, right):
-         result = {}
-         for elt in left:
-             if elt not in right:
-                 result[elt] = None
-         for elt in right:
-             if elt not in left:
-                 result[elt] = None
-         return result
  
- #----------------------------------------------------------------------
  # Rudimentary self-tests
- #----------------------------------------------------------------------
  
! if __name__ == "__main__":
  
      # Empty set
--- 174,442 ----
          else:
              little, big = other, self
!         result = self.__class__([], self._sort_repr)
!         data = result._data
!         value = True
          for elt in little:
              if elt in big:
!                 data[elt] = value
          return result
  
!     __and__ = intersection
  
!     def symmetric_difference(self, other):
!         """Return the symmetric difference of two sets as a new set.
  
+         (I.e. all elements that are in exactly one of the sets.)
+         """
          self._binary_sanity_check(other)
!         result = self.__class__([], self._sort_repr)
!         data = result._data
!         value = True
!         for elt in self:
!             if elt not in other:
!                 data[elt] = value
!         for elt in other:
!             if elt not in self:
!                 data[elt] = value
          return result
  
!     __xor__ = symmetric_difference
  
!     def difference(self, other):
!         """Return the difference of two sets as a new Set.
! 
!         (I.e. all elements that are in this set and not in the other.)
!         """
!         self._binary_sanity_check(other)
!         result = self.__class__([], self._sort_repr)
!         data = result._data
!         value = True
!         for elt in self:
              if elt not in other:
!                 data[elt] = value
!         return result
  
!     __sub__ = difference
! 
!     # Membership test
  
+     def __contains__(self, element):
+         """Report whether an element is a member of a set.
+ 
+         (Called in response to the expression `element in self'.)
+         """
+         try:
+             transform = element._as_temporarily_immutable
+         except AttributeError:
+             pass
+         else:
+             element = transform()
+         return element in self._data
+ 
+     # Subset and superset test
+ 
+     def issubset(self, other):
+         """Report whether another set contains this set."""
          self._binary_sanity_check(other)
          for elt in self:
              if elt not in other:
!                 return False
!         return True
! 
!     def issuperset(self, other):
!         """Report whether this set contains another set."""
!         self._binary_sanity_check(other)
!         for elt in other:
!             if elt not in self:
!                 return False
!         return True
! 
!     # Assorted helpers
! 
!     def _binary_sanity_check(self, other):
!         # Check that the other argument to a binary operation is also
!         # a set, raising a TypeError otherwise.
!         if not isinstance(other, BaseSet):
!             raise TypeError, "Binary operation only permitted between sets"
! 
!     def _compute_hash(self):
!         # Calculate hash code for a set by xor'ing the hash codes of
!         # the elements.  This algorithm ensures that the hash code
!         # does not depend on the order in which elements are added to
!         # the code.  This is not called __hash__ because a BaseSet
!         # should not be hashable; only an ImmutableSet is hashable.
!         result = 0
!         for elt in self:
!             result ^= hash(elt)
          return result
  
  
! class ImmutableSet(BaseSet):
!     """Immutable set class."""
  
!     # BaseSet + hashing
! 
!     _hashcode = None
! 
!     def __init__(self, seq, sort_repr=0):
!         """Construct an immutable set from a sequence.
! 
!         If the optional keyword argument sort_repr is True, the set's
!         elements are displayed in sorted order.  This slows down
!         conversion to string, but simplifies comparison during testing.
!         """
!         # Override the constructor to make 'seq' a required argument
!         BaseSet.__init__(self, seq, sort_repr)
! 
!     def __hash__(self):
!         if self._hashcode is None:
!             self._hashcode = self._compute_hash()
!         return self._hashcode
! 
! 
! class Set(BaseSet):
!     """ Mutable set class."""
! 
!     # BaseSet + operations requiring mutability; no hashing
! 
!     # In-place union, intersection, differences
! 
!     def union_update(self, other):
!         """Update a set with the union of itself and another."""
!         self._binary_sanity_check(other)
!         self._data.update(other._data)
!         return self
! 
!     __ior__ = union_update
! 
!     def intersection_update(self, other):
!         """Update a set with the intersection of itself and another."""
!         self._binary_sanity_check(other)
!         for elt in self._data.keys():
!             if elt not in other:
!                 del self._data[elt]
!         return self
! 
!     __iand__ = intersection_update
! 
!     def symmetric_difference_update(self, other):
!         """Update a set with the symmetric difference of itself and another."""
!         self._binary_sanity_check(other)
!         data = self._data
!         value = True
!         for elt in other:
!             if elt in data:
!                 del data[elt]
!             else:
!                 data[elt] = value
!         return self
! 
!     __ixor__ = symmetric_difference_update
! 
!     def difference_update(self, other):
!         """Remove all elements of another set from this set."""
!         self._binary_sanity_check(other)
!         data = self._data
!         value = True
!         for elt in other:
!             if elt in data:
!                 del data[elt]
!         return self
! 
!     __isub__ = difference_update
! 
!     # Python dict-like mass mutations: update, clear
  
      def update(self, iterable):
!         """Add all values from an iterable (such as a list or file)."""
!         value = True
!         for elt in iterable:
!             try:
!                 transform = elt._as_immutable
!             except AttributeError:
!                 pass
!             else:
!                 elt = transform()
!             self._data[elt] = elt
  
!     def clear(self):
!         """Remove all elements from this set."""
!         self._data.clear()
  
!     # Single-element mutations: add, remove, discard
  
!     def add(self, element):
!         """Add an element to a set.
  
!         This has no effect if the element is already present.
!         """
!         try:
!             transform = element._as_immutable
!         except AttributeError:
!             pass
!         else:
!             element = transform()
!         self._data[element] = True
  
!     def remove(self, element):
!         """Remove an element from a set; it must be a member.
! 
!         If the element is not a member, raise a KeyError.
!         """
          try:
!             transform = element._as_temporarily_immutable
!         except AttributeError:
!             pass
!         else:
!             element = transform()
!         del self._data[element]
! 
!     def discard(self, element):
!         """Remove an element from a set if it is a member.
! 
!         If the element is not a member, do nothing.
!         """
!         try:
!             del self._data[element]
          except KeyError:
              pass
  
      def popitem(self):
          """Remove and return a randomly-chosen set element."""
+         return self._data.popitem()[0]
  
!     def _as_immutable(self):
!         # Return a copy of self as an immutable set
!         return ImmutableSet(self)
  
!     def _as_temporarily_immutable(self):
!         # Return self wrapped in a temporarily immutable set
!         return _TemporarilyImmutableSet(self)
  
  
! class _TemporarilyImmutableSet(object):
!     # Wrap a mutable set as if it was temporarily immutable.
!     # This only supplies hashing and equality comparisons.
! 
!     _hashcode = None
! 
!     def __init__(self, set):
!         self._set = _set
! 
!     def __hash__(self):
!         if self._hashcode is None:
!             self._hashcode = self._set._compute_hash()
!         return self._hashcode
! 
!     def __eq__(self, other):
!         return self._set == other
! 
!     def __ne__(self, other):
!         return self._set != other
  
  
  # Rudimentary self-tests
  
! def _test():
  
      # Empty set
***************
*** 425,426 ****
--- 527,532 ----
  
      print "All tests passed"
+ 
+ 
+ if __name__ == "__main__":
+     _test()