Trees

Ian Kelly ian.g.kelly at gmail.com
Wed Jan 21 11:15:25 EST 2015


On Wed, Jan 21, 2015 at 7:47 AM, Steven D'Aprano
<steve+comp.lang.python at pearwood.info> wrote:
>> More irksome that for the second we've to preface with
>>
>> from collections import Counter
>>
>> And still more a PITA that a straightforward standard name like bag (or
>> multiset) is called by such an ungoogleable misleading name as counter.
>
> It is not misleading. collections.Counter is a class for counting (i.e. a
> counter), not a bag. It is merely *similar* to a bag, but the API is not
> the same as the usual bag API because the motivating design is not the same
> as for bags.

To expand on this, Counter does provide a few multiset operations like
union and intersection. But there are a number of cases where it falls
short or does not perform as expected. To name a few:

* There are no subset or superset comparison operations provided.
* len(counter) returns the number of keys, not the number of elements.
* Unlike a multiset, Counters can have negative or zero counts; one
curious consequence of this is that an "empty" Counter can have a
non-zero length and evaluate as true in boolean contexts.

A while back I fiddled around with creating a true MultiSet class
using a Counter as the underlying implementation. Here's what I came
up with:


from collections import Counter
from collections.abc import MutableSet, Set


__all__ = ['MultiSet']


class MultiSet(MutableSet):

  """Multiset class containing hashable items."""

  # Class invariants:
  #   * self._counter is a Counter s/t all values are positive ints denoting
  #     multiplicity.
  #   * set(self) == self._counter.keys()

  def __init__(self, iterable_or_mapping=()):
    """Create a new, empty MultiSet. If passed an iterable argument, initialize
    the MultiSet with items from the iterable. If passed a mapping argument,
    initialize the MultiSet with the keys of the mapping, each repeated a number
    of times equal to the associated value.
    """
    self._counter = +Counter(iterable_or_mapping)

  @classmethod
  def _from_counter(cls, counter):
    self = cls.__new__(cls)
    self._counter = counter
    return self

  def __contains__(self, item):
    return item in self._counter

  def __hash__(self):
    raise TypeError('unhashable type: %s' % type(self))

  def __iter__(self):
    return self._counter.elements()

  def __len__(self):
    # TODO: consider caching the length.
    return sum(self._counter.values())

  def __repr__(self):
    if self:
      return 'MultiSet(%r)' % list(self)
    return 'MultiSet()'

  def add(self, item):
    """Add an element to the MultiSet."""
    self._counter[item] += 1

  def clear(self):
    """Remove all elements from the MultiSet, leaving it empty."""
    self._counter.clear()

  def count(self, item):
    """Return the number of occurrences of the element."""
    return self._counter[item]

  def discard(self, item):
    """Remove an element from the MultiSet.

    If there are multiple occurrences, remove only one such occurrence. If the
    element is not a member, do nothing.
    """
    if item not in self._counter:
      return
    if self._counter[item] == 1:
      del self._counter[item]
    else:
      self._counter[item] -= 1

  def __or__(self, other):
    """Return the union of the MultiSet and another set."""
    if not isinstance(other, Set):
      return NotImplemented
    return self._from_counter(self._counter | _get_counter(other))

  __ror__ = __or__

  def __ior__(self, other):
    """Perform the in-place union of the MultiSet and another set."""
    if not isinstance(other, Set):
      return NotImplemented
    self._counter |= _get_counter(other)
    return self

  def __and__(self, other):
    """Return the intersection of the MultiSet and another set."""
    if not isinstance(other, Set):
      return NotImplemented
    return self._from_counter(self._counter & _get_counter(other))

  __rand__ = __and__

  def __iand__(self, other):
    """Perform the in-place intersection of the MultiSet and another set."""
    if not isinstance(other, Set):
      return NotImplemented
    self._counter &= _get_counter(other)
    return self

  def __sub__(self, other):
    """Return the difference of the MultiSet and another set."""
    if not isinstance(other, Set):
      return NotImplemented
    return self._from_counter(self._counter - _get_counter(other))

  def __rsub__(self, other):
    """Return the difference of another set and the MultiSet."""
    if not isinstance(other, Set):
      return NotImplemented
    return self._from_counter(_get_counter(other) - self._counter)

  def __isub__(self, other):
    """Perform the in-place set difference of the MultiSet and another set."""
    if not isinstance(other, Set):
      return NotImplemented
    self._counter -= _get_counter(other)
    return self

  def __xor__(self, other):
    """Return the symmetric difference of the MultiSet and another set."""
    if not isinstance(other, Set):
      return NotImplemented
    # X ^ Y == (X - Y) | (Y - X)
    other_counter = _get_counter(other)
    counter = (self._counter - other_counter) | (other_counter - self._counter)
    return self._from_counter(counter)

  __rxor__ = __xor__

  def __ixor__(self, other):
    "Perform the in-place symmetric difference of the MultiSet and another set."
    if not isinstance(other, Set):
      return NotImplemented
    # X ^ Y == (X - Y) | (Y - X)
    other_counter = _get_counter(other)
    other_diff = other_counter - self._counter
    self._counter -= other_counter
    self._counter |= other_diff
    return self

  def __eq__(self, other):
    """Return whether the MultiSet and the other set have the same contents."""
    if not isinstance(other, Set):
      return NotImplemented
    return self._counter == _get_counter(other)

  def __ne__(self, other):
    """Return whether the MultiSet and the other set have different contents."""
    if not isinstance(other, Set):
      return NotImplemented
    return self._counter != _get_counter(other)

  def __le__(self, other):
    """Return whether the MultiSet is a subset of the other set."""
    if not isinstance(other, Set):
      return NotImplemented
    other_counter = _get_counter(other)
    return all(self._counter[x] <= other_counter[x] for x in self._counter)

  def __lt__(self, other):
    """Return whether the MultiSet is a proper subset of the other set."""
    if not isinstance(other, Set):
      return NotImplemented
    return self.__le__(other) and not self.__eq__(other)

  def __ge__(self, other):
    """Return whether the MultiSet is a superset of the other set."""
    if not isinstance(other, Set):
      return NotImplemented
    other_counter = _get_counter(other)
    return all(self._counter[x] >= other_counter[x] for x in other_counter)

  def __gt__(self, other):
    """Return whether the MultiSet is a proper superset of the other set."""
    if not isinstance(other, Set):
      return NotImplemented
    return self.__ge__(other) and not self.__eq__(other)


def _get_counter(maybe_multiset):
  if isinstance(maybe_multiset, MultiSet):
    return maybe_multiset._counter
  else:
    return Counter(maybe_multiset)



More information about the Python-list mailing list