dict generator question

MRAB google at mrabarnett.plus.com
Fri Sep 19 20:00:56 EDT 2008


On Sep 19, 2:01 pm, bearophileH... at lycos.com wrote:
> Gerard flanagan:
>
> > data.sort()
> > datadict = \
> > dict((k, len(list(g))) for k,g in groupby(data, lambda s:
> >      '.'.join(s.split('.',2)[:2])))
>
> That code may run correctly, but it's quite unreadable, while good
> Python programmers value high readability. So the right thing to do is
> to split that line into parts, giving meaningful names, and maybe even
> add comments.
>
> len(list(g)) looks like a good job for my little leniter() function
> (or better, just an extension to the semantics of len) that some time
> ago some people here judged as useless, while I use it often in both
> Python and D ;-)
>
Extending len() to support iterables sounds like a good idea, except
that it could be misleading when:

    len(file(path))

returns the number of lines and /not/ the length in bytes as you might
first think! :-)

Anyway, here's another possible implementation using bags (multisets):

def major_version(version_string):
    """Return the first two dot-separated fields of a version string.

    For example, '1.2.3.2' becomes '1.2'. A string with fewer than two
    fields (such as '1' or '') is returned unchanged.
    """
    fields = version_string.split('.')
    leading_fields = fields[:2]
    return '.'.join(leading_fields)

# Sample dotted version strings; several share the same "major.minor" prefix.
versions = [
    "1.1.1.1",
    "1.2.2.2",
    "1.2.2.3",
    "1.3.1.2",
    "1.3.4.5",
]

# Tally the versions by their "major.minor" prefix in a bag (multiset) ...
bag_of_versions = bag(map(major_version, versions))
# ... and expose the (prefix, count) pairs as an ordinary dict.
dict_of_counts = dict((prefix, count) for prefix, count in bag_of_versions.items())

Here's my implementation of the bag class in Python (sorry about the
length):

class bag(object):
    """A bag (multiset): an unordered collection that tracks how many times
    each element occurs.

    Element counts are held in the ``_counts`` dict, mapping each element to
    a positive integer. Elements must be hashable because they are used as
    dict keys.

    Supported operators (``other`` is a bag unless noted):

    * ``a & b`` / ``a &= b`` -- intersection (minimum of the two counts)
    * ``a | b`` / ``a |= b`` -- union (maximum of the two counts)
    * ``a + b`` / ``a += b`` -- sum of counts; ``b`` may be a bag or a dict
    * ``a - b`` / ``a -= b`` -- difference of counts, dropping elements whose
      count falls below one; ``b`` may be a bag or a dict
    """

    def __init__(self, iterable=None):
        """Create a bag.

        iterable -- optional initial contents. A dict is read as an
            ``element -> count`` mapping; any other iterable contributes one
            occurrence per item it yields.

        Raises TypeError if a dict count is not an int, and ValueError if a
        dict count is negative.
        """
        self._counts = {}
        if iterable is None:
            return
        if isinstance(iterable, dict):
            for x, n in iterable.items():
                if not isinstance(n, int):
                    raise TypeError("bag counts must be integers")
                if n < 0:
                    raise ValueError("bag counts must not be negative")
                # A zero count means "absent"; storing it would make
                # ``x in bag`` true for an element that never occurs.
                if n:
                    self._counts[x] = n
        else:
            for x in iterable:
                self.add(x)

    @staticmethod
    def _from_counts(counts):
        """Wrap an already-built ``element -> count`` dict in a new bag."""
        result = bag()
        result._counts = counts
        return result

    def _intersection_counts(self, other):
        """Return a dict holding the minimum count of each shared element."""
        mine = self._counts
        return dict((x, min(mine[x], n))
                    for x, n in other._counts.items() if x in mine)

    @staticmethod
    def _add_into(counts, other):
        """Add the counts from ``other`` (bag or dict) into ``counts``."""
        # Snapshot the pairs so that ``b += b`` cannot disturb the iteration.
        for x, n in list(other.items()):
            total = counts.get(x, 0) + n
            if total > 0:
                counts[x] = total
            else:
                # Only reachable with non-positive counts from a plain dict.
                counts.pop(x, None)

    @staticmethod
    def _subtract_from(counts, other):
        """Subtract the counts in ``other`` (bag or dict) from ``counts``."""
        # Snapshot the pairs: entries may be deleted below, and ``other`` can
        # be the very bag that owns ``counts`` (``b -= b``).
        for x, n in list(other.items()):
            if x not in counts:
                continue
            remaining = counts[x] - n
            if remaining < 1:
                del counts[x]
            else:
                counts[x] = remaining

    def __and__(self, other):
        """Return a new bag with the minimum count of each shared element."""
        return self._from_counts(self._intersection_counts(other))

    def __iand__(self, other):
        """Intersect this bag with ``other`` in place and return ``self``."""
        self._counts = self._intersection_counts(other)
        # In-place operators must return the result, otherwise ``a &= b``
        # rebinds ``a`` to None.
        return self

    def __or__(self, other):
        """Return a new bag with the maximum count of each element."""
        new_counts = self._counts.copy()
        for x, n in other._counts.items():
            new_counts[x] = max(new_counts.get(x, n), n)
        return self._from_counts(new_counts)

    def __ior__(self, other):
        """Union this bag with ``other`` in place and return ``self``."""
        for x, n in list(other._counts.items()):
            self._counts[x] = max(self._counts.get(x, n), n)
        return self

    def __len__(self):
        """Return the total number of occurrences (not distinct elements)."""
        return sum(self._counts.values())

    def __list__(self):
        """Return the elements as a list, each repeated by its count.

        Python does not treat ``__list__`` as a special method; ``list(b)``
        goes through ``__iter__``. This method is kept for callers that
        invoke it explicitly.
        """
        return list(self)

    def __repr__(self):
        """Return ``bag([...])`` listing every occurrence of every element."""
        return "bag([%s])" % ", ".join(repr(x) for x in self)

    def __iter__(self):
        """Yield each element as many times as it occurs."""
        for x, n in self._counts.items():
            for i in range(n):
                yield x

    def keys(self):
        """Return the distinct elements."""
        return self._counts.keys()

    def values(self):
        """Return the counts of the distinct elements."""
        return self._counts.values()

    def items(self):
        """Return the ``(element, count)`` pairs."""
        return self._counts.items()

    def __contains__(self, x):
        """Return True if ``x`` occurs in the bag."""
        return x in self._counts

    def add(self, x):
        """Add one occurrence of ``x``."""
        self._counts[x] = self._counts.get(x, 0) + 1

    def __add__(self, other):
        """Return a new bag whose counts are the sums of both operands'."""
        new_counts = self._counts.copy()
        self._add_into(new_counts, other)
        return self._from_counts(new_counts)

    def __sub__(self, other):
        """Return a new bag with the counts in ``other`` subtracted."""
        new_counts = self._counts.copy()
        self._subtract_from(new_counts, other)
        return self._from_counts(new_counts)

    def __iadd__(self, other):
        """Add the counts in ``other`` in place and return ``self``."""
        self._add_into(self._counts, other)
        return self

    def __isub__(self, other):
        """Subtract the counts in ``other`` in place and return ``self``."""
        self._subtract_from(self._counts, other)
        return self

    def clear(self):
        """Remove every element."""
        self._counts = {}

    def count(self, x):
        """Return how many times ``x`` occurs (0 if it is absent)."""
        return self._counts.get(x, 0)




More information about the Python-list mailing list