dict generator question
MRAB
google at mrabarnett.plus.com
Fri Sep 19 20:00:56 EDT 2008
On Sep 19, 2:01 pm, bearophileH... at lycos.com wrote:
> Gerard flanagan:
>
> > data.sort()
> > datadict = \
> > dict((k, len(list(g))) for k,g in groupby(data, lambda s:
> > '.'.join(s.split('.',2)[:2])))
>
> That code may run correctly, but it's quite unreadable, while good
> Python programmers value high readability. So the right thing to do is
> to split that line into parts, giving meaningful names, and maybe even
> add comments.
>
> len(list(g))) looks like a good job for my little leniter() function
> (or better just an extension to the semantics of len) that time ago
> some people here have judged as useless, while I use it often in both
> Python and D ;-)
>
Extending len() to support iterables sounds like a good idea, except
that it could be misleading when:
len(file(path))
returns the number of lines and /not/ the length in bytes as you might
first think! :-)
Anyway, here's another possible implementation using bags (multisets):
def major_version(version_string):
"convert '1.2.3.2' to '1.2'"
return '.'.join(version_string.split('.')[:2])
versions = ["1.1.1.1", "1.2.2.2", "1.2.2.3", "1.3.1.2", "1.3.4.5"]
bag_of_versions = bag(major_version(x) for x in versions)
dict_of_counts = dict(bag_of_versions.items())
Here's my implementation of the bag class in Python (sorry about the
length):
class bag(object):
def __init__(self, iterable = None):
self._counts = {}
if isinstance(iterable, dict):
for x, n in iterable.items():
if not isinstance(n, int):
raise TypeError()
if n < 0:
raise ValueError()
self._counts[x] = n
elif iterable:
for x in iterable:
try:
self._counts[x] += 1
except KeyError:
self._counts[x] = 1
def __and__(self, other):
new_counts = {}
for x, n in other._counts.items():
try:
new_counts[x] = min(self._counts[x], n)
except KeyError:
pass
result = bag()
result._counts = new_counts
return result
def __iand__(self):
new_counts = {}
for x, n in other._counts.items():
try:
new_counts[x] = min(self._counts[x], n)
except KeyError:
pass
self._counts = new_counts
def __or__(self, other):
new_counts = self._counts.copy()
for x, n in other._counts.items():
try:
new_counts[x] = max(new_counts[x], n)
except KeyError:
new_counts[x] = n
result = bag()
result._counts = new_counts
return result
def __ior__(self):
for x, n in other._counts.items():
try:
self._counts[x] = max(self._counts[x], n)
except KeyError:
self._counts[x] = n
def __len__(self):
return sum(self._counts.values())
def __list__(self):
result = []
for x, n in self._counts.items():
result.extend([x] * n)
return result
def __repr__(self):
return "bag([%s])" % ", ".join(", ".join([repr(x)] * n) for x,
n in self._counts.items())
def __iter__(self):
for x, n in self._counts.items():
for i in range(n):
yield x
def keys(self):
return self._counts.keys()
def values(self):
return self._counts.values()
def items(self):
return self._counts.items()
def __add__(self, other):
for x, n in other.items():
self._counts[x] = self._counts.get(x, 0) + n
def __contains__(self, x):
return x in self._counts
def add(self, x):
try:
self._counts[x] += 1
except KeyError:
self._counts[x] = 1
def __add__(self, other):
new_counts = self._counts.copy()
for x, n in other.items():
try:
new_counts[x] += n
except KeyError:
new_counts[x] = n
result = bag()
result._counts = new_counts
return result
def __sub__(self, other):
new_counts = self._counts.copy()
for x, n in other.items():
try:
new_counts[x] -= n
if new_counts[x] < 1:
del new_counts[x]
except KeyError:
pass
result = bag()
result._counts = new_counts
return result
def __iadd__(self, other):
for x, n in other.items():
try:
self._counts[x] += n
except KeyError:
self._counts[x] = n
def __isub__(self, other):
for x, n in other.items():
try:
self._counts[x] -= n
if self._counts[x] < 1:
del self._counts[x]
except KeyError:
pass
def clear(self):
self._counts = {}
def count(self, x):
return self._counts.get(x, 0)
More information about the Python-list
mailing list