head for grouped data - looking for best practice
Peter Otten
__peter__ at web.de
Sat Mar 12 10:02:01 EST 2005
Harald Massa wrote:
> def getdoublekey(row):
> return row[0:2]
>
> for key, bereich in groupby(eingabe,getdoublekey):
> print "Area:",key
> for data in bereich:
> print "--data--", data[2:]
>
> which indeed leeds to the expected result, while looking less "hacky" ..
> on the other hand side, that "getdoublekey" ist not very flexible; when
> doing the same with 3 Columns forming the head information, I have to
> define the next function...
Function creation is cheap and easily understood by someone reading your
code -- so you may already have the best solution. If Raymond Hettinger's
recent suggestion on python-dev makes it into Python 2.5,
itemgetter()/attrgetter() could grow support for the extraction of
multiple attributes/items.
Anyway, here is a generalized getter factory that tries to handle all the
common cases in an intuitive way. For example, you can create itemgetters
using the [] notation:
>>> extract[::3](range(5))
[0, 3]
>>> extract[3](range(5))
3
>>> extract[0,3,4](range(5))
(0, 3, 4)
>>> import os
>>> extract.path(os)
<module 'posixpath' from '/somewhere/posixpath.pyc'>
Peter
import itertools
import operator
def tuple_itemgetter(*keys):
    """Create a function that extracts a tuple of items from an
    indexable object.

    Each positional argument is a key (index, slice, mapping key, ...)
    accepted by ``operator.itemgetter``. The returned callable takes a
    single object and returns a tuple holding ``obj[key]`` for every key,
    in the order the keys were given. The result is always a tuple, even
    when zero or one key was supplied.
    """
    # helper for extract
    # Build a real list: on Python 3 map() returns a one-shot iterator,
    # which would be exhausted after the first call of the returned getter
    # and make every later call return an empty tuple.
    getters = [operator.itemgetter(key) for key in keys]

    def get(obj):
        return tuple(getter(obj) for getter in getters)

    return get
def tuple_attrgetter(*names):
    """Create a function that extracts a tuple of attributes from an object.

    Each positional argument is an attribute name accepted by
    ``operator.attrgetter``. The returned callable takes a single object
    and returns a tuple with the value of every named attribute, in the
    order the names were given. The result is always a tuple, even when
    zero or one name was supplied.
    """
    # helper for extract
    # Build a real list: on Python 3 map() returns a one-shot iterator,
    # which would be exhausted after the first call of the returned getter
    # and make every later call return an empty tuple.
    getters = [operator.attrgetter(name) for name in names]

    def get(obj):
        return tuple(getter(obj) for getter in getters)

    return get
class extract(object):
    """Single entry point for building attribute and item getters.

    Subscripting produces item getters, attribute access produces
    attribute getters, and calling produces a getter that returns a
    tuple of attributes.
    """

    def __call__(self, *names):
        # extract("a", "b") -> callable returning a tuple of those attributes
        return tuple_attrgetter(*names)

    def __getattribute__(self, name):
        # Every attribute lookup on the instance is answered with an
        # attribute getter for that name: extract.name -> attrgetter("name")
        return operator.attrgetter(name)

    def __getitem__(self, index):
        # A tuple index such as extract[0, 3] requests several items at
        # once; any other index (int, slice, key) goes to itemgetter as is.
        if not isinstance(index, tuple):
            return operator.itemgetter(index)
        return tuple_itemgetter(*index)


extract = extract()  # we only ever need one instance
if __name__ == "__main__":
    # the demo is an anglo-german hotchpotch, really:
    eingabe = [
        ("Stuttgart", "70197", "Fernsehturm", "20"),
        ("Stuttgart", "70197", "Brotmuseum", "123"),
        ("Stuttgart", "70197", "Porsche", "123123"),
        ("Leipzig", "01491", "Messe", "91822"),
        ("Leipzig", "01491", "Schabidu", "9181231"),
    ]

    class Site(object):
        """Object form of one ``eingabe`` row, with the four fields as
        attributes, used to demonstrate the attribute getters.
        """

        def __init__(self, stadt, plz, name, nummer):
            self.stadt = stadt
            self.plz = plz
            self.name = name
            self.nummer = nummer

        def __str__(self):
            return "Site(stadt=%r, plz=%r, name=%r, nummer=%r)" % (
                self.stadt, self.plz, self.name, self.nummer)

        __repr__ = __str__

    def show(iterable, groupkey):
        """Print a separator line, then every group key produced by
        ``itertools.groupby(iterable, groupkey)`` followed by the group's
        items, one per line and prefixed with a tab.
        """
        # Each print(...) takes exactly one argument so the line is valid,
        # and prints the same text, on both Python 2 and Python 3.
        print("-" * 20)
        for group, items in itertools.groupby(iterable, groupkey):
            print(group)
            for item in items:
                # (item,) keeps tuple rows from being unpacked by the % operator
                print("\t%s" % (item,))

    show(eingabe, extract[1])
    show(eingabe, extract[0, 1, 0:2])
    show(eingabe, extract[0:2])
    show((Site(*e) for e in eingabe), extract("stadt", "plz"))
    show((Site(*e) for e in eingabe), extract.stadt)
More information about the Python-list
mailing list