head for grouped data - looking for best practice

Peter Otten __peter__ at web.de
Sat Mar 12 10:02:01 EST 2005


Harald Massa wrote:

> def getdoublekey(row):
>     return row[0:2]
> 
> for key, bereich in groupby(eingabe,getdoublekey):
>     print "Area:",key
>     for data in bereich:
>         print "--data--", data[2:]
> 
> which indeed leads to the expected result, while looking less "hacky" ...
> On the other hand, that "getdoublekey" is not very flexible; when
> doing the same with 3 columns forming the head information, I have to
> define the next function...

Function creation is cheap and easily understood by someone reading your
code -- so you may already have the best solution. If Raymond Hettinger's
recent suggestion on python-dev makes it into Python 2.5,
itemgetter()/attrgetter() could grow support for the extraction of
multiple attributes/items.

Anyway, here is a generalized getter factory that tries to handle all the
common cases in an intuitive way. E.g., you can create itemgetters using
the [] notation:

>>> extract[::3](range(5))
[0, 3]
>>> extract[3](range(5))
3
>>> extract[0,3,4](range(5))
(0, 3, 4)
>>> import os
>>> extract.path(os)
<module 'posixpath' from '/somewhere/posixpath.pyc'>

Peter

import itertools
import operator

def tuple_itemgetter(*keys):
    """Create a function that extracts a tuple of items from an
       indexable object.

       keys -- the indices, keys or slices to look up; each one is
               handed to operator.itemgetter().

       Returns a function get(obj) that evaluates to
       (obj[keys[0]], obj[keys[1]], ...). The result is always a tuple,
       even for a single key or for no keys at all.
    """
    # helper for extract
    # Build a real list instead of relying on map(): where map() is lazy
    # (Python 3) the getters would be consumed by the first call and every
    # later call would silently return an empty tuple.
    getters = [operator.itemgetter(key) for key in keys]
    def get(obj):
        return tuple(getter(obj) for getter in getters)
    return get

def tuple_attrgetter(*names):
    """Create a function that extracts a tuple of attributes from an object.

       names -- the attribute names to look up; each one is handed to
                operator.attrgetter().

       Returns a function get(obj) that evaluates to
       (obj.<names[0]>, obj.<names[1]>, ...). The result is always a
       tuple, even for a single name or for no names at all.
    """
    # helper for extract
    # Build a real list instead of relying on map(): where map() is lazy
    # (Python 3) the getters would be consumed by the first call and every
    # later call would silently return an empty tuple.
    getters = [operator.attrgetter(name) for name in names]
    def get(obj):
        return tuple(getter(obj) for getter in getters)
    return get

class extract(object):
    """Single entry point for building item and attribute getters.

       extract[key]          -> operator.itemgetter(key)
       extract[k1, k2, ...]  -> getter returning a tuple of items
       extract.name          -> operator.attrgetter("name")
       extract("n1", "n2")   -> getter returning a tuple of attributes
    """
    def __call__(self, *names):
        # Calling the object always produces a tuple-returning getter.
        return tuple_attrgetter(*names)

    def __getattribute__(self, name):
        # __getattribute__ intercepts every attribute access made on the
        # instance, so any name at all is turned into an attrgetter.
        return operator.attrgetter(name)

    def __getitem__(self, index):
        # A plain index, key or slice maps straight onto itemgetter;
        # a tuple (extract[a, b, ...]) is spread over several getters.
        if not isinstance(index, tuple):
            return operator.itemgetter(index)
        return tuple_itemgetter(*index)

extract = extract() # we only ever need one instance

if __name__ == "__main__":
    # the demo is an anglo-german hotchpotch, really:
    # Each row lines up with Site's (stadt, plz, name, nummer) arguments.
    # itertools.groupby() only merges *adjacent* items with equal keys, so
    # the rows are listed already ordered by the columns used for grouping.
    eingabe = [
        ("Stuttgart", "70197", "Fernsehturm", "20"),
        ("Stuttgart", "70197", "Brotmuseum", "123"),
        ("Stuttgart", "70197", "Porsche", "123123"),
        ("Leipzig", "01491", "Messe", "91822"),
        ("Leipzig", "01491", "Schabidu", "9181231"),
    ]

    class Site(object):
        """Attribute-based counterpart of one eingabe row."""
        def __init__(self, stadt, plz, name, nummer):
            self.stadt = stadt
            self.plz = plz
            self.name = name
            self.nummer = nummer
        def __str__(self):
            return "Site(stadt=%r, plz=%r, name=%r, nummer=%r)" % (
                self.stadt, self.plz, self.name, self.nummer)
        __repr__ = __str__

    def show(iterable, groupkey):
        """Print a separator, then every group key followed by the
           tab-indented items that belong to it.
        """
        # print(...) with exactly one argument gives the same output whether
        # print is a statement (Python 2) or a function (Python 3).
        print("-" * 20)
        for group, items in itertools.groupby(iterable, groupkey):
            print(group)
            for item in items:
                # Format through a 1-tuple: item may itself be a tuple.
                print("\t%s" % (item,))

    show(eingabe, extract[1])                # group by a single column
    show(eingabe, extract[0, 1, 0:2])        # indices and a slice combined
    show(eingabe, extract[0:2])              # group by a slice of columns
    show((Site(*e) for e in eingabe), extract("stadt", "plz"))  # attribute tuple
    show((Site(*e) for e in eingabe), extract.stadt)            # single attribute




More information about the Python-list mailing list