Pivot Table/Groupby/Sum question

John Machin sjmachin at lexicon.net
Thu Dec 27 22:03:25 EST 2007


On Dec 28, 11:48 am, John Machin <sjmac... at lexicon.net> wrote:
> On Dec 28, 10:05 am, patrick.wa... at gmail.com wrote:
>
>
> > If you have any ideas about how to solve this pivot table issue, which
> > seems to be scant on Google, I'd much appreciate it.  I know I can do
> > this in Excel easily with the automated wizard, but I want to know how
> > to do it myself and format it to my needs.
>
> Watch this space.

Tested as much as you see:

8<---
class SimplePivotTable(object):

    def __init__(
        self,
        row_order=None, col_order=None, # see example
        missing=0, # what to return for an empty cell. Alternatives:
'', 0.0, None, 'NULL'
        ):
        self.row_order = row_order
        self.col_order = col_order
        self.missing = missing
        self.cell_dict = {}
        self.row_total = {}
        self.col_total = {}
        self.grand_total = 0
        self.headings_OK = False

    def add_item(self, row_key, col_key, value):
        self.grand_total += value
        try:
            self.col_total[col_key] += value
        except KeyError:
            self.col_total[col_key] = value
        try:
            self.cell_dict[row_key][col_key] += value
            self.row_total[row_key] += value
        except KeyError:
            try:
                self.cell_dict[row_key][col_key] = value
                self.row_total[row_key] += value
            except KeyError:
                self.cell_dict[row_key] = {col_key: value}
                self.row_total[row_key] = value

    def _process_headings(self):
        if self.headings_OK:
            return
        self.row_headings = self.row_order or
list(sorted(self.row_total.keys()))
        self.col_headings = self.col_order or
list(sorted(self.col_total.keys()))
        self.headings_OK = True

    def get_col_headings(self):
        self._process_headings()
        return self.col_headings

    def generate_row_info(self):
        self._process_headings()
        for row_key in self.row_headings:
            row_dict = self.cell_dict[row_key]
            row_vals = [row_dict.get(col_key, self.missing) for
col_key in self.col_headings]
            yield row_key, self.row_total[row_key], row_vals

    def get_col_totals(self):
        self._process_headings()
        row_dict = self.col_total
        row_vals = [row_dict.get(col_key, self.missing) for col_key in
self.col_headings]
        return self.grand_total, row_vals

if __name__ == "__main__":

    data = [
        ['Bob', 'Morn', 240],
        ['Bob', 'Aft',  300],
        ['Joe', 'Morn',  70],
        ['Joe', 'Aft',   80],
        ['Jil', 'Morn', 100],
        ['Jil', 'Aft',  150],
        ['Bob', 'Aft',   40],
        ['Bob', 'Aft',    5],
        ['Dozy', 'Aft',   1], # Dozy doesn't show up till lunch-time
        ]
    NAME, TIME, AMOUNT = range(3)

    print
    ptab = SimplePivotTable(
        col_order=['Morn', 'Aft'],
        missing='uh-oh',
        )
    for s in data:
        ptab.add_item(row_key=s[NAME], col_key=s[TIME],
value=s[AMOUNT])
    print ptab.get_col_headings()
    for x in ptab.generate_row_info():
        print x
    print 'Tots', ptab.get_col_totals()
8<---



More information about the Python-list mailing list