Pivot Table/Groupby/Sum question

patrick.waldo at gmail.com patrick.waldo at gmail.com
Fri Dec 28 08:01:19 EST 2007


Wow, I did not realize it would be this complicated!  I'm fairly new
to Python and somehow I thought I could find a simpler solution.  I'll
have to mull over this to fully understand how it works for a bit.

Thanks a lot!

On Dec 28, 4:03 am, John Machin <sjmac... at lexicon.net> wrote:
> On Dec 28, 11:48 am, John Machin <sjmac... at lexicon.net> wrote:
>
> > On Dec 28, 10:05 am, patrick.wa... at gmail.com wrote:
>
> > > If you have any ideas about how to solve this pivot table issue, which
> > > seems to be scant on Google, I'd much appreciate it.  I know I can do
> > > this in Excel easily with the automated wizard, but I want to know how
> > > to do it myself and format it to my needs.
>
> > Watch this space.
>
> Tested as much as you see:
>
> 8<---
> class SimplePivotTable(object):
>
>     def __init__(
>         self,
>         row_order=None, col_order=None, # see example
>         missing=0, # what to return for an empty cell. Alternatives:
> '', 0.0, None, 'NULL'
>         ):
>         self.row_order = row_order
>         self.col_order = col_order
>         self.missing = missing
>         self.cell_dict = {}
>         self.row_total = {}
>         self.col_total = {}
>         self.grand_total = 0
>         self.headings_OK = False
>
>     def add_item(self, row_key, col_key, value):
>         self.grand_total += value
>         try:
>             self.col_total[col_key] += value
>         except KeyError:
>             self.col_total[col_key] = value
>         try:
>             self.cell_dict[row_key][col_key] += value
>             self.row_total[row_key] += value
>         except KeyError:
>             try:
>                 self.cell_dict[row_key][col_key] = value
>                 self.row_total[row_key] += value
>             except KeyError:
>                 self.cell_dict[row_key] = {col_key: value}
>                 self.row_total[row_key] = value
>
>     def _process_headings(self):
>         if self.headings_OK:
>             return
>         self.row_headings = self.row_order or
> list(sorted(self.row_total.keys()))
>         self.col_headings = self.col_order or
> list(sorted(self.col_total.keys()))
>         self.headings_OK = True
>
>     def get_col_headings(self):
>         self._process_headings()
>         return self.col_headings
>
>     def generate_row_info(self):
>         self._process_headings()
>         for row_key in self.row_headings:
>             row_dict = self.cell_dict[row_key]
>             row_vals = [row_dict.get(col_key, self.missing) for
> col_key in self.col_headings]
>             yield row_key, self.row_total[row_key], row_vals
>
>     def get_col_totals(self):
>         self._process_headings()
>         row_dict = self.col_total
>         row_vals = [row_dict.get(col_key, self.missing) for col_key in
> self.col_headings]
>         return self.grand_total, row_vals
>
> if __name__ == "__main__":
>
>     data = [
>         ['Bob', 'Morn', 240],
>         ['Bob', 'Aft',  300],
>         ['Joe', 'Morn',  70],
>         ['Joe', 'Aft',   80],
>         ['Jil', 'Morn', 100],
>         ['Jil', 'Aft',  150],
>         ['Bob', 'Aft',   40],
>         ['Bob', 'Aft',    5],
>         ['Dozy', 'Aft',   1], # Dozy doesn't show up till lunch-time
>         ]
>     NAME, TIME, AMOUNT = range(3)
>
>     print
>     ptab = SimplePivotTable(
>         col_order=['Morn', 'Aft'],
>         missing='uh-oh',
>         )
>     for s in data:
>         ptab.add_item(row_key=s[NAME], col_key=s[TIME],
> value=s[AMOUNT])
>     print ptab.get_col_headings()
>     for x in ptab.generate_row_info():
>         print x
>     print 'Tots', ptab.get_col_totals()
> 8<---




More information about the Python-list mailing list