Pivot Table/Groupby/Sum question
John Machin
sjmachin at lexicon.net
Thu Dec 27 22:03:25 EST 2007
On Dec 28, 11:48 am, John Machin <sjmac... at lexicon.net> wrote:
> On Dec 28, 10:05 am, patrick.wa... at gmail.com wrote:
>
>
> > If you have any ideas about how to solve this pivot table issue, which
> > seems to be scant on Google, I'd much appreciate it. I know I can do
> > this in Excel easily with the automated wizard, but I want to know how
> > to do it myself and format it to my needs.
>
> Watch this space.
Tested as much as you see:
8<---
class SimplePivotTable(object):
def __init__(
self,
row_order=None, col_order=None, # see example
missing=0, # what to return for an empty cell. Alternatives:
'', 0.0, None, 'NULL'
):
self.row_order = row_order
self.col_order = col_order
self.missing = missing
self.cell_dict = {}
self.row_total = {}
self.col_total = {}
self.grand_total = 0
self.headings_OK = False
def add_item(self, row_key, col_key, value):
self.grand_total += value
try:
self.col_total[col_key] += value
except KeyError:
self.col_total[col_key] = value
try:
self.cell_dict[row_key][col_key] += value
self.row_total[row_key] += value
except KeyError:
try:
self.cell_dict[row_key][col_key] = value
self.row_total[row_key] += value
except KeyError:
self.cell_dict[row_key] = {col_key: value}
self.row_total[row_key] = value
def _process_headings(self):
if self.headings_OK:
return
self.row_headings = self.row_order or
list(sorted(self.row_total.keys()))
self.col_headings = self.col_order or
list(sorted(self.col_total.keys()))
self.headings_OK = True
def get_col_headings(self):
self._process_headings()
return self.col_headings
def generate_row_info(self):
self._process_headings()
for row_key in self.row_headings:
row_dict = self.cell_dict[row_key]
row_vals = [row_dict.get(col_key, self.missing) for
col_key in self.col_headings]
yield row_key, self.row_total[row_key], row_vals
def get_col_totals(self):
self._process_headings()
row_dict = self.col_total
row_vals = [row_dict.get(col_key, self.missing) for col_key in
self.col_headings]
return self.grand_total, row_vals
if __name__ == "__main__":
data = [
['Bob', 'Morn', 240],
['Bob', 'Aft', 300],
['Joe', 'Morn', 70],
['Joe', 'Aft', 80],
['Jil', 'Morn', 100],
['Jil', 'Aft', 150],
['Bob', 'Aft', 40],
['Bob', 'Aft', 5],
['Dozy', 'Aft', 1], # Dozy doesn't show up till lunch-time
]
NAME, TIME, AMOUNT = range(3)
print
ptab = SimplePivotTable(
col_order=['Morn', 'Aft'],
missing='uh-oh',
)
for s in data:
ptab.add_item(row_key=s[NAME], col_key=s[TIME],
value=s[AMOUNT])
print ptab.get_col_headings()
for x in ptab.generate_row_info():
print x
print 'Tots', ptab.get_col_totals()
8<---
More information about the Python-list
mailing list