Creating dictionary of items from Excel with mutliple keys

Sun Aug 14 07:02:56 EDT 2016

Atri Mahapatra wrote:

> I am trying to create a following dictionary. I am reading data from excel
> which has data in the following format:
> 
> Sl no:  Name Thickness Length Material Width Quantity Side
> 
> It has 20 rows of data.
> 
> 
> The dictionary for the 20 rows, I would like to make is
> 
> Data_Dict = [
> { 'Name': 'X', 'Length': '10' , 'Width': '5', 'Quantity': 2 'Area': 50};
> { 'Name': 'Y', 'Length': '20' , 'Width': 10', 'Quantity': 1 'Area': 200};
> .
> .
> .
> .
> till 20 rows
> ];
> 
> I would like to add another key 'Area' as shown above. I used the
> following code(s):
> 
> using openpyxl:
> d={}
> for i  in range(3,sheet.max_row+1):
>     #for j in range(3,9):
>         #for k in range(0,5):
>           Name = sheet.cell(row= i,column=3).value
>           Length =sheet.cell(row =i,column=6).value
>           Breadth= sheet.cell(row=i,column=7).value
>           Quantity = sheet.cell (row=i,column=8).value
>           Area = sheet.cell(row
>           =i,column=6).value*sheet.cell(row=i,column=7).value d[Name]=
>           Length,Breadth,Quantity,Area
> 
> which gave an output like:
> ['X': (10, 5, 2, 50), 'Y': (20, 10, 1, 2232600), 'Z': (5, 2, 1, 10),
> [.............]
> 
> Another code using xlrd:
> 
> keys = [sheet.cell(2, col_index).value for col_index in range(0,8)]
> print (keys)
> dict_list = []
> d = {}
> for row_index in range(1, xl_sheet.nrows):
>    for col_index in range(0,8):
>    d = {keys[col_index]: xl_sheet.cell(row_index, col_index).value
>    for col_index in range(0,8)}
>    dict_list.append(d)
> 
> print (dict_list)
> which did not have the area and neither the output was little messy.
> 
> 
> 
> The main purpose is to sort the dictionary based on different criteria
> like Length or Area. I think the first one may be easier to sort. However
> if there are any better way to represent  the dictionary  and the code so
> that it can be sorted later based on different attributes please feel free
> to suggest.

Things become clearer when you abstract out reading the data. For example:

import openpyxl

def open_sheet(filename, sheetname):
    wb = openpyxl.load_workbook(filename=filename)
    return wb.get_sheet_by_name(sheetname)

def read_table(sheet, columnnames, header_row=0):
    name_to_index = {
        n: i for i, n
        in enumerate(c.value for c in sheet.rows[header_row])
        if n is not None}
    column_indices = [name_to_index[n] for n in columnnames]

    for row in sheet.rows[header_row + 1:]:
        yield dict(zip(columnnames, (row[x].value for x in column_indices)))

if __name__ == "__main__":
    from operator import itemgetter
    import pprint

    # read data from Excel
    rows = read_table(
        open_sheet("sample.xlsx", "SampleSheet"),
        "Name Length Width Quantity".split(),
        2  # replace with actual header row
    )

    # add Area
    dict_list = []
    for row_dict in rows:
        row_dict["Area"] = row_dict["Width"] * row_dict["Length"]
        dict_list.append(row_dict)

    # sort and print data
    print("Unsorted:")
    pprint.pprint(dict_list)
    for sort_column in "Width", "Quantity":
        print("\nby {}:".format(sort_column))
        dict_list.sort(key=itemgetter(sort_column))
        pprint.pprint(dict_list)

To use xlrd instead of openpyxl you have to replace the open_sheet() and 
read_table() functions with

def open_sheet(filename, sheetname):
    wb = xlrd.open_workbook(filename)
    return wb.sheet_by_name(sheetname)

def read_table(sheet, columnnames, header_row=0):
    name_to_index = {
        c.value: i for i, c
        in enumerate(sheet.row(header_row))
        if c.ctype == xlrd.XL_CELL_TEXT}
    column_indices = [name_to_index[n] for n in columnnames]
    for rowindex in range(header_row + 1, sheet.nrows):
        row = sheet.row(rowindex)
        yield dict(zip(columnnames, (row[x].value for x in column_indices)))