[Tutor] dictionary of lists

Peter Otten __peter__ at web.de
Thu Jun 4 09:30:56 CEST 2015


Chris Stinemetz wrote:

> Although I am certain it is not very efficient I was able to
> accomplish what I wanted with the following code I wrote:
> 
> import os
> import pprint
> import csv
> from collections import defaultdict
> 
> print_map =  {'MOU':0, 'Call_Att':1, 'Device':2}
> header = ['IMEI','MOUs','Call_Att','Device']
> 
> path = 'C:/Users/cs062x/Desktop/Panhandle'
> 
> os.chdir(path)
> running_MOU = {}
> call_attempts = {}
> d = defaultdict(list)
> for fname in os.listdir('.'):
>     with open (fname) as csvfile:
>         spamreader = csv.reader(csvfile, delimiter=',', quotechar='|')
>         next(spamreader)
>         for row in spamreader:
> 
>             if row[8]:
>                 device = row[36]
>                 Elapsed_Mins = float(row[7])
>                 IMEI = row[8].replace("'", "")
> 
>                 if IMEI in running_MOU.keys():

For big dicts in Python 2 the test 

key in some_dict.keys()

is indeed very inefficient as it builds a list of keys first and then 
performs a linear scan for the key. Much better:

key in some_dict

This test avoids building the list and uses the dict's hash-based lookup, 
whose cost is (on average) independent of the size of the dict.

>                     running_MOU[IMEI] += Elapsed_Mins
>                 else:
>                     running_MOU[IMEI] = Elapsed_Mins
> 
>                 if IMEI in call_attempts.keys():
>                     call_attempts[IMEI] += 1
>                 else:
>                     call_attempts[IMEI] = 1
> 
>                 # if key matches append mou else append 0.
>                 d[IMEI] = [running_MOU[IMEI]]
>                 d[IMEI].append([call_attempts[IMEI]])
>                 d[IMEI].append([device])
> 
> 
> print ",".join(header)
> for k,v in sorted(d.items()):
>     print k, ",", d[k][print_map['MOU']],",",
> d[k][print_map['Call_Att']][0],",", d[k][print_map['Device']][0]
> 
> print "complete"

Here's an alternative that uses only one dict:

import csv
import os
import sys

header = ['IMEI', 'MOUs', 'Call_Att', 'Device']

path = 'C:/Users/cs062x/Desktop/Panhandle'

d = {}
for fname in os.listdir(path):
    with open(os.path.join(path, fname)) as csvfile:
        spamreader = csv.reader(csvfile, delimiter=',', quotechar='|')
        next(spamreader)
        for row in spamreader:
            if row[8]:
                device = row[36]
                elapsed_mins = float(row[7])
                IMEI = row[8].replace("'", "")

                if IMEI in d:
                    record = d[IMEI]
                    record[1] += elapsed_mins
                    record[2] += 1
                else:
                    d[IMEI] = [IMEI, elapsed_mins, 1, device]

writer = csv.writer(sys.stdout)
writer.writerow(header)
writer.writerows(sorted(d.itervalues()))

print "complete"




More information about the Tutor mailing list