[Tutor] dictionary of lists

Chris Stinemetz chrisstinemetz at gmail.com
Thu Jun 4 15:46:00 CEST 2015


On Thu, Jun 4, 2015 at 2:30 AM, Peter Otten <__peter__ at web.de> wrote:
> Chris Stinemetz wrote:
>
>> Although I am certain it is not very efficient, I was able to
>> accomplish what I wanted with the following code I wrote:
>>
>> import os
>> import pprint
>> import csv
>> from collections import defaultdict
>>
>> print_map =  {'MOU':0, 'Call_Att':1, 'Device':2}
>> header = ['IMEI','MOUs','Call_Att','Device']
>>
>> path = 'C:/Users/cs062x/Desktop/Panhandle'
>>
>> os.chdir(path)
>> running_MOU = {}
>> call_attempts = {}
>> d = defaultdict(list)
>> for fname in os.listdir('.'):
>>     with open (fname) as csvfile:
>>         spamreader = csv.reader(csvfile, delimiter=',', quotechar='|')
>>         next(spamreader)
>>         for row in spamreader:
>>
>>             if row[8]:
>>                 device = row[36]
>>                 Elapsed_Mins = float(row[7])
>>                 IMEI = row[8].replace("'", "")
>>
>>                 if IMEI in running_MOU.keys():
>
> For big dicts in Python 2 the test
>
> key in some_dict.keys()
>
> is indeed very inefficient as it builds a list of keys first and then
> performs a linear scan for the key. Much better:
>
> key in some_dict
>
> This test avoids building the list and can also use an efficient lookup
> algorithm that is independent of the size of the dict.
>
>>                     running_MOU[IMEI] += Elapsed_Mins
>>                 else:
>>                     running_MOU[IMEI] = Elapsed_Mins
>>
>>                 if IMEI in call_attempts.keys():
>>                     call_attempts[IMEI] += 1
>>                 else:
>>                     call_attempts[IMEI] = 1
>>
>>                 # if key matches append mou else append 0.
>>                 d[IMEI] = [running_MOU[IMEI]]
>>                 d[IMEI].append([call_attempts[IMEI]])
>>                 d[IMEI].append([device])
>>
>>
>> print ",".join(header)
>> for k,v in sorted(d.items()):
>>     print k, ",", d[k][print_map['MOU']],",",
>> d[k][print_map['Call_Att']][0],",", d[k][print_map['Device']][0]
>>
>> print "complete"
>
> Here's an alternative that uses only one dict:
>
> import csv
> import os
> import sys
>
> header = ['IMEI', 'MOUs', 'Call_Att', 'Device']
>
> path = 'C:/Users/cs062x/Desktop/Panhandle'
>
> d = {}
> for fname in os.listdir(path):
>     with open(os.path.join(path, fname)) as csvfile:
>         spamreader = csv.reader(csvfile, delimiter=',', quotechar='|')
>         next(spamreader)
>         for row in spamreader:
>             if row[8]:
>                 device = row[36]
>                 elapsed_mins = float(row[7])
>                 IMEI = row[8].replace("'", "")
>
>                 if IMEI in d:
>                     record = d[IMEI]
>                     record[1] += elapsed_mins
>                     record[2] += 1
>                 else:
>                     d[IMEI] = [IMEI, elapsed_mins, 1, device]
>
> writer = csv.writer(sys.stdout)
> writer.writerow(header)
> writer.writerows(sorted(d.itervalues()))
>
> print "complete"


Peter - Thank you for showing me how to do this with one dictionary
and a more efficient way to look up dictionary keys. I originally
attempted to accomplish this with one dictionary but could not
find a good example, which is why I used defaultdict from the
collections module. Your approach sped the parsing time up from
about 3 minutes to about 15 seconds! Very cool.

Thanks,

Chris


More information about the Tutor mailing list