optimizing large dictionaries
Christian Heimes
lists at cheimes.de
Thu Jan 15 17:22:48 EST 2009
> class MyClass
>
> def __str__(self):
> return "%s-%s-%s" %(self.field1, self.field2, self.field3)
>
> def __repr__(self):
> return str(self)
>
> def __hash__(self):
> return hash(str(self))
>
>
> is there anything that can be done to speed up this simply code? right
> now it is taking well over 15 minutes to process, on a 3 Ghz machine
> with lots of RAM (though this is all taking CPU power, not RAM at this
> point.)
class MyClass(object):
    """Value object that hashes and compares on (field1, field2, field3).

    A new-style class with __slots__ saves memory per instance.
    """

    # BUG FIX: the original listed "field2" twice and omitted "field3",
    # so field3 could never be assigned (AttributeError on set) even
    # though __eq__ and __str__ both read it.
    __slots__ = ("field1", "field2", "field3")

    def __hash__(self):
        # Hashing a tuple of the fields is faster than hash(str(self))
        # because it requires fewer function calls. All fields must be
        # hashable.
        # BUG FIX: hash field3 instead of field2 twice; otherwise objects
        # differing only in field3 would hash equal while comparing unequal.
        return hash((self.field1, self.field2, self.field3))

    def __eq__(self, other):
        # You MUST provide a rich-compare __eq__ when you provide your
        # own __hash__; it is consulted when hash(a) == hash(b).
        if not isinstance(other, MyClass):
            return NotImplemented
        return (self.field1 == other.field1
                and self.field2 == other.field2
                and self.field3 == other.field3)

    def __str__(self):
        return "%s-%s-%s" %(self.field1, self.field2, self.field3)

    # Alias: faster than a __repr__ that delegates to str(self) because
    # it avoids an extra function lookup. Optional.
    __repr__ = __str__
Instead of the try/except clause I recommend collections.defaultdict.
Defaultdict takes one callable as factory function. Since int() returns
0 it has the same effect as your code.
from collections import defaultdict

# defaultdict(int) supplies int() == 0 for any missing key, so it
# behaves exactly like the try/except-KeyError-then-0 pattern.
elt = defaultdict(int)
Christian
More information about the Python-list
mailing list