Dictionaries as records

Yitz bmcyitz at freebox.com
Wed Dec 19 09:26:03 EST 2001


Skip Montanaro <skip at pobox.com> wrote in message news:<mailman.1008721343.24385.python-list at python.org>...

Bill> I have a file with 200K records and 16 fields.  This file is
Bill> parsed and each row is put into a dictionary and the dictionary is
Bill> added to a list.  The raw file is only about 50mb.
Bill> I was shocked to see that my memory use jumped to 500MB!

> You might want to consider storing it in an on-disk mapping object.  Check
> out the anydbm, bsddb, gdbm modules and the like.

That will solve the memory problem, but it may be slow due
to disk access.

Here is a compromise. (This code assumes that every dictionary has
the same set of keys; if that is not the case, modify it accordingly.)

from UserList import UserList
class ListOfDicts(UserList):
  '''Store a list of tuples, but make it act like a list of dictionaries.

  Each record is held in self.data as a tuple of values ordered like
  self.keys.  A dictionary is built only when a record is read back out
  (indexing, iteration, pop), and dictionaries handed in are converted
  to tuples before being stored.  Every record is assumed to use the
  same set of keys.

  The constructor takes an optional list of tuples (not dictionaries).
  '''

  def __init__(self, tuples=None):
    UserList.__init__(self, tuples)
    # Field names shared by all records; edit to match your data.
    self.keys = ("name", "address", "etc")

  def _tuple2dict(self, values):
    '''Return a new dictionary mapping self.keys to values.

    Keys with no corresponding value (a short tuple) map to None.
    '''
    record = dict.fromkeys(self.keys)
    record.update(zip(self.keys, values))
    return record

  def _dict2tuple(self, record):
    '''Return the values of record as a tuple ordered like self.keys.

    Keys missing from record yield None; extra keys are ignored.
    '''
    return tuple([record.get(key) for key in self.keys])

  def _as_tuples(self, other):
    '''Return a new list of storage tuples for the records in other.

    other may be another ListOfDicts, a UserList of dictionaries, or
    any iterable of dictionaries.
    '''
    if isinstance(other, ListOfDicts):
      if other.keys == self.keys:
        # Already stored in our layout: copy the tuples directly
        # instead of treating them as dictionaries.
        return list(other.data)
      # Different layout: iterating yields dictionaries, which are
      # then re-encoded with our own keys.
      return [self._dict2tuple(record) for record in other]
    if isinstance(other, UserList):
      other = other.data
    return [self._dict2tuple(record) for record in other]

  def __contains__(self, item):
    return self._dict2tuple(item) in self.data

  def __getitem__(self, i):
    if isinstance(i, slice):
      # A slice selects several records; keep them in tuple form
      # inside a new instance rather than converting the list itself.
      return self.__class__(self.data[i])
    return self._tuple2dict(self.data[i])

  def __setitem__(self, i, item):
    if isinstance(i, slice):
      self.data[i] = self._as_tuples(item)
    else:
      self.data[i] = self._dict2tuple(item)

  def __add__(self, other):
    return self.__class__(self.data + self._as_tuples(other))

  def __radd__(self, other):
    return self.__class__(self._as_tuples(other) + self.data)

  def __iadd__(self, other):
    self.data += self._as_tuples(other)
    return self

  def append(self, item):
    self.data.append(self._dict2tuple(item))

  def insert(self, i, item):
    self.data.insert(i, self._dict2tuple(item))

  def pop(self, i=-1):
    return self._tuple2dict(self.data.pop(i))

  def remove(self, item):
    self.data.remove(self._dict2tuple(item))

  def count(self, item):
    return self.data.count(self._dict2tuple(item))

  def index(self, item, *args):
    # *args forwards the optional start/stop bounds of list.index.
    return self.data.index(self._dict2tuple(item), *args)

  def extend(self, other):
    self.data.extend(self._as_tuples(other))



More information about the Python-list mailing list