pickle complexity limit?

Simon Burton simonb at webone.com.au
Wed Nov 12 23:56:50 EST 2003


On Sun, 09 Nov 2003 15:15:12 -0800, Mark Hahn wrote:

> I have a heavily interlinked dom like tree structure of objects.  A node
> will link to many children redundantly and many children will reference
> parents redundantly.  So there are many cycles of references.
> ...

I am working on trying to flatten the data before it is pickled.
Here is the code below: it searches all references and replaces them
with dummy Ids (TankId) that key the original reference in a dict.

eg.

def fat_dump(nested):
  t=Tank()
  t.flatten(nested)
  # pickle.dump( t, ... )
  # ...
  # t = pickle.load( ... )
  nested = t.lift()

Rather untested as i am about to take a short holiday. 
( note that == doesn't work well on nested data either! )

ciao,

Simon.


#!/usr/bin/env python

class TankId(int):
  id = 0
  def __new__(self):
    TankId.id += 1
    return int(TankId.id)

class Tank:
  def __init__(self, root = None):
    self.items = {}
    self.ids = {} # XX use a list
    self.todo = []
    if root is not None:
      self.flatten( root )
  def __str__(self):
    return str( self.ids.values() )
  ###########################################
  # before pickle

  def flat_list(self, item):
    for i in range(len(item)):
      item[i] = self.queue( item[i] )
  def flat_dict(self, item):
    for key in item.keys():
      item[key] = self.queue( item[key] )
  def flat_attrs(self, item):
    for key in item.__dict__.keys():
      item.__dict__[key] = self.queue( item.__dict__[key] )
  def queue(self, item):
    assert not isinstance( item, TankId )
    if not ( isinstance(item, list) or isinstance(item, dict) or hasattr(item, "__dict__") ):
      return item
    if id(item) not in self.items:
      self.todo.append(item)
      tankid = TankId()
      self.items[ id(item) ] = tankid # temporary
      self.ids[ tankid ] = item
    else:
      tankid = self.items[ id(item) ]
    return tankid
  def flatten(self, root):
    print "flaten()"
    self.root = root
    self.queue( self.root )
    while self.todo:
      item = self.todo.pop(0)
      if isinstance(item, list):
        self.flat_list(item)
      if isinstance(item, dict):
        self.flat_dict(item)
      if hasattr(item, "__dict__"):
        self.flat_attrs(item)
      # tuple ? other immutables ? __set/getstate__ ?

    del self.items
  ###########################################
  # after pickle

  def lift_list(self, item):
    for i in range(len(item)):
      item[i] = self.recover( item[i] )
  def lift_dict(self, item):
    for key in item.keys():
      item[key] = self.recover( item[key] )
  def lift_attrs(self, item):
    for key in item.__dict__.keys():
      item.__dict__[key] = self.recover( item.__dict__[key] )
 def recover(self, item):
    if isinstance( item, TankId ):
      item = self.ids[ item ]
      assert not isinstance( item, TankId )
    return item
  def lift(self):
    print "lifting"
    for item in self.ids.values():
      if isinstance(item, list):
        self.lift_list(item)
      if isinstance(item, dict):
        self.lift_dict(item)
      if hasattr(item, "__dict__"):
        self.lift_attrs(item)
      # tuple ? other immutables ? __set/getstate__ ?
    return self.root



-- 
Simon Burton, B.Sc.
Licensed PO Box A66
ANU Canberra 2601
Australia
Ph. 02 6249 6940
http://arrowtheory.com 





More information about the Python-list mailing list