Pickling limitation with instances defining __cmp__/__hash__?

Erik Max Francis max at alcyone.com
Mon Aug 8 23:25:29 EDT 2005


Erik Max Francis wrote:

> I've come across a limitation in unpickling certain types of complex 
> data structures which involve instances that override __hash__, and was 
> wondering if it was known (basic searches didn't seem to come up with 
> anything similar) and if there is a workaround for it short of 
> restructuring the data structures in question.

Replying to my own (old) post here: I finally got back to this and found
that the best solution was to define surrogate set and dictionary classes
that internally use the objects' IDs as keys, eliminating the circular
dependency.  Examples of SafeSet and SafeDict serving this purpose
follow.  Note that I only defined the methods that I used, rather than
the full and complete interfaces for sets and dictionaries, but they
should serve as an example for those who need to do more:

class SafeSet(_ReprMixin):
    """Set-like container that files its members under an ID.

    Members live in a plain dictionary keyed by ``self.ider(member)``, so
    the container hashes those keys rather than the members themselves.
    Only part of the built-in set interface is implemented.
    """

    @staticmethod
    def ider(thing):
        """Return the key for *thing*; this default reads ``thing.id``."""
        return thing.id

    def __init__(self, ider=None):
        """Create an empty set.

        ider -- optional callable mapping a member to its key.  When given,
        it is stored on the instance and used instead of the default.
        """
        if ider is not None:
            self.ider = ider
        self._map = {}  # id -> thing

    def __len__(self):
        """Return the number of members."""
        return len(self._map)

    def __contains__(self, thing):
        """Return whether a member with the same key as *thing* is present."""
        return self.ider(thing) in self._map

    def add(self, thing):
        """Add *thing*.

        Two distinct objects sharing one key indicate corrupted data, so
        that case trips an assertion instead of silently replacing the
        stored member.
        """
        key = self.ider(thing)
        if key in self._map:
            assert self._map[key] is thing
        self._map[key] = thing

    def remove(self, thing):
        """Remove *thing*; raise KeyError if its key is not present."""
        del self._map[self.ider(thing)]

    def pop(self):
        """Remove and return one member; raise KeyError if the set is empty.

        Which member is returned is whatever ``dict.popitem`` selects.
        """
        return self._map.popitem()[1]

    def clear(self):
        """Remove all members."""
        self._map.clear()

    def copy(self):
        """Return a shallow copy whose membership is independent of this set.

        ``copy.copy`` alone would leave both objects sharing one ``_map``
        dictionary, so the clone is given its own copy of the dictionary.
        The members themselves are not copied.
        """
        clone = copy.copy(self)
        clone._map = self._map.copy()
        return clone

    def update(self, sequence):
        """Add every item of the iterable *sequence*."""
        for thing in sequence:
            self.add(thing)

    def difference(self, other):
        """Return a new SafeSet of the members whose keys are not in *other*.

        *other* must be a SafeSet-like object exposing ``_map``.
        """
        # Forward only an instance-level ider override; a set relying on
        # the class default yields a result that does the same.
        result = SafeSet(vars(self).get('ider'))
        other_keys = other._map
        for key in self._map:
            if key not in other_keys:
                result.add(self._map[key])
        return result

    def __iter__(self):
        """Iterate over the members (not over their keys)."""
        return (self._map[key] for key in self._map)

    def __str__(self):
        """Return ``'set([...])'`` listing the keys, not the members."""
        return 'set(' + str(list(self._map)) + ')'


class SafeDict(_ReprMixin):
    """Dictionary-like container that files its entries under the key's ID.

    Two internal dictionaries are kept in step: ``_keys`` maps
    ``self.ider(key)`` to the key object and ``_values`` maps the same ID to
    the stored value.  The container therefore hashes the IDs rather than
    the key objects themselves.  Only part of the built-in dict interface is
    implemented.
    """

    @staticmethod
    def ider(thing):
        """Return the ID for key object *thing*; this default reads ``thing.id``."""
        return thing.id

    def __init__(self, ider=None):
        """Create an empty mapping.

        ider -- optional callable mapping a key object to its ID.  When
        given, it is stored on the instance and used instead of the default.
        """
        if ider is not None:
            self.ider = ider
        self._keys = {}  # id -> key
        self._values = {}  # id -> value

    def __len__(self):
        """Return the number of entries."""
        return len(self._keys)

    def __contains__(self, thing):
        """Return whether an entry with the same ID as *thing* exists."""
        return self.ider(thing) in self._keys

    def __getitem__(self, thing):
        """Return the value stored for *thing*; raise KeyError if absent."""
        return self._values[self.ider(thing)]

    def __setitem__(self, thing, value):
        """Store *value* under *thing*, replacing any entry with the same ID."""
        key = self.ider(thing)
        self._keys[key] = thing
        self._values[key] = value

    def __delitem__(self, thing, value=None):
        """Delete the entry for *thing*; raise KeyError if absent.

        ``del d[thing]`` passes a single argument, so *value* must be
        optional.  It is ignored and accepted only so that existing
        two-argument calls keep working.
        """
        key = self.ider(thing)
        del self._keys[key]
        del self._values[key]

    def keys(self):
        """Return a list of the key objects."""
        return list(self._keys.values())

    def iterkeys(self):
        """Return an iterator over the key objects."""
        return (self._keys[key] for key in self._keys)

    def values(self):
        """Return a list of the stored values."""
        return list(self._values.values())

    def itervalues(self):
        """Return an iterator over the stored values."""
        return (self._values[key] for key in self._values)

    def items(self):
        """Return a list of ``(key object, value)`` pairs."""
        return [(self._keys[x], self._values[x]) for x in self._keys]

    def iteritems(self):
        """Return an iterator over ``(key object, value)`` pairs."""
        return ((self._keys[x], self._values[x]) for x in self._keys)

    def clear(self):
        """Remove all entries."""
        self._keys.clear()
        self._values.clear()

    def copy(self):
        """Return a shallow copy whose entries are independent of this one.

        ``copy.copy`` alone would leave both objects sharing the same
        internal dictionaries, so the clone is given its own copies.  The
        key objects and values themselves are not copied.
        """
        clone = copy.copy(self)
        clone._keys = self._keys.copy()
        clone._values = self._values.copy()
        return clone

    def update(self, mapping):
        """Copy every entry of *mapping* into this dictionary.

        *mapping* may provide either ``iteritems()`` or ``items()``.
        """
        if hasattr(mapping, 'iteritems'):
            pairs = mapping.iteritems()
        else:
            pairs = mapping.items()
        for key, value in pairs:
            self[key] = value

    def has_key(self, thing):
        """Return whether an entry with the same ID as *thing* exists."""
        return self.ider(thing) in self._keys

    def get(self, thing, default=None):
        """Return the value for *thing*, or *default* if there is none."""
        return self._values.get(self.ider(thing), default)

    def setdefault(self, thing, default=None):
        """Return the value for *thing*, storing *default* first if absent."""
        key = self.ider(thing)
        if key in self._keys:
            return self._values[key]
        self._keys[key] = thing
        self._values[key] = default
        return default

    def __iter__(self):
        """Iterate over the key objects (not over their IDs)."""
        return (self._keys[key] for key in self._keys)

    def __str__(self):
        """Return the string form of the internal ID -> value dictionary."""
        return str(self._values)



-- 
Erik Max Francis && max at alcyone.com && http://www.alcyone.com/max/
San Jose, CA, USA && 37 20 N 121 53 W && AIM erikmaxfrancis
   The only completely consistent people are the dead.
   -- Aldous Huxley



More information about the Python-list mailing list