[Python-checkins] python/dist/src/Lib pickle.py,1.134,1.135

gvanrossum@users.sourceforge.net gvanrossum@users.sourceforge.net
Fri, 31 Jan 2003 10:53:30 -0800


Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1:/tmp/cvs-serv32128

Modified Files:
	pickle.py 
Log Message:
Another extension to __reduce__().  It can return a 4- or 5-tuple now.
The 4th item can be None or an iterator yielding list items, which are
used to append() or extend() the object.  The 5th item can be None or
an iterator yielding a dict's (key, value) pairs, which are stuffed
into the object using __setitem__.

Also (as a separate, though related, feature) add "batching" for list
and dict items.  If you pickled a dict or list with a million items in
the past, it would push a million items onto the stack.  It now pushes
only 1000 items at a time on the stack, using repeated APPENDS or
SETITEMS opcodes.  (For lists, I hope that using many short extend()
calls doesn't exhibit quadratic behavior.)


Index: pickle.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/pickle.py,v
retrieving revision 1.134
retrieving revision 1.135
diff -C2 -d -r1.134 -r1.135
*** pickle.py	31 Jan 2003 18:33:16 -0000	1.134
--- pickle.py	31 Jan 2003 18:53:21 -0000	1.135
***************
*** 318,334 ****
              raise PicklingError("%s must return string or tuple" % reduce)
  
!         # Assert that it returned a 2-tuple or 3-tuple, and unpack it
          l = len(rv)
!         if l == 2:
!             func, args = rv
!             state = None
!         elif l == 3:
!             func, args, state = rv
!         else:
              raise PicklingError("Tuple returned by %s must have "
!                                 "exactly two or three elements" % reduce)
  
          # Save the reduce() output and finally memoize the object
!         self.save_reduce(func, args, state, obj)
  
      def persistent_id(self, obj):
--- 318,329 ----
              raise PicklingError("%s must return string or tuple" % reduce)
  
!         # Assert that it returned an appropriately sized tuple
          l = len(rv)
!         if not (2 <= l <= 5):
              raise PicklingError("Tuple returned by %s must have "
!                                 "two to five elements" % reduce)
  
          # Save the reduce() output and finally memoize the object
!         self.save_reduce(obj=obj, *rv)
  
      def persistent_id(self, obj):
***************
*** 344,348 ****
              self.write(PERSID + str(pid) + '\n')
  
!     def save_reduce(self, func, args, state=None, obj=None):
          # This API is be called by some subclasses
  
--- 339,344 ----
              self.write(PERSID + str(pid) + '\n')
  
!     def save_reduce(self, func, args, state=None,
!                     listitems=None, dictitems=None, obj=None):
          # This API is be called by some subclasses
  
***************
*** 412,415 ****
--- 408,422 ----
              self.memoize(obj)
  
+         # More new special cases (that work with older protocols as
+         # well): when __reduce__ returns a tuple with 4 or 5 items,
+         # the 4th and 5th item should be iterators that provide list
+         # items and dict items (as (key, value) tuples), or None.
+ 
+         if listitems is not None:
+             self._batch_appends(listitems)
+ 
+         if dictitems is not None:
+             self._batch_setitems(dictitems)
+ 
          if state is not None:
              save(state)
***************
*** 435,460 ****
  
          if isinstance(obj, list):
!             n = len(obj)
!             if n > 1:
!                 write(MARK)
!                 for x in obj:
!                     save(x)
!                 write(APPENDS)
!             elif n == 1:
!                 save(obj[0])
!                 write(APPEND)
          elif isinstance(obj, dict):
!             n = len(obj)
!             if n > 1:
!                 write(MARK)
!                 for k, v in obj.iteritems():
!                     save(k)
!                     save(v)
!                 write(SETITEMS)
!             elif n == 1:
!                 k, v = obj.items()[0]
!                 save(k)
!                 save(v)
!                 write(SETITEM)
  
          getstate = getattr(obj, "__getstate__", None)
--- 442,448 ----
  
          if isinstance(obj, list):
!             self._batch_appends(iter(obj))
          elif isinstance(obj, dict):
!             self._batch_setitems(obj.iteritems())
  
          getstate = getattr(obj, "__getstate__", None)
***************
*** 684,743 ****
      def save_list(self, obj):
          write = self.write
-         save  = self.save
  
          if self.bin:
              write(EMPTY_LIST)
!             self.memoize(obj)
!             n = len(obj)
              if n > 1:
                  write(MARK)
!                 for element in obj:
!                     save(element)
                  write(APPENDS)
              elif n:
!                 assert n == 1
!                 save(obj[0])
!                 write(APPEND)
!             # else the list is empty, and we're already done
! 
!         else:   # proto 0 -- can't use EMPTY_LIST or APPENDS
!             write(MARK + LIST)
!             self.memoize(obj)
!             for element in obj:
!                 save(element)
                  write(APPEND)
! 
!     dispatch[ListType] = save_list
  
      def save_dict(self, obj):
          write = self.write
-         save  = self.save
-         items = obj.iteritems()
  
          if self.bin:
              write(EMPTY_DICT)
!             self.memoize(obj)
!             if len(obj) > 1:
!                 write(MARK)
!                 for key, value in items:
!                     save(key)
!                     save(value)
!                 write(SETITEMS)
!                 return
!             # else (dict is empty or a singleton), fall through to the
!             # SETITEM code at the end
!         else:   # proto 0 -- can't use EMPTY_DICT or SETITEMS
              write(MARK + DICT)
-             self.memoize(obj)
  
!         # proto 0 or len(obj) < 2
!         for key, value in items:
!             save(key)
!             save(value)
!             write(SETITEM)
  
      dispatch[DictionaryType] = save_dict
      if not PyStringMap is None:
          dispatch[PyStringMap] = save_dict
  
      def save_inst(self, obj):
--- 672,768 ----
      def save_list(self, obj):
          write = self.write
  
          if self.bin:
              write(EMPTY_LIST)
!         else:   # proto 0 -- can't use EMPTY_LIST
!             write(MARK + LIST)
! 
!         self.memoize(obj)
!         self._batch_appends(iter(obj))
! 
!     dispatch[ListType] = save_list
! 
!     _BATCHSIZE = 1000
! 
!     def _batch_appends(self, items):
!         # Helper to batch up APPENDS sequences
!         save = self.save
!         write = self.write
! 
!         if not self.bin:
!             for x in items:
!                 save(x)
!                 write(APPEND)
!             return
! 
!         r = xrange(self._BATCHSIZE)
!         while items is not None:
!             tmp = []
!             for i in r:
!                 try:
!                     tmp.append(items.next())
!                 except StopIteration:
!                     items = None
!                     break
!             n = len(tmp)
              if n > 1:
                  write(MARK)
!                 for x in tmp:
!                     save(x)
                  write(APPENDS)
              elif n:
!                 save(tmp[0])
                  write(APPEND)
!             # else tmp is empty, and we're done
  
      def save_dict(self, obj):
          write = self.write
  
          if self.bin:
              write(EMPTY_DICT)
!         else:   # proto 0 -- can't use EMPTY_DICT
              write(MARK + DICT)
  
!         self.memoize(obj)
!         self._batch_setitems(obj.iteritems())
  
      dispatch[DictionaryType] = save_dict
      if not PyStringMap is None:
          dispatch[PyStringMap] = save_dict
+ 
+     def _batch_setitems(self, items):
+         # Helper to batch up SETITEMS sequences; batching needs proto >= 1
+         save = self.save
+         write = self.write
+ 
+         if not self.bin:
+             for k, v in items:
+                 save(k)
+                 save(v)
+                 write(SETITEM)
+             return
+ 
+         r = xrange(self._BATCHSIZE)
+         while items is not None:
+             tmp = []
+             for i in r:
+                 try:
+                     tmp.append(items.next())
+                 except StopIteration:
+                     items = None
+                     break
+             n = len(tmp)
+             if n > 1:
+                 write(MARK)
+                 for k, v in tmp:
+                     save(k)
+                     save(v)
+                 write(SETITEMS)
+             elif n:
+                 k, v = tmp[0]
+                 save(k)
+                 save(v)
+                 write(SETITEM)
+             # else tmp is empty, and we're done
  
      def save_inst(self, obj):