[Python-3000-checkins] r64180 - in python/branches/py3k: Lib/pickle.py Lib/pickletools.py Lib/test/pickletester.py Lib/test/test_pickle.py Lib/test/test_pickletools.py Misc/NEWS Modules/_pickle.c setup.py

alexandre.vassalotti python-3000-checkins at python.org
Thu Jun 12 20:26:05 CEST 2008


Author: alexandre.vassalotti
Date: Thu Jun 12 20:26:05 2008
New Revision: 64180

Log:
Restore the _pickle accelerator module.
Removed temporary Windows support.
The 64-bit bug with integer unpickling is now fixed.


Added:
   python/branches/py3k/Modules/_pickle.c
      - copied, changed from r64162, /python/branches/py3k/Modules/_pickle.c
Modified:
   python/branches/py3k/Lib/pickle.py
   python/branches/py3k/Lib/pickletools.py
   python/branches/py3k/Lib/test/pickletester.py
   python/branches/py3k/Lib/test/test_pickle.py
   python/branches/py3k/Lib/test/test_pickletools.py
   python/branches/py3k/Misc/NEWS
   python/branches/py3k/setup.py

Modified: python/branches/py3k/Lib/pickle.py
==============================================================================
--- python/branches/py3k/Lib/pickle.py	(original)
+++ python/branches/py3k/Lib/pickle.py	Thu Jun 12 20:26:05 2008
@@ -174,7 +174,7 @@
 
 # Pickling machinery
 
-class Pickler:
+class _Pickler:
 
     def __init__(self, file, protocol=None):
         """This takes a binary file for writing a pickle data stream.
@@ -182,21 +182,19 @@
         All protocols now read and write bytes.
 
         The optional protocol argument tells the pickler to use the
-        given protocol; supported protocols are 0, 1, 2.  The default
-        protocol is 2; it's been supported for many years now.
-
-        Protocol 1 is more efficient than protocol 0; protocol 2 is
-        more efficient than protocol 1.
+        given protocol; supported protocols are 0, 1, 2, 3.  The default
+        protocol is 3; a backward-incompatible protocol designed for
+        Python 3.0.
 
         Specifying a negative protocol version selects the highest
         protocol version supported.  The higher the protocol used, the
         more recent the version of Python needed to read the pickle
         produced.
 
-        The file parameter must have a write() method that accepts a single
-        string argument.  It can thus be an open file object, a StringIO
-        object, or any other custom object that meets this interface.
-
+        The file argument must have a write() method that accepts a single
+        bytes argument. It can thus be a file object opened for binary
+        writing, an io.BytesIO instance, or any other custom object that
+        meets this interface.
         """
         if protocol is None:
             protocol = DEFAULT_PROTOCOL
@@ -204,7 +202,10 @@
             protocol = HIGHEST_PROTOCOL
         elif not 0 <= protocol <= HIGHEST_PROTOCOL:
             raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
-        self.write = file.write
+        try:
+            self.write = file.write
+        except AttributeError:
+            raise TypeError("file must have a 'write' attribute")
         self.memo = {}
         self.proto = int(protocol)
         self.bin = protocol >= 1
@@ -270,10 +271,10 @@
 
         return GET + repr(i).encode("ascii") + b'\n'
 
-    def save(self, obj):
+    def save(self, obj, save_persistent_id=True):
         # Check for persistent id (defined by a subclass)
         pid = self.persistent_id(obj)
-        if pid:
+        if pid is not None and save_persistent_id:
             self.save_pers(pid)
             return
 
@@ -341,7 +342,7 @@
     def save_pers(self, pid):
         # Save a persistent id reference
         if self.bin:
-            self.save(pid)
+            self.save(pid, save_persistent_id=False)
             self.write(BINPERSID)
         else:
             self.write(PERSID + str(pid).encode("ascii") + b'\n')
@@ -350,13 +351,13 @@
                     listitems=None, dictitems=None, obj=None):
         # This API is called by some subclasses
 
-        # Assert that args is a tuple or None
+        # Assert that args is a tuple
         if not isinstance(args, tuple):
-            raise PicklingError("args from reduce() should be a tuple")
+            raise PicklingError("args from save_reduce() should be a tuple")
 
         # Assert that func is callable
         if not hasattr(func, '__call__'):
-            raise PicklingError("func from reduce should be callable")
+            raise PicklingError("func from save_reduce() should be callable")
 
         save = self.save
         write = self.write
@@ -438,31 +439,6 @@
             self.write(obj and TRUE or FALSE)
     dispatch[bool] = save_bool
 
-    def save_int(self, obj, pack=struct.pack):
-        if self.bin:
-            # If the int is small enough to fit in a signed 4-byte 2's-comp
-            # format, we can store it more efficiently than the general
-            # case.
-            # First one- and two-byte unsigned ints:
-            if obj >= 0:
-                if obj <= 0xff:
-                    self.write(BININT1 + bytes([obj]))
-                    return
-                if obj <= 0xffff:
-                    self.write(BININT2 + bytes([obj&0xff, obj>>8]))
-                    return
-            # Next check for 4-byte signed ints:
-            high_bits = obj >> 31  # note that Python shift sign-extends
-            if high_bits == 0 or high_bits == -1:
-                # All high bits are copies of bit 2**31, so the value
-                # fits in a 4-byte signed int.
-                self.write(BININT + pack("<i", obj))
-                return
-        # Text pickle, or int too big to fit in signed 4-byte format.
-        self.write(INT + repr(obj).encode("ascii") + b'\n')
-    # XXX save_int is merged into save_long
-    # dispatch[int] = save_int
-
     def save_long(self, obj, pack=struct.pack):
         if self.bin:
             # If the int is small enough to fit in a signed 4-byte 2's-comp
@@ -503,7 +479,7 @@
 
     def save_bytes(self, obj, pack=struct.pack):
         if self.proto < 3:
-            self.save_reduce(bytes, (list(obj),))
+            self.save_reduce(bytes, (list(obj),), obj=obj)
             return
         n = len(obj)
         if n < 256:
@@ -579,12 +555,6 @@
 
     dispatch[tuple] = save_tuple
 
-    # save_empty_tuple() isn't used by anything in Python 2.3.  However, I
-    # found a Pickler subclass in Zope3 that calls it, so it's not harmless
-    # to remove it.
-    def save_empty_tuple(self, obj):
-        self.write(EMPTY_TUPLE)
-
     def save_list(self, obj):
         write = self.write
 
@@ -696,7 +666,7 @@
             module = whichmodule(obj, name)
 
         try:
-            __import__(module)
+            __import__(module, level=0)
             mod = sys.modules[module]
             klass = getattr(mod, name)
         except (ImportError, KeyError, AttributeError):
@@ -720,9 +690,19 @@
                 else:
                     write(EXT4 + pack("<i", code))
                 return
+        # Non-ASCII identifiers are supported only with protocols >= 3.
+        if self.proto >= 3:
+            write(GLOBAL + bytes(module, "utf-8") + b'\n' +
+                  bytes(name, "utf-8") + b'\n')
+        else:
+            try:
+                write(GLOBAL + bytes(module, "ascii") + b'\n' +
+                      bytes(name, "ascii") + b'\n')
+            except UnicodeEncodeError:
+                raise PicklingError(
+                    "can't pickle global identifier '%s.%s' using "
+                    "pickle protocol %i" % (module, name, self.proto))
 
-        write(GLOBAL + bytes(module, "utf-8") + b'\n' +
-              bytes(name, "utf-8") + b'\n')
         self.memoize(obj)
 
     dispatch[FunctionType] = save_global
@@ -781,7 +761,7 @@
 
 # Unpickling machinery
 
-class Unpickler:
+class _Unpickler:
 
     def __init__(self, file, *, encoding="ASCII", errors="strict"):
         """This takes a binary file for reading a pickle data stream.
@@ -841,6 +821,9 @@
         while stack[k] is not mark: k = k-1
         return k
 
+    def persistent_load(self, pid):
+        raise UnpickingError("unsupported persistent id encountered")
+
     dispatch = {}
 
     def load_proto(self):
@@ -850,7 +833,7 @@
     dispatch[PROTO[0]] = load_proto
 
     def load_persid(self):
-        pid = self.readline()[:-1]
+        pid = self.readline()[:-1].decode("ascii")
         self.append(self.persistent_load(pid))
     dispatch[PERSID[0]] = load_persid
 
@@ -879,9 +862,9 @@
             val = True
         else:
             try:
-                val = int(data)
+                val = int(data, 0)
             except ValueError:
-                val = int(data)
+                val = int(data, 0)
         self.append(val)
     dispatch[INT[0]] = load_int
 
@@ -933,7 +916,8 @@
                 break
         else:
             raise ValueError("insecure string pickle: %r" % orig)
-        self.append(codecs.escape_decode(rep)[0])
+        self.append(codecs.escape_decode(rep)[0]
+                    .decode(self.encoding, self.errors))
     dispatch[STRING[0]] = load_string
 
     def load_binstring(self):
@@ -975,7 +959,7 @@
     dispatch[TUPLE[0]] = load_tuple
 
     def load_empty_tuple(self):
-        self.stack.append(())
+        self.append(())
     dispatch[EMPTY_TUPLE[0]] = load_empty_tuple
 
     def load_tuple1(self):
@@ -991,11 +975,11 @@
     dispatch[TUPLE3[0]] = load_tuple3
 
     def load_empty_list(self):
-        self.stack.append([])
+        self.append([])
     dispatch[EMPTY_LIST[0]] = load_empty_list
 
     def load_empty_dictionary(self):
-        self.stack.append({})
+        self.append({})
     dispatch[EMPTY_DICT[0]] = load_empty_dictionary
 
     def load_list(self):
@@ -1022,13 +1006,13 @@
     def _instantiate(self, klass, k):
         args = tuple(self.stack[k+1:])
         del self.stack[k:]
-        instantiated = 0
+        instantiated = False
         if (not args and
                 isinstance(klass, type) and
                 not hasattr(klass, "__getinitargs__")):
             value = _EmptyClass()
             value.__class__ = klass
-            instantiated = 1
+            instantiated = True
         if not instantiated:
             try:
                 value = klass(*args)
@@ -1038,8 +1022,8 @@
         self.append(value)
 
     def load_inst(self):
-        module = self.readline()[:-1]
-        name = self.readline()[:-1]
+        module = self.readline()[:-1].decode("ascii")
+        name = self.readline()[:-1].decode("ascii")
         klass = self.find_class(module, name)
         self._instantiate(klass, self.marker())
     dispatch[INST[0]] = load_inst
@@ -1059,8 +1043,8 @@
     dispatch[NEWOBJ[0]] = load_newobj
 
     def load_global(self):
-        module = self.readline()[:-1]
-        name = self.readline()[:-1]
+        module = self.readline()[:-1].decode("utf-8")
+        name = self.readline()[:-1].decode("utf-8")
         klass = self.find_class(module, name)
         self.append(klass)
     dispatch[GLOBAL[0]] = load_global
@@ -1095,11 +1079,7 @@
 
     def find_class(self, module, name):
         # Subclasses may override this
-        if isinstance(module, bytes_types):
-            module = module.decode("utf-8")
-        if isinstance(name, bytes_types):
-            name = name.decode("utf-8")
-        __import__(module)
+        __import__(module, level=0)
         mod = sys.modules[module]
         klass = getattr(mod, name)
         return klass
@@ -1131,31 +1111,33 @@
     dispatch[DUP[0]] = load_dup
 
     def load_get(self):
-        self.append(self.memo[self.readline()[:-1].decode("ascii")])
+        i = int(self.readline()[:-1])
+        self.append(self.memo[i])
     dispatch[GET[0]] = load_get
 
     def load_binget(self):
-        i = ord(self.read(1))
-        self.append(self.memo[repr(i)])
+        i = self.read(1)[0]
+        self.append(self.memo[i])
     dispatch[BINGET[0]] = load_binget
 
     def load_long_binget(self):
         i = mloads(b'i' + self.read(4))
-        self.append(self.memo[repr(i)])
+        self.append(self.memo[i])
     dispatch[LONG_BINGET[0]] = load_long_binget
 
     def load_put(self):
-        self.memo[self.readline()[:-1].decode("ascii")] = self.stack[-1]
+        i = int(self.readline()[:-1])
+        self.memo[i] = self.stack[-1]
     dispatch[PUT[0]] = load_put
 
     def load_binput(self):
-        i = ord(self.read(1))
-        self.memo[repr(i)] = self.stack[-1]
+        i = self.read(1)[0]
+        self.memo[i] = self.stack[-1]
     dispatch[BINPUT[0]] = load_binput
 
     def load_long_binput(self):
         i = mloads(b'i' + self.read(4))
-        self.memo[repr(i)] = self.stack[-1]
+        self.memo[i] = self.stack[-1]
     dispatch[LONG_BINPUT[0]] = load_long_binput
 
     def load_append(self):
@@ -1321,6 +1303,12 @@
         n -= 1 << (nbytes * 8)
     return n
 
+# Use the faster _pickle if possible
+try:
+    from _pickle import *
+except ImportError:
+    Pickler, Unpickler = _Pickler, _Unpickler
+
 # Shorthands
 
 def dump(obj, file, protocol=None):
@@ -1333,14 +1321,14 @@
     assert isinstance(res, bytes_types)
     return res
 
-def load(file):
-    return Unpickler(file).load()
+def load(file, *, encoding="ASCII", errors="strict"):
+    return Unpickler(file, encoding=encoding, errors=errors).load()
 
-def loads(s):
+def loads(s, *, encoding="ASCII", errors="strict"):
     if isinstance(s, str):
         raise TypeError("Can't load pickle from unicode string")
     file = io.BytesIO(s)
-    return Unpickler(file).load()
+    return Unpickler(file, encoding=encoding, errors=errors).load()
 
 # Doctest
 

Modified: python/branches/py3k/Lib/pickletools.py
==============================================================================
--- python/branches/py3k/Lib/pickletools.py	(original)
+++ python/branches/py3k/Lib/pickletools.py	Thu Jun 12 20:26:05 2008
@@ -2079,11 +2079,12 @@
    70: t        TUPLE      (MARK at 49)
    71: p    PUT        5
    74: R    REDUCE
-   75: V    UNICODE    'def'
-   80: p    PUT        6
-   83: s    SETITEM
-   84: a    APPEND
-   85: .    STOP
+   75: p    PUT        6
+   78: V    UNICODE    'def'
+   83: p    PUT        7
+   86: s    SETITEM
+   87: a    APPEND
+   88: .    STOP
 highest protocol among opcodes = 0
 
 Try again with a "binary" pickle.
@@ -2115,11 +2116,12 @@
    49: t            TUPLE      (MARK at 37)
    50: q        BINPUT     5
    52: R        REDUCE
-   53: X        BINUNICODE 'def'
-   61: q        BINPUT     6
-   63: s        SETITEM
-   64: e        APPENDS    (MARK at 3)
-   65: .    STOP
+   53: q        BINPUT     6
+   55: X        BINUNICODE 'def'
+   63: q        BINPUT     7
+   65: s        SETITEM
+   66: e        APPENDS    (MARK at 3)
+   67: .    STOP
 highest protocol among opcodes = 1
 
 Exercise the INST/OBJ/BUILD family.

Modified: python/branches/py3k/Lib/test/pickletester.py
==============================================================================
--- python/branches/py3k/Lib/test/pickletester.py	(original)
+++ python/branches/py3k/Lib/test/pickletester.py	Thu Jun 12 20:26:05 2008
@@ -362,7 +362,7 @@
     return x
 
 class AbstractPickleTests(unittest.TestCase):
-    # Subclass must define self.dumps, self.loads, self.error.
+    # Subclass must define self.dumps, self.loads.
 
     _testdata = create_data()
 
@@ -463,8 +463,9 @@
             self.assertEqual(list(x[0].attr.keys()), [1])
             self.assert_(x[0].attr[1] is x)
 
-    def test_garyp(self):
-        self.assertRaises(self.error, self.loads, b'garyp')
+    def test_get(self):
+        self.assertRaises(KeyError, self.loads, b'g0\np0')
+        self.assertEquals(self.loads(b'((Kdtp0\nh\x00l.))'), [(100,), (100,)])
 
     def test_insecure_strings(self):
         # XXX Some of these tests are temporarily disabled
@@ -955,7 +956,7 @@
         f = open(TESTFN, "wb")
         try:
             f.close()
-            self.assertRaises(ValueError, self.module.dump, 123, f)
+            self.assertRaises(ValueError, pickle.dump, 123, f)
         finally:
             os.remove(TESTFN)
 
@@ -964,24 +965,24 @@
         f = open(TESTFN, "wb")
         try:
             f.close()
-            self.assertRaises(ValueError, self.module.dump, 123, f)
+            self.assertRaises(ValueError, pickle.dump, 123, f)
         finally:
             os.remove(TESTFN)
 
     def test_highest_protocol(self):
         # Of course this needs to be changed when HIGHEST_PROTOCOL changes.
-        self.assertEqual(self.module.HIGHEST_PROTOCOL, 3)
+        self.assertEqual(pickle.HIGHEST_PROTOCOL, 3)
 
     def test_callapi(self):
         from io import BytesIO
         f = BytesIO()
         # With and without keyword arguments
-        self.module.dump(123, f, -1)
-        self.module.dump(123, file=f, protocol=-1)
-        self.module.dumps(123, -1)
-        self.module.dumps(123, protocol=-1)
-        self.module.Pickler(f, -1)
-        self.module.Pickler(f, protocol=-1)
+        pickle.dump(123, f, -1)
+        pickle.dump(123, file=f, protocol=-1)
+        pickle.dumps(123, -1)
+        pickle.dumps(123, protocol=-1)
+        pickle.Pickler(f, -1)
+        pickle.Pickler(f, protocol=-1)
 
 class AbstractPersistentPicklerTests(unittest.TestCase):
 

Modified: python/branches/py3k/Lib/test/test_pickle.py
==============================================================================
--- python/branches/py3k/Lib/test/test_pickle.py	(original)
+++ python/branches/py3k/Lib/test/test_pickle.py	Thu Jun 12 20:26:05 2008
@@ -7,37 +7,42 @@
 from test.pickletester import AbstractPickleModuleTests
 from test.pickletester import AbstractPersistentPicklerTests
 
-class PickleTests(AbstractPickleTests, AbstractPickleModuleTests):
+try:
+    import _pickle
+    has_c_implementation = True
+except ImportError:
+    has_c_implementation = False
 
-    module = pickle
-    error = KeyError
 
-    def dumps(self, arg, proto=None):
-        return pickle.dumps(arg, proto)
+class PickleTests(AbstractPickleModuleTests):
+    pass
 
-    def loads(self, buf):
-        return pickle.loads(buf)
 
-class PicklerTests(AbstractPickleTests):
+class PyPicklerTests(AbstractPickleTests):
 
-    error = KeyError
+    pickler = pickle._Pickler
+    unpickler = pickle._Unpickler
 
     def dumps(self, arg, proto=None):
         f = io.BytesIO()
-        p = pickle.Pickler(f, proto)
+        p = self.pickler(f, proto)
         p.dump(arg)
         f.seek(0)
         return bytes(f.read())
 
     def loads(self, buf):
         f = io.BytesIO(buf)
-        u = pickle.Unpickler(f)
+        u = self.unpickler(f)
         return u.load()
 
-class PersPicklerTests(AbstractPersistentPicklerTests):
+
+class PyPersPicklerTests(AbstractPersistentPicklerTests):
+
+    pickler = pickle._Pickler
+    unpickler = pickle._Unpickler
 
     def dumps(self, arg, proto=None):
-        class PersPickler(pickle.Pickler):
+        class PersPickler(self.pickler):
             def persistent_id(subself, obj):
                 return self.persistent_id(obj)
         f = io.BytesIO()
@@ -47,19 +52,29 @@
         return f.read()
 
     def loads(self, buf):
-        class PersUnpickler(pickle.Unpickler):
+        class PersUnpickler(self.unpickler):
             def persistent_load(subself, obj):
                 return self.persistent_load(obj)
         f = io.BytesIO(buf)
         u = PersUnpickler(f)
         return u.load()
 
+
+if has_c_implementation:
+    class CPicklerTests(PyPicklerTests):
+        pickler = _pickle.Pickler
+        unpickler = _pickle.Unpickler
+
+    class CPersPicklerTests(PyPersPicklerTests):
+        pickler = _pickle.Pickler
+        unpickler = _pickle.Unpickler
+
+
 def test_main():
-    support.run_unittest(
-        PickleTests,
-        PicklerTests,
-        PersPicklerTests
-    )
+    tests = [PickleTests, PyPicklerTests, PyPersPicklerTests]
+    if has_c_implementation:
+        tests.extend([CPicklerTests, CPersPicklerTests])
+    support.run_unittest(*tests)
     support.run_doctest(pickle)
 
 if __name__ == "__main__":

Modified: python/branches/py3k/Lib/test/test_pickletools.py
==============================================================================
--- python/branches/py3k/Lib/test/test_pickletools.py	(original)
+++ python/branches/py3k/Lib/test/test_pickletools.py	Thu Jun 12 20:26:05 2008
@@ -12,8 +12,6 @@
     def loads(self, buf):
         return pickle.loads(buf)
 
-    module = pickle
-    error = KeyError
 
 def test_main():
     support.run_unittest(OptimizedPickleTests)

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Thu Jun 12 20:26:05 2008
@@ -84,6 +84,10 @@
 
 - Added C optimized implementation of io.StringIO.
 
+- The ``pickle`` module now automatically uses an optimized C
+  implementation of Pickler and Unpickler when available. The
+  ``cPickle`` module is no longer needed.
+
 - Removed the ``htmllib`` and ``sgmllib`` modules.
 
 - The deprecated ``SmartCookie`` and ``SimpleCookie`` classes have

Copied: python/branches/py3k/Modules/_pickle.c (from r64162, /python/branches/py3k/Modules/_pickle.c)
==============================================================================
--- /python/branches/py3k/Modules/_pickle.c	(original)
+++ python/branches/py3k/Modules/_pickle.c	Thu Jun 12 20:26:05 2008
@@ -2694,24 +2694,24 @@
  * of x-platform bugs.
  */
 static long
-calc_binint(char *s, int size)
+calc_binint(char *bytes, int size)
 {
-    unsigned char c;
-    int i;
-    long x = 0L;
+    unsigned char *s = (unsigned char *)bytes;
+    int i = size;
+    long x = 0;
 
     for (i = 0; i < size; i++) {
-        c = (unsigned char)s[i];
-        x |= (long)c << (i * 8);
+        x |= (long)s[i] << (i * 8);
     }
-#if SIZEOF_LONG > 4
+
     /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
      * is signed, so on a box with longs bigger than 4 bytes we need
      * to extend a BININT's sign bit to the full width.
      */
-    if (x == 4 && x & (1L << 31))
-        x |= (~0L) << 32;
-#endif
+    if (SIZEOF_LONG > 4 && size == 4) {
+        x |= -(x & (1L << 31));
+    }
+
     return x;
 }
 

Modified: python/branches/py3k/setup.py
==============================================================================
--- python/branches/py3k/setup.py	(original)
+++ python/branches/py3k/setup.py	Thu Jun 12 20:26:05 2008
@@ -422,6 +422,9 @@
         exts.append( Extension("_functools", ["_functoolsmodule.c"]) )
         # Memory-based IO accelerator modules
         exts.append( Extension("_bytesio", ["_bytesio.c"]) )
+        exts.append( Extension("_stringio", ["_stringio.c"]) )
+        # C-optimized pickle replacement
+        exts.append( Extension("_pickle", ["_pickle.c"]) )
         # atexit
         exts.append( Extension("atexit", ["atexitmodule.c"]) )
         # _json speedups


More information about the Python-3000-checkins mailing list