[Python-3000-checkins] r64163 - in python/branches/py3k: Lib/pickle.py Lib/pickletools.py Lib/test/pickletester.py Lib/test/test_pickle.py Lib/test/test_pickletools.py Misc/NEWS Modules/_pickle.c PC/VC6/pythoncore.dsp PC/VS7.1/pythoncore.vcproj PC/VS8.0/pythoncore.vcproj PC/config.c setup.py
Guido van Rossum
guido at python.org
Thu Jun 12 06:08:35 CEST 2008
Whoa, I object. This seems to be rolling back functionality that was
pretty stable for several alphas, and (if I may believe the docstring
change) you're even dropping protocol 3. Isn't there a more elegant
way? This seems way too drastic a change so close to the beta release.
On Wed, Jun 11, 2008 at 8:10 PM, benjamin.peterson
<python-3000-checkins at python.org> wrote:
> Author: benjamin.peterson
> Date: Thu Jun 12 05:10:02 2008
> New Revision: 64163
>
> Log:
> revert the addition of _pickle because it was causing havoc with 64-bit
>
>
> Removed:
> python/branches/py3k/Modules/_pickle.c
> Modified:
> python/branches/py3k/Lib/pickle.py
> python/branches/py3k/Lib/pickletools.py
> python/branches/py3k/Lib/test/pickletester.py
> python/branches/py3k/Lib/test/test_pickle.py
> python/branches/py3k/Lib/test/test_pickletools.py
> python/branches/py3k/Misc/NEWS
> python/branches/py3k/PC/VC6/pythoncore.dsp
> python/branches/py3k/PC/VS7.1/pythoncore.vcproj
> python/branches/py3k/PC/VS8.0/pythoncore.vcproj
> python/branches/py3k/PC/config.c
> python/branches/py3k/setup.py
>
> Modified: python/branches/py3k/Lib/pickle.py
> ==============================================================================
> --- python/branches/py3k/Lib/pickle.py (original)
> +++ python/branches/py3k/Lib/pickle.py Thu Jun 12 05:10:02 2008
> @@ -174,7 +174,7 @@
>
> # Pickling machinery
>
> -class _Pickler:
> +class Pickler:
>
> def __init__(self, file, protocol=None):
> """This takes a binary file for writing a pickle data stream.
> @@ -182,19 +182,21 @@
> All protocols now read and write bytes.
>
> The optional protocol argument tells the pickler to use the
> - given protocol; supported protocols are 0, 1, 2, 3. The default
> - protocol is 3; a backward-incompatible protocol designed for
> - Python 3.0.
> + given protocol; supported protocols are 0, 1, 2. The default
> + protocol is 2; it's been supported for many years now.
> +
> + Protocol 1 is more efficient than protocol 0; protocol 2 is
> + more efficient than protocol 1.
>
> Specifying a negative protocol version selects the highest
> protocol version supported. The higher the protocol used, the
> more recent the version of Python needed to read the pickle
> produced.
>
> - The file argument must have a write() method that accepts a single
> - bytes argument. It can thus be a file object opened for binary
> - writing, a io.BytesIO instance, or any other custom object that
> - meets this interface.
> + The file parameter must have a write() method that accepts a single
> + string argument. It can thus be an open file object, a StringIO
> + object, or any other custom object that meets this interface.
> +
> """
> if protocol is None:
> protocol = DEFAULT_PROTOCOL
> @@ -202,10 +204,7 @@
> protocol = HIGHEST_PROTOCOL
> elif not 0 <= protocol <= HIGHEST_PROTOCOL:
> raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
> - try:
> - self.write = file.write
> - except AttributeError:
> - raise TypeError("file must have a 'write' attribute")
> + self.write = file.write
> self.memo = {}
> self.proto = int(protocol)
> self.bin = protocol >= 1
> @@ -271,10 +270,10 @@
>
> return GET + repr(i).encode("ascii") + b'\n'
>
> - def save(self, obj, save_persistent_id=True):
> + def save(self, obj):
> # Check for persistent id (defined by a subclass)
> pid = self.persistent_id(obj)
> - if pid is not None and save_persistent_id:
> + if pid:
> self.save_pers(pid)
> return
>
> @@ -342,7 +341,7 @@
> def save_pers(self, pid):
> # Save a persistent id reference
> if self.bin:
> - self.save(pid, save_persistent_id=False)
> + self.save(pid)
> self.write(BINPERSID)
> else:
> self.write(PERSID + str(pid).encode("ascii") + b'\n')
> @@ -351,13 +350,13 @@
> listitems=None, dictitems=None, obj=None):
> # This API is called by some subclasses
>
> - # Assert that args is a tuple
> + # Assert that args is a tuple or None
> if not isinstance(args, tuple):
> - raise PicklingError("args from save_reduce() should be a tuple")
> + raise PicklingError("args from reduce() should be a tuple")
>
> # Assert that func is callable
> if not hasattr(func, '__call__'):
> - raise PicklingError("func from save_reduce() should be callable")
> + raise PicklingError("func from reduce should be callable")
>
> save = self.save
> write = self.write
> @@ -439,6 +438,31 @@
> self.write(obj and TRUE or FALSE)
> dispatch[bool] = save_bool
>
> + def save_int(self, obj, pack=struct.pack):
> + if self.bin:
> + # If the int is small enough to fit in a signed 4-byte 2's-comp
> + # format, we can store it more efficiently than the general
> + # case.
> + # First one- and two-byte unsigned ints:
> + if obj >= 0:
> + if obj <= 0xff:
> + self.write(BININT1 + bytes([obj]))
> + return
> + if obj <= 0xffff:
> + self.write(BININT2 + bytes([obj&0xff, obj>>8]))
> + return
> + # Next check for 4-byte signed ints:
> + high_bits = obj >> 31 # note that Python shift sign-extends
> + if high_bits == 0 or high_bits == -1:
> + # All high bits are copies of bit 2**31, so the value
> + # fits in a 4-byte signed int.
> + self.write(BININT + pack("<i", obj))
> + return
> + # Text pickle, or int too big to fit in signed 4-byte format.
> + self.write(INT + repr(obj).encode("ascii") + b'\n')
> + # XXX save_int is merged into save_long
> + # dispatch[int] = save_int
> +
> def save_long(self, obj, pack=struct.pack):
> if self.bin:
> # If the int is small enough to fit in a signed 4-byte 2's-comp
> @@ -479,7 +503,7 @@
>
> def save_bytes(self, obj, pack=struct.pack):
> if self.proto < 3:
> - self.save_reduce(bytes, (list(obj),), obj=obj)
> + self.save_reduce(bytes, (list(obj),))
> return
> n = len(obj)
> if n < 256:
> @@ -555,6 +579,12 @@
>
> dispatch[tuple] = save_tuple
>
> + # save_empty_tuple() isn't used by anything in Python 2.3. However, I
> + # found a Pickler subclass in Zope3 that calls it, so it's not harmless
> + # to remove it.
> + def save_empty_tuple(self, obj):
> + self.write(EMPTY_TUPLE)
> +
> def save_list(self, obj):
> write = self.write
>
> @@ -666,7 +696,7 @@
> module = whichmodule(obj, name)
>
> try:
> - __import__(module, level=0)
> + __import__(module)
> mod = sys.modules[module]
> klass = getattr(mod, name)
> except (ImportError, KeyError, AttributeError):
> @@ -690,19 +720,9 @@
> else:
> write(EXT4 + pack("<i", code))
> return
> - # Non-ASCII identifiers are supported only with protocols >= 3.
> - if self.proto >= 3:
> - write(GLOBAL + bytes(module, "utf-8") + b'\n' +
> - bytes(name, "utf-8") + b'\n')
> - else:
> - try:
> - write(GLOBAL + bytes(module, "ascii") + b'\n' +
> - bytes(name, "ascii") + b'\n')
> - except UnicodeEncodeError:
> - raise PicklingError(
> - "can't pickle global identifier '%s.%s' using "
> - "pickle protocol %i" % (module, name, self.proto))
>
> + write(GLOBAL + bytes(module, "utf-8") + b'\n' +
> + bytes(name, "utf-8") + b'\n')
> self.memoize(obj)
>
> dispatch[FunctionType] = save_global
> @@ -761,7 +781,7 @@
>
> # Unpickling machinery
>
> -class _Unpickler:
> +class Unpickler:
>
> def __init__(self, file, *, encoding="ASCII", errors="strict"):
> """This takes a binary file for reading a pickle data stream.
> @@ -821,9 +841,6 @@
> while stack[k] is not mark: k = k-1
> return k
>
> - def persistent_load(self, pid):
> - raise UnpickingError("unsupported persistent id encountered")
> -
> dispatch = {}
>
> def load_proto(self):
> @@ -833,7 +850,7 @@
> dispatch[PROTO[0]] = load_proto
>
> def load_persid(self):
> - pid = self.readline()[:-1].decode("ascii")
> + pid = self.readline()[:-1]
> self.append(self.persistent_load(pid))
> dispatch[PERSID[0]] = load_persid
>
> @@ -862,9 +879,9 @@
> val = True
> else:
> try:
> - val = int(data, 0)
> + val = int(data)
> except ValueError:
> - val = int(data, 0)
> + val = int(data)
> self.append(val)
> dispatch[INT[0]] = load_int
>
> @@ -916,8 +933,7 @@
> break
> else:
> raise ValueError("insecure string pickle: %r" % orig)
> - self.append(codecs.escape_decode(rep)[0]
> - .decode(self.encoding, self.errors))
> + self.append(codecs.escape_decode(rep)[0])
> dispatch[STRING[0]] = load_string
>
> def load_binstring(self):
> @@ -959,7 +975,7 @@
> dispatch[TUPLE[0]] = load_tuple
>
> def load_empty_tuple(self):
> - self.append(())
> + self.stack.append(())
> dispatch[EMPTY_TUPLE[0]] = load_empty_tuple
>
> def load_tuple1(self):
> @@ -975,11 +991,11 @@
> dispatch[TUPLE3[0]] = load_tuple3
>
> def load_empty_list(self):
> - self.append([])
> + self.stack.append([])
> dispatch[EMPTY_LIST[0]] = load_empty_list
>
> def load_empty_dictionary(self):
> - self.append({})
> + self.stack.append({})
> dispatch[EMPTY_DICT[0]] = load_empty_dictionary
>
> def load_list(self):
> @@ -1006,13 +1022,13 @@
> def _instantiate(self, klass, k):
> args = tuple(self.stack[k+1:])
> del self.stack[k:]
> - instantiated = False
> + instantiated = 0
> if (not args and
> isinstance(klass, type) and
> not hasattr(klass, "__getinitargs__")):
> value = _EmptyClass()
> value.__class__ = klass
> - instantiated = True
> + instantiated = 1
> if not instantiated:
> try:
> value = klass(*args)
> @@ -1022,8 +1038,8 @@
> self.append(value)
>
> def load_inst(self):
> - module = self.readline()[:-1].decode("ascii")
> - name = self.readline()[:-1].decode("ascii")
> + module = self.readline()[:-1]
> + name = self.readline()[:-1]
> klass = self.find_class(module, name)
> self._instantiate(klass, self.marker())
> dispatch[INST[0]] = load_inst
> @@ -1043,8 +1059,8 @@
> dispatch[NEWOBJ[0]] = load_newobj
>
> def load_global(self):
> - module = self.readline()[:-1].decode("utf-8")
> - name = self.readline()[:-1].decode("utf-8")
> + module = self.readline()[:-1]
> + name = self.readline()[:-1]
> klass = self.find_class(module, name)
> self.append(klass)
> dispatch[GLOBAL[0]] = load_global
> @@ -1079,7 +1095,11 @@
>
> def find_class(self, module, name):
> # Subclasses may override this
> - __import__(module, level=0)
> + if isinstance(module, bytes_types):
> + module = module.decode("utf-8")
> + if isinstance(name, bytes_types):
> + name = name.decode("utf-8")
> + __import__(module)
> mod = sys.modules[module]
> klass = getattr(mod, name)
> return klass
> @@ -1111,33 +1131,31 @@
> dispatch[DUP[0]] = load_dup
>
> def load_get(self):
> - i = int(self.readline()[:-1])
> - self.append(self.memo[i])
> + self.append(self.memo[self.readline()[:-1].decode("ascii")])
> dispatch[GET[0]] = load_get
>
> def load_binget(self):
> - i = self.read(1)[0]
> - self.append(self.memo[i])
> + i = ord(self.read(1))
> + self.append(self.memo[repr(i)])
> dispatch[BINGET[0]] = load_binget
>
> def load_long_binget(self):
> i = mloads(b'i' + self.read(4))
> - self.append(self.memo[i])
> + self.append(self.memo[repr(i)])
> dispatch[LONG_BINGET[0]] = load_long_binget
>
> def load_put(self):
> - i = int(self.readline()[:-1])
> - self.memo[i] = self.stack[-1]
> + self.memo[self.readline()[:-1].decode("ascii")] = self.stack[-1]
> dispatch[PUT[0]] = load_put
>
> def load_binput(self):
> - i = self.read(1)[0]
> - self.memo[i] = self.stack[-1]
> + i = ord(self.read(1))
> + self.memo[repr(i)] = self.stack[-1]
> dispatch[BINPUT[0]] = load_binput
>
> def load_long_binput(self):
> i = mloads(b'i' + self.read(4))
> - self.memo[i] = self.stack[-1]
> + self.memo[repr(i)] = self.stack[-1]
> dispatch[LONG_BINPUT[0]] = load_long_binput
>
> def load_append(self):
> @@ -1303,12 +1321,6 @@
> n -= 1 << (nbytes * 8)
> return n
>
> -# Use the faster _pickle if possible
> -try:
> - from _pickle import *
> -except ImportError:
> - Pickler, Unpickler = _Pickler, _Unpickler
> -
> # Shorthands
>
> def dump(obj, file, protocol=None):
> @@ -1321,14 +1333,14 @@
> assert isinstance(res, bytes_types)
> return res
>
> -def load(file, *, encoding="ASCII", errors="strict"):
> - return Unpickler(file, encoding=encoding, errors=errors).load()
> +def load(file):
> + return Unpickler(file).load()
>
> -def loads(s, *, encoding="ASCII", errors="strict"):
> +def loads(s):
> if isinstance(s, str):
> raise TypeError("Can't load pickle from unicode string")
> file = io.BytesIO(s)
> - return Unpickler(file, encoding=encoding, errors=errors).load()
> + return Unpickler(file).load()
>
> # Doctest
>
>
> Modified: python/branches/py3k/Lib/pickletools.py
> ==============================================================================
> --- python/branches/py3k/Lib/pickletools.py (original)
> +++ python/branches/py3k/Lib/pickletools.py Thu Jun 12 05:10:02 2008
> @@ -2079,12 +2079,11 @@
> 70: t TUPLE (MARK at 49)
> 71: p PUT 5
> 74: R REDUCE
> - 75: p PUT 6
> - 78: V UNICODE 'def'
> - 83: p PUT 7
> - 86: s SETITEM
> - 87: a APPEND
> - 88: . STOP
> + 75: V UNICODE 'def'
> + 80: p PUT 6
> + 83: s SETITEM
> + 84: a APPEND
> + 85: . STOP
> highest protocol among opcodes = 0
>
> Try again with a "binary" pickle.
> @@ -2116,12 +2115,11 @@
> 49: t TUPLE (MARK at 37)
> 50: q BINPUT 5
> 52: R REDUCE
> - 53: q BINPUT 6
> - 55: X BINUNICODE 'def'
> - 63: q BINPUT 7
> - 65: s SETITEM
> - 66: e APPENDS (MARK at 3)
> - 67: . STOP
> + 53: X BINUNICODE 'def'
> + 61: q BINPUT 6
> + 63: s SETITEM
> + 64: e APPENDS (MARK at 3)
> + 65: . STOP
> highest protocol among opcodes = 1
>
> Exercise the INST/OBJ/BUILD family.
>
> Modified: python/branches/py3k/Lib/test/pickletester.py
> ==============================================================================
> --- python/branches/py3k/Lib/test/pickletester.py (original)
> +++ python/branches/py3k/Lib/test/pickletester.py Thu Jun 12 05:10:02 2008
> @@ -362,7 +362,7 @@
> return x
>
> class AbstractPickleTests(unittest.TestCase):
> - # Subclass must define self.dumps, self.loads.
> + # Subclass must define self.dumps, self.loads, self.error.
>
> _testdata = create_data()
>
> @@ -463,9 +463,8 @@
> self.assertEqual(list(x[0].attr.keys()), [1])
> self.assert_(x[0].attr[1] is x)
>
> - def test_get(self):
> - self.assertRaises(KeyError, self.loads, b'g0\np0')
> - self.assertEquals(self.loads(b'((Kdtp0\nh\x00l.))'), [(100,), (100,)])
> + def test_garyp(self):
> + self.assertRaises(self.error, self.loads, b'garyp')
>
> def test_insecure_strings(self):
> # XXX Some of these tests are temporarily disabled
> @@ -956,7 +955,7 @@
> f = open(TESTFN, "wb")
> try:
> f.close()
> - self.assertRaises(ValueError, pickle.dump, 123, f)
> + self.assertRaises(ValueError, self.module.dump, 123, f)
> finally:
> os.remove(TESTFN)
>
> @@ -965,24 +964,24 @@
> f = open(TESTFN, "wb")
> try:
> f.close()
> - self.assertRaises(ValueError, pickle.dump, 123, f)
> + self.assertRaises(ValueError, self.module.dump, 123, f)
> finally:
> os.remove(TESTFN)
>
> def test_highest_protocol(self):
> # Of course this needs to be changed when HIGHEST_PROTOCOL changes.
> - self.assertEqual(pickle.HIGHEST_PROTOCOL, 3)
> + self.assertEqual(self.module.HIGHEST_PROTOCOL, 3)
>
> def test_callapi(self):
> from io import BytesIO
> f = BytesIO()
> # With and without keyword arguments
> - pickle.dump(123, f, -1)
> - pickle.dump(123, file=f, protocol=-1)
> - pickle.dumps(123, -1)
> - pickle.dumps(123, protocol=-1)
> - pickle.Pickler(f, -1)
> - pickle.Pickler(f, protocol=-1)
> + self.module.dump(123, f, -1)
> + self.module.dump(123, file=f, protocol=-1)
> + self.module.dumps(123, -1)
> + self.module.dumps(123, protocol=-1)
> + self.module.Pickler(f, -1)
> + self.module.Pickler(f, protocol=-1)
>
> class AbstractPersistentPicklerTests(unittest.TestCase):
>
>
> Modified: python/branches/py3k/Lib/test/test_pickle.py
> ==============================================================================
> --- python/branches/py3k/Lib/test/test_pickle.py (original)
> +++ python/branches/py3k/Lib/test/test_pickle.py Thu Jun 12 05:10:02 2008
> @@ -7,42 +7,37 @@
> from test.pickletester import AbstractPickleModuleTests
> from test.pickletester import AbstractPersistentPicklerTests
>
> -try:
> - import _pickle
> - has_c_implementation = True
> -except ImportError:
> - has_c_implementation = False
> +class PickleTests(AbstractPickleTests, AbstractPickleModuleTests):
>
> + module = pickle
> + error = KeyError
>
> -class PickleTests(AbstractPickleModuleTests):
> - pass
> + def dumps(self, arg, proto=None):
> + return pickle.dumps(arg, proto)
>
> + def loads(self, buf):
> + return pickle.loads(buf)
>
> -class PyPicklerTests(AbstractPickleTests):
> +class PicklerTests(AbstractPickleTests):
>
> - pickler = pickle._Pickler
> - unpickler = pickle._Unpickler
> + error = KeyError
>
> def dumps(self, arg, proto=None):
> f = io.BytesIO()
> - p = self.pickler(f, proto)
> + p = pickle.Pickler(f, proto)
> p.dump(arg)
> f.seek(0)
> return bytes(f.read())
>
> def loads(self, buf):
> f = io.BytesIO(buf)
> - u = self.unpickler(f)
> + u = pickle.Unpickler(f)
> return u.load()
>
> -
> -class PyPersPicklerTests(AbstractPersistentPicklerTests):
> -
> - pickler = pickle._Pickler
> - unpickler = pickle._Unpickler
> +class PersPicklerTests(AbstractPersistentPicklerTests):
>
> def dumps(self, arg, proto=None):
> - class PersPickler(self.pickler):
> + class PersPickler(pickle.Pickler):
> def persistent_id(subself, obj):
> return self.persistent_id(obj)
> f = io.BytesIO()
> @@ -52,29 +47,19 @@
> return f.read()
>
> def loads(self, buf):
> - class PersUnpickler(self.unpickler):
> + class PersUnpickler(pickle.Unpickler):
> def persistent_load(subself, obj):
> return self.persistent_load(obj)
> f = io.BytesIO(buf)
> u = PersUnpickler(f)
> return u.load()
>
> -
> -if has_c_implementation:
> - class CPicklerTests(PyPicklerTests):
> - pickler = _pickle.Pickler
> - unpickler = _pickle.Unpickler
> -
> - class CPersPicklerTests(PyPersPicklerTests):
> - pickler = _pickle.Pickler
> - unpickler = _pickle.Unpickler
> -
> -
> def test_main():
> - tests = [PickleTests, PyPicklerTests, PyPersPicklerTests]
> - if has_c_implementation:
> - tests.extend([CPicklerTests, CPersPicklerTests])
> - support.run_unittest(*tests)
> + support.run_unittest(
> + PickleTests,
> + PicklerTests,
> + PersPicklerTests
> + )
> support.run_doctest(pickle)
>
> if __name__ == "__main__":
>
> Modified: python/branches/py3k/Lib/test/test_pickletools.py
> ==============================================================================
> --- python/branches/py3k/Lib/test/test_pickletools.py (original)
> +++ python/branches/py3k/Lib/test/test_pickletools.py Thu Jun 12 05:10:02 2008
> @@ -12,6 +12,8 @@
> def loads(self, buf):
> return pickle.loads(buf)
>
> + module = pickle
> + error = KeyError
>
> def test_main():
> support.run_unittest(OptimizedPickleTests)
>
> Modified: python/branches/py3k/Misc/NEWS
> ==============================================================================
> --- python/branches/py3k/Misc/NEWS (original)
> +++ python/branches/py3k/Misc/NEWS Thu Jun 12 05:10:02 2008
> @@ -82,10 +82,6 @@
>
> - Added C optimized implementation of io.StringIO.
>
> -- The ``pickle`` module is now automatically use an optimized C
> - implementation of Pickler and Unpickler when available. The
> - ``cPickle`` module is no longer needed.
> -
> - Removed the ``htmllib`` and ``sgmllib`` modules.
>
> - The deprecated ``SmartCookie`` and ``SimpleCookie`` classes have
>
> Deleted: python/branches/py3k/Modules/_pickle.c
> ==============================================================================
> --- python/branches/py3k/Modules/_pickle.c Thu Jun 12 05:10:02 2008
> +++ (empty file)
> @@ -1,4546 +0,0 @@
> -#include "Python.h"
> -#include "structmember.h"
> -
> -PyDoc_STRVAR(pickle_module_doc,
> -"Optimized C implementation for the Python pickle module.");
> -
> -/* Bump this when new opcodes are added to the pickle protocol. */
> -enum {
> - HIGHEST_PROTOCOL = 3,
> - DEFAULT_PROTOCOL = 3
> -};
> -
> -
> -/* Pickle opcodes. These must be kept updated with pickle.py.
> - Extensive docs are in pickletools.py. */
> -enum opcode {
> - MARK = '(',
> - STOP = '.',
> - POP = '0',
> - POP_MARK = '1',
> - DUP = '2',
> - FLOAT = 'F',
> - INT = 'I',
> - BININT = 'J',
> - BININT1 = 'K',
> - LONG = 'L',
> - BININT2 = 'M',
> - NONE = 'N',
> - PERSID = 'P',
> - BINPERSID = 'Q',
> - REDUCE = 'R',
> - STRING = 'S',
> - BINSTRING = 'T',
> - SHORT_BINSTRING = 'U',
> - UNICODE = 'V',
> - BINUNICODE = 'X',
> - APPEND = 'a',
> - BUILD = 'b',
> - GLOBAL = 'c',
> - DICT = 'd',
> - EMPTY_DICT = '}',
> - APPENDS = 'e',
> - GET = 'g',
> - BINGET = 'h',
> - INST = 'i',
> - LONG_BINGET = 'j',
> - LIST = 'l',
> - EMPTY_LIST = ']',
> - OBJ = 'o',
> - PUT = 'p',
> - BINPUT = 'q',
> - LONG_BINPUT = 'r',
> - SETITEM = 's',
> - TUPLE = 't',
> - EMPTY_TUPLE = ')',
> - SETITEMS = 'u',
> - BINFLOAT = 'G',
> -
> - /* Protocol 2. */
> - PROTO = '\x80',
> - NEWOBJ = '\x81',
> - EXT1 = '\x82',
> - EXT2 = '\x83',
> - EXT4 = '\x84',
> - TUPLE1 = '\x85',
> - TUPLE2 = '\x86',
> - TUPLE3 = '\x87',
> - NEWTRUE = '\x88',
> - NEWFALSE = '\x89',
> - LONG1 = '\x8a',
> - LONG4 = '\x8b',
> -
> - /* Protocol 3 (Python 3.x) */
> - BINBYTES = 'B',
> - SHORT_BINBYTES = 'C',
> -};
> -
> -/* These aren't opcodes -- they're ways to pickle bools before protocol 2
> - * so that unpicklers written before bools were introduced unpickle them
> - * as ints, but unpicklers after can recognize that bools were intended.
> - * Note that protocol 2 added direct ways to pickle bools.
> - */
> -#undef TRUE
> -#define TRUE "I01\n"
> -#undef FALSE
> -#define FALSE "I00\n"
> -
> -enum {
> - /* Keep in synch with pickle.Pickler._BATCHSIZE. This is how many elements
> - batch_list/dict() pumps out before doing APPENDS/SETITEMS. Nothing will
> - break if this gets out of synch with pickle.py, but it's unclear that would
> - help anything either. */
> - BATCHSIZE = 1000,
> -
> - /* Nesting limit until Pickler, when running in "fast mode", starts
> - checking for self-referential data-structures. */
> - FAST_NESTING_LIMIT = 50,
> -
> - /* Size of the write buffer of Pickler. Higher values will reduce the
> - number of calls to the write() method of the output stream. */
> - WRITE_BUF_SIZE = 256,
> -};
> -
> -/* Exception classes for pickle. These should override the ones defined in
> - pickle.py, when the C-optimized Pickler and Unpickler are used. */
> -static PyObject *PickleError;
> -static PyObject *PicklingError;
> -static PyObject *UnpicklingError;
> -
> -/* copyreg.dispatch_table, {type_object: pickling_function} */
> -static PyObject *dispatch_table;
> -/* For EXT[124] opcodes. */
> -/* copyreg._extension_registry, {(module_name, function_name): code} */
> -static PyObject *extension_registry;
> -/* copyreg._inverted_registry, {code: (module_name, function_name)} */
> -static PyObject *inverted_registry;
> -/* copyreg._extension_cache, {code: object} */
> -static PyObject *extension_cache;
> -
> -/* XXX: Are these really nescessary? */
> -/* As the name says, an empty tuple. */
> -static PyObject *empty_tuple;
> -/* For looking up name pairs in copyreg._extension_registry. */
> -static PyObject *two_tuple;
> -
> -static int
> -stack_underflow(void)
> -{
> - PyErr_SetString(UnpicklingError, "unpickling stack underflow");
> - return -1;
> -}
> -
> -/* Internal data type used as the unpickling stack. */
> -typedef struct {
> - PyObject_HEAD
> - int length; /* number of initial slots in data currently used */
> - int size; /* number of slots in data allocated */
> - PyObject **data;
> -} Pdata;
> -
> -static void
> -Pdata_dealloc(Pdata *self)
> -{
> - int i;
> - PyObject **p;
> -
> - for (i = self->length, p = self->data; --i >= 0; p++) {
> - Py_DECREF(*p);
> - }
> - if (self->data)
> - PyMem_Free(self->data);
> - PyObject_Del(self);
> -}
> -
> -static PyTypeObject Pdata_Type = {
> - PyVarObject_HEAD_INIT(NULL, 0)
> - "_pickle.Pdata", /*tp_name*/
> - sizeof(Pdata), /*tp_basicsize*/
> - 0, /*tp_itemsize*/
> - (destructor)Pdata_dealloc, /*tp_dealloc*/
> -};
> -
> -static PyObject *
> -Pdata_New(void)
> -{
> - Pdata *self;
> -
> - if (!(self = PyObject_New(Pdata, &Pdata_Type)))
> - return NULL;
> - self->size = 8;
> - self->length = 0;
> - self->data = PyMem_Malloc(self->size * sizeof(PyObject *));
> - if (self->data)
> - return (PyObject *)self;
> - Py_DECREF(self);
> - return PyErr_NoMemory();
> -}
> -
> -
> -/* Retain only the initial clearto items. If clearto >= the current
> - * number of items, this is a (non-erroneous) NOP.
> - */
> -static int
> -Pdata_clear(Pdata *self, int clearto)
> -{
> - int i;
> - PyObject **p;
> -
> - if (clearto < 0)
> - return stack_underflow();
> - if (clearto >= self->length)
> - return 0;
> -
> - for (i = self->length, p = self->data + clearto; --i >= clearto; p++) {
> - Py_CLEAR(*p);
> - }
> - self->length = clearto;
> -
> - return 0;
> -}
> -
> -static int
> -Pdata_grow(Pdata *self)
> -{
> - int bigger;
> - size_t nbytes;
> - PyObject **tmp;
> -
> - bigger = (self->size << 1) + 1;
> - if (bigger <= 0) /* was 0, or new value overflows */
> - goto nomemory;
> - if ((int)(size_t)bigger != bigger)
> - goto nomemory;
> - nbytes = (size_t)bigger * sizeof(PyObject *);
> - if (nbytes / sizeof(PyObject *) != (size_t)bigger)
> - goto nomemory;
> - tmp = PyMem_Realloc(self->data, nbytes);
> - if (tmp == NULL)
> - goto nomemory;
> - self->data = tmp;
> - self->size = bigger;
> - return 0;
> -
> - nomemory:
> - PyErr_NoMemory();
> - return -1;
> -}
> -
> -/* D is a Pdata*. Pop the topmost element and store it into V, which
> - * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
> - * is raised and V is set to NULL.
> - */
> -static PyObject *
> -Pdata_pop(Pdata *self)
> -{
> - if (self->length == 0) {
> - PyErr_SetString(UnpicklingError, "bad pickle data");
> - return NULL;
> - }
> - return self->data[--(self->length)];
> -}
> -#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
> -
> -static int
> -Pdata_push(Pdata *self, PyObject *obj)
> -{
> - if (self->length == self->size && Pdata_grow(self) < 0) {
> - return -1;
> - }
> - self->data[self->length++] = obj;
> - return 0;
> -}
> -
> -/* Push an object on stack, transferring its ownership to the stack. */
> -#define PDATA_PUSH(D, O, ER) do { \
> - if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
> -
> -/* Push an object on stack, adding a new reference to the object. */
> -#define PDATA_APPEND(D, O, ER) do { \
> - Py_INCREF((O)); \
> - if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
> -
> -static PyObject *
> -Pdata_poptuple(Pdata *self, Py_ssize_t start)
> -{
> - PyObject *tuple;
> - Py_ssize_t len, i, j;
> -
> - len = self->length - start;
> - tuple = PyTuple_New(len);
> - if (tuple == NULL)
> - return NULL;
> - for (i = start, j = 0; j < len; i++, j++)
> - PyTuple_SET_ITEM(tuple, j, self->data[i]);
> -
> - self->length = start;
> - return tuple;
> -}
> -
> -static PyObject *
> -Pdata_poplist(Pdata *self, Py_ssize_t start)
> -{
> - PyObject *list;
> - Py_ssize_t len, i, j;
> -
> - len = self->length - start;
> - list = PyList_New(len);
> - if (list == NULL)
> - return NULL;
> - for (i = start, j = 0; j < len; i++, j++)
> - PyList_SET_ITEM(list, j, self->data[i]);
> -
> - self->length = start;
> - return list;
> -}
> -
> -typedef struct PicklerObject {
> - PyObject_HEAD
> - PyObject *write; /* write() method of the output stream */
> - PyObject *memo; /* Memo dictionary, keep track of the seen
> - objects to support self-referential objects
> - pickling. */
> - PyObject *pers_func; /* persistent_id() method, can be NULL */
> - PyObject *arg;
> - int proto; /* Pickle protocol number, >= 0 */
> - int bin; /* Boolean, true if proto > 0 */
> - int nesting; /* Current nesting level, this is to guard
> - save() from going into infinite recursion
> - and segfaulting. */
> - int buf_size; /* Size of the current buffered pickle data */
> - char *write_buf; /* Write buffer, this is to avoid calling the
> - write() method of the output stream too
> - often. */
> - int fast; /* Enable fast mode if set to a true value.
> - The fast mode disable the usage of memo,
> - therefore speeding the pickling process by
> - not generating superfluous PUT opcodes. It
> - should not be used if with self-referential
> - objects. */
> - int fast_nesting;
> - PyObject *fast_memo;
> -} PicklerObject;
> -
> -typedef struct UnpicklerObject {
> - PyObject_HEAD
> - Pdata *stack; /* Pickle data stack, store unpickled objects. */
> - PyObject *readline; /* readline() method of the output stream */
> - PyObject *read; /* read() method of the output stream */
> - PyObject *memo; /* Memo dictionary, provide the objects stored
> - using the PUT opcodes. */
> - PyObject *arg;
> - PyObject *pers_func; /* persistent_load() method, can be NULL. */
> - PyObject *last_string; /* Reference to the last string read by the
> - readline() method. */
> - char *buffer; /* Reading buffer. */
> - char *encoding; /* Name of the encoding to be used for
> - decoding strings pickled using Python
> - 2.x. The default value is "ASCII" */
> - char *errors; /* Name of errors handling scheme to used when
> - decoding strings. The default value is
> - "strict". */
> - int *marks; /* Mark stack, used for unpickling container
> - objects. */
> - Py_ssize_t num_marks; /* Number of marks in the mark stack. */
> - Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
> -} UnpicklerObject;
> -
> -/* Forward declarations */
> -static int save(PicklerObject *, PyObject *, int);
> -static int save_reduce(PicklerObject *, PyObject *, PyObject *);
> -static PyTypeObject Pickler_Type;
> -static PyTypeObject Unpickler_Type;
> -
> -
> -/* Helpers for creating the argument tuple passed to functions. This has the
> - performance advantage of calling PyTuple_New() only once. */
> -
> -#define ARG_TUP(self, obj) do { \
> - if ((self)->arg || ((self)->arg=PyTuple_New(1))) { \
> - Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0)); \
> - PyTuple_SET_ITEM((self)->arg, 0, (obj)); \
> - } \
> - else { \
> - Py_DECREF((obj)); \
> - } \
> - } while (0)
> -
> -#define FREE_ARG_TUP(self) do { \
> - if ((self)->arg->ob_refcnt > 1) \
> - Py_CLEAR((self)->arg); \
> - } while (0)
> -
> -/* A temporary cleaner API for fast single argument function call.
> -
> - XXX: Does caching the argument tuple provides any real performance benefits?
> -
> - A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
> - glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
> - when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
> - immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
> - (i.e, call PyTuple_New() and store the returned value in an array), to save
> - one second (wall clock time). Either ways, the loading time a pickle stream
> - large enough to generate this number of calls would be massively
> - overwhelmed by other factors, like I/O throughput, the GC traversal and
> - object allocation overhead. So, I really doubt these functions provide any
> - real benefits.
> -
> - On the other hand, oprofile reports that pickle spends a lot of time in
> - these functions. But, that is probably more related to the function call
> - overhead, than the argument tuple allocation.
> -
> - XXX: And, what is the reference behavior of these? Steal, borrow? At first
> - glance, it seems to steal the reference of 'arg' and borrow the reference
> - of 'func'.
> - */
> -static PyObject *
> -pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
> -{
> - PyObject *result = NULL;
> -
> - ARG_TUP(self, arg);
> - if (self->arg) {
> - result = PyObject_Call(func, self->arg, NULL);
> - FREE_ARG_TUP(self);
> - }
> - return result;
> -}
> -
> -static PyObject *
> -unpickler_call(UnpicklerObject *self, PyObject *func, PyObject *arg)
> -{
> - PyObject *result = NULL;
> -
> - ARG_TUP(self, arg);
> - if (self->arg) {
> - result = PyObject_Call(func, self->arg, NULL);
> - FREE_ARG_TUP(self);
> - }
> - return result;
> -}
> -
> -static Py_ssize_t
> -pickler_write(PicklerObject *self, const char *s, Py_ssize_t n)
> -{
> - PyObject *data, *result;
> -
> - if (s == NULL) {
> - if (!(self->buf_size))
> - return 0;
> - data = PyBytes_FromStringAndSize(self->write_buf, self->buf_size);
> - if (data == NULL)
> - return -1;
> - }
> - else {
> - if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
> - if (pickler_write(self, NULL, 0) < 0)
> - return -1;
> - }
> -
> - if (n > WRITE_BUF_SIZE) {
> - if (!(data = PyBytes_FromStringAndSize(s, n)))
> - return -1;
> - }
> - else {
> - memcpy(self->write_buf + self->buf_size, s, n);
> - self->buf_size += n;
> - return n;
> - }
> - }
> -
> - /* object with write method */
> - result = pickler_call(self, self->write, data);
> - if (result == NULL)
> - return -1;
> -
> - Py_DECREF(result);
> - self->buf_size = 0;
> - return n;
> -}
> -
> -/* XXX: These read/readline functions ought to be optimized. Buffered I/O
> - might help a lot, especially with the new (but much slower) io library.
> - On the other hand, the added complexity might not be worth it.
> - */
> -
> -/* Read at least n characters from the input stream and set s to the current
> - reading position. */
> -static Py_ssize_t
> -unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n)
> -{
> - PyObject *len;
> - PyObject *data;
> -
> - len = PyLong_FromSsize_t(n);
> - if (len == NULL)
> - return -1;
> -
> - data = unpickler_call(self, self->read, len);
> - if (data == NULL)
> - return -1;
> -
> - /* XXX: Should bytearray be supported too? */
> - if (!PyBytes_Check(data)) {
> - PyErr_SetString(PyExc_ValueError,
> - "read() from the underlying stream did not"
> - "return bytes");
> - return -1;
> - }
> -
> - Py_XDECREF(self->last_string);
> - self->last_string = data;
> -
> - if (!(*s = PyBytes_AS_STRING(data)))
> - return -1;
> -
> - return n;
> -}
> -
> -static Py_ssize_t
> -unpickler_readline(UnpicklerObject *self, char **s)
> -{
> - PyObject *data;
> -
> - data = PyObject_CallObject(self->readline, empty_tuple);
> - if (data == NULL)
> - return -1;
> -
> - /* XXX: Should bytearray be supported too? */
> - if (!PyBytes_Check(data)) {
> - PyErr_SetString(PyExc_ValueError,
> - "readline() from the underlying stream did not"
> - "return bytes");
> - return -1;
> - }
> -
> - Py_XDECREF(self->last_string);
> - self->last_string = data;
> -
> - if (!(*s = PyBytes_AS_STRING(data)))
> - return -1;
> -
> - return PyBytes_GET_SIZE(data);
> -}
> -
> -/* Generate a GET opcode for an object stored in the memo. The 'key' argument
> - should be the address of the object as returned by PyLong_FromVoidPtr(). */
> -static int
> -memo_get(PicklerObject *self, PyObject *key)
> -{
> - PyObject *value;
> - PyObject *memo_id;
> - long x;
> - char pdata[30];
> - int len;
> -
> - value = PyDict_GetItemWithError(self->memo, key);
> - if (value == NULL) {
> - if (!PyErr_Occurred())
> - PyErr_SetObject(PyExc_KeyError, key);
> - return -1;
> - }
> -
> - memo_id = PyTuple_GetItem(value, 0);
> - if (memo_id == NULL)
> - return -1;
> -
> - if (!PyLong_Check(memo_id)) {
> - PyErr_SetString(PicklingError, "memo id must be an integer");
> - return -1;
> - }
> - x = PyLong_AsLong(memo_id);
> - if (x == -1 && PyErr_Occurred())
> - return -1;
> -
> - if (!self->bin) {
> - pdata[0] = GET;
> - PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
> - len = (int)strlen(pdata);
> - }
> - else {
> - if (x < 256) {
> - pdata[0] = BINGET;
> - pdata[1] = (unsigned char)(x & 0xff);
> - len = 2;
> - }
> - else if (x <= 0xffffffffL) {
> - pdata[0] = LONG_BINGET;
> - pdata[1] = (unsigned char)(x & 0xff);
> - pdata[2] = (unsigned char)((x >> 8) & 0xff);
> - pdata[3] = (unsigned char)((x >> 16) & 0xff);
> - pdata[4] = (unsigned char)((x >> 24) & 0xff);
> - len = 5;
> - }
> - else { /* unlikely */
> - PyErr_SetString(PicklingError,
> - "memo id too large for LONG_BINGET");
> - return -1;
> - }
> - }
> -
> - if (pickler_write(self, pdata, len) < 0)
> - return -1;
> -
> - return 0;
> -}
> -
> -/* Store an object in the memo, assign it a new unique ID based on the number
> - of objects currently stored in the memo and generate a PUT opcode. */
> -static int
> -memo_put(PicklerObject *self, PyObject *obj)
> -{
> - PyObject *key = NULL;
> - PyObject *memo_id = NULL;
> - PyObject *tuple = NULL;
> - long x;
> - char pdata[30];
> - int len;
> - int status = 0;
> -
> - if (self->fast)
> - return 0;
> -
> - key = PyLong_FromVoidPtr(obj);
> - if (key == NULL)
> - goto error;
> - if ((x = PyDict_Size(self->memo)) < 0)
> - goto error;
> - memo_id = PyLong_FromLong(x);
> - if (memo_id == NULL)
> - goto error;
> - tuple = PyTuple_New(2);
> - if (tuple == NULL)
> - goto error;
> -
> - Py_INCREF(memo_id);
> - PyTuple_SET_ITEM(tuple, 0, memo_id);
> - Py_INCREF(obj);
> - PyTuple_SET_ITEM(tuple, 1, obj);
> - if (PyDict_SetItem(self->memo, key, tuple) < 0)
> - goto error;
> -
> - if (!self->bin) {
> - pdata[0] = PUT;
> - PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
> - len = strlen(pdata);
> - }
> - else {
> - if (x < 256) {
> - pdata[0] = BINPUT;
> - pdata[1] = x;
> - len = 2;
> - }
> - else if (x <= 0xffffffffL) {
> - pdata[0] = LONG_BINPUT;
> - pdata[1] = (unsigned char)(x & 0xff);
> - pdata[2] = (unsigned char)((x >> 8) & 0xff);
> - pdata[3] = (unsigned char)((x >> 16) & 0xff);
> - pdata[4] = (unsigned char)((x >> 24) & 0xff);
> - len = 5;
> - }
> - else { /* unlikely */
> - PyErr_SetString(PicklingError,
> - "memo id too large for LONG_BINPUT");
> - return -1;
> - }
> - }
> -
> - if (pickler_write(self, pdata, len) < 0)
> - goto error;
> -
> - if (0) {
> - error:
> - status = -1;
> - }
> -
> - Py_XDECREF(key);
> - Py_XDECREF(memo_id);
> - Py_XDECREF(tuple);
> -
> - return status;
> -}
> -
> -static PyObject *
> -whichmodule(PyObject *global, PyObject *global_name)
> -{
> - Py_ssize_t i, j;
> - static PyObject *module_str = NULL;
> - static PyObject *main_str = NULL;
> - PyObject *module_name;
> - PyObject *modules_dict;
> - PyObject *module;
> - PyObject *obj;
> -
> - if (module_str == NULL) {
> - module_str = PyUnicode_InternFromString("__module__");
> - if (module_str == NULL)
> - return NULL;
> - main_str = PyUnicode_InternFromString("__main__");
> - if (main_str == NULL)
> - return NULL;
> - }
> -
> - module_name = PyObject_GetAttr(global, module_str);
> -
> - /* In some rare cases (e.g., random.getrandbits), __module__ can be
> - None. If it is so, then search sys.modules for the module of
> - global. */
> - if (module_name == Py_None) {
> - Py_DECREF(module_name);
> - goto search;
> - }
> -
> - if (module_name) {
> - return module_name;
> - }
> - if (PyErr_ExceptionMatches(PyExc_AttributeError))
> - PyErr_Clear();
> - else
> - return NULL;
> -
> - search:
> - modules_dict = PySys_GetObject("modules");
> - if (modules_dict == NULL)
> - return NULL;
> -
> - i = 0;
> - module_name = NULL;
> - while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
> - if (PyObject_Compare(module_name, main_str) == 0)
> - continue;
> -
> - obj = PyObject_GetAttr(module, global_name);
> - if (obj == NULL) {
> - if (PyErr_ExceptionMatches(PyExc_AttributeError))
> - PyErr_Clear();
> - else
> - return NULL;
> - continue;
> - }
> -
> - if (obj != global) {
> - Py_DECREF(obj);
> - continue;
> - }
> -
> - Py_DECREF(obj);
> - break;
> - }
> -
> - /* If no module is found, use __main__. */
> - if (!j) {
> - module_name = main_str;
> - }
> -
> - Py_INCREF(module_name);
> - return module_name;
> -}
> -
> -/* fast_save_enter() and fast_save_leave() are guards against recursive
> - objects when Pickler is used with the "fast mode" (i.e., with object
> - memoization disabled). If the nesting of a list or dict object exceeds
> - FAST_NESTING_LIMIT, these guards will start keeping an internal
> - reference to the seen list or dict objects and check whether these objects
> - are recursive. These are not strictly necessary, since save() has a
> - hard-coded recursion limit, but they give a nicer error message than the
> - typical RuntimeError. */
> -static int
> -fast_save_enter(PicklerObject *self, PyObject *obj)
> -{
> - /* if fast_nesting < 0, we're doing an error exit. */
> - if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
> - PyObject *key = NULL;
> - if (self->fast_memo == NULL) {
> - self->fast_memo = PyDict_New();
> - if (self->fast_memo == NULL) {
> - self->fast_nesting = -1;
> - return 0;
> - }
> - }
> - key = PyLong_FromVoidPtr(obj);
> - if (key == NULL)
> - return 0;
> - if (PyDict_GetItem(self->fast_memo, key)) {
> - Py_DECREF(key);
> - PyErr_Format(PyExc_ValueError,
> - "fast mode: can't pickle cyclic objects "
> - "including object type %.200s at %p",
> - obj->ob_type->tp_name, obj);
> - self->fast_nesting = -1;
> - return 0;
> - }
> - if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
> - Py_DECREF(key);
> - self->fast_nesting = -1;
> - return 0;
> - }
> - Py_DECREF(key);
> - }
> - return 1;
> -}
> -
> -static int
> -fast_save_leave(PicklerObject *self, PyObject *obj)
> -{
> - if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
> - PyObject *key = PyLong_FromVoidPtr(obj);
> - if (key == NULL)
> - return 0;
> - if (PyDict_DelItem(self->fast_memo, key) < 0) {
> - Py_DECREF(key);
> - return 0;
> - }
> - Py_DECREF(key);
> - }
> - return 1;
> -}
> -
> -static int
> -save_none(PicklerObject *self, PyObject *obj)
> -{
> - const char none_op = NONE;
> - if (pickler_write(self, &none_op, 1) < 0)
> - return -1;
> -
> - return 0;
> -}
> -
> -static int
> -save_bool(PicklerObject *self, PyObject *obj)
> -{
> - static const char *buf[2] = { FALSE, TRUE };
> - const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
> - int p = (obj == Py_True);
> -
> - if (self->proto >= 2) {
> - const char bool_op = p ? NEWTRUE : NEWFALSE;
> - if (pickler_write(self, &bool_op, 1) < 0)
> - return -1;
> - }
> - else if (pickler_write(self, buf[p], len[p]) < 0)
> - return -1;
> -
> - return 0;
> -}
> -
> -static int
> -save_int(PicklerObject *self, long x)
> -{
> - char pdata[32];
> - int len = 0;
> -
> - if (!self->bin
> -#if SIZEOF_LONG > 4
> - || x > 0x7fffffffL || x < -0x80000000L
> -#endif
> - ) {
> - /* Text-mode pickle, or long too big to fit in the 4-byte
> - * signed BININT format: store as a string.
> - */
> - pdata[0] = LONG; /* use LONG for consistence with pickle.py */
> - PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
> - if (pickler_write(self, pdata, strlen(pdata)) < 0)
> - return -1;
> - }
> - else {
> - /* Binary pickle and x fits in a signed 4-byte int. */
> - pdata[1] = (unsigned char)(x & 0xff);
> - pdata[2] = (unsigned char)((x >> 8) & 0xff);
> - pdata[3] = (unsigned char)((x >> 16) & 0xff);
> - pdata[4] = (unsigned char)((x >> 24) & 0xff);
> -
> - if ((pdata[4] == 0) && (pdata[3] == 0)) {
> - if (pdata[2] == 0) {
> - pdata[0] = BININT1;
> - len = 2;
> - }
> - else {
> - pdata[0] = BININT2;
> - len = 3;
> - }
> - }
> - else {
> - pdata[0] = BININT;
> - len = 5;
> - }
> -
> - if (pickler_write(self, pdata, len) < 0)
> - return -1;
> - }
> -
> - return 0;
> -}
> -
> -static int
> -save_long(PicklerObject *self, PyObject *obj)
> -{
> - PyObject *repr = NULL;
> - Py_ssize_t size;
> - long val = PyLong_AsLong(obj);
> - int status = 0;
> -
> - const char long_op = LONG;
> -
> - if (val == -1 && PyErr_Occurred()) {
> - /* out of range for int pickling */
> - PyErr_Clear();
> - }
> - else
> - return save_int(self, val);
> -
> - if (self->proto >= 2) {
> - /* Linear-time pickling. */
> - size_t nbits;
> - size_t nbytes;
> - unsigned char *pdata;
> - char header[5];
> - int i;
> - int sign = _PyLong_Sign(obj);
> -
> - if (sign == 0) {
> - header[0] = LONG1;
> - header[1] = 0; /* It's 0 -- an empty bytestring. */
> - if (pickler_write(self, header, 2) < 0)
> - goto error;
> - return 0;
> - }
> - nbits = _PyLong_NumBits(obj);
> - if (nbits == (size_t)-1 && PyErr_Occurred())
> - goto error;
> - /* How many bytes do we need? There are nbits >> 3 full
> - * bytes of data, and nbits & 7 leftover bits. If there
> - * are any leftover bits, then we clearly need another
> - * byte. What's not so obvious is that we *probably*
> - * need another byte even if there aren't any leftovers:
> - * the most-significant bit of the most-significant byte
> - * acts like a sign bit, and it's usually got a sense
> - * opposite of the one we need. The exception is longs
> - * of the form -(2**(8*j-1)) for j > 0. Such a long is
> - * its own 256's-complement, so has the right sign bit
> - * even without the extra byte. That's a pain to check
> - * for in advance, though, so we always grab an extra
> - * byte at the start, and cut it back later if possible.
> - */
> - nbytes = (nbits >> 3) + 1;
> - if (nbytes > INT_MAX) {
> - PyErr_SetString(PyExc_OverflowError,
> - "long too large to pickle");
> - goto error;
> - }
> - repr = PyUnicode_FromStringAndSize(NULL, (int)nbytes);
> - if (repr == NULL)
> - goto error;
> - pdata = (unsigned char *)PyUnicode_AsString(repr);
> - i = _PyLong_AsByteArray((PyLongObject *)obj,
> - pdata, nbytes,
> - 1 /* little endian */ , 1 /* signed */ );
> - if (i < 0)
> - goto error;
> - /* If the long is negative, this may be a byte more than
> - * needed. This is so iff the MSB is all redundant sign
> - * bits.
> - */
> - if (sign < 0 &&
> - nbytes > 1 &&
> - pdata[nbytes - 1] == 0xff &&
> - (pdata[nbytes - 2] & 0x80) != 0) {
> - nbytes--;
> - }
> -
> - if (nbytes < 256) {
> - header[0] = LONG1;
> - header[1] = (unsigned char)nbytes;
> - size = 2;
> - }
> - else {
> - header[0] = LONG4;
> - size = (int)nbytes;
> - for (i = 1; i < 5; i++) {
> - header[i] = (unsigned char)(size & 0xff);
> - size >>= 8;
> - }
> - size = 5;
> - }
> - if (pickler_write(self, header, size) < 0 ||
> - pickler_write(self, (char *)pdata, (int)nbytes) < 0)
> - goto error;
> - }
> - else {
> - char *string;
> -
> - /* proto < 2: write the repr and newline. This is quadratic-time
> - (in the number of digits), in both directions. */
> -
> - repr = PyObject_Repr(obj);
> - if (repr == NULL)
> - goto error;
> -
> - string = PyUnicode_AsStringAndSize(repr, &size);
> - if (string == NULL)
> - goto error;
> -
> - if (pickler_write(self, &long_op, 1) < 0 ||
> - pickler_write(self, string, size) < 0 ||
> - pickler_write(self, "\n", 1) < 0)
> - goto error;
> - }
> -
> - if (0) {
> - error:
> - status = -1;
> - }
> - Py_XDECREF(repr);
> -
> - return status;
> -}
> -
> -static int
> -save_float(PicklerObject *self, PyObject *obj)
> -{
> - double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
> -
> - if (self->bin) {
> - char pdata[9];
> - pdata[0] = BINFLOAT;
> - if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
> - return -1;
> - if (pickler_write(self, pdata, 9) < 0)
> - return -1;
> - }
> - else {
> - char pdata[250];
> - pdata[0] = FLOAT;
> - PyOS_ascii_formatd(pdata + 1, sizeof(pdata) - 2, "%.17g", x);
> - /* Extend the formatted string with a newline character */
> - strcat(pdata, "\n");
> -
> - if (pickler_write(self, pdata, strlen(pdata)) < 0)
> - return -1;
> - }
> -
> - return 0;
> -}
> -
> -static int
> -save_bytes(PicklerObject *self, PyObject *obj)
> -{
> - if (self->proto < 3) {
> - /* Older pickle protocols do not have an opcode for pickling bytes
> - objects. Therefore, we need to fake the copy protocol (i.e.,
> - the __reduce__ method) to permit bytes object unpickling. */
> - PyObject *reduce_value = NULL;
> - PyObject *bytelist = NULL;
> - int status;
> -
> - bytelist = PySequence_List(obj);
> - if (bytelist == NULL)
> - return -1;
> -
> - reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
> - bytelist);
> - if (reduce_value == NULL) {
> - Py_DECREF(bytelist);
> - return -1;
> - }
> -
> - /* save_reduce() will memoize the object automatically. */
> - status = save_reduce(self, reduce_value, obj);
> - Py_DECREF(reduce_value);
> - Py_DECREF(bytelist);
> - return status;
> - }
> - else {
> - Py_ssize_t size;
> - char header[5];
> - int len;
> -
> - size = PyBytes_Size(obj);
> - if (size < 0)
> - return -1;
> -
> - if (size < 256) {
> - header[0] = SHORT_BINBYTES;
> - header[1] = (unsigned char)size;
> - len = 2;
> - }
> - else if (size <= 0xffffffffL) {
> - header[0] = BINBYTES;
> - header[1] = (unsigned char)(size & 0xff);
> - header[2] = (unsigned char)((size >> 8) & 0xff);
> - header[3] = (unsigned char)((size >> 16) & 0xff);
> - header[4] = (unsigned char)((size >> 24) & 0xff);
> - len = 5;
> - }
> - else {
> - return -1; /* string too large */
> - }
> -
> - if (pickler_write(self, header, len) < 0)
> - return -1;
> -
> - if (pickler_write(self, PyBytes_AS_STRING(obj), size) < 0)
> - return -1;
> -
> - if (memo_put(self, obj) < 0)
> - return -1;
> -
> - return 0;
> - }
> -}
> -
> -/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
> - backslash and newline characters to \uXXXX escapes. */
> -static PyObject *
> -raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
> -{
> - PyObject *repr, *result;
> - char *p;
> - char *q;
> -
> - static const char *hexdigits = "0123456789abcdef";
> -
> -#ifdef Py_UNICODE_WIDE
> - repr = PyBytes_FromStringAndSize(NULL, 10 * size);
> -#else
> - repr = PyBytes_FromStringAndSize(NULL, 6 * size);
> -#endif
> - if (repr == NULL)
> - return NULL;
> - if (size == 0)
> - goto done;
> -
> - p = q = PyBytes_AS_STRING(repr);
> - while (size-- > 0) {
> - Py_UNICODE ch = *s++;
> -#ifdef Py_UNICODE_WIDE
> - /* Map 32-bit characters to '\Uxxxxxxxx' */
> - if (ch >= 0x10000) {
> - *p++ = '\\';
> - *p++ = 'U';
> - *p++ = hexdigits[(ch >> 28) & 0xf];
> - *p++ = hexdigits[(ch >> 24) & 0xf];
> - *p++ = hexdigits[(ch >> 20) & 0xf];
> - *p++ = hexdigits[(ch >> 16) & 0xf];
> - *p++ = hexdigits[(ch >> 12) & 0xf];
> - *p++ = hexdigits[(ch >> 8) & 0xf];
> - *p++ = hexdigits[(ch >> 4) & 0xf];
> - *p++ = hexdigits[ch & 15];
> - }
> - else
> -#endif
> - /* Map 16-bit characters to '\uxxxx' */
> - if (ch >= 256 || ch == '\\' || ch == '\n') {
> - *p++ = '\\';
> - *p++ = 'u';
> - *p++ = hexdigits[(ch >> 12) & 0xf];
> - *p++ = hexdigits[(ch >> 8) & 0xf];
> - *p++ = hexdigits[(ch >> 4) & 0xf];
> - *p++ = hexdigits[ch & 15];
> - }
> - /* Copy everything else as-is */
> - else
> - *p++ = (char) ch;
> - }
> - size = p - q;
> -
> - done:
> - result = PyBytes_FromStringAndSize(PyBytes_AS_STRING(repr), size);
> - Py_DECREF(repr);
> - return result;
> -}
> -
> -static int
> -save_unicode(PicklerObject *self, PyObject *obj)
> -{
> - Py_ssize_t size;
> - PyObject *encoded = NULL;
> -
> - if (self->bin) {
> - char pdata[5];
> -
> - encoded = PyUnicode_AsUTF8String(obj);
> - if (encoded == NULL)
> - goto error;
> -
> - size = PyBytes_GET_SIZE(encoded);
> - if (size < 0 || size > 0xffffffffL)
> - goto error; /* string too large */
> -
> - pdata[0] = BINUNICODE;
> - pdata[1] = (unsigned char)(size & 0xff);
> - pdata[2] = (unsigned char)((size >> 8) & 0xff);
> - pdata[3] = (unsigned char)((size >> 16) & 0xff);
> - pdata[4] = (unsigned char)((size >> 24) & 0xff);
> -
> - if (pickler_write(self, pdata, 5) < 0)
> - goto error;
> -
> - if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
> - goto error;
> - }
> - else {
> - const char unicode_op = UNICODE;
> -
> - encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
> - PyUnicode_GET_SIZE(obj));
> - if (encoded == NULL)
> - goto error;
> -
> - if (pickler_write(self, &unicode_op, 1) < 0)
> - goto error;
> -
> - size = PyBytes_GET_SIZE(encoded);
> - if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
> - goto error;
> -
> - if (pickler_write(self, "\n", 1) < 0)
> - goto error;
> - }
> - if (memo_put(self, obj) < 0)
> - goto error;
> -
> - Py_DECREF(encoded);
> - return 0;
> -
> - error:
> - Py_XDECREF(encoded);
> - return -1;
> -}
> -
> -/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
> -static int
> -store_tuple_elements(PicklerObject *self, PyObject *t, int len)
> -{
> - int i;
> -
> - assert(PyTuple_Size(t) == len);
> -
> - for (i = 0; i < len; i++) {
> - PyObject *element = PyTuple_GET_ITEM(t, i);
> -
> - if (element == NULL)
> - return -1;
> - if (save(self, element, 0) < 0)
> - return -1;
> - }
> -
> - return 0;
> -}
> -
> -/* Tuples are ubiquitous in the pickle protocols, so many techniques are
> - * used across protocols to minimize the space needed to pickle them.
> - * Tuples are also the only builtin immutable type that can be recursive
> - * (a tuple can be reached from itself), and that requires some subtle
> - * magic so that it works in all cases. IOW, this is a long routine.
> - */
> -static int
> -save_tuple(PicklerObject *self, PyObject *obj)
> -{
> - PyObject *memo_key = NULL;
> - int len, i;
> - int status = 0;
> -
> - const char mark_op = MARK;
> - const char tuple_op = TUPLE;
> - const char pop_op = POP;
> - const char pop_mark_op = POP_MARK;
> - const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
> -
> - if ((len = PyTuple_Size(obj)) < 0)
> - return -1;
> -
> - if (len == 0) {
> - char pdata[2];
> -
> - if (self->proto) {
> - pdata[0] = EMPTY_TUPLE;
> - len = 1;
> - }
> - else {
> - pdata[0] = MARK;
> - pdata[1] = TUPLE;
> - len = 2;
> - }
> - if (pickler_write(self, pdata, len) < 0)
> - return -1;
> - return 0;
> - }
> -
> - /* id(tuple) isn't in the memo now. If it shows up there after
> - * saving the tuple elements, the tuple must be recursive, in
> - * which case we'll pop everything we put on the stack, and fetch
> - * its value from the memo.
> - */
> - memo_key = PyLong_FromVoidPtr(obj);
> - if (memo_key == NULL)
> - return -1;
> -
> - if (len <= 3 && self->proto >= 2) {
> - /* Use TUPLE{1,2,3} opcodes. */
> - if (store_tuple_elements(self, obj, len) < 0)
> - goto error;
> -
> - if (PyDict_GetItem(self->memo, memo_key)) {
> - /* pop the len elements */
> - for (i = 0; i < len; i++)
> - if (pickler_write(self, &pop_op, 1) < 0)
> - goto error;
> - /* fetch from memo */
> - if (memo_get(self, memo_key) < 0)
> - goto error;
> -
> - Py_DECREF(memo_key);
> - return 0;
> - }
> - else { /* Not recursive. */
> - if (pickler_write(self, len2opcode + len, 1) < 0)
> - goto error;
> - }
> - goto memoize;
> - }
> -
> - /* proto < 2 and len > 0, or proto >= 2 and len > 3.
> - * Generate MARK e1 e2 ... TUPLE
> - */
> - if (pickler_write(self, &mark_op, 1) < 0)
> - goto error;
> -
> - if (store_tuple_elements(self, obj, len) < 0)
> - goto error;
> -
> - if (PyDict_GetItem(self->memo, memo_key)) {
> - /* pop the stack stuff we pushed */
> - if (self->bin) {
> - if (pickler_write(self, &pop_mark_op, 1) < 0)
> - goto error;
> - }
> - else {
> - /* Note that we pop one more than len, to remove
> - * the MARK too.
> - */
> - for (i = 0; i <= len; i++)
> - if (pickler_write(self, &pop_op, 1) < 0)
> - goto error;
> - }
> - /* fetch from memo */
> - if (memo_get(self, memo_key) < 0)
> - goto error;
> -
> - Py_DECREF(memo_key);
> - return 0;
> - }
> - else { /* Not recursive. */
> - if (pickler_write(self, &tuple_op, 1) < 0)
> - goto error;
> - }
> -
> - memoize:
> - if (memo_put(self, obj) < 0)
> - goto error;
> -
> - if (0) {
> - error:
> - status = -1;
> - }
> -
> - Py_DECREF(memo_key);
> - return status;
> -}
> -
> -/* iter is an iterator giving items, and we batch up chunks of
> - * MARK item item ... item APPENDS
> - * opcode sequences. Calling code should have arranged to first create an
> - * empty list, or list-like object, for the APPENDS to operate on.
> - * Returns 0 on success, <0 on error.
> - */
> -static int
> -batch_list(PicklerObject *self, PyObject *iter)
> -{
> - PyObject *obj;
> - PyObject *slice[BATCHSIZE];
> - int i, n;
> -
> - const char mark_op = MARK;
> - const char append_op = APPEND;
> - const char appends_op = APPENDS;
> -
> - assert(iter != NULL);
> -
> - /* XXX: I think this function could be made faster by avoiding the
> - iterator interface and fetching objects directly from list using
> - PyList_GET_ITEM.
> - */
> -
> - if (self->proto == 0) {
> - /* APPENDS isn't available; do one at a time. */
> - for (;;) {
> - obj = PyIter_Next(iter);
> - if (obj == NULL) {
> - if (PyErr_Occurred())
> - return -1;
> - break;
> - }
> - i = save(self, obj, 0);
> - Py_DECREF(obj);
> - if (i < 0)
> - return -1;
> - if (pickler_write(self, &append_op, 1) < 0)
> - return -1;
> - }
> - return 0;
> - }
> -
> - /* proto > 0: write in batches of BATCHSIZE. */
> - do {
> - /* Get next group of (no more than) BATCHSIZE elements. */
> - for (n = 0; n < BATCHSIZE; n++) {
> - obj = PyIter_Next(iter);
> - if (obj == NULL) {
> - if (PyErr_Occurred())
> - goto error;
> - break;
> - }
> - slice[n] = obj;
> - }
> -
> - if (n > 1) {
> - /* Pump out MARK, slice[0:n], APPENDS. */
> - if (pickler_write(self, &mark_op, 1) < 0)
> - goto error;
> - for (i = 0; i < n; i++) {
> - if (save(self, slice[i], 0) < 0)
> - goto error;
> - }
> - if (pickler_write(self, &appends_op, 1) < 0)
> - goto error;
> - }
> - else if (n == 1) {
> - if (save(self, slice[0], 0) < 0 ||
> - pickler_write(self, &append_op, 1) < 0)
> - goto error;
> - }
> -
> - for (i = 0; i < n; i++) {
> - Py_DECREF(slice[i]);
> - }
> - } while (n == BATCHSIZE);
> - return 0;
> -
> - error:
> - while (--n >= 0) {
> - Py_DECREF(slice[n]);
> - }
> - return -1;
> -}
> -
> -static int
> -save_list(PicklerObject *self, PyObject *obj)
> -{
> - PyObject *iter;
> - char header[3];
> - int len;
> - int status = 0;
> -
> - if (self->fast && !fast_save_enter(self, obj))
> - goto error;
> -
> - /* Create an empty list. */
> - if (self->bin) {
> - header[0] = EMPTY_LIST;
> - len = 1;
> - }
> - else {
> - header[0] = MARK;
> - header[1] = LIST;
> - len = 2;
> - }
> -
> - if (pickler_write(self, header, len) < 0)
> - goto error;
> -
> - /* Get list length, and bow out early if empty. */
> - if ((len = PyList_Size(obj)) < 0)
> - goto error;
> -
> - if (memo_put(self, obj) < 0)
> - goto error;
> -
> - if (len != 0) {
> - /* Save the list elements. */
> - iter = PyObject_GetIter(obj);
> - if (iter == NULL)
> - goto error;
> - status = batch_list(self, iter);
> - Py_DECREF(iter);
> - }
> -
> - if (0) {
> - error:
> - status = -1;
> - }
> -
> - if (self->fast && !fast_save_leave(self, obj))
> - status = -1;
> -
> - return status;
> -}
> -
> -/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
> - * MARK key value ... key value SETITEMS
> - * opcode sequences. Calling code should have arranged to first create an
> - * empty dict, or dict-like object, for the SETITEMS to operate on.
> - * Returns 0 on success, <0 on error.
> - *
> - * This is very much like batch_list(). The difference between saving
> - * elements directly, and picking apart two-tuples, is so long-winded at
> - * the C level, though, that attempts to combine these routines were too
> - * ugly to bear.
> - */
> -static int
> -batch_dict(PicklerObject *self, PyObject *iter)
> -{
> - PyObject *obj;
> - PyObject *slice[BATCHSIZE];
> - int i, n;
> -
> - const char mark_op = MARK;
> - const char setitem_op = SETITEM;
> - const char setitems_op = SETITEMS;
> -
> - assert(iter != NULL);
> -
> - if (self->proto == 0) {
> - /* SETITEMS isn't available; do one at a time. */
> - for (;;) {
> - obj = PyIter_Next(iter);
> - if (obj == NULL) {
> - if (PyErr_Occurred())
> - return -1;
> - break;
> - }
> - if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
> - PyErr_SetString(PyExc_TypeError, "dict items "
> - "iterator must return 2-tuples");
> - return -1;
> - }
> - i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
> - if (i >= 0)
> - i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
> - Py_DECREF(obj);
> - if (i < 0)
> - return -1;
> - if (pickler_write(self, &setitem_op, 1) < 0)
> - return -1;
> - }
> - return 0;
> - }
> -
> - /* proto > 0: write in batches of BATCHSIZE. */
> - do {
> - /* Get next group of (no more than) BATCHSIZE elements. */
> - for (n = 0; n < BATCHSIZE; n++) {
> - obj = PyIter_Next(iter);
> - if (obj == NULL) {
> - if (PyErr_Occurred())
> - goto error;
> - break;
> - }
> - if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
> - PyErr_SetString(PyExc_TypeError, "dict items "
> - "iterator must return 2-tuples");
> - goto error;
> - }
> - slice[n] = obj;
> - }
> -
> - if (n > 1) {
> - /* Pump out MARK, slice[0:n], SETITEMS. */
> - if (pickler_write(self, &mark_op, 1) < 0)
> - goto error;
> - for (i = 0; i < n; i++) {
> - obj = slice[i];
> - if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
> - save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
> - goto error;
> - }
> - if (pickler_write(self, &setitems_op, 1) < 0)
> - goto error;
> - }
> - else if (n == 1) {
> - obj = slice[0];
> - if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
> - save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0 ||
> - pickler_write(self, &setitem_op, 1) < 0)
> - goto error;
> - }
> -
> - for (i = 0; i < n; i++) {
> - Py_DECREF(slice[i]);
> - }
> - } while (n == BATCHSIZE);
> - return 0;
> -
> - error:
> - while (--n >= 0) {
> - Py_DECREF(slice[n]);
> - }
> - return -1;
> -}
> -
> -static int
> -save_dict(PicklerObject *self, PyObject *obj)
> -{
> - PyObject *items, *iter;
> - char header[3];
> - int len;
> - int status = 0;
> -
> - if (self->fast && !fast_save_enter(self, obj))
> - goto error;
> -
> - /* Create an empty dict. */
> - if (self->bin) {
> - header[0] = EMPTY_DICT;
> - len = 1;
> - }
> - else {
> - header[0] = MARK;
> - header[1] = DICT;
> - len = 2;
> - }
> -
> - if (pickler_write(self, header, len) < 0)
> - goto error;
> -
> - /* Get dict size, and bow out early if empty. */
> - if ((len = PyDict_Size(obj)) < 0)
> - goto error;
> -
> - if (memo_put(self, obj) < 0)
> - goto error;
> -
> - if (len != 0) {
> - /* Save the dict items. */
> - items = PyObject_CallMethod(obj, "items", "()");
> - if (items == NULL)
> - goto error;
> - iter = PyObject_GetIter(items);
> - Py_DECREF(items);
> - if (iter == NULL)
> - goto error;
> - status = batch_dict(self, iter);
> - Py_DECREF(iter);
> - }
> -
> - if (0) {
> - error:
> - status = -1;
> - }
> -
> - if (self->fast && !fast_save_leave(self, obj))
> - status = -1;
> -
> - return status;
> -}
> -
> -static int
> -save_global(PicklerObject *self, PyObject *obj, PyObject *name)
> -{
> - static PyObject *name_str = NULL;
> - PyObject *global_name = NULL;
> - PyObject *module_name = NULL;
> - PyObject *module = NULL;
> - PyObject *cls;
> - int status = 0;
> -
> - const char global_op = GLOBAL;
> -
> - if (name_str == NULL) {
> - name_str = PyUnicode_InternFromString("__name__");
> - if (name_str == NULL)
> - goto error;
> - }
> -
> - if (name) {
> - global_name = name;
> - Py_INCREF(global_name);
> - }
> - else {
> - global_name = PyObject_GetAttr(obj, name_str);
> - if (global_name == NULL)
> - goto error;
> - }
> -
> - module_name = whichmodule(obj, global_name);
> - if (module_name == NULL)
> - goto error;
> -
> - /* XXX: Change to use the import C API directly with level=0 to disallow
> - relative imports.
> -
> - XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
> - builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
> - custom import functions (IMHO, this would be a nice security
> - feature). The import C API would need to be extended to support the
> - extra parameters of __import__ to fix that. */
> - module = PyImport_Import(module_name);
> - if (module == NULL) {
> - PyErr_Format(PicklingError,
> - "Can't pickle %R: import of module %R failed",
> - obj, module_name);
> - goto error;
> - }
> - cls = PyObject_GetAttr(module, global_name);
> - if (cls == NULL) {
> - PyErr_Format(PicklingError,
> - "Can't pickle %R: attribute lookup %S.%S failed",
> - obj, module_name, global_name);
> - goto error;
> - }
> - if (cls != obj) {
> - Py_DECREF(cls);
> - PyErr_Format(PicklingError,
> - "Can't pickle %R: it's not the same object as %S.%S",
> - obj, module_name, global_name);
> - goto error;
> - }
> - Py_DECREF(cls);
> -
> - if (self->proto >= 2) {
> - /* See whether this is in the extension registry, and if
> - * so generate an EXT opcode.
> - */
> - PyObject *code_obj; /* extension code as Python object */
> - long code; /* extension code as C value */
> - char pdata[5];
> - int n;
> -
> - PyTuple_SET_ITEM(two_tuple, 0, module_name);
> - PyTuple_SET_ITEM(two_tuple, 1, global_name);
> - code_obj = PyDict_GetItem(extension_registry, two_tuple);
> - /* The object is not registered in the extension registry.
> - This is the most likely code path. */
> - if (code_obj == NULL)
> - goto gen_global;
> -
> - /* XXX: pickle.py checks neither the type nor the range
> - of the value returned by the extension_registry. It should, for
> - consistency. */
> -
> - /* Verify code_obj has the right type and value. */
> - if (!PyLong_Check(code_obj)) {
> - PyErr_Format(PicklingError,
> - "Can't pickle %R: extension code %R isn't an integer",
> - obj, code_obj);
> - goto error;
> - }
> - code = PyLong_AS_LONG(code_obj);
> - if (code <= 0 || code > 0x7fffffffL) {
> - PyErr_Format(PicklingError,
> - "Can't pickle %R: extension code %ld is out of range",
> - obj, code);
> - goto error;
> - }
> -
> - /* Generate an EXT opcode. */
> - if (code <= 0xff) {
> - pdata[0] = EXT1;
> - pdata[1] = (unsigned char)code;
> - n = 2;
> - }
> - else if (code <= 0xffff) {
> - pdata[0] = EXT2;
> - pdata[1] = (unsigned char)(code & 0xff);
> - pdata[2] = (unsigned char)((code >> 8) & 0xff);
> - n = 3;
> - }
> - else {
> - pdata[0] = EXT4;
> - pdata[1] = (unsigned char)(code & 0xff);
> - pdata[2] = (unsigned char)((code >> 8) & 0xff);
> - pdata[3] = (unsigned char)((code >> 16) & 0xff);
> - pdata[4] = (unsigned char)((code >> 24) & 0xff);
> - n = 5;
> - }
> -
> - if (pickler_write(self, pdata, n) < 0)
> - goto error;
> - }
> - else {
> - /* Generate a normal global opcode if we are using a pickle
> - protocol < 2, or if the object is not registered in the
> - extension registry. */
> - PyObject *encoded;
> - PyObject *(*unicode_encoder)(PyObject *);
> -
> - gen_global:
> - if (pickler_write(self, &global_op, 1) < 0)
> - goto error;
> -
> - /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
> - the module name and the global name using UTF-8. We do so only when
> - we are using pickle protocol version 3 or newer. This is to
> - ensure compatibility with older Unpicklers running on Python 2.x. */
> - if (self->proto >= 3) {
> - unicode_encoder = PyUnicode_AsUTF8String;
> - }
> - else {
> - unicode_encoder = PyUnicode_AsASCIIString;
> - }
> -
> - /* Save the name of the module. */
> - encoded = unicode_encoder(module_name);
> - if (encoded == NULL) {
> - if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
> - PyErr_Format(PicklingError,
> - "can't pickle module identifier '%S' using "
> - "pickle protocol %i", module_name, self->proto);
> - goto error;
> - }
> - if (pickler_write(self, PyBytes_AS_STRING(encoded),
> - PyBytes_GET_SIZE(encoded)) < 0) {
> - Py_DECREF(encoded);
> - goto error;
> - }
> - Py_DECREF(encoded);
> - if(pickler_write(self, "\n", 1) < 0)
> - goto error;
> -
> - /* Save the name of the global. */
> - encoded = unicode_encoder(global_name);
> - if (encoded == NULL) {
> - if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
> - PyErr_Format(PicklingError,
> - "can't pickle global identifier '%S' using "
> - "pickle protocol %i", global_name, self->proto);
> - goto error;
> - }
> - if (pickler_write(self, PyBytes_AS_STRING(encoded),
> - PyBytes_GET_SIZE(encoded)) < 0) {
> - Py_DECREF(encoded);
> - goto error;
> - }
> - Py_DECREF(encoded);
> - if(pickler_write(self, "\n", 1) < 0)
> - goto error;
> -
> - /* Memoize the object. */
> - if (memo_put(self, obj) < 0)
> - goto error;
> - }
> -
> - if (0) {
> - error:
> - status = -1;
> - }
> - Py_XDECREF(module_name);
> - Py_XDECREF(global_name);
> - Py_XDECREF(module);
> -
> - return status;
> -}
> -
> -static int
> -save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
> -{
> - PyObject *pid = NULL;
> - int status = 0;
> -
> - const char persid_op = PERSID;
> - const char binpersid_op = BINPERSID;
> -
> - Py_INCREF(obj);
> - pid = pickler_call(self, func, obj);
> - if (pid == NULL)
> - return -1;
> -
> - if (pid != Py_None) {
> - if (self->bin) {
> - if (save(self, pid, 1) < 0 ||
> - pickler_write(self, &binpersid_op, 1) < 0)
> - goto error;
> - }
> - else {
> - PyObject *pid_str = NULL;
> - char *pid_ascii_bytes;
> - Py_ssize_t size;
> -
> - pid_str = PyObject_Str(pid);
> - if (pid_str == NULL)
> - goto error;
> -
> - /* XXX: Should it check whether the persistent id only contains
> - ASCII characters? And what if the pid contains embedded
> - newlines? */
> - pid_ascii_bytes = PyUnicode_AsStringAndSize(pid_str, &size);
> - Py_DECREF(pid_str);
> - if (pid_ascii_bytes == NULL)
> - goto error;
> -
> - if (pickler_write(self, &persid_op, 1) < 0 ||
> - pickler_write(self, pid_ascii_bytes, size) < 0 ||
> - pickler_write(self, "\n", 1) < 0)
> - goto error;
> - }
> - status = 1;
> - }
> -
> - if (0) {
> - error:
> - status = -1;
> - }
> - Py_XDECREF(pid);
> -
> - return status;
> -}
> -
> -/* We're saving obj, and args is the 2-thru-5 tuple returned by the
> - * appropriate __reduce__ method for obj.
> - */
> -static int
> -save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
> -{
> - PyObject *callable;
> - PyObject *argtup;
> - PyObject *state = NULL;
> - PyObject *listitems = NULL;
> - PyObject *dictitems = NULL;
> -
> - int use_newobj = self->proto >= 2;
> -
> - const char reduce_op = REDUCE;
> - const char build_op = BUILD;
> - const char newobj_op = NEWOBJ;
> -
> - if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
> - &callable, &argtup, &state, &listitems, &dictitems))
> - return -1;
> -
> - if (!PyCallable_Check(callable)) {
> - PyErr_SetString(PicklingError,
> - "first argument of save_reduce() must be callable");
> - return -1;
> - }
> - if (!PyTuple_Check(argtup)) {
> - PyErr_SetString(PicklingError,
> - "second argument of save_reduce() must be a tuple");
> - return -1;
> - }
> -
> - if (state == Py_None)
> - state = NULL;
> - if (listitems == Py_None)
> - listitems = NULL;
> - if (dictitems == Py_None)
> - dictitems = NULL;
> -
> - /* Protocol 2 special case: if callable's name is __newobj__, use
> - NEWOBJ. */
> - if (use_newobj) {
> - static PyObject *newobj_str = NULL;
> - PyObject *name_str;
> -
> - if (newobj_str == NULL) {
> - newobj_str = PyUnicode_InternFromString("__newobj__");
> - }
> -
> - name_str = PyObject_GetAttrString(callable, "__name__");
> - if (name_str == NULL) {
> - if (PyErr_ExceptionMatches(PyExc_AttributeError))
> - PyErr_Clear();
> - else
> - return -1;
> - use_newobj = 0;
> - }
> - else {
> - use_newobj = PyUnicode_Check(name_str) &&
> - PyUnicode_Compare(name_str, newobj_str) == 0;
> - Py_DECREF(name_str);
> - }
> - }
> - if (use_newobj) {
> - PyObject *cls;
> - PyObject *newargtup;
> - PyObject *obj_class;
> - int p;
> -
> - /* Sanity checks. */
> - if (Py_SIZE(argtup) < 1) {
> - PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
> - return -1;
> - }
> -
> - cls = PyTuple_GET_ITEM(argtup, 0);
> - if (!PyObject_HasAttrString(cls, "__new__")) {
> - PyErr_SetString(PicklingError, "args[0] from "
> - "__newobj__ args has no __new__");
> - return -1;
> - }
> -
> - if (obj != NULL) {
> - obj_class = PyObject_GetAttrString(obj, "__class__");
> - if (obj_class == NULL) {
> - if (PyErr_ExceptionMatches(PyExc_AttributeError))
> - PyErr_Clear();
> - else
> - return -1;
> - }
> - p = obj_class != cls; /* true iff a problem */
> - Py_DECREF(obj_class);
> - if (p) {
> - PyErr_SetString(PicklingError, "args[0] from "
> - "__newobj__ args has the wrong class");
> - return -1;
> - }
> - }
> - /* XXX: These calls to save() are prone to infinite recursion. Imagine
> - what happens if the value returned by the __reduce__() method of
> - some extension type contains another object of the same type. Ouch!
> -
> - Here is a quick example, that I ran into, to illustrate what I
> - mean:
> -
> - >>> import pickle, copyreg
> - >>> copyreg.dispatch_table.pop(complex)
> - >>> pickle.dumps(1+2j)
> - Traceback (most recent call last):
> - ...
> - RuntimeError: maximum recursion depth exceeded
> -
> - Removing the complex class from copyreg.dispatch_table made the
> - __reduce_ex__() method emit another complex object:
> -
> - >>> (1+1j).__reduce_ex__(2)
> - (<function __newobj__ at 0xb7b71c3c>,
> - (<class 'complex'>, (1+1j)), None, None, None)
> -
> - Thus when save() was called on newargstup (the 2nd item) recursion
> - ensued. Of course, the bug was in the complex class which had a
> - broken __getnewargs__() that emitted another complex object. But,
> - the point, here, is it is quite easy to end up with a broken reduce
> - function. */
> -
> - /* Save the class and its __new__ arguments. */
> - if (save(self, cls, 0) < 0)
> - return -1;
> -
> - newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
> - if (newargtup == NULL)
> - return -1;
> -
> - p = save(self, newargtup, 0);
> - Py_DECREF(newargtup);
> - if (p < 0)
> - return -1;
> -
> - /* Add NEWOBJ opcode. */
> - if (pickler_write(self, &newobj_op, 1) < 0)
> - return -1;
> - }
> - else { /* Not using NEWOBJ. */
> - if (save(self, callable, 0) < 0 ||
> - save(self, argtup, 0) < 0 ||
> - pickler_write(self, &reduce_op, 1) < 0)
> - return -1;
> - }
> -
> - /* obj can be NULL when save_reduce() is used directly. A NULL obj means
> - the caller does not want to memoize the object. Not particularly useful,
> - but that is to mimic the behavior of save_reduce() in pickle.py when
> - obj is None. */
> - if (obj && memo_put(self, obj) < 0)
> - return -1;
> -
> - if (listitems && batch_list(self, listitems) < 0)
> - return -1;
> -
> - if (dictitems && batch_dict(self, dictitems) < 0)
> - return -1;
> -
> - if (state) {
> - if (save(self, state, 0) < 0 ||
> - pickler_write(self, &build_op, 1) < 0)
> - return -1;
> - }
> -
> - return 0;
> -}
> -
> -static int
> -save(PicklerObject *self, PyObject *obj, int pers_save)
> -{
> - PyTypeObject *type;
> - PyObject *reduce_func = NULL;
> - PyObject *reduce_value = NULL;
> - PyObject *memo_key = NULL;
> - int status = 0;
> -
> - /* XXX: Use Py_EnterRecursiveCall()? */
> - if (++self->nesting > Py_GetRecursionLimit()) {
> - PyErr_SetString(PyExc_RuntimeError,
> - "maximum recursion depth exceeded");
> - goto error;
> - }
> -
> - /* The extra pers_save argument is necessary to avoid calling save_pers()
> - on its returned object. */
> - if (!pers_save && self->pers_func) {
> - /* save_pers() returns:
> - -1 to signal an error;
> - 0 if it did nothing successfully;
> - 1 if a persistent id was saved.
> - */
> - if ((status = save_pers(self, obj, self->pers_func)) != 0)
> - goto done;
> - }
> -
> - type = Py_TYPE(obj);
> -
> - /* XXX: The old cPickle had an optimization that used switch-case
> - statement dispatching on the first letter of the type name. It was
> - probably not a bad idea after all. If benchmarks show that particular
> - optimization had some real benefits, it would be nice to add it
> - back. */
> -
> - /* Atom types; these aren't memoized, so don't check the memo. */
> -
> - if (obj == Py_None) {
> - status = save_none(self, obj);
> - goto done;
> - }
> - else if (obj == Py_False || obj == Py_True) {
> - status = save_bool(self, obj);
> - goto done;
> - }
> - else if (type == &PyLong_Type) {
> - status = save_long(self, obj);
> - goto done;
> - }
> - else if (type == &PyFloat_Type) {
> - status = save_float(self, obj);
> - goto done;
> - }
> -
> - /* Check the memo to see if it has the object. If so, generate
> - a GET (or BINGET) opcode, instead of pickling the object
> - once again. */
> - memo_key = PyLong_FromVoidPtr(obj);
> - if (memo_key == NULL)
> - goto error;
> - if (PyDict_GetItem(self->memo, memo_key)) {
> - if (memo_get(self, memo_key) < 0)
> - goto error;
> - goto done;
> - }
> -
> - if (type == &PyBytes_Type) {
> - status = save_bytes(self, obj);
> - goto done;
> - }
> - else if (type == &PyUnicode_Type) {
> - status = save_unicode(self, obj);
> - goto done;
> - }
> - else if (type == &PyDict_Type) {
> - status = save_dict(self, obj);
> - goto done;
> - }
> - else if (type == &PyList_Type) {
> - status = save_list(self, obj);
> - goto done;
> - }
> - else if (type == &PyTuple_Type) {
> - status = save_tuple(self, obj);
> - goto done;
> - }
> - else if (type == &PyType_Type) {
> - status = save_global(self, obj, NULL);
> - goto done;
> - }
> - else if (type == &PyFunction_Type) {
> - status = save_global(self, obj, NULL);
> - if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
> - /* fall back to reduce */
> - PyErr_Clear();
> - }
> - else {
> - goto done;
> - }
> - }
> - else if (type == &PyCFunction_Type) {
> - status = save_global(self, obj, NULL);
> - goto done;
> - }
> - else if (PyType_IsSubtype(type, &PyType_Type)) {
> - status = save_global(self, obj, NULL);
> - goto done;
> - }
> -
> - /* XXX: This part needs some unit tests. */
> -
> - /* Get a reduction callable, and call it. This may come from
> - * copyreg.dispatch_table, the object's __reduce_ex__ method,
> - * or the object's __reduce__ method.
> - */
> - reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
> - if (reduce_func != NULL) {
> - /* Here, the reference count of the reduce_func object returned by
> - PyDict_GetItem needs to be increased to be consistent with the one
> - returned by PyObject_GetAttr. This allows us to blindly DECREF
> - reduce_func at the end of the save() routine.
> - */
> - Py_INCREF(reduce_func);
> - Py_INCREF(obj);
> - reduce_value = pickler_call(self, reduce_func, obj);
> - }
> - else {
> - static PyObject *reduce_str = NULL;
> - static PyObject *reduce_ex_str = NULL;
> -
> - /* Cache the name of the reduce methods. */
> - if (reduce_str == NULL) {
> - reduce_str = PyUnicode_InternFromString("__reduce__");
> - if (reduce_str == NULL)
> - goto error;
> - reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
> - if (reduce_ex_str == NULL)
> - goto error;
> - }
> -
> - /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
> - automatically defined as __reduce__. While this is convenient, this
> - makes it impossible to know which method was actually called. Of
> - course, this is not a big deal. But still, it would be nice to let
> - the user know which method was called when something goes
> - wrong. Incidentally, this means if __reduce_ex__ is not defined, we
> - don't actually have to check for a __reduce__ method. */
> -
> - /* Check for a __reduce_ex__ method. */
> - reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
> - if (reduce_func != NULL) {
> - PyObject *proto;
> - proto = PyLong_FromLong(self->proto);
> - if (proto != NULL) {
> - reduce_value = pickler_call(self, reduce_func, proto);
> - }
> - }
> - else {
> - if (PyErr_ExceptionMatches(PyExc_AttributeError))
> - PyErr_Clear();
> - else
> - goto error;
> - /* Check for a __reduce__ method. */
> - reduce_func = PyObject_GetAttr(obj, reduce_str);
> - if (reduce_func != NULL) {
> - reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
> - }
> - else {
> - PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
> - type->tp_name, obj);
> - goto error;
> - }
> - }
> - }
> -
> - if (reduce_value == NULL)
> - goto error;
> -
> - if (PyUnicode_Check(reduce_value)) {
> - status = save_global(self, obj, reduce_value);
> - goto done;
> - }
> -
> - if (!PyTuple_Check(reduce_value)) {
> - PyErr_SetString(PicklingError,
> - "__reduce__ must return a string or tuple");
> - goto error;
> - }
> - if (Py_SIZE(reduce_value) < 2 || Py_SIZE(reduce_value) > 5) {
> - PyErr_SetString(PicklingError, "tuple returned by __reduce__ "
> - "must contain 2 through 5 elements");
> - goto error;
> - }
> - if (!PyTuple_Check(PyTuple_GET_ITEM(reduce_value, 1))) {
> - PyErr_SetString(PicklingError, "second item of the tuple "
> - "returned by __reduce__ must be a tuple");
> - goto error;
> - }
> -
> - status = save_reduce(self, reduce_value, obj);
> -
> - if (0) {
> - error:
> - status = -1;
> - }
> - done:
> - self->nesting--;
> - Py_XDECREF(memo_key);
> - Py_XDECREF(reduce_func);
> - Py_XDECREF(reduce_value);
> -
> - return status;
> -}
> -
> -static int
> -dump(PicklerObject *self, PyObject *obj)
> -{
> - const char stop_op = STOP;
> -
> - if (self->proto >= 2) {
> - char header[2];
> -
> - header[0] = PROTO;
> - assert(self->proto >= 0 && self->proto < 256);
> - header[1] = (unsigned char)self->proto;
> - if (pickler_write(self, header, 2) < 0)
> - return -1;
> - }
> -
> - if (save(self, obj, 0) < 0 ||
> - pickler_write(self, &stop_op, 1) < 0 ||
> - pickler_write(self, NULL, 0) < 0)
> - return -1;
> -
> - return 0;
> -}
> -
> -PyDoc_STRVAR(Pickler_clear_memo_doc,
> -"clear_memo() -> None. Clears the pickler's \"memo\"."
> -"\n"
> -"The memo is the data structure that remembers which objects the\n"
> -"pickler has already seen, so that shared or recursive objects are\n"
> -"pickled by reference and not by value. This method is useful when\n"
> -"re-using picklers.");
> -
> -static PyObject *
> -Pickler_clear_memo(PicklerObject *self)
> -{
> - if (self->memo)
> - PyDict_Clear(self->memo);
> -
> - Py_RETURN_NONE;
> -}
> -
> -PyDoc_STRVAR(Pickler_dump_doc,
> -"dump(obj) -> None. Write a pickled representation of obj to the open file.");
> -
> -static PyObject *
> -Pickler_dump(PicklerObject *self, PyObject *args)
> -{
> - PyObject *obj;
> -
> - if (!PyArg_ParseTuple(args, "O:dump", &obj))
> - return NULL;
> -
> - if (dump(self, obj) < 0)
> - return NULL;
> -
> - Py_RETURN_NONE;
> -}
> -
> -static struct PyMethodDef Pickler_methods[] = {
> - {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
> - Pickler_dump_doc},
> - {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
> - Pickler_clear_memo_doc},
> - {NULL, NULL} /* sentinel */
> -};
> -
> -static void
> -Pickler_dealloc(PicklerObject *self)
> -{
> - PyObject_GC_UnTrack(self);
> -
> - Py_XDECREF(self->write);
> - Py_XDECREF(self->memo);
> - Py_XDECREF(self->pers_func);
> - Py_XDECREF(self->arg);
> - Py_XDECREF(self->fast_memo);
> -
> - PyMem_Free(self->write_buf);
> -
> - Py_TYPE(self)->tp_free((PyObject *)self);
> -}
> -
> -static int
> -Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
> -{
> - Py_VISIT(self->write);
> - Py_VISIT(self->memo);
> - Py_VISIT(self->pers_func);
> - Py_VISIT(self->arg);
> - Py_VISIT(self->fast_memo);
> - return 0;
> -}
> -
> -static int
> -Pickler_clear(PicklerObject *self)
> -{
> - Py_CLEAR(self->write);
> - Py_CLEAR(self->memo);
> - Py_CLEAR(self->pers_func);
> - Py_CLEAR(self->arg);
> - Py_CLEAR(self->fast_memo);
> -
> - PyMem_Free(self->write_buf);
> - self->write_buf = NULL;
> -
> - return 0;
> -}
> -
> -PyDoc_STRVAR(Pickler_doc,
> -"Pickler(file, protocol=None)"
> -"\n"
> -"This takes a binary file for writing a pickle data stream.\n"
> -"\n"
> -"The optional protocol argument tells the pickler to use the\n"
> -"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
> -"protocol is 3; a backward-incompatible protocol designed for\n"
> -"Python 3.0.\n"
> -"\n"
> -"Specifying a negative protocol version selects the highest\n"
> -"protocol version supported. The higher the protocol used, the\n"
> -"more recent the version of Python needed to read the pickle\n"
> -"produced.\n"
> -"\n"
> -"The file argument must have a write() method that accepts a single\n"
> -"bytes argument. It can thus be a file object opened for binary\n"
> -"writing, a io.BytesIO instance, or any other custom object that\n"
> -"meets this interface.\n");
> -
> -static int
> -Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
> -{
> - static char *kwlist[] = {"file", "protocol", 0};
> - PyObject *file;
> - PyObject *proto_obj = NULL;
> - long proto = 0;
> -
> - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler",
> - kwlist, &file, &proto_obj))
> - return -1;
> -
> - /* In case of multiple __init__() calls, clear previous content. */
> - if (self->write != NULL)
> - (void)Pickler_clear(self);
> -
> - if (proto_obj == NULL || proto_obj == Py_None)
> - proto = DEFAULT_PROTOCOL;
> - else
> - proto = PyLong_AsLong(proto_obj);
> -
> - if (proto < 0)
> - proto = HIGHEST_PROTOCOL;
> - if (proto > HIGHEST_PROTOCOL) {
> - PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
> - HIGHEST_PROTOCOL);
> - return -1;
> - }
> -
> - self->proto = proto;
> - self->bin = proto > 0;
> - self->arg = NULL;
> - self->nesting = 0;
> - self->fast = 0;
> - self->fast_nesting = 0;
> - self->fast_memo = NULL;
> -
> - if (!PyObject_HasAttrString(file, "write")) {
> - PyErr_SetString(PyExc_TypeError,
> - "file must have a 'write' attribute");
> - return -1;
> - }
> - self->write = PyObject_GetAttrString(file, "write");
> - if (self->write == NULL)
> - return -1;
> - self->buf_size = 0;
> - self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
> - if (self->write_buf == NULL) {
> - PyErr_NoMemory();
> - return -1;
> - }
> - self->pers_func = NULL;
> - if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
> - self->pers_func = PyObject_GetAttrString((PyObject *)self,
> - "persistent_id");
> - if (self->pers_func == NULL)
> - return -1;
> - }
> - self->memo = PyDict_New();
> - if (self->memo == NULL)
> - return -1;
> -
> - return 0;
> -}
> -
> -static PyObject *
> -Pickler_get_memo(PicklerObject *self)
> -{
> - if (self->memo == NULL)
> - PyErr_SetString(PyExc_AttributeError, "memo");
> - else
> - Py_INCREF(self->memo);
> - return self->memo;
> -}
> -
> -static int
> -Pickler_set_memo(PicklerObject *self, PyObject *value)
> -{
> - PyObject *tmp;
> -
> - if (value == NULL) {
> - PyErr_SetString(PyExc_TypeError,
> - "attribute deletion is not supported");
> - return -1;
> - }
> - if (!PyDict_Check(value)) {
> - PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
> - return -1;
> - }
> -
> - tmp = self->memo;
> - Py_INCREF(value);
> - self->memo = value;
> - Py_XDECREF(tmp);
> -
> - return 0;
> -}
> -
> -static PyObject *
> -Pickler_get_persid(PicklerObject *self)
> -{
> - if (self->pers_func == NULL)
> - PyErr_SetString(PyExc_AttributeError, "persistent_id");
> - else
> - Py_INCREF(self->pers_func);
> - return self->pers_func;
> -}
> -
> -static int
> -Pickler_set_persid(PicklerObject *self, PyObject *value)
> -{
> - PyObject *tmp;
> -
> - if (value == NULL) {
> - PyErr_SetString(PyExc_TypeError,
> - "attribute deletion is not supported");
> - return -1;
> - }
> - if (!PyCallable_Check(value)) {
> - PyErr_SetString(PyExc_TypeError,
> - "persistent_id must be a callable taking one argument");
> - return -1;
> - }
> -
> - tmp = self->pers_func;
> - Py_INCREF(value);
> - self->pers_func = value;
> - Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
> -
> - return 0;
> -}
> -
> -static PyMemberDef Pickler_members[] = {
> - {"bin", T_INT, offsetof(PicklerObject, bin)},
> - {"fast", T_INT, offsetof(PicklerObject, fast)},
> - {NULL}
> -};
> -
> -static PyGetSetDef Pickler_getsets[] = {
> - {"memo", (getter)Pickler_get_memo,
> - (setter)Pickler_set_memo},
> - {"persistent_id", (getter)Pickler_get_persid,
> - (setter)Pickler_set_persid},
> - {NULL}
> -};
> -
> -static PyTypeObject Pickler_Type = {
> - PyVarObject_HEAD_INIT(NULL, 0)
> - "_pickle.Pickler" , /*tp_name*/
> - sizeof(PicklerObject), /*tp_basicsize*/
> - 0, /*tp_itemsize*/
> - (destructor)Pickler_dealloc, /*tp_dealloc*/
> - 0, /*tp_print*/
> - 0, /*tp_getattr*/
> - 0, /*tp_setattr*/
> - 0, /*tp_compare*/
> - 0, /*tp_repr*/
> - 0, /*tp_as_number*/
> - 0, /*tp_as_sequence*/
> - 0, /*tp_as_mapping*/
> - 0, /*tp_hash*/
> - 0, /*tp_call*/
> - 0, /*tp_str*/
> - 0, /*tp_getattro*/
> - 0, /*tp_setattro*/
> - 0, /*tp_as_buffer*/
> - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
> - Pickler_doc, /*tp_doc*/
> - (traverseproc)Pickler_traverse, /*tp_traverse*/
> - (inquiry)Pickler_clear, /*tp_clear*/
> - 0, /*tp_richcompare*/
> - 0, /*tp_weaklistoffset*/
> - 0, /*tp_iter*/
> - 0, /*tp_iternext*/
> - Pickler_methods, /*tp_methods*/
> - Pickler_members, /*tp_members*/
> - Pickler_getsets, /*tp_getset*/
> - 0, /*tp_base*/
> - 0, /*tp_dict*/
> - 0, /*tp_descr_get*/
> - 0, /*tp_descr_set*/
> - 0, /*tp_dictoffset*/
> - (initproc)Pickler_init, /*tp_init*/
> - PyType_GenericAlloc, /*tp_alloc*/
> - PyType_GenericNew, /*tp_new*/
> - PyObject_GC_Del, /*tp_free*/
> - 0, /*tp_is_gc*/
> -};
> -
> -/* Temporary helper for calling self.find_class().
> -
> - XXX: It would be nice to be able to avoid Python function call overhead, by
> - using directly the C version of find_class(), when find_class() is not
> - overridden by a subclass. Although, this could become rather hackish. A
> - simpler optimization would be to call the C function when self is not a
> - subclass instance. */
> -static PyObject *
> -find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
> -{
> - return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
> - module_name, global_name);
> -}
> -
> -static int
> -marker(UnpicklerObject *self)
> -{
> - if (self->num_marks < 1) {
> - PyErr_SetString(UnpicklingError, "could not find MARK");
> - return -1;
> - }
> -
> - return self->marks[--self->num_marks];
> -}
> -
> -static int
> -load_none(UnpicklerObject *self)
> -{
> - PDATA_APPEND(self->stack, Py_None, -1);
> - return 0;
> -}
> -
> -static int
> -bad_readline(void)
> -{
> - PyErr_SetString(UnpicklingError, "pickle data was truncated");
> - return -1;
> -}
> -
> -static int
> -load_int(UnpicklerObject *self)
> -{
> - PyObject *value;
> - char *endptr, *s;
> - Py_ssize_t len;
> - long x;
> -
> - if ((len = unpickler_readline(self, &s)) < 0)
> - return -1;
> - if (len < 2)
> - return bad_readline();
> -
> - errno = 0;
> - /* XXX: Should the base argument of strtol() be explicitly set to 10? */
> - x = strtol(s, &endptr, 0);
> -
> - if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
> - /* Hm, maybe we've got something long. Let's try reading
> - * it as a Python long object. */
> - errno = 0;
> - /* XXX: Same thing about the base here. */
> - value = PyLong_FromString(s, NULL, 0);
> - if (value == NULL) {
> - PyErr_SetString(PyExc_ValueError,
> - "could not convert string to int");
> - return -1;
> - }
> - }
> - else {
> - if (len == 3 && (x == 0 || x == 1)) {
> - if ((value = PyBool_FromLong(x)) == NULL)
> - retur...
>
> [Message clipped]
--
--Guido van Rossum (home page: http://www.python.org/~guido/)
More information about the Python-3000-checkins
mailing list