[Python-checkins] r55521 - in sandbox/trunk/cpy_merge: Include Include/cStringIO.h Lib Lib/StringIO.py Lib/cProfile.py Lib/copy_reg.py Lib/pickle.py Lib/pickletools.py Lib/profile.py Lib/test Lib/test/output Lib/test/output/test_cProfile Lib/test/pickletester.py Lib/test/regrtest.py Lib/test/test_StringIO.py Lib/test/test_cProfile.py Lib/test/test_copy_reg.py Lib/test/test_cpickle.py Lib/test/test_pickle.py Lib/test/test_pickletools.py Modules Modules/_lsprof.c Modules/cPickle.c Modules/cStringIO.c Modules/rotatingtree.c Modules/rotatingtree.h README setup.py
alexandre.vassalotti
python-checkins at python.org
Wed May 23 03:45:36 CEST 2007
Author: alexandre.vassalotti
Date: Wed May 23 03:45:28 2007
New Revision: 55521
Added:
sandbox/trunk/cpy_merge/
sandbox/trunk/cpy_merge/Include/
sandbox/trunk/cpy_merge/Include/cStringIO.h (contents, props changed)
sandbox/trunk/cpy_merge/Lib/
sandbox/trunk/cpy_merge/Lib/StringIO.py
sandbox/trunk/cpy_merge/Lib/cProfile.py (contents, props changed)
sandbox/trunk/cpy_merge/Lib/copy_reg.py
sandbox/trunk/cpy_merge/Lib/pickle.py
sandbox/trunk/cpy_merge/Lib/pickletools.py
sandbox/trunk/cpy_merge/Lib/profile.py (contents, props changed)
sandbox/trunk/cpy_merge/Lib/test/
sandbox/trunk/cpy_merge/Lib/test/output/
sandbox/trunk/cpy_merge/Lib/test/output/test_cProfile
sandbox/trunk/cpy_merge/Lib/test/pickletester.py
sandbox/trunk/cpy_merge/Lib/test/regrtest.py (contents, props changed)
sandbox/trunk/cpy_merge/Lib/test/test_StringIO.py
sandbox/trunk/cpy_merge/Lib/test/test_cProfile.py
sandbox/trunk/cpy_merge/Lib/test/test_copy_reg.py
sandbox/trunk/cpy_merge/Lib/test/test_cpickle.py
sandbox/trunk/cpy_merge/Lib/test/test_pickle.py
sandbox/trunk/cpy_merge/Lib/test/test_pickletools.py
sandbox/trunk/cpy_merge/Modules/
sandbox/trunk/cpy_merge/Modules/_lsprof.c (contents, props changed)
sandbox/trunk/cpy_merge/Modules/cPickle.c (contents, props changed)
sandbox/trunk/cpy_merge/Modules/cStringIO.c (contents, props changed)
sandbox/trunk/cpy_merge/Modules/rotatingtree.c (contents, props changed)
sandbox/trunk/cpy_merge/Modules/rotatingtree.h (contents, props changed)
sandbox/trunk/cpy_merge/README
sandbox/trunk/cpy_merge/setup.py (contents, props changed)
Log:
Initial import taken from the p3yk branch (r55520).
Added: sandbox/trunk/cpy_merge/Include/cStringIO.h
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Include/cStringIO.h Wed May 23 03:45:28 2007
@@ -0,0 +1,70 @@
+#ifndef Py_CSTRINGIO_H
+#define Py_CSTRINGIO_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+
+ This header provides access to cStringIO objects from C.
+ Functions are provided for calling cStringIO objects and
+ macros are provided for testing whether you have cStringIO
+ objects.
+
+ Before calling any of the functions or macros, you must initialize
+ the routines with:
+
+ PycString_IMPORT
+
+ This would typically be done in your init function.
+
+*/
+#define PycString_IMPORT \
+ PycStringIO = (struct PycStringIO_CAPI*)PyCObject_Import("cStringIO", \
+ "cStringIO_CAPI")
+
+/* Basic functions to manipulate cStringIO objects from C */
+
+static struct PycStringIO_CAPI {
+
+ /* Read a string from an input object. If the last argument
+ is -1, the remainder will be read.
+ */
+ int(*cread)(PyObject *, char **, Py_ssize_t);
+
+ /* Read a line from an input object. Returns the length of the read
+ line as an int and a pointer inside the object buffer as char** (so
+ the caller doesn't have to provide its own buffer as destination).
+ */
+ int(*creadline)(PyObject *, char **);
+
+ /* Write a string to an output object*/
+ int(*cwrite)(PyObject *, const char *, Py_ssize_t);
+
+ /* Get the output object as a Python string (returns new reference). */
+ PyObject *(*cgetvalue)(PyObject *);
+
+ /* Create a new output object */
+ PyObject *(*NewOutput)(int);
+
+ /* Create an input object from a Python string
+ (copies the Python string reference).
+ */
+ PyObject *(*NewInput)(PyObject *);
+
+ /* The Python types for cStringIO input and output objects.
+ Note that you can do input on an output object.
+ */
+ PyTypeObject *InputType, *OutputType;
+
+} *PycStringIO;
+
+/* These can be used to test if you have one */
+#define PycStringIO_InputCheck(O) \
+ ((O)->ob_type==PycStringIO->InputType)
+#define PycStringIO_OutputCheck(O) \
+ ((O)->ob_type==PycStringIO->OutputType)
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_CSTRINGIO_H */
Added: sandbox/trunk/cpy_merge/Lib/StringIO.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Lib/StringIO.py Wed May 23 03:45:28 2007
@@ -0,0 +1,324 @@
+r"""File-like objects that read from or write to a string buffer.
+
+This implements (nearly) all stdio methods.
+
+f = StringIO() # ready for writing
+f = StringIO(buf) # ready for reading
+f.close() # explicitly release resources held
+flag = f.isatty() # always false
+pos = f.tell() # get current position
+f.seek(pos) # set current position
+f.seek(pos, mode) # mode 0: absolute; 1: relative; 2: relative to EOF
+buf = f.read() # read until EOF
+buf = f.read(n) # read up to n bytes
+buf = f.readline() # read until end of line ('\n') or EOF
+list = f.readlines()# list of f.readline() results until EOF
+f.truncate([size]) # truncate file to at most size (default: current pos)
+f.write(buf) # write at current position
+f.writelines(list) # for line in list: f.write(line)
+f.getvalue() # return whole file's contents as a string
+
+Notes:
+- Using a real file is often faster (but less convenient).
+- There's also a much faster implementation in C, called cStringIO, but
+ it's not subclassable.
+- fileno() is left unimplemented so that code which uses it triggers
+ an exception early.
+- Seeking far beyond EOF and then writing will insert real null
+ bytes that occupy space in the buffer.
+- There's a simple test set (see end of this file).
+"""
+try:
+ from errno import EINVAL
+except ImportError:
+ EINVAL = 22
+
+__all__ = ["StringIO"]
+
def _complain_ifclosed(closed):
    """Raise ValueError if *closed* is true, mimicking real file objects.

    py3k fix: uses call syntax; the 'raise X, v' form is gone in Python 3.
    """
    if closed:
        raise ValueError("I/O operation on closed file")
+
class StringIO:
    """class StringIO([buffer])

    When a StringIO object is created, it can be initialized to an existing
    string by passing the string to the constructor. If no string is given,
    the StringIO will start empty.

    py3k note: all text is str here; the old str/unicode mixing caveats
    from the 2.x version no longer apply.
    """

    def __init__(self, buf=''):
        # Force self.buf to be a string (py3k fix: basestring is gone).
        if not isinstance(buf, str):
            buf = str(buf)
        self.buf = buf        # consolidated contents
        self.len = len(buf)   # logical file length
        self.buflist = []     # pending write fragments, joined lazily
        self.pos = 0          # current file offset
        self.closed = False

    def _assert_open(self):
        # Mimic real file objects: any I/O after close() is an error.
        # (Inlined equivalent of the module-level _complain_ifclosed.)
        if self.closed:
            raise ValueError("I/O operation on closed file")

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next input line, or raise StopIteration at EOF.

        A file object is its own iterator: iter(f) returns f (unless f
        is closed), and each __next__() call yields one line.
        """
        self._assert_open()
        r = self.readline()
        if not r:
            raise StopIteration
        return r

    def close(self):
        """Free the memory buffer; subsequent I/O raises ValueError."""
        if not self.closed:
            self.closed = True
            del self.buf, self.pos

    def isatty(self):
        """Return False: StringIO objects are never tty-like."""
        self._assert_open()
        return False

    def seek(self, pos, mode=0):
        """Set the file's current position.

        mode 0 (default): absolute positioning; 1: relative to the
        current position; 2: relative to the file's end.  No return value.
        """
        self._assert_open()
        # Consolidate pending writes so self.buf reflects everything.
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        if mode == 1:
            pos += self.pos
        elif mode == 2:
            pos += self.len
        self.pos = max(0, pos)

    def tell(self):
        """Return the file's current position."""
        self._assert_open()
        return self.pos

    def read(self, n=None):
        """Read at most n bytes (less if EOF is hit first).

        A negative or omitted n reads all remaining data.  Returns ''
        when EOF is encountered immediately.
        """
        self._assert_open()
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        if n is None:
            n = -1
        if n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos+n, self.len)
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readline(self, length=None):
        r"""Read one entire line from the file.

        The trailing newline is kept (it may be absent when the file
        ends with an incomplete line).  A present, non-negative *length*
        caps the byte count (including the newline), so an incomplete
        line may be returned.  Returns '' only at EOF.  Unlike stdio's
        fgets(), embedded null characters ('\0') are preserved.
        """
        self._assert_open()
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        i = self.buf.find('\n', self.pos)
        if i < 0:
            newpos = self.len
        else:
            newpos = i+1
        if length is not None:
            if self.pos + length < newpos:
                newpos = self.pos + length
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readlines(self, sizehint=0):
        """Read to EOF with readline() and return the list of lines read.

        With a positive *sizehint*, stop once whole lines totalling
        approximately sizehint bytes have been read (rounded up to
        accommodate a final whole line).
        """
        total = 0
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            total += len(line)
            if 0 < sizehint <= total:
                break
            line = self.readline()
        return lines

    def truncate(self, size=None):
        """Truncate the file to at most *size* bytes (default: current pos).

        The position moves back only when it lies beyond the new end.
        If *size* exceeds the current length, the contents are unchanged.
        """
        self._assert_open()
        if size is None:
            size = self.pos
        elif size < 0:
            raise IOError(EINVAL, "Negative size not allowed")
        elif size < self.pos:
            self.pos = size
        self.buf = self.getvalue()[:size]
        # Bug fix: keep self.len consistent with the actual data.  The
        # old code set it to *size* even when size exceeded the real
        # length, corrupting later read()/write() bookkeeping.
        self.len = len(self.buf)

    def write(self, s):
        """Write string *s* at the current position (no return value)."""
        self._assert_open()
        if not s:
            return
        # Force s to be a string (py3k fix: basestring is gone).
        if not isinstance(s, str):
            s = str(s)
        spos = self.pos
        slen = self.len
        if spos == slen:
            # Plain append at EOF: defer the join until it is needed.
            self.buflist.append(s)
            self.len = self.pos = spos + len(s)
            return
        if spos > slen:
            # Position was seeked past EOF: pad with real null bytes.
            self.buflist.append('\0'*(spos - slen))
            slen = spos
        newpos = spos + len(s)
        if spos < slen:
            # Overwrite in the middle: splice s into the whole buffer.
            if self.buflist:
                self.buf += ''.join(self.buflist)
            self.buflist = [self.buf[:spos], s, self.buf[newpos:]]
            self.buf = ''
            if newpos > slen:
                slen = newpos
        else:
            self.buflist.append(s)
            slen = newpos
        self.len = slen
        self.pos = newpos

    def writelines(self, iterable):
        """Write each string produced by *iterable*.

        No line separators are added (the name merely parallels
        readlines()).  No return value.
        """
        write = self.write
        for line in iterable:
            write(line)

    def flush(self):
        """No-op, kept for file-object interface compatibility."""
        self._assert_open()

    def getvalue(self):
        """Return the entire contents accumulated so far.

        Valid at any time before close() is called.
        """
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        return self.buf
+
+
+# A little test suite
+
def test():
    """Exercise StringIO against a real text file (default: /etc/passwd).

    Intended for manual runs; raises RuntimeError on any mismatch.
    py3k fixes: raise-call syntax, floor division for the half-length
    truncate, and 'with' so the input file handles are not leaked.
    """
    import sys
    if sys.argv[1:]:
        file = sys.argv[1]
    else:
        file = '/etc/passwd'
    with open(file, 'r') as fp:
        lines = fp.readlines()
    with open(file, 'r') as fp:
        text = fp.read()
    f = StringIO()
    for line in lines[:-2]:
        f.write(line)
    f.writelines(lines[-2:])
    if f.getvalue() != text:
        raise RuntimeError('write failed')
    length = f.tell()
    print('File length =', length)
    f.seek(len(lines[0]))
    f.write(lines[1])
    f.seek(0)
    print('First line =', repr(f.readline()))
    print('Position =', f.tell())
    line = f.readline()
    print('Second line =', repr(line))
    f.seek(-len(line), 1)
    line2 = f.read(len(line))
    if line != line2:
        raise RuntimeError('bad result after seek back')
    f.seek(len(line2), 1)
    list = f.readlines()
    line = list[-1]
    f.seek(f.tell() - len(line))
    line2 = f.read()
    if line != line2:
        raise RuntimeError('bad result after seek back from EOF')
    print('Read', len(list), 'more lines')
    print('File length =', f.tell())
    if f.tell() != length:
        raise RuntimeError('bad length')
    # // keeps an integer size under py3k true division.
    f.truncate(length // 2)
    f.seek(0, 2)
    print('Truncated length =', f.tell())
    if f.tell() != length // 2:
        raise RuntimeError('truncate did not adjust length')
    f.close()

if __name__ == '__main__':
    test()
Added: sandbox/trunk/cpy_merge/Lib/cProfile.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Lib/cProfile.py Wed May 23 03:45:28 2007
@@ -0,0 +1,190 @@
+#! /usr/bin/env python
+
+"""Python interface for the 'lsprof' profiler.
+ Compatible with the 'profile' module.
+"""
+
+__all__ = ["run", "runctx", "help", "Profile"]
+
+import _lsprof
+
+# ____________________________________________________________
+# Simple interface
+
def run(statement, filename=None, sort=-1):
    """Profile *statement*, optionally saving the results to *filename*.

    *statement* is any string acceptable to exec.  When no filename is
    given, a simple report sorted by the standard name string
    (file/line/function-name) is printed instead; *sort* selects the
    pstats sort order for that report.
    """
    profiler = Profile()
    outcome = None
    try:
        try:
            profiler = profiler.run(statement)
        except SystemExit:
            # A sys.exit() inside the statement still yields a report.
            pass
    finally:
        if filename is None:
            outcome = profiler.print_stats(sort)
        else:
            profiler.dump_stats(filename)
    return outcome
+
def runctx(statement, globals, locals, filename=None, sort=-1):
    """Run statement under profiler, supplying your own globals and locals,
    optionally saving results in filename.

    statement and filename have the same semantics as profile.run.
    The optional *sort* argument (backward-compatible addition, matching
    run()) selects the pstats sort order when printing to stdout.
    """
    prof = Profile()
    result = None
    try:
        try:
            prof = prof.runctx(statement, globals, locals)
        except SystemExit:
            pass
    finally:
        if filename is not None:
            prof.dump_stats(filename)
        else:
            result = prof.print_stats(sort)
    return result
+
+# Backwards compatibility.
def help():
    """Backwards-compatibility stub: point the user at the real docs."""
    for line in (
        "Documentation for the profile/cProfile modules can be found ",
        "in the Python Library Reference, section 'The Python Profiler'.",
    ):
        print(line)
+
+# ____________________________________________________________
+
class Profile(_lsprof.Profiler):
    """Profile(custom_timer=None, time_unit=None, subcalls=True, builtins=True)

    Builds a profiler object using the specified timer function.
    The default timer is a fast built-in one based on real time.
    For custom timer functions returning integers, time_unit can
    be a float specifying a scale (i.e. how long each integer unit
    is, in seconds).
    """

    # Most of the functionality is in the _lsprof.Profiler base class;
    # this subclass only adds convenient, backward-compatible methods.

    def print_stats(self, sort=-1):
        """Strip directories, sort by *sort*, and print via pstats."""
        import pstats
        pstats.Stats(self).strip_dirs().sort_stats(sort).print_stats()

    def dump_stats(self, file):
        """Snapshot the collected stats and marshal them to *file*.

        Fix: 'with' guarantees the file is closed even if create_stats()
        or marshal.dump() raises (the old code leaked the handle).
        """
        import marshal
        with open(file, 'wb') as f:
            self.create_stats()
            marshal.dump(self.stats, f)

    def create_stats(self):
        """Stop profiling and build self.stats from the raw entries."""
        self.disable()
        self.snapshot_stats()

    def snapshot_stats(self):
        """Convert getstats() entries into the pstats dictionary format."""
        entries = self.getstats()
        self.stats = {}
        callersdicts = {}
        # First pass: one stats row per profiled function.
        for entry in entries:
            func = label(entry.code)
            nc = entry.callcount         # ncalls column of pstats (before '/')
            cc = nc - entry.reccallcount # ncalls column of pstats (after '/')
            tt = entry.inlinetime        # tottime column of pstats
            ct = entry.totaltime         # cumtime column of pstats
            callers = {}
            callersdicts[id(entry.code)] = callers
            self.stats[func] = cc, nc, tt, ct, callers
        # Second pass: fill in per-caller (subcall) information.
        for entry in entries:
            if entry.calls:
                func = label(entry.code)
                for subentry in entry.calls:
                    try:
                        callers = callersdicts[id(subentry.code)]
                    except KeyError:
                        continue
                    nc = subentry.callcount
                    cc = nc - subentry.reccallcount
                    tt = subentry.inlinetime
                    ct = subentry.totaltime
                    if func in callers:
                        # Accumulate when the same caller shows up twice.
                        prev = callers[func]
                        nc += prev[0]
                        cc += prev[1]
                        tt += prev[2]
                        ct += prev[3]
                    callers[func] = nc, cc, tt, ct

    # The following two methods can be called by clients to use
    # a profiler to profile a statement, given as a string.

    def run(self, cmd):
        """Profile *cmd* with __main__'s namespace; returns self."""
        import __main__
        dict = __main__.__dict__
        return self.runctx(cmd, dict, dict)

    def runctx(self, cmd, globals, locals):
        """Profile exec(cmd, globals, locals); returns self."""
        self.enable()
        try:
            exec(cmd, globals, locals)
        finally:
            self.disable()
        return self

    # This method is more useful to profile a single function call.
    def runcall(self, func, *args, **kw):
        """Profile func(*args, **kw) and return its result."""
        self.enable()
        try:
            return func(*args, **kw)
        finally:
            self.disable()
+
+# ____________________________________________________________
+
def label(code):
    """Map a code object (or a builtin's name string) to a pstats key."""
    if not isinstance(code, str):
        return (code.co_filename, code.co_firstlineno, code.co_name)
    # Built-in functions are known only by name; '~' sorts at the end.
    return ('~', 0, code)
+
+# ____________________________________________________________
+
def main():
    """Command-line entry point: profile the script named on sys.argv.

    Returns the option parser (for introspection/testing).
    """
    import os, sys
    from optparse import OptionParser
    usage = "cProfile.py [-o output_file_path] [-s sort] scriptfile [arg] ..."
    parser = OptionParser(usage=usage)
    parser.allow_interspersed_args = False
    parser.add_option('-o', '--outfile', dest="outfile",
        help="Save stats to <outfile>", default=None)
    parser.add_option('-s', '--sort', dest="sort",
        help="Sort order when printing to stdout, based on pstats.Stats class", default=-1)

    if not sys.argv[1:]:
        parser.print_usage()
        sys.exit(2)

    (options, args) = parser.parse_args()
    sys.argv[:] = args

    if len(sys.argv) > 0:
        progname = sys.argv[0]
        sys.path.insert(0, os.path.dirname(progname))
        # py3k fix: execfile() no longer exists -- read, compile and
        # exec the script inside the profiled statement instead.
        run('exec(compile(open(%r).read(), %r, "exec"))'
            % (progname, progname),
            options.outfile, options.sort)
    else:
        parser.print_usage()
    return parser

# When invoked as main program, invoke the profiler on a script
if __name__ == '__main__':
    main()
Added: sandbox/trunk/cpy_merge/Lib/copy_reg.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Lib/copy_reg.py Wed May 23 03:45:28 2007
@@ -0,0 +1,196 @@
+"""Helper to provide extensibility for pickle/cPickle.
+
+This is only useful to add pickle support for extension types defined in
+C, not for instances of user-defined classes.
+"""
+
+__all__ = ["pickle", "constructor",
+ "add_extension", "remove_extension", "clear_extension_cache"]
+
dispatch_table = {}  # maps a type to its registered reduction function

def pickle(ob_type, pickle_function, constructor_ob=None):
    """Register *pickle_function* as the reducer for *ob_type*."""
    if not hasattr(pickle_function, '__call__'):
        raise TypeError("reduction functions must be callable")
    dispatch_table[ob_type] = pickle_function
    # 'constructor_ob' is a vestige of the old safe-for-unpickling
    # machinery.  Callers no longer need to pass it, but when they do
    # we still validate it for backward compatibility.
    if constructor_ob is not None:
        constructor(constructor_ob)

def constructor(object):
    """Validate that *object* is callable; historical no-op otherwise."""
    if not hasattr(object, '__call__'):
        raise TypeError("constructors must be callable")
+
+# Example: provide pickling support for complex numbers.
+
try:
    complex
except NameError:
    # Interpreter built without complex support: nothing to register.
    pass
else:

    def pickle_complex(num):
        # Reduce a complex number to its constructor plus (real, imag).
        return complex, (num.real, num.imag)

    pickle(complex, pickle_complex, complex)
+
+# Support for pickling new-style objects
+
def _reconstructor(cls, base, state):
    """Rebuild an instance of *cls* from (base, state) -- the protocol
    0/1 counterpart of the tuple produced by _reduce_ex()."""
    # object.__new__ takes no extra state; other bases receive it.
    new_args = () if base is object else (state,)
    obj = base.__new__(cls, *new_args)
    if base.__init__ != object.__init__:
        base.__init__(obj, state)
    return obj
+
_HEAPTYPE = 1<<9

# Python code for object.__reduce_ex__ for protocols 0 and 1

def _reduce_ex(self, proto):
    """Reduce *self* for pickle protocols 0 and 1.

    Returns (_reconstructor, args) or (_reconstructor, args, state).
    Raises TypeError for instances whose most-derived non-heap base is
    their own class (C-level types other than object), which cannot be
    pickled this way.
    """
    assert proto < 2
    # Find the most-derived non-heap base; its value carries the
    # C-level state of the instance.
    for base in self.__class__.__mro__:
        if hasattr(base, '__flags__') and not base.__flags__ & _HEAPTYPE:
            break
    else:
        base = object # not really reachable
    if base is object:
        state = None
    else:
        if base is self.__class__:
            # py3k fix: call syntax; the 'raise X, v' form is gone.
            raise TypeError("can't pickle %s objects" % base.__name__)
        state = base(self)
    args = (self.__class__, base, state)
    try:
        getstate = self.__getstate__
    except AttributeError:
        if getattr(self, "__slots__", None):
            raise TypeError("a class that defines __slots__ without "
                            "defining __getstate__ cannot be pickled")
        try:
            dict = self.__dict__
        except AttributeError:
            dict = None
    else:
        dict = getstate()
    if dict:
        return _reconstructor, args, dict
    else:
        return _reconstructor, args
+
+# Helper for __reduce_ex__ protocol 2
+
def __newobj__(cls, *newargs):
    """Protocol-2 helper: build an instance via cls.__new__ alone
    (__init__ is deliberately not called; state comes separately)."""
    return cls.__new__(cls, *newargs)
+
def _slotnames(cls):
    """Return a list of slot names for a given class.

    This needs to find slots defined by the class and its bases, so we
    can't simply return the __slots__ attribute.  We must walk down
    the Method Resolution Order and concatenate the __slots__ of each
    class found there.  (This assumes classes don't modify their
    __slots__ attribute to misrepresent their slots after the class is
    defined.)
    """

    # Get the value from a cache in the class if possible
    names = cls.__dict__.get("__slotnames__")
    if names is not None:
        return names

    # Not cached -- calculate the value
    names = []
    if not hasattr(cls, "__slots__"):
        # This class has no slots
        pass
    else:
        # Slots found -- gather slot names from all base classes
        for c in cls.__mro__:
            if "__slots__" in c.__dict__:
                slots = c.__dict__['__slots__']
                # A single slot can be given as a plain string.
                # py3k fix: basestring no longer exists; test str.
                if isinstance(slots, str):
                    slots = (slots,)
                for name in slots:
                    # special descriptors
                    if name in ("__dict__", "__weakref__"):
                        continue
                    # mangled names: apply the compiler's private-name
                    # mangling so the stored name matches the attribute.
                    elif name.startswith('__') and not name.endswith('__'):
                        names.append('_%s%s' % (c.__name__, name))
                    else:
                        names.append(name)

    # Cache the outcome in the class if at all possible
    try:
        cls.__slotnames__ = names
    except:
        pass # But don't die if we can't

    return names
+
+# A registry of extension codes. This is an ad-hoc compression
+# mechanism. Whenever a global reference to <module>, <name> is about
+# to be pickled, the (<module>, <name>) tuple is looked up here to see
+# if it is a registered extension code for it. Extension codes are
+# universal, so that the meaning of a pickle does not depend on
+# context. (There are also some codes reserved for local use that
+# don't have this restriction.) Codes are positive ints; 0 is
+# reserved.
+
_extension_registry = {}                # key -> code
_inverted_registry = {}                 # code -> key
_extension_cache = {}                   # code -> object
# Don't ever rebind those names:  cPickle grabs a reference to them when
# it's initialized, and won't see a rebinding.

def add_extension(module, name, code):
    """Register an extension code.

    py3k fix below: raise uses call syntax ('raise X, v' is gone).
    Redundant re-registration of an identical mapping is benign; any
    conflicting reuse of the key or the code raises ValueError.
    """
    code = int(code)
    if not 1 <= code <= 0x7fffffff:
        raise ValueError("code out of range")
    key = (module, name)
    if (_extension_registry.get(key) == code and
        _inverted_registry.get(code) == key):
        return # Redundant registrations are benign
    if key in _extension_registry:
        raise ValueError("key %s is already registered with code %s" %
                         (key, _extension_registry[key]))
    if code in _inverted_registry:
        raise ValueError("code %s is already in use for key %s" %
                         (code, _inverted_registry[code]))
    _extension_registry[key] = code
    _inverted_registry[code] = key

def remove_extension(module, name, code):
    """Unregister an extension code.  For testing only."""
    key = (module, name)
    if (_extension_registry.get(key) != code or
        _inverted_registry.get(code) != key):
        raise ValueError("key %s is not registered with code %s" %
                         (key, code))
    del _extension_registry[key]
    del _inverted_registry[code]
    if code in _extension_cache:
        del _extension_cache[code]

def clear_extension_cache():
    """Drop all cached extension objects (registrations are kept)."""
    _extension_cache.clear()
+
+# Standard extension code assignments
+
+# Reserved ranges
+
+# First Last Count Purpose
+# 1 127 127 Reserved for Python standard library
+# 128 191 64 Reserved for Zope
+# 192 239 48 Reserved for 3rd parties
+# 240 255 16 Reserved for private use (will never be assigned)
+# 256 Inf Inf Reserved for future assignment
+
+# Extension codes are assigned by the Python Software Foundation.
Added: sandbox/trunk/cpy_merge/Lib/pickle.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Lib/pickle.py Wed May 23 03:45:28 2007
@@ -0,0 +1,1328 @@
+"""Create portable serialized representations of Python objects.
+
+See module cPickle for a (much) faster implementation.
+See module copy_reg for a mechanism for registering custom picklers.
+See module pickletools source for extensive comments.
+
+Classes:
+
+ Pickler
+ Unpickler
+
+Functions:
+
+ dump(object, file)
+ dumps(object) -> string
+ load(file) -> object
+ loads(string) -> object
+
+Misc variables:
+
+ __version__
+ format_version
+ compatible_formats
+
+"""
+
__version__ = "$Revision: 55514 $"       # Code version

from types import *
from copy_reg import dispatch_table
from copy_reg import _extension_registry, _inverted_registry, _extension_cache
import marshal
import sys
import struct
import re
import io

__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
           "Unpickler", "dump", "dumps", "load", "loads"]

# These are purely informational; no code uses these.
format_version = "2.0"                  # File format version we write
compatible_formats = ["1.0",            # Original protocol 0
                      "1.1",            # Protocol 0 with INST added
                      "1.2",            # Original protocol 1
                      "1.3",            # Protocol 1 with BINFLOAT added
                      "2.0",            # Protocol 2
                      ]                 # Old format versions we can read

# Keep in synch with cPickle.  This is the highest protocol number we
# know how to read.
HIGHEST_PROTOCOL = 2

# The protocol we write by default.  May be less than HIGHEST_PROTOCOL.
DEFAULT_PROTOCOL = 2

# Why use struct.pack() for pickling but marshal.loads() for
# unpickling?  struct.pack() is 40% faster than marshal.dumps(), but
# marshal.loads() is twice as fast as struct.unpack()!
mloads = marshal.loads
+
class PickleError(Exception):
    """A common base class for the other pickling exceptions."""
    pass
+
class PicklingError(PickleError):
    """This exception is raised when an unpicklable object is passed to the
    dump() method.

    """
    pass
+
class UnpicklingError(PickleError):
    """This exception is raised when there is a problem unpickling an object,
    such as a security violation.

    Note that other exceptions may also be raised during unpickling, including
    (but not necessarily limited to) AttributeError, EOFError, ImportError,
    and IndexError.

    """
    pass
+
+# An instance of _Stop is raised by Unpickler.load_stop() in response to
+# the STOP opcode, passing the object that is the result of unpickling.
class _Stop(Exception):
    """Raised by Unpickler.load_stop() in response to the STOP opcode,
    carrying the object that is the result of unpickling."""
    def __init__(self, value):
        self.value = value      # the fully unpickled object
+
+# Jython has PyStringMap; it's a dict subclass with string keys
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None      # not running under Jython
+
+# Pickle opcodes. See pickletools.py for extensive docs. The listing
+# here is in kind-of alphabetical order of 1-character pickle code.
+# pickletools groups them by purpose.
+
MARK           = b'('   # push special markobject on stack
STOP           = b'.'   # every pickle ends with STOP
POP            = b'0'   # discard topmost stack item
POP_MARK       = b'1'   # discard stack top through topmost markobject
DUP            = b'2'   # duplicate top stack item
FLOAT          = b'F'   # push float object; decimal string argument
INT            = b'I'   # push integer or bool; decimal string argument
BININT         = b'J'   # push four-byte signed int
BININT1        = b'K'   # push 1-byte unsigned int
LONG           = b'L'   # push long; decimal string argument
BININT2        = b'M'   # push 2-byte unsigned int
NONE           = b'N'   # push None
PERSID         = b'P'   # push persistent object; id is taken from string arg
BINPERSID      = b'Q'   #  "       "         "  ;  "  "   "     "  stack
REDUCE         = b'R'   # apply callable to argtuple, both on stack
STRING         = b'S'   # push string; NL-terminated string argument
BINSTRING      = b'T'   # push string; counted binary string argument
SHORT_BINSTRING= b'U'   #  "     "   ;    "      "       "      " < 256 bytes
UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE     = b'X'   #   "     "       "  ; counted UTF-8 string argument
APPEND         = b'a'   # append stack top to list below it
BUILD          = b'b'   # call __setstate__ or __dict__.update()
GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
DICT           = b'd'   # build a dict from stack items
EMPTY_DICT     = b'}'   # push empty dict
APPENDS        = b'e'   # extend list on stack by topmost stack slice
GET            = b'g'   # push item from memo on stack; index is string arg
BINGET         = b'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
INST           = b'i'   # build & push class instance
LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
LIST           = b'l'   # build list from topmost stack items
EMPTY_LIST     = b']'   # push empty list
OBJ            = b'o'   # build & push class instance
PUT            = b'p'   # store stack top in memo; index is string arg
BINPUT         = b'q'   #   "     "    "   "   " ;   "    " 1-byte arg
LONG_BINPUT    = b'r'   #   "     "    "   "   " ;   "    " 4-byte arg
SETITEM        = b's'   # add key+value pair to dict
TUPLE          = b't'   # build tuple from topmost stack items
EMPTY_TUPLE    = b')'   # push empty tuple
SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding

TRUE           = b'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE          = b'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO          = b'\x80'  # identify pickle protocol
NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
EXT1           = b'\x82'  # push object from extension registry; 1-byte index
EXT2           = b'\x83'  # ditto, but 2-byte index
EXT4           = b'\x84'  # ditto, but 4-byte index
TUPLE1         = b'\x85'  # build 1-tuple from stack top
TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
NEWTRUE        = b'\x88'  # push True
NEWFALSE       = b'\x89'  # push False
LONG1          = b'\x8a'  # push long from < 256 bytes
LONG4          = b'\x8b'  # push really big long

# Maps a tuple length (0..3) to the proto-2 opcode that builds it.
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]


# Export all ALL_CAPS opcode names defined above.
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
+
+
+# Pickling machinery
+
class Pickler:
    """Write pickled representations of objects to a file-like object."""

    def __init__(self, file, protocol=None):
        """This takes a binary file for writing a pickle data stream.

        All protocols now read and write bytes.

        The optional protocol argument tells the pickler to use the
        given protocol; supported protocols are 0, 1, 2.  The default
        protocol is 2; it's been supported for many years now.

        Protocol 1 is more efficient than protocol 0; protocol 2 is
        more efficient than protocol 1.

        Specifying a negative protocol version selects the highest
        protocol version supported.  The higher the protocol used, the
        more recent the version of Python needed to read the pickle
        produced.

        The file parameter must have a write() method that accepts a single
        string argument.  It can thus be an open file object, a StringIO
        object, or any other custom object that meets this interface.

        """
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        if protocol < 0:
            protocol = HIGHEST_PROTOCOL
        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
        self.write = file.write
        self.memo = {}                  # maps id(obj) -> (memo key, obj)
        self.proto = int(protocol)
        self.bin = protocol >= 1        # protocols >= 1 use binary opcodes
        self.fast = 0                   # when true, memoize() is a no-op

    def clear_memo(self):
        """Clears the pickler's "memo".

        The memo is the data structure that remembers which objects the
        pickler has already seen, so that shared or recursive objects are
        pickled by reference and not by value.  This method is useful when
        re-using picklers.

        """
        self.memo.clear()

    def dump(self, obj):
        """Write a pickled representation of obj to the open file."""
        if self.proto >= 2:
            # Protocol 2 streams start with PROTO plus a version byte.
            self.write(PROTO + bytes([self.proto]))
        self.save(obj)
        self.write(STOP)

    def memoize(self, obj):
        """Store an object in the memo."""

        # The Pickler memo is a dictionary mapping object ids to 2-tuples
        # that contain the Unpickler memo key and the object being memoized.
        # The memo key is written to the pickle and will become
        # the key in the Unpickler's memo.  The object is stored in the
        # Pickler memo so that transient objects are kept alive during
        # pickling.

        # The use of the Unpickler memo length as the memo key is just a
        # convention.  The only requirement is that the memo values be unique.
        # But there appears no advantage to any other scheme, and this
        # scheme allows the Unpickler memo to be implemented as a plain (but
        # growable) array, indexed by memo key.
        if self.fast:
            return
        assert id(obj) not in self.memo
        memo_len = len(self.memo)
        self.write(self.put(memo_len))
        self.memo[id(obj)] = memo_len, obj

    # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
    def put(self, i, pack=struct.pack):
        if self.bin:
            if i < 256:
                return BINPUT + bytes([i])
            else:
                return LONG_BINPUT + pack("<i", i)

        return PUT + bytes(repr(i)) + b'\n'

    # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
    def get(self, i, pack=struct.pack):
        if self.bin:
            if i < 256:
                return BINGET + bytes([i])
            else:
                return LONG_BINGET + pack("<i", i)

        return GET + bytes(repr(i)) + b'\n'

    def save(self, obj):
        # Central dispatcher: emits the opcodes for obj, trying in order
        # persistent id, the memo, the type dispatch table, class-ness,
        # and finally the copy_reg/__reduce__ protocol.

        # Check for persistent id (defined by a subclass)
        pid = self.persistent_id(obj)
        if pid:
            self.save_pers(pid)
            return

        # Check the memo
        x = self.memo.get(id(obj))
        if x:
            self.write(self.get(x[0]))
            return

        # Check the type dispatch table
        t = type(obj)
        f = self.dispatch.get(t)
        if f:
            f(self, obj)        # Call unbound method with explicit self
            return

        # Check for a class with a custom metaclass; treat as regular class
        # NOTE(review): relies on transitional 2.x names (TypeType from
        # `from types import *`, basestring below) -- confirm against the
        # sandbox runtime.
        try:
            issc = issubclass(t, TypeType)
        except TypeError:       # t is not a class (old Boost; see SF #502085)
            issc = 0
        if issc:
            self.save_global(obj)
            return

        # Check copy_reg.dispatch_table
        reduce = dispatch_table.get(t)
        if reduce:
            rv = reduce(obj)
        else:
            # Check for a __reduce_ex__ method, fall back to __reduce__
            reduce = getattr(obj, "__reduce_ex__", None)
            if reduce:
                rv = reduce(self.proto)
            else:
                reduce = getattr(obj, "__reduce__", None)
                if reduce:
                    rv = reduce()
                else:
                    raise PicklingError("Can't pickle %r object: %r" %
                                        (t.__name__, obj))

        # Check for string returned by reduce(), meaning "save as global"
        if isinstance(rv, basestring):
            self.save_global(obj, rv)
            return

        # Assert that reduce() returned a tuple
        if type(rv) is not TupleType:
            raise PicklingError("%s must return string or tuple" % reduce)

        # Assert that it returned an appropriately sized tuple
        l = len(rv)
        if not (2 <= l <= 5):
            raise PicklingError("Tuple returned by %s must have "
                                "two to five elements" % reduce)

        # Save the reduce() output and finally memoize the object
        self.save_reduce(obj=obj, *rv)

    def persistent_id(self, obj):
        # This exists so a subclass can override it
        return None

    def save_pers(self, pid):
        # Save a persistent id reference
        if self.bin:
            self.save(pid)
            self.write(BINPERSID)
        else:
            self.write(PERSID + bytes(str(pid)) + b'\n')

    def save_reduce(self, func, args, state=None,
                    listitems=None, dictitems=None, obj=None):
        # This API is called by some subclasses

        # Assert that args is a tuple or None
        if not isinstance(args, TupleType):
            raise PicklingError("args from reduce() should be a tuple")

        # Assert that func is callable
        if not hasattr(func, '__call__'):
            raise PicklingError("func from reduce should be callable")

        save = self.save
        write = self.write

        # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ
        if self.proto >= 2 and getattr(func, "__name__", "") == "__newobj__":
            # A __reduce__ implementation can direct protocol 2 to
            # use the more efficient NEWOBJ opcode, while still
            # allowing protocol 0 and 1 to work normally.  For this to
            # work, the function returned by __reduce__ should be
            # called __newobj__, and its first argument should be a
            # new-style class.  The implementation for __newobj__
            # should be as follows, although pickle has no way to
            # verify this:
            #
            # def __newobj__(cls, *args):
            #     return cls.__new__(cls, *args)
            #
            # Protocols 0 and 1 will pickle a reference to __newobj__,
            # while protocol 2 (and above) will pickle a reference to
            # cls, the remaining args tuple, and the NEWOBJ code,
            # which calls cls.__new__(cls, *args) at unpickling time
            # (see load_newobj below).  If __reduce__ returns a
            # three-tuple, the state from the third tuple item will be
            # pickled regardless of the protocol, calling __setstate__
            # at unpickling time (see load_build below).
            #
            # Note that no standard __newobj__ implementation exists;
            # you have to provide your own.  This is to enforce
            # compatibility with Python 2.2 (pickles written using
            # protocol 0 or 1 in Python 2.3 should be unpicklable by
            # Python 2.2).
            cls = args[0]
            if not hasattr(cls, "__new__"):
                raise PicklingError(
                    "args[0] from __newobj__ args has no __new__")
            if obj is not None and cls is not obj.__class__:
                raise PicklingError(
                    "args[0] from __newobj__ args has the wrong class")
            args = args[1:]
            save(cls)
            save(args)
            write(NEWOBJ)
        else:
            save(func)
            save(args)
            write(REDUCE)

        if obj is not None:
            self.memoize(obj)

        # More new special cases (that work with older protocols as
        # well): when __reduce__ returns a tuple with 4 or 5 items,
        # the 4th and 5th item should be iterators that provide list
        # items and dict items (as (key, value) tuples), or None.

        if listitems is not None:
            self._batch_appends(listitems)

        if dictitems is not None:
            self._batch_setitems(dictitems)

        if state is not None:
            save(state)
            write(BUILD)

    # Methods below this point are dispatched through the dispatch table

    dispatch = {}

    def save_none(self, obj):
        self.write(NONE)
    dispatch[NoneType] = save_none

    def save_bool(self, obj):
        if self.proto >= 2:
            self.write(obj and NEWTRUE or NEWFALSE)
        else:
            self.write(obj and TRUE or FALSE)
    dispatch[bool] = save_bool

    def save_int(self, obj, pack=struct.pack):
        if self.bin:
            # If the int is small enough to fit in a signed 4-byte 2's-comp
            # format, we can store it more efficiently than the general
            # case.
            # First one- and two-byte unsigned ints:
            if obj >= 0:
                if obj <= 0xff:
                    self.write(BININT1 + bytes([obj]))
                    return
                if obj <= 0xffff:
                    self.write(BININT2 + bytes([obj&0xff, obj>>8]))
                    return
            # Next check for 4-byte signed ints:
            high_bits = obj >> 31  # note that Python shift sign-extends
            if high_bits == 0 or high_bits == -1:
                # All high bits are copies of bit 2**31, so the value
                # fits in a 4-byte signed int.
                self.write(BININT + pack("<i", obj))
                return
        # Text pickle, or int too big to fit in signed 4-byte format.
        self.write(INT + bytes(repr(obj)) + b'\n')
    # XXX save_int is merged into save_long
    # dispatch[IntType] = save_int

    def save_long(self, obj, pack=struct.pack):
        if self.bin:
            # If the int is small enough to fit in a signed 4-byte 2's-comp
            # format, we can store it more efficiently than the general
            # case.
            # First one- and two-byte unsigned ints:
            if obj >= 0:
                if obj <= 0xff:
                    self.write(BININT1 + bytes([obj]))
                    return
                if obj <= 0xffff:
                    self.write(BININT2 + bytes([obj&0xff, obj>>8]))
                    return
            # Next check for 4-byte signed ints:
            high_bits = obj >> 31  # note that Python shift sign-extends
            if high_bits == 0 or high_bits == -1:
                # All high bits are copies of bit 2**31, so the value
                # fits in a 4-byte signed int.
                self.write(BININT + pack("<i", obj))
                return
        if self.proto >= 2:
            # Arbitrary-size longs: LONG1/LONG4 with a little-endian
            # two's-complement payload (see encode_long below).
            encoded = encode_long(obj)
            n = len(encoded)
            if n < 256:
                self.write(LONG1 + bytes([n]) + encoded)
            else:
                self.write(LONG4 + pack("<i", n) + encoded)
            return
        self.write(LONG + bytes(repr(obj)) + b'\n')
    dispatch[LongType] = save_long

    def save_float(self, obj, pack=struct.pack):
        if self.bin:
            self.write(BINFLOAT + pack('>d', obj))
        else:
            self.write(FLOAT + bytes(repr(obj)) + b'\n')
    dispatch[FloatType] = save_float

    def save_string(self, obj, pack=struct.pack):
        # Transitional: handles the byte-string type (str8).
        if self.bin:
            n = len(obj)
            if n < 256:
                self.write(SHORT_BINSTRING + bytes([n]) + bytes(obj))
            else:
                self.write(BINSTRING + pack("<i", n) + bytes(obj))
        else:
            self.write(STRING + bytes(repr(obj)) + b'\n')
        self.memoize(obj)
    dispatch[str8] = save_string

    def save_unicode(self, obj, pack=struct.pack):
        if self.bin:
            encoded = obj.encode('utf-8')
            n = len(encoded)
            self.write(BINUNICODE + pack("<i", n) + encoded)
        else:
            # Proto 0 escapes backslashes and newlines so the value
            # survives the NL-terminated UNICODE argument format.
            obj = obj.replace("\\", "\\u005c")
            obj = obj.replace("\n", "\\u000a")
            self.write(UNICODE + bytes(obj.encode('raw-unicode-escape')) +
                       b'\n')
        self.memoize(obj)
    dispatch[str] = save_unicode

    def save_tuple(self, obj):
        write = self.write
        proto = self.proto

        n = len(obj)
        if n == 0:
            if proto:
                write(EMPTY_TUPLE)
            else:
                write(MARK + TUPLE)
            return

        save = self.save
        memo = self.memo
        if n <= 3 and proto >= 2:
            for element in obj:
                save(element)
            # Subtle.  Same as in the big comment below.
            if id(obj) in memo:
                get = self.get(memo[id(obj)][0])
                write(POP * n + get)
            else:
                write(_tuplesize2code[n])
                self.memoize(obj)
            return

        # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
        # has more than 3 elements.
        write(MARK)
        for element in obj:
            save(element)

        if id(obj) in memo:
            # Subtle.  d was not in memo when we entered save_tuple(), so
            # the process of saving the tuple's elements must have saved
            # the tuple itself:  the tuple is recursive.  The proper action
            # now is to throw away everything we put on the stack, and
            # simply GET the tuple (it's already constructed).  This check
            # could have been done in the "for element" loop instead, but
            # recursive tuples are a rare thing.
            get = self.get(memo[id(obj)][0])
            if proto:
                write(POP_MARK + get)
            else:   # proto 0 -- POP_MARK not available
                write(POP * (n+1) + get)
            return

        # No recursion.
        self.write(TUPLE)
        self.memoize(obj)

    dispatch[TupleType] = save_tuple

    # save_empty_tuple() isn't used by anything in Python 2.3.  However, I
    # found a Pickler subclass in Zope3 that calls it, so it's not harmless
    # to remove it.
    def save_empty_tuple(self, obj):
        self.write(EMPTY_TUPLE)

    def save_list(self, obj):
        write = self.write

        if self.bin:
            write(EMPTY_LIST)
        else:   # proto 0 -- can't use EMPTY_LIST
            write(MARK + LIST)

        # Memoize before filling so recursive lists resolve via GET.
        self.memoize(obj)
        self._batch_appends(iter(obj))

    dispatch[ListType] = save_list

    # Keep in synch with cPickle's BATCHSIZE.  Nothing will break if it gets
    # out of synch, though.
    _BATCHSIZE = 1000

    def _batch_appends(self, items):
        # Helper to batch up APPENDS sequences
        save = self.save
        write = self.write

        if not self.bin:
            for x in items:
                save(x)
                write(APPEND)
            return

        r = range(self._BATCHSIZE)
        while items is not None:
            tmp = []
            for i in r:
                try:
                    x = next(items)
                    tmp.append(x)
                except StopIteration:
                    items = None
                    break
            n = len(tmp)
            if n > 1:
                write(MARK)
                for x in tmp:
                    save(x)
                write(APPENDS)
            elif n:
                save(tmp[0])
                write(APPEND)
            # else tmp is empty, and we're done

    def save_dict(self, obj):
        write = self.write

        if self.bin:
            write(EMPTY_DICT)
        else:   # proto 0 -- can't use EMPTY_DICT
            write(MARK + DICT)

        # Memoize before filling so recursive dicts resolve via GET.
        self.memoize(obj)
        self._batch_setitems(iter(obj.items()))

    dispatch[DictionaryType] = save_dict
    if not PyStringMap is None:
        dispatch[PyStringMap] = save_dict

    def _batch_setitems(self, items):
        # Helper to batch up SETITEMS sequences; proto >= 1 only
        save = self.save
        write = self.write

        if not self.bin:
            for k, v in items:
                save(k)
                save(v)
                write(SETITEM)
            return

        r = range(self._BATCHSIZE)
        while items is not None:
            tmp = []
            for i in r:
                try:
                    tmp.append(next(items))
                except StopIteration:
                    items = None
                    break
            n = len(tmp)
            if n > 1:
                write(MARK)
                for k, v in tmp:
                    save(k)
                    save(v)
                write(SETITEMS)
            elif n:
                k, v = tmp[0]
                save(k)
                save(v)
                write(SETITEM)
            # else tmp is empty, and we're done

    def save_global(self, obj, name=None, pack=struct.pack):
        write = self.write
        memo = self.memo

        if name is None:
            name = obj.__name__

        module = getattr(obj, "__module__", None)
        if module is None:
            module = whichmodule(obj, name)

        # Verify that (module, name) really refers back to obj, so the
        # reference can be resolved at unpickling time.
        try:
            __import__(module)
            mod = sys.modules[module]
            klass = getattr(mod, name)
        except (ImportError, KeyError, AttributeError):
            raise PicklingError(
                "Can't pickle %r: it's not found as %s.%s" %
                (obj, module, name))
        else:
            if klass is not obj:
                raise PicklingError(
                    "Can't pickle %r: it's not the same object as %s.%s" %
                    (obj, module, name))

        if self.proto >= 2:
            # Prefer a compact extension-registry code when one exists.
            code = _extension_registry.get((module, name))
            if code:
                assert code > 0
                if code <= 0xff:
                    write(EXT1 + bytes([code]))
                elif code <= 0xffff:
                    write(EXT2 + bytes([code&0xff, code>>8]))
                else:
                    write(EXT4 + pack("<i", code))
                return

        write(GLOBAL + bytes(module) + b'\n' + bytes(name) + b'\n')
        self.memoize(obj)

    dispatch[ClassType] = save_global
    dispatch[FunctionType] = save_global
    dispatch[BuiltinFunctionType] = save_global
    dispatch[TypeType] = save_global
+
+# Pickling helpers
+
def _keep_alive(x, memo):
    """Keeps a reference to the object x in the memo.

    Because we remember objects by their id, we have
    to assure that possibly temporary objects are kept
    alive by referencing them.
    We store a reference at the id of the memo, which should
    normally not be used unless someone tries to deepcopy
    the memo itself...
    """
    bucket_key = id(memo)
    if bucket_key in memo:
        memo[bucket_key].append(x)
    else:
        # First object kept alive for this memo: start the bucket.
        memo[bucket_key] = [x]
+
+
+# A cache for whichmodule(), mapping a function object to the name of
+# the module in which the function was found.
+
+classmap = {} # called classmap for backwards compatibility
+
+def whichmodule(func, funcname):
+ """Figure out the module in which a function occurs.
+
+ Search sys.modules for the module.
+ Cache in classmap.
+ Return a module name.
+ If the function cannot be found, return "__main__".
+ """
+ # Python functions should always get an __module__ from their globals.
+ mod = getattr(func, "__module__", None)
+ if mod is not None:
+ return mod
+ if func in classmap:
+ return classmap[func]
+
+ for name, module in list(sys.modules.items()):
+ if module is None:
+ continue # skip dummy package entries
+ if name != '__main__' and getattr(module, funcname, None) is func:
+ break
+ else:
+ name = '__main__'
+ classmap[func] = name
+ return name
+
+
+# Unpickling machinery
+
+class Unpickler:
+
+ def __init__(self, file):
+ """This takes a binary file for reading a pickle data stream.
+
+ The protocol version of the pickle is detected automatically, so no
+ proto argument is needed.
+
+ The file-like object must have two methods, a read() method that
+ takes an integer argument, and a readline() method that requires no
+ arguments. Both methods should return a string. Thus file-like
+ object can be a file object opened for reading, a StringIO object,
+ or any other custom object that meets this interface.
+ """
+ try:
+ self.readline = file.readline
+ except AttributeError:
+ self.file = file
+ self.read = file.read
+ self.memo = {}
+
+ def readline(self):
+ # XXX Slow but at least correct
+ b = bytes()
+ while True:
+ c = self.file.read(1)
+ if not c:
+ break
+ b += c
+ if c == b'\n':
+ break
+ return b
+
+ def load(self):
+ """Read a pickled object representation from the open file.
+
+ Return the reconstituted object hierarchy specified in the file.
+ """
+ self.mark = object() # any new unique object
+ self.stack = []
+ self.append = self.stack.append
+ read = self.read
+ dispatch = self.dispatch
+ try:
+ while 1:
+ key = read(1)
+ if not key:
+ raise EOFError
+ assert isinstance(key, bytes)
+ dispatch[key[0]](self)
+ except _Stop as stopinst:
+ return stopinst.value
+
+ # Return largest index k such that self.stack[k] is self.mark.
+ # If the stack doesn't contain a mark, eventually raises IndexError.
+ # This could be sped by maintaining another stack, of indices at which
+ # the mark appears. For that matter, the latter stack would suffice,
+ # and we wouldn't need to push mark objects on self.stack at all.
+ # Doing so is probably a good thing, though, since if the pickle is
+ # corrupt (or hostile) we may get a clue from finding self.mark embedded
+ # in unpickled objects.
+ def marker(self):
+ stack = self.stack
+ mark = self.mark
+ k = len(stack)-1
+ while stack[k] is not mark: k = k-1
+ return k
+
+ dispatch = {}
+
+ def load_proto(self):
+ proto = ord(self.read(1))
+ if not 0 <= proto <= 2:
+ raise ValueError, "unsupported pickle protocol: %d" % proto
+ dispatch[PROTO[0]] = load_proto
+
+ def load_persid(self):
+ pid = self.readline()[:-1]
+ self.append(self.persistent_load(pid))
+ dispatch[PERSID[0]] = load_persid
+
+ def load_binpersid(self):
+ pid = self.stack.pop()
+ self.append(self.persistent_load(pid))
+ dispatch[BINPERSID[0]] = load_binpersid
+
+ def load_none(self):
+ self.append(None)
+ dispatch[NONE[0]] = load_none
+
+ def load_false(self):
+ self.append(False)
+ dispatch[NEWFALSE[0]] = load_false
+
+ def load_true(self):
+ self.append(True)
+ dispatch[NEWTRUE[0]] = load_true
+
+ def load_int(self):
+ data = self.readline()
+ if data == FALSE[1:]:
+ val = False
+ elif data == TRUE[1:]:
+ val = True
+ else:
+ try:
+ val = int(data)
+ except ValueError:
+ val = int(data)
+ self.append(val)
+ dispatch[INT[0]] = load_int
+
+ def load_binint(self):
+ self.append(mloads(b'i' + self.read(4)))
+ dispatch[BININT[0]] = load_binint
+
+ def load_binint1(self):
+ self.append(ord(self.read(1)))
+ dispatch[BININT1[0]] = load_binint1
+
+ def load_binint2(self):
+ self.append(mloads(b'i' + self.read(2) + b'\000\000'))
+ dispatch[BININT2[0]] = load_binint2
+
+ def load_long(self):
+ self.append(int(str(self.readline()[:-1]), 0))
+ dispatch[LONG[0]] = load_long
+
+ def load_long1(self):
+ n = ord(self.read(1))
+ data = self.read(n)
+ self.append(decode_long(data))
+ dispatch[LONG1[0]] = load_long1
+
+ def load_long4(self):
+ n = mloads(b'i' + self.read(4))
+ data = self.read(n)
+ self.append(decode_long(data))
+ dispatch[LONG4[0]] = load_long4
+
+ def load_float(self):
+ self.append(float(self.readline()[:-1]))
+ dispatch[FLOAT[0]] = load_float
+
+ def load_binfloat(self, unpack=struct.unpack):
+ self.append(unpack('>d', self.read(8))[0])
+ dispatch[BINFLOAT[0]] = load_binfloat
+
+ def load_string(self):
+ rep = self.readline()[:-1]
+ for q in "\"'": # double or single quote
+ if rep.startswith(q):
+ if not rep.endswith(q):
+ raise ValueError, "insecure string pickle"
+ rep = rep[len(q):-len(q)]
+ break
+ else:
+ raise ValueError, "insecure string pickle"
+ self.append(str8(rep.decode("string-escape")))
+ dispatch[STRING[0]] = load_string
+
+ def load_binstring(self):
+ len = mloads(b'i' + self.read(4))
+ self.append(str8(self.read(len)))
+ dispatch[BINSTRING[0]] = load_binstring
+
+ def load_unicode(self):
+ self.append(str(self.readline()[:-1], 'raw-unicode-escape'))
+ dispatch[UNICODE[0]] = load_unicode
+
+ def load_binunicode(self):
+ len = mloads(b'i' + self.read(4))
+ self.append(str(self.read(len), 'utf-8'))
+ dispatch[BINUNICODE[0]] = load_binunicode
+
+ def load_short_binstring(self):
+ len = ord(self.read(1))
+ self.append(str8(self.read(len)))
+ dispatch[SHORT_BINSTRING[0]] = load_short_binstring
+
+ def load_tuple(self):
+ k = self.marker()
+ self.stack[k:] = [tuple(self.stack[k+1:])]
+ dispatch[TUPLE[0]] = load_tuple
+
+ def load_empty_tuple(self):
+ self.stack.append(())
+ dispatch[EMPTY_TUPLE[0]] = load_empty_tuple
+
+ def load_tuple1(self):
+ self.stack[-1] = (self.stack[-1],)
+ dispatch[TUPLE1[0]] = load_tuple1
+
+ def load_tuple2(self):
+ self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
+ dispatch[TUPLE2[0]] = load_tuple2
+
+ def load_tuple3(self):
+ self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
+ dispatch[TUPLE3[0]] = load_tuple3
+
+ def load_empty_list(self):
+ self.stack.append([])
+ dispatch[EMPTY_LIST[0]] = load_empty_list
+
+ def load_empty_dictionary(self):
+ self.stack.append({})
+ dispatch[EMPTY_DICT[0]] = load_empty_dictionary
+
+ def load_list(self):
+ k = self.marker()
+ self.stack[k:] = [self.stack[k+1:]]
+ dispatch[LIST[0]] = load_list
+
+ def load_dict(self):
+ k = self.marker()
+ d = {}
+ items = self.stack[k+1:]
+ for i in range(0, len(items), 2):
+ key = items[i]
+ value = items[i+1]
+ d[key] = value
+ self.stack[k:] = [d]
+ dispatch[DICT[0]] = load_dict
+
+ # INST and OBJ differ only in how they get a class object. It's not
+ # only sensible to do the rest in a common routine, the two routines
+ # previously diverged and grew different bugs.
+ # klass is the class to instantiate, and k points to the topmost mark
+ # object, following which are the arguments for klass.__init__.
+ def _instantiate(self, klass, k):
+ args = tuple(self.stack[k+1:])
+ del self.stack[k:]
+ instantiated = 0
+ if (not args and
+ type(klass) is ClassType and
+ not hasattr(klass, "__getinitargs__")):
+ value = _EmptyClass()
+ value.__class__ = klass
+ instantiated = 1
+ if not instantiated:
+ try:
+ value = klass(*args)
+ except TypeError as err:
+ raise TypeError, "in constructor for %s: %s" % (
+ klass.__name__, str(err)), sys.exc_info()[2]
+ self.append(value)
+
+ def load_inst(self):
+ module = self.readline()[:-1]
+ name = self.readline()[:-1]
+ klass = self.find_class(module, name)
+ self._instantiate(klass, self.marker())
+ dispatch[INST[0]] = load_inst
+
+ def load_obj(self):
+ # Stack is ... markobject classobject arg1 arg2 ...
+ k = self.marker()
+ klass = self.stack.pop(k+1)
+ self._instantiate(klass, k)
+ dispatch[OBJ[0]] = load_obj
+
+ def load_newobj(self):
+ args = self.stack.pop()
+ cls = self.stack[-1]
+ obj = cls.__new__(cls, *args)
+ self.stack[-1] = obj
+ dispatch[NEWOBJ[0]] = load_newobj
+
+ def load_global(self):
+ module = self.readline()[:-1]
+ name = self.readline()[:-1]
+ klass = self.find_class(module, name)
+ self.append(klass)
+ dispatch[GLOBAL[0]] = load_global
+
+ def load_ext1(self):
+ code = ord(self.read(1))
+ self.get_extension(code)
+ dispatch[EXT1[0]] = load_ext1
+
+ def load_ext2(self):
+ code = mloads(b'i' + self.read(2) + b'\000\000')
+ self.get_extension(code)
+ dispatch[EXT2[0]] = load_ext2
+
+ def load_ext4(self):
+ code = mloads(b'i' + self.read(4))
+ self.get_extension(code)
+ dispatch[EXT4[0]] = load_ext4
+
+ def get_extension(self, code):
+ nil = []
+ obj = _extension_cache.get(code, nil)
+ if obj is not nil:
+ self.append(obj)
+ return
+ key = _inverted_registry.get(code)
+ if not key:
+ raise ValueError("unregistered extension code %d" % code)
+ obj = self.find_class(*key)
+ _extension_cache[code] = obj
+ self.append(obj)
+
+ def find_class(self, module, name):
+ # Subclasses may override this
+ module = str(module)
+ name = str(name)
+ __import__(module)
+ mod = sys.modules[module]
+ klass = getattr(mod, name)
+ return klass
+
+ def load_reduce(self):
+ stack = self.stack
+ args = stack.pop()
+ func = stack[-1]
+ value = func(*args)
+ stack[-1] = value
+ dispatch[REDUCE[0]] = load_reduce
+
+ def load_pop(self):
+ del self.stack[-1]
+ dispatch[POP[0]] = load_pop
+
+ def load_pop_mark(self):
+ k = self.marker()
+ del self.stack[k:]
+ dispatch[POP_MARK[0]] = load_pop_mark
+
+ def load_dup(self):
+ self.append(self.stack[-1])
+ dispatch[DUP[0]] = load_dup
+
+ def load_get(self):
+ self.append(self.memo[str8(self.readline())[:-1]])
+ dispatch[GET[0]] = load_get
+
+ def load_binget(self):
+ i = ord(self.read(1))
+ self.append(self.memo[repr(i)])
+ dispatch[BINGET[0]] = load_binget
+
+ def load_long_binget(self):
+ i = mloads(b'i' + self.read(4))
+ self.append(self.memo[repr(i)])
+ dispatch[LONG_BINGET[0]] = load_long_binget
+
+ def load_put(self):
+ self.memo[str(self.readline()[:-1])] = self.stack[-1]
+ dispatch[PUT[0]] = load_put
+
+ def load_binput(self):
+ i = ord(self.read(1))
+ self.memo[repr(i)] = self.stack[-1]
+ dispatch[BINPUT[0]] = load_binput
+
+ def load_long_binput(self):
+ i = mloads(b'i' + self.read(4))
+ self.memo[repr(i)] = self.stack[-1]
+ dispatch[LONG_BINPUT[0]] = load_long_binput
+
def load_append(self):
    """APPEND: pop a value and append it to the list now on top."""
    value = self.stack.pop()
    self.stack[-1].append(value)
dispatch[APPEND[0]] = load_append
+
def load_appends(self):
    """APPENDS: extend the list below the mark with everything above it."""
    stack = self.stack
    mark = self.marker()
    target = stack[mark - 1]
    target.extend(stack[mark + 1:])
    del stack[mark:]
dispatch[APPENDS[0]] = load_appends
+
def load_setitem(self):
    """SETITEM: pop value then key, and store the pair into the dict on top."""
    stack = self.stack
    value = stack.pop()
    key = stack.pop()
    stack[-1][key] = value
dispatch[SETITEM[0]] = load_setitem
+
def load_setitems(self):
    """SETITEMS: move the key/value pairs above the mark into the dict below it."""
    stack = self.stack
    mark = self.marker()
    target = stack[mark - 1]
    # Pairs sit above the mark as key, value, key, value, ...
    for i in range(mark + 1, len(stack), 2):
        target[stack[i]] = stack[i + 1]
    del stack[mark:]
dispatch[SETITEMS[0]] = load_setitems
+
def load_build(self):
    # BUILD: apply the state object on top of the stack to the instance
    # just below it, leaving the instance on the stack.
    stack = self.stack
    state = stack.pop()
    inst = stack[-1]
    setstate = getattr(inst, "__setstate__", None)
    if setstate:
        # The instance takes full control of its own state restoration.
        setstate(state)
        return
    slotstate = None
    # Protocol 2 may deliver a 2-tuple: (dict state, slot state).
    if isinstance(state, tuple) and len(state) == 2:
        state, slotstate = state
    if state:
        inst.__dict__.update(state)
    if slotstate:
        # __slots__ attributes cannot go through __dict__; set them directly.
        for k, v in slotstate.items():
            setattr(inst, k, v)
dispatch[BUILD[0]] = load_build
+
def load_mark(self):
    """MARK: push the special mark object onto the stack."""
    self.append(self.mark)
dispatch[MARK[0]] = load_mark
+
def load_stop(self):
    """STOP: end of pickle -- deliver the result by raising _Stop."""
    raise _Stop(self.stack.pop())
dispatch[STOP[0]] = load_stop
+
+# Helper class for load_inst/load_obj
+
class _EmptyClass:
    # Instantiated without running any __init__ and then fleshed out by the
    # unpickler (load_inst/load_obj); exists only to provide an empty namespace.
    pass
+
+# Encode/decode longs in linear time.
+
+import binascii as _binascii
+
def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.
    Note that 0 is a special case, returning an empty string, to save a
    byte in the LONG1 pickling context.

    >>> encode_long(0)
    b''
    >>> encode_long(255)
    b'\xff\x00'
    >>> encode_long(32767)
    b'\xff\x7f'
    >>> encode_long(-256)
    b'\x00\xff'
    >>> encode_long(-32768)
    b'\x00\x80'
    >>> encode_long(-128)
    b'\x80'
    >>> encode_long(127)
    b'\x7f'
    >>>
    """

    if x == 0:
        return b''
    if x > 0:
        ashex = hex(x)
        assert ashex.startswith("0x")
        # hex() of a Python 2 long ended in 'L'; counting it as junk keeps
        # the nibble arithmetic right on either side of the 2->3 merge.
        njunkchars = 2 + ashex.endswith('L')
        nibbles = len(ashex) - njunkchars
        if nibbles & 1:
            # need an even # of nibbles for unhexlify
            ashex = "0x0" + ashex[2:]
        elif int(ashex[2], 16) >= 8:
            # "looks negative", so need a byte of sign bits
            ashex = "0x00" + ashex[2:]
    else:
        # Build the 256's-complement: (1L << nbytes) + x. The trick is
        # to find the number of bytes in linear time (although that should
        # really be a constant-time task).
        ashex = hex(-x)
        assert ashex.startswith("0x")
        njunkchars = 2 + ashex.endswith('L')
        nibbles = len(ashex) - njunkchars
        if nibbles & 1:
            # Extend to a full byte.
            nibbles += 1
        nbits = nibbles * 4
        x += 1 << nbits
        assert x > 0
        ashex = hex(x)
        njunkchars = 2 + ashex.endswith('L')
        newnibbles = len(ashex) - njunkchars
        if newnibbles < nibbles:
            # Adding the complement dropped leading digits; pad them back.
            ashex = "0x" + "0" * (nibbles - newnibbles) + ashex[2:]
        if int(ashex[2], 16) < 8:
            # "looks positive", so need a byte of sign bits
            ashex = "0xff" + ashex[2:]

    if ashex.endswith('L'):
        ashex = ashex[2:-1]
    else:
        ashex = ashex[2:]
    assert len(ashex) & 1 == 0, (x, ashex)
    # unhexlify yields big-endian bytes; the pickle wire format is
    # little-endian, hence the final [::-1] reversal.
    binary = _binascii.unhexlify(ashex)
    return bytes(binary[::-1])
+
def decode_long(data):
    r"""Decode a long from a two's complement little-endian binary string.

    >>> decode_long(b'')
    0
    >>> decode_long(b"\xff\x00")
    255
    >>> decode_long(b"\xff\x7f")
    32767
    >>> decode_long(b"\x00\xff")
    -256
    >>> decode_long(b"\x00\x80")
    -32768
    >>> decode_long(b"\x80")
    -128
    >>> decode_long(b"\x7f")
    127
    """

    if not data:
        # Empty string is the LONG1 shortcut for zero.
        return 0
    # Reverse to big-endian, then let int() parse the hex digits in
    # linear time.
    hexdigits = _binascii.hexlify(data[::-1])
    value = int(hexdigits, 16)
    if data[-1] & 0x80:
        # Sign bit set: fold back into the negative range.
        value -= 1 << (len(data) * 8)
    return value
+
+# Shorthands
+
def dump(obj, file, protocol=None):
    """Write a pickled representation of *obj* to the open file object."""
    pickler = Pickler(file, protocol)
    pickler.dump(obj)
+
def dumps(obj, protocol=None):
    """Return the pickled representation of *obj* as a bytes object."""
    buf = io.BytesIO()
    Pickler(buf, protocol).dump(obj)
    result = buf.getvalue()
    assert isinstance(result, bytes)
    return result
+
def load(file):
    """Read a pickled object representation from the open file object."""
    unpickler = Unpickler(file)
    return unpickler.load()
+
def loads(s):
    """Read a pickled object hierarchy from the bytes object *s*."""
    if isinstance(s, str):
        raise TypeError("Can't load pickle from unicode string")
    return Unpickler(io.BytesIO(s)).load()
+
+# Doctest
+
def _test():
    """Run this module's doctests; returns (failure_count, test_count)."""
    import doctest
    return doctest.testmod()

if __name__ == "__main__":
    _test()
Added: sandbox/trunk/cpy_merge/Lib/pickletools.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Lib/pickletools.py Wed May 23 03:45:28 2007
@@ -0,0 +1,2257 @@
+'''"Executable documentation" for the pickle module.
+
+Extensive comments about the pickle protocols and pickle-machine opcodes
+can be found here. Some functions meant for external use:
+
+genops(pickle)
+ Generate all the opcodes in a pickle, as (opcode, arg, position) triples.
+
+dis(pickle, out=None, memo=None, indentlevel=4)
+ Print a symbolic disassembly of a pickle.
+'''
+
+__all__ = ['dis',
+ 'genops',
+ ]
+
+# Other ideas:
+#
+# - A pickle verifier: read a pickle and check it exhaustively for
+# well-formedness. dis() does a lot of this already.
+#
+# - A protocol identifier: examine a pickle and return its protocol number
+# (== the highest .proto attr value among all the opcodes in the pickle).
+# dis() already prints this info at the end.
+#
+# - A pickle optimizer: for example, tuple-building code is sometimes more
+# elaborate than necessary, catering for the possibility that the tuple
+# is recursive. Or lots of times a PUT is generated that's never accessed
+# by a later GET.
+
+
+"""
+"A pickle" is a program for a virtual pickle machine (PM, but more accurately
+called an unpickling machine). It's a sequence of opcodes, interpreted by the
+PM, building an arbitrarily complex Python object.
+
+For the most part, the PM is very simple: there are no looping, testing, or
+conditional instructions, no arithmetic and no function calls. Opcodes are
+executed once each, from first to last, until a STOP opcode is reached.
+
+The PM has two data areas, "the stack" and "the memo".
+
+Many opcodes push Python objects onto the stack; e.g., INT pushes a Python
+integer object on the stack, whose value is gotten from a decimal string
+literal immediately following the INT opcode in the pickle bytestream. Other
+opcodes take Python objects off the stack. The result of unpickling is
+whatever object is left on the stack when the final STOP opcode is executed.
+
+The memo is simply an array of objects, or it can be implemented as a dict
+mapping little integers to objects. The memo serves as the PM's "long term
+memory", and the little integers indexing the memo are akin to variable
+names. Some opcodes pop a stack object into the memo at a given index,
+and others push a memo object at a given index onto the stack again.
+
+At heart, that's all the PM has. Subtleties arise for these reasons:
+
++ Object identity. Objects can be arbitrarily complex, and subobjects
+ may be shared (for example, the list [a, a] refers to the same object a
+ twice). It can be vital that unpickling recreate an isomorphic object
+ graph, faithfully reproducing sharing.
+
++ Recursive objects. For example, after "L = []; L.append(L)", L is a
+ list, and L[0] is the same list. This is related to the object identity
+ point, and some sequences of pickle opcodes are subtle in order to
+ get the right result in all cases.
+
++ Things pickle doesn't know everything about. Examples of things pickle
+ does know everything about are Python's builtin scalar and container
+ types, like ints and tuples. They generally have opcodes dedicated to
+ them. For things like module references and instances of user-defined
+ classes, pickle's knowledge is limited. Historically, many enhancements
+ have been made to the pickle protocol in order to do a better (faster,
+ and/or more compact) job on those.
+
++ Backward compatibility and micro-optimization. As explained below,
+ pickle opcodes never go away, not even when better ways to do a thing
+ get invented. The repertoire of the PM just keeps growing over time.
+ For example, protocol 0 had two opcodes for building Python integers (INT
+ and LONG), protocol 1 added three more for more-efficient pickling of short
+ integers, and protocol 2 added two more for more-efficient pickling of
+ long integers (before protocol 2, the only ways to pickle a Python long
+ took time quadratic in the number of digits, for both pickling and
+ unpickling). "Opcode bloat" isn't so much a subtlety as a source of
+ wearying complication.
+
+
+Pickle protocols:
+
+For compatibility, the meaning of a pickle opcode never changes. Instead new
+pickle opcodes get added, and each version's unpickler can handle all the
+pickle opcodes in all protocol versions to date. So old pickles continue to
+be readable forever. The pickler can generally be told to restrict itself to
+the subset of opcodes available under previous protocol versions too, so that
+users can create pickles under the current version readable by older
+versions. However, a pickle does not contain its version number embedded
+within it. If an older unpickler tries to read a pickle using a later
+protocol, the result is most likely an exception due to seeing an unknown (in
+the older unpickler) opcode.
+
+The original pickle used what's now called "protocol 0", and what was called
+"text mode" before Python 2.3. The entire pickle bytestream is made up of
+printable 7-bit ASCII characters, plus the newline character, in protocol 0.
+That's why it was called text mode. Protocol 0 is small and elegant, but
+sometimes painfully inefficient.
+
+The second major set of additions is now called "protocol 1", and was called
+"binary mode" before Python 2.3. This added many opcodes with arguments
+consisting of arbitrary bytes, including NUL bytes and unprintable "high bit"
+bytes. Binary mode pickles can be substantially smaller than equivalent
+text mode pickles, and sometimes faster too; e.g., BININT represents a 4-byte
+int as 4 bytes following the opcode, which is cheaper to unpickle than the
+(perhaps) 11-character decimal string attached to INT. Protocol 1 also added
+a number of opcodes that operate on many stack elements at once (like APPENDS
+and SETITEMS), and "shortcut" opcodes (like EMPTY_DICT and EMPTY_TUPLE).
+
+The third major set of additions came in Python 2.3, and is called "protocol
+2". This added:
+
+- A better way to pickle instances of new-style classes (NEWOBJ).
+
+- A way for a pickle to identify its protocol (PROTO).
+
+- Time- and space- efficient pickling of long ints (LONG{1,4}).
+
+- Shortcuts for small tuples (TUPLE{1,2,3}).
+
+- Dedicated opcodes for bools (NEWTRUE, NEWFALSE).
+
+- The "extension registry", a vector of popular objects that can be pushed
+ efficiently by index (EXT{1,2,4}). This is akin to the memo and GET, but
+ the registry contents are predefined (there's nothing akin to the memo's
+ PUT).
+
+Another independent change with Python 2.3 is the abandonment of any
+pretense that it might be safe to load pickles received from untrusted
+parties -- no sufficient security analysis has been done to guarantee
+this and there isn't a use case that warrants the expense of such an
+analysis.
+
+To this end, all tests for __safe_for_unpickling__ or for
+copy_reg.safe_constructors are removed from the unpickling code.
+References to these variables in the descriptions below are to be seen
+as describing unpickling in Python 2.2 and before.
+"""
+
+# Meta-rule: Descriptions are stored in instances of descriptor objects,
+# with plain constructors. No meta-language is defined from which
+# descriptors could be constructed. If you want, e.g., XML, write a little
+# program to generate XML from the objects.
+
+##############################################################################
+# Some pickle opcodes have an argument, following the opcode in the
+# bytestream. An argument is of a specific type, described by an instance
+# of ArgumentDescriptor. These are not to be confused with arguments taken
+# off the stack -- ArgumentDescriptor applies only to arguments embedded in
+# the opcode stream, immediately following an opcode.
+
+# Represents the number of bytes consumed by an argument delimited by the
+# next newline character.
+UP_TO_NEWLINE = -1
+
+# Represents the number of bytes consumed by a two-argument opcode where
+# the first argument gives the number of bytes in the second argument.
+TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int
+TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int
+
class ArgumentDescriptor(object):
    """Describes one kind of argument embedded in the opcode stream."""

    __slots__ = (
        # name of descriptor record, also a module global name; a string
        'name',

        # length of argument, in bytes; an int; UP_TO_NEWLINE and
        # TAKEN_FROM_ARGUMENT{1,4} are negative values for variable-length
        # cases
        'n',

        # a function taking a file-like object, reading this kind of argument
        # from the object at the current position, advancing the current
        # position by n bytes, and returning the value of the argument
        'reader',

        # human-readable docs for this arg descriptor; a string
        'doc',
    )

    def __init__(self, name, n, reader, doc):
        assert isinstance(name, str)
        assert isinstance(doc, str)
        assert isinstance(n, int)
        # n is either a concrete byte count or one of the sentinel values.
        assert n >= 0 or n in (UP_TO_NEWLINE,
                               TAKEN_FROM_ARGUMENT1,
                               TAKEN_FROM_ARGUMENT4)
        self.name = name
        self.n = n
        self.reader = reader
        self.doc = doc
+
+from struct import unpack as _unpack
+
def read_uint1(f):
    r"""
    >>> import io
    >>> read_uint1(io.BytesIO(b'\xff'))
    255
    """
    data = f.read(1)
    if not data:
        raise ValueError("not enough data in stream to read uint1")
    return data[0]

uint1 = ArgumentDescriptor(
    name='uint1',
    n=1,
    reader=read_uint1,
    doc="One-byte unsigned integer.")
+
+
def read_uint2(f):
    r"""
    >>> import io
    >>> read_uint2(io.BytesIO(b'\xff\x00'))
    255
    >>> read_uint2(io.BytesIO(b'\xff\xff'))
    65535
    """
    data = f.read(2)
    if len(data) < 2:
        raise ValueError("not enough data in stream to read uint2")
    return _unpack("<H", data)[0]

uint2 = ArgumentDescriptor(
    name='uint2',
    n=2,
    reader=read_uint2,
    doc="Two-byte unsigned integer, little-endian.")
+
+
def read_int4(f):
    r"""
    >>> import io
    >>> read_int4(io.BytesIO(b'\xff\x00\x00\x00'))
    255
    >>> read_int4(io.BytesIO(b'\x00\x00\x00\x80')) == -(2**31)
    True
    """
    data = f.read(4)
    if len(data) < 4:
        raise ValueError("not enough data in stream to read int4")
    return _unpack("<i", data)[0]

int4 = ArgumentDescriptor(
    name='int4',
    n=4,
    reader=read_int4,
    doc="Four-byte signed integer, little-endian, 2's complement.")
+
+
def readline(f):
    """Read one line (through b'\\n', or to EOF) from binary file-like *f*.

    Works byte-at-a-time so it never over-reads past the newline, which
    matters because *f*'s position must stay exact for the next opcode.
    Accumulates into a bytearray: repeated ``bytes +=`` is quadratic,
    ``bytearray`` append is amortized O(1).
    """
    line = bytearray()
    while True:
        c = f.read(1)
        if not c:
            break
        line += c
        if c == b'\n':
            break
    return bytes(line)
+
+
+def read_stringnl(f, decode=True, stripquotes=True):
+ r"""
+ >>> import io
+ >>> read_stringnl(io.BytesIO(b"'abcd'\nefg\n"))
+ 'abcd'
+
+ >>> read_stringnl(io.BytesIO(b"\n"))
+ Traceback (most recent call last):
+ ...
+ ValueError: no string quotes around b''
+
+ >>> read_stringnl(io.BytesIO(b"\n"), stripquotes=False)
+ ''
+
+ >>> read_stringnl(io.BytesIO(b"''\n"))
+ ''
+
+ >>> read_stringnl(io.BytesIO(b'"abcd"'))
+ Traceback (most recent call last):
+ ...
+ ValueError: no newline found when trying to read stringnl
+
+ Embedded escapes are undone in the result.
+ >>> read_stringnl(io.BytesIO(br"'a\n\\b\x00c\td'" + b"\n'e'"))
+ 'a\n\\b\x00c\td'
+ """
+
+ data = readline(f)
+ if not data.endswith('\n'):
+ raise ValueError("no newline found when trying to read stringnl")
+ data = data[:-1] # lose the newline
+
+ if stripquotes:
+ for q in "'\"":
+ if data.startswith(q):
+ if not data.endswith(q):
+ raise ValueError("strinq quote %r not found at both "
+ "ends of %r" % (q, data))
+ data = data[1:-1]
+ break
+ else:
+ raise ValueError("no string quotes around %r" % data)
+
+ # I'm not sure when 'string_escape' was added to the std codecs; it's
+ # crazy not to use it if it's there.
+ if decode:
+ data = data.decode('string_escape')
+ return data
+
stringnl = ArgumentDescriptor(
    name='stringnl',
    n=UP_TO_NEWLINE,
    reader=read_stringnl,
    doc="""A newline-terminated string.

    This is a repr-style string, with embedded escapes, and
    bracketing quotes.
    """)

def read_stringnl_noescape(f):
    # Convenience wrapper: raw line contents, no unquoting or unescaping.
    return read_stringnl(f, decode=False, stripquotes=False)

stringnl_noescape = ArgumentDescriptor(
    name='stringnl_noescape',
    n=UP_TO_NEWLINE,
    reader=read_stringnl_noescape,
    doc="""A newline-terminated string.

    This is a str-style string, without embedded escapes,
    or bracketing quotes. It should consist solely of
    printable ASCII characters.
    """)
+
def read_stringnl_noescape_pair(f):
    r"""
    >>> import io
    >>> read_stringnl_noescape_pair(io.BytesIO(b"Queue\nEmpty\njunk"))
    'Queue Empty'
    """
    first = read_stringnl_noescape(f)
    second = read_stringnl_noescape(f)
    return "%s %s" % (first, second)

stringnl_noescape_pair = ArgumentDescriptor(
    name='stringnl_noescape_pair',
    n=UP_TO_NEWLINE,
    reader=read_stringnl_noescape_pair,
    doc="""A pair of newline-terminated strings.

    These are str-style strings, without embedded
    escapes, or bracketing quotes. They should
    consist solely of printable ASCII characters.
    The pair is returned as a single string, with
    a single blank separating the two strings.
    """)
+
def read_string4(f):
    r"""
    >>> import io
    >>> read_string4(io.BytesIO(b"\x00\x00\x00\x00abc"))
    ''
    >>> read_string4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
    'abc'
    >>> read_string4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
    Traceback (most recent call last):
    ...
    ValueError: expected 50331648 bytes in a string4, but only 6 remain
    """
    n = read_int4(f)
    if n < 0:
        raise ValueError("string4 byte count < 0: %d" % n)
    data = f.read(n)
    if len(data) != n:
        raise ValueError("expected %d bytes in a string4, but only %d remain" %
                         (n, len(data)))
    return data.decode("latin-1")

string4 = ArgumentDescriptor(
    name="string4",
    n=TAKEN_FROM_ARGUMENT4,
    reader=read_string4,
    doc="""A counted string.

    The first argument is a 4-byte little-endian signed int giving
    the number of bytes in the string, and the second argument is
    that many bytes.
    """)
+
+
def read_string1(f):
    r"""
    >>> import io
    >>> read_string1(io.BytesIO(b"\x00"))
    ''
    >>> read_string1(io.BytesIO(b"\x03abcdef"))
    'abc'
    """
    n = read_uint1(f)
    assert n >= 0
    data = f.read(n)
    if len(data) != n:
        raise ValueError("expected %d bytes in a string1, but only %d remain" %
                         (n, len(data)))
    return data.decode("latin-1")

string1 = ArgumentDescriptor(
    name="string1",
    n=TAKEN_FROM_ARGUMENT1,
    reader=read_string1,
    doc="""A counted string.

    The first argument is a 1-byte unsigned int giving the number
    of bytes in the string, and the second argument is that many
    bytes.
    """)
+
+
def read_unicodestringnl(f):
    r"""Read a newline-terminated, raw-unicode-escape encoded string.

    >>> import io
    >>> read_unicodestringnl(io.BytesIO(b"abc\\uabcd\njunk")) == 'abc\uabcd'
    True
    """
    data = readline(f)
    # readline() returns bytes, so the terminator check must compare
    # against a bytes literal; the original compared against str '\n',
    # which can never match and so always raised.
    if not data.endswith(b'\n'):
        raise ValueError("no newline found when trying to read "
                         "unicodestringnl")
    data = data[:-1]  # lose the newline
    return str(data, 'raw-unicode-escape')

unicodestringnl = ArgumentDescriptor(
    name='unicodestringnl',
    n=UP_TO_NEWLINE,
    reader=read_unicodestringnl,
    doc="""A newline-terminated Unicode string.

    This is raw-unicode-escape encoded, so consists of
    printable ASCII characters, and may contain embedded
    escape sequences.
    """)
+
def read_unicodestring4(f):
    r"""
    >>> import io
    >>> s = 'abcd\uabcd'
    >>> enc = s.encode('utf-8')
    >>> enc
    b'abcd\xea\xaf\x8d'
    >>> n = bytes([len(enc), 0, 0, 0]) # little-endian 4-byte length
    >>> t = read_unicodestring4(io.BytesIO(n + enc + b'junk'))
    >>> s == t
    True

    >>> read_unicodestring4(io.BytesIO(n + enc[:-1]))
    Traceback (most recent call last):
    ...
    ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
    """
    n = read_int4(f)
    if n < 0:
        raise ValueError("unicodestring4 byte count < 0: %d" % n)
    data = f.read(n)
    if len(data) != n:
        raise ValueError("expected %d bytes in a unicodestring4, but only %d "
                         "remain" % (n, len(data)))
    return str(data, 'utf-8')

unicodestring4 = ArgumentDescriptor(
    name="unicodestring4",
    n=TAKEN_FROM_ARGUMENT4,
    reader=read_unicodestring4,
    doc="""A counted Unicode string.

    The first argument is a 4-byte little-endian signed int
    giving the number of bytes in the string, and the second
    argument-- the UTF-8 encoding of the Unicode string --
    contains that many bytes.
    """)
+
+
def read_decimalnl_short(f):
    r"""Read a newline-terminated decimal int literal (no trailing 'L').

    >>> import io
    >>> read_decimalnl_short(io.BytesIO(b"1234\n56"))
    1234

    >>> read_decimalnl_short(io.BytesIO(b"1234L\n56"))
    Traceback (most recent call last):
    ...
    ValueError: trailing 'L' not allowed in b'1234L'
    """

    s = read_stringnl(f, decode=False, stripquotes=False)
    # s is bytes (decode=False), so all comparisons below must use bytes
    # literals; the original compared against str and could never match.
    if s.endswith(b"L"):
        raise ValueError("trailing 'L' not allowed in %r" % s)

    # It's not necessarily true that the result fits in a Python short int:
    # the pickle may have been written on a 64-bit box. There's also a hack
    # for True and False here: they pickle as INT "01" / "00".
    if s == b"00":
        return False
    elif s == b"01":
        return True

    # int() accepts ASCII digits in bytes directly. The old
    # try/except OverflowError fell back to an identical int(s) call
    # and was dead code, so it is gone.
    return int(s)
+
def read_decimalnl_long(f):
    r"""Read a newline-terminated decimal long literal (trailing 'L' ok).

    >>> import io

    >>> read_decimalnl_long(io.BytesIO(b"1234L\n56"))
    1234

    >>> read_decimalnl_long(io.BytesIO(b"123456789012345678901234L\n6"))
    123456789012345678901234
    """

    s = read_stringnl(f, decode=False, stripquotes=False)
    # Protocol 0 LONG literals carry a trailing 'L' which int() rejects,
    # so strip it before converting; the doctests above depend on this.
    if s[-1:] == b"L":
        s = s[:-1]
    return int(s)


decimalnl_short = ArgumentDescriptor(
    name='decimalnl_short',
    n=UP_TO_NEWLINE,
    reader=read_decimalnl_short,
    doc="""A newline-terminated decimal integer literal.

    This never has a trailing 'L', and the integer fit
    in a short Python int on the box where the pickle
    was written -- but there's no guarantee it will fit
    in a short Python int on the box where the pickle
    is read.
    """)

decimalnl_long = ArgumentDescriptor(
    name='decimalnl_long',
    n=UP_TO_NEWLINE,
    reader=read_decimalnl_long,
    doc="""A newline-terminated decimal integer literal.

    This has a trailing 'L', and can represent integers
    of any size.
    """)
+
+
def read_floatnl(f):
    r"""
    >>> import io
    >>> read_floatnl(io.BytesIO(b"-1.25\n6"))
    -1.25
    """
    literal = read_stringnl(f, decode=False, stripquotes=False)
    return float(literal)

floatnl = ArgumentDescriptor(
    name='floatnl',
    n=UP_TO_NEWLINE,
    reader=read_floatnl,
    doc="""A newline-terminated decimal floating literal.

    In general this requires 17 significant digits for roundtrip
    identity, and pickling then unpickling infinities, NaNs, and
    minus zero doesn't work across boxes, or on some boxes even
    on itself (e.g., Windows can't read the strings it produces
    for infinities or NaNs).
    """)
+
def read_float8(f):
    r"""
    >>> import io, struct
    >>> raw = struct.pack(">d", -1.25)
    >>> raw
    b'\xbf\xf4\x00\x00\x00\x00\x00\x00'
    >>> read_float8(io.BytesIO(raw + b"\n"))
    -1.25
    """
    data = f.read(8)
    if len(data) < 8:
        raise ValueError("not enough data in stream to read float8")
    return _unpack(">d", data)[0]


float8 = ArgumentDescriptor(
    name='float8',
    n=8,
    reader=read_float8,
    doc="""An 8-byte binary representation of a float, big-endian.

    The format is unique to Python, and shared with the struct
    module (format string '>d') "in theory" (the struct and cPickle
    implementations don't share the code -- they should). It's
    strongly related to the IEEE-754 double format, and, in normal
    cases, is in fact identical to the big-endian 754 double format.
    On other boxes the dynamic range is limited to that of a 754
    double, and "add a half and chop" rounding is used to reduce
    the precision to 53 bits. However, even on a 754 box,
    infinities, NaNs, and minus zero may not be handled correctly
    (may not survive roundtrip pickling intact).
    """)
+
+# Protocol 2 formats
+
+from pickle import decode_long
+
def read_long1(f):
    r"""
    >>> import io
    >>> read_long1(io.BytesIO(b"\x00"))
    0
    >>> read_long1(io.BytesIO(b"\x02\xff\x00"))
    255
    >>> read_long1(io.BytesIO(b"\x02\xff\x7f"))
    32767
    >>> read_long1(io.BytesIO(b"\x02\x00\xff"))
    -256
    >>> read_long1(io.BytesIO(b"\x02\x00\x80"))
    -32768
    """
    n = read_uint1(f)
    payload = f.read(n)
    if len(payload) != n:
        raise ValueError("not enough data in stream to read long1")
    return decode_long(payload)

long1 = ArgumentDescriptor(
    name="long1",
    n=TAKEN_FROM_ARGUMENT1,
    reader=read_long1,
    doc="""A binary long, little-endian, using 1-byte size.

    This first reads one byte as an unsigned size, then reads that
    many bytes and interprets them as a little-endian 2's-complement long.
    If the size is 0, that's taken as a shortcut for the long 0L.
    """)
+
def read_long4(f):
    r"""
    >>> import io
    >>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\xff\x00"))
    255
    >>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\xff\x7f"))
    32767
    >>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\x00\xff"))
    -256
    >>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\x00\x80"))
    -32768
    >>> read_long4(io.BytesIO(b"\x00\x00\x00\x00"))
    0
    """
    # (The last doctest above previously called read_long1 by mistake;
    # it now exercises read_long4's zero-length shortcut as intended.)

    n = read_int4(f)
    if n < 0:
        raise ValueError("long4 byte count < 0: %d" % n)
    data = f.read(n)
    if len(data) != n:
        raise ValueError("not enough data in stream to read long4")
    return decode_long(data)

long4 = ArgumentDescriptor(
    name="long4",
    n=TAKEN_FROM_ARGUMENT4,
    reader=read_long4,
    doc="""A binary representation of a long, little-endian.

    This first reads four bytes as a signed size (but requires the
    size to be >= 0), then reads that many bytes and interprets them
    as a little-endian 2's-complement long. If the size is 0, that's taken
    as a shortcut for the int 0, although LONG1 should really be used
    then instead (and in any case where # of bytes < 256).
    """)
+
+
+##############################################################################
+# Object descriptors. The stack used by the pickle machine holds objects,
+# and in the stack_before and stack_after attributes of OpcodeInfo
+# descriptors we need names to describe the various types of objects that can
+# appear on the stack.
+
class StackObject(object):
    """Describes one kind of object that can appear on the PM's stack."""

    __slots__ = (
        # name of descriptor record, for info only
        'name',

        # type of object, or tuple of type objects (meaning the object can
        # be of any type in the tuple)
        'obtype',

        # human-readable docs for this kind of stack object; a string
        'doc',
    )

    def __init__(self, name, obtype, doc):
        # `basestring` is Python-2-only and a NameError here; the rest of
        # this file (e.g. ArgumentDescriptor) already asserts against str.
        assert isinstance(name, str)
        self.name = name

        assert isinstance(obtype, type) or isinstance(obtype, tuple)
        if isinstance(obtype, tuple):
            for contained in obtype:
                assert isinstance(contained, type)
        self.obtype = obtype

        assert isinstance(doc, str)
        self.doc = doc

    def __repr__(self):
        return self.name
+
+
# Stack-object singletons used in the opcode table's stack_before /
# stack_after lists.  Note: in this py3k branch int and long have merged,
# so pyint and pylong both carry obtype=int.

pyint = StackObject(
    name='int',
    obtype=int,
    doc="A short (as opposed to long) Python integer object.")

pylong = StackObject(
    name='long',
    obtype=int,
    doc="A long (as opposed to short) Python integer object.")

pyinteger_or_bool = StackObject(
    name='int_or_bool',
    # Was (int, long, bool) in 2.x; the duplicate int left behind by the
    # long->int merge is dropped.
    obtype=(int, bool),
    doc="A Python integer object (short or long), or "
        "a Python bool.")

pybool = StackObject(
    name='bool',
    obtype=(bool,),
    doc="A Python bool object.")

pyfloat = StackObject(
    name='float',
    obtype=float,
    doc="A Python float object.")

pystring = StackObject(
    name='str',
    obtype=str,
    doc="A Python string object.")

pyunicode = StackObject(
    name='unicode',
    obtype=str,
    doc="A Python Unicode string object.")

pynone = StackObject(
    name="None",
    obtype=type(None),
    doc="The Python None object.")

pytuple = StackObject(
    name="tuple",
    obtype=tuple,
    doc="A Python tuple object.")

pylist = StackObject(
    name="list",
    obtype=list,
    doc="A Python list object.")

pydict = StackObject(
    name="dict",
    obtype=dict,
    doc="A Python dict object.")

anyobject = StackObject(
    name='any',
    obtype=object,
    doc="Any kind of object whatsoever.")

markobject = StackObject(
    name="mark",
    obtype=StackObject,
    doc="""'The mark' is a unique object.

    Opcodes that operate on a variable number of objects
    generally don't embed the count of objects in the opcode,
    or pull it off the stack. Instead the MARK opcode is used
    to push a special marker object on the stack, and then
    some other opcodes grab all the objects from the top of
    the stack down to (but not including) the topmost marker
    object.
    """)

stackslice = StackObject(
    name="stackslice",
    obtype=StackObject,
    doc="""An object representing a contiguous slice of the stack.

    This is used in conjunction with markobject, to represent all
    of the stack following the topmost markobject. For example,
    the POP_MARK opcode changes the stack from

    [..., markobject, stackslice]
    to
    [...]

    No matter how many object are on the stack after the topmost
    markobject, POP_MARK gets rid of all of them (including the
    topmost markobject too).
    """)
+
+##############################################################################
+# Descriptors for pickle opcodes.
+
class OpcodeInfo(object):
    """Full description of one pickle opcode."""

    __slots__ = (
        # symbolic name of opcode; a string
        'name',

        # the code used in a bytestream to represent the opcode; a
        # one-character string
        'code',

        # If the opcode has an argument embedded in the byte string, an
        # instance of ArgumentDescriptor specifying its type. Note that
        # arg.reader(s) can be used to read and decode the argument from
        # the bytestream s, and arg.doc documents the format of the raw
        # argument bytes. If the opcode doesn't have an argument embedded
        # in the bytestream, arg should be None.
        'arg',

        # what the stack looks like before this opcode runs; a list
        'stack_before',

        # what the stack looks like after this opcode runs; a list
        'stack_after',

        # the protocol number in which this opcode was introduced; an int
        'proto',

        # human-readable docs for this opcode; a string
        'doc',
    )

    def __init__(self, name, code, arg,
                 stack_before, stack_after, proto, doc):
        # `basestring` is Python-2-only (NameError under py3); use str,
        # matching the assertions in ArgumentDescriptor above.
        assert isinstance(name, str)
        self.name = name

        assert isinstance(code, str)
        assert len(code) == 1
        self.code = code

        assert arg is None or isinstance(arg, ArgumentDescriptor)
        self.arg = arg

        assert isinstance(stack_before, list)
        for x in stack_before:
            assert isinstance(x, StackObject)
        self.stack_before = stack_before

        assert isinstance(stack_after, list)
        for x in stack_after:
            assert isinstance(x, StackObject)
        self.stack_after = stack_after

        assert isinstance(proto, int) and 0 <= proto <= 2
        self.proto = proto

        assert isinstance(doc, str)
        self.doc = doc
+
+I = OpcodeInfo
+opcodes = [
+
+ # Ways to spell integers.
+
+ I(name='INT',
+ code='I',
+ arg=decimalnl_short,
+ stack_before=[],
+ stack_after=[pyinteger_or_bool],
+ proto=0,
+ doc="""Push an integer or bool.
+
+ The argument is a newline-terminated decimal literal string.
+
+ The intent may have been that this always fit in a short Python int,
+ but INT can be generated in pickles written on a 64-bit box that
+ require a Python long on a 32-bit box. The difference between this
+ and LONG then is that INT skips a trailing 'L', and produces a short
+ int whenever possible.
+
+ Another difference is due to that, when bool was introduced as a
+ distinct type in 2.3, builtin names True and False were also added to
+ 2.2.2, mapping to ints 1 and 0. For compatibility in both directions,
+ True gets pickled as INT + "I01\\n", and False as INT + "I00\\n".
+ Leading zeroes are never produced for a genuine integer. The 2.3
+ (and later) unpicklers special-case these and return bool instead;
+ earlier unpicklers ignore the leading "0" and return the int.
+ """),
+
+ I(name='BININT',
+ code='J',
+ arg=int4,
+ stack_before=[],
+ stack_after=[pyint],
+ proto=1,
+ doc="""Push a four-byte signed integer.
+
+ This handles the full range of Python (short) integers on a 32-bit
+ box, directly as binary bytes (1 for the opcode and 4 for the integer).
+ If the integer is non-negative and fits in 1 or 2 bytes, pickling via
+ BININT1 or BININT2 saves space.
+ """),
+
+ I(name='BININT1',
+ code='K',
+ arg=uint1,
+ stack_before=[],
+ stack_after=[pyint],
+ proto=1,
+ doc="""Push a one-byte unsigned integer.
+
+ This is a space optimization for pickling very small non-negative ints,
+ in range(256).
+ """),
+
+ I(name='BININT2',
+ code='M',
+ arg=uint2,
+ stack_before=[],
+ stack_after=[pyint],
+ proto=1,
+ doc="""Push a two-byte unsigned integer.
+
+ This is a space optimization for pickling small positive ints, in
+ range(256, 2**16). Integers in range(256) can also be pickled via
+ BININT2, but BININT1 instead saves a byte.
+ """),
+
+ I(name='LONG',
+ code='L',
+ arg=decimalnl_long,
+ stack_before=[],
+ stack_after=[pylong],
+ proto=0,
+ doc="""Push a long integer.
+
+ The same as INT, except that the literal ends with 'L', and always
+ unpickles to a Python long. There doesn't seem a real purpose to the
+ trailing 'L'.
+
+ Note that LONG takes time quadratic in the number of digits when
+ unpickling (this is simply due to the nature of decimal->binary
+ conversion). Proto 2 added linear-time (in C; still quadratic-time
+ in Python) LONG1 and LONG4 opcodes.
+ """),
+
+ I(name="LONG1",
+ code='\x8a',
+ arg=long1,
+ stack_before=[],
+ stack_after=[pylong],
+ proto=2,
+ doc="""Long integer using one-byte length.
+
+ A more efficient encoding of a Python long; the long1 encoding
+ says it all."""),
+
+ I(name="LONG4",
+ code='\x8b',
+ arg=long4,
+ stack_before=[],
+ stack_after=[pylong],
+ proto=2,
+ doc="""Long integer using found-byte length.
+
+ A more efficient encoding of a Python long; the long4 encoding
+ says it all."""),
+
+ # Ways to spell strings (8-bit, not Unicode).
+
+ I(name='STRING',
+ code='S',
+ arg=stringnl,
+ stack_before=[],
+ stack_after=[pystring],
+ proto=0,
+ doc="""Push a Python string object.
+
+ The argument is a repr-style string, with bracketing quote characters,
+ and perhaps embedded escapes. The argument extends until the next
+ newline character.
+ """),
+
+ I(name='BINSTRING',
+ code='T',
+ arg=string4,
+ stack_before=[],
+ stack_after=[pystring],
+ proto=1,
+ doc="""Push a Python string object.
+
+ There are two arguments: the first is a 4-byte little-endian signed int
+ giving the number of bytes in the string, and the second is that many
+ bytes, which are taken literally as the string content.
+ """),
+
+ I(name='SHORT_BINSTRING',
+ code='U',
+ arg=string1,
+ stack_before=[],
+ stack_after=[pystring],
+ proto=1,
+ doc="""Push a Python string object.
+
+ There are two arguments: the first is a 1-byte unsigned int giving
+ the number of bytes in the string, and the second is that many bytes,
+ which are taken literally as the string content.
+ """),
+
+ # Ways to spell None.
+
+ I(name='NONE',
+ code='N',
+ arg=None,
+ stack_before=[],
+ stack_after=[pynone],
+ proto=0,
+ doc="Push None on the stack."),
+
+ # Ways to spell bools, starting with proto 2. See INT for how this was
+ # done before proto 2.
+
+ I(name='NEWTRUE',
+ code='\x88',
+ arg=None,
+ stack_before=[],
+ stack_after=[pybool],
+ proto=2,
+ doc="""True.
+
+ Push True onto the stack."""),
+
+ I(name='NEWFALSE',
+ code='\x89',
+ arg=None,
+ stack_before=[],
+ stack_after=[pybool],
+ proto=2,
+ doc="""True.
+
+ Push False onto the stack."""),
+
+ # Ways to spell Unicode strings.
+
+ I(name='UNICODE',
+ code='V',
+ arg=unicodestringnl,
+ stack_before=[],
+ stack_after=[pyunicode],
+ proto=0, # this may be pure-text, but it's a later addition
+ doc="""Push a Python Unicode string object.
+
+ The argument is a raw-unicode-escape encoding of a Unicode string,
+ and so may contain embedded escape sequences. The argument extends
+ until the next newline character.
+ """),
+
+ I(name='BINUNICODE',
+ code='X',
+ arg=unicodestring4,
+ stack_before=[],
+ stack_after=[pyunicode],
+ proto=1,
+ doc="""Push a Python Unicode string object.
+
+ There are two arguments: the first is a 4-byte little-endian signed int
+ giving the number of bytes in the string. The second is that many
+ bytes, and is the UTF-8 encoding of the Unicode string.
+ """),
+
+ # Ways to spell floats.
+
+ I(name='FLOAT',
+ code='F',
+ arg=floatnl,
+ stack_before=[],
+ stack_after=[pyfloat],
+ proto=0,
+ doc="""Newline-terminated decimal float literal.
+
+ The argument is repr(a_float), and in general requires 17 significant
+ digits for roundtrip conversion to be an identity (this is so for
+ IEEE-754 double precision values, which is what Python float maps to
+ on most boxes).
+
+ In general, FLOAT cannot be used to transport infinities, NaNs, or
+ minus zero across boxes (or even on a single box, if the platform C
+ library can't read the strings it produces for such things -- Windows
+ is like that), but may do less damage than BINFLOAT on boxes with
+ greater precision or dynamic range than IEEE-754 double.
+ """),
+
+ I(name='BINFLOAT',
+ code='G',
+ arg=float8,
+ stack_before=[],
+ stack_after=[pyfloat],
+ proto=1,
+ doc="""Float stored in binary form, with 8 bytes of data.
+
+ This generally requires less than half the space of FLOAT encoding.
+ In general, BINFLOAT cannot be used to transport infinities, NaNs, or
+ minus zero, raises an exception if the exponent exceeds the range of
+ an IEEE-754 double, and retains no more than 53 bits of precision (if
+ there are more than that, "add a half and chop" rounding is used to
+ cut it back to 53 significant bits).
+ """),
+
+ # Ways to build lists.
+
+ I(name='EMPTY_LIST',
+ code=']',
+ arg=None,
+ stack_before=[],
+ stack_after=[pylist],
+ proto=1,
+ doc="Push an empty list."),
+
+ I(name='APPEND',
+ code='a',
+ arg=None,
+ stack_before=[pylist, anyobject],
+ stack_after=[pylist],
+ proto=0,
+ doc="""Append an object to a list.
+
+ Stack before: ... pylist anyobject
+ Stack after: ... pylist+[anyobject]
+
+ although pylist is really extended in-place.
+ """),
+
+ I(name='APPENDS',
+ code='e',
+ arg=None,
+ stack_before=[pylist, markobject, stackslice],
+ stack_after=[pylist],
+ proto=1,
+ doc="""Extend a list by a slice of stack objects.
+
+ Stack before: ... pylist markobject stackslice
+ Stack after: ... pylist+stackslice
+
+ although pylist is really extended in-place.
+ """),
+
+ I(name='LIST',
+ code='l',
+ arg=None,
+ stack_before=[markobject, stackslice],
+ stack_after=[pylist],
+ proto=0,
+ doc="""Build a list out of the topmost stack slice, after markobject.
+
+ All the stack entries following the topmost markobject are placed into
+ a single Python list, which single list object replaces all of the
+ stack from the topmost markobject onward. For example,
+
+ Stack before: ... markobject 1 2 3 'abc'
+ Stack after: ... [1, 2, 3, 'abc']
+ """),
+
+ # Ways to build tuples.
+
+ I(name='EMPTY_TUPLE',
+ code=')',
+ arg=None,
+ stack_before=[],
+ stack_after=[pytuple],
+ proto=1,
+ doc="Push an empty tuple."),
+
+ I(name='TUPLE',
+ code='t',
+ arg=None,
+ stack_before=[markobject, stackslice],
+ stack_after=[pytuple],
+ proto=0,
+ doc="""Build a tuple out of the topmost stack slice, after markobject.
+
+ All the stack entries following the topmost markobject are placed into
+ a single Python tuple, which single tuple object replaces all of the
+ stack from the topmost markobject onward. For example,
+
+ Stack before: ... markobject 1 2 3 'abc'
+ Stack after: ... (1, 2, 3, 'abc')
+ """),
+
+ I(name='TUPLE1',
+ code='\x85',
+ arg=None,
+ stack_before=[anyobject],
+ stack_after=[pytuple],
+ proto=2,
+ doc="""One-tuple.
+
+ This code pops one value off the stack and pushes a tuple of
+ length 1 whose one item is that value back onto it. IOW:
+
+ stack[-1] = tuple(stack[-1:])
+ """),
+
+ I(name='TUPLE2',
+ code='\x86',
+ arg=None,
+ stack_before=[anyobject, anyobject],
+ stack_after=[pytuple],
+ proto=2,
+ doc="""One-tuple.
+
+ This code pops two values off the stack and pushes a tuple
+ of length 2 whose items are those values back onto it. IOW:
+
+ stack[-2:] = [tuple(stack[-2:])]
+ """),
+
+ I(name='TUPLE3',
+ code='\x87',
+ arg=None,
+ stack_before=[anyobject, anyobject, anyobject],
+ stack_after=[pytuple],
+ proto=2,
+ doc="""One-tuple.
+
+ This code pops three values off the stack and pushes a tuple
+ of length 3 whose items are those values back onto it. IOW:
+
+ stack[-3:] = [tuple(stack[-3:])]
+ """),
+
+ # Ways to build dicts.
+
+ I(name='EMPTY_DICT',
+ code='}',
+ arg=None,
+ stack_before=[],
+ stack_after=[pydict],
+ proto=1,
+ doc="Push an empty dict."),
+
+ I(name='DICT',
+ code='d',
+ arg=None,
+ stack_before=[markobject, stackslice],
+ stack_after=[pydict],
+ proto=0,
+ doc="""Build a dict out of the topmost stack slice, after markobject.
+
+ All the stack entries following the topmost markobject are placed into
+ a single Python dict, which single dict object replaces all of the
+ stack from the topmost markobject onward. The stack slice alternates
+ key, value, key, value, .... For example,
+
+ Stack before: ... markobject 1 2 3 'abc'
+ Stack after: ... {1: 2, 3: 'abc'}
+ """),
+
+ I(name='SETITEM',
+ code='s',
+ arg=None,
+ stack_before=[pydict, anyobject, anyobject],
+ stack_after=[pydict],
+ proto=0,
+ doc="""Add a key+value pair to an existing dict.
+
+ Stack before: ... pydict key value
+ Stack after: ... pydict
+
+ where pydict has been modified via pydict[key] = value.
+ """),
+
+ I(name='SETITEMS',
+ code='u',
+ arg=None,
+ stack_before=[pydict, markobject, stackslice],
+ stack_after=[pydict],
+ proto=1,
+ doc="""Add an arbitrary number of key+value pairs to an existing dict.
+
+ The slice of the stack following the topmost markobject is taken as
+ an alternating sequence of keys and values, added to the dict
+ immediately under the topmost markobject. Everything at and after the
+ topmost markobject is popped, leaving the mutated dict at the top
+ of the stack.
+
+ Stack before: ... pydict markobject key_1 value_1 ... key_n value_n
+ Stack after: ... pydict
+
+ where pydict has been modified via pydict[key_i] = value_i for i in
+ 1, 2, ..., n, and in that order.
+ """),
+
+ # Stack manipulation.
+
+ I(name='POP',
+ code='0',
+ arg=None,
+ stack_before=[anyobject],
+ stack_after=[],
+ proto=0,
+ doc="Discard the top stack item, shrinking the stack by one item."),
+
+ I(name='DUP',
+ code='2',
+ arg=None,
+ stack_before=[anyobject],
+ stack_after=[anyobject, anyobject],
+ proto=0,
+ doc="Push the top stack item onto the stack again, duplicating it."),
+
+ I(name='MARK',
+ code='(',
+ arg=None,
+ stack_before=[],
+ stack_after=[markobject],
+ proto=0,
+ doc="""Push markobject onto the stack.
+
+ markobject is a unique object, used by other opcodes to identify a
+ region of the stack containing a variable number of objects for them
+ to work on. See markobject.doc for more detail.
+ """),
+
+ I(name='POP_MARK',
+ code='1',
+ arg=None,
+ stack_before=[markobject, stackslice],
+ stack_after=[],
+ proto=0,
+ doc="""Pop all the stack objects at and above the topmost markobject.
+
+ When an opcode using a variable number of stack objects is done,
+ POP_MARK is used to remove those objects, and to remove the markobject
+ that delimited their starting position on the stack.
+ """),
+
+ # Memo manipulation. There are really only two operations (get and put),
+ # each in all-text, "short binary", and "long binary" flavors.
+
+ I(name='GET',
+ code='g',
+ arg=decimalnl_short,
+ stack_before=[],
+ stack_after=[anyobject],
+ proto=0,
+ doc="""Read an object from the memo and push it on the stack.
+
+ The index of the memo object to push is given by the newline-teriminated
+ decimal string following. BINGET and LONG_BINGET are space-optimized
+ versions.
+ """),
+
+ I(name='BINGET',
+ code='h',
+ arg=uint1,
+ stack_before=[],
+ stack_after=[anyobject],
+ proto=1,
+ doc="""Read an object from the memo and push it on the stack.
+
+ The index of the memo object to push is given by the 1-byte unsigned
+ integer following.
+ """),
+
+ I(name='LONG_BINGET',
+ code='j',
+ arg=int4,
+ stack_before=[],
+ stack_after=[anyobject],
+ proto=1,
+ doc="""Read an object from the memo and push it on the stack.
+
+ The index of the memo object to push is given by the 4-byte signed
+ little-endian integer following.
+ """),
+
+ I(name='PUT',
+ code='p',
+ arg=decimalnl_short,
+ stack_before=[],
+ stack_after=[],
+ proto=0,
+ doc="""Store the stack top into the memo. The stack is not popped.
+
+ The index of the memo location to write into is given by the newline-
+ terminated decimal string following. BINPUT and LONG_BINPUT are
+ space-optimized versions.
+ """),
+
+ I(name='BINPUT',
+ code='q',
+ arg=uint1,
+ stack_before=[],
+ stack_after=[],
+ proto=1,
+ doc="""Store the stack top into the memo. The stack is not popped.
+
+ The index of the memo location to write into is given by the 1-byte
+ unsigned integer following.
+ """),
+
+ I(name='LONG_BINPUT',
+ code='r',
+ arg=int4,
+ stack_before=[],
+ stack_after=[],
+ proto=1,
+ doc="""Store the stack top into the memo. The stack is not popped.
+
+ The index of the memo location to write into is given by the 4-byte
+ signed little-endian integer following.
+ """),
+
+ # Access the extension registry (predefined objects). Akin to the GET
+ # family.
+
+ I(name='EXT1',
+ code='\x82',
+ arg=uint1,
+ stack_before=[],
+ stack_after=[anyobject],
+ proto=2,
+ doc="""Extension code.
+
+ This code and the similar EXT2 and EXT4 allow using a registry
+ of popular objects that are pickled by name, typically classes.
+ It is envisioned that through a global negotiation and
+ registration process, third parties can set up a mapping between
+ ints and object names.
+
+ In order to guarantee pickle interchangeability, the extension
+ code registry ought to be global, although a range of codes may
+ be reserved for private use.
+
+ EXT1 has a 1-byte integer argument. This is used to index into the
+ extension registry, and the object at that index is pushed on the stack.
+ """),
+
+ I(name='EXT2',
+ code='\x83',
+ arg=uint2,
+ stack_before=[],
+ stack_after=[anyobject],
+ proto=2,
+ doc="""Extension code.
+
+ See EXT1. EXT2 has a two-byte integer argument.
+ """),
+
+ I(name='EXT4',
+ code='\x84',
+ arg=int4,
+ stack_before=[],
+ stack_after=[anyobject],
+ proto=2,
+ doc="""Extension code.
+
+ See EXT1. EXT4 has a four-byte integer argument.
+ """),
+
+ # Push a class object, or module function, on the stack, via its module
+ # and name.
+
+ I(name='GLOBAL',
+ code='c',
+ arg=stringnl_noescape_pair,
+ stack_before=[],
+ stack_after=[anyobject],
+ proto=0,
+ doc="""Push a global object (module.attr) on the stack.
+
+ Two newline-terminated strings follow the GLOBAL opcode. The first is
+ taken as a module name, and the second as a class name. The class
+ object module.class is pushed on the stack. More accurately, the
+ object returned by self.find_class(module, class) is pushed on the
+ stack, so unpickling subclasses can override this form of lookup.
+ """),
+
+ # Ways to build objects of classes pickle doesn't know about directly
+ # (user-defined classes). I despair of documenting this accurately
+ # and comprehensibly -- you really have to read the pickle code to
+ # find all the special cases.
+
+ I(name='REDUCE',
+ code='R',
+ arg=None,
+ stack_before=[anyobject, anyobject],
+ stack_after=[anyobject],
+ proto=0,
+ doc="""Push an object built from a callable and an argument tuple.
+
+ The opcode is named to remind of the __reduce__() method.
+
+ Stack before: ... callable pytuple
+ Stack after: ... callable(*pytuple)
+
+ The callable and the argument tuple are the first two items returned
+ by a __reduce__ method. Applying the callable to the argtuple is
+ supposed to reproduce the original object, or at least get it started.
+ If the __reduce__ method returns a 3-tuple, the last component is an
+ argument to be passed to the object's __setstate__, and then the REDUCE
+ opcode is followed by code to create setstate's argument, and then a
+ BUILD opcode to apply __setstate__ to that argument.
+
+ If type(callable) is not ClassType, REDUCE complains unless the
+ callable has been registered with the copy_reg module's
+ safe_constructors dict, or the callable has a magic
+ '__safe_for_unpickling__' attribute with a true value. I'm not sure
+ why it does this, but I've sure seen this complaint often enough when
+ I didn't want to <wink>.
+ """),
+
+ I(name='BUILD',
+ code='b',
+ arg=None,
+ stack_before=[anyobject, anyobject],
+ stack_after=[anyobject],
+ proto=0,
+ doc="""Finish building an object, via __setstate__ or dict update.
+
+ Stack before: ... anyobject argument
+ Stack after: ... anyobject
+
+ where anyobject may have been mutated, as follows:
+
+ If the object has a __setstate__ method,
+
+ anyobject.__setstate__(argument)
+
+ is called.
+
+ Else the argument must be a dict, the object must have a __dict__, and
+ the object is updated via
+
+ anyobject.__dict__.update(argument)
+ """),
+
+ I(name='INST',
+ code='i',
+ arg=stringnl_noescape_pair,
+ stack_before=[markobject, stackslice],
+ stack_after=[anyobject],
+ proto=0,
+ doc="""Build a class instance.
+
+ This is the protocol 0 version of protocol 1's OBJ opcode.
+ INST is followed by two newline-terminated strings, giving a
+ module and class name, just as for the GLOBAL opcode (and see
+ GLOBAL for more details about that). self.find_class(module, name)
+ is used to get a class object.
+
+ In addition, all the objects on the stack following the topmost
+ markobject are gathered into a tuple and popped (along with the
+ topmost markobject), just as for the TUPLE opcode.
+
+ Now it gets complicated. If all of these are true:
+
+ + The argtuple is empty (markobject was at the top of the stack
+ at the start).
+
+ + It's an old-style class object (the type of the class object is
+ ClassType).
+
+ + The class object does not have a __getinitargs__ attribute.
+
+ then we want to create an old-style class instance without invoking
+ its __init__() method (pickle has waffled on this over the years; not
+ calling __init__() is current wisdom). In this case, an instance of
+ an old-style dummy class is created, and then we try to rebind its
+ __class__ attribute to the desired class object. If this succeeds,
+ the new instance object is pushed on the stack, and we're done.
+
+ Else (the argtuple is not empty, it's not an old-style class object,
+ or the class object does have a __getinitargs__ attribute), the code
+ first insists that the class object have a __safe_for_unpickling__
+ attribute. Unlike as for the __safe_for_unpickling__ check in REDUCE,
+ it doesn't matter whether this attribute has a true or false value, it
+ only matters whether it exists (XXX this is a bug; cPickle
+ requires the attribute to be true). If __safe_for_unpickling__
+ doesn't exist, UnpicklingError is raised.
+
+ Else (the class object does have a __safe_for_unpickling__ attr),
+ the class object obtained from INST's arguments is applied to the
+ argtuple obtained from the stack, and the resulting instance object
+ is pushed on the stack.
+
+ NOTE: checks for __safe_for_unpickling__ went away in Python 2.3.
+ """),
+
+ I(name='OBJ',
+ code='o',
+ arg=None,
+ stack_before=[markobject, anyobject, stackslice],
+ stack_after=[anyobject],
+ proto=1,
+ doc="""Build a class instance.
+
+ This is the protocol 1 version of protocol 0's INST opcode, and is
+ very much like it. The major difference is that the class object
+ is taken off the stack, allowing it to be retrieved from the memo
+ repeatedly if several instances of the same class are created. This
+ can be much more efficient (in both time and space) than repeatedly
+ embedding the module and class names in INST opcodes.
+
+ Unlike INST, OBJ takes no arguments from the opcode stream. Instead
+ the class object is taken off the stack, immediately above the
+ topmost markobject:
+
+ Stack before: ... markobject classobject stackslice
+ Stack after: ... new_instance_object
+
+ As for INST, the remainder of the stack above the markobject is
+ gathered into an argument tuple, and then the logic seems identical,
+ except that no __safe_for_unpickling__ check is done (XXX this is
+ a bug; cPickle does test __safe_for_unpickling__). See INST for
+ the gory details.
+
+ NOTE: In Python 2.3, INST and OBJ are identical except for how they
+ get the class object. That was always the intent; the implementations
+ had diverged for accidental reasons.
+ """),
+
+ I(name='NEWOBJ',
+ code='\x81',
+ arg=None,
+ stack_before=[anyobject, anyobject],
+ stack_after=[anyobject],
+ proto=2,
+ doc="""Build an object instance.
+
+ The stack before should be thought of as containing a class
+ object followed by an argument tuple (the tuple being the stack
+ top). Call these cls and args. They are popped off the stack,
+ and the value returned by cls.__new__(cls, *args) is pushed back
+ onto the stack.
+ """),
+
+ # Machine control.
+
+ I(name='PROTO',
+ code='\x80',
+ arg=uint1,
+ stack_before=[],
+ stack_after=[],
+ proto=2,
+ doc="""Protocol version indicator.
+
+ For protocol 2 and above, a pickle must start with this opcode.
+ The argument is the protocol version, an int in range(2, 256).
+ """),
+
+ I(name='STOP',
+ code='.',
+ arg=None,
+ stack_before=[anyobject],
+ stack_after=[],
+ proto=0,
+ doc="""Stop the unpickling machine.
+
+ Every pickle ends with this opcode. The object at the top of the stack
+ is popped, and that's the result of unpickling. The stack should be
+ empty then.
+ """),
+
+ # Ways to deal with persistent IDs.
+
+ I(name='PERSID',
+ code='P',
+ arg=stringnl_noescape,
+ stack_before=[],
+ stack_after=[anyobject],
+ proto=0,
+ doc="""Push an object identified by a persistent ID.
+
+ The pickle module doesn't define what a persistent ID means. PERSID's
+ argument is a newline-terminated str-style (no embedded escapes, no
+ bracketing quote characters) string, which *is* "the persistent ID".
+ The unpickler passes this string to self.persistent_load(). Whatever
+ object that returns is pushed on the stack. There is no implementation
+ of persistent_load() in Python's unpickler: it must be supplied by an
+ unpickler subclass.
+ """),
+
+ I(name='BINPERSID',
+ code='Q',
+ arg=None,
+ stack_before=[anyobject],
+ stack_after=[anyobject],
+ proto=1,
+ doc="""Push an object identified by a persistent ID.
+
+ Like PERSID, except the persistent ID is popped off the stack (instead
+ of being a string embedded in the opcode bytestream). The persistent
+ ID is passed to self.persistent_load(), and whatever object that
+ returns is pushed on the stack. See PERSID for more detail.
+ """),
+]
+del I
+
# Verify uniqueness of .name and .code members.  Duplicates would make
# the name/code lookup tables below silently ambiguous, so fail loudly
# at import time instead.
name_index = {}
code_index = {}

for position, info in enumerate(opcodes):
    if info.name in name_index:
        raise ValueError("repeated name %r at indices %d and %d" %
                         (info.name, name_index[info.name], position))
    if info.code in code_index:
        raise ValueError("repeated code %r at indices %d and %d" %
                         (info.code, code_index[info.code], position))
    name_index[info.name] = position
    code_index[info.code] = position

# These were only needed for the check; keep the module namespace clean.
del name_index, code_index, position, info
+
##############################################################################
# Build a code2op dict, mapping opcode characters to OpcodeInfo records.
# Also ensure we've got the same stuff as pickle.py, although the
# introspection here is dicey.

# Uniqueness of .code was verified above, so no entry can be clobbered.
code2op = {info.code: info for info in opcodes}
+
def assure_pickle_consistency(verbose=False):
    """Cross-check our opcode table against the pickle module.

    Every all-uppercase name exported by pickle.__all__ whose value looks
    like a 1-byte opcode must appear in code2op under the same name, and
    every code2op entry must be matched -- a discrepancy in either
    direction raises ValueError.  If `verbose` is true, progress and
    skipped names are printed.
    """
    import pickle, re

    copy = code2op.copy()
    for name in pickle.__all__:
        # Opcode constants are ALL_CAPS; anything else isn't one.
        if not re.match("[A-Z][A-Z0-9_]+$", name):
            if verbose:
                print("skipping %r: it doesn't look like an opcode name" % name)
            continue
        picklecode = getattr(pickle, name)
        if not isinstance(picklecode, bytes) or len(picklecode) != 1:
            if verbose:
                print(("skipping %r: value %r doesn't look like a pickle "
                       "code" % (name, picklecode)))
            continue
        # pickle.py stores opcodes as bytes; our table keys are 1-char
        # strings, so decode via latin-1 (a lossless byte<->char mapping).
        picklecode = picklecode.decode("latin-1")
        if picklecode in copy:
            if verbose:
                print("checking name %r w/ code %r for consistency" % (
                      name, picklecode))
            d = copy[picklecode]
            if d.name != name:
                raise ValueError("for pickle code %r, pickle.py uses name %r "
                                 "but we're using name %r" % (picklecode,
                                                              name,
                                                              d.name))
            # Forget this one.  Any left over in copy at the end are a problem
            # of a different kind.
            del copy[picklecode]
        else:
            raise ValueError("pickle.py appears to have a pickle opcode with "
                             "name %r and code %r, but we don't" %
                             (name, picklecode))
    if copy:
        msg = ["we appear to have pickle opcodes that pickle.py doesn't have:"]
        for code, d in copy.items():
            msg.append("    name %r with code %r" % (d.name, code))
        raise ValueError("\n".join(msg))

# Run the check once at import time, then drop the helper.
assure_pickle_consistency()
del assure_pickle_consistency
+
+##############################################################################
+# A pickle opcode generator.
+
def genops(pickle):
    """Generate all the opcodes in a pickle.

    'pickle' is a file-like object, or a bytes object, containing the
    pickle.

    Each opcode in the pickle is generated, from the current pickle position,
    stopping after a STOP opcode is delivered.  A triple is generated for
    each opcode:

        opcode, arg, pos

    opcode is an OpcodeInfo record, describing the current opcode.

    If the opcode has an argument embedded in the pickle, arg is its decoded
    value, as a Python object.  If the opcode doesn't have an argument, arg
    is None.

    If the pickle has a tell() method, pos was the value of pickle.tell()
    before reading the current opcode.  If the pickle is a bytes object,
    it's wrapped in a BytesIO object, and the latter's tell() result is
    used.  Else (the pickle doesn't have a tell(), and it's not obvious how
    to query its current position) pos is None.
    """

    if isinstance(pickle, bytes):
        import io
        pickle = io.BytesIO(pickle)

    if hasattr(pickle, "tell"):
        getpos = pickle.tell
    else:
        getpos = lambda: None

    while True:
        pos = getpos()
        code = pickle.read(1)
        # code2op is keyed by 1-char strings; the stream yields bytes, so
        # decode via latin-1 (a lossless byte<->char mapping).
        opcode = code2op.get(code.decode("latin-1"))
        if opcode is None:
            if code == b"":
                raise ValueError("pickle exhausted before seeing STOP")
            else:
                raise ValueError("at position %s, opcode %r unknown" % (
                                 pos is None and "<unknown>" or pos,
                                 code))
        if opcode.arg is None:
            arg = None
        else:
            arg = opcode.arg.reader(pickle)
        yield opcode, arg, pos
        # b'.' is STOP; a pickle always ends with it.
        if code == b'.':
            assert opcode.name == 'STOP'
            break
+
+##############################################################################
+# A symbolic pickle disassembler.
+
def dis(pickle, out=None, memo=None, indentlevel=4):
    """Produce a symbolic disassembly of a pickle.

    'pickle' is a file-like object, or a bytes object, containing a (at
    least one) pickle.  The pickle is disassembled from the current
    position, through the first STOP opcode encountered.

    Optional arg 'out' is a file-like object to which the disassembly is
    printed.  It defaults to sys.stdout.

    Optional arg 'memo' is a Python dict, used as the pickle's memo.  It
    may be mutated by dis(), if the pickle contains PUT or BINPUT opcodes.
    Passing the same memo object to another dis() call then allows disassembly
    to proceed across multiple pickles that were all created by the same
    pickler with the same memo.  Ordinarily you don't need to worry about this.

    Optional arg indentlevel is the number of blanks by which to indent
    a new MARK level.  It defaults to 4.

    In addition to printing the disassembly, some sanity checks are made:

    + All embedded opcode arguments "make sense".

    + Explicit and implicit pop operations have enough items on the stack.

    + When an opcode implicitly refers to a markobject, a markobject is
      actually on the stack.

    + A memo entry isn't referenced before it's defined.

    + The markobject isn't stored in the memo.

    + A memo entry isn't redefined.
    """

    # Most of the hair here is for sanity checks, but most of it is needed
    # anyway to detect when a protocol 0 POP takes a MARK off the stack
    # (which in turn is needed to indent MARK blocks correctly).

    stack = []          # crude emulation of unpickler stack
    if memo is None:
        memo = {}       # crude emulation of unpickler memo
    maxproto = -1       # max protocol number seen
    markstack = []      # bytecode positions of MARK opcodes
    indentchunk = ' ' * indentlevel
    errormsg = None
    for opcode, arg, pos in genops(pickle):
        if pos is not None:
            print("%5d:" % pos, end=' ', file=out)

        # Opcode byte (repr'd, minus quotes), MARK-depth indent, then name.
        line = "%-4s %s%s" % (repr(opcode.code)[1:-1],
                              indentchunk * len(markstack),
                              opcode.name)

        maxproto = max(maxproto, opcode.proto)
        before = opcode.stack_before    # don't mutate
        after = opcode.stack_after      # don't mutate
        numtopop = len(before)

        # See whether a MARK should be popped.  A protocol 0 POP may also
        # consume a markobject, even though its stack_before doesn't say so.
        markmsg = None
        if markobject in before or (opcode.name == "POP" and
                                    stack and
                                    stack[-1] is markobject):
            assert markobject not in after
            if __debug__:
                if markobject in before:
                    assert before[-1] is stackslice
            if markstack:
                markpos = markstack.pop()
                if markpos is None:
                    markmsg = "(MARK at unknown opcode offset)"
                else:
                    markmsg = "(MARK at %d)" % markpos
                # Pop everything at and after the topmost markobject.
                while stack[-1] is not markobject:
                    stack.pop()
                stack.pop()
                # Stop later code from popping too much.
                try:
                    numtopop = before.index(markobject)
                except ValueError:
                    # Only POP lacks markobject in stack_before.
                    assert opcode.name == "POP"
                    numtopop = 0
            else:
                errormsg = markmsg = "no MARK exists on stack"

        # Check for correct memo usage.
        if opcode.name in ("PUT", "BINPUT", "LONG_BINPUT"):
            assert arg is not None
            if arg in memo:
                errormsg = "memo key %r already defined" % arg
            elif not stack:
                errormsg = "stack is empty -- can't store into memo"
            elif stack[-1] is markobject:
                errormsg = "can't store markobject in the memo"
            else:
                memo[arg] = stack[-1]

        elif opcode.name in ("GET", "BINGET", "LONG_BINGET"):
            if arg in memo:
                assert len(after) == 1
                after = [memo[arg]]     # for better stack emulation
            else:
                errormsg = "memo key %r has never been stored into" % arg

        if arg is not None or markmsg:
            # make a mild effort to align arguments
            line += ' ' * (10 - len(opcode.name))
            if arg is not None:
                line += ' ' + repr(arg)
            if markmsg:
                line += ' ' + markmsg
        print(line, file=out)

        if errormsg:
            # Note that we delayed complaining until the offending opcode
            # was printed.
            raise ValueError(errormsg)

        # Emulate the stack effects.
        if len(stack) < numtopop:
            raise ValueError("tries to pop %d items from stack with "
                             "only %d items" % (numtopop, len(stack)))
        if numtopop:
            del stack[-numtopop:]
        if markobject in after:
            assert markobject not in before
            markstack.append(pos)

        stack.extend(after)

    print("highest protocol among opcodes =", maxproto, file=out)
    if stack:
        raise ValueError("stack not empty after STOP: %r" % stack)
+
# For use in the doctest, simply as an example of a class to pickle.
class _Example:
    """Trivial picklable class used by the disassembly doctests."""
    def __init__(self, value):
        # One attribute, so the pickle has some instance state to record.
        self.value = value
+
+_dis_test = r"""
+>>> import pickle
+>>> x = [1, 2, (3, 4), {str8('abc'): "def"}]
+>>> pkl = pickle.dumps(x, 0)
+>>> dis(pkl)
+ 0: ( MARK
+ 1: l LIST (MARK at 0)
+ 2: p PUT 0
+ 5: L LONG 1
+ 8: a APPEND
+ 9: L LONG 2
+ 12: a APPEND
+ 13: ( MARK
+ 14: L LONG 3
+ 17: L LONG 4
+ 20: t TUPLE (MARK at 13)
+ 21: p PUT 1
+ 24: a APPEND
+ 25: ( MARK
+ 26: d DICT (MARK at 25)
+ 27: p PUT 2
+ 30: S STRING 'abc'
+ 37: p PUT 3
+ 40: V UNICODE 'def'
+ 45: p PUT 4
+ 48: s SETITEM
+ 49: a APPEND
+ 50: . STOP
+highest protocol among opcodes = 0
+
+Try again with a "binary" pickle.
+
+>>> pkl = pickle.dumps(x, 1)
+>>> dis(pkl)
+ 0: ] EMPTY_LIST
+ 1: q BINPUT 0
+ 3: ( MARK
+ 4: K BININT1 1
+ 6: K BININT1 2
+ 8: ( MARK
+ 9: K BININT1 3
+ 11: K BININT1 4
+ 13: t TUPLE (MARK at 8)
+ 14: q BINPUT 1
+ 16: } EMPTY_DICT
+ 17: q BINPUT 2
+ 19: U SHORT_BINSTRING 'abc'
+ 24: q BINPUT 3
+ 26: X BINUNICODE 'def'
+ 34: q BINPUT 4
+ 36: s SETITEM
+ 37: e APPENDS (MARK at 3)
+ 38: . STOP
+highest protocol among opcodes = 1
+
+Exercise the INST/OBJ/BUILD family.
+
+>>> import random
+>>> dis(pickle.dumps(random.getrandbits, 0))
+ 0: c GLOBAL 'random getrandbits'
+ 20: p PUT 0
+ 23: . STOP
+highest protocol among opcodes = 0
+
+>>> from pickletools import _Example
+>>> x = [_Example(42)] * 2
+>>> dis(pickle.dumps(x, 0))
+ 0: ( MARK
+ 1: l LIST (MARK at 0)
+ 2: p PUT 0
+ 5: c GLOBAL 'copy_reg _reconstructor'
+ 30: p PUT 1
+ 33: ( MARK
+ 34: c GLOBAL 'pickletools _Example'
+ 56: p PUT 2
+ 59: c GLOBAL '__builtin__ object'
+ 79: p PUT 3
+ 82: N NONE
+ 83: t TUPLE (MARK at 33)
+ 84: p PUT 4
+ 87: R REDUCE
+ 88: p PUT 5
+ 91: ( MARK
+ 92: d DICT (MARK at 91)
+ 93: p PUT 6
+ 96: S STRING 'value'
+ 105: p PUT 7
+ 108: L LONG 42
+ 112: s SETITEM
+ 113: b BUILD
+ 114: a APPEND
+ 115: g GET 5
+ 118: a APPEND
+ 119: . STOP
+highest protocol among opcodes = 0
+
+>>> dis(pickle.dumps(x, 1))
+ 0: ] EMPTY_LIST
+ 1: q BINPUT 0
+ 3: ( MARK
+ 4: c GLOBAL 'copy_reg _reconstructor'
+ 29: q BINPUT 1
+ 31: ( MARK
+ 32: c GLOBAL 'pickletools _Example'
+ 54: q BINPUT 2
+ 56: c GLOBAL '__builtin__ object'
+ 76: q BINPUT 3
+ 78: N NONE
+ 79: t TUPLE (MARK at 31)
+ 80: q BINPUT 4
+ 82: R REDUCE
+ 83: q BINPUT 5
+ 85: } EMPTY_DICT
+ 86: q BINPUT 6
+ 88: U SHORT_BINSTRING 'value'
+ 95: q BINPUT 7
+ 97: K BININT1 42
+ 99: s SETITEM
+ 100: b BUILD
+ 101: h BINGET 5
+ 103: e APPENDS (MARK at 3)
+ 104: . STOP
+highest protocol among opcodes = 1
+
+Try "the canonical" recursive-object test.
+
+>>> L = []
+>>> T = L,
+>>> L.append(T)
+>>> L[0] is T
+True
+>>> T[0] is L
+True
+>>> L[0][0] is L
+True
+>>> T[0][0] is T
+True
+>>> dis(pickle.dumps(L, 0))
+ 0: ( MARK
+ 1: l LIST (MARK at 0)
+ 2: p PUT 0
+ 5: ( MARK
+ 6: g GET 0
+ 9: t TUPLE (MARK at 5)
+ 10: p PUT 1
+ 13: a APPEND
+ 14: . STOP
+highest protocol among opcodes = 0
+
+>>> dis(pickle.dumps(L, 1))
+ 0: ] EMPTY_LIST
+ 1: q BINPUT 0
+ 3: ( MARK
+ 4: h BINGET 0
+ 6: t TUPLE (MARK at 3)
+ 7: q BINPUT 1
+ 9: a APPEND
+ 10: . STOP
+highest protocol among opcodes = 1
+
+Note that, in the protocol 0 pickle of the recursive tuple, the disassembler
+has to emulate the stack in order to realize that the POP opcode at 16 gets
+rid of the MARK at 0.
+
+>>> dis(pickle.dumps(T, 0))
+ 0: ( MARK
+ 1: ( MARK
+ 2: l LIST (MARK at 1)
+ 3: p PUT 0
+ 6: ( MARK
+ 7: g GET 0
+ 10: t TUPLE (MARK at 6)
+ 11: p PUT 1
+ 14: a APPEND
+ 15: 0 POP
+ 16: 0 POP (MARK at 0)
+ 17: g GET 1
+ 20: . STOP
+highest protocol among opcodes = 0
+
+>>> dis(pickle.dumps(T, 1))
+ 0: ( MARK
+ 1: ] EMPTY_LIST
+ 2: q BINPUT 0
+ 4: ( MARK
+ 5: h BINGET 0
+ 7: t TUPLE (MARK at 4)
+ 8: q BINPUT 1
+ 10: a APPEND
+ 11: 1 POP_MARK (MARK at 0)
+ 12: h BINGET 1
+ 14: . STOP
+highest protocol among opcodes = 1
+
+Try protocol 2.
+
+>>> dis(pickle.dumps(L, 2))
+ 0: \x80 PROTO 2
+ 2: ] EMPTY_LIST
+ 3: q BINPUT 0
+ 5: h BINGET 0
+ 7: \x85 TUPLE1
+ 8: q BINPUT 1
+ 10: a APPEND
+ 11: . STOP
+highest protocol among opcodes = 2
+
+>>> dis(pickle.dumps(T, 2))
+ 0: \x80 PROTO 2
+ 2: ] EMPTY_LIST
+ 3: q BINPUT 0
+ 5: h BINGET 0
+ 7: \x85 TUPLE1
+ 8: q BINPUT 1
+ 10: a APPEND
+ 11: 0 POP
+ 12: h BINGET 1
+ 14: . STOP
+highest protocol among opcodes = 2
+"""
+
+_memo_test = r"""
+>>> import pickle
+>>> import io
+>>> f = io.BytesIO()
+>>> p = pickle.Pickler(f, 2)
+>>> x = [1, 2, 3]
+>>> p.dump(x)
+>>> p.dump(x)
+>>> f.seek(0)
+0
+>>> memo = {}
+>>> dis(f, memo=memo)
+ 0: \x80 PROTO 2
+ 2: ] EMPTY_LIST
+ 3: q BINPUT 0
+ 5: ( MARK
+ 6: K BININT1 1
+ 8: K BININT1 2
+ 10: K BININT1 3
+ 12: e APPENDS (MARK at 5)
+ 13: . STOP
+highest protocol among opcodes = 2
+>>> dis(f, memo=memo)
+ 14: \x80 PROTO 2
+ 16: h BINGET 0
+ 18: . STOP
+highest protocol among opcodes = 2
+"""
+
+__test__ = {'disassembler_test': _dis_test,
+ 'disassembler_memo_test': _memo_test,
+ }
+
def _test():
    """Run this module's doctests and return the doctest results."""
    import doctest

    results = doctest.testmod()
    return results
+
+if __name__ == "__main__":
+ _test()
Added: sandbox/trunk/cpy_merge/Lib/profile.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Lib/profile.py Wed May 23 03:45:28 2007
@@ -0,0 +1,619 @@
+#! /usr/bin/env python
+#
+# Class for profiling python code. rev 1.0 6/2/94
+#
+# Based on prior profile module by Sjoerd Mullender...
+# which was hacked somewhat by: Guido van Rossum
+
+"""Class for profiling Python code."""
+
+# Copyright 1994, by InfoSeek Corporation, all rights reserved.
+# Written by James Roskind
+#
+# Permission to use, copy, modify, and distribute this Python software
+# and its associated documentation for any purpose (subject to the
+# restriction in the following sentence) without fee is hereby granted,
+# provided that the above copyright notice appears in all copies, and
+# that both that copyright notice and this permission notice appear in
+# supporting documentation, and that the name of InfoSeek not be used in
+# advertising or publicity pertaining to distribution of the software
+# without specific, written prior permission. This permission is
+# explicitly restricted to the copying and modification of the software
+# to remain in Python, compiled Python, or other languages (such as C)
+# wherein the modified or derived code is exclusively imported into a
+# Python module.
+#
+# INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
+# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+# FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
+# SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
+# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
+# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+
+
+import sys
+import os
+import time
+import marshal
+from optparse import OptionParser
+
+__all__ = ["run", "runctx", "help", "Profile"]
+
+# Sample timer for use with
+#i_count = 0
+#def integer_timer():
+# global i_count
+# i_count = i_count + 1
+# return i_count
+#itimes = integer_timer # replace with C coded timer returning integers
+
+#**************************************************************************
+# The following are the static member functions for the profiler class
+# Note that an instance of Profile() is *not* needed to call them.
+#**************************************************************************
+
def run(statement, filename=None, sort=-1):
    """Run *statement* under the profiler, optionally saving the results.

    The statement is handed to exec and profiling statistics are
    gathered while it executes.  If *filename* is given, the raw stats
    are dumped to that file; otherwise a simple report sorted according
    to *sort* (file/line/function-name by default) is printed.
    """
    profiler = Profile()
    try:
        profiler = profiler.run(statement)
    except SystemExit:
        # A sys.exit() inside the statement still yields a report.
        pass
    if filename is None:
        return profiler.print_stats(sort)
    profiler.dump_stats(filename)
+
def runctx(statement, globals, locals, filename=None, sort=-1):
    """Run statement under profiler, supplying your own globals and locals,
    optionally saving results in filename.

    statement and filename have the same semantics as profile.run.
    sort selects the report ordering when printing to stdout, as in
    profile.run (added for consistency with run(); defaults to the
    previous behavior).
    """
    prof = Profile()
    try:
        prof = prof.runctx(statement, globals, locals)
    except SystemExit:
        # Let a sys.exit() inside the statement still produce a report.
        pass

    if filename is not None:
        prof.dump_stats(filename)
    else:
        # Previously the sort order could not be chosen here, unlike run().
        return prof.print_stats(sort)
+
+# Backwards compatibility.
def help():
    """Print a pointer to the profiler documentation (backwards compat)."""
    for line in (
        "Documentation for the profile module can be found ",
        "in the Python Library Reference, section 'The Python Profiler'.",
    ):
        print(line)
+
# Platform-specific wall/CPU clock helpers.  Each _get_time_* function
# normalizes its timer's raw result to a float number of seconds.

if os.name == "mac":
    import MacOS
    def _get_time_mac(timer=MacOS.GetTicks):
        # GetTicks counts 1/60th-second ticks; convert to seconds.
        return timer() / 60.0

if hasattr(os, "times"):
    def _get_time_times(timer=os.times):
        # Sum of user and system CPU time (first two fields of os.times()).
        t = timer()
        return t[0] + t[1]

# Using getrusage(3) is better than clock(3) if available:
# on some systems (e.g. FreeBSD), getrusage has a higher resolution
# Furthermore, on a POSIX system, returns microseconds, which
# wrap around after 36min.
_has_res = 0
try:
    import resource
    resgetrusage = lambda: resource.getrusage(resource.RUSAGE_SELF)
    def _get_time_resource(timer=resgetrusage):
        # ru_utime + ru_stime: total CPU seconds consumed by this process.
        t = timer()
        return t[0] + t[1]
    _has_res = 1
except ImportError:
    # No resource module on this platform; Profile.__init__ falls back.
    pass
+
class Profile:
    """Profiler class.

    self.cur is always a tuple.  Each such tuple corresponds to a stack
    frame that is currently active (self.cur[-2]).  The following are the
    definitions of its members.  We use this external "parallel stack" to
    avoid contaminating the program that we are profiling. (old profiler
    used to write into the frames local dictionary!!) Derived classes
    can change the definition of some entries, as long as they leave
    [-2:] intact (frame and previous tuple).  In case an internal error is
    detected, the -3 element is used as the function name.

    [ 0] = Time that needs to be charged to the parent frame's function.
           It is used so that a function call will not have to access the
           timing data for the parent frame.
    [ 1] = Total time spent in this frame's function, excluding time in
           subfunctions (this latter is tallied in cur[2]).
    [ 2] = Total time spent in subfunctions, excluding time executing the
           frame's function (this latter is tallied in cur[1]).
    [-3] = Name of the function that corresponds to this frame.
    [-2] = Actual frame that we correspond to (used to sync exception handling).
    [-1] = Our parent 6-tuple (corresponds to frame.f_back).

    Timing data for each function is stored as a 5-tuple in the dictionary
    self.timings[].  The index is always the name stored in self.cur[-3].
    The following are the definitions of the members:

    [0] = The number of times this function was called, not counting direct
          or indirect recursion,
    [1] = Number of times this function appears on the stack, minus one
    [2] = Total time spent internal to this function
    [3] = Cumulative time that this function was present on the stack.  In
          non-recursive functions, this is the total execution time from start
          to finish of each invocation of a function, including time spent in
          all subfunctions.
    [4] = A dictionary indicating for each function name, the number of times
          it was called by us.
    """

    bias = 0  # calibration constant

    def __init__(self, timer=None, bias=None):
        # timer: optional replacement clock (scalar- or tuple-returning);
        # bias: per-event stopwatch overhead to subtract (see calibrate()).
        self.timings = {}
        self.cur = None
        self.cmd = ""
        self.c_func_name = ""

        if bias is None:
            bias = self.bias
        self.bias = bias  # Materialize in local dict for lookup speed.

        # Pick the best available timer and the dispatch routine matched
        # to the shape of its return value (scalar, 2-tuple, or sequence).
        if not timer:
            if _has_res:
                self.timer = resgetrusage
                self.dispatcher = self.trace_dispatch
                self.get_time = _get_time_resource
            elif os.name == 'mac':
                self.timer = MacOS.GetTicks
                self.dispatcher = self.trace_dispatch_mac
                self.get_time = _get_time_mac
            elif hasattr(time, 'clock'):
                self.timer = self.get_time = time.clock
                self.dispatcher = self.trace_dispatch_i
            elif hasattr(os, 'times'):
                self.timer = os.times
                self.dispatcher = self.trace_dispatch
                self.get_time = _get_time_times
            else:
                self.timer = self.get_time = time.time
                self.dispatcher = self.trace_dispatch_i
        else:
            self.timer = timer
            t = self.timer()  # test out timer function
            try:
                length = len(t)
            except TypeError:
                # Scalar-returning timer.
                self.get_time = timer
                self.dispatcher = self.trace_dispatch_i
            else:
                if length == 2:
                    self.dispatcher = self.trace_dispatch
                else:
                    self.dispatcher = self.trace_dispatch_l
                # This get_time() implementation needs to be defined
                # here to capture the passed-in timer in the parameter
                # list (for performance).  Note that we can't assume
                # the timer() result contains two values in all
                # cases.
                def get_time_timer(timer=timer, sum=sum):
                    return sum(timer())
                self.get_time = get_time_timer
        self.t = self.get_time()
        self.simulate_call('profiler')

    # Heavily optimized dispatch routine for os.times() timer

    def trace_dispatch(self, frame, event, arg):
        # Dispatcher for timers returning a (user, system) 2-tuple.
        timer = self.timer
        t = timer()
        t = t[0] + t[1] - self.t - self.bias

        if event == "c_call":
            self.c_func_name = arg.__name__

        if self.dispatch[event](self, frame, t):
            t = timer()
            self.t = t[0] + t[1]
        else:
            r = timer()
            self.t = r[0] + r[1] - t  # put back unrecorded delta

    # Dispatch routine for best timer program (return = scalar, fastest if
    # an integer but float works too -- and time.clock() relies on that).

    def trace_dispatch_i(self, frame, event, arg):
        timer = self.timer
        t = timer() - self.t - self.bias

        if event == "c_call":
            self.c_func_name = arg.__name__

        if self.dispatch[event](self, frame, t):
            self.t = timer()
        else:
            self.t = timer() - t  # put back unrecorded delta

    # Dispatch routine for macintosh (timer returns time in ticks of
    # 1/60th second)

    def trace_dispatch_mac(self, frame, event, arg):
        timer = self.timer
        t = timer()/60.0 - self.t - self.bias

        if event == "c_call":
            self.c_func_name = arg.__name__

        if self.dispatch[event](self, frame, t):
            self.t = timer()/60.0
        else:
            self.t = timer()/60.0 - t  # put back unrecorded delta

    # SLOW generic dispatch routine for timer returning lists of numbers

    def trace_dispatch_l(self, frame, event, arg):
        get_time = self.get_time
        t = get_time() - self.t - self.bias

        if event == "c_call":
            self.c_func_name = arg.__name__

        if self.dispatch[event](self, frame, t):
            self.t = get_time()
        else:
            self.t = get_time() - t  # put back unrecorded delta

    # In the event handlers, the first 3 elements of self.cur are unpacked
    # into vrbls w/ 3-letter names.  The last two characters are meant to be
    # mnemonic:
    #     _pt  self.cur[0] "parent time"   time to be charged to parent frame
    #     _it  self.cur[1] "internal time" time spent directly in the function
    #     _et  self.cur[2] "external time" time spent in subfunctions

    def trace_dispatch_exception(self, frame, t):
        # If the exception is propagating out of a deeper frame than the
        # one we are tracking, treat it as a return from that frame first.
        rpt, rit, ret, rfn, rframe, rcur = self.cur
        if (rframe is not frame) and rcur:
            return self.trace_dispatch_return(rframe, t)
        self.cur = rpt, rit+t, ret, rfn, rframe, rcur
        return 1


    def trace_dispatch_call(self, frame, t):
        # Push a new entry onto the parallel stack and bump the per-function
        # "appearances on stack" counter.
        if self.cur and frame.f_back is not self.cur[-2]:
            rpt, rit, ret, rfn, rframe, rcur = self.cur
            if not isinstance(rframe, Profile.fake_frame):
                assert rframe.f_back is frame.f_back, ("Bad call", rfn,
                                                       rframe, rframe.f_back,
                                                       frame, frame.f_back)
                self.trace_dispatch_return(rframe, 0)
                assert (self.cur is None or \
                        frame.f_back is self.cur[-2]), ("Bad call",
                                                        self.cur[-3])
        fcode = frame.f_code
        fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
        self.cur = (t, 0, 0, fn, frame, self.cur)
        timings = self.timings
        if fn in timings:
            cc, ns, tt, ct, callers = timings[fn]
            timings[fn] = cc, ns + 1, tt, ct, callers
        else:
            timings[fn] = 0, 0, 0, 0, {}
        return 1

    def trace_dispatch_c_call (self, frame, t):
        # Built-in/C functions have no code object; synthesize a key from
        # the name captured by the dispatcher on the "c_call" event.
        fn = ("", 0, self.c_func_name)
        self.cur = (t, 0, 0, fn, frame, self.cur)
        timings = self.timings
        if fn in timings:
            cc, ns, tt, ct, callers = timings[fn]
            timings[fn] = cc, ns+1, tt, ct, callers
        else:
            timings[fn] = 0, 0, 0, 0, {}
        return 1

    def trace_dispatch_return(self, frame, t):
        # Pop the returning frame off the parallel stack, charging its
        # time to itself and to its caller.
        if frame is not self.cur[-2]:
            assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
            self.trace_dispatch_return(self.cur[-2], 0)

        # Prefix "r" means part of the Returning or exiting frame.
        # Prefix "p" means part of the Previous or Parent or older frame.

        rpt, rit, ret, rfn, frame, rcur = self.cur
        rit = rit + t
        frame_total = rit + ret

        ppt, pit, pet, pfn, pframe, pcur = rcur
        self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur

        timings = self.timings
        cc, ns, tt, ct, callers = timings[rfn]
        if not ns:
            # This is the only occurrence of the function on the stack.
            # Else this is a (directly or indirectly) recursive call, and
            # its cumulative time will get updated when the topmost call to
            # it returns.
            ct = ct + frame_total
            cc = cc + 1

        if pfn in callers:
            callers[pfn] = callers[pfn] + 1  # hack: gather more
            # stats such as the amount of time added to ct courtesy
            # of this specific call, and the contribution to cc
            # courtesy of this call.
        else:
            callers[pfn] = 1

        timings[rfn] = cc, ns - 1, tt + rit, ct, callers

        return 1


    # Maps sys.setprofile event names to the bound handler above.  Note
    # that C-function exits (normal or via exception) reuse the plain
    # return handler.
    dispatch = {
        "call": trace_dispatch_call,
        "exception": trace_dispatch_exception,
        "return": trace_dispatch_return,
        "c_call": trace_dispatch_c_call,
        "c_exception": trace_dispatch_return,  # the C function returned
        "c_return": trace_dispatch_return,
        }


    # The next few functions play with self.cmd. By carefully preloading
    # our parallel stack, we can force the profiled result to include
    # an arbitrary string as the name of the calling function.
    # We use self.cmd as that string, and the resulting stats look
    # very nice :-).

    def set_cmd(self, cmd):
        if self.cur[-1]: return   # already set
        self.cmd = cmd
        self.simulate_call(cmd)

    class fake_code:
        # Minimal stand-in for a code object, just enough for the
        # dispatch handlers above to build a (file, line, name) key.
        def __init__(self, filename, line, name):
            self.co_filename = filename
            self.co_line = line
            self.co_name = name
            self.co_firstlineno = 0

        def __repr__(self):
            return repr((self.co_filename, self.co_line, self.co_name))

    class fake_frame:
        # Minimal stand-in for a frame object (code + back-link only).
        def __init__(self, code, prior):
            self.f_code = code
            self.f_back = prior

    def simulate_call(self, name):
        # Pretend a call to *name* happened, so it appears in the stats
        # as the caller of everything profiled so far.
        code = self.fake_code('profile', 0, name)
        if self.cur:
            pframe = self.cur[-2]
        else:
            pframe = None
        frame = self.fake_frame(code, pframe)
        self.dispatch['call'](self, frame, 0)

    # collect stats from pending stack, including getting final
    # timings for self.cmd frame.

    def simulate_cmd_complete(self):
        get_time = self.get_time
        t = get_time() - self.t
        while self.cur[-1]:
            # We *can* cause assertion errors here if
            # dispatch_trace_return checks for a frame match!
            self.dispatch['return'](self, self.cur[-2], t)
            t = 0
        self.t = get_time() - t


    def print_stats(self, sort=-1):
        # Print a report sorted per pstats.Stats.sort_stats(sort).
        import pstats
        pstats.Stats(self).strip_dirs().sort_stats(sort). \
                  print_stats()

    def dump_stats(self, file):
        # Marshal the snapshot stats dict to *file* for later pstats use.
        f = open(file, 'wb')
        self.create_stats()
        marshal.dump(self.stats, f)
        f.close()

    def create_stats(self):
        self.simulate_cmd_complete()
        self.snapshot_stats()

    def snapshot_stats(self):
        # Convert self.timings into the pstats-compatible self.stats dict,
        # replacing the "appearances on stack" slot with total call count.
        self.stats = {}
        for func, (cc, ns, tt, ct, callers) in self.timings.items():
            callers = callers.copy()
            nc = 0
            for callcnt in callers.values():
                nc += callcnt
            self.stats[func] = cc, nc, tt, ct, callers


    # The following two methods can be called by clients to use
    # a profiler to profile a statement, given as a string.

    def run(self, cmd):
        import __main__
        dict = __main__.__dict__
        return self.runctx(cmd, dict, dict)

    def runctx(self, cmd, globals, locals):
        # Profile exec of *cmd*; always detach the profiler afterwards.
        self.set_cmd(cmd)
        sys.setprofile(self.dispatcher)
        try:
            exec(cmd, globals, locals)
        finally:
            sys.setprofile(None)
        return self

    # This method is more useful to profile a single function call.
    def runcall(self, func, *args, **kw):
        self.set_cmd(repr(func))
        sys.setprofile(self.dispatcher)
        try:
            return func(*args, **kw)
        finally:
            sys.setprofile(None)


    #******************************************************************
    # The following calculates the overhead for using a profiler.  The
    # problem is that it takes a fair amount of time for the profiler
    # to stop the stopwatch (from the time it receives an event).
    # Similarly, there is a delay from the time that the profiler
    # re-starts the stopwatch before the user's code really gets to
    # continue.  The following code tries to measure the difference on
    # a per-event basis.
    #
    # Note that this difference is only significant if there are a lot of
    # events, and relatively little user code per event.  For example,
    # code with small functions will typically benefit from having the
    # profiler calibrated for the current platform.  This *could* be
    # done on the fly during init() time, but it is not worth the
    # effort.  Also note that if too large a value specified, then
    # execution time on some functions will actually appear as a
    # negative number.  It is *normal* for some functions (with very
    # low call counts) to have such negative stats, even if the
    # calibration figure is "correct."
    #
    # One alternative to profile-time calibration adjustments (i.e.,
    # adding in the magic little delta during each event) is to track
    # more carefully the number of events (and cumulatively, the number
    # of events during sub functions) that are seen.  If this were
    # done, then the arithmetic could be done after the fact (i.e., at
    # display time).  Currently, we track only call/return events.
    # These values can be deduced by examining the callees and callers
    # vectors for each functions.  Hence we *can* almost correct the
    # internal time figure at print time (note that we currently don't
    # track exception event processing counts).  Unfortunately, there
    # is currently no similar information for cumulative sub-function
    # time.  It would not be hard to "get all this info" at profiler
    # time.  Specifically, we would have to extend the tuples to keep
    # counts of this in each frame, and then extend the defs of timing
    # tuples to include the significant two figures.  I'm a bit fearful
    # that this additional feature will slow the heavily optimized
    # event/time ratio (i.e., the profiler would run slower, for a very
    # low "value added" feature.)
    #**************************************************************

    def calibrate(self, m, verbose=0):
        # Measure the mean per-event stopwatch overhead; run with bias 0
        # so an existing calibration doesn't skew the measurement.
        if self.__class__ is not Profile:
            raise TypeError("Subclasses must override .calibrate().")

        saved_bias = self.bias
        self.bias = 0
        try:
            return self._calibrate_inner(m, verbose)
        finally:
            self.bias = saved_bias

    def _calibrate_inner(self, m, verbose):
        get_time = self.get_time

        # Set up a test case to be run with and without profiling.  Include
        # lots of calls, because we're trying to quantify stopwatch overhead.
        # Do not raise any exceptions, though, because we want to know
        # exactly how many profile events are generated (one call event, +
        # one return event, per Python-level call).

        def f1(n):
            for i in range(n):
                x = 1

        def f(m, f1=f1):
            for i in range(m):
                f1(100)

        f(m)    # warm up the cache

        # elapsed_noprofile <- time f(m) takes without profiling.
        t0 = get_time()
        f(m)
        t1 = get_time()
        elapsed_noprofile = t1 - t0
        if verbose:
            print("elapsed time without profiling =", elapsed_noprofile)

        # elapsed_profile <- time f(m) takes with profiling.  The difference
        # is profiling overhead, only some of which the profiler subtracts
        # out on its own.
        p = Profile()
        t0 = get_time()
        p.runctx('f(m)', globals(), locals())
        t1 = get_time()
        elapsed_profile = t1 - t0
        if verbose:
            print("elapsed time with profiling =", elapsed_profile)

        # reported_time <- "CPU seconds" the profiler charged to f and f1.
        total_calls = 0.0
        reported_time = 0.0
        for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
                p.timings.items():
            if funcname in ("f", "f1"):
                total_calls += cc
                reported_time += tt

        if verbose:
            print("'CPU seconds' profiler reported =", reported_time)
            print("total # calls =", total_calls)
        if total_calls != m + 1:
            raise ValueError("internal error: total calls = %d" % total_calls)

        # reported_time - elapsed_noprofile = overhead the profiler wasn't
        # able to measure.  Divide by twice the number of calls (since there
        # are two profiler events per call in this test) to get the hidden
        # overhead per event.
        mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
        if verbose:
            print("mean stopwatch overhead per profile event =", mean)
        return mean
+
+#****************************************************************************
def Stats(*args):
    """Backward-compat stub; report generation lives in the pstats module."""
    print('Report generating functions are in the "pstats" module\a')
+
def main():
    """Profile a script named on the command line.

    Usage: profile.py [-o output_file_path] [-s sort] scriptfile [arg] ...

    Exits with status 2 when no arguments are given; otherwise returns
    the OptionParser (as before) after profiling the script.
    """
    usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..."
    parser = OptionParser(usage=usage)
    parser.allow_interspersed_args = False
    parser.add_option('-o', '--outfile', dest="outfile",
        help="Save stats to <outfile>", default=None)
    parser.add_option('-s', '--sort', dest="sort",
        help="Sort order when printing to stdout, based on pstats.Stats class",
        default=-1)

    if not sys.argv[1:]:
        parser.print_usage()
        sys.exit(2)

    (options, args) = parser.parse_args()
    # Make the profiled script see its own argv, as if run directly.
    sys.argv[:] = args

    if len(sys.argv) > 0:
        progname = sys.argv[0]
        sys.path.insert(0, os.path.dirname(progname))
        # execfile() no longer exists in Python 3 (this module already
        # uses print() as a function), so compile and exec the script
        # source instead.
        run("exec(compile(open(%r).read(), %r, 'exec'))" % (progname, progname),
            options.outfile, options.sort)
    else:
        parser.print_usage()
    return parser
+
+# When invoked as main program, invoke the profiler on a script
+if __name__ == '__main__':
+ main()
Added: sandbox/trunk/cpy_merge/Lib/test/output/test_cProfile
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Lib/test/output/test_cProfile Wed May 23 03:45:28 2007
@@ -0,0 +1,78 @@
+test_cProfile
+ 119 function calls (99 primitive calls) in 1.000 CPU seconds
+
+ Ordered by: standard name
+
+ ncalls tottime percall cumtime percall filename:lineno(function)
+ 1 0.000 0.000 1.000 1.000 <string>:1(<module>)
+ 8 0.064 0.008 0.080 0.010 test_cProfile.py:103(subhelper)
+ 28 0.028 0.001 0.028 0.001 test_cProfile.py:115(__getattr__)
+ 1 0.270 0.270 1.000 1.000 test_cProfile.py:30(testfunc)
+ 23/3 0.150 0.007 0.170 0.057 test_cProfile.py:40(factorial)
+ 20 0.020 0.001 0.020 0.001 test_cProfile.py:53(mul)
+ 2 0.040 0.020 0.600 0.300 test_cProfile.py:60(helper)
+ 4 0.116 0.029 0.120 0.030 test_cProfile.py:78(helper1)
+ 2 0.000 0.000 0.140 0.070 test_cProfile.py:89(helper2_indirect)
+ 8 0.312 0.039 0.400 0.050 test_cProfile.py:93(helper2)
+ 1 0.000 0.000 1.000 1.000 {exec}
+ 12 0.000 0.000 0.012 0.001 {hasattr}
+ 4 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}
+ 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
+ 4 0.000 0.000 0.000 0.000 {sys.exc_info}
+
+
+ Ordered by: standard name
+
+Function called...
+ ncalls tottime cumtime
+<string>:1(<module>) -> 1 0.270 1.000 test_cProfile.py:30(testfunc)
+test_cProfile.py:103(subhelper) -> 16 0.016 0.016 test_cProfile.py:115(__getattr__)
+test_cProfile.py:115(__getattr__) ->
+test_cProfile.py:30(testfunc) -> 1 0.014 0.130 test_cProfile.py:40(factorial)
+ 2 0.040 0.600 test_cProfile.py:60(helper)
+test_cProfile.py:40(factorial) -> 20/3 0.130 0.147 test_cProfile.py:40(factorial)
+ 20 0.020 0.020 test_cProfile.py:53(mul)
+test_cProfile.py:53(mul) ->
+test_cProfile.py:60(helper) -> 4 0.116 0.120 test_cProfile.py:78(helper1)
+ 2 0.000 0.140 test_cProfile.py:89(helper2_indirect)
+ 6 0.234 0.300 test_cProfile.py:93(helper2)
+test_cProfile.py:78(helper1) -> 4 0.000 0.004 {hasattr}
+ 4 0.000 0.000 {method 'append' of 'list' objects}
+ 4 0.000 0.000 {sys.exc_info}
+test_cProfile.py:89(helper2_indirect) -> 2 0.006 0.040 test_cProfile.py:40(factorial)
+ 2 0.078 0.100 test_cProfile.py:93(helper2)
+test_cProfile.py:93(helper2) -> 8 0.064 0.080 test_cProfile.py:103(subhelper)
+ 8 0.000 0.008 {hasattr}
+{exec} -> 1 0.000 1.000 <string>:1(<module>)
+{hasattr} -> 12 0.012 0.012 test_cProfile.py:115(__getattr__)
+{method 'append' of 'list' objects} ->
+{method 'disable' of '_lsprof.Profiler' objects} ->
+{sys.exc_info} ->
+
+
+ Ordered by: standard name
+
+Function was called by...
+ ncalls tottime cumtime
+<string>:1(<module>) <- 1 0.000 1.000 {exec}
+test_cProfile.py:103(subhelper) <- 8 0.064 0.080 test_cProfile.py:93(helper2)
+test_cProfile.py:115(__getattr__) <- 16 0.016 0.016 test_cProfile.py:103(subhelper)
+ 12 0.012 0.012 {hasattr}
+test_cProfile.py:30(testfunc) <- 1 0.270 1.000 <string>:1(<module>)
+test_cProfile.py:40(factorial) <- 1 0.014 0.130 test_cProfile.py:30(testfunc)
+ 20/3 0.130 0.147 test_cProfile.py:40(factorial)
+ 2 0.006 0.040 test_cProfile.py:89(helper2_indirect)
+test_cProfile.py:53(mul) <- 20 0.020 0.020 test_cProfile.py:40(factorial)
+test_cProfile.py:60(helper) <- 2 0.040 0.600 test_cProfile.py:30(testfunc)
+test_cProfile.py:78(helper1) <- 4 0.116 0.120 test_cProfile.py:60(helper)
+test_cProfile.py:89(helper2_indirect) <- 2 0.000 0.140 test_cProfile.py:60(helper)
+test_cProfile.py:93(helper2) <- 6 0.234 0.300 test_cProfile.py:60(helper)
+ 2 0.078 0.100 test_cProfile.py:89(helper2_indirect)
+{exec} <-
+{hasattr} <- 4 0.000 0.004 test_cProfile.py:78(helper1)
+ 8 0.000 0.008 test_cProfile.py:93(helper2)
+{method 'append' of 'list' objects} <- 4 0.000 0.000 test_cProfile.py:78(helper1)
+{method 'disable' of '_lsprof.Profiler' objects} <-
+{sys.exc_info} <- 4 0.000 0.000 test_cProfile.py:78(helper1)
+
+
Added: sandbox/trunk/cpy_merge/Lib/test/pickletester.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Lib/test/pickletester.py Wed May 23 03:45:28 2007
@@ -0,0 +1,1006 @@
+import unittest
+import pickle
+try:
+ import cPickle
+except ImportError:
+ cPickle = None
+import pickletools
+import copy_reg
+
+from test.test_support import TestFailed, have_unicode, TESTFN, \
+ run_with_locale
+
+# Tests that try a number of pickle protocols should have a
+# for proto in protocols:
+# kind of outer loop.
+if cPickle is not None:
+ assert pickle.HIGHEST_PROTOCOL == cPickle.HIGHEST_PROTOCOL == 2
+protocols = range(pickle.HIGHEST_PROTOCOL + 1)
+
+
def opcode_in_pickle(code, pickle):
    """Return True if opcode `code` occurs anywhere in the pickle bytes."""
    # Opcode codes are one-byte bytes objects; genops reports them as
    # one-character strings, so decode once up front.
    target = code.decode("latin-1")
    return any(op.code == target
               for op, _arg, _pos in pickletools.genops(pickle))
+
def count_opcode(code, pickle):
    """Return how many times opcode `code` occurs in the pickle bytes."""
    target = code.decode("latin-1")
    # Summing booleans counts the matches.
    return sum(op.code == target
               for op, _arg, _pos in pickletools.genops(pickle))
+
+# We can't very well test the extension registry without putting known stuff
+# in it, but we have to be careful to restore its original state. Code
+# should do this:
+#
+# e = ExtensionSaver(extension_code)
+# try:
+# fiddle w/ the extension registry's stuff for extension_code
+# finally:
+# e.restore()
+
class ExtensionSaver:
    """Save and restore one slot of the copy_reg extension registry.

    Construction removes any existing registration for `code` so the test
    can install its own; restore() puts the registry back exactly as it
    was found.
    """

    # Remember current registration for code (if any), and remove it (if
    # there is one).
    def __init__(self, code):
        self.code = code
        if code in copy_reg._inverted_registry:
            # (module, name) pair currently bound to this extension code.
            self.pair = copy_reg._inverted_registry[code]
            copy_reg.remove_extension(self.pair[0], self.pair[1], code)
        else:
            self.pair = None

    # Restore previous registration for code.
    def restore(self):
        code = self.code
        # First drop whatever the test registered under this code...
        curpair = copy_reg._inverted_registry.get(code)
        if curpair is not None:
            copy_reg.remove_extension(curpair[0], curpair[1], code)
        # ...then reinstate the original registration, if there was one.
        pair = self.pair
        if pair is not None:
            copy_reg.add_extension(pair[0], pair[1], code)
+
class C:
    """Plain test class; instances compare equal iff their __dict__s do."""
    def __eq__(self, other):
        return self.__dict__ == other.__dict__
+
+import __main__
+__main__.C = C
+C.__module__ = "__main__"
+
class myint(int):
    """int subclass carrying extra instance state (see test_misc)."""
    def __init__(self, x):
        # Derived attribute, so a round trip must preserve the instance
        # dict as well as the integer value.
        self.str = str(x)
+
class initarg(C):
    """Test class using the legacy __getinitargs__ pickle protocol."""

    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __getinitargs__(self):
        # Unpickling will call __init__(a, b) again with these values.
        return self.a, self.b
+
class metaclass(type):
    """Minimal custom metaclass."""
    pass

class use_metaclass(object, metaclass=metaclass):
    """Class with a nontrivial metaclass (exercised by test_metaclass)."""
    pass
+
+# DATA0 .. DATA2 are the pickles we expect under the various protocols, for
+# the object returned by create_data().
+
+# break into multiple strings to avoid confusing font-lock-mode
+DATA0 = b"""(lp1
+I0
+aL1L
+aF2
+ac__builtin__
+complex
+p2
+""" + \
+b"""(F3
+F0
+tRp3
+aI1
+aI-1
+aI255
+aI-255
+aI-256
+aI65535
+aI-65535
+aI-65536
+aI2147483647
+aI-2147483647
+aI-2147483648
+a""" + \
+b"""(S'abc'
+p4
+g4
+""" + \
+b"""(i__main__
+C
+p5
+""" + \
+b"""(dp6
+S'foo'
+p7
+I1
+sS'bar'
+p8
+I2
+sbg5
+tp9
+ag9
+aI5
+a.
+"""
+
+# Disassembly of DATA0.
+DATA0_DIS = """\
+ 0: ( MARK
+ 1: l LIST (MARK at 0)
+ 2: p PUT 1
+ 5: I INT 0
+ 8: a APPEND
+ 9: L LONG 1L
+ 13: a APPEND
+ 14: F FLOAT 2.0
+ 17: a APPEND
+ 18: c GLOBAL '__builtin__ complex'
+ 39: p PUT 2
+ 42: ( MARK
+ 43: F FLOAT 3.0
+ 46: F FLOAT 0.0
+ 49: t TUPLE (MARK at 42)
+ 50: R REDUCE
+ 51: p PUT 3
+ 54: a APPEND
+ 55: I INT 1
+ 58: a APPEND
+ 59: I INT -1
+ 63: a APPEND
+ 64: I INT 255
+ 69: a APPEND
+ 70: I INT -255
+ 76: a APPEND
+ 77: I INT -256
+ 83: a APPEND
+ 84: I INT 65535
+ 91: a APPEND
+ 92: I INT -65535
+ 100: a APPEND
+ 101: I INT -65536
+ 109: a APPEND
+ 110: I INT 2147483647
+ 122: a APPEND
+ 123: I INT -2147483647
+ 136: a APPEND
+ 137: I INT -2147483648
+ 150: a APPEND
+ 151: ( MARK
+ 152: S STRING 'abc'
+ 159: p PUT 4
+ 162: g GET 4
+ 165: ( MARK
+ 166: i INST '__main__ C' (MARK at 165)
+ 178: p PUT 5
+ 181: ( MARK
+ 182: d DICT (MARK at 181)
+ 183: p PUT 6
+ 186: S STRING 'foo'
+ 193: p PUT 7
+ 196: I INT 1
+ 199: s SETITEM
+ 200: S STRING 'bar'
+ 207: p PUT 8
+ 210: I INT 2
+ 213: s SETITEM
+ 214: b BUILD
+ 215: g GET 5
+ 218: t TUPLE (MARK at 151)
+ 219: p PUT 9
+ 222: a APPEND
+ 223: g GET 9
+ 226: a APPEND
+ 227: I INT 5
+ 230: a APPEND
+ 231: . STOP
+highest protocol among opcodes = 0
+"""
+
+DATA1 = (b']q\x01(K\x00L1L\nG@\x00\x00\x00\x00\x00\x00\x00'
+ b'c__builtin__\ncomplex\nq\x02(G@\x08\x00\x00\x00\x00\x00'
+ b'\x00G\x00\x00\x00\x00\x00\x00\x00\x00tRq\x03K\x01J\xff\xff'
+ b'\xff\xffK\xffJ\x01\xff\xff\xffJ\x00\xff\xff\xffM\xff\xff'
+ b'J\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff\xff\xff\x7fJ\x01\x00'
+ b'\x00\x80J\x00\x00\x00\x80(U\x03abcq\x04h\x04(c__main__\n'
+ b'C\nq\x05oq\x06}q\x07(U\x03fooq\x08K\x01U\x03barq\tK\x02ubh'
+ b'\x06tq\nh\nK\x05e.'
+ )
+
+# Disassembly of DATA1.
+DATA1_DIS = """\
+ 0: ] EMPTY_LIST
+ 1: q BINPUT 1
+ 3: ( MARK
+ 4: K BININT1 0
+ 6: L LONG 1L
+ 10: G BINFLOAT 2.0
+ 19: c GLOBAL '__builtin__ complex'
+ 40: q BINPUT 2
+ 42: ( MARK
+ 43: G BINFLOAT 3.0
+ 52: G BINFLOAT 0.0
+ 61: t TUPLE (MARK at 42)
+ 62: R REDUCE
+ 63: q BINPUT 3
+ 65: K BININT1 1
+ 67: J BININT -1
+ 72: K BININT1 255
+ 74: J BININT -255
+ 79: J BININT -256
+ 84: M BININT2 65535
+ 87: J BININT -65535
+ 92: J BININT -65536
+ 97: J BININT 2147483647
+ 102: J BININT -2147483647
+ 107: J BININT -2147483648
+ 112: ( MARK
+ 113: U SHORT_BINSTRING 'abc'
+ 118: q BINPUT 4
+ 120: h BINGET 4
+ 122: ( MARK
+ 123: c GLOBAL '__main__ C'
+ 135: q BINPUT 5
+ 137: o OBJ (MARK at 122)
+ 138: q BINPUT 6
+ 140: } EMPTY_DICT
+ 141: q BINPUT 7
+ 143: ( MARK
+ 144: U SHORT_BINSTRING 'foo'
+ 149: q BINPUT 8
+ 151: K BININT1 1
+ 153: U SHORT_BINSTRING 'bar'
+ 158: q BINPUT 9
+ 160: K BININT1 2
+ 162: u SETITEMS (MARK at 143)
+ 163: b BUILD
+ 164: h BINGET 6
+ 166: t TUPLE (MARK at 112)
+ 167: q BINPUT 10
+ 169: h BINGET 10
+ 171: K BININT1 5
+ 173: e APPENDS (MARK at 3)
+ 174: . STOP
+highest protocol among opcodes = 1
+"""
+
+DATA2 = (b'\x80\x02]q\x01(K\x00\x8a\x01\x01G@\x00\x00\x00\x00\x00\x00\x00'
+ b'c__builtin__\ncomplex\nq\x02G@\x08\x00\x00\x00\x00\x00\x00G\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x86Rq\x03K\x01J\xff\xff\xff\xffK'
+ b'\xffJ\x01\xff\xff\xffJ\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xff'
+ b'J\x00\x00\xff\xffJ\xff\xff\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00'
+ b'\x80(U\x03abcq\x04h\x04(c__main__\nC\nq\x05oq\x06}q\x07(U\x03foo'
+ b'q\x08K\x01U\x03barq\tK\x02ubh\x06tq\nh\nK\x05e.')
+
+# Disassembly of DATA2.
+DATA2_DIS = """\
+ 0: \x80 PROTO 2
+ 2: ] EMPTY_LIST
+ 3: q BINPUT 1
+ 5: ( MARK
+ 6: K BININT1 0
+ 8: \x8a LONG1 1L
+ 11: G BINFLOAT 2.0
+ 20: c GLOBAL '__builtin__ complex'
+ 41: q BINPUT 2
+ 43: G BINFLOAT 3.0
+ 52: G BINFLOAT 0.0
+ 61: \x86 TUPLE2
+ 62: R REDUCE
+ 63: q BINPUT 3
+ 65: K BININT1 1
+ 67: J BININT -1
+ 72: K BININT1 255
+ 74: J BININT -255
+ 79: J BININT -256
+ 84: M BININT2 65535
+ 87: J BININT -65535
+ 92: J BININT -65536
+ 97: J BININT 2147483647
+ 102: J BININT -2147483647
+ 107: J BININT -2147483648
+ 112: ( MARK
+ 113: U SHORT_BINSTRING 'abc'
+ 118: q BINPUT 4
+ 120: h BINGET 4
+ 122: ( MARK
+ 123: c GLOBAL '__main__ C'
+ 135: q BINPUT 5
+ 137: o OBJ (MARK at 122)
+ 138: q BINPUT 6
+ 140: } EMPTY_DICT
+ 141: q BINPUT 7
+ 143: ( MARK
+ 144: U SHORT_BINSTRING 'foo'
+ 149: q BINPUT 8
+ 151: K BININT1 1
+ 153: U SHORT_BINSTRING 'bar'
+ 158: q BINPUT 9
+ 160: K BININT1 2
+ 162: u SETITEMS (MARK at 143)
+ 163: b BUILD
+ 164: h BINGET 6
+ 166: t TUPLE (MARK at 112)
+ 167: q BINPUT 10
+ 169: h BINGET 10
+ 171: K BININT1 5
+ 173: e APPENDS (MARK at 5)
+ 174: . STOP
+highest protocol among opcodes = 2
+"""
+
def create_data():
    """Build the reference object graph that DATA0..DATA2 are pickles of."""
    c = C()
    c.foo = 1
    c.bar = 2
    x = [0, 1, 2.0, 3.0 + 0j]
    # Integer edge cases straddling cPickle.c's internal size cutoffs
    # (1-byte, 2-byte, and 4-byte encodings).
    x += [1, -1]
    for cutoff in (0xff, 0xffff, 0x7fffffff):
        x += [cutoff, -cutoff, -cutoff - 1]
    # The same tuple appended twice, so memoization (PUT/GET) is exercised.
    shared = ('abc', 'abc', c, c)
    x += [shared, shared, 5]
    return x
+
+class AbstractPickleTests(unittest.TestCase):
+ # Subclass must define self.dumps, self.loads, self.error.
+
+ _testdata = create_data()
+
+ def setUp(self):
+ pass
+
+ def test_misc(self):
+ # test various datatypes not tested by testdata
+ for proto in protocols:
+ x = myint(4)
+ s = self.dumps(x, proto)
+ y = self.loads(s)
+ self.assertEqual(x, y)
+
+ x = (1, ())
+ s = self.dumps(x, proto)
+ y = self.loads(s)
+ self.assertEqual(x, y)
+
+ x = initarg(1, x)
+ s = self.dumps(x, proto)
+ y = self.loads(s)
+ self.assertEqual(x, y)
+
+ # XXX test __reduce__ protocol?
+
+ def test_roundtrip_equality(self):
+ expected = self._testdata
+ for proto in protocols:
+ s = self.dumps(expected, proto)
+ got = self.loads(s)
+ self.assertEqual(expected, got)
+
+ def test_load_from_canned_string(self):
+ expected = self._testdata
+ for canned in DATA0, DATA1, DATA2:
+ got = self.loads(canned)
+ self.assertEqual(expected, got)
+
+ # There are gratuitous differences between pickles produced by
+ # pickle and cPickle, largely because cPickle starts PUT indices at
+ # 1 and pickle starts them at 0. See XXX comment in cPickle's put2() --
+ # there's a comment with an exclamation point there whose meaning
+ # is a mystery. cPickle also suppresses PUT for objects with a refcount
+ # of 1.
+ def dont_test_disassembly(self):
+ from cStringIO import StringIO
+ from pickletools import dis
+
+ for proto, expected in (0, DATA0_DIS), (1, DATA1_DIS):
+ s = self.dumps(self._testdata, proto)
+ filelike = StringIO()
+ dis(s, out=filelike)
+ got = filelike.getvalue()
+ self.assertEqual(expected, got)
+
+ def test_recursive_list(self):
+ l = []
+ l.append(l)
+ for proto in protocols:
+ s = self.dumps(l, proto)
+ x = self.loads(s)
+ self.assertEqual(len(x), 1)
+ self.assert_(x is x[0])
+
+ def test_recursive_dict(self):
+ d = {}
+ d[1] = d
+ for proto in protocols:
+ s = self.dumps(d, proto)
+ x = self.loads(s)
+ self.assertEqual(list(x.keys()), [1])
+ self.assert_(x[1] is x)
+
+ def test_recursive_inst(self):
+ i = C()
+ i.attr = i
+ for proto in protocols:
+ s = self.dumps(i, 2)
+ x = self.loads(s)
+ self.assertEqual(dir(x), dir(i))
+ self.assert_(x.attr is x)
+
+ def test_recursive_multi(self):
+ l = []
+ d = {1:l}
+ i = C()
+ i.attr = d
+ l.append(i)
+ for proto in protocols:
+ s = self.dumps(l, proto)
+ x = self.loads(s)
+ self.assertEqual(len(x), 1)
+ self.assertEqual(dir(x[0]), dir(i))
+ self.assertEqual(list(x[0].attr.keys()), [1])
+ self.assert_(x[0].attr[1] is x)
+
+ def test_garyp(self):
+ self.assertRaises(self.error, self.loads, b'garyp')
+
+ def test_insecure_strings(self):
+ insecure = ["abc", "2 + 2", # not quoted
+ #"'abc' + 'def'", # not a single quoted string
+ "'abc", # quote is not closed
+ "'abc\"", # open quote and close quote don't match
+ "'abc' ?", # junk after close quote
+ "'\\'", # trailing backslash
+ # some tests of the quoting rules
+ #"'abc\"\''",
+ #"'\\\\a\'\'\'\\\'\\\\\''",
+ ]
+ for s in insecure:
+ buf = b"S" + bytes(s) + b"\012p0\012."
+ self.assertRaises(ValueError, self.loads, buf)
+
+ if have_unicode:
+ def test_unicode(self):
+ endcases = [str(''), str('<\\u>'), str('<\\\u1234>'),
+ str('<\n>'), str('<\\>')]
+ for proto in protocols:
+ for u in endcases:
+ p = self.dumps(u, proto)
+ u2 = self.loads(p)
+ self.assertEqual(u2, u)
+
+ def test_ints(self):
+ import sys
+ for proto in protocols:
+ n = sys.maxint
+ while n:
+ for expected in (-n, n):
+ s = self.dumps(expected, proto)
+ n2 = self.loads(s)
+ self.assertEqual(expected, n2)
+ n = n >> 1
+
+ def test_maxint64(self):
+ maxint64 = (1 << 63) - 1
+ data = b'I' + bytes(str(maxint64)) + b'\n.'
+ got = self.loads(data)
+ self.assertEqual(got, maxint64)
+
+ # Try too with a bogus literal.
+ data = b'I' + bytes(str(maxint64)) + b'JUNK\n.'
+ self.assertRaises(ValueError, self.loads, data)
+
+ def test_long(self):
+ for proto in protocols:
+ # 256 bytes is where LONG4 begins.
+ for nbits in 1, 8, 8*254, 8*255, 8*256, 8*257:
+ nbase = 1 << nbits
+ for npos in nbase-1, nbase, nbase+1:
+ for n in npos, -npos:
+ pickle = self.dumps(n, proto)
+ got = self.loads(pickle)
+ self.assertEqual(n, got)
+ # Try a monster. This is quadratic-time in protos 0 & 1, so don't
+ # bother with those.
+ nbase = int("deadbeeffeedface", 16)
+ nbase += nbase << 1000000
+ for n in nbase, -nbase:
+ p = self.dumps(n, 2)
+ got = self.loads(p)
+ self.assertEqual(n, got)
+
+ @run_with_locale('LC_ALL', 'de_DE', 'fr_FR')
+ def test_float_format(self):
+ # make sure that floats are formatted locale independent
+ self.assertEqual(self.dumps(1.2)[0:3], b'F1.')
+
+ def test_reduce(self):
+ pass
+
+ def test_getinitargs(self):
+ pass
+
+ def test_metaclass(self):
+ a = use_metaclass()
+ for proto in protocols:
+ s = self.dumps(a, proto)
+ b = self.loads(s)
+ self.assertEqual(a.__class__, b.__class__)
+
+ def test_structseq(self):
+ import time
+ import os
+
+ t = time.localtime()
+ for proto in protocols:
+ s = self.dumps(t, proto)
+ u = self.loads(s)
+ self.assertEqual(t, u)
+ if hasattr(os, "stat"):
+ t = os.stat(os.curdir)
+ s = self.dumps(t, proto)
+ u = self.loads(s)
+ self.assertEqual(t, u)
+ if hasattr(os, "statvfs"):
+ t = os.statvfs(os.curdir)
+ s = self.dumps(t, proto)
+ u = self.loads(s)
+ self.assertEqual(t, u)
+
+ # Tests for protocol 2
+
+ def test_proto(self):
+ build_none = pickle.NONE + pickle.STOP
+ for proto in protocols:
+ expected = build_none
+ if proto >= 2:
+ expected = pickle.PROTO + bytes([proto]) + expected
+ p = self.dumps(None, proto)
+ self.assertEqual(p, expected)
+
+ oob = protocols[-1] + 1 # a future protocol
+ badpickle = pickle.PROTO + bytes([oob]) + build_none
+ try:
+ self.loads(badpickle)
+ except ValueError as detail:
+ self.failUnless(str(detail).startswith(
+ "unsupported pickle protocol"))
+ else:
+ self.fail("expected bad protocol number to raise ValueError")
+
+ def test_long1(self):
+ x = 12345678910111213141516178920
+ for proto in protocols:
+ s = self.dumps(x, proto)
+ y = self.loads(s)
+ self.assertEqual(x, y)
+ self.assertEqual(opcode_in_pickle(pickle.LONG1, s), proto >= 2)
+
+ def test_long4(self):
+ x = 12345678910111213141516178920 << (256*8)
+ for proto in protocols:
+ s = self.dumps(x, proto)
+ y = self.loads(s)
+ self.assertEqual(x, y)
+ self.assertEqual(opcode_in_pickle(pickle.LONG4, s), proto >= 2)
+
+ def test_short_tuples(self):
+ # Map (proto, len(tuple)) to expected opcode.
+ expected_opcode = {(0, 0): pickle.TUPLE,
+ (0, 1): pickle.TUPLE,
+ (0, 2): pickle.TUPLE,
+ (0, 3): pickle.TUPLE,
+ (0, 4): pickle.TUPLE,
+
+ (1, 0): pickle.EMPTY_TUPLE,
+ (1, 1): pickle.TUPLE,
+ (1, 2): pickle.TUPLE,
+ (1, 3): pickle.TUPLE,
+ (1, 4): pickle.TUPLE,
+
+ (2, 0): pickle.EMPTY_TUPLE,
+ (2, 1): pickle.TUPLE1,
+ (2, 2): pickle.TUPLE2,
+ (2, 3): pickle.TUPLE3,
+ (2, 4): pickle.TUPLE,
+ }
+ a = ()
+ b = (1,)
+ c = (1, 2)
+ d = (1, 2, 3)
+ e = (1, 2, 3, 4)
+ for proto in protocols:
+ for x in a, b, c, d, e:
+ s = self.dumps(x, proto)
+ y = self.loads(s)
+ self.assertEqual(x, y, (proto, x, s, y))
+ expected = expected_opcode[proto, len(x)]
+ self.assertEqual(opcode_in_pickle(expected, s), True)
+
+ def test_singletons(self):
+ # Map (proto, singleton) to expected opcode.
+ expected_opcode = {(0, None): pickle.NONE,
+ (1, None): pickle.NONE,
+ (2, None): pickle.NONE,
+
+ (0, True): pickle.INT,
+ (1, True): pickle.INT,
+ (2, True): pickle.NEWTRUE,
+
+ (0, False): pickle.INT,
+ (1, False): pickle.INT,
+ (2, False): pickle.NEWFALSE,
+ }
+ for proto in protocols:
+ for x in None, False, True:
+ s = self.dumps(x, proto)
+ y = self.loads(s)
+ self.assert_(x is y, (proto, x, s, y))
+ expected = expected_opcode[proto, x]
+ self.assertEqual(opcode_in_pickle(expected, s), True)
+
+ def test_newobj_tuple(self):
+ x = MyTuple([1, 2, 3])
+ x.foo = 42
+ x.bar = "hello"
+ for proto in protocols:
+ s = self.dumps(x, proto)
+ y = self.loads(s)
+ self.assertEqual(tuple(x), tuple(y))
+ self.assertEqual(x.__dict__, y.__dict__)
+
+ def test_newobj_list(self):
+ x = MyList([1, 2, 3])
+ x.foo = 42
+ x.bar = "hello"
+ for proto in protocols:
+ s = self.dumps(x, proto)
+ y = self.loads(s)
+ self.assertEqual(list(x), list(y))
+ self.assertEqual(x.__dict__, y.__dict__)
+
+ def test_newobj_generic(self):
+ for proto in protocols:
+ for C in myclasses:
+ B = C.__base__
+ x = C(C.sample)
+ x.foo = 42
+ s = self.dumps(x, proto)
+ y = self.loads(s)
+ detail = (proto, C, B, x, y, type(y))
+ self.assertEqual(B(x), B(y), detail)
+ self.assertEqual(x.__dict__, y.__dict__, detail)
+
+ # Register a type with copy_reg, with extension code extcode. Pickle
+ # an object of that type. Check that the resulting pickle uses opcode
+ # (EXT[124]) under proto 2, and not in proto 1.
+
+ def produce_global_ext(self, extcode, opcode):
+ e = ExtensionSaver(extcode)
+ try:
+ copy_reg.add_extension(__name__, "MyList", extcode)
+ x = MyList([1, 2, 3])
+ x.foo = 42
+ x.bar = "hello"
+
+ # Dump using protocol 1 for comparison.
+ s1 = self.dumps(x, 1)
+ self.assert_(bytes(__name__) in s1)
+ self.assert_(b"MyList" in s1)
+ self.assertEqual(opcode_in_pickle(opcode, s1), False)
+
+ y = self.loads(s1)
+ self.assertEqual(list(x), list(y))
+ self.assertEqual(x.__dict__, y.__dict__)
+
+ # Dump using protocol 2 for test.
+ s2 = self.dumps(x, 2)
+ self.assert_(bytes(__name__) not in s2)
+ self.assert_(b"MyList" not in s2)
+ self.assertEqual(opcode_in_pickle(opcode, s2), True, repr(s2))
+
+ y = self.loads(s2)
+ self.assertEqual(list(x), list(y))
+ self.assertEqual(x.__dict__, y.__dict__)
+
+ finally:
+ e.restore()
+
+ def test_global_ext1(self):
+ self.produce_global_ext(0x00000001, pickle.EXT1) # smallest EXT1 code
+ self.produce_global_ext(0x000000ff, pickle.EXT1) # largest EXT1 code
+
+ def test_global_ext2(self):
+ self.produce_global_ext(0x00000100, pickle.EXT2) # smallest EXT2 code
+ self.produce_global_ext(0x0000ffff, pickle.EXT2) # largest EXT2 code
+ self.produce_global_ext(0x0000abcd, pickle.EXT2) # check endianness
+
+ def test_global_ext4(self):
+ self.produce_global_ext(0x00010000, pickle.EXT4) # smallest EXT4 code
+ self.produce_global_ext(0x7fffffff, pickle.EXT4) # largest EXT4 code
+ self.produce_global_ext(0x12abcdef, pickle.EXT4) # check endianness
+
+ def test_list_chunking(self):
+ n = 10 # too small to chunk
+ x = list(range(n))
+ for proto in protocols:
+ s = self.dumps(x, proto)
+ y = self.loads(s)
+ self.assertEqual(x, y)
+ num_appends = count_opcode(pickle.APPENDS, s)
+ self.assertEqual(num_appends, proto > 0)
+
+ n = 2500 # expect at least two chunks when proto > 0
+ x = list(range(n))
+ for proto in protocols:
+ s = self.dumps(x, proto)
+ y = self.loads(s)
+ self.assertEqual(x, y)
+ num_appends = count_opcode(pickle.APPENDS, s)
+ if proto == 0:
+ self.assertEqual(num_appends, 0)
+ else:
+ self.failUnless(num_appends >= 2)
+
+ def test_dict_chunking(self):
+ n = 10 # too small to chunk
+ x = dict.fromkeys(range(n))
+ for proto in protocols:
+ s = self.dumps(x, proto)
+ assert isinstance(s, bytes)
+ y = self.loads(s)
+ self.assertEqual(x, y)
+ num_setitems = count_opcode(pickle.SETITEMS, s)
+ self.assertEqual(num_setitems, proto > 0)
+
+ n = 2500 # expect at least two chunks when proto > 0
+ x = dict.fromkeys(range(n))
+ for proto in protocols:
+ s = self.dumps(x, proto)
+ y = self.loads(s)
+ self.assertEqual(x, y)
+ num_setitems = count_opcode(pickle.SETITEMS, s)
+ if proto == 0:
+ self.assertEqual(num_setitems, 0)
+ else:
+ self.failUnless(num_setitems >= 2)
+
+ def test_simple_newobj(self):
+ x = object.__new__(SimpleNewObj) # avoid __init__
+ x.abc = 666
+ for proto in protocols:
+ s = self.dumps(x, proto)
+ self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s), proto >= 2)
+ y = self.loads(s) # will raise TypeError if __init__ called
+ self.assertEqual(y.abc, 666)
+ self.assertEqual(x.__dict__, y.__dict__)
+
+ def test_newobj_list_slots(self):
+ x = SlotList([1, 2, 3])
+ x.foo = 42
+ x.bar = "hello"
+ s = self.dumps(x, 2)
+ y = self.loads(s)
+ self.assertEqual(list(x), list(y))
+ self.assertEqual(x.__dict__, y.__dict__)
+ self.assertEqual(x.foo, y.foo)
+ self.assertEqual(x.bar, y.bar)
+
+ def test_reduce_overrides_default_reduce_ex(self):
+ for proto in 0, 1, 2:
+ x = REX_one()
+ self.assertEqual(x._reduce_called, 0)
+ s = self.dumps(x, proto)
+ self.assertEqual(x._reduce_called, 1)
+ y = self.loads(s)
+ self.assertEqual(y._reduce_called, 0)
+
+ def test_reduce_ex_called(self):
+ for proto in 0, 1, 2:
+ x = REX_two()
+ self.assertEqual(x._proto, None)
+ s = self.dumps(x, proto)
+ self.assertEqual(x._proto, proto)
+ y = self.loads(s)
+ self.assertEqual(y._proto, None)
+
+ def test_reduce_ex_overrides_reduce(self):
+ for proto in 0, 1, 2:
+ x = REX_three()
+ self.assertEqual(x._proto, None)
+ s = self.dumps(x, proto)
+ self.assertEqual(x._proto, proto)
+ y = self.loads(s)
+ self.assertEqual(y._proto, None)
+
+ def test_reduce_ex_calls_base(self):
+ for proto in 0, 1, 2:
+ x = REX_four()
+ self.assertEqual(x._proto, None)
+ s = self.dumps(x, proto)
+ self.assertEqual(x._proto, proto)
+ y = self.loads(s)
+ self.assertEqual(y._proto, proto)
+
+ def test_reduce_calls_base(self):
+ for proto in 0, 1, 2:
+ x = REX_five()
+ self.assertEqual(x._reduce_called, 0)
+ s = self.dumps(x, proto)
+ self.assertEqual(x._reduce_called, 1)
+ y = self.loads(s)
+ self.assertEqual(y._reduce_called, 1)
+
+# Test classes for reduce_ex
+
class REX_one(object):
    """Defines __reduce__ only; __reduce_ex__ is inherited from object."""

    _reduce_called = 0

    def __reduce__(self):
        # Record the call on the instance (shadowing the class default),
        # then reduce to a no-argument reconstruction with no state.
        self._reduce_called = 1
        return (REX_one, ())
+
class REX_two(object):
    """Defines __reduce_ex__ only; __reduce__ is inherited from object."""
    _proto = None
    def __reduce_ex__(self, proto):
        # Record which protocol the pickler asked for.
        self._proto = proto
        return REX_two, ()
    # No __reduce__ here, but inheriting it from object
+
class REX_three(object):
    """__reduce_ex__ must take precedence over __reduce__."""
    _proto = None
    def __reduce_ex__(self, proto):
        self._proto = proto
        return REX_two, ()
    def __reduce__(self):
        # Bug fix: the old "raise TestFailed, msg" form is Python-2-only
        # and a SyntaxError under the py3k grammar this file targets.
        raise TestFailed("This __reduce__ shouldn't be called")
+
class REX_four(object):
    """__reduce_ex__ records the protocol, then defers to the base class."""
    _proto = None
    def __reduce_ex__(self, proto):
        self._proto = proto
        return object.__reduce_ex__(self, proto)
    # Calling base class method should succeed
+
class REX_five(object):
    """__reduce__ records the call, then defers to object.__reduce__."""
    _reduce_called = 0
    def __reduce__(self):
        self._reduce_called = 1
        return object.__reduce__(self)
    # This one used to fail with infinite recursion
+
# Test classes for newobj

class MyInt(int):
    sample = 1

class MyLong(int):
    # NOTE(review): with int/long unified in this branch, this is
    # effectively a second int subclass kept for test parity.
    sample = 1

class MyFloat(float):
    sample = 1.0

class MyComplex(complex):
    sample = 1.0 + 0.0j

class MyStr(str):
    sample = "hello"

class MyUnicode(str):
    # Likewise a second str subclass now that str is unicode.
    sample = "hello \u1234"

class MyTuple(tuple):
    sample = (1, 2, 3)

class MyList(list):
    sample = [1, 2, 3]

class MyDict(dict):
    sample = {"a": 1, "b": 2}

# One builtin-subclass fixture per base type; each carries a `sample`
# value that test_newobj_generic uses to construct instances.
myclasses = [MyInt, MyLong, MyFloat,
             MyComplex,
             MyStr, MyUnicode,
             MyTuple, MyList, MyDict]
+
+
class SlotList(MyList):
    """MyList variant with __slots__, for slot-state pickling tests."""
    __slots__ = ["foo"]

class SimpleNewObj(object):
    # test_simple_newobj creates instances via object.__new__ and expects
    # unpickling to bypass __init__ entirely.
    def __init__(self, a, b, c):
        # raise an error, to make sure this isn't called
        raise TypeError("SimpleNewObj.__init__() didn't expect to get called")
+
class AbstractPickleModuleTests(unittest.TestCase):
    """Tests of the module-level pickle API.

    Subclasses bind self.module to a pickle-compatible module
    (pickle or cPickle).
    """

    def test_dump_closed_file(self):
        import os
        f = open(TESTFN, "w")
        try:
            f.close()
            # Dumping to a closed file must raise ValueError.
            self.assertRaises(ValueError, self.module.dump, 123, f)
        finally:
            os.remove(TESTFN)

    def test_load_closed_file(self):
        import os
        f = open(TESTFN, "w")
        try:
            f.close()
            # Bug fix: this was a copy-paste of the dump test and never
            # exercised load(); loading from a closed file must fail too.
            self.assertRaises(ValueError, self.module.load, f)
        finally:
            os.remove(TESTFN)

    def test_highest_protocol(self):
        # Of course this needs to be changed when HIGHEST_PROTOCOL changes.
        self.assertEqual(self.module.HIGHEST_PROTOCOL, 2)

    def test_callapi(self):
        from cStringIO import StringIO
        f = StringIO()
        # With and without keyword arguments
        self.module.dump(123, f, -1)
        self.module.dump(123, file=f, protocol=-1)
        self.module.dumps(123, -1)
        self.module.dumps(123, protocol=-1)
        self.module.Pickler(f, -1)
        self.module.Pickler(f, protocol=-1)
+
class AbstractPersistentPicklerTests(unittest.TestCase):

    # This class defines persistent_id() and persistent_load()
    # functions that should be used by the pickler.  All even integers
    # are pickled using persistent ids.

    def persistent_id(self, object):
        # NOTE(review): the parameter name shadows the builtin `object`;
        # kept as-is for fidelity with the original.
        if isinstance(object, int) and object % 2 == 0:
            self.id_count += 1
            return str(object)
        else:
            return None

    def persistent_load(self, oid):
        self.load_count += 1
        object = int(oid)
        assert object % 2 == 0
        return object

    def test_persistence(self):
        self.id_count = 0
        self.load_count = 0
        L = list(range(10))
        # The five even ints (0, 2, 4, 6, 8) travel as persistent ids.
        self.assertEqual(self.loads(self.dumps(L)), L)
        self.assertEqual(self.id_count, 5)
        self.assertEqual(self.load_count, 5)

    def test_bin_persistence(self):
        self.id_count = 0
        self.load_count = 0
        L = list(range(10))
        # Same, but through binary protocol 1.
        self.assertEqual(self.loads(self.dumps(L, 1)), L)
        self.assertEqual(self.id_count, 5)
        self.assertEqual(self.load_count, 5)
Added: sandbox/trunk/cpy_merge/Lib/test/regrtest.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Lib/test/regrtest.py Wed May 23 03:45:28 2007
@@ -0,0 +1,1275 @@
+#! /usr/bin/env python
+
+"""Regression test.
+
+This will find all modules whose name is "test_*" in the test
+directory, and run them. Various command line options provide
+additional facilities.
+
+Command line options:
+
+-v: verbose -- run tests in verbose mode with output to stdout
+-w: verbose2 -- re-run failed tests in verbose mode
+-d: debug -- print traceback for failed tests
+-q: quiet -- don't print anything except if a test fails
+-g: generate -- write the output file for a test instead of comparing it
+-x: exclude -- arguments are tests to *exclude*
+-s: single -- run only a single test (see below)
+-r: random -- randomize test execution order
+-f: fromfile -- read names of tests to run from a file (see below)
+-l: findleaks -- if GC is available detect tests that leak memory
+-u: use -- specify which special resource intensive tests to run
+-h: help -- print this text and exit
+-t: threshold -- call gc.set_threshold(N)
+-T: coverage -- turn on code coverage using the trace module
+-D: coverdir -- Directory where coverage files are put
+-N: nocoverdir -- Put coverage files alongside modules
+-L: runleaks -- run the leaks(1) command just before exit
+-R: huntrleaks -- search for reference leaks (needs debug build, v. slow)
+-M: memlimit -- run very large memory-consuming tests
+
+If non-option arguments are present, they are names for tests to run,
+unless -x is given, in which case they are names for tests not to run.
+If no test names are given, all tests are run.
+
+-v is incompatible with -g and does not compare test output files.
+
+-T turns on code coverage tracing with the trace module.
+
+-D specifies the directory where coverage files are put.
+
+-N Put coverage files alongside modules.
+
+-s means to run only a single test and exit. This is useful when
+doing memory analysis on the Python interpreter (which tend to consume
+too many resources to run the full regression test non-stop). The
+file /tmp/pynexttest is read to find the next test to run. If this
+file is missing, the first test_*.py file in testdir or on the command
+line is used. (actually tempfile.gettempdir() is used instead of
+/tmp).
+
+-f reads the names of tests from the file given as f's argument, one
+or more test names per line. Whitespace is ignored. Blank lines and
+lines beginning with '#' are ignored. This is especially useful for
+whittling down failures involving interactions among tests.
+
+-L causes the leaks(1) command to be run just before exit if it exists.
+leaks(1) is available on Mac OS X and presumably on some other
+FreeBSD-derived systems.
+
+-R runs each test several times and examines sys.gettotalrefcount() to
+see if the test appears to be leaking references. The argument should
+be of the form stab:run:fname where 'stab' is the number of times the
+test is run to let gettotalrefcount settle down, 'run' is the number
+of times further it is run and 'fname' is the name of the file the
+reports are written to. These parameters all have defaults (5, 4 and
+"reflog.txt" respectively), so the minimal invocation is '-R ::'.
+
+-M runs tests that require an exorbitant amount of memory. These tests
+typically try to ascertain containers keep working when containing more than
+2 billion objects, which only works on 64-bit systems. There are also some
+tests that try to exhaust the address space of the process, which only makes
+sense on 32-bit systems with at least 2Gb of memory. The passed-in memlimit,
+which is a string in the form of '2.5Gb', determines how much memory the
+tests will limit themselves to (but they may go slightly over.) The number
+shouldn't be more memory than the machine has (including swap memory). You
+should also keep in mind that swap memory is generally much, much slower
+than RAM, and setting memlimit to all available RAM or higher will heavily
+tax the machine. On the other hand, it is no use running these tests with a
+limit of less than 2.5Gb, and many require more than 20Gb. Tests that expect
+to use more than memlimit memory will be skipped. The big-memory tests
+generally run very, very long.
+
+-u is used to specify which special resource intensive tests to run,
+such as those requiring large file support or network connectivity.
+The argument is a comma-separated list of words indicating the
+resources to test. Currently only the following are defined:
+
+ all - Enable all special resources.
+
+ audio - Tests that use the audio device. (There are known
+ cases of broken audio drivers that can crash Python or
+ even the Linux kernel.)
+
+ curses - Tests that use curses and will modify the terminal's
+ state and output modes.
+
+ largefile - It is okay to run some test that may create huge
+ files. These tests can take a long time and may
+ consume >2GB of disk space temporarily.
+
+ network - It is okay to run tests that use external network
+ resource, e.g. testing SSL support for sockets.
+
+ bsddb - It is okay to run the bsddb testsuite, which takes
+ a long time to complete.
+
+ decimal - Test the decimal module against a large suite that
+ verifies compliance with standards.
+
+ compiler - Allow test_tokenize to verify round-trip lexing on
+ every file in the test library.
+
+ subprocess - Run all tests for the subprocess module.
+
+ urlfetch - It is okay to download files required on testing.
+
+To enable all resources except one, use '-uall,-<resource>'. For
+example, to run all the tests except for the bsddb tests, give the
+option '-uall,-bsddb'.
+"""
+
+import os
+import sys
+import getopt
+import random
+import warnings
+import re
+import StringIO
+import traceback
+
# I see no other way to suppress these warnings;
# putting them in test_grammar.py has no effect:
warnings.filterwarnings("ignore", "hex/oct constants", FutureWarning,
                        ".*test.test_grammar$")
if sys.maxint > 0x7fffffff:
    # Also suppress them in <string>, because for 64-bit platforms,
    # that's where test_grammar.py hides them.
    warnings.filterwarnings("ignore", "hex/oct constants", FutureWarning,
                            "<string>")

# Ignore ImportWarnings that only occur in the source tree,
# (because of modules with the same name as source-directories in Modules/)
for mod in ("ctypes", "gzip", "zipfile", "tarfile", "encodings.zlib_codec",
            "test.test_zipimport", "test.test_zlib", "test.test_zipfile",
            "test.test_codecs", "test.string_tests"):
    warnings.filterwarnings(module=".*%s$" % (mod,),
                            action="ignore", category=ImportWarning)

# MacOSX (a.k.a. Darwin) has a default stack size that is too small
# for deeply recursive regular expressions.  We see this as crashes in
# the Python test suite when running test_re.py and test_sre.py.  The
# fix is to set the stack limit to 2048.
# This approach may also be useful for other Unixy platforms that
# suffer from small default stack limits.
if sys.platform == 'darwin':
    try:
        import resource
    except ImportError:
        pass
    else:
        soft, hard = resource.getrlimit(resource.RLIMIT_STACK)
        # Raise the soft limit to 2 MiB, but never above the hard limit
        # and never below its current value.
        newsoft = min(hard, max(soft, 1024*2048))
        resource.setrlimit(resource.RLIMIT_STACK, (newsoft, hard))

from test import test_support

# Resource names accepted by the -u/--use option (see module docstring).
RESOURCE_NAMES = ('audio', 'curses', 'largefile', 'network', 'bsddb',
                  'decimal', 'compiler', 'subprocess', 'urlfetch')
+
+
def usage(code, msg=''):
    """Print the module doc string (plus an optional message) and exit
    with the given status code."""
    print(__doc__)
    if msg:
        print(msg)
    sys.exit(code)
+
+
def main(tests=None, testdir=None, verbose=0, quiet=False, generate=False,
         exclude=False, single=False, randomize=False, fromfile=None,
         findleaks=False, use_resources=None, trace=False, coverdir='coverage',
         runleaks=False, huntrleaks=False, verbose2=False, debug=False):
    """Execute a test suite.

    This also parses command-line options and modifies its behavior
    accordingly.

    tests -- a list of strings containing test names (optional)
    testdir -- the directory in which to look for tests (optional)

    Users other than the Python test suite will certainly want to
    specify testdir; if it's omitted, the directory containing the
    Python test suite is searched for.

    If the tests argument is omitted, the tests listed on the
    command-line will be used.  If that's empty, too, then all *.py
    files beginning with test_ will be used.

    The other default arguments (verbose, quiet, generate, exclude, single,
    randomize, findleaks, use_resources, trace and coverdir) allow programmers
    calling main() directly to set the values that would normally be set by
    flags on the command line.
    """

    test_support.record_original_stdout(sys.stdout)
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'dhvgqxsrf:lu:t:TD:NLR:wM:',
                                   ['help', 'verbose', 'quiet', 'generate',
                                    'exclude', 'single', 'random', 'fromfile',
                                    'findleaks', 'use=', 'threshold=', 'trace',
                                    'coverdir=', 'nocoverdir', 'runleaks',
                                    'huntrleaks=', 'verbose2', 'memlimit=',
                                    'debug',
                                    ])
    except getopt.error as msg:
        usage(2, msg)

    # Defaults
    if use_resources is None:
        use_resources = []
    # Command-line flags override the keyword-argument defaults.
    for o, a in opts:
        if o in ('-h', '--help'):
            usage(0)
        elif o in ('-v', '--verbose'):
            verbose += 1
        elif o in ('-w', '--verbose2'):
            verbose2 = True
        elif o in ('-d', '--debug'):
            debug = True
        elif o in ('-q', '--quiet'):
            quiet = True;
            verbose = 0
        elif o in ('-g', '--generate'):
            generate = True
        elif o in ('-x', '--exclude'):
            exclude = True
        elif o in ('-s', '--single'):
            single = True
        elif o in ('-r', '--randomize'):
            randomize = True
        elif o in ('-f', '--fromfile'):
            fromfile = a
        elif o in ('-l', '--findleaks'):
            findleaks = True
        elif o in ('-L', '--runleaks'):
            runleaks = True
        elif o in ('-t', '--threshold'):
            import gc
            gc.set_threshold(int(a))
        elif o in ('-T', '--coverage'):
            trace = True
        elif o in ('-D', '--coverdir'):
            coverdir = os.path.join(os.getcwd(), a)
        elif o in ('-N', '--nocoverdir'):
            coverdir = None
        elif o in ('-R', '--huntrleaks'):
            # -R stab:run:fname; empty fields fall back to the defaults
            # 5, 4 and "reflog.txt" (see module docstring).
            huntrleaks = a.split(':')
            if len(huntrleaks) != 3:
                print(a, huntrleaks)
                usage(2, '-R takes three colon-separated arguments')
            if len(huntrleaks[0]) == 0:
                huntrleaks[0] = 5
            else:
                huntrleaks[0] = int(huntrleaks[0])
            if len(huntrleaks[1]) == 0:
                huntrleaks[1] = 4
            else:
                huntrleaks[1] = int(huntrleaks[1])
            if len(huntrleaks[2]) == 0:
                huntrleaks[2] = "reflog.txt"
        elif o in ('-M', '--memlimit'):
            test_support.set_memlimit(a)
        elif o in ('-u', '--use'):
            # Comma-separated resource names; 'all' enables everything,
            # a leading '-' removes a previously enabled resource.
            u = [x.lower() for x in a.split(',')]
            for r in u:
                if r == 'all':
                    use_resources[:] = RESOURCE_NAMES
                    continue
                remove = False
                if r[0] == '-':
                    remove = True
                    r = r[1:]
                if r not in RESOURCE_NAMES:
                    usage(1, 'Invalid -u/--use option: ' + a)
                if remove:
                    if r in use_resources:
                        use_resources.remove(r)
                elif r not in use_resources:
                    use_resources.append(r)
    if generate and verbose:
        usage(2, "-g and -v don't go together!")
    if single and fromfile:
        usage(2, "-s and -f don't go together!")

    good = []
    bad = []
    skipped = []
    resource_denieds = []

    if findleaks:
        try:
            import gc
        except ImportError:
            print('No GC available, disabling findleaks.')
            findleaks = False
        else:
            # Uncomment the line below to report garbage that is not
            # freeable by reference counting alone.  By default only
            # garbage that is not collectable by the GC is reported.
            #gc.set_debug(gc.DEBUG_SAVEALL)
            found_garbage = []

    if single:
        # Resume from the test name recorded by the previous -s run.
        from tempfile import gettempdir
        filename = os.path.join(gettempdir(), 'pynexttest')
        try:
            fp = open(filename, 'r')
            next = fp.read().strip()
            tests = [next]
            fp.close()
        except IOError:
            pass

    if fromfile:
        tests = []
        fp = open(fromfile)
        for line in fp:
            guts = line.split() # assuming no test has whitespace in its name
            if guts and not guts[0].startswith('#'):
                tests.extend(guts)
        fp.close()

    # Strip .py extensions.
    # NOTE(review): relies on map() returning a list on this branch —
    # confirm, since a lazy map would break the list operations below.
    if args:
        args = map(removepy, args)
    if tests:
        tests = map(removepy, tests)

    stdtests = STDTESTS[:]
    nottests = NOTTESTS[:]
    if exclude:
        for arg in args:
            if arg in stdtests:
                stdtests.remove(arg)
        nottests[:0] = args
        args = []
    tests = tests or args or findtests(testdir, stdtests, nottests)
    if single:
        tests = tests[:1]
    if randomize:
        random.shuffle(tests)
    if trace:
        import trace
        tracer = trace.Trace(ignoredirs=[sys.prefix, sys.exec_prefix],
                             trace=False, count=True)
    test_support.verbose = verbose      # Tell tests to be moderately quiet
    test_support.use_resources = use_resources
    save_modules = sys.modules.keys()
    for test in tests:
        if not quiet:
            print(test)
            sys.stdout.flush()
        if trace:
            # If we're tracing code coverage, then we don't exit with status
            # if on a false return value from main.
            tracer.runctx('runtest(test, generate, verbose, quiet, testdir)',
                          globals=globals(), locals=vars())
        else:
            try:
                ok = runtest(test, generate, verbose, quiet, testdir,
                             huntrleaks)
            except KeyboardInterrupt:
                # print a newline separate from the ^C
                print()
                break
            except:
                raise
            # Bucket the result: >0 passed, 0 failed, <0 skipped
            # (-2 meaning a needed resource was denied).
            if ok > 0:
                good.append(test)
            elif ok == 0:
                bad.append(test)
            else:
                skipped.append(test)
                if ok == -2:
                    resource_denieds.append(test)
        if findleaks:
            gc.collect()
            if gc.garbage:
                print("Warning: test created", len(gc.garbage), end=' ')
                print("uncollectable object(s).")
                # move the uncollectable objects somewhere so we don't see
                # them again
                found_garbage.extend(gc.garbage)
                del gc.garbage[:]
        # Unload the newly imported modules (best effort finalization)
        # NOTE(review): iterating sys.modules.keys() while unloading relies
        # on keys() returning a snapshot — verify on this branch.
        for module in sys.modules.keys():
            if module not in save_modules and module.startswith("test."):
                test_support.unload(module)

    # The lists won't be sorted if running with -r
    good.sort()
    bad.sort()
    skipped.sort()

    if good and not quiet:
        if not bad and not skipped and len(good) > 1:
            print("All", end=' ')
        print(count(len(good), "test"), "OK.")
        if verbose:
            print("CAUTION: stdout isn't compared in verbose mode:")
            print("a test that passes in verbose mode may fail without it.")
    if bad:
        print(count(len(bad), "test"), "failed:")
        printlist(bad)
    if skipped and not quiet:
        print(count(len(skipped), "test"), "skipped:")
        printlist(skipped)

    # Compare actual skips against the per-platform expectations.
    e = _ExpectedSkips()
    plat = sys.platform
    if e.isvalid():
        surprise = set(skipped) - e.getexpected() - set(resource_denieds)
        if surprise:
            print(count(len(surprise), "skip"), \
                  "unexpected on", plat + ":")
            printlist(surprise)
        else:
            print("Those skips are all expected on", plat + ".")
    else:
        print("Ask someone to teach regrtest.py about which tests are")
        print("expected to get skipped on", plat + ".")

    if verbose2 and bad:
        print("Re-running failed tests in verbose mode")
        for test in bad:
            print("Re-running test %r in verbose mode" % test)
            sys.stdout.flush()
            try:
                test_support.verbose = 1
                ok = runtest(test, generate, 1, quiet, testdir,
                             huntrleaks, debug)
            except KeyboardInterrupt:
                # print a newline separate from the ^C
                print()
                break
            except:
                raise

    if single:
        # Record the next test for the following -s invocation, or remove
        # the state file when the whole suite has been traversed.
        alltests = findtests(testdir, stdtests, nottests)
        for i in range(len(alltests)):
            if tests[0] == alltests[i]:
                if i == len(alltests) - 1:
                    os.unlink(filename)
                else:
                    fp = open(filename, 'w')
                    fp.write(alltests[i+1] + '\n')
                    fp.close()
                break
        else:
            os.unlink(filename)

    if trace:
        r = tracer.results()
        r.write_results(show_missing=True, summary=True, coverdir=coverdir)

    if runleaks:
        os.system("leaks %d" % os.getpid())

    sys.exit(len(bad) > 0)
+
+
# Core tests that always run first, in this order.
STDTESTS = [
    'test_grammar',
    'test_opcodes',
    'test_dict',
    'test_builtin',
    'test_exceptions',
    'test_types',
    'test_unittest',
    'test_doctest',
    'test_doctest2',
    ]

# Modules matching test_* that are not themselves tests.
NOTTESTS = [
    'test_support',
    'test_future1',
    'test_future2',
    'test_future3',
    ]

def findtests(testdir=None, stdtests=STDTESTS, nottests=NOTTESTS):
    """Return a list of all applicable test modules."""
    if not testdir:
        testdir = findtestdir()
    # Collect test_*.py modules that are neither standard nor excluded,
    # then append them, sorted, after the standard tests.
    excluded = set(stdtests) | set(nottests)
    discovered = sorted(
        name[:-3] for name in os.listdir(testdir)
        if name[:5] == "test_" and name[-3:] == os.extsep + "py"
        and name[:-3] not in excluded)
    return stdtests + discovered
+
def runtest(test, generate, verbose, quiet, testdir=None,
            huntrleaks=False, debug=False):
    """Run a single test.

    test -- the name of the test
    generate -- if true, generate output, instead of running the test
                and comparing it to a previously created output file
    verbose -- if true, print more messages
    quiet -- if true, don't print 'skipped' messages (probably redundant)
    testdir -- test directory
    huntrleaks -- run multiple times to test for leaks; requires a debug
                  build; a triple corresponding to -R's three arguments
    debug -- if true, print tracebacks for failed tests regardless of
             verbose setting
    Return:
        -2  test skipped because resource denied
        -1  test skipped for some other reason
         0  test failed
         1  test passed
    """

    try:
        return runtest_inner(test, generate, verbose, quiet, testdir,
                             huntrleaks, debug)
    finally:
        # Always sweep up anything the test left behind, even on failure
        # or KeyboardInterrupt.
        cleanup_test_droppings(test, verbose)
+
def runtest_inner(test, generate, verbose, quiet,
                  testdir=None, huntrleaks=False, debug=False):
    """Import and run one test, then compare (or generate) its expected
    output file.  See runtest() for the argument and return contract."""
    test_support.unload(test)
    if not testdir:
        testdir = findtestdir()
    outputdir = os.path.join(testdir, "output")
    outputfile = os.path.join(outputdir, test)
    # In verbose mode output goes straight to stdout and is not compared;
    # otherwise it is captured for comparison with the expected file.
    if verbose:
        cfp = None
    else:
        cfp = StringIO.StringIO()  # XXX Should use io.StringIO()

    try:
        save_stdout = sys.stdout
        try:
            if cfp:
                sys.stdout = cfp
                print(test)              # Output file starts with test name
            if test.startswith('test.'):
                abstest = test
            else:
                # Always import it from the test package
                abstest = 'test.' + test
            the_package = __import__(abstest, globals(), locals(), [])
            the_module = getattr(the_package, test)
            # Most tests run to completion simply as a side-effect of
            # being imported.  For the benefit of tests that can't run
            # that way (like test_threaded_import), explicitly invoke
            # their test_main() function (if it exists).
            indirect_test = getattr(the_module, "test_main", None)
            if indirect_test is not None:
                indirect_test()
            if huntrleaks:
                dash_R(the_module, test, indirect_test, huntrleaks)
        finally:
            # Restore stdout no matter how the test ended.
            sys.stdout = save_stdout
    except test_support.ResourceDenied as msg:
        if not quiet:
            print(test, "skipped --", msg)
            sys.stdout.flush()
        return -2
    except (ImportError, test_support.TestSkipped) as msg:
        if not quiet:
            print(test, "skipped --", msg)
            sys.stdout.flush()
        return -1
    except KeyboardInterrupt:
        # Let the caller decide how to handle ^C.
        raise
    except test_support.TestFailed as msg:
        print("test", test, "failed --", msg)
        sys.stdout.flush()
        return 0
    except:
        # Any other exception counts as a crash; report it and optionally
        # show the traceback.
        type, value = sys.exc_info()[:2]
        print("test", test, "crashed --", str(type) + ":", value)
        sys.stdout.flush()
        if verbose or debug:
            traceback.print_exc(file=sys.stdout)
            sys.stdout.flush()
        return 0
    else:
        if not cfp:
            # Verbose mode: nothing was captured, nothing to compare.
            return 1
        output = cfp.getvalue()
        if generate:
            if output == test + "\n":
                if os.path.exists(outputfile):
                    # Write it since it already exists (and the contents
                    # may have changed), but let the user know it isn't
                    # needed:
                    print("output file", outputfile, \
                          "is no longer needed; consider removing it")
                else:
                    # We don't need it, so don't create it.
                    return 1
            fp = open(outputfile, "w")
            fp.write(output)
            fp.close()
            return 1
        if os.path.exists(outputfile):
            fp = open(outputfile, "r")
            expected = fp.read()
            fp.close()
        else:
            # No expected-output file: the output must be just the name.
            expected = test + "\n"
        if output == expected or huntrleaks:
            return 1
        print("test", test, "produced unexpected output:")
        sys.stdout.flush()
        reportdiff(expected, output)
        sys.stdout.flush()
        return 0
+
def cleanup_test_droppings(testname, verbose):
    """Best-effort removal of files and directories a test left behind."""
    import shutil

    # Try to clean up junk commonly left behind.  While tests shouldn't
    # leave any files or directories behind, when a test fails that can
    # be tedious for it to arrange.  The consequences can be especially
    # nasty on Windows, since if a test leaves a file open, it cannot be
    # deleted by name (while there's nothing we can do about that here
    # either, we can display the name of the offending test, which is a
    # real help).
    for leftover in (test_support.TESTFN, "db_home"):
        if not os.path.exists(leftover):
            continue
        if os.path.isdir(leftover):
            kind, nuker = "directory", shutil.rmtree
        elif os.path.isfile(leftover):
            kind, nuker = "file", os.unlink
        else:
            raise SystemError("os.path says %r exists but is neither "
                              "directory nor file" % leftover)
        if verbose:
            print("%r left behind %s %r" % (testname, kind, leftover))
        try:
            nuker(leftover)
        except Exception as msg:
            print(("%r left behind %s %r and it couldn't be "
                   "removed: %s" % (testname, kind, leftover, msg)),
                  file=sys.stderr)
+
def dash_R(the_module, test, indirect_test, huntrleaks):
    """Re-run a test repeatedly and watch sys.gettotalrefcount() for
    reference leaks (the -R option).  Requires a debug build."""
    # This code is hackish and inelegant, but it seems to do the job.
    import copy_reg

    if not hasattr(sys, 'gettotalrefcount'):
        raise Exception("Tracking reference leaks requires a debug build "
                        "of Python")

    # Save current values for dash_R_cleanup() to restore.
    fs = warnings.filters[:]
    ps = copy_reg.dispatch_table.copy()
    pic = sys.path_importer_cache.copy()

    # Prefer the module's test_main(); fall back to re-importing it.
    if indirect_test:
        def run_the_test():
            indirect_test()
    else:
        def run_the_test():
            reload(the_module)

    deltas = []
    nwarmup, ntracked, fname = huntrleaks
    repcount = nwarmup + ntracked
    print("beginning", repcount, "repetitions", file=sys.stderr)
    print(("1234567890"*(repcount//10 + 1))[:repcount], file=sys.stderr)
    dash_R_cleanup(fs, ps, pic)
    for i in range(repcount):
        rc = sys.gettotalrefcount()
        run_the_test()
        sys.stderr.write('.')
        dash_R_cleanup(fs, ps, pic)
        # Only count runs after the warmup phase has let caches settle.
        # NOTE(review): the "- 2" adjustment presumably compensates for
        # references created by this bookkeeping itself — confirm.
        if i >= nwarmup:
            deltas.append(sys.gettotalrefcount() - rc - 2)
    print(file=sys.stderr)
    if any(deltas):
        msg = '%s leaked %s references, sum=%s' % (test, deltas, sum(deltas))
        print(msg, file=sys.stderr)
        refrep = open(fname, "a")
        print(msg, file=refrep)
        refrep.close()
+
def dash_R_cleanup(fs, ps, pic):
    """Restore state saved by dash_R() and clear stdlib caches so that
    repeated runs start from (approximately) the same refcount."""
    import gc, copy_reg
    import _strptime, linecache, dircache
    import urlparse, urllib, urllib2, mimetypes, doctest
    import struct, filecmp
    from distutils.dir_util import _path_created

    # Restore some original values.
    warnings.filters[:] = fs
    copy_reg.dispatch_table.clear()
    copy_reg.dispatch_table.update(ps)
    sys.path_importer_cache.clear()
    sys.path_importer_cache.update(pic)

    # Clear assorted module caches.
    _path_created.clear()
    re.purge()
    _strptime._regex_cache.clear()
    urlparse.clear_cache()
    urllib.urlcleanup()
    urllib2.install_opener(None)
    dircache.reset()
    linecache.clearcache()
    mimetypes._default_mime_types()
    struct._cache.clear()
    filecmp._cache.clear()
    doctest.master = None

    # Collect cyclic trash.
    gc.collect()
+
+def reportdiff(expected, output):
+ import difflib
+ print("*" * 70)
+ a = expected.splitlines(1)
+ b = output.splitlines(1)
+ sm = difflib.SequenceMatcher(a=a, b=b)
+ tuples = sm.get_opcodes()
+
+ def pair(x0, x1):
+ # x0:x1 are 0-based slice indices; convert to 1-based line indices.
+ x0 += 1
+ if x0 >= x1:
+ return "line " + str(x0)
+ else:
+ return "lines %d-%d" % (x0, x1)
+
+ for op, a0, a1, b0, b1 in tuples:
+ if op == 'equal':
+ pass
+
+ elif op == 'delete':
+ print("***", pair(a0, a1), "of expected output missing:")
+ for line in a[a0:a1]:
+ print("-", line, end='')
+
+ elif op == 'replace':
+ print("*** mismatch between", pair(a0, a1), "of expected", \
+ "output and", pair(b0, b1), "of actual output:")
+ for line in difflib.ndiff(a[a0:a1], b[b0:b1]):
+ print(line, end='')
+
+ elif op == 'insert':
+ print("***", pair(b0, b1), "of actual output doesn't appear", \
+ "in expected output after line", str(a1)+":")
+ for line in b[b0:b1]:
+ print("+", line, end='')
+
+ else:
+ print("get_opcodes() returned bad tuple?!?!", (op, a0, a1, b0, b1))
+
+ print("*" * 70)
+
def findtestdir():
    """Return the directory containing the test files.

    When run as a script, that is the directory of sys.argv[0]; when
    imported, the directory of this module's file.
    """
    # Renamed the local from 'file' to 'script' so it no longer shadows
    # the builtin.
    if __name__ == '__main__':
        script = sys.argv[0]
    else:
        script = __file__
    # dirname() is '' for a bare filename; fall back to the current dir.
    return os.path.dirname(script) or os.curdir
+
def removepy(name):
    """Strip a trailing os.extsep + 'py' (normally '.py') from *name*."""
    suffix = os.extsep + "py"
    return name[:-3] if name.endswith(suffix) else name
+
def count(n, word):
    """Return '<n> <word>', naively pluralized with 's' when n != 1."""
    plural = "" if n == 1 else "s"
    return "%d %s%s" % (n, word, plural)
+
+def printlist(x, width=70, indent=4):
+ """Print the elements of iterable x to stdout.
+
+ Optional arg width (default 70) is the maximum line length.
+ Optional arg indent (default 4) is the number of blanks with which to
+ begin each line.
+ """
+
+ from textwrap import fill
+ blanks = ' ' * indent
+ print(fill(' '.join(map(str, x)), width,
+ initial_indent=blanks, subsequent_indent=blanks))
+
+# Map sys.platform to a string containing the basenames of tests
+# expected to be skipped on that platform.
+#
+# Special cases:
+# test_pep277
+# The _ExpectedSkips constructor adds this to the set of expected
+# skips if not os.path.supports_unicode_filenames.
+# test_socket_ssl
+# Controlled by test_socket_ssl.skip_expected. Requires the network
+# resource, and a socket module with ssl support.
+# test_timeout
+# Controlled by test_timeout.skip_expected. Requires the network
+# resource and a socket module.
+
+_expectations = {
+ 'win32':
+ """
+ test__locale
+ test_applesingle
+ test_bsddb3
+ test_commands
+ test_crypt
+ test_curses
+ test_dbm
+ test_dl
+ test_fcntl
+ test_fork1
+ test_gdbm
+ test_grp
+ test_ioctl
+ test_largefile
+ test_linuxaudiodev
+ test_mhlib
+ test_nis
+ test_openpty
+ test_ossaudiodev
+ test_poll
+ test_posix
+ test_pty
+ test_pwd
+ test_resource
+ test_signal
+ test_sunaudiodev
+ test_threadsignals
+ test_timing
+ test_wait3
+ test_wait4
+ """,
+ 'linux2':
+ """
+ test_applesingle
+ test_curses
+ test_dl
+ test_largefile
+ test_linuxaudiodev
+ test_nis
+ test_ntpath
+ test_ossaudiodev
+ test_sqlite
+ test_startfile
+ test_sunaudiodev
+ """,
+ 'mac':
+ """
+ test_atexit
+ test_bsddb
+ test_bsddb3
+ test_bz2
+ test_commands
+ test_crypt
+ test_curses
+ test_dbm
+ test_dl
+ test_fcntl
+ test_fork1
+ test_grp
+ test_ioctl
+ test_largefile
+ test_linuxaudiodev
+ test_locale
+ test_mmap
+ test_nis
+ test_ntpath
+ test_openpty
+ test_ossaudiodev
+ test_poll
+ test_popen
+ test_posix
+ test_pty
+ test_pwd
+ test_resource
+ test_signal
+ test_sqlite
+ test_startfile
+ test_sunaudiodev
+ test_sundry
+ test_tarfile
+ test_timing
+ """,
+ 'unixware7':
+ """
+ test_applesingle
+ test_bsddb
+ test_dl
+ test_largefile
+ test_linuxaudiodev
+ test_minidom
+ test_nis
+ test_ntpath
+ test_openpty
+ test_pyexpat
+ test_sax
+ test_startfile
+ test_sqlite
+ test_sunaudiodev
+ test_sundry
+ """,
+ 'openunix8':
+ """
+ test_applesingle
+ test_bsddb
+ test_dl
+ test_largefile
+ test_linuxaudiodev
+ test_minidom
+ test_nis
+ test_ntpath
+ test_openpty
+ test_pyexpat
+ test_sax
+ test_sqlite
+ test_startfile
+ test_sunaudiodev
+ test_sundry
+ """,
+ 'sco_sv3':
+ """
+ test_applesingle
+ test_asynchat
+ test_bsddb
+ test_dl
+ test_fork1
+ test_gettext
+ test_largefile
+ test_linuxaudiodev
+ test_locale
+ test_minidom
+ test_nis
+ test_ntpath
+ test_openpty
+ test_pyexpat
+ test_queue
+ test_sax
+ test_sqlite
+ test_startfile
+ test_sunaudiodev
+ test_sundry
+ test_thread
+ test_threaded_import
+ test_threadedtempfile
+ test_threading
+ """,
+ 'riscos':
+ """
+ test_applesingle
+ test_asynchat
+ test_atexit
+ test_bsddb
+ test_bsddb3
+ test_commands
+ test_crypt
+ test_dbm
+ test_dl
+ test_fcntl
+ test_fork1
+ test_gdbm
+ test_grp
+ test_largefile
+ test_linuxaudiodev
+ test_locale
+ test_mmap
+ test_nis
+ test_ntpath
+ test_openpty
+ test_poll
+ test_pty
+ test_pwd
+ test_sqlite
+ test_startfile
+ test_sunaudiodev
+ test_sundry
+ test_thread
+ test_threaded_import
+ test_threadedtempfile
+ test_threading
+ test_timing
+ """,
+ 'darwin':
+ """
+ test_gdbm
+ test_largefile
+ test_linuxaudiodev
+ test_locale
+ test_nis
+ test_ossaudiodev
+ test_startfile
+ test_sunaudiodev
+ """,
+ 'sunos5':
+ """
+ test_applesingle
+ test_bsddb
+ test_curses
+ test_dbm
+ test_gdbm
+ test_gzip
+ test_linuxaudiodev
+ test_openpty
+ test_sqlite
+ test_startfile
+ test_zipfile
+ test_zlib
+ """,
+ 'hp-ux11':
+ """
+ test_applesingle
+ test_bsddb
+ test_curses
+ test_dl
+ test_gdbm
+ test_gzip
+ test_largefile
+ test_linuxaudiodev
+ test_locale
+ test_minidom
+ test_nis
+ test_ntpath
+ test_openpty
+ test_pyexpat
+ test_sax
+ test_sqlite
+ test_startfile
+ test_sunaudiodev
+ test_zipfile
+ test_zlib
+ """,
+ 'atheos':
+ """
+ test_applesingle
+ test_curses
+ test_dl
+ test_gdbm
+ test_largefile
+ test_linuxaudiodev
+ test_locale
+ test_mhlib
+ test_mmap
+ test_nis
+ test_poll
+ test_resource
+ test_sqlite
+ test_startfile
+ test_sunaudiodev
+ """,
+ 'cygwin':
+ """
+ test_applesingle
+ test_bsddb3
+ test_curses
+ test_dbm
+ test_ioctl
+ test_largefile
+ test_linuxaudiodev
+ test_locale
+ test_nis
+ test_ossaudiodev
+ test_socketserver
+ test_sqlite
+ test_sunaudiodev
+ """,
+ 'os2emx':
+ """
+ test_applesingle
+ test_audioop
+ test_bsddb3
+ test_commands
+ test_curses
+ test_dl
+ test_largefile
+ test_linuxaudiodev
+ test_mhlib
+ test_mmap
+ test_nis
+ test_openpty
+ test_ossaudiodev
+ test_pty
+ test_resource
+ test_signal
+ test_sqlite
+ test_startfile
+ test_sunaudiodev
+ """,
+ 'freebsd4':
+ """
+ test_aepack
+ test_applesingle
+ test_bsddb
+ test_bsddb3
+ test_gdbm
+ test_linuxaudiodev
+ test_locale
+ test_macostools
+ test_nis
+ test_ossaudiodev
+ test_pep277
+ test_plistlib
+ test_pty
+ test_scriptpackages
+ test_socket_ssl
+ test_socketserver
+ test_sqlite
+ test_startfile
+ test_sunaudiodev
+ test_tcl
+ test_timeout
+ test_unicode_file
+ test_urllibnet
+ test_winreg
+ test_winsound
+ """,
+ 'aix5':
+ """
+ test_aepack
+ test_applesingle
+ test_bsddb
+ test_bsddb3
+ test_bz2
+ test_dl
+ test_gdbm
+ test_gzip
+ test_linuxaudiodev
+ test_macostools
+ test_nis
+ test_ossaudiodev
+ test_sqlite
+ test_startfile
+ test_sunaudiodev
+ test_tcl
+ test_winreg
+ test_winsound
+ test_zipimport
+ test_zlib
+ """,
+ 'openbsd3':
+ """
+ test_aepack
+ test_applesingle
+ test_bsddb
+ test_bsddb3
+ test_ctypes
+ test_dl
+ test_gdbm
+ test_linuxaudiodev
+ test_locale
+ test_macostools
+ test_nis
+ test_normalization
+ test_ossaudiodev
+ test_pep277
+ test_plistlib
+ test_scriptpackages
+ test_tcl
+ test_sqlite
+ test_startfile
+ test_sunaudiodev
+ test_unicode_file
+ test_winreg
+ test_winsound
+ """,
+ 'netbsd3':
+ """
+ test_aepack
+ test_applesingle
+ test_bsddb
+ test_bsddb3
+ test_ctypes
+ test_curses
+ test_dl
+ test_gdbm
+ test_linuxaudiodev
+ test_locale
+ test_macostools
+ test_nis
+ test_ossaudiodev
+ test_pep277
+ test_sqlite
+ test_startfile
+ test_sunaudiodev
+ test_tcl
+ test_unicode_file
+ test_winreg
+ test_winsound
+ """,
+}
+_expectations['freebsd5'] = _expectations['freebsd4']
+_expectations['freebsd6'] = _expectations['freebsd4']
+_expectations['freebsd7'] = _expectations['freebsd4']
+
class _ExpectedSkips:
    """Compute the set of tests expected to be skipped on this platform,
    from the _expectations table plus a few dynamic checks."""

    def __init__(self):
        import os.path
        from test import test_socket_ssl
        from test import test_timeout

        # self.valid stays False when sys.platform has no entry in
        # _expectations (isvalid() then reports we know nothing).
        self.valid = False
        if sys.platform in _expectations:
            s = _expectations[sys.platform]
            self.expected = set(s.split())

            if not os.path.supports_unicode_filenames:
                self.expected.add('test_pep277')

            # These two publish their own skip_expected flags.
            if test_socket_ssl.skip_expected:
                self.expected.add('test_socket_ssl')

            if test_timeout.skip_expected:
                self.expected.add('test_timeout')

            # Platform-exclusive tests are expected skips everywhere else.
            if not sys.platform in ("mac", "darwin"):
                MAC_ONLY = ["test_macostools", "test_aepack",
                            "test_plistlib", "test_scriptpackages"]
                for skip in MAC_ONLY:
                    self.expected.add(skip)

            if sys.platform != "win32":
                WIN_ONLY = ["test_unicode_file", "test_winreg",
                            "test_winsound"]
                for skip in WIN_ONLY:
                    self.expected.add(skip)

            if sys.platform != 'irix':
                IRIX_ONLY = ["test_imageop"]
                for skip in IRIX_ONLY:
                    self.expected.add(skip)

            self.valid = True

    def isvalid(self):
        "Return true iff _ExpectedSkips knows about the current platform."
        return self.valid

    def getexpected(self):
        """Return set of test names we expect to skip on current platform.

        self.isvalid() must be true.
        """

        assert self.isvalid()
        return self.expected
+
if __name__ == '__main__':
    # Remove regrtest.py's own directory from the module search path.  This
    # prevents relative imports from working, and relative imports will screw
    # up the testing framework.  E.g. if both test.test_support and
    # test_support are imported, they will not contain the same globals, and
    # much of the testing framework relies on the globals in the
    # test.test_support module.
    mydir = os.path.abspath(os.path.normpath(os.path.dirname(sys.argv[0])))
    i = pathlen = len(sys.path)
    # NOTE(review): when i reaches 0 the loop runs once more with i == -1,
    # re-examining sys.path[-1]; harmless but redundant.
    while i >= 0:
        i -= 1
        if os.path.abspath(os.path.normpath(sys.path[i])) == mydir:
            del sys.path[i]
    if len(sys.path) == pathlen:
        print('Could not find %r in sys.path to remove it' % mydir)
    main()
Added: sandbox/trunk/cpy_merge/Lib/test/test_StringIO.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Lib/test/test_StringIO.py Wed May 23 03:45:28 2007
@@ -0,0 +1,172 @@
+# Tests StringIO and cStringIO
+
+import sys
+import unittest
+import StringIO
+import cStringIO
+from test import test_support
+
+
class TestGenericStringIO:
    """Behavioral tests shared by StringIO.StringIO and cStringIO.StringIO.

    Subclasses set the class attribute MODULE to the module under test and
    mix in unittest.TestCase.
    """
    # use a class variable MODULE to define which module is being tested

    # Line of data to test as string
    _line = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!'

    # Constructor to use for the test data (._line is passed to this
    # constructor)
    constructor = str

    def setUp(self):
        # Fixtures: a single line, five copies of it, and a file-like
        # object positioned at the start of those five lines.
        self._line = self.constructor(self._line)
        self._lines = self.constructor((self._line + '\n') * 5)
        self._fp = self.MODULE.StringIO(self._lines)

    def test_reads(self):
        eq = self.assertEqual
        # seek() without an argument must fail.
        self.assertRaises(TypeError, self._fp.seek)
        eq(self._fp.read(10), self._line[:10])
        eq(self._fp.readline(), self._line[10:] + '\n')
        # readlines() with a size hint stops after the line crossing it.
        eq(len(self._fp.readlines(60)), 2)

    def test_writes(self):
        f = self.MODULE.StringIO()
        self.assertRaises(TypeError, f.seek)
        f.write(self._line[:6])
        f.seek(3)
        # Overwrite in the middle, then append past the end.
        f.write(self._line[20:26])
        f.write(self._line[52])
        self.assertEqual(f.getvalue(), 'abcuvwxyz!')

    def test_writelines(self):
        f = self.MODULE.StringIO()
        f.writelines([self._line[0], self._line[1], self._line[2]])
        f.seek(0)
        self.assertEqual(f.getvalue(), 'abc')

    def test_writelines_error(self):
        # writelines() must propagate an exception raised by its iterable.
        def errorGen():
            yield 'a'
            raise KeyboardInterrupt()
        f = self.MODULE.StringIO()
        self.assertRaises(KeyboardInterrupt, f.writelines, errorGen())

    def test_truncate(self):
        eq = self.assertEqual
        f = self.MODULE.StringIO()
        f.write(self._lines)
        f.seek(10)
        f.truncate()        # no argument: truncate at the current position
        eq(f.getvalue(), 'abcdefghij')
        f.truncate(5)
        eq(f.getvalue(), 'abcde')
        f.write('xyz')
        eq(f.getvalue(), 'abcdexyz')
        # Negative sizes are rejected.
        self.assertRaises(IOError, f.truncate, -1)
        f.close()
        # Writing to a closed file must fail.
        self.assertRaises(ValueError, f.write, 'frobnitz')

    def test_closed_flag(self):
        # .closed tracks close() for both empty and initialized objects.
        f = self.MODULE.StringIO()
        self.assertEqual(f.closed, False)
        f.close()
        self.assertEqual(f.closed, True)
        f = self.MODULE.StringIO(self.constructor("abc"))
        self.assertEqual(f.closed, False)
        f.close()
        self.assertEqual(f.closed, True)

    def test_isatty(self):
        f = self.MODULE.StringIO()
        self.assertRaises(TypeError, f.isatty, None)
        self.assertEqual(f.isatty(), False)
        f.close()
        self.assertRaises(ValueError, f.isatty)

    def test_iterator(self):
        eq = self.assertEqual
        unless = self.failUnless
        eq(iter(self._fp), self._fp)
        # Does this object support the iteration protocol?
        unless(hasattr(self._fp, '__iter__'))
        unless(hasattr(self._fp, '__next__'))
        i = 0
        for line in self._fp:
            eq(line, self._line + '\n')
            i += 1
        eq(i, 5)
        self._fp.close()
        # Iterating a closed file must fail.
        self.assertRaises(ValueError, next, self._fp)
+
class TestStringIO(TestGenericStringIO, unittest.TestCase):
    # Run the generic tests against the pure-Python StringIO module.
    MODULE = StringIO

    def test_unicode(self):

        if not test_support.have_unicode: return

        # The StringIO module also supports concatenating Unicode
        # snippets to larger Unicode strings. This is tested by this
        # method. Note that cStringIO does not support this extension.

        f = self.MODULE.StringIO()
        f.write(self._line[:6])
        f.seek(3)
        f.write(str(self._line[20:26]))
        f.write(str(self._line[52]))
        s = f.getvalue()
        self.assertEqual(s, str('abcuvwxyz!'))
        self.assertEqual(type(s), str)
+
class TestcStringIO(TestGenericStringIO, unittest.TestCase):
    # Run the generic tests against the C implementation, feeding it
    # byte strings (str8) rather than unicode.
    MODULE = cStringIO
    constructor = str8

    def test_unicode(self):

        if not test_support.have_unicode: return

        # The cStringIO module converts Unicode strings to character
        # strings when writing them to cStringIO objects.
        # Check that this works.

        f = self.MODULE.StringIO()
        f.write(str(self._line[:5]))
        s = f.getvalue()
        self.assertEqual(s, 'abcde')
        self.assertEqual(type(s), str8)

        f = self.MODULE.StringIO(str(self._line[:5]))
        s = f.getvalue()
        self.assertEqual(s, 'abcde')
        self.assertEqual(type(s), str8)

        # XXX This no longer fails -- the default encoding is always UTF-8.
        ##self.assertRaises(UnicodeDecodeError, self.MODULE.StringIO, '\xf4')
+
class TestBufferStringIO(TestStringIO):
    # Same tests as TestStringIO, but the test data is wrapped in a
    # buffer object before being handed to StringIO.

    def constructor(self, s):
        return buffer(str8(s))

class TestBuffercStringIO(TestcStringIO):
    # Buffer-wrapped test data against the C implementation.

    def constructor(self, s):
        return buffer(str8(s))
+
+
def test_main():
    """Run the test classes for both implementations via test_support."""
    classes = [TestStringIO, TestcStringIO]
    # Jython has no buffer type; skip the buffer-based variants there.
    if not sys.platform.startswith('java'):
        classes += [TestBufferStringIO, TestBuffercStringIO]
    test_support.run_unittest(*classes)


if __name__ == '__main__':
    unittest.main()
Added: sandbox/trunk/cpy_merge/Lib/test/test_cProfile.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Lib/test/test_cProfile.py Wed May 23 03:45:28 2007
@@ -0,0 +1,123 @@
+"""Test suite for the cProfile module."""
+
+import cProfile, pstats, sys
+
# In order to have reproducible time, we simulate a timer in the global
# variable 'ticks', which represents simulated time in milliseconds.
# (We can't use a helper function increment the timer since it would be
# included in the profile and would appear to consume all the time.)
ticks = 0

# IMPORTANT: this is an output test. *ALL* NUMBERS in the expected
# output are relevant. If you change the formatting of pstats,
# please don't just regenerate output/test_cProfile without checking
# very carefully that not a single number has changed.

def test_main():
    """Profile testfunc() under the simulated timer and print the stats
    in every format covered by the expected-output file."""
    global ticks
    ticks = 42000
    prof = cProfile.Profile(timer, 0.001)
    prof.runctx("testfunc()", globals(), locals())
    # testfunc() and its callees consume exactly 1000 simulated ticks.
    assert ticks == 43000, ticks
    st = pstats.Stats(prof)
    st.strip_dirs().sort_stats('stdname').print_stats()
    st.print_callees()
    st.print_callers()
+ st.print_callers()
+
def timer():
    """Simulated timer: return the global tick counter."""
    return ticks

# NOTE: the workload functions below are tick-calibrated; the inline
# numbers are the simulated cost of each call and are asserted (via the
# expected-output file) by test_main().  Do not restructure them.

def testfunc():
    # 1 call
    # 1000 ticks total: 270 ticks local, 730 ticks in subfunctions
    global ticks
    ticks += 99
    helper()                            # 300
    helper()                            # 300
    ticks += 171
    factorial(14)                       # 130

def factorial(n):
    # 23 calls total
    # 170 ticks total, 150 ticks local
    # 3 primitive calls, 130, 20 and 20 ticks total
    # including 116, 17, 17 ticks local
    global ticks
    if n > 0:
        ticks += n
        return mul(n, factorial(n-1))
    else:
        ticks += 11
        return 1

def mul(a, b):
    # 20 calls
    # 1 tick, local
    global ticks
    ticks += 1
    return a * b

def helper():
    # 2 calls
    # 300 ticks total: 20 ticks local, 260 ticks in subfunctions
    global ticks
    ticks += 1
    helper1()                           # 30
    ticks += 2
    helper1()                           # 30
    ticks += 6
    helper2()                           # 50
    ticks += 3
    helper2()                           # 50
    ticks += 2
    helper2()                           # 50
    ticks += 5
    helper2_indirect()                  # 70
    ticks += 1

def helper1():
    # 4 calls
    # 30 ticks total: 29 ticks local, 1 tick in subfunctions
    global ticks
    ticks += 10
    hasattr(C(), "foo")                 # 1
    ticks += 19
    lst = []
    lst.append(42)                      # 0
    sys.exc_info()                      # 0

def helper2_indirect():
    helper2()                           # 50
    factorial(3)                        # 20

def helper2():
    # 8 calls
    # 50 ticks local: 39 ticks local, 11 ticks in subfunctions
    global ticks
    ticks += 11
    hasattr(C(), "bar")                 # 1
    ticks += 13
    subhelper()                         # 10
    ticks += 15

def subhelper():
    # 8 calls
    # 10 ticks total: 8 ticks local, 2 ticks in subfunctions
    global ticks
    ticks += 2
    for i in range(2):                  # 0
        try:
            C().foo                     # 1 x 2
        except AttributeError:
            ticks += 3                  # 3 x 2

class C:
    def __getattr__(self, name):
        # 28 calls
        # 1 tick, local
        global ticks
        ticks += 1
        raise AttributeError

if __name__ == "__main__":
    test_main()
Added: sandbox/trunk/cpy_merge/Lib/test/test_copy_reg.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Lib/test/test_copy_reg.py Wed May 23 03:45:28 2007
@@ -0,0 +1,121 @@
+import copy_reg
+import unittest
+
+from test import test_support
+from test.pickletester import ExtensionSaver
+
class C:
    # Classic-style class: copy_reg.pickle() is expected to reject it
    # (see CopyRegTestCase.test_class).
    pass


class WithoutSlots(object):
    # No __slots__ at all: _slotnames() should return [].
    pass

class WithWeakref(object):
    # Only a __weakref__ slot, which _slotnames() must ignore.
    __slots__ = ('__weakref__',)

class WithPrivate(object):
    # Private slot name: _slotnames() should report the mangled form.
    __slots__ = ('__spam',)

class WithSingleString(object):
    # __slots__ given as a bare string rather than a sequence.
    __slots__ = 'spam'

class WithInherited(WithSingleString):
    # Inherited slots are combined with the subclass's own.
    __slots__ = ('eggs',)
+
+
class CopyRegTestCase(unittest.TestCase):
    """Tests for copy_reg pickle registration, the extension registry,
    and the _slotnames() helper."""

    def test_class(self):
        # Classic classes cannot be registered.
        self.assertRaises(TypeError, copy_reg.pickle,
                          C, None, None)

    def test_noncallable_reduce(self):
        self.assertRaises(TypeError, copy_reg.pickle,
                          type(1), "not a callable")

    def test_noncallable_constructor(self):
        self.assertRaises(TypeError, copy_reg.pickle,
                          type(1), int, "not a callable")

    def test_bool(self):
        import copy
        self.assertEquals(True, copy.copy(True))

    def test_extension_registry(self):
        mod, func, code = 'junk1 ', ' junk2', 0xabcd
        # ExtensionSaver snapshots and restores any pre-existing
        # registration for 'code'.
        e = ExtensionSaver(code)
        try:
            # Shouldn't be in registry now.
            self.assertRaises(ValueError, copy_reg.remove_extension,
                              mod, func, code)
            copy_reg.add_extension(mod, func, code)
            # Should be in the registry.
            self.assert_(copy_reg._extension_registry[mod, func] == code)
            self.assert_(copy_reg._inverted_registry[code] == (mod, func))
            # Shouldn't be in the cache.
            self.assert_(code not in copy_reg._extension_cache)
            # Redundant registration should be OK.
            copy_reg.add_extension(mod, func, code)  # shouldn't blow up
            # Conflicting code.
            self.assertRaises(ValueError, copy_reg.add_extension,
                              mod, func, code + 1)
            self.assertRaises(ValueError, copy_reg.remove_extension,
                              mod, func, code + 1)
            # Conflicting module name.
            self.assertRaises(ValueError, copy_reg.add_extension,
                              mod[1:], func, code)
            self.assertRaises(ValueError, copy_reg.remove_extension,
                              mod[1:], func, code)
            # Conflicting function name.
            self.assertRaises(ValueError, copy_reg.add_extension,
                              mod, func[1:], code)
            self.assertRaises(ValueError, copy_reg.remove_extension,
                              mod, func[1:], code)
            # Can't remove one that isn't registered at all.
            if code + 1 not in copy_reg._inverted_registry:
                self.assertRaises(ValueError, copy_reg.remove_extension,
                                  mod[1:], func[1:], code + 1)

        finally:
            e.restore()

        # Shouldn't be there anymore.
        self.assert_((mod, func) not in copy_reg._extension_registry)
        # The code *may* be in copy_reg._extension_registry, though, if
        # we happened to pick on a registered code.  So don't check for
        # that.

        # Check valid codes at the limits.
        for code in 1, 0x7fffffff:
            e = ExtensionSaver(code)
            try:
                copy_reg.add_extension(mod, func, code)
                copy_reg.remove_extension(mod, func, code)
            finally:
                e.restore()

        # Ensure invalid codes blow up.
        for code in -1, 0, 0x80000000:
            self.assertRaises(ValueError, copy_reg.add_extension,
                              mod, func, code)

    def test_slotnames(self):
        # _slotnames() returns (mangled) slot names, ignoring __weakref__
        # and including slots inherited from base classes.
        self.assertEquals(copy_reg._slotnames(WithoutSlots), [])
        self.assertEquals(copy_reg._slotnames(WithWeakref), [])
        expected = ['_WithPrivate__spam']
        self.assertEquals(copy_reg._slotnames(WithPrivate), expected)
        self.assertEquals(copy_reg._slotnames(WithSingleString), ['spam'])
        expected = ['eggs', 'spam']
        expected.sort()
        result = copy_reg._slotnames(WithInherited)
        result.sort()
        self.assertEquals(result, expected)
+
+
def test_main():
    """Entry point used by regrtest."""
    test_support.run_unittest(CopyRegTestCase)


if __name__ == "__main__":
    test_main()
Added: sandbox/trunk/cpy_merge/Lib/test/test_cpickle.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Lib/test/test_cpickle.py Wed May 23 03:45:28 2007
@@ -0,0 +1,103 @@
+import cPickle
+import unittest
+from cStringIO import StringIO
+from test.pickletester import AbstractPickleTests, AbstractPickleModuleTests
+from test import test_support
+
class cPickleTests(AbstractPickleTests, AbstractPickleModuleTests):
    # Exercise the module-level cPickle.dumps/loads entry points.

    def setUp(self):
        self.dumps = cPickle.dumps
        self.loads = cPickle.loads

    # Exception type raised for a bad GET opcode; the abstract tests use it.
    error = cPickle.BadPickleGet
    module = cPickle
+
class cPicklePicklerTests(AbstractPickleTests):
    """Exercise the explicit cPickle.Pickler/Unpickler classes."""

    def dumps(self, arg, proto=0):
        # Pickle into a cStringIO buffer and hand back its contents.
        out = StringIO()
        pickler = cPickle.Pickler(out, proto)
        pickler.dump(arg)
        out.seek(0)
        return out.read()

    def loads(self, buf):
        unpickler = cPickle.Unpickler(StringIO(buf))
        return unpickler.load()

    # Exception type raised for a bad GET opcode.
    error = cPickle.BadPickleGet
+
class cPickleListPicklerTests(AbstractPickleTests):
    # Exercise the fileless Pickler: cPickle.Pickler(proto) accumulates
    # output internally and exposes it via getvalue().

    def dumps(self, arg, proto=0):
        p = cPickle.Pickler(proto)
        p.dump(arg)
        return p.getvalue()

    def loads(self, *args):
        f = StringIO(args[0])
        p = cPickle.Unpickler(f)
        return p.load()

    error = cPickle.BadPickleGet
+
class cPickleFastPicklerTests(AbstractPickleTests):
    """Run the pickle tests with the Pickler's 'fast' mode enabled.

    Fast mode cannot handle cyclic structures, so the test_recursive_*
    overrides below expect ValueError instead of a successful round-trip.
    """

    def dumps(self, arg, proto=0):
        f = StringIO()
        p = cPickle.Pickler(f, proto)
        p.fast = 1
        p.dump(arg)
        f.seek(0)
        return f.read()

    def loads(self, *args):
        f = StringIO(args[0])
        p = cPickle.Unpickler(f)
        return p.load()

    error = cPickle.BadPickleGet

    def test_recursive_list(self):
        self.assertRaises(ValueError,
                          AbstractPickleTests.test_recursive_list,
                          self)

    def test_recursive_inst(self):
        self.assertRaises(ValueError,
                          AbstractPickleTests.test_recursive_inst,
                          self)

    def test_recursive_dict(self):
        self.assertRaises(ValueError,
                          AbstractPickleTests.test_recursive_dict,
                          self)

    def test_recursive_multi(self):
        self.assertRaises(ValueError,
                          AbstractPickleTests.test_recursive_multi,
                          self)

    def test_nonrecursive_deep(self):
        # If it's not cyclic, it should pickle OK even if the nesting
        # depth exceeds PY_CPICKLE_FAST_LIMIT.  That happens to be
        # 50 today.  Jack Jansen reported stack overflow on Mac OS 9
        # at 64.
        a = []
        for i in range(60):
            a = [a]
        b = self.loads(self.dumps(a))
        self.assertEqual(a, b)
+
def test_main():
    """Entry point used by regrtest: run all cPickle test variants."""
    test_support.run_unittest(
        cPickleTests,
        cPicklePicklerTests,
        cPickleListPicklerTests,
        cPickleFastPicklerTests
    )

if __name__ == "__main__":
    test_main()
Added: sandbox/trunk/cpy_merge/Lib/test/test_pickle.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Lib/test/test_pickle.py Wed May 23 03:45:28 2007
@@ -0,0 +1,73 @@
+import pickle
+import unittest
+import io
+
+from test import test_support
+
+from test.pickletester import AbstractPickleTests
+from test.pickletester import AbstractPickleModuleTests
+from test.pickletester import AbstractPersistentPicklerTests
+
class PickleTests(AbstractPickleTests, AbstractPickleModuleTests):
    # Exercise the module-level pickle.dumps/loads entry points.

    def dumps(self, arg, proto=0, fast=0):
        # 'fast' is accepted for interface parity but has no effect here.
        return pickle.dumps(arg, proto)

    def loads(self, buf):
        return pickle.loads(buf)

    module = pickle
    # The pure-Python unpickler raises KeyError for a bad GET opcode.
    error = KeyError
+
class PicklerTests(AbstractPickleTests):
    """Run the pickle tests against explicit Pickler/Unpickler objects."""

    # The pure-Python unpickler raises KeyError for a bad GET opcode.
    error = KeyError

    def dumps(self, arg, proto=0, fast=0):
        # Pickle into an in-memory bytes buffer.
        buf = io.BytesIO()
        pickler = pickle.Pickler(buf, proto)
        if fast:
            pickler.fast = fast
        pickler.dump(arg)
        buf.seek(0)
        return bytes(buf.read())

    def loads(self, buf):
        unpickler = pickle.Unpickler(io.BytesIO(buf))
        return unpickler.load()
+
class PersPicklerTests(AbstractPersistentPicklerTests):
    # Persistent-id round-trip tests: the locally defined subclasses
    # forward persistent_id/persistent_load to hooks that the abstract
    # test case defines on the test instance.

    def dumps(self, arg, proto=0, fast=0):
        class PersPickler(pickle.Pickler):
            # 'subself' is the pickler; 'self' (closed over) is the
            # test case whose persistent_id hook we delegate to.
            def persistent_id(subself, obj):
                return self.persistent_id(obj)
        f = io.BytesIO()
        p = PersPickler(f, proto)
        if fast:
            p.fast = fast
        p.dump(arg)
        f.seek(0)
        return f.read()

    def loads(self, buf):
        class PersUnpickler(pickle.Unpickler):
            def persistent_load(subself, obj):
                return self.persistent_load(obj)
        f = io.BytesIO(buf)
        u = PersUnpickler(f)
        return u.load()
+
def test_main():
    """Entry point used by regrtest: unit tests plus pickle's doctests."""
    test_support.run_unittest(
        PickleTests,
        PicklerTests,
        PersPicklerTests
    )
    test_support.run_doctest(pickle)

if __name__ == "__main__":
    test_main()
Added: sandbox/trunk/cpy_merge/Lib/test/test_pickletools.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Lib/test/test_pickletools.py Wed May 23 03:45:28 2007
@@ -0,0 +1,3 @@
import pickletools
from test import test_support
# pickletools' docstrings double as its test suite; running the doctests
# at import time is the whole test.
test_support.run_doctest(pickletools)
Added: sandbox/trunk/cpy_merge/Modules/_lsprof.c
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Modules/_lsprof.c Wed May 23 03:45:28 2007
@@ -0,0 +1,875 @@
+#include "Python.h"
+#include "compile.h"
+#include "frameobject.h"
+#include "structseq.h"
+#include "rotatingtree.h"
+
+#if !defined(HAVE_LONG_LONG)
+#error "This module requires long longs!"
+#endif
+
/*** Selection of a high-precision timer ***/

#ifdef MS_WINDOWS

#include <windows.h>

/* Raw performance-counter reading; scaled to seconds by hpTimerUnit(). */
static PY_LONG_LONG
hpTimer(void)
{
    LARGE_INTEGER li;
    QueryPerformanceCounter(&li);
    return li.QuadPart;
}

/* Seconds per hpTimer() tick. */
static double
hpTimerUnit(void)
{
    LARGE_INTEGER li;
    if (QueryPerformanceFrequency(&li))
        return 1.0 / li.QuadPart;
    else
        return 0.000001; /* unlikely */
}

#else /* !MS_WINDOWS */

#ifndef HAVE_GETTIMEOFDAY
#error "This module requires gettimeofday() on non-Windows platforms!"
#endif

#if (defined(PYOS_OS2) && defined(PYCC_GCC))
#include <sys/time.h>
#else
#include <sys/resource.h>
#include <sys/times.h>
#endif

/* gettimeofday() folded into a single microsecond count. */
static PY_LONG_LONG
hpTimer(void)
{
    struct timeval tv;
    PY_LONG_LONG ret;
#ifdef GETTIMEOFDAY_NO_TZ
    gettimeofday(&tv);
#else
    gettimeofday(&tv, (struct timezone *)NULL);
#endif
    ret = tv.tv_sec;
    ret = ret * 1000000 + tv.tv_usec;
    return ret;
}

/* Seconds per tick: hpTimer() counts microseconds on this branch. */
static double
hpTimerUnit(void)
{
    return 0.000001;
}

#endif /* MS_WINDOWS */
+
/************************************************************/
/* Written by Brett Rosen and Ted Czotter */

struct _ProfilerEntry;

/* represents a function called from another function */
typedef struct _ProfilerSubEntry {
    rotating_node_t header;     /* keyed by the callee's ProfilerEntry */
    PY_LONG_LONG tt;            /* total time in this caller->callee edge */
    PY_LONG_LONG it;            /* inline time (not in further subcalls) */
    long callcount;
    long recursivecallcount;
    long recursionLevel;        /* current nesting depth of this edge */
} ProfilerSubEntry;

/* represents a function or user defined block */
typedef struct _ProfilerEntry {
    rotating_node_t header;     /* keyed by code object / PyMethodDef ptr */
    PyObject *userObj;          /* PyCodeObject, or a descriptive str for builtins */
    PY_LONG_LONG tt;            /* total time in this entry */
    PY_LONG_LONG it;            /* inline time in this entry (not in subcalls) */
    long callcount;             /* how many times this was called */
    long recursivecallcount;    /* how many times called recursively */
    long recursionLevel;
    rotating_node_t *calls;     /* tree of ProfilerSubEntry per caller edge */
} ProfilerEntry;

/* One stack frame of the profiler's shadow call stack. */
typedef struct _ProfilerContext {
    PY_LONG_LONG t0;            /* timestamp when the call started */
    PY_LONG_LONG subt;          /* time accumulated in subcalls so far */
    struct _ProfilerContext *previous;
    ProfilerEntry *ctxEntry;
} ProfilerContext;

typedef struct {
    PyObject_HEAD
    rotating_node_t *profilerEntries;
    ProfilerContext *currentProfilerContext;    /* top of shadow stack */
    ProfilerContext *freelistProfilerContext;   /* recycled contexts */
    int flags;                                  /* POF_* bits below */
    PyObject *externalTimer;                    /* user timer callable or NULL */
    double externalTimerUnit;                   /* seconds per external tick */
} ProfilerObject;

#define POF_ENABLED     0x001
#define POF_SUBCALLS    0x002
#define POF_BUILTINS    0x004
#define POF_NOMEMORY    0x100   /* a malloc failed while profiling */

static PyTypeObject PyProfiler_Type;

#define PyProfiler_Check(op) PyObject_TypeCheck(op, &PyProfiler_Type)
#define PyProfiler_CheckExact(op) ((op)->ob_type == &PyProfiler_Type)

/*** External Timers ***/

#define DOUBLE_TIMER_PRECISION 4294967296.0
static PyObject *empty_tuple;
+
/* Invoke the user-supplied timer callable and convert its result to the
   internal PY_LONG_LONG tick representation.  Errors are reported with
   PyErr_WriteUnraisable and 0 is returned, since profiler callbacks
   have no way to propagate an exception. */
static PY_LONG_LONG CallExternalTimer(ProfilerObject *pObj)
{
    PY_LONG_LONG result;
    PyObject *o = PyObject_Call(pObj->externalTimer, empty_tuple, NULL);
    if (o == NULL) {
        PyErr_WriteUnraisable(pObj->externalTimer);
        return 0;
    }
    if (pObj->externalTimerUnit > 0.0) {
        /* interpret the result as an integer that will be scaled
           in profiler_getstats() */
        result = PyLong_AsLongLong(o);
    }
    else {
        /* interpret the result as a double measured in seconds.
           As the profiler works with PY_LONG_LONG internally
           we convert it to a large integer */
        double val = PyFloat_AsDouble(o);
        /* error handling delayed to the code below */
        result = (PY_LONG_LONG) (val * DOUBLE_TIMER_PRECISION);
    }
    Py_DECREF(o);
    if (PyErr_Occurred()) {
        PyErr_WriteUnraisable((PyObject *) pObj);
        return 0;
    }
    return result;
}

/* Current time from either the external timer or the platform timer. */
#define CALL_TIMER(pObj) ((pObj)->externalTimer ? \
                                CallExternalTimer(pObj) : \
                                hpTimer())
+
/*** ProfilerObject ***/

/* Map a profiled callable to the object stored in ProfilerEntry.userObj:
   non-C functions are kept as-is (new reference); built-in functions and
   methods are replaced by a descriptive string so that __self__ is not
   kept alive by the profile data. */
static PyObject *
normalizeUserObj(PyObject *obj)
{
    PyCFunctionObject *fn;
    if (!PyCFunction_Check(obj)) {
        Py_INCREF(obj);
        return obj;
    }
    /* Replace built-in function objects with a descriptive string
       because of built-in methods -- keeping a reference to
       __self__ is probably not a good idea. */
    fn = (PyCFunctionObject *)obj;

    if (fn->m_self == NULL) {
        /* built-in function: look up the module name */
        PyObject *mod = fn->m_module;
        char *modname;
        if (mod && PyString_Check(mod)) {
            modname = PyString_AS_STRING(mod);
        }
        else if (mod && PyModule_Check(mod)) {
            modname = PyModule_GetName(mod);
            if (modname == NULL) {
                PyErr_Clear();
                modname = "__builtin__";
            }
        }
        else {
            modname = "__builtin__";
        }
        /* Omit the module prefix for the __builtin__ namespace. */
        if (strcmp(modname, "__builtin__") != 0)
            return PyString_FromFormat("<%s.%s>",
                                       modname,
                                       fn->m_ml->ml_name);
        else
            return PyString_FromFormat("<%s>",
                                       fn->m_ml->ml_name);
    }
    else {
        /* built-in method: try to return
           repr(getattr(type(__self__), __name__))
        */
        PyObject *self = fn->m_self;
        PyObject *name = PyString_FromString(fn->m_ml->ml_name);
        if (name != NULL) {
            PyObject *mo = _PyType_Lookup(self->ob_type, name);
            Py_XINCREF(mo);
            Py_DECREF(name);
            if (mo != NULL) {
                PyObject *res = PyObject_Repr(mo);
                Py_DECREF(mo);
                if (res != NULL)
                    return res;
            }
        }
        /* Fall back to a generic label if the lookup/repr failed. */
        PyErr_Clear();
        return PyString_FromFormat("<built-in method %s>",
                                   fn->m_ml->ml_name);
    }
}
+
/* Create a ProfilerEntry for 'key' (a code object or PyMethodDef
   pointer) and insert it into pObj's tree.  On failure, sets
   POF_NOMEMORY and returns NULL (profiler callbacks cannot raise). */
static ProfilerEntry*
newProfilerEntry(ProfilerObject *pObj, void *key, PyObject *userObj)
{
    ProfilerEntry *self;
    self = (ProfilerEntry*) malloc(sizeof(ProfilerEntry));
    if (self == NULL) {
        pObj->flags |= POF_NOMEMORY;
        return NULL;
    }
    userObj = normalizeUserObj(userObj);
    if (userObj == NULL) {
        PyErr_Clear();
        free(self);
        pObj->flags |= POF_NOMEMORY;
        return NULL;
    }
    self->header.key = key;
    self->userObj = userObj;
    self->tt = 0;
    self->it = 0;
    self->callcount = 0;
    self->recursivecallcount = 0;
    self->recursionLevel = 0;
    self->calls = EMPTY_ROTATING_TREE;
    RotatingTree_Add(&pObj->profilerEntries, &self->header);
    return self;
}

/* Look up the entry for 'key'; NULL if not profiled yet. */
static ProfilerEntry*
getEntry(ProfilerObject *pObj, void *key)
{
    return (ProfilerEntry*) RotatingTree_Get(&pObj->profilerEntries, key);
}

/* Look up the caller->entry subentry; NULL if this edge is new. */
static ProfilerSubEntry *
getSubEntry(ProfilerObject *pObj, ProfilerEntry *caller, ProfilerEntry* entry)
{
    return (ProfilerSubEntry*) RotatingTree_Get(&caller->calls,
                                                (void *)entry);
}

/* Create and insert a subentry recording calls from 'caller' to 'entry'. */
static ProfilerSubEntry *
newSubEntry(ProfilerObject *pObj,  ProfilerEntry *caller, ProfilerEntry* entry)
{
    ProfilerSubEntry *self;
    self = (ProfilerSubEntry*) malloc(sizeof(ProfilerSubEntry));
    if (self == NULL) {
        pObj->flags |= POF_NOMEMORY;
        return NULL;
    }
    self->header.key = (void *)entry;
    self->tt = 0;
    self->it = 0;
    self->callcount = 0;
    self->recursivecallcount = 0;
    self->recursionLevel = 0;
    RotatingTree_Add(&caller->calls, &self->header);
    return self;
}

/* RotatingTree_Enum callback: free one subentry. */
static int freeSubEntry(rotating_node_t *header, void *arg)
{
    ProfilerSubEntry *subentry = (ProfilerSubEntry*) header;
    free(subentry);
    return 0;
}

/* RotatingTree_Enum callback: free one entry and its subentry tree. */
static int freeEntry(rotating_node_t *header, void *arg)
{
    ProfilerEntry *entry = (ProfilerEntry*) header;
    RotatingTree_Enum(entry->calls, freeSubEntry, NULL);
    Py_DECREF(entry->userObj);
    free(entry);
    return 0;
}

/* Free all collected profile data and the context free list. */
static void clearEntries(ProfilerObject *pObj)
{
    RotatingTree_Enum(pObj->profilerEntries, freeEntry, NULL);
    pObj->profilerEntries = EMPTY_ROTATING_TREE;
    /* release the memory held by the free list of ProfilerContexts */
    while (pObj->freelistProfilerContext) {
        ProfilerContext *c = pObj->freelistProfilerContext;
        pObj->freelistProfilerContext = c->previous;
        free(c);
    }
}
+
/* Push 'self' onto the shadow call stack for 'entry' and start timing.
   The timestamp is taken last so setup cost is not charged to the call. */
static void
initContext(ProfilerObject *pObj, ProfilerContext *self, ProfilerEntry *entry)
{
    self->ctxEntry = entry;
    self->subt = 0;
    self->previous = pObj->currentProfilerContext;
    pObj->currentProfilerContext = self;
    ++entry->recursionLevel;
    if ((pObj->flags & POF_SUBCALLS) && self->previous) {
        /* find or create an entry for me in my caller's entry */
        ProfilerEntry *caller = self->previous->ctxEntry;
        ProfilerSubEntry *subentry = getSubEntry(pObj, caller, entry);
        if (subentry == NULL)
            subentry = newSubEntry(pObj, caller, entry);
        if (subentry)
            ++subentry->recursionLevel;
    }
    self->t0 = CALL_TIMER(pObj);
}

/* Pop 'self' from the shadow stack and fold its elapsed time into
   'entry' (and the caller's subentry when POF_SUBCALLS is on).  Total
   time is only accumulated when the outermost recursive activation
   returns, so recursion is not double-counted. */
static void
Stop(ProfilerObject *pObj, ProfilerContext *self, ProfilerEntry *entry)
{
    PY_LONG_LONG tt = CALL_TIMER(pObj) - self->t0;
    PY_LONG_LONG it = tt - self->subt;   /* inline time: minus subcalls */
    if (self->previous)
        self->previous->subt += tt;
    pObj->currentProfilerContext = self->previous;
    if (--entry->recursionLevel == 0)
        entry->tt += tt;
    else
        ++entry->recursivecallcount;
    entry->it += it;
    entry->callcount++;
    if ((pObj->flags & POF_SUBCALLS) && self->previous) {
        /* find or create an entry for me in my caller's entry */
        ProfilerEntry *caller = self->previous->ctxEntry;
        ProfilerSubEntry *subentry = getSubEntry(pObj, caller, entry);
        if (subentry) {
            if (--subentry->recursionLevel == 0)
                subentry->tt += tt;
            else
                ++subentry->recursivecallcount;
            subentry->it += it;
            ++subentry->callcount;
        }
    }
}
+
/* Record entry into the function identified by 'key', reusing a
   ProfilerContext from the free list when possible. */
static void
ptrace_enter_call(PyObject *self, void *key, PyObject *userObj)
{
    /* entering a call to the function identified by 'key'
       (which can be a PyCodeObject or a PyMethodDef pointer) */
    ProfilerObject *pObj = (ProfilerObject*)self;
    ProfilerEntry *profEntry;
    ProfilerContext *pContext;

    profEntry = getEntry(pObj, key);
    if (profEntry == NULL) {
        profEntry = newProfilerEntry(pObj, key, userObj);
        if (profEntry == NULL)
            return;     /* out of memory; POF_NOMEMORY already set */
    }
    /* grab a ProfilerContext out of the free list */
    pContext = pObj->freelistProfilerContext;
    if (pContext) {
        pObj->freelistProfilerContext = pContext->previous;
    }
    else {
        /* free list exhausted, allocate a new one */
        pContext = (ProfilerContext*)
            malloc(sizeof(ProfilerContext));
        if (pContext == NULL) {
            pObj->flags |= POF_NOMEMORY;
            return;
        }
    }
    initContext(pObj, pContext, profEntry);
}

/* Record return from the function identified by 'key' and recycle the
   current ProfilerContext onto the free list. */
static void
ptrace_leave_call(PyObject *self, void *key)
{
    /* leaving a call to the function identified by 'key' */
    ProfilerObject *pObj = (ProfilerObject*)self;
    ProfilerEntry *profEntry;
    ProfilerContext *pContext;

    pContext = pObj->currentProfilerContext;
    if (pContext == NULL)
        return;
    profEntry = getEntry(pObj, key);
    if (profEntry) {
        Stop(pObj, pContext, profEntry);
    }
    else {
        /* no entry for this key: just unwind the shadow stack */
        pObj->currentProfilerContext = pContext->previous;
    }
    /* put pContext into the free list */
    pContext->previous = pObj->freelistProfilerContext;
    pObj->freelistProfilerContext = pContext;
}
+
/* The PyEval_SetProfile callback: dispatch interpreter trace events to
   ptrace_enter_call/ptrace_leave_call.  Always returns 0 (success). */
static int
profiler_callback(PyObject *self, PyFrameObject *frame, int what,
                  PyObject *arg)
{
    switch (what) {

    /* the 'frame' of a called function is about to start its execution */
    case PyTrace_CALL:
        ptrace_enter_call(self, (void *)frame->f_code,
                          (PyObject *)frame->f_code);
        break;

    /* the 'frame' of a called function is about to finish
       (either normally or with an exception) */
    case PyTrace_RETURN:
        ptrace_leave_call(self, (void *)frame->f_code);
        break;

    /* case PyTrace_EXCEPTION:
       If the exception results in the function exiting, a
       PyTrace_RETURN event will be generated, so we don't need to
       handle it. */

#ifdef PyTrace_C_CALL   /* not defined in Python <= 2.3 */
    /* the Python function 'frame' is issuing a call to the built-in
       function 'arg' */
    case PyTrace_C_CALL:
        if ((((ProfilerObject *)self)->flags & POF_BUILTINS)
            && PyCFunction_Check(arg)) {
            /* builtins are keyed by their PyMethodDef pointer */
            ptrace_enter_call(self,
                              ((PyCFunctionObject *)arg)->m_ml,
                              arg);
        }
        break;

    /* the call to the built-in function 'arg' is returning into its
       caller 'frame' */
    case PyTrace_C_RETURN:          /* ...normally */
    case PyTrace_C_EXCEPTION:       /* ...with an exception set */
        if ((((ProfilerObject *)self)->flags & POF_BUILTINS)
            && PyCFunction_Check(arg)) {
            ptrace_leave_call(self,
                              ((PyCFunctionObject *)arg)->m_ml);
        }
        break;
#endif

    default:
        break;
    }
    return 0;
}
+
+static int
+pending_exception(ProfilerObject *pObj)
+{
+ if (pObj->flags & POF_NOMEMORY) {
+ pObj->flags -= POF_NOMEMORY;
+ PyErr_SetString(PyExc_MemoryError,
+ "memory was exhausted while profiling");
+ return -1;
+ }
+ return 0;
+}
+
/************************************************************/

/* Field layout of the profiler_entry struct sequence returned by
   getstats(). */
static PyStructSequence_Field profiler_entry_fields[] = {
    {"code", "code object or built-in function name"},
    {"callcount", "how many times this was called"},
    {"reccallcount", "how many times called recursively"},
    {"totaltime", "total time in this entry"},
    {"inlinetime", "inline time in this entry (not in subcalls)"},
    {"calls", "details of the calls"},
    {0}
};

/* Field layout of profiler_subentry (per caller->callee edge). */
static PyStructSequence_Field profiler_subentry_fields[] = {
    {"code", "called code object or built-in function name"},
    {"callcount", "how many times this is called"},
    {"reccallcount", "how many times this is called recursively"},
    {"totaltime", "total time spent in this call"},
    {"inlinetime", "inline time (not in further subcalls)"},
    {0}
};

static PyStructSequence_Desc profiler_entry_desc = {
    "_lsprof.profiler_entry", /* name */
    NULL, /* doc */
    profiler_entry_fields,
    6
};

static PyStructSequence_Desc profiler_subentry_desc = {
    "_lsprof.profiler_subentry", /* name */
    NULL, /* doc */
    profiler_subentry_fields,
    5
};

/* One-time initialization flag for the struct sequence types below. */
static int initialized;
static PyTypeObject StatsEntryType;
static PyTypeObject StatsSubEntryType;


/* Accumulator threaded through the RotatingTree_Enum callbacks while
   building the getstats() result. */
typedef struct {
    PyObject *list;     /* result list of profiler_entry objects */
    PyObject *sublist;  /* per-entry list of profiler_subentry, or None */
    double factor;      /* converts internal ticks to seconds */
} statscollector_t;
+
/* RotatingTree_Enum callback: append one profiler_subentry to
   collect->sublist.  Returns 0 on success, -1 on error. */
static int statsForSubEntry(rotating_node_t *node, void *arg)
{
    ProfilerSubEntry *sentry = (ProfilerSubEntry*) node;
    statscollector_t *collect = (statscollector_t*) arg;
    ProfilerEntry *entry = (ProfilerEntry*) sentry->header.key;
    int err;
    PyObject *sinfo;
    sinfo = PyObject_CallFunction((PyObject*) &StatsSubEntryType,
                                  "((Olldd))",
                                  entry->userObj,
                                  sentry->callcount,
                                  sentry->recursivecallcount,
                                  collect->factor * sentry->tt,
                                  collect->factor * sentry->it);
    if (sinfo == NULL)
        return -1;
    err = PyList_Append(collect->sublist, sinfo);
    Py_DECREF(sinfo);
    return err;
}

/* RotatingTree_Enum callback: append one profiler_entry (with its
   subentry list, or None) to collect->list. */
static int statsForEntry(rotating_node_t *node, void *arg)
{
    ProfilerEntry *entry = (ProfilerEntry*) node;
    statscollector_t *collect = (statscollector_t*) arg;
    PyObject *info;
    int err;
    if (entry->callcount == 0)
        return 0; /* skip */

    if (entry->calls != EMPTY_ROTATING_TREE) {
        collect->sublist = PyList_New(0);
        if (collect->sublist == NULL)
            return -1;
        if (RotatingTree_Enum(entry->calls,
                              statsForSubEntry, collect) != 0) {
            Py_DECREF(collect->sublist);
            return -1;
        }
    }
    else {
        /* no recorded subcalls: report None instead of an empty list */
        Py_INCREF(Py_None);
        collect->sublist = Py_None;
    }

    info = PyObject_CallFunction((PyObject*) &StatsEntryType,
                                 "((OllddO))",
                                 entry->userObj,
                                 entry->callcount,
                                 entry->recursivecallcount,
                                 collect->factor * entry->tt,
                                 collect->factor * entry->it,
                                 collect->sublist);
    Py_DECREF(collect->sublist);
    if (info == NULL)
        return -1;
    err = PyList_Append(collect->list, info);
    Py_DECREF(info);
    return err;
}
+
PyDoc_STRVAR(getstats_doc, "\
getstats() -> list of profiler_entry objects\n\
\n\
Return all information collected by the profiler.\n\
Each profiler_entry is a tuple-like object with the\n\
following attributes:\n\
\n\
    code          code object\n\
    callcount     how many times this was called\n\
    reccallcount  how many times called recursively\n\
    totaltime     total time in this entry\n\
    inlinetime    inline time in this entry (not in subcalls)\n\
    calls         details of the calls\n\
\n\
The calls attribute is either None or a list of\n\
profiler_subentry objects:\n\
\n\
    code          called code object\n\
    callcount     how many times this is called\n\
    reccallcount  how many times this is called recursively\n\
    totaltime     total time spent in this call\n\
    inlinetime    inline time (not in further subcalls)\n\
");

/* Implementation of Profiler.getstats(): walk the entry tree and build
   the list of profiler_entry struct sequences, with times scaled from
   internal ticks to seconds. */
static PyObject*
profiler_getstats(ProfilerObject *pObj, PyObject* noarg)
{
    statscollector_t collect;
    if (pending_exception(pObj))
        return NULL;
    /* Pick the tick->seconds factor matching how times were sampled. */
    if (!pObj->externalTimer)
        collect.factor = hpTimerUnit();
    else if (pObj->externalTimerUnit > 0.0)
        collect.factor = pObj->externalTimerUnit;
    else
        collect.factor = 1.0 / DOUBLE_TIMER_PRECISION;
    collect.list = PyList_New(0);
    if (collect.list == NULL)
        return NULL;
    if (RotatingTree_Enum(pObj->profilerEntries, statsForEntry, &collect)
        != 0) {
        Py_DECREF(collect.list);
        return NULL;
    }
    return collect.list;
}
+
+/* Set or clear the POF_SUBCALLS flag. A negative nvalue leaves the
+ * flag unchanged (callers pass -1 to mean "keep current setting"). */
+static int
+setSubcalls(ProfilerObject *pObj, int nvalue)
+{
+ if (nvalue == 0)
+ pObj->flags &= ~POF_SUBCALLS;
+ else if (nvalue > 0)
+ pObj->flags |= POF_SUBCALLS;
+ return 0;
+}
+
+/* Set or clear the POF_BUILTINS flag; same -1 = "no change" convention.
+ * Requesting builtin profiling fails on interpreters without
+ * PyTrace_C_CALL support (pre-2.4). */
+static int
+setBuiltins(ProfilerObject *pObj, int nvalue)
+{
+ if (nvalue == 0)
+ pObj->flags &= ~POF_BUILTINS;
+ else if (nvalue > 0) {
+#ifndef PyTrace_C_CALL
+ PyErr_SetString(PyExc_ValueError,
+ "builtins=True requires Python >= 2.4");
+ return -1;
+#else
+ pObj->flags |= POF_BUILTINS;
+#endif
+ }
+ return 0;
+}
+
+PyDoc_STRVAR(enable_doc, "\
+enable(subcalls=True, builtins=True)\n\
+\n\
+Start collecting profiling information.\n\
+If 'subcalls' is True, also records for each function\n\
+statistics separated according to its current caller.\n\
+If 'builtins' is True, records the time spent in\n\
+built-in functions separately from their caller.\n\
+");
+
+/* enable([subcalls[, builtins]]): install profiler_callback as the
+ * interpreter's profile hook. Omitted arguments stay at -1, which
+ * setSubcalls/setBuiltins interpret as "keep the current flag". */
+static PyObject*
+profiler_enable(ProfilerObject *self, PyObject *args, PyObject *kwds)
+{
+ int subcalls = -1;
+ int builtins = -1;
+ static char *kwlist[] = {"subcalls", "builtins", 0};
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "|ii:enable",
+ kwlist, &subcalls, &builtins))
+ return NULL;
+ if (setSubcalls(self, subcalls) < 0 || setBuiltins(self, builtins) < 0)
+ return NULL;
+ PyEval_SetProfile(profiler_callback, (PyObject*)self);
+ self->flags |= POF_ENABLED;
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+/* Unwind any profiler contexts still stacked when profiling stops
+ * (frames that were entered but never returned). Contexts with a known
+ * entry are closed via Stop(); anonymous ones are just unlinked.
+ * NOTE(review): the loop relies on Stop() advancing
+ * pObj->currentProfilerContext -- confirm against Stop()'s definition
+ * earlier in this file. */
+static void
+flush_unmatched(ProfilerObject *pObj)
+{
+ while (pObj->currentProfilerContext) {
+ ProfilerContext *pContext = pObj->currentProfilerContext;
+ ProfilerEntry *profEntry= pContext->ctxEntry;
+ if (profEntry)
+ Stop(pObj, pContext, profEntry);
+ else
+ pObj->currentProfilerContext = pContext->previous;
+ if (pContext)
+ free(pContext);
+ }
+
+}
+
+PyDoc_STRVAR(disable_doc, "\
+disable()\n\
+\n\
+Stop collecting profiling information.\n\
+");
+
+/* disable(): remove the profile hook, flush dangling contexts, and
+ * surface any exception a custom timer raised while profiling. */
+static PyObject*
+profiler_disable(ProfilerObject *self, PyObject* noarg)
+{
+ self->flags &= ~POF_ENABLED;
+ PyEval_SetProfile(NULL, NULL);
+ flush_unmatched(self);
+ if (pending_exception(self))
+ return NULL;
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+PyDoc_STRVAR(clear_doc, "\
+clear()\n\
+\n\
+Clear all profiling information collected so far.\n\
+");
+
+/* clear(): drop all accumulated entries; the profiler stays usable. */
+static PyObject*
+profiler_clear(ProfilerObject *pObj, PyObject* noarg)
+{
+ clearEntries(pObj);
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+/* Deallocator: detach the profile hook if still enabled, release all
+ * contexts/entries, and drop the reference to any external timer. */
+static void
+profiler_dealloc(ProfilerObject *op)
+{
+ if (op->flags & POF_ENABLED)
+ PyEval_SetProfile(NULL, NULL);
+ flush_unmatched(op);
+ clearEntries(op);
+ Py_XDECREF(op->externalTimer);
+ op->ob_type->tp_free(op);
+}
+
+/* __init__(timer=None, timeunit=0.0, subcalls=True, builtins=True).
+ * 'builtins' defaults off when the interpreter lacks PyTrace_C_CALL.
+ * The timer swap increfs the new timer before decrefing the old one,
+ * so re-initialization is safe even if both are the same object. */
+static int
+profiler_init(ProfilerObject *pObj, PyObject *args, PyObject *kw)
+{
+ PyObject *o;
+ PyObject *timer = NULL;
+ double timeunit = 0.0;
+ int subcalls = 1;
+#ifdef PyTrace_C_CALL
+ int builtins = 1;
+#else
+ int builtins = 0;
+#endif
+ static char *kwlist[] = {"timer", "timeunit",
+ "subcalls", "builtins", 0};
+
+ if (!PyArg_ParseTupleAndKeywords(args, kw, "|Odii:Profiler", kwlist,
+ &timer, &timeunit,
+ &subcalls, &builtins))
+ return -1;
+
+ if (setSubcalls(pObj, subcalls) < 0 || setBuiltins(pObj, builtins) < 0)
+ return -1;
+ o = pObj->externalTimer;
+ pObj->externalTimer = timer;
+ Py_XINCREF(timer);
+ Py_XDECREF(o);
+ pObj->externalTimerUnit = timeunit;
+ return 0;
+}
+
+/* Method table for Profiler instances. */
+static PyMethodDef profiler_methods[] = {
+ {"getstats", (PyCFunction)profiler_getstats,
+ METH_NOARGS, getstats_doc},
+ {"enable", (PyCFunction)profiler_enable,
+ METH_VARARGS | METH_KEYWORDS, enable_doc},
+ {"disable", (PyCFunction)profiler_disable,
+ METH_NOARGS, disable_doc},
+ {"clear", (PyCFunction)profiler_clear,
+ METH_NOARGS, clear_doc},
+ {NULL, NULL}
+};
+
+PyDoc_STRVAR(profiler_doc, "\
+Profiler(custom_timer=None, time_unit=None, subcalls=True, builtins=True)\n\
+\n\
+ Builds a profiler object using the specified timer function.\n\
+ The default timer is a fast built-in one based on real time.\n\
+ For custom timer functions returning integers, time_unit can\n\
+ be a float specifying a scale (i.e. how long each integer unit\n\
+ is, in seconds).\n\
+");
+
+/* The _lsprof.Profiler type: subclassable (BASETYPE), created through
+ * the generic new/alloc with profiler_init doing argument handling. */
+static PyTypeObject PyProfiler_Type = {
+ PyObject_HEAD_INIT(NULL)
+ 0, /* ob_size */
+ "_lsprof.Profiler", /* tp_name */
+ sizeof(ProfilerObject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)profiler_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
+ profiler_doc, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ profiler_methods, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ (initproc)profiler_init, /* tp_init */
+ PyType_GenericAlloc, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+ PyObject_Del, /* tp_free */
+};
+
+/* No module-level functions; everything lives on the Profiler type. */
+static PyMethodDef moduleMethods[] = {
+ {NULL, NULL}
+};
+
+/* Module initializer for _lsprof. Registers the Profiler type plus the
+ * profiler_entry / profiler_subentry struct-sequence result types.
+ * The 'initialized' guard keeps PyStructSequence_InitType from running
+ * twice if the module is re-initialized. */
+PyMODINIT_FUNC
+init_lsprof(void)
+{
+ PyObject *module, *d;
+ module = Py_InitModule3("_lsprof", moduleMethods, "Fast profiler");
+ if (module == NULL)
+ return;
+ d = PyModule_GetDict(module);
+ if (PyType_Ready(&PyProfiler_Type) < 0)
+ return;
+ PyDict_SetItemString(d, "Profiler", (PyObject *)&PyProfiler_Type);
+
+ if (!initialized) {
+ PyStructSequence_InitType(&StatsEntryType,
+ &profiler_entry_desc);
+ PyStructSequence_InitType(&StatsSubEntryType,
+ &profiler_subentry_desc);
+ }
+ /* PyModule_AddObject steals a reference; incref first so the static
+ type objects survive the module being torn down. */
+ Py_INCREF((PyObject*) &StatsEntryType);
+ Py_INCREF((PyObject*) &StatsSubEntryType);
+ PyModule_AddObject(module, "profiler_entry",
+ (PyObject*) &StatsEntryType);
+ PyModule_AddObject(module, "profiler_subentry",
+ (PyObject*) &StatsSubEntryType);
+ empty_tuple = PyTuple_New(0);
+ initialized = 1;
+}
Added: sandbox/trunk/cpy_merge/Modules/cPickle.c
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Modules/cPickle.c Wed May 23 03:45:28 2007
@@ -0,0 +1,5583 @@
+#include "Python.h"
+#include "cStringIO.h"
+#include "structmember.h"
+
+PyDoc_STRVAR(cPickle_module_documentation,
+"C implementation and optimization of the Python pickle module.");
+
+#ifndef Py_eval_input
+#include <graminit.h>
+#define Py_eval_input eval_input
+#endif /* Py_eval_input */
+
+#define DEL_LIST_SLICE(list, from, to) (PyList_SetSlice(list, from, to, NULL))
+
+#define WRITE_BUF_SIZE 256
+
+/* Bump this when new opcodes are added to the pickle protocol. */
+#define HIGHEST_PROTOCOL 2
+
+/*
+ * Pickle opcodes. These must be kept in synch with pickle.py. Extensive
+ * docs are in pickletools.py.
+ */
+#define MARK '('
+#define STOP '.'
+#define POP '0'
+#define POP_MARK '1'
+#define DUP '2'
+#define FLOAT 'F'
+#define BINFLOAT 'G'
+#define INT 'I'
+#define BININT 'J'
+#define BININT1 'K'
+#define LONG 'L'
+#define BININT2 'M'
+#define NONE 'N'
+#define PERSID 'P'
+#define BINPERSID 'Q'
+#define REDUCE 'R'
+#define STRING 'S'
+#define BINSTRING 'T'
+#define SHORT_BINSTRING 'U'
+#define UNICODE 'V'
+#define BINUNICODE 'X'
+#define APPEND 'a'
+#define BUILD 'b'
+#define GLOBAL 'c'
+#define DICT 'd'
+#define EMPTY_DICT '}'
+#define APPENDS 'e'
+#define GET 'g'
+#define BINGET 'h'
+#define INST 'i'
+#define LONG_BINGET 'j'
+#define LIST 'l'
+#define EMPTY_LIST ']'
+#define OBJ 'o'
+#define PUT 'p'
+#define BINPUT 'q'
+#define LONG_BINPUT 'r'
+#define SETITEM 's'
+#define TUPLE 't'
+#define EMPTY_TUPLE ')'
+#define SETITEMS 'u'
+
+/* Protocol 2. */
+#define PROTO '\x80' /* identify pickle protocol */
+#define NEWOBJ '\x81' /* build object by applying cls.__new__ to argtuple */
+#define EXT1 '\x82' /* push object from extension registry; 1-byte index */
+#define EXT2 '\x83' /* ditto, but 2-byte index */
+#define EXT4 '\x84' /* ditto, but 4-byte index */
+#define TUPLE1 '\x85' /* build 1-tuple from stack top */
+#define TUPLE2 '\x86' /* build 2-tuple from two topmost stack items */
+#define TUPLE3 '\x87' /* build 3-tuple from three topmost stack items */
+#define NEWTRUE '\x88' /* push True */
+#define NEWFALSE '\x89' /* push False */
+#define LONG1 '\x8a' /* push long from < 256 bytes */
+#define LONG4 '\x8b' /* push really big long */
+
+/* These aren't opcodes -- they're ways to pickle bools before protocol 2,
+ * so that unpicklers written before bools were introduced unpickle them
+ * as ints, but unpicklers after can recognize that bools were intended.
+ * Note that protocol 2 added direct ways to pickle bools.
+ */
+#undef TRUE
+#define TRUE "I01\n"
+#undef FALSE
+#define FALSE "I00\n"
+
+/* Keep in synch with pickle.Pickler._BATCHSIZE. This is how many elements
+ * batch_list/dict() pumps out before doing APPENDS/SETITEMS. Nothing will
+ * break if this gets out of synch with pickle.py, but it's unclear that
+ * would help anything either.
+ */
+#define BATCHSIZE 1000
+
+static char MARKv = MARK;
+
+static PyObject *PickleError;
+static PyObject *PicklingError;
+static PyObject *UnpickleableError;
+static PyObject *UnpicklingError;
+static PyObject *BadPickleGet;
+
+/* As the name says, an empty tuple. */
+static PyObject *empty_tuple;
+
+/* copy_reg.dispatch_table, {type_object: pickling_function} */
+static PyObject *dispatch_table;
+
+/* For EXT[124] opcodes. */
+/* copy_reg._extension_registry, {(module_name, function_name): code} */
+static PyObject *extension_registry;
+/* copy_reg._inverted_registry, {code: (module_name, function_name)} */
+static PyObject *inverted_registry;
+/* copy_reg._extension_cache, {code: object} */
+static PyObject *extension_cache;
+
+/* For looking up name pairs in copy_reg._extension_registry. */
+static PyObject *two_tuple;
+
+static PyObject *__class___str, *__getinitargs___str, *__dict___str,
+ *__getstate___str, *__setstate___str, *__name___str, *__reduce___str,
+ *__reduce_ex___str,
+ *write_str, *append_str,
+ *read_str, *readline_str, *__main___str,
+ *copy_reg_str, *dispatch_table_str;
+
+/*************************************************************************
+ Internal Data type for pickle data. */
+
+/* Growable array of owned PyObject* used as the unpickler's object
+ * stack (see the "Internal Data type for pickle data" banner above). */
+typedef struct {
+ PyObject_HEAD
+ int length; /* number of initial slots in data currently used */
+ int size; /* number of slots in data allocated */
+ PyObject **data;
+} Pdata;
+
+/* Release every stacked reference, then the backing array and the
+ * Pdata object itself. */
+static void
+Pdata_dealloc(Pdata *self)
+{
+ int i;
+ PyObject **p;
+
+ for (i = self->length, p = self->data; --i >= 0; p++) {
+ Py_DECREF(*p);
+ }
+ if (self->data)
+ free(self->data);
+ PyObject_Del(self);
+}
+
+/* Minimal type object: Pdata is internal-only, so just a dealloc slot. */
+static PyTypeObject PdataType = {
+ PyObject_HEAD_INIT(NULL) 0, "cPickle.Pdata", sizeof(Pdata), 0,
+ (destructor)Pdata_dealloc,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0L,0L,0L,0L, ""
+};
+
+#define Pdata_Check(O) ((O)->ob_type == &PdataType)
+
+/* Create an empty Pdata with an initial capacity of 8 slots. */
+static PyObject *
+Pdata_New(void)
+{
+ Pdata *self;
+
+ if (!(self = PyObject_New(Pdata, &PdataType)))
+ return NULL;
+ self->size = 8;
+ self->length = 0;
+ self->data = malloc(self->size * sizeof(PyObject*));
+ if (self->data)
+ return (PyObject*)self;
+ Py_DECREF(self);
+ return PyErr_NoMemory();
+}
+
+/* Raise UnpicklingError for a stack underflow; always returns -1 so
+ * callers can "return stackUnderflow();". */
+static int
+stackUnderflow(void)
+{
+ PyErr_SetString(UnpicklingError, "unpickling stack underflow");
+ return -1;
+}
+
+/* Retain only the initial clearto items. If clearto >= the current
+ * number of items, this is a (non-erroneous) NOP.
+ */
+static int
+Pdata_clear(Pdata *self, int clearto)
+{
+ int i;
+ PyObject **p;
+
+ if (clearto < 0) return stackUnderflow();
+ if (clearto >= self->length) return 0;
+
+ for (i = self->length, p = self->data + clearto;
+ --i >= clearto;
+ p++) {
+ Py_CLEAR(*p);
+ }
+ self->length = clearto;
+
+ return 0;
+}
+
+/* Double the stack's capacity. Guards against both int overflow of the
+ * doubled slot count and size_t overflow of the byte count; raises
+ * MemoryError and returns -1 on any failure. */
+static int
+Pdata_grow(Pdata *self)
+{
+ int bigger;
+ size_t nbytes;
+ PyObject **tmp;
+
+ bigger = self->size << 1;
+ if (bigger <= 0) /* was 0, or new value overflows */
+ goto nomemory;
+ if ((int)(size_t)bigger != bigger)
+ goto nomemory;
+ nbytes = (size_t)bigger * sizeof(PyObject *);
+ if (nbytes / sizeof(PyObject *) != (size_t)bigger)
+ goto nomemory;
+ tmp = realloc(self->data, nbytes);
+ if (tmp == NULL)
+ goto nomemory;
+ self->data = tmp;
+ self->size = bigger;
+ return 0;
+
+ nomemory:
+ PyErr_NoMemory();
+ return -1;
+}
+
+/* D is a Pdata*. Pop the topmost element and store it into V, which
+ * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
+ * is raised and V is set to NULL. D and V may be evaluated several times.
+ */
+#define PDATA_POP(D, V) { \
+ if ((D)->length) \
+ (V) = (D)->data[--((D)->length)]; \
+ else { \
+ PyErr_SetString(UnpicklingError, "bad pickle data"); \
+ (V) = NULL; \
+ } \
+}
+
+/* PDATA_PUSH and PDATA_APPEND both push rvalue PyObject* O on to Pdata*
+ * D. If the Pdata stack can't be grown to hold the new value, both
+ * raise MemoryError and execute "return ER". The difference is in ownership
+ * of O after: _PUSH transfers ownership of O from the caller to the stack
+ * (no incref of O is done, and in case of error O is decrefed), while
+ * _APPEND pushes a new reference.
+ */
+
+/* Push O on stack D, giving ownership of O to the stack. */
+#define PDATA_PUSH(D, O, ER) { \
+ if (((Pdata*)(D))->length == ((Pdata*)(D))->size && \
+ Pdata_grow((Pdata*)(D)) < 0) { \
+ Py_DECREF(O); \
+ return ER; \
+ } \
+ ((Pdata*)(D))->data[((Pdata*)(D))->length++] = (O); \
+}
+
+/* Push O on stack D, pushing a new reference. */
+#define PDATA_APPEND(D, O, ER) { \
+ if (((Pdata*)(D))->length == ((Pdata*)(D))->size && \
+ Pdata_grow((Pdata*)(D)) < 0) \
+ return ER; \
+ Py_INCREF(O); \
+ ((Pdata*)(D))->data[((Pdata*)(D))->length++] = (O); \
+}
+
+
+/* Pop everything above index 'start' into a new tuple. The references
+ * are *moved* off the stack into the tuple (no incref/decref needed). */
+static PyObject *
+Pdata_popTuple(Pdata *self, int start)
+{
+ PyObject *r;
+ int i, j, l;
+
+ l = self->length-start;
+ r = PyTuple_New(l);
+ if (r == NULL)
+ return NULL;
+ for (i = start, j = 0 ; j < l; i++, j++)
+ PyTuple_SET_ITEM(r, j, self->data[i]);
+
+ self->length = start;
+ return r;
+}
+
+/* Same as Pdata_popTuple, but produces a list. */
+static PyObject *
+Pdata_popList(Pdata *self, int start)
+{
+ PyObject *r;
+ int i, j, l;
+
+ l=self->length-start;
+ if (!( r=PyList_New(l))) return NULL;
+ for (i=start, j=0 ; j < l; i++, j++)
+ PyList_SET_ITEM(r, j, self->data[i]);
+
+ self->length=start;
+ return r;
+}
+
+/*************************************************************************/
+
+/* Stuff rvalue o into the cached 1-tuple self->arg (created on first
+ * use), taking ownership of o. Used to avoid allocating a fresh args
+ * tuple for every write()/read() call. */
+#define ARG_TUP(self, o) { \
+ if (self->arg || (self->arg=PyTuple_New(1))) { \
+ Py_XDECREF(PyTuple_GET_ITEM(self->arg,0)); \
+ PyTuple_SET_ITEM(self->arg,0,o); \
+ } \
+ else { \
+ Py_DECREF(o); \
+ } \
+}
+
+/* Drop the cached args tuple only if someone else grabbed a reference
+ * to it during the call; otherwise keep it for reuse. */
+#define FREE_ARG_TUP(self) { \
+ if (self->arg->ob_refcnt > 1) { \
+ Py_DECREF(self->arg); \
+ self->arg=NULL; \
+ } \
+ }
+
+/* State for one pickling operation. write_func is one of write_file /
+ * write_cStringIO / write_none / write_other, chosen from the type of
+ * 'file' when the pickler is created. */
+typedef struct Picklerobject {
+ PyObject_HEAD
+ FILE *fp;
+ PyObject *write;
+ PyObject *file;
+ PyObject *memo;
+ PyObject *arg;
+ PyObject *pers_func;
+ PyObject *inst_pers_func;
+
+ /* pickle protocol number, >= 0 */
+ int proto;
+
+ /* bool, true if proto > 0 */
+ int bin;
+
+ int fast; /* Fast mode doesn't save in memo, don't use if circ ref */
+ int nesting;
+ int (*write_func)(struct Picklerobject *, const char *, Py_ssize_t);
+ char *write_buf;
+ int buf_size;
+ PyObject *dispatch_table;
+ int fast_container; /* count nested container dumps */
+ PyObject *fast_memo;
+} Picklerobject;
+
+#ifndef PY_CPICKLE_FAST_LIMIT
+#define PY_CPICKLE_FAST_LIMIT 50
+#endif
+
+static PyTypeObject Picklertype;
+
+/* State for one unpickling operation; read_func/readline_func are the
+ * input-source analogues of write_func above. */
+typedef struct Unpicklerobject {
+ PyObject_HEAD
+ FILE *fp;
+ PyObject *file;
+ PyObject *readline;
+ PyObject *read;
+ PyObject *memo;
+ PyObject *arg;
+ Pdata *stack;
+ PyObject *mark;
+ PyObject *pers_func;
+ PyObject *last_string;
+ int *marks;
+ int num_marks;
+ int marks_size;
+ Py_ssize_t (*read_func)(struct Unpicklerobject *, char **, Py_ssize_t);
+ Py_ssize_t (*readline_func)(struct Unpicklerobject *, char **);
+ int buf_size;
+ char *buf;
+ PyObject *find_class;
+} Unpicklerobject;
+
+static PyTypeObject Unpicklertype;
+
+/* Forward decls that need the above structs */
+static int save(Picklerobject *, PyObject *, int);
+static int put2(Picklerobject *, PyObject *);
+
+/* Set exception ErrType with a %-formatted message: 'format' is a
+ * Py_BuildValue format producing the args applied to 'stringformat'.
+ * Always returns NULL so callers can "return cPickle_ErrFormat(...)". */
+static
+PyObject *
+cPickle_ErrFormat(PyObject *ErrType, char *stringformat, char *format, ...)
+{
+ va_list va;
+ PyObject *args=0, *retval=0;
+ va_start(va, format);
+
+ if (format) args = Py_VaBuildValue(format, va);
+ va_end(va);
+ if (format && ! args) return NULL;
+ if (stringformat && !(retval=PyString_FromString(stringformat)))
+ return NULL;
+
+ if (retval) {
+ if (args) {
+ PyObject *v;
+ v=PyString_Format(retval, args);
+ Py_DECREF(retval);
+ Py_DECREF(args);
+ if (! v) return NULL;
+ retval=v;
+ }
+ }
+ else
+ if (args) retval=args;
+ else {
+ /* Neither a message nor args: raise with None as the value. */
+ PyErr_SetObject(ErrType,Py_None);
+ return NULL;
+ }
+ PyErr_SetObject(ErrType,retval);
+ Py_DECREF(retval);
+ return NULL;
+}
+
+/* write_func for real FILE* targets. By convention s == NULL is a
+ * flush request and a no-op here. Returns bytes written or -1.
+ * NOTE(review): the n > INT_MAX branch returns -1 without setting a
+ * Python exception -- callers will see an error with no exception. */
+static int
+write_file(Picklerobject *self, const char *s, Py_ssize_t n)
+{
+ size_t nbyteswritten;
+
+ if (s == NULL) {
+ return 0;
+ }
+
+ if (n > INT_MAX) {
+ /* String too large */
+ return -1;
+ }
+
+ Py_BEGIN_ALLOW_THREADS
+ nbyteswritten = fwrite(s, sizeof(char), n, self->fp);
+ Py_END_ALLOW_THREADS
+ if (nbyteswritten != (size_t)n) {
+ PyErr_SetFromErrno(PyExc_IOError);
+ return -1;
+ }
+
+ return (int)n;
+}
+
+/* write_func for cStringIO output objects, using the C-level cwrite
+ * hook from the cStringIO capsule API. */
+static int
+write_cStringIO(Picklerobject *self, const char *s, Py_ssize_t n)
+{
+ if (s == NULL) {
+ return 0;
+ }
+
+ if (PycStringIO->cwrite((PyObject *)self->file, s, n) != n) {
+ return -1;
+ }
+
+ return (int)n;
+}
+
+/* write_func that discards everything (used when no output is wanted,
+ * e.g. just computing memo effects). */
+static int
+write_none(Picklerobject *self, const char *s, Py_ssize_t n)
+{
+ if (s == NULL) return 0;
+ if (n > INT_MAX) return -1;
+ return (int)n;
+}
+
+/* write_func for arbitrary file-like objects. Small writes are
+ * accumulated in self->write_buf (WRITE_BUF_SIZE bytes) and flushed in
+ * one .write() call; s == NULL forces a flush. When self->write is
+ * NULL the target is an internal Pdata list and the string is pushed
+ * onto it instead. */
+static int
+write_other(Picklerobject *self, const char *s, Py_ssize_t _n)
+{
+ PyObject *py_str = 0, *junk = 0;
+ int n;
+
+ if (_n > INT_MAX)
+ return -1;
+ n = (int)_n;
+ if (s == NULL) {
+ /* Flush: emit the buffered bytes, if any. */
+ if (!( self->buf_size )) return 0;
+ py_str = PyString_FromStringAndSize(self->write_buf,
+ self->buf_size);
+ if (!py_str)
+ return -1;
+ }
+ else {
+ if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
+ if (write_other(self, NULL, 0) < 0)
+ return -1;
+ }
+
+ if (n > WRITE_BUF_SIZE) {
+ /* Too big to buffer: write it out directly. */
+ if (!( py_str =
+ PyString_FromStringAndSize(s, n)))
+ return -1;
+ }
+ else {
+ memcpy(self->write_buf + self->buf_size, s, n);
+ self->buf_size += n;
+ return n;
+ }
+ }
+
+ if (self->write) {
+ /* object with write method */
+ ARG_TUP(self, py_str);
+ if (self->arg) {
+ junk = PyObject_Call(self->write, self->arg, NULL);
+ FREE_ARG_TUP(self);
+ }
+ if (junk) Py_DECREF(junk);
+ else return -1;
+ }
+ else
+ PDATA_PUSH(self->file, py_str, -1);
+
+ self->buf_size = 0;
+ return n;
+}
+
+
+/* read_func for FILE* sources: read exactly n bytes into self->buf
+ * (grown as needed, min 32 bytes) and point *s at it. A short read
+ * raises EOFError or IOError and returns -1. */
+static Py_ssize_t
+read_file(Unpicklerobject *self, char **s, Py_ssize_t n)
+{
+ size_t nbytesread;
+
+ if (self->buf_size == 0) {
+ int size;
+
+ size = ((n < 32) ? 32 : n);
+ if (!( self->buf = (char *)malloc(size))) {
+ PyErr_NoMemory();
+ return -1;
+ }
+
+ self->buf_size = size;
+ }
+ else if (n > self->buf_size) {
+ char *newbuf = (char *)realloc(self->buf, n);
+ if (!newbuf) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ self->buf = newbuf;
+ self->buf_size = n;
+ }
+
+ Py_BEGIN_ALLOW_THREADS
+ nbytesread = fread(self->buf, sizeof(char), n, self->fp);
+ Py_END_ALLOW_THREADS
+ if (nbytesread != (size_t)n) {
+ if (feof(self->fp)) {
+ PyErr_SetNone(PyExc_EOFError);
+ return -1;
+ }
+
+ PyErr_SetFromErrno(PyExc_IOError);
+ return -1;
+ }
+
+ *s = self->buf;
+
+ return n;
+}
+
+
+/* readline_func for FILE* sources: read up to and including '\n' into
+ * self->buf (doubled as needed), NUL-terminate, and return the length.
+ * NOTE(review): when feof() is already true the current buf[i] is left
+ * unset before the terminator is written -- confirm callers tolerate
+ * the EOF-without-newline case. */
+static Py_ssize_t
+readline_file(Unpicklerobject *self, char **s)
+{
+ int i;
+
+ if (self->buf_size == 0) {
+ if (!( self->buf = (char *)malloc(40))) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ self->buf_size = 40;
+ }
+
+ i = 0;
+ while (1) {
+ int bigger;
+ char *newbuf;
+ for (; i < (self->buf_size - 1); i++) {
+ if (feof(self->fp) ||
+ (self->buf[i] = getc(self->fp)) == '\n') {
+ self->buf[i + 1] = '\0';
+ *s = self->buf;
+ return i + 1;
+ }
+ }
+ /* Line longer than the buffer: double it and keep reading. */
+ bigger = self->buf_size << 1;
+ if (bigger <= 0) { /* overflow */
+ PyErr_NoMemory();
+ return -1;
+ }
+ newbuf = (char *)realloc(self->buf, bigger);
+ if (!newbuf) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ self->buf = newbuf;
+ self->buf_size = bigger;
+ }
+}
+
+
+/* read_func for cStringIO sources: the capsule's cread hands back a
+ * pointer into the cStringIO buffer, so no copy is made. A short read
+ * raises EOFError. */
+static Py_ssize_t
+read_cStringIO(Unpicklerobject *self, char **s, Py_ssize_t n)
+{
+ char *ptr;
+
+ if (PycStringIO->cread((PyObject *)self->file, &ptr, n) != n) {
+ PyErr_SetNone(PyExc_EOFError);
+ return -1;
+ }
+
+ *s = ptr;
+
+ return n;
+}
+
+/* readline_func for cStringIO sources; creadline sets the exception
+ * itself on failure. */
+static Py_ssize_t
+readline_cStringIO(Unpicklerobject *self, char **s)
+{
+ Py_ssize_t n;
+ char *ptr;
+
+ if ((n = PycStringIO->creadline((PyObject *)self->file, &ptr)) < 0) {
+ return -1;
+ }
+
+ *s = ptr;
+
+ return n;
+}
+
+
+/* read_func for arbitrary file-like objects: call file.read(n) and
+ * expose the result's internal buffer. The string is stashed in
+ * self->last_string so *s stays valid until the next read. */
+static Py_ssize_t
+read_other(Unpicklerobject *self, char **s, Py_ssize_t n)
+{
+ PyObject *bytes, *str=0;
+
+ if (!( bytes = PyInt_FromSsize_t(n))) return -1;
+
+ ARG_TUP(self, bytes);
+ if (self->arg) {
+ str = PyObject_Call(self->read, self->arg, NULL);
+ FREE_ARG_TUP(self);
+ }
+ if (! str) return -1;
+
+ Py_XDECREF(self->last_string);
+ self->last_string = str;
+
+ if (! (*s = PyString_AsString(str))) return -1;
+ return n;
+}
+
+/* readline_func for arbitrary file-like objects: call file.readline()
+ * with no arguments; same last_string lifetime trick as read_other. */
+static Py_ssize_t
+readline_other(Unpicklerobject *self, char **s)
+{
+ PyObject *str;
+ Py_ssize_t str_size;
+
+ if (!( str = PyObject_CallObject(self->readline, empty_tuple))) {
+ return -1;
+ }
+
+ if ((str_size = PyString_Size(str)) < 0)
+ return -1;
+
+ Py_XDECREF(self->last_string);
+ self->last_string = str;
+
+ if (! (*s = PyString_AsString(str)))
+ return -1;
+
+ return str_size;
+}
+
+/* Copy the first n bytes from s into newly malloc'ed memory, plus a
+ * trailing 0 byte. Return a pointer to that, or NULL if out of memory
+ * (with MemoryError set).
+ * The caller is responsible for free()'ing the return value.
+ */
+static char *
+pystrndup(const char *s, int n)
+{
+ char *r = (char *)malloc(n+1);
+ if (r == NULL)
+ return (char*)PyErr_NoMemory();
+ memcpy(r, s, n);
+ r[n] = 0;
+ return r;
+}
+
+
+/* Emit a GET/BINGET/LONG_BINGET opcode referencing the memo entry for
+ * 'id' (the object's address as a long). Memo values are (index, obj)
+ * tuples; the index selects the opcode variant. When the output target
+ * is an internal Pdata list the memo tuple is appended directly
+ * instead of emitting opcodes. */
+static int
+get(Picklerobject *self, PyObject *id)
+{
+ PyObject *value, *mv;
+ long c_value;
+ char s[30];
+ size_t len;
+
+ if (!( mv = PyDict_GetItem(self->memo, id))) {
+ PyErr_SetObject(PyExc_KeyError, id);
+ return -1;
+ }
+
+ if (!( value = PyTuple_GetItem(mv, 0)))
+ return -1;
+
+ if (!( PyInt_Check(value))) {
+ PyErr_SetString(PicklingError, "no int where int expected in memo");
+ return -1;
+ }
+ c_value = PyInt_AsLong(value);
+ if (c_value == -1 && PyErr_Occurred())
+ return -1;
+
+ if (!self->bin) {
+ /* Text protocol: "g<index>\n". */
+ s[0] = GET;
+ PyOS_snprintf(s + 1, sizeof(s) - 1, "%ld\n", c_value);
+ len = strlen(s);
+ }
+ else if (Pdata_Check(self->file)) {
+ if (write_other(self, NULL, 0) < 0) return -1;
+ PDATA_APPEND(self->file, mv, -1);
+ return 0;
+ }
+ else {
+ if (c_value < 256) {
+ /* 1-byte index. */
+ s[0] = BINGET;
+ s[1] = (int)(c_value & 0xff);
+ len = 2;
+ }
+ else {
+ /* 4-byte little-endian index. */
+ s[0] = LONG_BINGET;
+ s[1] = (int)(c_value & 0xff);
+ s[2] = (int)((c_value >> 8) & 0xff);
+ s[3] = (int)((c_value >> 16) & 0xff);
+ s[4] = (int)((c_value >> 24) & 0xff);
+ len = 5;
+ }
+ }
+
+ if (self->write_func(self, s, len) < 0)
+ return -1;
+
+ return 0;
+}
+
+
+/* Record ob in the memo and emit PUT/BINPUT/LONG_BINPUT, but only when
+ * worthwhile: objects with refcount < 2 can never be referenced again,
+ * and fast mode skips the memo entirely. */
+static int
+put(Picklerobject *self, PyObject *ob)
+{
+ if (ob->ob_refcnt < 2 || self->fast)
+ return 0;
+
+ return put2(self, ob);
+}
+
+/* Unconditional memoization: store memo[id(ob)] = (index, ob) and emit
+ * the matching PUT opcode. The tuple keeps ob alive so its address
+ * stays a valid memo key for the rest of the pickling run. */
+static int
+put2(Picklerobject *self, PyObject *ob)
+{
+ char c_str[30];
+ int p;
+ size_t len;
+ int res = -1;
+ PyObject *py_ob_id = 0, *memo_len = 0, *t = 0;
+
+ if (self->fast)
+ return 0;
+
+ if ((p = PyDict_Size(self->memo)) < 0)
+ goto finally;
+
+ /* Make sure memo keys are positive! */
+ /* XXX Why?
+ * XXX And does "positive" really mean non-negative?
+ * XXX pickle.py starts with PUT index 0, not 1. This makes for
+ * XXX gratuitous differences between the pickling modules.
+ */
+ p++;
+
+ if (!( py_ob_id = PyLong_FromVoidPtr(ob)))
+ goto finally;
+
+ if (!( memo_len = PyInt_FromLong(p)))
+ goto finally;
+
+ if (!( t = PyTuple_New(2)))
+ goto finally;
+
+ PyTuple_SET_ITEM(t, 0, memo_len);
+ Py_INCREF(memo_len);
+ PyTuple_SET_ITEM(t, 1, ob);
+ Py_INCREF(ob);
+
+ if (PyDict_SetItem(self->memo, py_ob_id, t) < 0)
+ goto finally;
+
+ if (!self->bin) {
+ /* Text protocol: "p<index>\n". */
+ c_str[0] = PUT;
+ PyOS_snprintf(c_str + 1, sizeof(c_str) - 1, "%d\n", p);
+ len = strlen(c_str);
+ }
+ else if (Pdata_Check(self->file)) {
+ if (write_other(self, NULL, 0) < 0) return -1;
+ PDATA_APPEND(self->file, memo_len, -1);
+ res=0; /* Job well done ;) */
+ goto finally;
+ }
+ else {
+ if (p >= 256) {
+ /* 4-byte little-endian index. */
+ c_str[0] = LONG_BINPUT;
+ c_str[1] = (int)(p & 0xff);
+ c_str[2] = (int)((p >> 8) & 0xff);
+ c_str[3] = (int)((p >> 16) & 0xff);
+ c_str[4] = (int)((p >> 24) & 0xff);
+ len = 5;
+ }
+ else {
+ c_str[0] = BINPUT;
+ c_str[1] = p;
+ len = 2;
+ }
+ }
+
+ if (self->write_func(self, c_str, len) < 0)
+ goto finally;
+
+ res = 0;
+
+ finally:
+ Py_XDECREF(py_ob_id);
+ Py_XDECREF(memo_len);
+ Py_XDECREF(t);
+
+ return res;
+}
+
+/* Find the name of the module defining 'global': prefer its __module__
+ * attribute, otherwise scan sys.modules for a module whose attribute
+ * 'global_name' is the object itself ('__main__' is skipped in the
+ * scan). Returns a new reference to the module-name string. */
+static PyObject *
+whichmodule(PyObject *global, PyObject *global_name)
+{
+ Py_ssize_t i, j;
+ PyObject *module = 0, *modules_dict = 0,
+ *global_name_attr = 0, *name = 0;
+
+ module = PyObject_GetAttrString(global, "__module__");
+ if (module)
+ return module;
+ if (PyErr_ExceptionMatches(PyExc_AttributeError))
+ PyErr_Clear();
+ else
+ return NULL;
+
+ if (!( modules_dict = PySys_GetObject("modules")))
+ return NULL;
+
+ i = 0;
+ while ((j = PyDict_Next(modules_dict, &i, &name, &module))) {
+
+ if (PyObject_Compare(name, __main___str)==0) continue;
+
+ global_name_attr = PyObject_GetAttr(module, global_name);
+ if (!global_name_attr) {
+ if (PyErr_ExceptionMatches(PyExc_AttributeError))
+ PyErr_Clear();
+ else
+ return NULL;
+ continue;
+ }
+
+ /* Identity match only: the module must hold this very object. */
+ if (global_name_attr != global) {
+ Py_DECREF(global_name_attr);
+ continue;
+ }
+
+ Py_DECREF(global_name_attr);
+
+ break;
+ }
+
+ /* The following implements the rule in pickle.py added in 1.5
+ that used __main__ if no module is found. I don't actually
+ like this rule. jlf
+ */
+ if (!j) {
+ j=1;
+ name=__main___str;
+ }
+
+ Py_INCREF(name);
+ return name;
+}
+
+
+/* Cycle detection for fast (memo-less) mode: once container nesting
+ * exceeds PY_CPICKLE_FAST_LIMIT, track object identities in fast_memo
+ * and raise ValueError on revisiting one. Returns 1 to proceed, 0 on
+ * error (fast_container is set to -1 to mark the error exit). */
+static int
+fast_save_enter(Picklerobject *self, PyObject *obj)
+{
+ /* if fast_container < 0, we're doing an error exit. */
+ if (++self->fast_container >= PY_CPICKLE_FAST_LIMIT) {
+ PyObject *key = NULL;
+ if (self->fast_memo == NULL) {
+ self->fast_memo = PyDict_New();
+ if (self->fast_memo == NULL) {
+ self->fast_container = -1;
+ return 0;
+ }
+ }
+ key = PyLong_FromVoidPtr(obj);
+ if (key == NULL)
+ return 0;
+ if (PyDict_GetItem(self->fast_memo, key)) {
+ Py_DECREF(key);
+ PyErr_Format(PyExc_ValueError,
+ "fast mode: can't pickle cyclic objects "
+ "including object type %s at %p",
+ obj->ob_type->tp_name, obj);
+ self->fast_container = -1;
+ return 0;
+ }
+ if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
+ Py_DECREF(key);
+ self->fast_container = -1;
+ return 0;
+ }
+ Py_DECREF(key);
+ }
+ return 1;
+}
+
+/* Undo fast_save_enter on the way out: drop obj from fast_memo if it
+ * was tracked. Returns 1 on success, 0 on error. */
+int
+fast_save_leave(Picklerobject *self, PyObject *obj)
+{
+ if (self->fast_container-- >= PY_CPICKLE_FAST_LIMIT) {
+ PyObject *key = PyLong_FromVoidPtr(obj);
+ if (key == NULL)
+ return 0;
+ if (PyDict_DelItem(self->fast_memo, key) < 0) {
+ Py_DECREF(key);
+ return 0;
+ }
+ Py_DECREF(key);
+ }
+ return 1;
+}
+
+/* Pickle None: a single NONE opcode in every protocol. */
+static int
+save_none(Picklerobject *self, PyObject *args)
+{
+ static char none = NONE;
+ if (self->write_func(self, &none, 1) < 0)
+ return -1;
+
+ return 0;
+}
+
+/* Pickle a bool: NEWTRUE/NEWFALSE in protocol >= 2, otherwise the
+ * backward-compatible "I01\n"/"I00\n" int spellings (see the TRUE /
+ * FALSE defines above). */
+static int
+save_bool(Picklerobject *self, PyObject *args)
+{
+ static const char *buf[2] = {FALSE, TRUE};
+ static char len[2] = {sizeof(FALSE)-1, sizeof(TRUE)-1};
+ long l = args == Py_True;
+
+ if (self->proto >= 2) {
+ char opcode = l ? NEWTRUE : NEWFALSE;
+ if (self->write_func(self, &opcode, 1) < 0)
+ return -1;
+ }
+ else if (self->write_func(self, buf[l], len[l]) < 0)
+ return -1;
+ return 0;
+}
+
+/* Pickle a C long. Text mode (or a value outside signed 32-bit range
+ * on 64-bit platforms) uses the decimal INT opcode; binary mode picks
+ * the smallest of BININT1 (1 byte), BININT2 (2 bytes) or BININT
+ * (4 bytes), all little-endian. */
+static int
+save_int(Picklerobject *self, long l)
+{
+ char c_str[32];
+ int len = 0;
+
+ if (!self->bin
+#if SIZEOF_LONG > 4
+ || l > 0x7fffffffL
+ || l < -0x80000000L
+#endif
+ ) {
+ /* Text-mode pickle, or long too big to fit in the 4-byte
+ * signed BININT format: store as a string.
+ */
+ c_str[0] = INT;
+ PyOS_snprintf(c_str + 1, sizeof(c_str) - 1, "%ld\n", l);
+ if (self->write_func(self, c_str, strlen(c_str)) < 0)
+ return -1;
+ }
+ else {
+ /* Binary pickle and l fits in a signed 4-byte int. */
+ c_str[1] = (int)( l & 0xff);
+ c_str[2] = (int)((l >> 8) & 0xff);
+ c_str[3] = (int)((l >> 16) & 0xff);
+ c_str[4] = (int)((l >> 24) & 0xff);
+
+ if ((c_str[4] == 0) && (c_str[3] == 0)) {
+ if (c_str[2] == 0) {
+ c_str[0] = BININT1;
+ len = 2;
+ }
+ else {
+ c_str[0] = BININT2;
+ len = 3;
+ }
+ }
+ else {
+ c_str[0] = BININT;
+ len = 5;
+ }
+
+ if (self->write_func(self, c_str, len) < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+
+/* Pickle a Python long. Values that fit in a C long are delegated to
+ * save_int. Protocol >= 2 writes the compact little-endian 256's-
+ * complement byte string via LONG1/LONG4; older protocols write the
+ * decimal repr after a LONG opcode. */
+static int
+save_long(Picklerobject *self, PyObject *args)
+{
+ Py_ssize_t size;
+ int res = -1;
+ PyObject *repr = NULL;
+ long val = PyInt_AsLong(args);
+ static char l = LONG;
+
+ if (val == -1 && PyErr_Occurred()) {
+ /* out of range for int pickling */
+ PyErr_Clear();
+ }
+ else
+ return save_int(self, val);
+
+ if (self->proto >= 2) {
+ /* Linear-time pickling. */
+ size_t nbits;
+ size_t nbytes;
+ unsigned char *pdata;
+ char c_str[5];
+ int i;
+ int sign = _PyLong_Sign(args);
+
+ if (sign == 0) {
+ /* It's 0 -- an empty bytestring. */
+ c_str[0] = LONG1;
+ c_str[1] = 0;
+ i = self->write_func(self, c_str, 2);
+ if (i < 0) goto finally;
+ res = 0;
+ goto finally;
+ }
+ nbits = _PyLong_NumBits(args);
+ if (nbits == (size_t)-1 && PyErr_Occurred())
+ goto finally;
+ /* How many bytes do we need? There are nbits >> 3 full
+ * bytes of data, and nbits & 7 leftover bits. If there
+ * are any leftover bits, then we clearly need another
+ * byte. What's not so obvious is that we *probably*
+ * need another byte even if there aren't any leftovers:
+ * the most-significant bit of the most-significant byte
+ * acts like a sign bit, and it's usually got a sense
+ * opposite of the one we need. The exception is longs
+ * of the form -(2**(8*j-1)) for j > 0. Such a long is
+ * its own 256's-complement, so has the right sign bit
+ * even without the extra byte. That's a pain to check
+ * for in advance, though, so we always grab an extra
+ * byte at the start, and cut it back later if possible.
+ */
+ nbytes = (nbits >> 3) + 1;
+ if (nbytes > INT_MAX) {
+ PyErr_SetString(PyExc_OverflowError, "long too large "
+ "to pickle");
+ goto finally;
+ }
+ repr = PyString_FromStringAndSize(NULL, (int)nbytes);
+ if (repr == NULL) goto finally;
+ pdata = (unsigned char *)PyString_AS_STRING(repr);
+ i = _PyLong_AsByteArray((PyLongObject *)args,
+ pdata, nbytes,
+ 1 /* little endian */, 1 /* signed */);
+ if (i < 0) goto finally;
+ /* If the long is negative, this may be a byte more than
+ * needed. This is so iff the MSB is all redundant sign
+ * bits.
+ */
+ if (sign < 0 && nbytes > 1 && pdata[nbytes - 1] == 0xff &&
+ (pdata[nbytes - 2] & 0x80) != 0)
+ --nbytes;
+
+ if (nbytes < 256) {
+ /* LONG1: 1-byte length prefix. */
+ c_str[0] = LONG1;
+ c_str[1] = (char)nbytes;
+ size = 2;
+ }
+ else {
+ /* LONG4: 4-byte little-endian length prefix. */
+ c_str[0] = LONG4;
+ size = (int)nbytes;
+ for (i = 1; i < 5; i++) {
+ c_str[i] = (char)(size & 0xff);
+ size >>= 8;
+ }
+ size = 5;
+ }
+ i = self->write_func(self, c_str, size);
+ if (i < 0) goto finally;
+ i = self->write_func(self, (char *)pdata, (int)nbytes);
+ if (i < 0) goto finally;
+ res = 0;
+ goto finally;
+ }
+
+ /* proto < 2: write the repr and newline. This is quadratic-time
+ * (in the number of digits), in both directions.
+ */
+ if (!( repr = PyObject_Repr(args)))
+ goto finally;
+
+ if ((size = PyString_Size(repr)) < 0)
+ goto finally;
+
+ if (self->write_func(self, &l, 1) < 0)
+ goto finally;
+
+ if (self->write_func(self,
+ PyString_AS_STRING((PyStringObject *)repr),
+ size) < 0)
+ goto finally;
+
+ if (self->write_func(self, "\n", 1) < 0)
+ goto finally;
+
+ res = 0;
+
+ finally:
+ Py_XDECREF(repr);
+ return res;
+}
+
+
+/* Pickle a Python float.
+ *
+ * Binary mode: emit BINFLOAT followed by the 8-byte big-endian
+ * IEEE-754 encoding of the value.  Text mode: emit FLOAT followed by
+ * a "%.17g" decimal repr and a newline.
+ * Returns 0 on success, -1 on error (exception set by the failing
+ * callee).
+ */
+static int
+save_float(Picklerobject *self, PyObject *args)
+{
+    double value = PyFloat_AS_DOUBLE((PyFloatObject *)args);
+
+    if (!self->bin) {
+        char buf[250];
+
+        buf[0] = FLOAT;
+        PyOS_ascii_formatd(buf + 1, sizeof(buf) - 2, "%.17g", value);
+        /* Extend the formatted string with a newline character */
+        strcat(buf, "\n");
+        return self->write_func(self, buf, strlen(buf)) < 0 ? -1 : 0;
+    }
+    else {
+        char packed[9];
+
+        packed[0] = BINFLOAT;
+        /* last arg 0 => big-endian, as the BINFLOAT format requires */
+        if (_PyFloat_Pack8(value, (unsigned char *)&packed[1], 0) < 0)
+            return -1;
+        if (self->write_func(self, packed, 9) < 0)
+            return -1;
+        return 0;
+    }
+}
+
+
+/* Pickle a (byte) string object.
+ *
+ * Text mode: emit STRING followed by repr(args) and a newline.
+ * Binary mode: emit SHORT_BINSTRING (length < 256) or BINSTRING
+ * (4-byte little-endian length) followed by the raw bytes.  Strings
+ * longer than 128 bytes going to a list-based (Pdata) output are
+ * appended as whole objects instead of being copied byte-by-byte.
+ *
+ * doput: if true, memoize the string with a PUT opcode afterwards.
+ * Returns 0 on success, -1 on error.
+ */
+static int
+save_string(Picklerobject *self, PyObject *args, int doput)
+{
+    int size, len;
+    PyObject *repr = 0;
+
+    if ((size = PyString_Size(args)) < 0)
+        return -1;
+
+    if (!self->bin) {
+        char *repr_str;
+
+        static char string = STRING;
+
+        /* repr() supplies the quoting/escaping for the text protocol. */
+        if (!( repr = PyObject_Repr(args)))
+            return -1;
+
+        if ((len = PyString_Size(repr)) < 0)
+            goto err;
+        repr_str = PyString_AS_STRING((PyStringObject *)repr);
+
+        if (self->write_func(self, &string, 1) < 0)
+            goto err;
+
+        if (self->write_func(self, repr_str, len) < 0)
+            goto err;
+
+        if (self->write_func(self, "\n", 1) < 0)
+            goto err;
+
+        Py_XDECREF(repr);
+    }
+    else {
+        int i;
+        char c_str[5];
+
+        /* size was already computed above; no need to recompute it
+         * (the original code called PyString_Size(args) a second
+         * time here). */
+        if (size < 256) {
+            c_str[0] = SHORT_BINSTRING;
+            c_str[1] = size;
+            len = 2;
+        }
+        else if (size <= INT_MAX) {
+            c_str[0] = BINSTRING;
+            /* 4-byte little-endian length. */
+            for (i = 1; i < 5; i++)
+                c_str[i] = (int)(size >> ((i - 1) * 8));
+            len = 5;
+        }
+        else
+            return -1;      /* string too large */
+
+        if (self->write_func(self, c_str, len) < 0)
+            return -1;
+
+        if (size > 128 && Pdata_Check(self->file)) {
+            /* Flush buffered output, then append the string object
+             * itself to the Pdata list to avoid copying its bytes. */
+            if (write_other(self, NULL, 0) < 0) return -1;
+            PDATA_APPEND(self->file, args, -1);
+        }
+        else {
+            if (self->write_func(self,
+                                 PyString_AS_STRING(
+                                     (PyStringObject *)args),
+                                 size) < 0)
+                return -1;
+        }
+    }
+
+    /* Memoize so later references become GET opcodes. */
+    if (doput)
+        if (put(self, args) < 0)
+            return -1;
+
+    return 0;
+
+  err:
+    Py_XDECREF(repr);
+    return -1;
+}
+
+
+#ifdef Py_USING_UNICODE
+/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
+   backslash and newline characters to \uXXXX escapes, so the encoded
+   text fits on a single newline-terminated pickle line.
+   Returns a new string object, or NULL (with an exception set). */
+static PyObject *
+modified_EncodeRawUnicodeEscape(const Py_UNICODE *s, int size)
+{
+    PyObject *repr;
+    char *p;
+    char *q;
+
+    static const char *hexdigit = "0123456789ABCDEF";
+
+    /* Each character expands to at most 6 bytes ("\uXXXX"); guard the
+     * multiplication below against int overflow. */
+    if (size > INT_MAX / 6) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "unicode object too large to pickle");
+        return NULL;
+    }
+    repr = PyString_FromStringAndSize(NULL, 6 * size);
+    if (repr == NULL)
+        return NULL;
+    if (size == 0)
+        return repr;
+
+    p = q = PyString_AS_STRING(repr);
+    while (size-- > 0) {
+        Py_UNICODE ch = *s++;
+        /* Map 16-bit characters, backslash and newline to '\uxxxx' */
+        if (ch >= 256 || ch == '\\' || ch == '\n') {
+            *p++ = '\\';
+            *p++ = 'u';
+            *p++ = hexdigit[(ch >> 12) & 0xf];
+            *p++ = hexdigit[(ch >> 8) & 0xf];
+            *p++ = hexdigit[(ch >> 4) & 0xf];
+            *p++ = hexdigit[ch & 15];
+        }
+        /* Copy everything else as-is */
+        else
+            *p++ = (char) ch;
+    }
+    *p = '\0';
+    /* Shrink to the bytes actually used.  On failure the resize frees
+     * the string and sets repr to NULL; propagate that (the original
+     * ignored the return value). */
+    if (_PyString_Resize(&repr, p - q) < 0)
+        return NULL;
+    return repr;
+}
+
+
+/* Pickle a unicode object.
+ *
+ * Text mode: emit UNICODE followed by a raw-unicode-escape encoding
+ * (with backslash and newline escaped too) and a newline.
+ * Binary mode: emit BINUNICODE, a 4-byte little-endian byte count,
+ * then the UTF-8 encoding of the string.
+ *
+ * doput: if true, memoize the object with a PUT opcode afterwards.
+ * Returns 0 on success, -1 (with an exception set) on error.
+ */
+static int
+save_unicode(Picklerobject *self, PyObject *args, int doput)
+{
+    Py_ssize_t size, len;
+    PyObject *repr = 0;
+
+    if (!PyUnicode_Check(args))
+        return -1;
+
+    if (!self->bin) {
+        char *repr_str;
+        static char string = UNICODE;
+
+        repr = modified_EncodeRawUnicodeEscape(
+            PyUnicode_AS_UNICODE(args), PyUnicode_GET_SIZE(args));
+        if (!repr)
+            return -1;
+
+        if ((len = PyString_Size(repr)) < 0)
+            goto err;
+        repr_str = PyString_AS_STRING((PyStringObject *)repr);
+
+        if (self->write_func(self, &string, 1) < 0)
+            goto err;
+
+        if (self->write_func(self, repr_str, len) < 0)
+            goto err;
+
+        if (self->write_func(self, "\n", 1) < 0)
+            goto err;
+
+        Py_XDECREF(repr);
+    }
+    else {
+        int i;
+        char c_str[5];
+
+        if (!( repr = PyUnicode_AsUTF8String(args)))
+            return -1;
+
+        if ((size = PyString_Size(repr)) < 0)
+            goto err;
+        if (size > INT_MAX) {
+            /* Bug fix: this path used to "return -1" directly,
+             * leaking repr and returning without an exception set. */
+            PyErr_SetString(PyExc_OverflowError,
+                            "unicode string too large to pickle");
+            goto err;
+        }
+
+        c_str[0] = BINUNICODE;
+        /* 4-byte little-endian length of the UTF-8 data. */
+        for (i = 1; i < 5; i++)
+            c_str[i] = (int)(size >> ((i - 1) * 8));
+        len = 5;
+
+        if (self->write_func(self, c_str, len) < 0)
+            goto err;
+
+        if (size > 128 && Pdata_Check(self->file)) {
+            /* Flush the write buffer, then hand the UTF-8 string
+             * object to the Pdata list whole to avoid copying it. */
+            if (write_other(self, NULL, 0) < 0)
+                goto err;
+            PDATA_APPEND(self->file, repr, -1);
+        }
+        else {
+            if (self->write_func(self, PyString_AS_STRING(repr),
+                                 size) < 0)
+                goto err;
+        }
+
+        Py_DECREF(repr);
+    }
+
+    /* Memoize so later references become GET opcodes. */
+    if (doput)
+        if (put(self, args) < 0)
+            return -1;
+
+    return 0;
+
+  err:
+    Py_XDECREF(repr);
+    return -1;
+}
+#endif
+
+/* A helper for save_tuple.  Pickle the len elements of tuple t, in
+ * order, so they end up on the unpickler's stack.
+ * Returns 0 on success, -1 on error. */
+static int
+store_tuple_elements(Picklerobject *self, PyObject *t, int len)
+{
+    int k;
+
+    assert(PyTuple_Size(t) == len);
+
+    for (k = 0; k < len; k++) {
+        /* Borrowed reference; a live tuple slot shouldn't be NULL,
+         * but bail out defensively if it ever is. */
+        PyObject *item = PyTuple_GET_ITEM(t, k);
+
+        if (item == NULL || save(self, item, 0) < 0)
+            return -1;
+    }
+    return 0;
+}
+
+/* Tuples are ubiquitous in the pickle protocols, so many techniques are
+ * used across protocols to minimize the space needed to pickle them.
+ * Tuples are also the only builtin immutable type that can be recursive
+ * (a tuple can be reached from itself), and that requires some subtle
+ * magic so that it works in all cases. IOW, this is a long routine.
+ */
+/* Returns 0 on success, -1 (with an exception set) on failure. */
+static int
+save_tuple(Picklerobject *self, PyObject *args)
+{
+ PyObject *py_tuple_id = NULL;
+ int len, i;
+ int res = -1;
+
+ static char tuple = TUPLE;
+ static char pop = POP;
+ static char pop_mark = POP_MARK;
+ static char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
+
+ if ((len = PyTuple_Size(args)) < 0)
+ goto finally;
+
+ /* Empty tuple: a single EMPTY_TUPLE opcode (proto >= 1) or
+ * MARK TUPLE (proto 0). */
+ if (len == 0) {
+ char c_str[2];
+
+ if (self->proto) {
+ c_str[0] = EMPTY_TUPLE;
+ len = 1;
+ }
+ else {
+ c_str[0] = MARK;
+ c_str[1] = TUPLE;
+ len = 2;
+ }
+ if (self->write_func(self, c_str, len) >= 0)
+ res = 0;
+ /* Don't memoize an empty tuple. */
+ goto finally;
+ }
+
+ /* A non-empty tuple. */
+
+ /* id(tuple) isn't in the memo now. If it shows up there after
+ * saving the tuple elements, the tuple must be recursive, in
+ * which case we'll pop everything we put on the stack, and fetch
+ * its value from the memo.
+ */
+ py_tuple_id = PyLong_FromVoidPtr(args);
+ if (py_tuple_id == NULL)
+ goto finally;
+
+ /* Protocol 2: tuples of length 1..3 have dedicated one-byte
+ * opcodes (TUPLE1/TUPLE2/TUPLE3). */
+ if (len <= 3 && self->proto >= 2) {
+ /* Use TUPLE{1,2,3} opcodes. */
+ if (store_tuple_elements(self, args, len) < 0)
+ goto finally;
+ if (PyDict_GetItem(self->memo, py_tuple_id)) {
+ /* pop the len elements */
+ for (i = 0; i < len; ++i)
+ if (self->write_func(self, &pop, 1) < 0)
+ goto finally;
+ /* fetch from memo */
+ if (get(self, py_tuple_id) < 0)
+ goto finally;
+ res = 0;
+ goto finally;
+ }
+ /* Not recursive. */
+ if (self->write_func(self, len2opcode + len, 1) < 0)
+ goto finally;
+ goto memoize;
+ }
+
+ /* proto < 2 and len > 0, or proto >= 2 and len > 3.
+ * Generate MARK elt1 elt2 ... TUPLE
+ */
+ if (self->write_func(self, &MARKv, 1) < 0)
+ goto finally;
+
+ if (store_tuple_elements(self, args, len) < 0)
+ goto finally;
+
+ if (PyDict_GetItem(self->memo, py_tuple_id)) {
+ /* pop the stack stuff we pushed */
+ if (self->bin) {
+ if (self->write_func(self, &pop_mark, 1) < 0)
+ goto finally;
+ }
+ else {
+ /* Note that we pop one more than len, to remove
+ * the MARK too.
+ */
+ for (i = 0; i <= len; i++)
+ if (self->write_func(self, &pop, 1) < 0)
+ goto finally;
+ }
+ /* fetch from memo */
+ if (get(self, py_tuple_id) >= 0)
+ res = 0;
+ goto finally;
+ }
+
+ /* Not recursive. */
+ if (self->write_func(self, &tuple, 1) < 0)
+ goto finally;
+
+ /* Memoize, so later references to this tuple become GET opcodes. */
+ memoize:
+ if (put(self, args) >= 0)
+ res = 0;
+
+ finally:
+ Py_XDECREF(py_tuple_id);
+ return res;
+}
+
+/* iter is an iterator giving items, and we batch up chunks of
+ * MARK item item ... item APPENDS
+ * opcode sequences. Calling code should have arranged to first create an
+ * empty list, or list-like object, for the APPENDS to operate on.
+ * Returns 0 on success, <0 on error.
+ */
+/* The reference to iter is borrowed; only the items fetched from it
+ * are released here. */
+static int
+batch_list(Picklerobject *self, PyObject *iter)
+{
+ PyObject *obj;
+ PyObject *slice[BATCHSIZE];
+ int i, n;
+
+ static char append = APPEND;
+ static char appends = APPENDS;
+
+ assert(iter != NULL);
+
+ if (self->proto == 0) {
+ /* APPENDS isn't available; do one at a time. */
+ for (;;) {
+ obj = PyIter_Next(iter);
+ if (obj == NULL) {
+ /* NULL means either exhaustion or error;
+ * PyErr_Occurred() distinguishes them. */
+ if (PyErr_Occurred())
+ return -1;
+ break;
+ }
+ i = save(self, obj, 0);
+ Py_DECREF(obj);
+ if (i < 0)
+ return -1;
+ if (self->write_func(self, &append, 1) < 0)
+ return -1;
+ }
+ return 0;
+ }
+
+ /* proto > 0: write in batches of BATCHSIZE. */
+ do {
+ /* Get next group of (no more than) BATCHSIZE elements. */
+ for (n = 0; n < BATCHSIZE; ++n) {
+ obj = PyIter_Next(iter);
+ if (obj == NULL) {
+ if (PyErr_Occurred())
+ goto BatchFailed;
+ break;
+ }
+ slice[n] = obj;
+ }
+
+ if (n > 1) {
+ /* Pump out MARK, slice[0:n], APPENDS. */
+ if (self->write_func(self, &MARKv, 1) < 0)
+ goto BatchFailed;
+ for (i = 0; i < n; ++i) {
+ if (save(self, slice[i], 0) < 0)
+ goto BatchFailed;
+ }
+ if (self->write_func(self, &appends, 1) < 0)
+ goto BatchFailed;
+ }
+ else if (n == 1) {
+ /* A singleton batch: plain APPEND is shorter. */
+ if (save(self, slice[0], 0) < 0)
+ goto BatchFailed;
+ if (self->write_func(self, &append, 1) < 0)
+ goto BatchFailed;
+ }
+
+ /* Release this batch's item references. */
+ for (i = 0; i < n; ++i) {
+ Py_DECREF(slice[i]);
+ }
+ } while (n == BATCHSIZE);
+ return 0;
+
+BatchFailed:
+ /* n counts the items currently owned in slice[]. */
+ while (--n >= 0) {
+ Py_DECREF(slice[n]);
+ }
+ return -1;
+}
+
+/* Pickle a list: emit an empty-list opcode, memoize, then append the
+ * elements through batch_list().  Returns 0 on success, -1 on error. */
+static int
+save_list(Picklerobject *self, PyObject *args)
+{
+    char header[3];
+    int hdr_len, size;
+    PyObject *it;
+    int rc = -1;
+
+    if (self->fast && !fast_save_enter(self, args))
+        goto done;
+
+    /* Create an empty list on the unpickler's stack. */
+    if (self->bin) {
+        header[0] = EMPTY_LIST;
+        hdr_len = 1;
+    }
+    else {
+        header[0] = MARK;
+        header[1] = LIST;
+        hdr_len = 2;
+    }
+    if (self->write_func(self, header, hdr_len) < 0)
+        goto done;
+
+    /* Get list length, and bow out early if empty. */
+    if ((size = PyList_Size(args)) < 0)
+        goto done;
+
+    /* Memoize.  An empty list is finished after the PUT. */
+    if (size == 0) {
+        if (put(self, args) >= 0)
+            rc = 0;
+        goto done;
+    }
+    if (put2(self, args) < 0)
+        goto done;
+
+    /* Append the elements in batches. */
+    it = PyObject_GetIter(args);
+    if (it == NULL)
+        goto done;
+    rc = batch_list(self, it);
+    Py_DECREF(it);
+
+  done:
+    if (self->fast && !fast_save_leave(self, args))
+        rc = -1;
+
+    return rc;
+}
+
+
+/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
+ *     MARK key value ... key value SETITEMS
+ * opcode sequences. Calling code should have arranged to first create an
+ * empty dict, or dict-like object, for the SETITEMS to operate on.
+ * Returns 0 on success, <0 on error.
+ *
+ * This is very much like batch_list(). The difference between saving
+ * elements directly, and picking apart two-tuples, is so long-winded at
+ * the C level, though, that attempts to combine these routines were too
+ * ugly to bear.
+ */
+static int
+batch_dict(Picklerobject *self, PyObject *iter)
+{
+    PyObject *p;
+    PyObject *slice[BATCHSIZE];
+    int i, n;
+
+    static char setitem = SETITEM;
+    static char setitems = SETITEMS;
+
+    assert(iter != NULL);
+
+    if (self->proto == 0) {
+        /* SETITEMS isn't available; do one at a time. */
+        for (;;) {
+            p = PyIter_Next(iter);
+            if (p == NULL) {
+                if (PyErr_Occurred())
+                    return -1;
+                break;
+            }
+            if (!PyTuple_Check(p) || PyTuple_Size(p) != 2) {
+                PyErr_SetString(PyExc_TypeError, "dict items "
+                                "iterator must return 2-tuples");
+                /* Bug fix: don't leak the fetched item. */
+                Py_DECREF(p);
+                return -1;
+            }
+            i = save(self, PyTuple_GET_ITEM(p, 0), 0);
+            if (i >= 0)
+                i = save(self, PyTuple_GET_ITEM(p, 1), 0);
+            Py_DECREF(p);
+            if (i < 0)
+                return -1;
+            if (self->write_func(self, &setitem, 1) < 0)
+                return -1;
+        }
+        return 0;
+    }
+
+    /* proto > 0: write in batches of BATCHSIZE. */
+    do {
+        /* Get next group of (no more than) BATCHSIZE elements. */
+        for (n = 0; n < BATCHSIZE; ++n) {
+            p = PyIter_Next(iter);
+            if (p == NULL) {
+                if (PyErr_Occurred())
+                    goto BatchFailed;
+                break;
+            }
+            if (!PyTuple_Check(p) || PyTuple_Size(p) != 2) {
+                PyErr_SetString(PyExc_TypeError, "dict items "
+                                "iterator must return 2-tuples");
+                /* Bug fix: p isn't in slice[] yet, so BatchFailed
+                 * wouldn't release it; drop it here. */
+                Py_DECREF(p);
+                goto BatchFailed;
+            }
+            slice[n] = p;
+        }
+
+        if (n > 1) {
+            /* Pump out MARK, slice[0:n], SETITEMS. */
+            if (self->write_func(self, &MARKv, 1) < 0)
+                goto BatchFailed;
+            for (i = 0; i < n; ++i) {
+                p = slice[i];
+                if (save(self, PyTuple_GET_ITEM(p, 0), 0) < 0)
+                    goto BatchFailed;
+                if (save(self, PyTuple_GET_ITEM(p, 1), 0) < 0)
+                    goto BatchFailed;
+            }
+            if (self->write_func(self, &setitems, 1) < 0)
+                goto BatchFailed;
+        }
+        else if (n == 1) {
+            /* A singleton batch: plain SETITEM is shorter. */
+            p = slice[0];
+            if (save(self, PyTuple_GET_ITEM(p, 0), 0) < 0)
+                goto BatchFailed;
+            if (save(self, PyTuple_GET_ITEM(p, 1), 0) < 0)
+                goto BatchFailed;
+            if (self->write_func(self, &setitem, 1) < 0)
+                goto BatchFailed;
+        }
+
+        /* Release this batch's item references. */
+        for (i = 0; i < n; ++i) {
+            Py_DECREF(slice[i]);
+        }
+    } while (n == BATCHSIZE);
+    return 0;
+
+BatchFailed:
+    /* n counts the items currently owned in slice[]. */
+    while (--n >= 0) {
+        Py_DECREF(slice[n]);
+    }
+    return -1;
+}
+
+/* Pickle a dict: emit an empty-dict opcode, memoize, then store the
+ * items through batch_dict().  Returns 0 on success, -1 on error. */
+static int
+save_dict(Picklerobject *self, PyObject *args)
+{
+    char header[3];
+    int hdr_len, size;
+    PyObject *item_list, *it;
+    int rc = -1;
+
+    if (self->fast && !fast_save_enter(self, args))
+        goto done;
+
+    /* Create an empty dict on the unpickler's stack. */
+    if (self->bin) {
+        header[0] = EMPTY_DICT;
+        hdr_len = 1;
+    }
+    else {
+        header[0] = MARK;
+        header[1] = DICT;
+        hdr_len = 2;
+    }
+    if (self->write_func(self, header, hdr_len) < 0)
+        goto done;
+
+    /* Get dict size, and bow out early if empty. */
+    if ((size = PyDict_Size(args)) < 0)
+        goto done;
+
+    /* Memoize.  An empty dict is finished after the PUT. */
+    if (size == 0) {
+        if (put(self, args) >= 0)
+            rc = 0;
+        goto done;
+    }
+    if (put2(self, args) < 0)
+        goto done;
+
+    /* Iterate over (key, value) pairs from dict.items(). */
+    item_list = PyObject_CallMethod(args, "items", "()");
+    if (item_list == NULL)
+        goto done;
+    it = PyObject_GetIter(item_list);
+    Py_DECREF(item_list);
+    if (it == NULL)
+        goto done;
+    rc = batch_dict(self, it);
+    Py_DECREF(it);
+
+  done:
+    if (self->fast && !fast_save_leave(self, args))
+        rc = -1;
+
+    return rc;
+}
+
+
+/* Pickle a class, function, or other module-global object by
+ * reference: either an EXT{1,2,4} opcode when (module, name) is in
+ * the copy_reg extension registry (proto >= 2), or a GLOBAL opcode
+ * followed by "module\nname\n".  name may be NULL, in which case
+ * args.__name__ is used.  The object is re-imported to verify it
+ * really is reachable as module.name.
+ * Returns 0 on success, -1 (with PicklingError set) on failure. */
+static int
+save_global(Picklerobject *self, PyObject *args, PyObject *name)
+{
+ PyObject *global_name = 0, *module = 0, *mod = 0, *klass = 0;
+ char *name_str, *module_str;
+ int module_size, name_size, res = -1;
+
+ static char global = GLOBAL;
+
+ if (name) {
+ global_name = name;
+ Py_INCREF(global_name);
+ }
+ else {
+ if (!( global_name = PyObject_GetAttr(args, __name___str)))
+ goto finally;
+ }
+
+ if (!( module = whichmodule(args, global_name)))
+ goto finally;
+
+ if ((module_size = PyString_Size(module)) < 0 ||
+ (name_size = PyString_Size(global_name)) < 0)
+ goto finally;
+
+ module_str = PyString_AS_STRING((PyStringObject *)module);
+ name_str = PyString_AS_STRING((PyStringObject *)global_name);
+
+ /* XXX This can be doing a relative import. Clearly it shouldn't,
+ but I don't know how to stop it. :-( */
+ mod = PyImport_ImportModule(module_str);
+ if (mod == NULL) {
+ cPickle_ErrFormat(PicklingError,
+ "Can't pickle %s: import of module %s "
+ "failed",
+ "OS", args, module);
+ goto finally;
+ }
+ klass = PyObject_GetAttrString(mod, name_str);
+ if (klass == NULL) {
+ cPickle_ErrFormat(PicklingError,
+ "Can't pickle %s: attribute lookup %s.%s "
+ "failed",
+ "OSS", args, module, global_name);
+ goto finally;
+ }
+ /* The lookup must yield the very same object, or unpickling
+ * would produce something different from what was pickled. */
+ if (klass != args) {
+ Py_DECREF(klass);
+ cPickle_ErrFormat(PicklingError,
+ "Can't pickle %s: it's not the same object "
+ "as %s.%s",
+ "OSS", args, module, global_name);
+ goto finally;
+ }
+ Py_DECREF(klass);
+
+ if (self->proto >= 2) {
+ /* See whether this is in the extension registry, and if
+ * so generate an EXT opcode.
+ */
+ PyObject *py_code; /* extension code as Python object */
+ long code; /* extension code as C value */
+ char c_str[5];
+ int n;
+
+ /* NOTE(review): PyTuple_SET_ITEM steals references, but
+ * module/global_name are still owned (and DECREF'd) below,
+ * so two_tuple is left holding borrowed/stale pointers.
+ * Apparently tolerated because two_tuple is only used
+ * transiently for this lookup -- confirm. */
+ PyTuple_SET_ITEM(two_tuple, 0, module);
+ PyTuple_SET_ITEM(two_tuple, 1, global_name);
+ py_code = PyDict_GetItem(extension_registry, two_tuple);
+ if (py_code == NULL)
+ goto gen_global; /* not registered */
+
+ /* Verify py_code has the right type and value. */
+ if (!PyInt_Check(py_code)) {
+ cPickle_ErrFormat(PicklingError, "Can't pickle %s: "
+ "extension code %s isn't an integer",
+ "OO", args, py_code);
+ goto finally;
+ }
+ code = PyInt_AS_LONG(py_code);
+ if (code <= 0 || code > 0x7fffffffL) {
+ cPickle_ErrFormat(PicklingError, "Can't pickle %s: "
+ "extension code %ld is out of range",
+ "Ol", args, code);
+ goto finally;
+ }
+
+ /* Generate an EXT opcode: pick the shortest of EXT1/2/4
+ * that can hold the code (little-endian). */
+ if (code <= 0xff) {
+ c_str[0] = EXT1;
+ c_str[1] = (char)code;
+ n = 2;
+ }
+ else if (code <= 0xffff) {
+ c_str[0] = EXT2;
+ c_str[1] = (char)(code & 0xff);
+ c_str[2] = (char)((code >> 8) & 0xff);
+ n = 3;
+ }
+ else {
+ c_str[0] = EXT4;
+ c_str[1] = (char)(code & 0xff);
+ c_str[2] = (char)((code >> 8) & 0xff);
+ c_str[3] = (char)((code >> 16) & 0xff);
+ c_str[4] = (char)((code >> 24) & 0xff);
+ n = 5;
+ }
+
+ if (self->write_func(self, c_str, n) >= 0)
+ res = 0;
+ goto finally; /* and don't memoize */
+ }
+
+ gen_global:
+ if (self->write_func(self, &global, 1) < 0)
+ goto finally;
+
+ if (self->write_func(self, module_str, module_size) < 0)
+ goto finally;
+
+ if (self->write_func(self, "\n", 1) < 0)
+ goto finally;
+
+ if (self->write_func(self, name_str, name_size) < 0)
+ goto finally;
+
+ if (self->write_func(self, "\n", 1) < 0)
+ goto finally;
+
+ if (put(self, args) < 0)
+ goto finally;
+
+ res = 0;
+
+ finally:
+ Py_XDECREF(module);
+ Py_XDECREF(global_name);
+ Py_XDECREF(mod);
+
+ return res;
+}
+
+/* Try to pickle args as a persistent reference, by calling the
+ * user-supplied persistent-id function f with args.
+ * Returns:
+ *    1 -- args was pickled as a persistent ID (caller is done),
+ *    0 -- f returned None; caller should pickle args normally,
+ *   -1 -- error. */
+static int
+save_pers(Picklerobject *self, PyObject *args, PyObject *f)
+{
+ PyObject *pid = 0;
+ int size, res = -1;
+
+ static char persid = PERSID, binpersid = BINPERSID;
+
+ Py_INCREF(args);
+ ARG_TUP(self, args);
+ if (self->arg) {
+ pid = PyObject_Call(f, self->arg, NULL);
+ FREE_ARG_TUP(self);
+ }
+ if (! pid) return -1;
+
+ /* A non-None pid means args is identified persistently. */
+ if (pid != Py_None) {
+ if (!self->bin) {
+ /* Text protocol: PERSID requires a string id,
+ * written followed by a newline. */
+ if (!PyString_Check(pid)) {
+ PyErr_SetString(PicklingError,
+ "persistent id must be string");
+ goto finally;
+ }
+
+ if (self->write_func(self, &persid, 1) < 0)
+ goto finally;
+
+ if ((size = PyString_Size(pid)) < 0)
+ goto finally;
+
+ if (self->write_func(self,
+ PyString_AS_STRING(
+ (PyStringObject *)pid),
+ size) < 0)
+ goto finally;
+
+ if (self->write_func(self, "\n", 1) < 0)
+ goto finally;
+
+ res = 1;
+ goto finally;
+ }
+ /* Binary protocol: pickle the id itself (pers_save=1
+ * prevents recursing back into this hook), then BINPERSID. */
+ else if (save(self, pid, 1) >= 0) {
+ if (self->write_func(self, &binpersid, 1) < 0)
+ res = -1;
+ else
+ res = 1;
+ }
+
+ goto finally;
+ }
+
+ res = 0;
+
+ finally:
+ Py_XDECREF(pid);
+
+ return res;
+}
+
+/* We're saving ob, and args is the 2-thru-5 tuple returned by the
+ * appropriate __reduce__ method for ob.
+ *
+ * args unpacks to (callable, argtup[, state[, listitems[, dictitems]]]).
+ * For protocol >= 2, a callable named __newobj__ triggers the NEWOBJ
+ * opcode (cls.__new__(cls, *argtup[1:])); otherwise REDUCE is used.
+ * state/listitems/dictitems, when given and not None, are emitted
+ * afterwards as BUILD / APPENDS / SETITEMS sequences.
+ * Returns 0 on success, -1 on error.
+ */
+static int
+save_reduce(Picklerobject *self, PyObject *args, PyObject *ob)
+{
+    PyObject *callable;
+    PyObject *argtup;
+    PyObject *state = NULL;
+    PyObject *listitems = NULL;
+    PyObject *dictitems = NULL;
+
+    int use_newobj = self->proto >= 2;
+
+    static char reduce = REDUCE;
+    static char build = BUILD;
+    static char newobj = NEWOBJ;
+
+    if (! PyArg_UnpackTuple(args, "save_reduce", 2, 5,
+                            &callable,
+                            &argtup,
+                            &state,
+                            &listitems,
+                            &dictitems))
+        return -1;
+
+    if (!PyTuple_Check(argtup)) {
+        PyErr_SetString(PicklingError,
+                        "args from reduce() should be a tuple");
+        return -1;
+    }
+
+    /* Treat an explicit None the same as "not supplied". */
+    if (state == Py_None)
+        state = NULL;
+    if (listitems == Py_None)
+        listitems = NULL;
+    if (dictitems == Py_None)
+        dictitems = NULL;
+
+    /* Protocol 2 special case: if callable's name is __newobj__, use
+     * NEWOBJ.  This consumes a lot of code.
+     */
+    if (use_newobj) {
+        PyObject *temp = PyObject_GetAttr(callable, __name___str);
+
+        if (temp == NULL) {
+            if (PyErr_ExceptionMatches(PyExc_AttributeError))
+                PyErr_Clear();
+            else
+                return -1;
+            use_newobj = 0;
+        }
+        else {
+            use_newobj = PyString_Check(temp) &&
+                strcmp(PyString_AS_STRING(temp),
+                       "__newobj__") == 0;
+            Py_DECREF(temp);
+        }
+    }
+    if (use_newobj) {
+        PyObject *cls;
+        PyObject *newargtup;
+        int n, i;
+
+        /* Sanity checks. */
+        n = PyTuple_Size(argtup);
+        if (n < 1) {
+            PyErr_SetString(PicklingError, "__newobj__ arglist "
+                            "is empty");
+            return -1;
+        }
+
+        cls = PyTuple_GET_ITEM(argtup, 0);
+        if (! PyObject_HasAttrString(cls, "__new__")) {
+            PyErr_SetString(PicklingError, "args[0] from "
+                            "__newobj__ args has no __new__");
+            return -1;
+        }
+
+        /* XXX How could ob be NULL? */
+        if (ob != NULL) {
+            PyObject *ob_dot_class;
+
+            ob_dot_class = PyObject_GetAttr(ob, __class___str);
+            if (ob_dot_class == NULL) {
+                if (PyErr_ExceptionMatches(
+                        PyExc_AttributeError))
+                    PyErr_Clear();
+                else
+                    return -1;
+            }
+            i = ob_dot_class != cls;    /* true iff a problem */
+            Py_XDECREF(ob_dot_class);
+            if (i) {
+                PyErr_SetString(PicklingError, "args[0] from "
+                                "__newobj__ args has the wrong class");
+                return -1;
+            }
+        }
+
+        /* Save the class and its __new__ arguments. */
+        if (save(self, cls, 0) < 0)
+            return -1;
+
+        newargtup = PyTuple_New(n-1);   /* argtup[1:] */
+        if (newargtup == NULL)
+            return -1;
+        for (i = 1; i < n; ++i) {
+            PyObject *temp = PyTuple_GET_ITEM(argtup, i);
+            Py_INCREF(temp);
+            PyTuple_SET_ITEM(newargtup, i-1, temp);
+        }
+        /* Bug fix: this used to read "i = save(...) < 0", making i
+         * 0 or 1 -- never negative -- so the "if (i < 0)" below could
+         * never trigger and save() failures were silently ignored. */
+        i = save(self, newargtup, 0);
+        Py_DECREF(newargtup);
+        if (i < 0)
+            return -1;
+
+        /* Add NEWOBJ opcode. */
+        if (self->write_func(self, &newobj, 1) < 0)
+            return -1;
+    }
+    else {
+        /* Not using NEWOBJ. */
+        if (save(self, callable, 0) < 0 ||
+            save(self, argtup, 0) < 0 ||
+            self->write_func(self, &reduce, 1) < 0)
+            return -1;
+    }
+
+    /* Memoize. */
+    /* XXX How can ob be NULL? */
+    if (ob != NULL) {
+        /* Non-dict state goes through put2 -- presumably to force an
+         * immediate PUT; see put2's definition to confirm. */
+        if (state && !PyDict_Check(state)) {
+            if (put2(self, ob) < 0)
+                return -1;
+        }
+        else if (put(self, ob) < 0)
+            return -1;
+    }
+
+
+    if (listitems && batch_list(self, listitems) < 0)
+        return -1;
+
+    if (dictitems && batch_dict(self, dictitems) < 0)
+        return -1;
+
+    if (state) {
+        if (save(self, state, 0) < 0 ||
+            self->write_func(self, &build, 1) < 0)
+            return -1;
+    }
+
+    return 0;
+}
+
+/* Core dispatch: pickle an arbitrary object.  Tries, in order: the
+ * persistent-ID hook, fast paths keyed on the first character of the
+ * type name, the memo (for shared/recursive objects), container and
+ * global savers, and finally copy_reg's dispatch_table /
+ * __reduce_ex__ / __reduce__.  pers_save is true when called from
+ * save_pers, to avoid recursing into the persistent-ID hook.
+ * Returns 0 on success, -1 on error. */
+static int
+save(Picklerobject *self, PyObject *args, int pers_save)
+{
+ PyTypeObject *type;
+ PyObject *py_ob_id = 0, *__reduce__ = 0, *t = 0;
+ PyObject *arg_tup;
+ int res = -1;
+ int tmp, size;
+
+ /* Manual depth guard: C recursion here mirrors the depth of the
+ * object graph being pickled. */
+ if (self->nesting++ > Py_GetRecursionLimit()){
+ PyErr_SetString(PyExc_RuntimeError,
+ "maximum recursion depth exceeded");
+ goto finally;
+ }
+
+ if (!pers_save && self->pers_func) {
+ if ((tmp = save_pers(self, args, self->pers_func)) != 0) {
+ res = tmp;
+ goto finally;
+ }
+ }
+
+ if (args == Py_None) {
+ res = save_none(self, args);
+ goto finally;
+ }
+
+ type = args->ob_type;
+
+ /* Fast path for small scalar types, keyed on the first character
+ * of tp_name; these are never memoized. */
+ switch (type->tp_name[0]) {
+ case 'b':
+ if (args == Py_False || args == Py_True) {
+ res = save_bool(self, args);
+ goto finally;
+ }
+ break;
+ case 'i':
+ /* NOTE(review): checks PyLong_Type under 'i' -- presumably
+ * in this merged branch the long type's tp_name is "int";
+ * confirm. */
+ if (type == &PyLong_Type) {
+ res = save_long(self, args);
+ goto finally;
+ }
+ break;
+
+ case 'f':
+ if (type == &PyFloat_Type) {
+ res = save_float(self, args);
+ goto finally;
+ }
+ break;
+
+ case 't':
+ if (type == &PyTuple_Type && PyTuple_Size(args) == 0) {
+ res = save_tuple(self, args);
+ goto finally;
+ }
+ break;
+
+ case 's':
+ if ((type == &PyString_Type) && (PyString_GET_SIZE(args) < 2)) {
+ res = save_string(self, args, 0);
+ goto finally;
+ }
+ /* NOTE(review): no "break" here -- case 's' falls through
+ * into case 'u'. Harmless today because the type test
+ * below fails for str objects, but it looks unintended;
+ * confirm. */
+
+#ifdef Py_USING_UNICODE
+ case 'u':
+ /* NOTE(review): PyString_GET_SIZE applied to a unicode
+ * object -- appears to rely on the unicode length field
+ * aliasing ob_size; verify. */
+ if ((type == &PyUnicode_Type) && (PyString_GET_SIZE(args) < 2)) {
+ res = save_unicode(self, args, 0);
+ goto finally;
+ }
+#endif
+ }
+
+ /* Only objects with more than one reference can usefully be
+ * memoized; check whether this one was already pickled. */
+ if (args->ob_refcnt > 1) {
+ if (!( py_ob_id = PyLong_FromVoidPtr(args)))
+ goto finally;
+
+ if (PyDict_GetItem(self->memo, py_ob_id)) {
+ if (get(self, py_ob_id) < 0)
+ goto finally;
+
+ res = 0;
+ goto finally;
+ }
+ }
+
+ /* Dispatch on type for containers and a few callables; these
+ * paths do memoize. */
+ switch (type->tp_name[0]) {
+ case 's':
+ if (type == &PyString_Type) {
+ res = save_string(self, args, 1);
+ goto finally;
+ }
+ break;
+
+#ifdef Py_USING_UNICODE
+ case 'u':
+ if (type == &PyUnicode_Type) {
+ res = save_unicode(self, args, 1);
+ goto finally;
+ }
+ break;
+#endif
+
+ case 't':
+ if (type == &PyTuple_Type) {
+ res = save_tuple(self, args);
+ goto finally;
+ }
+ if (type == &PyType_Type) {
+ res = save_global(self, args, NULL);
+ goto finally;
+ }
+ break;
+
+ case 'l':
+ if (type == &PyList_Type) {
+ res = save_list(self, args);
+ goto finally;
+ }
+ break;
+
+ case 'd':
+ if (type == &PyDict_Type) {
+ res = save_dict(self, args);
+ goto finally;
+ }
+ break;
+
+ case 'i':
+ break;
+
+ case 'c':
+ break;
+
+ case 'f':
+ if (type == &PyFunction_Type) {
+ res = save_global(self, args, NULL);
+ if (res && PyErr_ExceptionMatches(PickleError)) {
+ /* fall back to reduce */
+ PyErr_Clear();
+ break;
+ }
+ goto finally;
+ }
+ break;
+
+ case 'b':
+ if (type == &PyCFunction_Type) {
+ res = save_global(self, args, NULL);
+ goto finally;
+ }
+ }
+
+ if (!pers_save && self->inst_pers_func) {
+ if ((tmp = save_pers(self, args, self->inst_pers_func)) != 0) {
+ res = tmp;
+ goto finally;
+ }
+ }
+
+ /* Subclasses of type (metaclasses) also pickle by reference. */
+ if (PyType_IsSubtype(type, &PyType_Type)) {
+ res = save_global(self, args, NULL);
+ goto finally;
+ }
+
+ /* Get a reduction callable, and call it. This may come from
+ * copy_reg.dispatch_table, the object's __reduce_ex__ method,
+ * or the object's __reduce__ method.
+ */
+ __reduce__ = PyDict_GetItem(dispatch_table, (PyObject *)type);
+ if (__reduce__ != NULL) {
+ Py_INCREF(__reduce__);
+ Py_INCREF(args);
+ ARG_TUP(self, args);
+ if (self->arg) {
+ t = PyObject_Call(__reduce__, self->arg, NULL);
+ FREE_ARG_TUP(self);
+ }
+ }
+ else {
+ /* Check for a __reduce_ex__ method. */
+ __reduce__ = PyObject_GetAttr(args, __reduce_ex___str);
+ if (__reduce__ != NULL) {
+ /* __reduce_ex__ is called with the protocol number. */
+ t = PyInt_FromLong(self->proto);
+ if (t != NULL) {
+ ARG_TUP(self, t);
+ t = NULL;
+ if (self->arg) {
+ t = PyObject_Call(__reduce__,
+ self->arg, NULL);
+ FREE_ARG_TUP(self);
+ }
+ }
+ }
+ else {
+ if (PyErr_ExceptionMatches(PyExc_AttributeError))
+ PyErr_Clear();
+ else
+ goto finally;
+ /* Check for a __reduce__ method. */
+ __reduce__ = PyObject_GetAttr(args, __reduce___str);
+ if (__reduce__ != NULL) {
+ t = PyObject_Call(__reduce__,
+ empty_tuple, NULL);
+ }
+ else {
+ PyErr_SetObject(UnpickleableError, args);
+ goto finally;
+ }
+ }
+ }
+
+ if (t == NULL)
+ goto finally;
+
+ /* A string result names a global to pickle by reference. */
+ if (PyString_Check(t)) {
+ res = save_global(self, args, t);
+ goto finally;
+ }
+
+ if (! PyTuple_Check(t)) {
+ cPickle_ErrFormat(PicklingError, "Value returned by "
+ "%s must be string or tuple",
+ "O", __reduce__);
+ goto finally;
+ }
+
+ size = PyTuple_Size(t);
+ if (size < 2 || size > 5) {
+ cPickle_ErrFormat(PicklingError, "tuple returned by "
+ "%s must contain 2 through 5 elements",
+ "O", __reduce__);
+ goto finally;
+ }
+
+ arg_tup = PyTuple_GET_ITEM(t, 1);
+ if (!(PyTuple_Check(arg_tup) || arg_tup == Py_None)) {
+ cPickle_ErrFormat(PicklingError, "Second element of "
+ "tuple returned by %s must be a tuple",
+ "O", __reduce__);
+ goto finally;
+ }
+
+ res = save_reduce(self, t, args);
+
+ finally:
+ self->nesting--;
+ Py_XDECREF(py_ob_id);
+ Py_XDECREF(__reduce__);
+ Py_XDECREF(t);
+
+ return res;
+}
+
+
+/* Write a complete pickle of args to the output: an optional PROTO
+ * header (protocol >= 2), the pickled object graph, a STOP opcode,
+ * and a final flush of the write buffer.
+ * Returns 0 on success, -1 on error. */
+static int
+dump(Picklerobject *self, PyObject *args)
+{
+    static char stop = STOP;
+    char header[2];
+
+    if (self->proto >= 2) {
+        assert(self->proto >= 0 && self->proto < 256);
+        header[0] = PROTO;
+        header[1] = (char)self->proto;
+        if (self->write_func(self, header, 2) < 0)
+            return -1;
+    }
+
+    /* Pickle the graph, terminate it, then flush (a zero-length
+     * write flushes the buffered writer). */
+    if (save(self, args, 0) < 0 ||
+        self->write_func(self, &stop, 1) < 0 ||
+        self->write_func(self, NULL, 0) < 0)
+        return -1;
+
+    return 0;
+}
+
+/* clear_memo() method: forget every object pickled so far, so that
+ * subsequent dumps emit full copies instead of GET references. */
+static PyObject *
+Pickle_clear_memo(Picklerobject *self, PyObject *args)
+{
+    if (self->memo != NULL)
+        PyDict_Clear(self->memo);
+    Py_RETURN_NONE;
+}
+
+/* getvalue([clear]) -- concatenate and return the pickle accumulated
+ * in a list-based (Pdata) pickler.  Recorded PUT/GET pairs are
+ * resolved: puts never matched by a get are dropped, matched ones
+ * become BINPUT/LONG_BINPUT, and gets become BINGET/LONG_BINGET.
+ * When clear is true (the default) the memo and data list are reset.
+ * Callable from Python or from C (args may be NULL).
+ * Returns a new string object, or NULL on error. */
+static PyObject *
+Pickle_getvalue(Picklerobject *self, PyObject *args)
+{
+    int l, i, rsize, ssize, clear=1, lm;
+    long ik;
+    PyObject *k, *r = NULL;
+    char *s, *p, *have_get;
+    Pdata *data;
+
+    /* Can be called by Python code or C code */
+    if (args && !PyArg_ParseTuple(args, "|i:getvalue", &clear))
+        return NULL;
+
+    /* Check to make sure we are based on a list */
+    if (! Pdata_Check(self->file)) {
+        PyErr_SetString(PicklingError,
+                        "Attempt to getvalue() a non-list-based pickler");
+        return NULL;
+    }
+
+    /* flush write buffer */
+    if (write_other(self, NULL, 0) < 0) return NULL;
+
+    data=(Pdata*)self->file;
+    l=data->length;
+
+    /* set up an array to hold get/put status; memo keys are 1-based */
+    lm = PyDict_Size(self->memo);
+    if (lm < 0) return NULL;
+    lm++;
+    have_get = malloc(lm);
+    if (have_get == NULL) return PyErr_NoMemory();
+    memset(have_get, 0, lm);
+
+    /* Scan backwards so each get is seen before its put, and compute
+     * the total output size while at it. */
+    for (rsize = 0, i = l; --i >= 0; ) {
+        k = data->data[i];
+
+        if (PyString_Check(k))
+            rsize += PyString_GET_SIZE(k);
+
+        else if (PyInt_Check(k)) { /* put */
+            ik = PyInt_AsLong(k);
+            if (ik == -1 && PyErr_Occurred())
+                goto err;
+            if (ik >= lm || ik == 0) {
+                PyErr_SetString(PicklingError,
+                                "Invalid get data");
+                goto err;
+            }
+            if (have_get[ik]) /* with matching get */
+                rsize += ik < 256 ? 2 : 5;
+        }
+
+        else if (! (PyTuple_Check(k) &&
+                    PyTuple_GET_SIZE(k) == 2 &&
+                    PyInt_Check((k = PyTuple_GET_ITEM(k, 0))))
+                 ) {
+            PyErr_SetString(PicklingError,
+                            "Unexpected data in internal list");
+            goto err;
+        }
+
+        else { /* a 2-tuple records a get (comment said "put") */
+            ik = PyInt_AsLong(k);
+            if (ik == -1 && PyErr_Occurred())
+                goto err;
+            if (ik >= lm || ik == 0) {
+                PyErr_SetString(PicklingError,
+                                "Invalid get data");
+                /* Bug fix: this used to "return NULL", leaking
+                 * have_get. */
+                goto err;
+            }
+            have_get[ik] = 1;
+            rsize += ik < 256 ? 2 : 5;
+        }
+    }
+
+    /* Now generate the result */
+    r = PyString_FromStringAndSize(NULL, rsize);
+    if (r == NULL) goto err;
+    s = PyString_AS_STRING((PyStringObject *)r);
+
+    for (i = 0; i < l; i++) {
+        k = data->data[i];
+
+        if (PyString_Check(k)) {
+            ssize = PyString_GET_SIZE(k);
+            if (ssize) {
+                p=PyString_AS_STRING((PyStringObject *)k);
+                while (--ssize >= 0)
+                    *s++ = *p++;
+            }
+        }
+
+        else if (PyTuple_Check(k)) { /* get */
+            ik = PyLong_AsLong(PyTuple_GET_ITEM(k, 0));
+            if (ik == -1 && PyErr_Occurred())
+                goto err;
+            if (ik < 256) {
+                *s++ = BINGET;
+                *s++ = (int)(ik & 0xff);
+            }
+            else {
+                *s++ = LONG_BINGET;
+                *s++ = (int)(ik & 0xff);
+                *s++ = (int)((ik >> 8) & 0xff);
+                *s++ = (int)((ik >> 16) & 0xff);
+                *s++ = (int)((ik >> 24) & 0xff);
+            }
+        }
+
+        else { /* put -- emitted only if a matching get exists */
+            ik = PyLong_AsLong(k);
+            if (ik == -1 && PyErr_Occurred())
+                goto err;
+
+            if (have_get[ik]) { /* with matching get */
+                if (ik < 256) {
+                    *s++ = BINPUT;
+                    *s++ = (int)(ik & 0xff);
+                }
+                else {
+                    *s++ = LONG_BINPUT;
+                    *s++ = (int)(ik & 0xff);
+                    *s++ = (int)((ik >> 8) & 0xff);
+                    *s++ = (int)((ik >> 16) & 0xff);
+                    *s++ = (int)((ik >> 24) & 0xff);
+                }
+            }
+        }
+    }
+
+    if (clear) {
+        PyDict_Clear(self->memo);
+        Pdata_clear(data, 0);
+    }
+
+    free(have_get);
+    return r;
+  err:
+    free(have_get);
+    /* Bug fix: release the partially built result string too (r is
+     * NULL until the second pass starts). */
+    Py_XDECREF(r);
+    return NULL;
+}
+
+/* dump(object[, get]) method: pickle `object` to the stream.  When
+ * the optional flag is true, return the accumulated pickle text via
+ * getvalue() (list-based picklers only); otherwise return self. */
+static PyObject *
+Pickler_dump(Picklerobject *self, PyObject *args)
+{
+    PyObject *obj;
+    int want_value = 0;
+
+    if (!PyArg_ParseTuple(args, "O|i:dump", &obj, &want_value))
+        return NULL;
+
+    if (dump(self, obj) < 0)
+        return NULL;
+
+    if (want_value)
+        return Pickle_getvalue(self, NULL);
+
+    /* XXX Why does dump() return self? */
+    Py_INCREF(self);
+    return (PyObject *)self;
+}
+
+
+/* Method table for Pickler objects.  (Fixes two docstring typos:
+ * "picklers memo" and "Finish picking".) */
+static struct PyMethodDef Pickler_methods[] =
+{
+  {"dump",          (PyCFunction)Pickler_dump,  METH_VARARGS,
+   PyDoc_STR("dump(object) -- "
+   "Write an object in pickle format to the object's pickle stream")},
+  {"clear_memo",  (PyCFunction)Pickle_clear_memo,  METH_NOARGS,
+   PyDoc_STR("clear_memo() -- Clear the pickler's memo")},
+  {"getvalue",  (PyCFunction)Pickle_getvalue,  METH_VARARGS,
+   PyDoc_STR("getvalue() -- Finish pickling a list-based pickle")},
+  {NULL,                NULL}           /* sentinel */
+};
+
+
+/* Construct a new Pickler.  file may be: a real file object (writes
+ * go through write_file), a cStringIO output object, None (writes are
+ * discarded), a Pdata list (list-based pickler, as used by dumps), or
+ * any object with a write() method (buffered through write_other).
+ * proto < 0 selects HIGHEST_PROTOCOL.
+ * Returns a new reference, or NULL with an exception set. */
+static Picklerobject *
+newPicklerobject(PyObject *file, int proto)
+{
+ Picklerobject *self;
+
+ if (proto < 0)
+ proto = HIGHEST_PROTOCOL;
+ if (proto > HIGHEST_PROTOCOL) {
+ PyErr_Format(PyExc_ValueError, "pickle protocol %d asked for; "
+ "the highest available protocol is %d",
+ proto, HIGHEST_PROTOCOL);
+ return NULL;
+ }
+
+ self = PyObject_GC_New(Picklerobject, &Picklertype);
+ if (self == NULL)
+ return NULL;
+ /* NULL-initialize every field first, so that the error path below
+ * can safely Py_DECREF(self) and run the deallocator. */
+ self->proto = proto;
+ self->bin = proto > 0;
+ self->fp = NULL;
+ self->write = NULL;
+ self->memo = NULL;
+ self->arg = NULL;
+ self->pers_func = NULL;
+ self->inst_pers_func = NULL;
+ self->write_buf = NULL;
+ self->fast = 0;
+ self->nesting = 0;
+ self->fast_container = 0;
+ self->fast_memo = NULL;
+ self->buf_size = 0;
+ self->dispatch_table = NULL;
+
+ self->file = NULL;
+ /* Without a file, pickle into an internal Pdata list (this is the
+ * list-based pickler used to implement dumps()). */
+ if (file)
+ Py_INCREF(file);
+ else {
+ file = Pdata_New();
+ if (file == NULL)
+ goto err;
+ }
+ self->file = file;
+
+ if (!( self->memo = PyDict_New()))
+ goto err;
+
+ /* Pick the fastest write strategy the output object supports. */
+ if (PyFile_Check(file)) {
+ self->fp = PyFile_AsFile(file);
+ if (self->fp == NULL) {
+ PyErr_SetString(PyExc_ValueError,
+ "I/O operation on closed file");
+ goto err;
+ }
+ self->write_func = write_file;
+ }
+ else if (PycStringIO_OutputCheck(file)) {
+ self->write_func = write_cStringIO;
+ }
+ else if (file == Py_None) {
+ self->write_func = write_none;
+ }
+ else {
+ self->write_func = write_other;
+
+ /* A Pdata output needs no write() method; anything else
+ * must supply one. */
+ if (! Pdata_Check(file)) {
+ self->write = PyObject_GetAttr(file, write_str);
+ if (!self->write) {
+ PyErr_Clear();
+ PyErr_SetString(PyExc_TypeError,
+ "argument must have 'write' "
+ "attribute");
+ goto err;
+ }
+ }
+
+ /* write_other batches small writes through this buffer. */
+ self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
+ if (self->write_buf == NULL) {
+ PyErr_NoMemory();
+ goto err;
+ }
+ }
+
+ self->dispatch_table = dispatch_table;
+ Py_INCREF(dispatch_table);
+ PyObject_GC_Track(self);
+
+ return self;
+
+ err:
+ Py_DECREF(self);
+ return NULL;
+}
+
+
+/* Module-level Pickler() constructor. */
+static PyObject *
+get_Pickler(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ static char *kwlist[] = {"file", "protocol", NULL};
+ PyObject *file = NULL;
+ int proto = 0;
+
+ /* XXX
+ * The documented signature is Pickler(file, protocol=0), but this
+ * accepts Pickler() and Pickler(integer) too. The meaning then
+ * is clear as mud, undocumented, and not supported by pickle.py.
+ * I'm told Zope uses this, but I haven't traced into this code
+ * far enough to figure out what it means.
+ */
+ if (!PyArg_ParseTuple(args, "|i:Pickler", &proto)) {
+ /* Not the bare/integer form; retry with the documented one. */
+ PyErr_Clear();
+ proto = 0;
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:Pickler",
+ kwlist, &file, &proto))
+ return NULL;
+ }
+ return (PyObject *)newPicklerobject(file, proto);
+}
+
+
+/* tp_dealloc: untrack from GC, drop all owned references and the
+ * write buffer, then free the object. */
+static void
+Pickler_dealloc(Picklerobject *self)
+{
+ PyObject_GC_UnTrack(self);
+ Py_XDECREF(self->write);
+ Py_XDECREF(self->memo);
+ Py_XDECREF(self->fast_memo);
+ Py_XDECREF(self->arg);
+ Py_XDECREF(self->file);
+ Py_XDECREF(self->pers_func);
+ Py_XDECREF(self->inst_pers_func);
+ Py_XDECREF(self->dispatch_table);
+ PyMem_Free(self->write_buf);
+ self->ob_type->tp_free((PyObject *)self);
+}
+
+/* tp_traverse: visit every PyObject member for the cycle collector. */
+static int
+Pickler_traverse(Picklerobject *self, visitproc visit, void *arg)
+{
+ Py_VISIT(self->write);
+ Py_VISIT(self->memo);
+ Py_VISIT(self->fast_memo);
+ Py_VISIT(self->arg);
+ Py_VISIT(self->file);
+ Py_VISIT(self->pers_func);
+ Py_VISIT(self->inst_pers_func);
+ Py_VISIT(self->dispatch_table);
+ return 0;
+}
+
+/* tp_clear: break reference cycles by dropping all PyObject members. */
+static int
+Pickler_clear(Picklerobject *self)
+{
+ Py_CLEAR(self->write);
+ Py_CLEAR(self->memo);
+ Py_CLEAR(self->fast_memo);
+ Py_CLEAR(self->arg);
+ Py_CLEAR(self->file);
+ Py_CLEAR(self->pers_func);
+ Py_CLEAR(self->inst_pers_func);
+ Py_CLEAR(self->dispatch_table);
+ return 0;
+}
+
+/* Getter for the 'persistent_id' attribute; AttributeError if unset. */
+static PyObject *
+Pickler_get_pers_func(Picklerobject *p)
+{
+ if (p->pers_func == NULL)
+ PyErr_SetString(PyExc_AttributeError, "persistent_id");
+ else
+ Py_INCREF(p->pers_func);
+ return p->pers_func;
+}
+
+/* Setter for 'persistent_id'; deletion (v == NULL) is rejected. */
+static int
+Pickler_set_pers_func(Picklerobject *p, PyObject *v)
+{
+ if (v == NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "attribute deletion is not supported");
+ return -1;
+ }
+ Py_XDECREF(p->pers_func);
+ Py_INCREF(v);
+ p->pers_func = v;
+ return 0;
+}
+
+/* Setter for 'inst_persistent_id'; deletion (v == NULL) is rejected. */
+static int
+Pickler_set_inst_pers_func(Picklerobject *p, PyObject *v)
+{
+ if (v == NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "attribute deletion is not supported");
+ return -1;
+ }
+ Py_XDECREF(p->inst_pers_func);
+ Py_INCREF(v);
+ p->inst_pers_func = v;
+ return 0;
+}
+
+/* Getter for the 'memo' attribute; AttributeError if unset. */
+static PyObject *
+Pickler_get_memo(Picklerobject *p)
+{
+ if (p->memo == NULL)
+ PyErr_SetString(PyExc_AttributeError, "memo");
+ else
+ Py_INCREF(p->memo);
+ return p->memo;
+}
+
+/* Setter for 'memo': must be a dict; deletion is rejected. */
+static int
+Pickler_set_memo(Picklerobject *p, PyObject *v)
+{
+ if (v == NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "attribute deletion is not supported");
+ return -1;
+ }
+ if (!PyDict_Check(v)) {
+ PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
+ return -1;
+ }
+ Py_XDECREF(p->memo);
+ Py_INCREF(v);
+ p->memo = v;
+ return 0;
+}
+
+/* Getter for the (read-only) 'PicklingError' attribute. */
+static PyObject *
+Pickler_get_error(Picklerobject *p)
+{
+ /* why is this an attribute on the Pickler? */
+ Py_INCREF(PicklingError);
+ return PicklingError;
+}
+
+/* Plain int members exposed directly on Pickler instances. */
+static PyMemberDef Pickler_members[] = {
+ {"binary", T_INT, offsetof(Picklerobject, bin)},
+ {"fast", T_INT, offsetof(Picklerobject, fast)},
+ {NULL}
+};
+
+/* Computed/validated attributes; inst_persistent_id is write-only. */
+static PyGetSetDef Pickler_getsets[] = {
+ {"persistent_id", (getter)Pickler_get_pers_func,
+ (setter)Pickler_set_pers_func},
+ {"inst_persistent_id", NULL, (setter)Pickler_set_inst_pers_func},
+ {"memo", (getter)Pickler_get_memo, (setter)Pickler_set_memo},
+ {"PicklingError", (getter)Pickler_get_error, NULL},
+ {NULL}
+};
+
+PyDoc_STRVAR(Picklertype__doc__,
+"Objects that know how to pickle objects\n");
+
+/* Type object for cPickle.Pickler; GC-aware, subclassable. */
+static PyTypeObject Picklertype = {
+ PyObject_HEAD_INIT(NULL)
+ 0, /*ob_size*/
+ "cPickle.Pickler", /*tp_name*/
+ sizeof(Picklerobject), /*tp_basicsize*/
+ 0,
+ (destructor)Pickler_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ PyObject_GenericSetAttr, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
+ Picklertype__doc__, /* tp_doc */
+ (traverseproc)Pickler_traverse, /* tp_traverse */
+ (inquiry)Pickler_clear, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ Pickler_methods, /* tp_methods */
+ Pickler_members, /* tp_members */
+ Pickler_getsets, /* tp_getset */
+};
+
+/* Resolve (module_name, global_name) to an object.  If the unpickler
+ * has a find_global override `fc`, call it (None means globals are
+ * forbidden).  Otherwise look the module up in sys.modules, importing
+ * it if necessary, and fetch the attribute.  New reference or NULL. */
+static PyObject *
+find_class(PyObject *py_module_name, PyObject *py_global_name, PyObject *fc)
+{
+ PyObject *global = 0, *module;
+
+ if (fc) {
+ if (fc==Py_None) {
+ PyErr_SetString(UnpicklingError, "Global and instance "
+ "pickles are not supported.");
+ return NULL;
+ }
+ return PyObject_CallFunctionObjArgs(fc, py_module_name,
+ py_global_name, NULL);
+ }
+
+ module = PySys_GetObject("modules");
+ if (module == NULL)
+ return NULL;
+
+ module = PyDict_GetItem(module, py_module_name);
+ if (module == NULL) {
+ module = PyImport_Import(py_module_name);
+ if (!module)
+ return NULL;
+ global = PyObject_GetAttr(module, py_global_name);
+ Py_DECREF(module);
+ }
+ else
+ global = PyObject_GetAttr(module, py_global_name);
+ return global;
+}
+
+/* Pop and return the topmost MARK position from the mark stack, or -1
+ * (with UnpicklingError set) if there is none. */
+static int
+marker(Unpicklerobject *self)
+{
+ if (self->num_marks < 1) {
+ PyErr_SetString(UnpicklingError, "could not find MARK");
+ return -1;
+ }
+
+ return self->marks[--self->num_marks];
+}
+
+
+/* NONE opcode: push None. */
+static int
+load_none(Unpicklerobject *self)
+{
+ PDATA_APPEND(self->stack, Py_None, -1);
+ return 0;
+}
+
+/* Helper: report a truncated pickle stream; always returns -1. */
+static int
+bad_readline(void)
+{
+ PyErr_SetString(UnpicklingError, "pickle data was truncated");
+ return -1;
+}
+
+/* INT opcode: read a decimal line.  "00"/"01" become bools; values
+ * fitting a C long become ints; anything else is retried as a long. */
+static int
+load_int(Unpicklerobject *self)
+{
+ PyObject *py_int = 0;
+ char *endptr, *s;
+ int len, res = -1;
+ long l;
+
+ if ((len = self->readline_func(self, &s)) < 0) return -1;
+ if (len < 2) return bad_readline();
+ if (!( s=pystrndup(s,len))) return -1;
+
+ errno = 0;
+ l = strtol(s, &endptr, 0);
+
+ if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
+ /* Hm, maybe we've got something long. Let's try reading
+ it as a Python long object. */
+ errno = 0;
+ py_int = PyLong_FromString(s, NULL, 0);
+ if (py_int == NULL) {
+ PyErr_SetString(PyExc_ValueError,
+ "could not convert string to int");
+ goto finally;
+ }
+ }
+ else {
+ /* len 3 means exactly two digits + '\n', i.e. "00" or "01". */
+ if (len == 3 && (l == 0 || l == 1)) {
+ if (!( py_int = PyBool_FromLong(l))) goto finally;
+ }
+ else {
+ if (!( py_int = PyInt_FromLong(l))) goto finally;
+ }
+ }
+
+ free(s);
+ PDATA_PUSH(self->stack, py_int, -1);
+ return 0;
+
+ finally:
+ free(s);
+
+ return res;
+}
+
+/* NEWTRUE/NEWFALSE opcodes: push the given bool singleton. */
+static int
+load_bool(Unpicklerobject *self, PyObject *boolean)
+{
+ assert(boolean == Py_True || boolean == Py_False);
+ PDATA_APPEND(self->stack, boolean, -1);
+ return 0;
+}
+
+/* s contains x bytes of a little-endian integer. Return its value as a
+ * C int. Obscure: when x is 1 or 2, this is an unsigned little-endian
+ * int, but when x is 4 it's a signed one. This is an historical source
+ * of x-platform bugs.
+ */
+static long
+calc_binint(char *s, int x)
+{
+ unsigned char c;
+ int i;
+ long l;
+
+ for (i = 0, l = 0L; i < x; i++) {
+ c = (unsigned char)s[i];
+ l |= (long)c << (i * 8);
+ }
+#if SIZEOF_LONG > 4
+ /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
+ * is signed, so on a box with longs bigger than 4 bytes we need
+ * to extend a BININT's sign bit to the full width.
+ */
+ if (x == 4 && l & (1L << 31))
+ l |= (~0L) << 32;
+#endif
+ return l;
+}
+
+
+/* Common tail of BININT/BININT1/BININT2: decode x little-endian bytes
+ * at s and push the resulting int. */
+static int
+load_binintx(Unpicklerobject *self, char *s, int x)
+{
+ PyObject *py_int = 0;
+ long l;
+
+ l = calc_binint(s, x);
+
+ if (!( py_int = PyInt_FromLong(l)))
+ return -1;
+
+ PDATA_PUSH(self->stack, py_int, -1);
+ return 0;
+}
+
+
+/* BININT opcode: 4-byte signed little-endian int. */
+static int
+load_binint(Unpicklerobject *self)
+{
+ char *s;
+
+ if (self->read_func(self, &s, 4) < 0)
+ return -1;
+
+ return load_binintx(self, s, 4);
+}
+
+
+/* BININT1 opcode: 1-byte unsigned int. */
+static int
+load_binint1(Unpicklerobject *self)
+{
+ char *s;
+
+ if (self->read_func(self, &s, 1) < 0)
+ return -1;
+
+ return load_binintx(self, s, 1);
+}
+
+
+/* BININT2 opcode: 2-byte unsigned little-endian int. */
+static int
+load_binint2(Unpicklerobject *self)
+{
+ char *s;
+
+ if (self->read_func(self, &s, 2) < 0)
+ return -1;
+
+ return load_binintx(self, s, 2);
+}
+
+/* LONG opcode: read a decimal-repr line and push a Python long. */
+static int
+load_long(Unpicklerobject *self)
+{
+ PyObject *l = 0;
+ char *end, *s;
+ int len, res = -1;
+
+ if ((len = self->readline_func(self, &s)) < 0) return -1;
+ if (len < 2) return bad_readline();
+ if (!( s=pystrndup(s,len))) return -1;
+
+ if (!( l = PyLong_FromString(s, &end, 0)))
+ goto finally;
+
+ free(s);
+ PDATA_PUSH(self->stack, l, -1);
+ return 0;
+
+ finally:
+ free(s);
+
+ return res;
+}
+
+/* 'size' bytes contain the # of bytes of little-endian 256's-complement
+ * data following.
+ */
+static int
+load_counted_long(Unpicklerobject *self, int size)
+{
+ Py_ssize_t i;
+ char *nbytes;
+ unsigned char *pdata;
+ PyObject *along;
+
+ /* LONG1 uses a 1-byte count, LONG4 a 4-byte count. */
+ assert(size == 1 || size == 4);
+ i = self->read_func(self, &nbytes, size);
+ if (i < 0) return -1;
+
+ size = calc_binint(nbytes, size);
+ if (size < 0) {
+ /* Corrupt or hostile pickle -- we never write one like
+ * this.
+ */
+ PyErr_SetString(UnpicklingError, "LONG pickle has negative "
+ "byte count");
+ return -1;
+ }
+
+ if (size == 0)
+ along = PyLong_FromLong(0L);
+ else {
+ /* Read the raw little-endian bytes & convert. */
+ i = self->read_func(self, (char **)&pdata, size);
+ if (i < 0) return -1;
+ along = _PyLong_FromByteArray(pdata, (size_t)size,
+ 1 /* little endian */, 1 /* signed */);
+ }
+ if (along == NULL)
+ return -1;
+ PDATA_PUSH(self->stack, along, -1);
+ return 0;
+}
+
+/* FLOAT opcode: read a text line and parse it with the locale-safe
+ * ASCII strtod; push the resulting float. */
+static int
+load_float(Unpicklerobject *self)
+{
+ PyObject *py_float = 0;
+ char *endptr, *s;
+ int len, res = -1;
+ double d;
+
+ if ((len = self->readline_func(self, &s)) < 0) return -1;
+ if (len < 2) return bad_readline();
+ if (!( s=pystrndup(s,len))) return -1;
+
+ errno = 0;
+ d = PyOS_ascii_strtod(s, &endptr);
+
+ if (errno || (endptr[0] != '\n') || (endptr[1] != '\0')) {
+ PyErr_SetString(PyExc_ValueError,
+ "could not convert string to float");
+ goto finally;
+ }
+
+ if (!( py_float = PyFloat_FromDouble(d)))
+ goto finally;
+
+ free(s);
+ PDATA_PUSH(self->stack, py_float, -1);
+ return 0;
+
+ finally:
+ free(s);
+
+ return res;
+}
+
+/* BINFLOAT opcode: 8 bytes of big-endian IEEE-754 double. */
+static int
+load_binfloat(Unpicklerobject *self)
+{
+ PyObject *py_float;
+ double x;
+ char *p;
+
+ if (self->read_func(self, &p, 8) < 0)
+ return -1;
+
+ /* -1.0 is _PyFloat_Unpack8's error value; confirm via PyErr. */
+ x = _PyFloat_Unpack8((unsigned char *)p, 0);
+ if (x == -1.0 && PyErr_Occurred())
+ return -1;
+
+ py_float = PyFloat_FromDouble(x);
+ if (py_float == NULL)
+ return -1;
+
+ PDATA_PUSH(self->stack, py_float, -1);
+ return 0;
+}
+
+/* STRING opcode: read a repr-style quoted line, strip the matching
+ * outer quotes, decode escape sequences, and push the string.  A line
+ * without matching quotes is rejected as an insecure pickle. */
+static int
+load_string(Unpicklerobject *self)
+{
+ PyObject *str = 0;
+ int len, res = -1;
+ char *s, *p;
+
+ if ((len = self->readline_func(self, &s)) < 0) return -1;
+ if (len < 2) return bad_readline();
+ if (!( s=pystrndup(s,len))) return -1;
+
+
+ /* Strip outermost quotes */
+ /* NOTE(review): trims trailing whitespace/newline first; assumes at
+ * least one char > ' ' is present -- hostile input with only
+ * control chars would walk len below 0.  TODO confirm upstream. */
+ while (s[len-1] <= ' ')
+ len--;
+ if(s[0]=='"' && s[len-1]=='"'){
+ s[len-1] = '\0';
+ p = s + 1 ;
+ len -= 2;
+ } else if(s[0]=='\'' && s[len-1]=='\''){
+ s[len-1] = '\0';
+ p = s + 1 ;
+ len -= 2;
+ } else
+ goto insecure;
+ /********************************************/
+
+ str = PyString_DecodeEscape(p, len, NULL, 0, NULL);
+ free(s);
+ if (str) {
+ PDATA_PUSH(self->stack, str, -1);
+ res = 0;
+ }
+ return res;
+
+ insecure:
+ free(s);
+ PyErr_SetString(PyExc_ValueError,"insecure string pickle");
+ return -1;
+}
+
+
+/* BINSTRING opcode: 4-byte length followed by raw string bytes. */
+static int
+load_binstring(Unpicklerobject *self)
+{
+ PyObject *py_string = 0;
+ long l;
+ char *s;
+
+ if (self->read_func(self, &s, 4) < 0) return -1;
+
+ l = calc_binint(s, 4);
+
+ if (self->read_func(self, &s, l) < 0)
+ return -1;
+
+ if (!( py_string = PyString_FromStringAndSize(s, l)))
+ return -1;
+
+ PDATA_PUSH(self->stack, py_string, -1);
+ return 0;
+}
+
+
+/* SHORT_BINSTRING opcode: 1-byte length followed by raw bytes. */
+static int
+load_short_binstring(Unpicklerobject *self)
+{
+ PyObject *py_string = 0;
+ unsigned char l;
+ char *s;
+
+ if (self->read_func(self, &s, 1) < 0)
+ return -1;
+
+ l = (unsigned char)s[0];
+
+ if (self->read_func(self, &s, l) < 0) return -1;
+
+ if (!( py_string = PyString_FromStringAndSize(s, l))) return -1;
+
+ PDATA_PUSH(self->stack, py_string, -1);
+ return 0;
+}
+
+
+#ifdef Py_USING_UNICODE
+/* UNICODE opcode: read a raw-unicode-escape encoded line (sans the
+ * trailing newline) and push the decoded unicode object. */
+static int
+load_unicode(Unpicklerobject *self)
+{
+ PyObject *str = 0;
+ int len, res = -1;
+ char *s;
+
+ if ((len = self->readline_func(self, &s)) < 0) return -1;
+ if (len < 1) return bad_readline();
+
+ if (!( str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL)))
+ goto finally;
+
+ PDATA_PUSH(self->stack, str, -1);
+ return 0;
+
+ finally:
+ return res;
+}
+#endif
+
+
+#ifdef Py_USING_UNICODE
+/* BINUNICODE opcode: 4-byte length followed by UTF-8 encoded data. */
+static int
+load_binunicode(Unpicklerobject *self)
+{
+ PyObject *unicode;
+ long l;
+ char *s;
+
+ if (self->read_func(self, &s, 4) < 0) return -1;
+
+ l = calc_binint(s, 4);
+
+ if (self->read_func(self, &s, l) < 0)
+ return -1;
+
+ if (!( unicode = PyUnicode_DecodeUTF8(s, l, NULL)))
+ return -1;
+
+ PDATA_PUSH(self->stack, unicode, -1);
+ return 0;
+}
+#endif
+
+
+/* TUPLE opcode: collect everything above the topmost MARK into a
+ * tuple and push it. */
+static int
+load_tuple(Unpicklerobject *self)
+{
+ PyObject *tup;
+ int i;
+
+ if ((i = marker(self)) < 0) return -1;
+ if (!( tup=Pdata_popTuple(self->stack, i))) return -1;
+ PDATA_PUSH(self->stack, tup, -1);
+ return 0;
+}
+
+/* TUPLE1/TUPLE2/TUPLE3 opcodes: pop `len` items off the object stack
+ * (top of stack becomes the last element) into a new tuple and push
+ * it.  Fix: release the partially-built tuple when popping an element
+ * fails, instead of leaking it. */
+static int
+load_counted_tuple(Unpicklerobject *self, int len)
+{
+ PyObject *tup = PyTuple_New(len);
+
+ if (tup == NULL)
+ return -1;
+
+ while (--len >= 0) {
+ PyObject *element;
+
+ PDATA_POP(self->stack, element);
+ if (element == NULL) {
+ /* Don't leak the partially filled tuple. */
+ Py_DECREF(tup);
+ return -1;
+ }
+ PyTuple_SET_ITEM(tup, len, element);
+ }
+ PDATA_PUSH(self->stack, tup, -1);
+ return 0;
+}
+
+/* EMPTY_LIST opcode: push a new empty list. */
+static int
+load_empty_list(Unpicklerobject *self)
+{
+ PyObject *list;
+
+ if (!( list=PyList_New(0))) return -1;
+ PDATA_PUSH(self->stack, list, -1);
+ return 0;
+}
+
+/* EMPTY_DICT opcode: push a new empty dict. */
+static int
+load_empty_dict(Unpicklerobject *self)
+{
+ PyObject *dict;
+
+ if (!( dict=PyDict_New())) return -1;
+ PDATA_PUSH(self->stack, dict, -1);
+ return 0;
+}
+
+
+/* LIST opcode: collect everything above the topmost MARK into a list
+ * and push it. */
+static int
+load_list(Unpicklerobject *self)
+{
+ PyObject *list = 0;
+ int i;
+
+ if ((i = marker(self)) < 0) return -1;
+ if (!( list=Pdata_popList(self->stack, i))) return -1;
+ PDATA_PUSH(self->stack, list, -1);
+ return 0;
+}
+
+/* DICT opcode: items above the topmost MARK are alternating key/value
+ * pairs; build a dict from them and push it. */
+static int
+load_dict(Unpicklerobject *self)
+{
+ PyObject *dict, *key, *value;
+ int i, j, k;
+
+ if ((i = marker(self)) < 0) return -1;
+ j=self->stack->length;
+
+ if (!( dict = PyDict_New())) return -1;
+
+ for (k = i+1; k < j; k += 2) {
+ key =self->stack->data[k-1];
+ value=self->stack->data[k ];
+ if (PyDict_SetItem(dict, key, value) < 0) {
+ Py_DECREF(dict);
+ return -1;
+ }
+ }
+ /* Drop the consumed key/value items, then push the dict. */
+ Pdata_clear(self->stack, i);
+ PDATA_PUSH(self->stack, dict, -1);
+ return 0;
+}
+
+/* Call cls(*args).  On failure, re-raise the original exception with
+ * its value wrapped as (value, cls, args) so the caller can see which
+ * reconstruction failed.  Always returns NULL on error. */
+static PyObject *
+Instance_New(PyObject *cls, PyObject *args)
+{
+ PyObject *r = 0;
+
+ if ((r=PyObject_CallObject(cls, args))) return r;
+
+ {
+ PyObject *tp, *v, *tb, *tmp_value;
+
+ PyErr_Fetch(&tp, &v, &tb);
+ tmp_value = v;
+ /* NULL occurs when there was a KeyboardInterrupt */
+ if (tmp_value == NULL)
+ tmp_value = Py_None;
+ if ((r = PyTuple_Pack(3, tmp_value, cls, args))) {
+ Py_XDECREF(v);
+ v=r;
+ }
+ PyErr_Restore(tp,v,tb);
+ }
+ return NULL;
+}
+
+
+/* OBJ opcode: stack holds MARK, class, args...; instantiate the class
+ * with the args tuple and push the result. */
+static int
+load_obj(Unpicklerobject *self)
+{
+ PyObject *class, *tup, *obj=0;
+ int i;
+
+ if ((i = marker(self)) < 0) return -1;
+ /* i+1 leaves the class itself on the stack; pop it separately. */
+ if (!( tup=Pdata_popTuple(self->stack, i+1))) return -1;
+ PDATA_POP(self->stack, class);
+ if (class) {
+ obj = Instance_New(class, tup);
+ Py_DECREF(class);
+ }
+ Py_DECREF(tup);
+
+ if (! obj) return -1;
+ PDATA_PUSH(self->stack, obj, -1);
+ return 0;
+}
+
+
+/* INST opcode: read module and class names from the next two lines,
+ * resolve the class via find_class(), and instantiate it with a tuple
+ * of the items above the topmost MARK.  Fix: release module_name when
+ * the second readline is truncated -- previously `return
+ * bad_readline()` leaked it (load_global already DECREFs here). */
+static int
+load_inst(Unpicklerobject *self)
+{
+ PyObject *tup, *class=0, *obj=0, *module_name, *class_name;
+ int i, len;
+ char *s;
+
+ if ((i = marker(self)) < 0) return -1;
+
+ if ((len = self->readline_func(self, &s)) < 0) return -1;
+ if (len < 2) return bad_readline();
+ module_name = PyString_FromStringAndSize(s, len - 1);
+ if (!module_name) return -1;
+
+ if ((len = self->readline_func(self, &s)) >= 0) {
+ if (len < 2) {
+ Py_DECREF(module_name);
+ return bad_readline();
+ }
+ if ((class_name = PyString_FromStringAndSize(s, len - 1))) {
+ class = find_class(module_name, class_name,
+ self->find_class);
+ Py_DECREF(class_name);
+ }
+ }
+ Py_DECREF(module_name);
+
+ if (! class) return -1;
+
+ if ((tup=Pdata_popTuple(self->stack, i))) {
+ obj = Instance_New(class, tup);
+ Py_DECREF(tup);
+ }
+ Py_DECREF(class);
+
+ if (! obj) return -1;
+
+ PDATA_PUSH(self->stack, obj, -1);
+ return 0;
+}
+
+/* NEWOBJ opcode (protocol 2): stack is ... cls argtuple; call
+ * cls.__new__(cls, *argtuple) directly via tp_new and push result. */
+static int
+load_newobj(Unpicklerobject *self)
+{
+ PyObject *args = NULL;
+ PyObject *clsraw = NULL;
+ PyTypeObject *cls; /* clsraw cast to its true type */
+ PyObject *obj;
+
+ /* Stack is ... cls argtuple, and we want to call
+ * cls.__new__(cls, *argtuple).
+ */
+ PDATA_POP(self->stack, args);
+ if (args == NULL) goto Fail;
+ if (! PyTuple_Check(args)) {
+ PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg "
+ "tuple.");
+ goto Fail;
+ }
+
+ PDATA_POP(self->stack, clsraw);
+ cls = (PyTypeObject *)clsraw;
+ if (cls == NULL) goto Fail;
+ if (! PyType_Check(cls)) {
+ PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
+ "isn't a type object");
+ goto Fail;
+ }
+ if (cls->tp_new == NULL) {
+ PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
+ "has NULL tp_new");
+ goto Fail;
+ }
+
+ /* Call __new__. */
+ obj = cls->tp_new(cls, args, NULL);
+ if (obj == NULL) goto Fail;
+
+ Py_DECREF(args);
+ Py_DECREF(clsraw);
+ PDATA_PUSH(self->stack, obj, -1);
+ return 0;
+
+ Fail:
+ Py_XDECREF(args);
+ Py_XDECREF(clsraw);
+ return -1;
+}
+
+/* GLOBAL opcode: read module and attribute names from the next two
+ * lines, resolve via find_class(), and push the resulting object. */
+static int
+load_global(Unpicklerobject *self)
+{
+ PyObject *class = 0, *module_name = 0, *class_name = 0;
+ int len;
+ char *s;
+
+ if ((len = self->readline_func(self, &s)) < 0) return -1;
+ if (len < 2) return bad_readline();
+ module_name = PyString_FromStringAndSize(s, len - 1);
+ if (!module_name) return -1;
+
+ if ((len = self->readline_func(self, &s)) >= 0) {
+ if (len < 2) {
+ Py_DECREF(module_name);
+ return bad_readline();
+ }
+ if ((class_name = PyString_FromStringAndSize(s, len - 1))) {
+ class = find_class(module_name, class_name,
+ self->find_class);
+ Py_DECREF(class_name);
+ }
+ }
+ Py_DECREF(module_name);
+
+ if (! class) return -1;
+ PDATA_PUSH(self->stack, class, -1);
+ return 0;
+}
+
+
+/* PERSID opcode: read a persistent id line and resolve it.  If
+ * pers_func is a list, just append the raw id; otherwise call
+ * pers_func(pid) and push its result. */
+static int
+load_persid(Unpicklerobject *self)
+{
+ PyObject *pid = 0;
+ int len;
+ char *s;
+
+ if (self->pers_func) {
+ if ((len = self->readline_func(self, &s)) < 0) return -1;
+ if (len < 2) return bad_readline();
+
+ pid = PyString_FromStringAndSize(s, len - 1);
+ if (!pid) return -1;
+
+ if (PyList_Check(self->pers_func)) {
+ if (PyList_Append(self->pers_func, pid) < 0) {
+ Py_DECREF(pid);
+ return -1;
+ }
+ }
+ else {
+ /* pid is rebound to the call's result; the tuple in
+ * self->arg owns the old reference. */
+ ARG_TUP(self, pid);
+ if (self->arg) {
+ pid = PyObject_Call(self->pers_func, self->arg,
+ NULL);
+ FREE_ARG_TUP(self);
+ }
+ }
+
+ if (! pid) return -1;
+
+ PDATA_PUSH(self->stack, pid, -1);
+ return 0;
+ }
+ else {
+ PyErr_SetString(UnpicklingError,
+ "A load persistent id instruction was encountered,\n"
+ "but no persistent_load function was specified.");
+ return -1;
+ }
+}
+
+/* BINPERSID opcode: like PERSID, but the id is taken from the object
+ * stack instead of a text line. */
+static int
+load_binpersid(Unpicklerobject *self)
+{
+ PyObject *pid = 0;
+
+ if (self->pers_func) {
+ PDATA_POP(self->stack, pid);
+ if (! pid) return -1;
+
+ if (PyList_Check(self->pers_func)) {
+ if (PyList_Append(self->pers_func, pid) < 0) {
+ Py_DECREF(pid);
+ return -1;
+ }
+ }
+ else {
+ ARG_TUP(self, pid);
+ if (self->arg) {
+ pid = PyObject_Call(self->pers_func, self->arg,
+ NULL);
+ FREE_ARG_TUP(self);
+ }
+ if (! pid) return -1;
+ }
+
+ PDATA_PUSH(self->stack, pid, -1);
+ return 0;
+ }
+ else {
+ PyErr_SetString(UnpicklingError,
+ "A load persistent id instruction was encountered,\n"
+ "but no persistent_load function was specified.");
+ return -1;
+ }
+}
+
+
+/* POP opcode: discard either the topmost mark (if it sits at the
+ * current stack top) or the topmost object. */
+static int
+load_pop(Unpicklerobject *self)
+{
+ int len;
+
+ if (!( (len=self->stack->length) > 0 )) return stackUnderflow();
+
+ /* Note that we split the (pickle.py) stack into two stacks,
+ an object stack and a mark stack. We have to be clever and
+ pop the right one. We do this by looking at the top of the
+ mark stack.
+ */
+
+ if ((self->num_marks > 0) &&
+ (self->marks[self->num_marks - 1] == len))
+ self->num_marks--;
+ else {
+ len--;
+ Py_DECREF(self->stack->data[len]);
+ self->stack->length=len;
+ }
+
+ return 0;
+}
+
+
+/* POP_MARK opcode: discard everything above the topmost MARK. */
+static int
+load_pop_mark(Unpicklerobject *self)
+{
+ int i;
+
+ if ((i = marker(self)) < 0)
+ return -1;
+
+ Pdata_clear(self->stack, i);
+
+ return 0;
+}
+
+
+/* DUP opcode: push another reference to the top-of-stack object. */
+static int
+load_dup(Unpicklerobject *self)
+{
+ PyObject *last;
+ int len;
+
+ if ((len = self->stack->length) <= 0) return stackUnderflow();
+ last=self->stack->data[len-1];
+ Py_INCREF(last);
+ PDATA_PUSH(self->stack, last, -1);
+ return 0;
+}
+
+
+/* GET opcode: read a text memo key and push the memoized object;
+ * raises BadPickleGet for an unknown key. */
+static int
+load_get(Unpicklerobject *self)
+{
+ PyObject *py_str = 0, *value = 0;
+ int len;
+ char *s;
+ int rc;
+
+ if ((len = self->readline_func(self, &s)) < 0) return -1;
+ if (len < 2) return bad_readline();
+
+ if (!( py_str = PyString_FromStringAndSize(s, len - 1))) return -1;
+
+ value = PyDict_GetItem(self->memo, py_str);
+ if (! value) {
+ PyErr_SetObject(BadPickleGet, py_str);
+ rc = -1;
+ }
+ else {
+ PDATA_APPEND(self->stack, value, -1);
+ rc = 0;
+ }
+
+ Py_DECREF(py_str);
+ return rc;
+}
+
+
+/* BINGET opcode: 1-byte memo key; push the memoized object. */
+static int
+load_binget(Unpicklerobject *self)
+{
+ PyObject *py_key = 0, *value = 0;
+ unsigned char key;
+ char *s;
+ int rc;
+
+ if (self->read_func(self, &s, 1) < 0) return -1;
+
+ key = (unsigned char)s[0];
+ if (!( py_key = PyInt_FromLong((long)key))) return -1;
+
+ value = PyDict_GetItem(self->memo, py_key);
+ if (! value) {
+ PyErr_SetObject(BadPickleGet, py_key);
+ rc = -1;
+ }
+ else {
+ PDATA_APPEND(self->stack, value, -1);
+ rc = 0;
+ }
+
+ Py_DECREF(py_key);
+ return rc;
+}
+
+
+/* LONG_BINGET opcode: 4-byte little-endian memo key; push the
+ * memoized object. */
+static int
+load_long_binget(Unpicklerobject *self)
+{
+ PyObject *py_key = 0, *value = 0;
+ unsigned char c;
+ char *s;
+ long key;
+ int rc;
+
+ if (self->read_func(self, &s, 4) < 0) return -1;
+
+ /* Assemble the 4 little-endian bytes by hand. */
+ c = (unsigned char)s[0];
+ key = (long)c;
+ c = (unsigned char)s[1];
+ key |= (long)c << 8;
+ c = (unsigned char)s[2];
+ key |= (long)c << 16;
+ c = (unsigned char)s[3];
+ key |= (long)c << 24;
+
+ if (!( py_key = PyInt_FromLong((long)key))) return -1;
+
+ value = PyDict_GetItem(self->memo, py_key);
+ if (! value) {
+ PyErr_SetObject(BadPickleGet, py_key);
+ rc = -1;
+ }
+ else {
+ PDATA_APPEND(self->stack, value, -1);
+ rc = 0;
+ }
+
+ Py_DECREF(py_key);
+ return rc;
+}
+
+/* Push an object from the extension registry (EXT[124]). nbytes is
+ * the number of bytes following the opcode, holding the index (code) value.
+ */
+static int
+load_extension(Unpicklerobject *self, int nbytes)
+{
+ char *codebytes; /* the nbytes bytes after the opcode */
+ long code; /* calc_binint returns long */
+ PyObject *py_code; /* code as a Python int */
+ PyObject *obj; /* the object to push */
+ PyObject *pair; /* (module_name, class_name) */
+ PyObject *module_name, *class_name;
+
+ assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
+ if (self->read_func(self, &codebytes, nbytes) < 0) return -1;
+ code = calc_binint(codebytes, nbytes);
+ if (code <= 0) { /* note that 0 is forbidden */
+ /* Corrupt or hostile pickle. */
+ PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
+ return -1;
+ }
+
+ /* Look for the code in the cache. */
+ py_code = PyInt_FromLong(code);
+ if (py_code == NULL) return -1;
+ obj = PyDict_GetItem(extension_cache, py_code);
+ if (obj != NULL) {
+ /* Bingo. */
+ Py_DECREF(py_code);
+ PDATA_APPEND(self->stack, obj, -1);
+ return 0;
+ }
+
+ /* Look up the (module_name, class_name) pair. */
+ pair = PyDict_GetItem(inverted_registry, py_code);
+ if (pair == NULL) {
+ Py_DECREF(py_code);
+ PyErr_Format(PyExc_ValueError, "unregistered extension "
+ "code %ld", code);
+ return -1;
+ }
+ /* Since the extension registry is manipulable via Python code,
+ * confirm that pair is really a 2-tuple of strings.
+ */
+ if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
+ !PyString_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
+ !PyString_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
+ Py_DECREF(py_code);
+ PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
+ "isn't a 2-tuple of strings", code);
+ return -1;
+ }
+ /* Load the object. */
+ obj = find_class(module_name, class_name, self->find_class);
+ if (obj == NULL) {
+ Py_DECREF(py_code);
+ return -1;
+ }
+ /* Cache code -> obj. */
+ code = PyDict_SetItem(extension_cache, py_code, obj);
+ Py_DECREF(py_code);
+ if (code < 0) {
+ Py_DECREF(obj);
+ return -1;
+ }
+ PDATA_PUSH(self->stack, obj, -1);
+ return 0;
+}
+
+/* PUT opcode: read a text memo key and store the top-of-stack object
+ * under it in the memo. */
+static int
+load_put(Unpicklerobject *self)
+{
+ PyObject *py_str = 0, *value = 0;
+ int len, l;
+ char *s;
+
+ if ((l = self->readline_func(self, &s)) < 0) return -1;
+ if (l < 2) return bad_readline();
+ if (!( len=self->stack->length )) return stackUnderflow();
+ if (!( py_str = PyString_FromStringAndSize(s, l - 1))) return -1;
+ value=self->stack->data[len-1];
+ l=PyDict_SetItem(self->memo, py_str, value);
+ Py_DECREF(py_str);
+ return l;
+}
+
+
+/* BINPUT opcode: 1-byte memo key; store top-of-stack in the memo. */
+static int
+load_binput(Unpicklerobject *self)
+{
+ PyObject *py_key = 0, *value = 0;
+ unsigned char key;
+ char *s;
+ int len;
+
+ if (self->read_func(self, &s, 1) < 0) return -1;
+ if (!( (len=self->stack->length) > 0 )) return stackUnderflow();
+
+ key = (unsigned char)s[0];
+
+ if (!( py_key = PyInt_FromLong((long)key))) return -1;
+ value=self->stack->data[len-1];
+ len=PyDict_SetItem(self->memo, py_key, value);
+ Py_DECREF(py_key);
+ return len;
+}
+
+
+/* LONG_BINPUT opcode: 4-byte little-endian memo key; store the
+ * top-of-stack object in the memo. */
+static int
+load_long_binput(Unpicklerobject *self)
+{
+ PyObject *py_key = 0, *value = 0;
+ long key;
+ unsigned char c;
+ char *s;
+ int len;
+
+ if (self->read_func(self, &s, 4) < 0) return -1;
+ if (!( len=self->stack->length )) return stackUnderflow();
+
+ /* Assemble the 4 little-endian bytes by hand. */
+ c = (unsigned char)s[0];
+ key = (long)c;
+ c = (unsigned char)s[1];
+ key |= (long)c << 8;
+ c = (unsigned char)s[2];
+ key |= (long)c << 16;
+ c = (unsigned char)s[3];
+ key |= (long)c << 24;
+
+ if (!( py_key = PyInt_FromLong(key))) return -1;
+ value=self->stack->data[len-1];
+ len=PyDict_SetItem(self->memo, py_key, value);
+ Py_DECREF(py_key);
+ return len;
+}
+
+
+/* Shared body of APPEND/APPENDS: extend the list-like object at stack
+ * slot x-1 with the items in slots [x, len).  Real lists get a bulk
+ * slice-assign; anything else gets per-item .append() calls. */
+static int
+do_append(Unpicklerobject *self, int x)
+{
+ PyObject *value = 0, *list = 0, *append_method = 0;
+ int len, i;
+
+ len=self->stack->length;
+ if (!( len >= x && x > 0 )) return stackUnderflow();
+ /* nothing to do */
+ if (len==x) return 0;
+
+ list=self->stack->data[x-1];
+
+ if (PyList_Check(list)) {
+ PyObject *slice;
+ int list_len;
+
+ slice=Pdata_popList(self->stack, x);
+ if (! slice) return -1;
+ list_len = PyList_GET_SIZE(list);
+ i=PyList_SetSlice(list, list_len, list_len, slice);
+ Py_DECREF(slice);
+ return i;
+ }
+ else {
+
+ if (!( append_method = PyObject_GetAttr(list, append_str)))
+ return -1;
+
+ for (i = x; i < len; i++) {
+ PyObject *junk;
+
+ value=self->stack->data[i];
+ junk=0;
+ ARG_TUP(self, value);
+ if (self->arg) {
+ junk = PyObject_Call(append_method, self->arg,
+ NULL);
+ FREE_ARG_TUP(self);
+ }
+ if (! junk) {
+ /* Drop the not-yet-appended items too. */
+ Pdata_clear(self->stack, i+1);
+ self->stack->length=x;
+ Py_DECREF(append_method);
+ return -1;
+ }
+ Py_DECREF(junk);
+ }
+ self->stack->length=x;
+ Py_DECREF(append_method);
+ }
+
+ return 0;
+}
+
+
+/* APPEND opcode: append the single top-of-stack item. */
+static int
+load_append(Unpicklerobject *self)
+{
+ return do_append(self, self->stack->length - 1);
+}
+
+
+/* APPENDS opcode: append everything above the topmost MARK. */
+static int
+load_appends(Unpicklerobject *self)
+{
+ return do_append(self, marker(self));
+}
+
+
+/* Shared body of SETITEM/SETITEMS: stack slots [x, len) hold
+ * alternating key/value pairs to store into the mapping at slot x-1. */
+static int
+do_setitems(Unpicklerobject *self, int x)
+{
+ PyObject *value = 0, *key = 0, *dict = 0;
+ int len, i, r=0;
+
+ if (!( (len=self->stack->length) >= x
+ && x > 0 )) return stackUnderflow();
+
+ dict=self->stack->data[x-1];
+
+ for (i = x+1; i < len; i += 2) {
+ key =self->stack->data[i-1];
+ value=self->stack->data[i ];
+ if (PyObject_SetItem(dict, key, value) < 0) {
+ r=-1;
+ break;
+ }
+ }
+
+ /* Consume the key/value items even on error. */
+ Pdata_clear(self->stack, x);
+
+ return r;
+}
+
+
+/* SETITEM opcode: store the top key/value pair. */
+static int
+load_setitem(Unpicklerobject *self)
+{
+ return do_setitems(self, self->stack->length - 2);
+}
+
+/* SETITEMS opcode: store all pairs above the topmost MARK. */
+static int
+load_setitems(Unpicklerobject *self)
+{
+ return do_setitems(self, marker(self));
+}
+
+
+/* BUILD opcode: apply the popped state to the instance below it.  An
+ * explicit __setstate__ gets the state verbatim; otherwise the state
+ * dict (and, for protocol 2, an optional slot-state dict in a 2-tuple)
+ * is written into __dict__ / set as attributes. */
+static int
+load_build(Unpicklerobject *self)
+{
+ PyObject *state, *inst, *slotstate;
+ PyObject *__setstate__;
+ PyObject *d_key, *d_value;
+ Py_ssize_t i;
+ int res = -1;
+
+ /* Stack is ... instance, state. We want to leave instance at
+ * the stack top, possibly mutated via instance.__setstate__(state).
+ */
+ if (self->stack->length < 2)
+ return stackUnderflow();
+ PDATA_POP(self->stack, state);
+ if (state == NULL)
+ return -1;
+ inst = self->stack->data[self->stack->length - 1];
+
+ __setstate__ = PyObject_GetAttr(inst, __setstate___str);
+ if (__setstate__ != NULL) {
+ PyObject *junk = NULL;
+
+ /* The explicit __setstate__ is responsible for everything. */
+ ARG_TUP(self, state);
+ if (self->arg) {
+ junk = PyObject_Call(__setstate__, self->arg, NULL);
+ FREE_ARG_TUP(self);
+ }
+ Py_DECREF(__setstate__);
+ if (junk == NULL)
+ return -1;
+ Py_DECREF(junk);
+ return 0;
+ }
+ if (!PyErr_ExceptionMatches(PyExc_AttributeError))
+ return -1;
+ PyErr_Clear();
+
+ /* A default __setstate__. First see whether state embeds a
+ * slot state dict too (a proto 2 addition).
+ */
+ if (PyTuple_Check(state) && PyTuple_Size(state) == 2) {
+ PyObject *temp = state;
+ state = PyTuple_GET_ITEM(temp, 0);
+ slotstate = PyTuple_GET_ITEM(temp, 1);
+ Py_INCREF(state);
+ Py_INCREF(slotstate);
+ Py_DECREF(temp);
+ }
+ else
+ slotstate = NULL;
+
+ /* Set inst.__dict__ from the state dict (if any). */
+ if (state != Py_None) {
+ PyObject *dict;
+ if (! PyDict_Check(state)) {
+ PyErr_SetString(UnpicklingError, "state is not a "
+ "dictionary");
+ goto finally;
+ }
+ dict = PyObject_GetAttr(inst, __dict___str);
+ if (dict == NULL)
+ goto finally;
+
+ i = 0;
+ while (PyDict_Next(state, &i, &d_key, &d_value)) {
+ if (PyObject_SetItem(dict, d_key, d_value) < 0)
+ goto finally;
+ }
+ Py_DECREF(dict);
+ }
+
+ /* Also set instance attributes from the slotstate dict (if any). */
+ if (slotstate != NULL) {
+ if (! PyDict_Check(slotstate)) {
+ PyErr_SetString(UnpicklingError, "slot state is not "
+ "a dictionary");
+ goto finally;
+ }
+ i = 0;
+ while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
+ if (PyObject_SetAttr(inst, d_key, d_value) < 0)
+ goto finally;
+ }
+ }
+ res = 0;
+
+ finally:
+ Py_DECREF(state);
+ Py_XDECREF(slotstate);
+ return res;
+}
+
+
+/* MARK opcode: record the current object-stack depth on the mark
+ * stack, growing the mark array in chunks of 20 as needed. */
+static int
+load_mark(Unpicklerobject *self)
+{
+ int s;
+
+ /* Note that we split the (pickle.py) stack into two stacks, an
+ object stack and a mark stack. Here we push a mark onto the
+ mark stack.
+ */
+
+ if ((self->num_marks + 1) >= self->marks_size) {
+ int *marks;
+ s=self->marks_size+20;
+ if (s <= self->num_marks) s=self->num_marks + 1;
+ if (self->marks == NULL)
+ marks=(int *)malloc(s * sizeof(int));
+ else
+ marks=(int *)realloc(self->marks,
+ s * sizeof(int));
+ if (!marks) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ self->marks = marks;
+ self->marks_size = s;
+ }
+
+ self->marks[self->num_marks++] = self->stack->length;
+
+ return 0;
+}
+
+static int
+load_reduce(Unpicklerobject *self)
+{
+ PyObject *callable = 0, *arg_tup = 0, *ob = 0;
+
+ PDATA_POP(self->stack, arg_tup);
+ if (! arg_tup) return -1;
+ PDATA_POP(self->stack, callable);
+ if (callable) {
+ ob = Instance_New(callable, arg_tup);
+ Py_DECREF(callable);
+ }
+ Py_DECREF(arg_tup);
+
+ if (! ob) return -1;
+
+ PDATA_PUSH(self->stack, ob, -1);
+ return 0;
+}
+
+/* Just raises an error if we don't know the protocol specified. PROTO
+ * is the first opcode for protocols >= 2.
+ */
+static int
+load_proto(Unpicklerobject *self)
+{
+ int i;
+ char *protobyte;
+
+ i = self->read_func(self, &protobyte, 1);
+ if (i < 0)
+ return -1;
+
+ i = calc_binint(protobyte, 1);
+ /* No point checking for < 0, since calc_binint returns an unsigned
+ * int when chewing on 1 byte.
+ */
+ assert(i >= 0);
+ if (i <= HIGHEST_PROTOCOL)
+ return 0;
+
+ PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
+ return -1;
+}
+
/* Main opcode dispatch loop for Unpickler.load().
 *
 * Reads one opcode byte at a time and calls the matching load_*
 * handler.  Handlers push/pop the object stack (self->stack); MARK
 * positions live on the separate mark stack.  When a handler fails
 * (returns < 0) the `break` inside the switch falls through to the
 * outer `break` below, ending the loop with the exception still set.
 * STOP ends the loop normally; the topmost stack object is the result.
 * Returns a new reference, or NULL with an exception set. */
static PyObject *
load(Unpicklerobject *self)
{
	PyObject *err = 0, *val = 0;
	char *s;

	self->num_marks = 0;
	if (self->stack->length) Pdata_clear(self->stack, 0);

	while (1) {
		if (self->read_func(self, &s, 1) < 0)
			break;

		switch (s[0]) {
		case NONE: if (load_none(self) < 0) break; continue;
		case BININT: if (load_binint(self) < 0) break; continue;
		case BININT1: if (load_binint1(self) < 0) break; continue;
		case BININT2: if (load_binint2(self) < 0) break; continue;
		case INT: if (load_int(self) < 0) break; continue;
		case LONG: if (load_long(self) < 0) break; continue;
		case LONG1: if (load_counted_long(self, 1) < 0) break; continue;
		case LONG4: if (load_counted_long(self, 4) < 0) break; continue;
		case FLOAT: if (load_float(self) < 0) break; continue;
		case BINFLOAT: if (load_binfloat(self) < 0) break; continue;
		case BINSTRING: if (load_binstring(self) < 0) break; continue;
		case SHORT_BINSTRING: if (load_short_binstring(self) < 0) break; continue;
		case STRING: if (load_string(self) < 0) break; continue;
#ifdef Py_USING_UNICODE
		case UNICODE: if (load_unicode(self) < 0) break; continue;
		case BINUNICODE: if (load_binunicode(self) < 0) break; continue;
#endif
		case EMPTY_TUPLE: if (load_counted_tuple(self, 0) < 0) break; continue;
		case TUPLE1: if (load_counted_tuple(self, 1) < 0) break; continue;
		case TUPLE2: if (load_counted_tuple(self, 2) < 0) break; continue;
		case TUPLE3: if (load_counted_tuple(self, 3) < 0) break; continue;
		case TUPLE: if (load_tuple(self) < 0) break; continue;
		case EMPTY_LIST: if (load_empty_list(self) < 0) break; continue;
		case LIST: if (load_list(self) < 0) break; continue;
		case EMPTY_DICT: if (load_empty_dict(self) < 0) break; continue;
		case DICT: if (load_dict(self) < 0) break; continue;
		case OBJ: if (load_obj(self) < 0) break; continue;
		case INST: if (load_inst(self) < 0) break; continue;
		case NEWOBJ: if (load_newobj(self) < 0) break; continue;
		case GLOBAL: if (load_global(self) < 0) break; continue;
		case APPEND: if (load_append(self) < 0) break; continue;
		case APPENDS: if (load_appends(self) < 0) break; continue;
		case BUILD: if (load_build(self) < 0) break; continue;
		case DUP: if (load_dup(self) < 0) break; continue;
		case BINGET: if (load_binget(self) < 0) break; continue;
		case LONG_BINGET: if (load_long_binget(self) < 0) break; continue;
		case GET: if (load_get(self) < 0) break; continue;
		case EXT1: if (load_extension(self, 1) < 0) break; continue;
		case EXT2: if (load_extension(self, 2) < 0) break; continue;
		case EXT4: if (load_extension(self, 4) < 0) break; continue;
		case MARK: if (load_mark(self) < 0) break; continue;
		case BINPUT: if (load_binput(self) < 0) break; continue;
		case LONG_BINPUT: if (load_long_binput(self) < 0) break; continue;
		case PUT: if (load_put(self) < 0) break; continue;
		case POP: if (load_pop(self) < 0) break; continue;
		case POP_MARK: if (load_pop_mark(self) < 0) break; continue;
		case SETITEM: if (load_setitem(self) < 0) break; continue;
		case SETITEMS: if (load_setitems(self) < 0) break; continue;

		case STOP:
			/* Normal end of this pickle: leave the loop with
			 * no exception set. */
			break;

		case PERSID: if (load_persid(self) < 0) break; continue;
		case BINPERSID: if (load_binpersid(self) < 0) break; continue;
		case REDUCE: if (load_reduce(self) < 0) break; continue;
		case PROTO: if (load_proto(self) < 0) break; continue;
		case NEWTRUE: if (load_bool(self, Py_True) < 0) break; continue;
		case NEWFALSE: if (load_bool(self, Py_False) < 0) break; continue;

		case '\0':
			/* end of file */
			PyErr_SetNone(PyExc_EOFError);
			break;

		default:
			cPickle_ErrFormat(UnpicklingError,
					  "invalid load key, '%s'.",
					  "c", s[0]);
			return NULL;
		}

		break;
	}

	if ((err = PyErr_Occurred())) {
		/* Normalize any EOFError raised by a handler to a bare
		 * EOFError with no argument. */
		if (err == PyExc_EOFError) {
			PyErr_SetNone(PyExc_EOFError);
		}
		return NULL;
	}

	PDATA_POP(self->stack, val);
	return val;
}
+
+
+/* No-load functions to support noload, which is used to
+ find persistent references. */
+
+static int
+noload_obj(Unpicklerobject *self)
+{
+ int i;
+
+ if ((i = marker(self)) < 0) return -1;
+ return Pdata_clear(self->stack, i+1);
+}
+
+
+static int
+noload_inst(Unpicklerobject *self)
+{
+ int i;
+ char *s;
+
+ if ((i = marker(self)) < 0) return -1;
+ Pdata_clear(self->stack, i);
+ if (self->readline_func(self, &s) < 0) return -1;
+ if (self->readline_func(self, &s) < 0) return -1;
+ PDATA_APPEND(self->stack, Py_None, -1);
+ return 0;
+}
+
+static int
+noload_newobj(Unpicklerobject *self)
+{
+ PyObject *obj;
+
+ PDATA_POP(self->stack, obj); /* pop argtuple */
+ if (obj == NULL) return -1;
+ Py_DECREF(obj);
+
+ PDATA_POP(self->stack, obj); /* pop cls */
+ if (obj == NULL) return -1;
+ Py_DECREF(obj);
+
+ PDATA_APPEND(self->stack, Py_None, -1);
+ return 0;
+}
+
+static int
+noload_global(Unpicklerobject *self)
+{
+ char *s;
+
+ if (self->readline_func(self, &s) < 0) return -1;
+ if (self->readline_func(self, &s) < 0) return -1;
+ PDATA_APPEND(self->stack, Py_None,-1);
+ return 0;
+}
+
+static int
+noload_reduce(Unpicklerobject *self)
+{
+
+ if (self->stack->length < 2) return stackUnderflow();
+ Pdata_clear(self->stack, self->stack->length-2);
+ PDATA_APPEND(self->stack, Py_None,-1);
+ return 0;
+}
+
+static int
+noload_build(Unpicklerobject *self) {
+
+ if (self->stack->length < 1) return stackUnderflow();
+ Pdata_clear(self->stack, self->stack->length-1);
+ return 0;
+}
+
+static int
+noload_extension(Unpicklerobject *self, int nbytes)
+{
+ char *codebytes;
+
+ assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
+ if (self->read_func(self, &codebytes, nbytes) < 0) return -1;
+ PDATA_APPEND(self->stack, Py_None, -1);
+ return 0;
+}
+
+
+static PyObject *
+noload(Unpicklerobject *self)
+{
+ PyObject *err = 0, *val = 0;
+ char *s;
+
+ self->num_marks = 0;
+ Pdata_clear(self->stack, 0);
+
+ while (1) {
+ if (self->read_func(self, &s, 1) < 0)
+ break;
+
+ switch (s[0]) {
+ case NONE:
+ if (load_none(self) < 0)
+ break;
+ continue;
+
+ case BININT:
+ if (load_binint(self) < 0)
+ break;
+ continue;
+
+ case BININT1:
+ if (load_binint1(self) < 0)
+ break;
+ continue;
+
+ case BININT2:
+ if (load_binint2(self) < 0)
+ break;
+ continue;
+
+ case INT:
+ if (load_int(self) < 0)
+ break;
+ continue;
+
+ case LONG:
+ if (load_long(self) < 0)
+ break;
+ continue;
+
+ case LONG1:
+ if (load_counted_long(self, 1) < 0)
+ break;
+ continue;
+
+ case LONG4:
+ if (load_counted_long(self, 4) < 0)
+ break;
+ continue;
+
+ case FLOAT:
+ if (load_float(self) < 0)
+ break;
+ continue;
+
+ case BINFLOAT:
+ if (load_binfloat(self) < 0)
+ break;
+ continue;
+
+ case BINSTRING:
+ if (load_binstring(self) < 0)
+ break;
+ continue;
+
+ case SHORT_BINSTRING:
+ if (load_short_binstring(self) < 0)
+ break;
+ continue;
+
+ case STRING:
+ if (load_string(self) < 0)
+ break;
+ continue;
+
+#ifdef Py_USING_UNICODE
+ case UNICODE:
+ if (load_unicode(self) < 0)
+ break;
+ continue;
+
+ case BINUNICODE:
+ if (load_binunicode(self) < 0)
+ break;
+ continue;
+#endif
+
+ case EMPTY_TUPLE:
+ if (load_counted_tuple(self, 0) < 0)
+ break;
+ continue;
+
+ case TUPLE1:
+ if (load_counted_tuple(self, 1) < 0)
+ break;
+ continue;
+
+ case TUPLE2:
+ if (load_counted_tuple(self, 2) < 0)
+ break;
+ continue;
+
+ case TUPLE3:
+ if (load_counted_tuple(self, 3) < 0)
+ break;
+ continue;
+
+ case TUPLE:
+ if (load_tuple(self) < 0)
+ break;
+ continue;
+
+ case EMPTY_LIST:
+ if (load_empty_list(self) < 0)
+ break;
+ continue;
+
+ case LIST:
+ if (load_list(self) < 0)
+ break;
+ continue;
+
+ case EMPTY_DICT:
+ if (load_empty_dict(self) < 0)
+ break;
+ continue;
+
+ case DICT:
+ if (load_dict(self) < 0)
+ break;
+ continue;
+
+ case OBJ:
+ if (noload_obj(self) < 0)
+ break;
+ continue;
+
+ case INST:
+ if (noload_inst(self) < 0)
+ break;
+ continue;
+
+ case NEWOBJ:
+ if (noload_newobj(self) < 0)
+ break;
+ continue;
+
+ case GLOBAL:
+ if (noload_global(self) < 0)
+ break;
+ continue;
+
+ case APPEND:
+ if (load_append(self) < 0)
+ break;
+ continue;
+
+ case APPENDS:
+ if (load_appends(self) < 0)
+ break;
+ continue;
+
+ case BUILD:
+ if (noload_build(self) < 0)
+ break;
+ continue;
+
+ case DUP:
+ if (load_dup(self) < 0)
+ break;
+ continue;
+
+ case BINGET:
+ if (load_binget(self) < 0)
+ break;
+ continue;
+
+ case LONG_BINGET:
+ if (load_long_binget(self) < 0)
+ break;
+ continue;
+
+ case GET:
+ if (load_get(self) < 0)
+ break;
+ continue;
+
+ case EXT1:
+ if (noload_extension(self, 1) < 0)
+ break;
+ continue;
+
+ case EXT2:
+ if (noload_extension(self, 2) < 0)
+ break;
+ continue;
+
+ case EXT4:
+ if (noload_extension(self, 4) < 0)
+ break;
+ continue;
+
+ case MARK:
+ if (load_mark(self) < 0)
+ break;
+ continue;
+
+ case BINPUT:
+ if (load_binput(self) < 0)
+ break;
+ continue;
+
+ case LONG_BINPUT:
+ if (load_long_binput(self) < 0)
+ break;
+ continue;
+
+ case PUT:
+ if (load_put(self) < 0)
+ break;
+ continue;
+
+ case POP:
+ if (load_pop(self) < 0)
+ break;
+ continue;
+
+ case POP_MARK:
+ if (load_pop_mark(self) < 0)
+ break;
+ continue;
+
+ case SETITEM:
+ if (load_setitem(self) < 0)
+ break;
+ continue;
+
+ case SETITEMS:
+ if (load_setitems(self) < 0)
+ break;
+ continue;
+
+ case STOP:
+ break;
+
+ case PERSID:
+ if (load_persid(self) < 0)
+ break;
+ continue;
+
+ case BINPERSID:
+ if (load_binpersid(self) < 0)
+ break;
+ continue;
+
+ case REDUCE:
+ if (noload_reduce(self) < 0)
+ break;
+ continue;
+
+ case PROTO:
+ if (load_proto(self) < 0)
+ break;
+ continue;
+
+ case NEWTRUE:
+ if (load_bool(self, Py_True) < 0)
+ break;
+ continue;
+
+ case NEWFALSE:
+ if (load_bool(self, Py_False) < 0)
+ break;
+ continue;
+ default:
+ cPickle_ErrFormat(UnpicklingError,
+ "invalid load key, '%s'.",
+ "c", s[0]);
+ return NULL;
+ }
+
+ break;
+ }
+
+ if ((err = PyErr_Occurred())) {
+ if (err == PyExc_EOFError) {
+ PyErr_SetNone(PyExc_EOFError);
+ }
+ return NULL;
+ }
+
+ PDATA_POP(self->stack, val);
+ return val;
+}
+
+
/* Unpickler.load() method: read one object from the attached stream. */
static PyObject *
Unpickler_load(Unpicklerobject *self, PyObject *unused)
{
	return load(self);
}
+
/* Unpickler.noload() method: scan past one pickle without building
 * objects or importing modules. */
static PyObject *
Unpickler_noload(Unpicklerobject *self, PyObject *unused)
{
	return noload(self);
}
+
+
/* Method table for Unpickler instances; reached via Py_FindMethod in
 * Unpickler_getattr (old-style attribute lookup, no tp_methods slot). */
static struct PyMethodDef Unpickler_methods[] = {
  {"load",          (PyCFunction)Unpickler_load,   METH_NOARGS,
   PyDoc_STR("load() -- Load a pickle")
  },
  {"noload",        (PyCFunction)Unpickler_noload, METH_NOARGS,
   PyDoc_STR(
   "noload() -- not load a pickle, but go through most of the motions\n"
   "\n"
   "This function can be used to read past a pickle without instantiating\n"
   "any objects or importing any modules.  It can also be used to find all\n"
   "persistent references without instantiating any objects or importing\n"
   "any modules.\n")
  },
  {NULL,              NULL}           /* sentinel */
};
+
+
/* Create a new Unpickler reading from the file-like object f.
 *
 * read_func/readline_func are chosen from f's concrete type: real file
 * objects and cStringIO input objects get fast C paths; any other
 * object must expose read() and readline() methods, which are called
 * through Python.  Returns NULL with an exception set on failure. */
static Unpicklerobject *
newUnpicklerobject(PyObject *f)
{
	Unpicklerobject *self;

	if (!( self = PyObject_GC_New(Unpicklerobject, &Unpicklertype)))
		return NULL;

	/* NULL every slot up front so the err path can decref safely. */
	self->file = NULL;
	self->arg = NULL;
	self->stack = (Pdata*)Pdata_New();
	self->pers_func = NULL;
	self->last_string = NULL;
	self->marks = NULL;
	self->num_marks = 0;
	self->marks_size = 0;
	self->buf_size = 0;
	self->read = NULL;
	self->readline = NULL;
	self->find_class = NULL;

	if (!( self->memo = PyDict_New()))
		goto err;

	if (!self->stack)
		goto err;

	Py_INCREF(f);
	self->file = f;

	/* Set read, readline based on type of f */
	if (PyFile_Check(f)) {
		self->fp = PyFile_AsFile(f);
		if (self->fp == NULL) {
			PyErr_SetString(PyExc_ValueError,
					"I/O operation on closed file");
			goto err;
		}
		self->read_func = read_file;
		self->readline_func = readline_file;
	}
	else if (PycStringIO_InputCheck(f)) {
		self->fp = NULL;
		self->read_func = read_cStringIO;
		self->readline_func = readline_cStringIO;
	}
	else {
		/* Generic path: call f.read()/f.readline() via Python. */
		self->fp = NULL;
		self->read_func = read_other;
		self->readline_func = readline_other;

		if (!( (self->readline = PyObject_GetAttr(f, readline_str)) &&
		       (self->read = PyObject_GetAttr(f, read_str)))) {
			PyErr_Clear();
			PyErr_SetString( PyExc_TypeError,
					 "argument must have 'read' and "
					 "'readline' attributes" );
			goto err;
		}
	}
	PyObject_GC_Track(self);

	return self;

  err:
	Py_DECREF((PyObject *)self);
	return NULL;
}
+
+
/* Module-level Unpickler(file) constructor. */
static PyObject *
get_Unpickler(PyObject *self, PyObject *file)
{
	return (PyObject *)newUnpicklerobject(file);
}
+
+
/* Deallocate an Unpickler: drop every owned reference and free the
 * C-level mark array and read buffer. */
static void
Unpickler_dealloc(Unpicklerobject *self)
{
	PyObject_GC_UnTrack((PyObject *)self);
	Py_XDECREF(self->readline);
	Py_XDECREF(self->read);
	Py_XDECREF(self->file);
	Py_XDECREF(self->memo);
	Py_XDECREF(self->stack);
	Py_XDECREF(self->pers_func);
	Py_XDECREF(self->arg);
	Py_XDECREF(self->last_string);
	Py_XDECREF(self->find_class);

	if (self->marks) {
		free(self->marks);
	}

	/* buf is only valid once buf_size is non-zero. */
	if (self->buf_size) {
		free(self->buf);
	}

	self->ob_type->tp_free((PyObject *)self);
}
+
/* GC traversal: visit every PyObject* slot this Unpickler owns. */
static int
Unpickler_traverse(Unpicklerobject *self, visitproc visit, void *arg)
{
	Py_VISIT(self->readline);
	Py_VISIT(self->read);
	Py_VISIT(self->file);
	Py_VISIT(self->memo);
	Py_VISIT(self->stack);
	Py_VISIT(self->pers_func);
	Py_VISIT(self->arg);
	Py_VISIT(self->last_string);
	Py_VISIT(self->find_class);
	return 0;
}
+
/* GC clear: release every owned reference to break reference cycles. */
static int
Unpickler_clear(Unpicklerobject *self)
{
	Py_CLEAR(self->readline);
	Py_CLEAR(self->read);
	Py_CLEAR(self->file);
	Py_CLEAR(self->memo);
	Py_CLEAR(self->stack);
	Py_CLEAR(self->pers_func);
	Py_CLEAR(self->arg);
	Py_CLEAR(self->last_string);
	Py_CLEAR(self->find_class);
	return 0;
}
+
+static PyObject *
+Unpickler_getattr(Unpicklerobject *self, char *name)
+{
+ if (!strcmp(name, "persistent_load")) {
+ if (!self->pers_func) {
+ PyErr_SetString(PyExc_AttributeError, name);
+ return NULL;
+ }
+
+ Py_INCREF(self->pers_func);
+ return self->pers_func;
+ }
+
+ if (!strcmp(name, "find_global")) {
+ if (!self->find_class) {
+ PyErr_SetString(PyExc_AttributeError, name);
+ return NULL;
+ }
+
+ Py_INCREF(self->find_class);
+ return self->find_class;
+ }
+
+ if (!strcmp(name, "memo")) {
+ if (!self->memo) {
+ PyErr_SetString(PyExc_AttributeError, name);
+ return NULL;
+ }
+
+ Py_INCREF(self->memo);
+ return self->memo;
+ }
+
+ if (!strcmp(name, "UnpicklingError")) {
+ Py_INCREF(UnpicklingError);
+ return UnpicklingError;
+ }
+
+ return Py_FindMethod(Unpickler_methods, (PyObject *)self, name);
+}
+
+
+static int
+Unpickler_setattr(Unpicklerobject *self, char *name, PyObject *value)
+{
+
+ if (!strcmp(name, "persistent_load")) {
+ Py_XDECREF(self->pers_func);
+ self->pers_func = value;
+ Py_XINCREF(value);
+ return 0;
+ }
+
+ if (!strcmp(name, "find_global")) {
+ Py_XDECREF(self->find_class);
+ self->find_class = value;
+ Py_XINCREF(value);
+ return 0;
+ }
+
+ if (! value) {
+ PyErr_SetString(PyExc_TypeError,
+ "attribute deletion is not supported");
+ return -1;
+ }
+
+ if (strcmp(name, "memo") == 0) {
+ if (!PyDict_Check(value)) {
+ PyErr_SetString(PyExc_TypeError,
+ "memo must be a dictionary");
+ return -1;
+ }
+ Py_XDECREF(self->memo);
+ self->memo = value;
+ Py_INCREF(value);
+ return 0;
+ }
+
+ PyErr_SetString(PyExc_AttributeError, name);
+ return -1;
+}
+
+/* ---------------------------------------------------------------------------
+ * Module-level functions.
+ */
+
+/* dump(obj, file, protocol=0). */
+static PyObject *
+cpm_dump(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ static char *kwlist[] = {"obj", "file", "protocol", NULL};
+ PyObject *ob, *file, *res = NULL;
+ Picklerobject *pickler = 0;
+ int proto = 0;
+
+ if (!( PyArg_ParseTupleAndKeywords(args, kwds, "OO|i", kwlist,
+ &ob, &file, &proto)))
+ goto finally;
+
+ if (!( pickler = newPicklerobject(file, proto)))
+ goto finally;
+
+ if (dump(pickler, ob) < 0)
+ goto finally;
+
+ Py_INCREF(Py_None);
+ res = Py_None;
+
+ finally:
+ Py_XDECREF(pickler);
+
+ return res;
+}
+
+
+/* dumps(obj, protocol=0). */
+static PyObject *
+cpm_dumps(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ static char *kwlist[] = {"obj", "protocol", NULL};
+ PyObject *ob, *file = 0, *res = NULL;
+ Picklerobject *pickler = 0;
+ int proto = 0;
+
+ if (!( PyArg_ParseTupleAndKeywords(args, kwds, "O|i:dumps", kwlist,
+ &ob, &proto)))
+ goto finally;
+
+ if (!( file = PycStringIO->NewOutput(128)))
+ goto finally;
+
+ if (!( pickler = newPicklerobject(file, proto)))
+ goto finally;
+
+ if (dump(pickler, ob) < 0)
+ goto finally;
+
+ res = PycStringIO->cgetvalue(file);
+
+ finally:
+ Py_XDECREF(pickler);
+ Py_XDECREF(file);
+
+ return res;
+}
+
+
+/* load(fileobj). */
+static PyObject *
+cpm_load(PyObject *self, PyObject *ob)
+{
+ Unpicklerobject *unpickler = 0;
+ PyObject *res = NULL;
+
+ if (!( unpickler = newUnpicklerobject(ob)))
+ goto finally;
+
+ res = load(unpickler);
+
+ finally:
+ Py_XDECREF(unpickler);
+
+ return res;
+}
+
+
+/* loads(string) */
+static PyObject *
+cpm_loads(PyObject *self, PyObject *args)
+{
+ PyObject *ob, *file = 0, *res = NULL;
+ Unpicklerobject *unpickler = 0;
+
+ if (!( PyArg_ParseTuple(args, "S:loads", &ob)))
+ goto finally;
+
+ if (!( file = PycStringIO->NewInput(ob)))
+ goto finally;
+
+ if (!( unpickler = newUnpicklerobject(file)))
+ goto finally;
+
+ res = load(unpickler);
+
+ finally:
+ Py_XDECREF(file);
+ Py_XDECREF(unpickler);
+
+ return res;
+}
+
+
PyDoc_STRVAR(Unpicklertype__doc__,
"Objects that know how to unpickle");

/* Unpickler type object.  Attribute access goes through the old-style
 * tp_getattr/tp_setattr slots rather than tp_methods/tp_getattro; all
 * slots after tp_clear are implicitly zero. */
static PyTypeObject Unpicklertype = {
    PyObject_HEAD_INIT(NULL)
    0,                            /*ob_size*/
    "cPickle.Unpickler",          /*tp_name*/
    sizeof(Unpicklerobject),      /*tp_basicsize*/
    0,
    (destructor)Unpickler_dealloc,	/* tp_dealloc */
    0,					/* tp_print */
    (getattrfunc)Unpickler_getattr,	/* tp_getattr */
    (setattrfunc)Unpickler_setattr,	/* tp_setattr */
    0,					/* tp_compare */
    0,					/* tp_repr */
    0,					/* tp_as_number */
    0,					/* tp_as_sequence */
    0,					/* tp_as_mapping */
    0,					/* tp_hash */
    0,					/* tp_call */
    0,					/* tp_str */
    0,					/* tp_getattro */
    0,					/* tp_setattro */
    0,					/* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Unpicklertype__doc__,		/* tp_doc */
    (traverseproc)Unpickler_traverse,	/* tp_traverse */
    (inquiry)Unpickler_clear,		/* tp_clear */
};
+
/* Module-level function table for cPickle: dump/dumps/load/loads plus
 * the Pickler and Unpickler factories. */
static struct PyMethodDef cPickle_methods[] = {
  {"dump",         (PyCFunction)cpm_dump,         METH_VARARGS | METH_KEYWORDS,
   PyDoc_STR("dump(obj, file, protocol=0) -- "
   "Write an object in pickle format to the given file.\n"
   "\n"
   "See the Pickler docstring for the meaning of optional argument proto.")
  },

  {"dumps",        (PyCFunction)cpm_dumps,        METH_VARARGS | METH_KEYWORDS,
   PyDoc_STR("dumps(obj, protocol=0) -- "
   "Return a string containing an object in pickle format.\n"
   "\n"
   "See the Pickler docstring for the meaning of optional argument proto.")
  },

  {"load",         (PyCFunction)cpm_load,         METH_O,
   PyDoc_STR("load(file) -- Load a pickle from the given file")},

  {"loads",        (PyCFunction)cpm_loads,        METH_VARARGS,
   PyDoc_STR("loads(string) -- Load a pickle from the given string")},

  {"Pickler",      (PyCFunction)get_Pickler,      METH_VARARGS | METH_KEYWORDS,
   PyDoc_STR("Pickler(file, protocol=0) -- Create a pickler.\n"
   "\n"
   "This takes a file-like object for writing a pickle data stream.\n"
   "The optional proto argument tells the pickler to use the given\n"
   "protocol; supported protocols are 0, 1, 2.  The default\n"
   "protocol is 0, to be backwards compatible.  (Protocol 0 is the\n"
   "only protocol that can be written to a file opened in text\n"
   "mode and read back successfully.  When using a protocol higher\n"
   "than 0, make sure the file is opened in binary mode, both when\n"
   "pickling and unpickling.)\n"
   "\n"
   "Protocol 1 is more efficient than protocol 0; protocol 2 is\n"
   "more efficient than protocol 1.\n"
   "\n"
   "Specifying a negative protocol version selects the highest\n"
   "protocol version supported.  The higher the protocol used, the\n"
   "more recent the version of Python needed to read the pickle\n"
   "produced.\n"
   "\n"
   "The file parameter must have a write() method that accepts a single\n"
   "string argument.  It can thus be an open file object, a StringIO\n"
   "object, or any other custom object that meets this interface.\n")
  },

  {"Unpickler",    (PyCFunction)get_Unpickler,    METH_O,
   PyDoc_STR("Unpickler(file) -- Create an unpickler.")},

  { NULL, NULL }
};
+
+static int
+init_stuff(PyObject *module_dict)
+{
+ PyObject *copy_reg, *t, *r;
+
+#define INIT_STR(S) if (!( S ## _str=PyString_InternFromString(#S))) return -1;
+
+ if (PyType_Ready(&Unpicklertype) < 0)
+ return -1;
+ if (PyType_Ready(&Picklertype) < 0)
+ return -1;
+
+ INIT_STR(__class__);
+ INIT_STR(__getinitargs__);
+ INIT_STR(__dict__);
+ INIT_STR(__getstate__);
+ INIT_STR(__setstate__);
+ INIT_STR(__name__);
+ INIT_STR(__main__);
+ INIT_STR(__reduce__);
+ INIT_STR(__reduce_ex__);
+ INIT_STR(write);
+ INIT_STR(append);
+ INIT_STR(read);
+ INIT_STR(readline);
+ INIT_STR(copy_reg);
+ INIT_STR(dispatch_table);
+
+ if (!( copy_reg = PyImport_ImportModule("copy_reg")))
+ return -1;
+
+ /* This is special because we want to use a different
+ one in restricted mode. */
+ dispatch_table = PyObject_GetAttr(copy_reg, dispatch_table_str);
+ if (!dispatch_table) return -1;
+
+ extension_registry = PyObject_GetAttrString(copy_reg,
+ "_extension_registry");
+ if (!extension_registry) return -1;
+
+ inverted_registry = PyObject_GetAttrString(copy_reg,
+ "_inverted_registry");
+ if (!inverted_registry) return -1;
+
+ extension_cache = PyObject_GetAttrString(copy_reg,
+ "_extension_cache");
+ if (!extension_cache) return -1;
+
+ Py_DECREF(copy_reg);
+
+ if (!(empty_tuple = PyTuple_New(0)))
+ return -1;
+
+ two_tuple = PyTuple_New(2);
+ if (two_tuple == NULL)
+ return -1;
+ /* We use this temp container with no regard to refcounts, or to
+ * keeping containees alive. Exempt from GC, because we don't
+ * want anything looking at two_tuple() by magic.
+ */
+ PyObject_GC_UnTrack(two_tuple);
+
+ /* Ugh */
+ if (!( t=PyImport_ImportModule("__builtin__"))) return -1;
+ if (PyDict_SetItemString(module_dict, "__builtins__", t) < 0)
+ return -1;
+
+ if (!( t=PyDict_New())) return -1;
+ if (!( r=PyRun_String(
+ "def __str__(self):\n"
+ " return self.args and ('%s' % self.args[0]) or '(what)'\n",
+ Py_file_input,
+ module_dict, t) )) return -1;
+ Py_DECREF(r);
+
+ PickleError = PyErr_NewException("cPickle.PickleError", NULL, t);
+ if (!PickleError)
+ return -1;
+
+ Py_DECREF(t);
+
+ PicklingError = PyErr_NewException("cPickle.PicklingError",
+ PickleError, NULL);
+ if (!PicklingError)
+ return -1;
+
+ if (!( t=PyDict_New())) return -1;
+ if (!( r=PyRun_String(
+ "def __str__(self):\n"
+ " a=self.args\n"
+ " a=a and type(a[0]) or '(what)'\n"
+ " return 'Cannot pickle %s objects' % a\n"
+ , Py_file_input,
+ module_dict, t) )) return -1;
+ Py_DECREF(r);
+
+ if (!( UnpickleableError = PyErr_NewException(
+ "cPickle.UnpickleableError", PicklingError, t)))
+ return -1;
+
+ Py_DECREF(t);
+
+ if (!( UnpicklingError = PyErr_NewException("cPickle.UnpicklingError",
+ PickleError, NULL)))
+ return -1;
+
+ if (!( BadPickleGet = PyErr_NewException("cPickle.BadPickleGet",
+ UnpicklingError, NULL)))
+ return -1;
+
+ if (PyDict_SetItemString(module_dict, "PickleError",
+ PickleError) < 0)
+ return -1;
+
+ if (PyDict_SetItemString(module_dict, "PicklingError",
+ PicklingError) < 0)
+ return -1;
+
+ if (PyDict_SetItemString(module_dict, "UnpicklingError",
+ UnpicklingError) < 0)
+ return -1;
+
+ if (PyDict_SetItemString(module_dict, "UnpickleableError",
+ UnpickleableError) < 0)
+ return -1;
+
+ if (PyDict_SetItemString(module_dict, "BadPickleGet",
+ BadPickleGet) < 0)
+ return -1;
+
+ PycString_IMPORT;
+
+ return 0;
+}
+
+#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */
+#define PyMODINIT_FUNC void
+#endif
+PyMODINIT_FUNC
+initcPickle(void)
+{
+ PyObject *m, *d, *di, *v, *k;
+ Py_ssize_t i;
+ char *rev = "1.71"; /* XXX when does this change? */
+ PyObject *format_version;
+ PyObject *compatible_formats;
+
+ Picklertype.ob_type = &PyType_Type;
+ Unpicklertype.ob_type = &PyType_Type;
+ PdataType.ob_type = &PyType_Type;
+
+ /* Initialize some pieces. We need to do this before module creation,
+ * so we're forced to use a temporary dictionary. :(
+ */
+ di = PyDict_New();
+ if (!di) return;
+ if (init_stuff(di) < 0) return;
+
+ /* Create the module and add the functions */
+ m = Py_InitModule4("cPickle", cPickle_methods,
+ cPickle_module_documentation,
+ (PyObject*)NULL,PYTHON_API_VERSION);
+ if (m == NULL)
+ return;
+
+ /* Add some symbolic constants to the module */
+ d = PyModule_GetDict(m);
+ v = PyString_FromString(rev);
+ PyDict_SetItemString(d, "__version__", v);
+ Py_XDECREF(v);
+
+ /* Copy data from di. Waaa. */
+ for (i=0; PyDict_Next(di, &i, &k, &v); ) {
+ if (PyObject_SetItem(d, k, v) < 0) {
+ Py_DECREF(di);
+ return;
+ }
+ }
+ Py_DECREF(di);
+
+ i = PyModule_AddIntConstant(m, "HIGHEST_PROTOCOL", HIGHEST_PROTOCOL);
+ if (i < 0)
+ return;
+
+ /* These are purely informational; no code uses them. */
+ /* File format version we write. */
+ format_version = PyString_FromString("2.0");
+ /* Format versions we can read. */
+ compatible_formats = Py_BuildValue("[sssss]",
+ "1.0", /* Original protocol 0 */
+ "1.1", /* Protocol 0 + INST */
+ "1.2", /* Original protocol 1 */
+ "1.3", /* Protocol 1 + BINFLOAT */
+ "2.0"); /* Original protocol 2 */
+ PyDict_SetItemString(d, "format_version", format_version);
+ PyDict_SetItemString(d, "compatible_formats", compatible_formats);
+ Py_XDECREF(format_version);
+ Py_XDECREF(compatible_formats);
+}
Added: sandbox/trunk/cpy_merge/Modules/cStringIO.c
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Modules/cStringIO.c Wed May 23 03:45:28 2007
@@ -0,0 +1,754 @@
+
+#include "Python.h"
+#include "import.h"
+#include "cStringIO.h"
+#include "structmember.h"
+
+PyDoc_STRVAR(cStringIO_module_documentation,
+"A simple fast partial StringIO replacement.\n"
+"\n"
+"This module provides a simple useful replacement for\n"
+"the StringIO module that is written in C. It does not provide the\n"
+"full generality of StringIO, but it provides enough for most\n"
+"applications and is especially useful in conjunction with the\n"
+"pickle module.\n"
+"\n"
+"Usage:\n"
+"\n"
+" from cStringIO import StringIO\n"
+"\n"
+" an_output_stream=StringIO()\n"
+" an_output_stream.write(some_stuff)\n"
+" ...\n"
+" value=an_output_stream.getvalue()\n"
+"\n"
+" an_input_stream=StringIO(a_string)\n"
+" spam=an_input_stream.readline()\n"
+" spam=an_input_stream.read(5)\n"
+" an_input_stream.seek(0) # OK, start over\n"
+" spam=an_input_stream.read() # and read it all\n"
+" \n"
+"If someone else wants to provide a more complete implementation,\n"
+"go for it. :-) \n"
+"\n"
+"cStringIO.c,v 1.29 1999/06/15 14:10:27 jim Exp\n");
+
+/* Declaration for file-like objects that manage data as strings
+
+ The IOobject type should be thought of as a common base type for
+ Iobjects, which provide input (read-only) StringIO objects and
+ Oobjects, which provide read-write objects. Most of the methods
+ depend only on common data.
+*/
+
+typedef struct {
+ PyObject_HEAD
+ char *buf;
+ Py_ssize_t pos, string_size;
+} IOobject;
+
+#define IOOOBJECT(O) ((IOobject*)(O))
+
+/* Declarations for objects of type StringO */
+
+typedef struct { /* Subtype of IOobject */
+ PyObject_HEAD
+ char *buf;
+ Py_ssize_t pos, string_size;
+
+ Py_ssize_t buf_size;
+} Oobject;
+
+/* Declarations for objects of type StringI */
+
+typedef struct { /* Subtype of IOobject */
+ PyObject_HEAD
+ char *buf;
+ Py_ssize_t pos, string_size;
+ /* We store a reference to the object here in order to keep
+ the buffer alive during the lifetime of the Iobject. */
+ PyObject *pbuf;
+} Iobject;
+
+/* IOobject (common) methods */
+
+PyDoc_STRVAR(IO_flush__doc__, "flush(): does nothing.");
+
+/* Shared open-check: return 1 if the buffer is still allocated, else
+   raise ValueError ("I/O operation on closed file") and return 0. */
+static int
+IO__opencheck(IOobject *self) {
+ if (!self->buf) {
+ PyErr_SetString(PyExc_ValueError,
+ "I/O operation on closed file");
+ return 0;
+ }
+ return 1;
+}
+
+/* Getter for the 'closed' attribute: True once buf has been released. */
+static PyObject *
+IO_get_closed(IOobject *self, void *closure)
+{
+ PyObject *result = Py_False;
+
+ if (self->buf == NULL)
+ result = Py_True;
+ Py_INCREF(result);
+ return result;
+}
+
+/* Getset table shared by both StringI and StringO type objects. */
+static PyGetSetDef file_getsetlist[] = {
+ {"closed", (getter)IO_get_closed, NULL, "True if the file is closed"},
+ {0},
+};
+
+/* flush(): no-op apart from the closed-file check. */
+static PyObject *
+IO_flush(IOobject *self, PyObject *unused) {
+
+ if (!IO__opencheck(self)) return NULL;
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+PyDoc_STRVAR(IO_getval__doc__,
+"getvalue([use_pos]) -- Get the string value."
+"\n"
+"If use_pos is specified and is a true value, then the string returned\n"
+"will include only the text up to the current file position.\n");
+
+/* C-API getvalue: return the buffer contents up to the current file
+   position (pos), not the full string_size. */
+static PyObject *
+IO_cgetval(PyObject *self) {
+ if (!IO__opencheck(IOOOBJECT(self))) return NULL;
+ return PyString_FromStringAndSize(((IOobject*)self)->buf,
+ ((IOobject*)self)->pos);
+}
+
+/* getvalue([use_pos]): if use_pos is true, return only the text up to
+   the current position (clamped to string_size); otherwise the whole
+   string. */
+static PyObject *
+IO_getval(IOobject *self, PyObject *args) {
+ PyObject *use_pos=Py_None;
+ Py_ssize_t s;
+
+ if (!IO__opencheck(self)) return NULL;
+ if (!PyArg_UnpackTuple(args,"getval", 0, 1,&use_pos)) return NULL;
+
+ if (PyObject_IsTrue(use_pos)) {
+ s=self->pos;
+ /* pos can sit past the logical end after a forward seek */
+ if (s > self->string_size) s=self->string_size;
+ }
+ else
+ s=self->string_size;
+ return PyString_FromStringAndSize(self->buf, s);
+}
+
+PyDoc_STRVAR(IO_isatty__doc__, "isatty(): always returns 0");
+
+/* isatty(): always False; raises if the file is closed. */
+static PyObject *
+IO_isatty(IOobject *self, PyObject *unused) {
+ if (!IO__opencheck(self)) return NULL;
+ Py_INCREF(Py_False);
+ return Py_False;
+}
+
+PyDoc_STRVAR(IO_read__doc__,
+"read([s]) -- Read s characters, or the rest of the string");
+
+/* C-API read: set *output to a pointer INTO the internal buffer (no
+   copy) and advance pos.  n < 0 or n past EOF reads the remainder.
+   Returns the byte count, or -1 on a closed file.
+   NOTE(review): return type is int while sizes are Py_ssize_t, so a
+   read larger than INT_MAX would be truncated — confirm acceptable. */
+static int
+IO_cread(PyObject *self, char **output, Py_ssize_t n) {
+ Py_ssize_t l;
+
+ if (!IO__opencheck(IOOOBJECT(self))) return -1;
+ l = ((IOobject*)self)->string_size - ((IOobject*)self)->pos;
+ if (n < 0 || n > l) {
+ n = l;
+ if (n < 0) n=0;
+ }
+
+ *output=((IOobject*)self)->buf + ((IOobject*)self)->pos;
+ ((IOobject*)self)->pos += n;
+ return n;
+}
+
+/* read([s]): read s bytes (default: the rest) and return them as a new
+   string object copied out of the internal buffer. */
+static PyObject *
+IO_read(IOobject *self, PyObject *args) {
+ Py_ssize_t n = -1;
+ char *output = NULL;
+
+ if (!PyArg_ParseTuple(args, "|n:read", &n)) return NULL;
+
+ if ( (n=IO_cread((PyObject*)self,&output,n)) < 0) return NULL;
+
+ return PyString_FromStringAndSize(output, n);
+}
+
+PyDoc_STRVAR(IO_readline__doc__, "readline() -- Read one line");
+
+/* C-API readline: scan forward to the next '\n' (included in the
+   result) or EOF, point *output at the line inside the buffer, advance
+   pos, and return the line length (0 at EOF, -1 on closed file). */
+static int
+IO_creadline(PyObject *self, char **output) {
+ char *n, *s;
+ Py_ssize_t l;
+
+ if (!IO__opencheck(IOOOBJECT(self))) return -1;
+
+ for (n = ((IOobject*)self)->buf + ((IOobject*)self)->pos,
+ s = ((IOobject*)self)->buf + ((IOobject*)self)->string_size;
+ n < s && *n != '\n'; n++);
+ /* include the newline, if one was found before EOF */
+ if (n < s) n++;
+
+ *output=((IOobject*)self)->buf + ((IOobject*)self)->pos;
+ l = n - ((IOobject*)self)->buf - ((IOobject*)self)->pos;
+ /* int return type: guard against lines longer than INT_MAX */
+ assert(((IOobject*)self)->pos + l < INT_MAX);
+ ((IOobject*)self)->pos += (int)l;
+ return (int)l;
+}
+
+/* readline([size]): read one line, optionally capped at size bytes.
+   args may be NULL when invoked internally from IO_iternext().  If the
+   cap truncates the line, pos is rewound to just past the cap. */
+static PyObject *
+IO_readline(IOobject *self, PyObject *args) {
+ int n, m=-1;
+ char *output;
+
+ if (args)
+ if (!PyArg_ParseTuple(args, "|i:readline", &m)) return NULL;
+
+ if( (n=IO_creadline((PyObject*)self,&output)) < 0) return NULL;
+ if (m >= 0 && m < n) {
+ m = n - m;
+ n -= m;
+ self->pos -= m;
+ }
+ return PyString_FromStringAndSize(output, n);
+}
+
+PyDoc_STRVAR(IO_readlines__doc__, "readlines() -- Read all lines");
+
+/* readlines([sizehint]): collect lines into a list; stop early once the
+   running byte total reaches sizehint (if positive).
+   NOTE(review): length/hint are plain ints; totals beyond INT_MAX would
+   overflow — presumably acceptable for this module, confirm. */
+static PyObject *
+IO_readlines(IOobject *self, PyObject *args) {
+ int n;
+ char *output;
+ PyObject *result, *line;
+ int hint = 0, length = 0;
+
+ if (!PyArg_ParseTuple(args, "|i:readlines", &hint)) return NULL;
+
+ result = PyList_New(0);
+ if (!result)
+ return NULL;
+
+ while (1){
+ if ( (n = IO_creadline((PyObject*)self,&output)) < 0)
+ goto err;
+ if (n == 0)
+ break;
+ line = PyString_FromStringAndSize (output, n);
+ if (!line)
+ goto err;
+ if (PyList_Append (result, line) == -1) {
+ Py_DECREF (line);
+ goto err;
+ }
+ Py_DECREF (line);
+ length += n;
+ if (hint > 0 && length >= hint)
+ break;
+ }
+ return result;
+ err:
+ Py_DECREF(result);
+ return NULL;
+}
+
+PyDoc_STRVAR(IO_reset__doc__,
+"reset() -- Reset the file position to the beginning");
+
+/* reset(): rewind the file position to 0. */
+static PyObject *
+IO_reset(IOobject *self, PyObject *unused) {
+
+ if (!IO__opencheck(self)) return NULL;
+
+ self->pos = 0;
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+PyDoc_STRVAR(IO_tell__doc__, "tell() -- get the current position.");
+
+/* tell(): return the current file position as a Python int. */
+static PyObject *
+IO_tell(IOobject *self, PyObject *unused) {
+
+ if (!IO__opencheck(self)) return NULL;
+
+ return PyInt_FromSsize_t(self->pos);
+}
+
+PyDoc_STRVAR(IO_truncate__doc__,
+"truncate(): truncate the file at the current position.");
+
+/* truncate([pos]): shrink the logical string to pos (default: current
+   position) and move the file position to the new end.  A negative pos
+   raises IOError(EINVAL).  Never grows the string. */
+static PyObject *
+IO_truncate(IOobject *self, PyObject *args) {
+ Py_ssize_t pos = -1;
+
+ if (!IO__opencheck(self)) return NULL;
+ if (!PyArg_ParseTuple(args, "|n:truncate", &pos)) return NULL;
+
+ if (PyTuple_Size(args) == 0) {
+ /* No argument passed, truncate to current position */
+ pos = self->pos;
+ }
+
+ if (pos < 0) {
+ errno = EINVAL;
+ PyErr_SetFromErrno(PyExc_IOError);
+ return NULL;
+ }
+
+ if (self->string_size > pos) self->string_size = pos;
+ self->pos = self->string_size;
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+/* tp_iternext slot: yield successive lines; an empty line (EOF) ends
+   iteration with StopIteration. */
+static PyObject *
+IO_iternext(Iobject *self)
+{
+ PyObject *next;
+ /* NULL args: readline with no size limit */
+ next = IO_readline((IOobject *)self, NULL);
+ if (!next)
+ return NULL;
+ if (!PyString_GET_SIZE(next)) {
+ Py_DECREF(next);
+ PyErr_SetNone(PyExc_StopIteration);
+ return NULL;
+ }
+ return next;
+}
+
+
+
+
+/* Read-write object methods */
+
+PyDoc_STRVAR(O_seek__doc__,
+"seek(position) -- set the current position\n"
+"seek(position, mode) -- mode 0: absolute; 1: relative; 2: relative to EOF");
+
+/* seek(position[, mode]) for writable objects.  mode 0 = absolute,
+   1 = relative to pos, 2 = relative to EOF.  Seeking past the end grows
+   the buffer (doubling, then exact) and zero-fills the gap between the
+   old logical end and the new position.  On realloc failure the buffer
+   is freed and the object is left in the closed state. */
+static PyObject *
+O_seek(Oobject *self, PyObject *args) {
+ Py_ssize_t position;
+ int mode = 0;
+
+ if (!IO__opencheck(IOOOBJECT(self))) return NULL;
+ if (!PyArg_ParseTuple(args, "n|i:seek", &position, &mode))
+ return NULL;
+
+ if (mode == 2) {
+ position += self->string_size;
+ }
+ else if (mode == 1) {
+ position += self->pos;
+ }
+
+ if (position > self->buf_size) {
+ char *newbuf;
+ self->buf_size*=2;
+ if (self->buf_size <= position) self->buf_size=position+1;
+ newbuf = (char*) realloc(self->buf,self->buf_size);
+ if (!newbuf) {
+ free(self->buf);
+ self->buf = 0;
+ self->buf_size=self->pos=0;
+ return PyErr_NoMemory();
+ }
+ self->buf = newbuf;
+ }
+ /* negative results (e.g. seek(-5)) clamp to the start of file */
+ else if (position < 0) position=0;
+
+ self->pos=position;
+
+ while (--position >= self->string_size) self->buf[position]=0;
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+PyDoc_STRVAR(O_write__doc__,
+"write(s) -- Write a string to the file"
+"\n\nNote (hack:) writing None resets the buffer");
+
+
+/* C-API write: copy l bytes from c into the buffer at pos, growing it
+   (doubling, then exact) as needed, and extend string_size if the write
+   passed the old logical end.  Returns the byte count, or -1 on closed
+   file / out of memory (the buffer is released on OOM). */
+static int
+O_cwrite(PyObject *self, const char *c, Py_ssize_t l) {
+ Py_ssize_t newl;
+ Oobject *oself;
+ char *newbuf;
+
+ if (!IO__opencheck(IOOOBJECT(self))) return -1;
+ oself = (Oobject *)self;
+
+ newl = oself->pos+l;
+ if (newl >= oself->buf_size) {
+ oself->buf_size *= 2;
+ if (oself->buf_size <= newl) {
+ /* int return type: guard against >INT_MAX buffers */
+ assert(newl + 1 < INT_MAX);
+ oself->buf_size = (int)(newl+1);
+ }
+ newbuf = (char*)realloc(oself->buf, oself->buf_size);
+ if (!newbuf) {
+ PyErr_SetString(PyExc_MemoryError,"out of memory");
+ free(oself->buf);
+ oself->buf = 0;
+ oself->buf_size = oself->pos = 0;
+ return -1;
+ }
+ oself->buf = newbuf;
+ }
+
+ memcpy(oself->buf+oself->pos,c,l);
+
+ assert(oself->pos + l < INT_MAX);
+ oself->pos += (int)l;
+
+ if (oself->string_size < oself->pos) {
+ oself->string_size = oself->pos;
+ }
+
+ return (int)l;
+}
+
+/* write(s): append/overwrite at the current position.  "t#" accepts any
+   read-only character buffer; note l is a plain int, matching "t#"
+   without PY_SSIZE_T_CLEAN. */
+static PyObject *
+O_write(Oobject *self, PyObject *args) {
+ char *c;
+ int l;
+
+ if (!PyArg_ParseTuple(args, "t#:write", &c, &l)) return NULL;
+
+ if (O_cwrite((PyObject*)self,c,l) < 0) return NULL;
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+PyDoc_STRVAR(O_close__doc__, "close(): explicitly release resources held.");
+
+/* close(): free the heap buffer and zero all state; later operations
+   will fail the IO__opencheck(). */
+static PyObject *
+O_close(Oobject *self, PyObject *unused) {
+ if (self->buf != NULL) free(self->buf);
+ self->buf = NULL;
+
+ self->pos = self->string_size = self->buf_size = 0;
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+PyDoc_STRVAR(O_writelines__doc__,
+"writelines(sequence_of_strings) -> None. Write the strings to the file.\n"
+"\n"
+"Note that newlines are not added. The sequence can be any iterable object\n"
+"producing strings. This is equivalent to calling write() for each string.");
+/* writelines(iterable): registered with METH_O, so 'args' is the
+   iterable itself.  Each item must be a string; no newlines are added. */
+static PyObject *
+O_writelines(Oobject *self, PyObject *args) {
+ PyObject *it, *s;
+
+ it = PyObject_GetIter(args);
+ if (it == NULL)
+ return NULL;
+ while ((s = PyIter_Next(it)) != NULL) {
+ Py_ssize_t n;
+ char *c;
+ if (PyString_AsStringAndSize(s, &c, &n) == -1) {
+ Py_DECREF(it);
+ Py_DECREF(s);
+ return NULL;
+ }
+ if (O_cwrite((PyObject *)self, c, n) == -1) {
+ Py_DECREF(it);
+ Py_DECREF(s);
+ return NULL;
+ }
+ Py_DECREF(s);
+ }
+
+ Py_DECREF(it);
+
+ /* See if PyIter_Next failed */
+ if (PyErr_Occurred())
+ return NULL;
+
+ Py_RETURN_NONE;
+}
+/* Method table for StringO (read-write) objects. */
+static struct PyMethodDef O_methods[] = {
+ /* Common methods: */
+ {"flush", (PyCFunction)IO_flush, METH_NOARGS, IO_flush__doc__},
+ {"getvalue", (PyCFunction)IO_getval, METH_VARARGS, IO_getval__doc__},
+ {"isatty", (PyCFunction)IO_isatty, METH_NOARGS, IO_isatty__doc__},
+ {"read", (PyCFunction)IO_read, METH_VARARGS, IO_read__doc__},
+ {"readline", (PyCFunction)IO_readline, METH_VARARGS, IO_readline__doc__},
+ {"readlines", (PyCFunction)IO_readlines,METH_VARARGS, IO_readlines__doc__},
+ {"reset", (PyCFunction)IO_reset, METH_NOARGS, IO_reset__doc__},
+ {"tell", (PyCFunction)IO_tell, METH_NOARGS, IO_tell__doc__},
+ {"truncate", (PyCFunction)IO_truncate, METH_VARARGS, IO_truncate__doc__},
+
+ /* Read-write StringIO specific methods: */
+ {"close", (PyCFunction)O_close, METH_NOARGS, O_close__doc__},
+ {"seek", (PyCFunction)O_seek, METH_VARARGS, O_seek__doc__},
+ {"write", (PyCFunction)O_write, METH_VARARGS, O_write__doc__},
+ {"writelines", (PyCFunction)O_writelines, METH_O, O_writelines__doc__},
+ {NULL, NULL} /* sentinel */
+};
+
+/* tp_dealloc for StringO: release the heap buffer, then the object. */
+static void
+O_dealloc(Oobject *self) {
+ if (self->buf != NULL)
+ free(self->buf);
+ PyObject_Del(self);
+}
+
+PyDoc_STRVAR(Otype__doc__, "Simple type for output to strings.");
+
+/* Type object for StringO (read-write) streams.  ob_type is filled in
+   at module init; iteration reuses the common IO_iternext. */
+static PyTypeObject Otype = {
+ PyObject_HEAD_INIT(NULL)
+ 0, /*ob_size*/
+ "cStringIO.StringO", /*tp_name*/
+ sizeof(Oobject), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ /* methods */
+ (destructor)O_dealloc, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr */
+ 0, /*tp_setattr */
+ 0, /*tp_compare*/
+ 0, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ 0, /*tp_hash*/
+ 0 , /*tp_call*/
+ 0, /*tp_str*/
+ 0, /*tp_getattro */
+ 0, /*tp_setattro */
+ 0, /*tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /*tp_flags*/
+ Otype__doc__, /*tp_doc */
+ 0, /*tp_traverse */
+ 0, /*tp_clear */
+ 0, /*tp_richcompare */
+ 0, /*tp_weaklistoffset */
+ PyObject_SelfIter, /*tp_iter */
+ (iternextfunc)IO_iternext, /*tp_iternext */
+ O_methods, /*tp_methods */
+ 0, /*tp_members */
+ file_getsetlist, /*tp_getset */
+};
+
+/* Construct an empty StringO with an initial buffer of 'size' bytes.
+   Returns NULL (MemoryError set) if the malloc fails. */
+static PyObject *
+newOobject(int size) {
+ Oobject *self;
+
+ self = PyObject_New(Oobject, &Otype);
+ if (self == NULL)
+ return NULL;
+ self->pos=0;
+ self->string_size = 0;
+
+ self->buf = (char *)malloc(size);
+ if (!self->buf) {
+ PyErr_SetString(PyExc_MemoryError,"out of memory");
+ /* keep the object consistent before DECREF runs O_dealloc */
+ self->buf_size = 0;
+ Py_DECREF(self);
+ return NULL;
+ }
+
+ self->buf_size=size;
+ return (PyObject*)self;
+}
+
+/* End of code for StringO objects */
+/* -------------------------------------------------------- */
+
+/* close() for StringI: drop the reference that kept the source string
+   (and hence buf) alive; buf itself is borrowed, never freed here. */
+static PyObject *
+I_close(Iobject *self, PyObject *unused) {
+ Py_XDECREF(self->pbuf);
+ self->pbuf = NULL;
+ self->buf = NULL;
+
+ self->pos = self->string_size = 0;
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+/* seek() for read-only objects: same modes as O_seek, but the buffer
+   never grows — positions past EOF are allowed (reads just return
+   nothing) and negative positions clamp to 0. */
+static PyObject *
+I_seek(Iobject *self, PyObject *args) {
+ Py_ssize_t position;
+ int mode = 0;
+
+ if (!IO__opencheck(IOOOBJECT(self))) return NULL;
+ if (!PyArg_ParseTuple(args, "n|i:seek", &position, &mode))
+ return NULL;
+
+ if (mode == 2) position += self->string_size;
+ else if (mode == 1) position += self->pos;
+
+ if (position < 0) position=0;
+
+ self->pos=position;
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+/* Method table for StringI (read-only) objects; reuses the O_* doc
+   strings for close/seek. */
+static struct PyMethodDef I_methods[] = {
+ /* Common methods: */
+ {"flush", (PyCFunction)IO_flush, METH_NOARGS, IO_flush__doc__},
+ {"getvalue", (PyCFunction)IO_getval, METH_VARARGS, IO_getval__doc__},
+ {"isatty", (PyCFunction)IO_isatty, METH_NOARGS, IO_isatty__doc__},
+ {"read", (PyCFunction)IO_read, METH_VARARGS, IO_read__doc__},
+ {"readline", (PyCFunction)IO_readline, METH_VARARGS, IO_readline__doc__},
+ {"readlines", (PyCFunction)IO_readlines,METH_VARARGS, IO_readlines__doc__},
+ {"reset", (PyCFunction)IO_reset, METH_NOARGS, IO_reset__doc__},
+ {"tell", (PyCFunction)IO_tell, METH_NOARGS, IO_tell__doc__},
+ {"truncate", (PyCFunction)IO_truncate, METH_VARARGS, IO_truncate__doc__},
+
+ /* Read-only StringIO specific methods: */
+ {"close", (PyCFunction)I_close, METH_NOARGS, O_close__doc__},
+ {"seek", (PyCFunction)I_seek, METH_VARARGS, O_seek__doc__},
+ {NULL, NULL}
+};
+
+/* tp_dealloc for StringI: release the backing string reference only. */
+static void
+I_dealloc(Iobject *self) {
+ Py_XDECREF(self->pbuf);
+ PyObject_Del(self);
+}
+
+
+PyDoc_STRVAR(Itype__doc__,
+"Simple type for treating strings as input file streams");
+
+/* Type object for StringI (read-only) streams.  ob_type is filled in
+   at module init; iteration reuses the common IO_iternext. */
+static PyTypeObject Itype = {
+ PyObject_HEAD_INIT(NULL)
+ 0, /*ob_size*/
+ "cStringIO.StringI", /*tp_name*/
+ sizeof(Iobject), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ /* methods */
+ (destructor)I_dealloc, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /* tp_getattr */
+ 0, /*tp_setattr*/
+ 0, /*tp_compare*/
+ 0, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ 0, /*tp_hash*/
+ 0, /*tp_call*/
+ 0, /*tp_str*/
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ Itype__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ PyObject_SelfIter, /* tp_iter */
+ (iternextfunc)IO_iternext, /* tp_iternext */
+ I_methods, /* tp_methods */
+ 0, /* tp_members */
+ file_getsetlist, /* tp_getset */
+};
+
+/* Construct a StringI over an existing object's character buffer.  The
+   buffer is borrowed, so a reference to the source object is held in
+   pbuf for the Iobject's lifetime. */
+static PyObject *
+newIobject(PyObject *s) {
+ Iobject *self;
+ char *buf;
+ Py_ssize_t size;
+
+ if (PyObject_AsCharBuffer(s, (const char **)&buf, &size) != 0)
+ return NULL;
+
+ self = PyObject_New(Iobject, &Itype);
+ if (!self) return NULL;
+ Py_INCREF(s);
+ self->buf=buf;
+ self->string_size=size;
+ self->pbuf=s;
+ self->pos=0;
+
+ return (PyObject*)self;
+}
+
+/* End of code for StringI objects */
+/* -------------------------------------------------------- */
+
+
+PyDoc_STRVAR(IO_StringIO__doc__,
+"StringIO([s]) -- Return a StringIO-like stream for reading or writing");
+
+/* StringIO([s]) factory: with an argument, a read-only StringI over s;
+   without, a writable StringO with a 128-byte initial buffer. */
+static PyObject *
+IO_StringIO(PyObject *self, PyObject *args) {
+ PyObject *s=0;
+
+ if (!PyArg_UnpackTuple(args, "StringIO", 0, 1, &s)) return NULL;
+
+ if (s) return newIobject(s);
+ return newOobject(128);
+}
+
+/* List of methods defined in the module */
+
+/* Module-level method table: the StringIO() factory is the only entry. */
+static struct PyMethodDef IO_methods[] = {
+ {"StringIO", (PyCFunction)IO_StringIO,
+ METH_VARARGS, IO_StringIO__doc__},
+ {NULL, NULL} /* sentinel */
+};
+
+
+/* Initialization function for the module (*must* be called initcStringIO) */
+
+/* C API table exported via a CObject so other extension modules (e.g.
+   cPickle) can call into cStringIO without Python-level overhead. */
+static struct PycStringIO_CAPI CAPI = {
+ IO_cread,
+ IO_creadline,
+ O_cwrite,
+ IO_cgetval,
+ newOobject,
+ newIobject,
+ &Itype,
+ &Otype,
+};
+
+#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */
+#define PyMODINIT_FUNC void
+#endif
+/* Module initialization: create the module, ready the two static types,
+   and export both the C API CObject and the type objects. */
+PyMODINIT_FUNC
+initcStringIO(void) {
+ PyObject *m, *d, *v;
+
+
+ /* Create the module and add the functions */
+ m = Py_InitModule4("cStringIO", IO_methods,
+ cStringIO_module_documentation,
+ (PyObject*)NULL,PYTHON_API_VERSION);
+ if (m == NULL) return;
+
+ /* Add some symbolic constants to the module */
+ d = PyModule_GetDict(m);
+
+ /* Export C API */
+ Itype.ob_type=&PyType_Type;
+ Otype.ob_type=&PyType_Type;
+ if (PyType_Ready(&Otype) < 0) return;
+ if (PyType_Ready(&Itype) < 0) return;
+ /* NOTE(review): PyCObject_FromVoidPtr's result is stored without a
+    NULL check — presumably cannot fail here, confirm. */
+ PyDict_SetItemString(d,"cStringIO_CAPI",
+ v = PyCObject_FromVoidPtr(&CAPI,NULL));
+ Py_XDECREF(v);
+
+ /* Export Types */
+ PyDict_SetItemString(d,"InputType", (PyObject*)&Itype);
+ PyDict_SetItemString(d,"OutputType", (PyObject*)&Otype);
+
+ /* Maybe make certain warnings go away */
+ if (0) PycString_IMPORT;
+}
Added: sandbox/trunk/cpy_merge/Modules/rotatingtree.c
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Modules/rotatingtree.c Wed May 23 03:45:28 2007
@@ -0,0 +1,121 @@
+#include "rotatingtree.h"
+
+#define KEY_LOWER_THAN(key1, key2) ((char*)(key1) < (char*)(key2))
+
+/* The randombits() function below is a fast-and-dirty generator that
+ * is probably irregular enough for our purposes. Note that it's biased:
+ * I think that ones are slightly more probable than zeroes. It's not
+ * important here, though.
+ */
+
+/* State for the fast-and-dirty PRNG described above: random_value is
+   the multiplicative generator, random_stream the pool of bits not yet
+   handed out. */
+static unsigned int random_value = 1;
+static unsigned int random_stream = 0;
+
+/* Return 'bits' pseudo-random bits, refilling the pool from the
+   generator when fewer than 2**bits remain. */
+static int
+randombits(int bits)
+{
+ int result;
+ if (random_stream < (1U << bits)) {
+ random_value *= 1082527;
+ random_stream = random_value;
+ }
+ result = random_stream & ((1<<bits)-1);
+ random_stream >>= bits;
+ return result;
+}
+
+
+/* Insert a new node into the tree.
+ (*root) is modified to point to the new root.
+ Ordering is by raw pointer value of node->key (KEY_LOWER_THAN); no
+ balancing is done on insert — RotatingTree_Get rebalances lazily. */
+void
+RotatingTree_Add(rotating_node_t **root, rotating_node_t *node)
+{
+ while (*root != NULL) {
+ if (KEY_LOWER_THAN(node->key, (*root)->key))
+ root = &((*root)->left);
+ else
+ root = &((*root)->right);
+ }
+ node->left = NULL;
+ node->right = NULL;
+ *root = node;
+}
+
+/* Locate the node with the given key. This is the most complicated
+ function because it occasionally rebalances the tree to move the
+ resulting node closer to the root.  Returns NULL if the key is not
+ present.  With probability 7/8 it does a plain binary search; in the
+ remaining case it walks down performing random single rotations
+ (splay-tree style) that gradually lift hot keys toward the root. */
+rotating_node_t *
+RotatingTree_Get(rotating_node_t **root, void *key)
+{
+ if (randombits(3) != 4) {
+ /* Fast path, no rebalancing */
+ rotating_node_t *node = *root;
+ while (node != NULL) {
+ if (node->key == key)
+ return node;
+ if (KEY_LOWER_THAN(key, node->key))
+ node = node->left;
+ else
+ node = node->right;
+ }
+ return NULL;
+ }
+ else {
+ rotating_node_t **pnode = root;
+ rotating_node_t *node = *pnode;
+ rotating_node_t *next;
+ int rotate;
+ if (node == NULL)
+ return NULL;
+ while (1) {
+ if (node->key == key)
+ return node;
+ /* rotate with probability 1/2 at each step */
+ rotate = !randombits(1);
+ if (KEY_LOWER_THAN(key, node->key)) {
+ next = node->left;
+ if (next == NULL)
+ return NULL;
+ if (rotate) {
+ /* right rotation: lift 'next' above 'node' */
+ node->left = next->right;
+ next->right = node;
+ *pnode = next;
+ }
+ else
+ pnode = &(node->left);
+ }
+ else {
+ next = node->right;
+ if (next == NULL)
+ return NULL;
+ if (rotate) {
+ /* left rotation: lift 'next' above 'node' */
+ node->right = next->left;
+ next->left = node;
+ *pnode = next;
+ }
+ else
+ pnode = &(node->right);
+ }
+ node = next;
+ }
+ }
+}
+
+/* Enumerate all nodes in the tree. The callback enumfn() should return
+ zero to continue the enumeration, or non-zero to interrupt it.
+ A non-zero value is directly returned by RotatingTree_Enum().
+ In-order traversal: recursion on the left subtree only; the right
+ spine is walked iteratively.  'node' saves root->right because enumfn
+ may mutate the node it is handed. */
+int
+RotatingTree_Enum(rotating_node_t *root, rotating_tree_enum_fn enumfn,
+ void *arg)
+{
+ int result;
+ rotating_node_t *node;
+ while (root != NULL) {
+ result = RotatingTree_Enum(root->left, enumfn, arg);
+ if (result != 0) return result;
+ node = root->right;
+ result = enumfn(root, arg);
+ if (result != 0) return result;
+ root = node;
+ }
+ return 0;
+}
Added: sandbox/trunk/cpy_merge/Modules/rotatingtree.h
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/Modules/rotatingtree.h Wed May 23 03:45:28 2007
@@ -0,0 +1,27 @@
+/* "Rotating trees" (Armin Rigo)
+ *
+ * Google "splay trees" for the general idea.
+ *
+ * It's a dict-like data structure that works best when accesses are not
+ * random, but follow a strong pattern. The one implemented here is for
+ * access patterns where the same small set of keys is looked up over
+ * and over again, and this set of keys evolves slowly over time.
+ */
+
+#include <stdlib.h>
+
+#define EMPTY_ROTATING_TREE ((rotating_node_t *)NULL)
+
+typedef struct rotating_node_s rotating_node_t;
+/* Enumeration callback: return 0 to continue, non-zero to stop. */
+typedef int (*rotating_tree_enum_fn) (rotating_node_t *node, void *arg);
+
+/* Tree node; callers embed this struct and supply the key.  Nodes are
+   ordered by raw pointer value of 'key'. */
+struct rotating_node_s {
+ void *key;
+ rotating_node_t *left;
+ rotating_node_t *right;
+};
+
+void RotatingTree_Add(rotating_node_t **root, rotating_node_t *node);
+rotating_node_t* RotatingTree_Get(rotating_node_t **root, void *key);
+int RotatingTree_Enum(rotating_node_t *root, rotating_tree_enum_fn enumfn,
+ void *arg);
Added: sandbox/trunk/cpy_merge/README
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/README Wed May 23 03:45:28 2007
@@ -0,0 +1,8 @@
+This sandbox is for the merge of the modules with dual C and Python
+implementations -- i.e., cStringIO/StringIO, cPickle/pickle and
+cProfile/profile. It contains the modules to be modified and their
+respective test suites.
+
+The initial code was taken from the p3yk branch
+(http://svn.python.org/projects/python/branches/p3yk) at
+revision 55520.
Added: sandbox/trunk/cpy_merge/setup.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/cpy_merge/setup.py Wed May 23 03:45:28 2007
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# A dumb and simple build script for the C extension modules
+
+from distutils.core import Extension, setup
+
+# Source and header locations, relative to this sandbox's root.
+mod_dir = "Modules/"
+inc_dir = ["./Include"]
+
+# _lsprof additionally links rotatingtree.c and needs the local
+# Include/ directory (cStringIO.h lives there for the other modules'
+# consumers; _lsprof uses rotatingtree.h from Modules/).
+setup(name = "C/Python modules merge",
+ maintainer = "Alexandre Vassalotti",
+ maintainer_email = "alexandre at peadrop.com",
+ ext_modules=[
+ Extension(name="_lsprof",
+ sources=[mod_dir + "_lsprof.c",
+ mod_dir + "rotatingtree.c"],
+ include_dirs=inc_dir),
+ Extension(name="cStringIO",
+ sources=[mod_dir + "cStringIO.c"]),
+ Extension(name="cPickle",
+ sources=[mod_dir + "cPickle.c"])
+ ])
More information about the Python-checkins
mailing list