[Python-checkins] python/nondist/sandbox/pickletools pickletools.py,NONE,1.1

tim_one@users.sourceforge.net tim_one@users.sourceforge.net
Sat, 25 Jan 2003 12:58:55 -0800


Update of /cvsroot/python/python/nondist/sandbox/pickletools
In directory sc8-pr-cvs1:/tmp/cvs-serv2358

Added Files:
	pickletools.py 
Log Message:
A start at a suite of pickle tools.


--- NEW FILE: pickletools.py ---
# This is meant to become the basis for several pickle helpers:
#
# - "Executable documentation".  The descriptions are meant to be precise
#   enough to execute, and the various doc attrs are meant to hold text good
#   enough so that reference docs could be generated by crawling over the
#   descriptors.
#
# - A symbolic pickle disassembler.
#
# - A pickle verifier -- read a pickle and check it exhaustively for
#   well-formedness.


# Meta-rule:  Descriptions are stored in instances of descriptor objects,
# with plain constructors.  No meta-language is defined from which
# descriptors could be constructed.  If you want, e.g., XML, write a little
# program to generate XML from the objects.


# Some pickle opcodes have an argument, following the opcode in the
# bytestream.  An argument is of a specific type, described by an instance
# of ArgumentDescriptor.

class ArgumentDescriptor(object):
    """Describes one kind of argument that can follow a pickle opcode.

    Attributes (all supplied to the constructor):

    name   -- name of the descriptor record, also a module global name;
              a string
    n      -- length of the argument, in bytes; an int, or None meaning
              the argument is variable-length
    reader -- a function taking a file-like object, reading this kind of
              argument from the object at the current position, advancing
              the current position by n bytes, and returning the value of
              the argument
    doc    -- human-readable docs for this arg descriptor; a string
    """

    __slots__ = ('name', 'n', 'reader', 'doc')

    def __init__(self, name, n, reader, doc):
        # The asserts are sanity checks on the values handed in; each
        # attribute is stored as soon as its own check has passed.
        assert isinstance(name, str)
        self.name = name

        # None is the "variable-length" marker; anything else must be a
        # non-negative int.
        if n is not None:
            assert isinstance(n, int) and n >= 0
        self.n = n

        # The reader is stored as given, without any check.
        self.reader = reader

        assert isinstance(doc, str)
        self.doc = doc

from struct import unpack as _unpack

def read_uint1(f):
    """Read a single byte from f and return its ordinal value.

    f is a file-like object; one byte is consumed from its current
    position.  ValueError is raised if no byte could be read.

    >>> import StringIO
    >>> read_uint1(StringIO.StringIO('\\xff'))
    255
    """

    byte = f.read(1)
    if not byte:
        # read() came back empty: nothing left in the stream.
        raise ValueError("not enough data in stream to read uint1")
    return ord(byte)

# Descriptor record for a one-byte argument decoded by read_uint1.
uint1 = ArgumentDescriptor(
    'uint1',
    1,
    read_uint1,
    "one-byte unsigned integer")


def read_uint2(f):
    """Read two bytes from f and return them as an unsigned integer.

    The bytes are interpreted little-endian (struct format "<H").
    ValueError is raised if fewer than two bytes could be read.

    >>> import StringIO
    >>> read_uint2(StringIO.StringIO('\\xff\\x00'))
    255
    >>> read_uint2(StringIO.StringIO('\\xff\\xff'))
    65535
    """

    raw = f.read(2)
    if len(raw) != 2:
        # A short read means the stream ended inside the argument.
        raise ValueError("not enough data in stream to read uint2")
    (value,) = _unpack("<H", raw)
    return value

# Descriptor record for a two-byte argument decoded by read_uint2.
uint2 = ArgumentDescriptor(
    'uint2',
    2,
    read_uint2,
    "two-byte unsigned integer, little-endian")


def read_int4(f):
    """Read four bytes from f and return them as a signed integer.

    The bytes are interpreted little-endian (struct format "<i").
    ValueError is raised if fewer than four bytes could be read.

    >>> import StringIO
    >>> read_int4(StringIO.StringIO('\\xff\\x00\\x00\\x00'))
    255
    >>> read_int4(StringIO.StringIO('\\x00\\x00\\x00\\x80')) == -(2**31)
    True
    """

    raw = f.read(4)
    if len(raw) != 4:
        # A short read means the stream ended inside the argument.
        raise ValueError("not enough data in stream to read int4")
    (value,) = _unpack("<i", raw)
    return value

# Descriptor record for a four-byte argument decoded by read_int4.
int4 = ArgumentDescriptor(
    'int4',
    4,
    read_int4,
    "four-byte signed integer, little-endian")


def read_stringnl(f):
    """Read one newline-terminated line from f and return it unescaped.

    The trailing newline is dropped and the rest is decoded with the
    'string_escape' codec.  ValueError is raised if the line read does
    not end with a newline.

    >>> import StringIO
    >>> read_stringnl(StringIO.StringIO("abcd\\nefg\\n"))
    'abcd'

    >>> read_stringnl(StringIO.StringIO("\\n"))
    ''

    >>> read_stringnl(StringIO.StringIO("abcd"))
    Traceback (most recent call last):
    ...
    ValueError: no newline found when trying to read stringnl

    Embedded escapes are undone in the result.
    >>> read_stringnl(StringIO.StringIO("a\\\\nb\\x00c\\td\\ne"))
    'a\\nb\\x00c\\td'
    """

    line = f.readline()
    if line.endswith('\n'):
        body = line[:-1]    # everything except the terminating newline
        # I'm not sure when 'string_escape' was added to the std codecs;
        # it's crazy not to use it if it's there.
        return body.decode('string_escape')
    raise ValueError("no newline found when trying to read stringnl")

# Descriptor record for the line-oriented argument decoded by
# read_stringnl; n is None because the length is not fixed.
stringnl = ArgumentDescriptor(
    'stringnl',
    None,
    read_stringnl,
    doc="""A newline-terminated string.

                   This is a repr-style string, with embedded escapes.
                   """)


# Descriptors for pickle opcodes.

class OpcodeInfo(object):
    """Description of a single pickle opcode.

    Attributes (all supplied to the constructor):

    name         -- symbolic name of opcode; a string
    code         -- the code used in a bytestream to represent the opcode;
                    a string, usually one letter
    args         -- list of 0 or more argument descriptors
    stack_before -- what the stack looks like before this opcode runs;
                    a list
    stack_after  -- what the stack looks like after this opcode runs;
                    a list
    proto        -- the protocol number in which this opcode was
                    introduced; an int
    doc          -- human-readable docs for this opcode; a string
    """

    __slots__ = (
        'name',
        'code',
        'args',
        'stack_before',
        'stack_after',
        'proto',
        'doc',
    )

    def __init__(self, name, code, args,
                 stack_before, stack_after, proto, doc):
        # Each value is type-checked and then stored, in declaration order.
        for attr, text in (('name', name), ('code', code)):
            assert isinstance(text, str)
            setattr(self, attr, text)

        for attr, seq in (('args', args),
                          ('stack_before', stack_before),
                          ('stack_after', stack_after)):
            assert isinstance(seq, list)
            setattr(self, attr, seq)

        assert isinstance(proto, int)
        self.proto = proto

        assert isinstance(doc, str)
        self.doc = doc

# Every entry below is still a placeholder: no argument descriptors, empty
# before/after stack lists, proto 0, and the same XXX doc text.  Only the
# symbolic name and the bytestream code differ, so a temporary factory
# (deleted again once the table is built) supplies everything else.
def I(name, code):
    return OpcodeInfo(
        name=name,
        code=code,
        args=[],
        stack_before=[],
        stack_after=[],
        proto=0,
        doc=("XXX One-line description goes here.\n"
             "\n"
             "      XXX Doc body goes here.\n"
             "      "))

opcodes = [
    I('MARK', '('),
    I('STOP', '.'),
    I('POP', '0'),
    I('POP_MARK', '1'),
    I('DUP', '2'),
    I('FLOAT', 'F'),
    I('INT', 'I'),
    I('BININT', 'J'),
    I('BININT1', 'K'),
    I('LONG', 'L'),
    I('BININT2', 'M'),
    I('NONE', 'N'),
    I('PERSID', 'P'),
    I('BINPERSID', 'Q'),
    I('REDUCE', 'R'),
    I('STRING', 'S'),
    I('BINSTRING', 'T'),
    I('SHORT_BINSTRING', 'U'),
    I('UNICODE', 'V'),
    I('BINUNICODE', 'X'),
    I('APPEND', 'a'),
    I('BUILD', 'b'),
    I('GLOBAL', 'c'),
    I('DICT', 'd'),
    I('EMPTY_DICT', '}'),
    I('APPENDS', 'e'),
    I('GET', 'g'),
    I('BINGET', 'h'),
    I('INST', 'i'),
    I('LONG_BINGET', 'j'),
    I('LIST', 'l'),
    I('EMPTY_LIST', ']'),
    I('OBJ', 'o'),
    I('PUT', 'p'),
    I('BINPUT', 'q'),
    I('LONG_BINPUT', 'r'),
    I('SETITEM', 's'),
    I('TUPLE', 't'),
    I('EMPTY_TUPLE', ')'),
    I('SETITEMS', 'u'),
    I('BINFLOAT', 'G'),
    I('TRUE', 'I01\n'),
    I('FALSE', 'I00\n'),
]
del I

# Import-time sanity check: no two entries of the opcodes table may share
# a .name, and no two may share a .code.  Each dict maps a value already
# seen to the table index where it first appeared, so the error message
# can report both offending positions.
seen_names = {}
seen_codes = {}

for index, info in enumerate(opcodes):
    if info.name in seen_names:
        raise ValueError("repeated name %r at indices %d and %d" %
                         (info.name, seen_names[info.name], index))
    if info.code in seen_codes:
        raise ValueError("repeated code %r at indices %d and %d" %
                         (info.code, seen_codes[info.code], index))

    seen_names[info.name] = index
    seen_codes[info.code] = index

# Don't leave the scratch names behind in the module namespace.
del seen_names, seen_codes, index, info

def _test():
    """Run doctest.testmod() with default arguments and return its result."""
    import doctest
    outcome = doctest.testmod()
    return outcome

# Executing this file as a script runs the doctests.
if __name__ == "__main__":
    _test()