[Python-checkins] python/nondist/sandbox/pickletools pickletools.py,NONE,1.1
tim_one@users.sourceforge.net
tim_one@users.sourceforge.net
Sat, 25 Jan 2003 12:58:55 -0800
Update of /cvsroot/python/python/nondist/sandbox/pickletools
In directory sc8-pr-cvs1:/tmp/cvs-serv2358
Added Files:
pickletools.py
Log Message:
A start at a suite of pickle tools.
--- NEW FILE: pickletools.py ---
# This is meant to become the basis for several pickle helpers:
#
# - "Executable documentation". The descriptions are meant to be precise
# enough to execute, and the various doc attrs are meant to hold text good
# enough so that reference docs could be generated by crawling over the
# descriptors.
#
# - A symbolic pickle disassembler.
#
# - A pickle verifier -- read a pickle and check it exhaustively for
# well-formedness.
# Meta-rule: Descriptions are stored in instances of descriptor objects,
# with plain constructors. No meta-language is defined from which
# descriptors could be constructed. If you want, e.g., XML, write a little
# program to generate XML from the objects.
# Some pickle opcodes have an argument, following the opcode in the
# bytestream. An argument is of a specific type, described by an instance
# of ArgumentDescriptor.
class ArgumentDescriptor(object):
    """Description of one kind of argument that can follow a pickle opcode.

    Instance attributes, all supplied to the constructor:

    name
        Name of the descriptor record, also a module global name; a string.
    n
        Length of the argument, in bytes; an int, or None meaning
        variable-length.
    reader
        A function taking a file-like object, reading this kind of argument
        from the object at the current position, advancing the current
        position by n bytes, and returning the value of the argument.
    doc
        Human-readable docs for this arg descriptor; a string.
    """

    __slots__ = ('name', 'n', 'reader', 'doc')

    def __init__(self, name, n, reader, doc):
        # Sanity-check the arguments first (name, then n, then doc); reader
        # is stored without any check.
        assert isinstance(name, str)
        assert n is None or (isinstance(n, int) and n >= 0)
        assert isinstance(doc, str)

        self.name = name
        self.n = n
        self.reader = reader
        self.doc = doc
from struct import unpack as _unpack
def read_uint1(f):
    r"""Read a one-byte unsigned integer from the file-like object f.

    One byte is read at f's current position and its ordinal value is
    returned.  ValueError is raised when f has no data left.

    >>> import StringIO
    >>> read_uint1(StringIO.StringIO('\xff'))
    255
    """
    byte = f.read(1)
    if not byte:
        raise ValueError("not enough data in stream to read uint1")
    return ord(byte)
# Argument descriptor for a fixed-size, one-byte argument decoded by
# read_uint1.
uint1 = ArgumentDescriptor(
            name='uint1',
            n=1,
            reader=read_uint1,
            doc="one-byte unsigned integer")
def read_uint2(f):
    r"""Read a two-byte little-endian unsigned integer from f.

    Two bytes are read at f's current position and unpacked with the
    struct format "<H".  ValueError is raised when fewer than two bytes
    are available.

    >>> import StringIO
    >>> read_uint2(StringIO.StringIO('\xff\x00'))
    255
    >>> read_uint2(StringIO.StringIO('\xff\xff'))
    65535
    """
    raw = f.read(2)
    if len(raw) != 2:
        raise ValueError("not enough data in stream to read uint2")
    (value,) = _unpack("<H", raw)
    return value
# Argument descriptor for a fixed-size, two-byte argument decoded by
# read_uint2.
uint2 = ArgumentDescriptor(
            name='uint2',
            n=2,
            reader=read_uint2,
            doc="two-byte unsigned integer, little-endian")
def read_int4(f):
    r"""Read a four-byte little-endian signed integer from f.

    Four bytes are read at f's current position and unpacked with the
    struct format "<i".  ValueError is raised when fewer than four bytes
    are available.

    >>> import StringIO
    >>> read_int4(StringIO.StringIO('\xff\x00\x00\x00'))
    255
    >>> read_int4(StringIO.StringIO('\x00\x00\x00\x80')) == -(2**31)
    True
    """
    raw = f.read(4)
    if len(raw) != 4:
        raise ValueError("not enough data in stream to read int4")
    (value,) = _unpack("<i", raw)
    return value
# Argument descriptor for a fixed-size, four-byte argument decoded by
# read_int4.
int4 = ArgumentDescriptor(
           name='int4',
           n=4,
           reader=read_int4,
           doc="four-byte signed integer, little-endian")
def read_stringnl(f):
    r"""Read a newline-terminated string from the file-like object f.

    One line is read with f.readline().  The trailing newline is dropped
    and the rest is decoded with the 'string_escape' codec.  ValueError is
    raised when the line read does not end with a newline.

    >>> import StringIO
    >>> read_stringnl(StringIO.StringIO("abcd\nefg\n"))
    'abcd'

    >>> read_stringnl(StringIO.StringIO("\n"))
    ''

    >>> read_stringnl(StringIO.StringIO("abcd"))
    Traceback (most recent call last):
    ...
    ValueError: no newline found when trying to read stringnl

    Embedded escapes are undone in the result.

    >>> read_stringnl(StringIO.StringIO("a\\nb\x00c\td\ne"))
    'a\nb\x00c\td'
    """
    line = f.readline()
    if line.endswith('\n'):
        # Lose the newline, then undo the escapes.  It's unclear when
        # 'string_escape' was added to the std codecs, but it's crazy not
        # to use it if it's there.
        return line[:-1].decode('string_escape')
    raise ValueError("no newline found when trying to read stringnl")
# Argument descriptor for a variable-length argument (n is None) decoded by
# read_stringnl.
stringnl = ArgumentDescriptor(
               name='stringnl',
               n=None,
               reader=read_stringnl,
               doc=("A newline-terminated string.\n"
                    "This is a repr-style string, with embedded escapes.\n"))
# Descriptors for pickle opcodes.
class OpcodeInfo(object):
    """Description of a single pickle opcode.

    Instance attributes, all supplied to the constructor:

    name
        Symbolic name of the opcode; a string.
    code
        The code used in a bytestream to represent the opcode; a string,
        usually one letter.
    args
        List of 0 or more argument descriptors.
    stack_before
        What the stack looks like before this opcode runs; a list.
    stack_after
        What the stack looks like after this opcode runs; a list.
    proto
        The protocol number in which this opcode was introduced; an int.
    doc
        Human-readable docs for this opcode; a string.
    """

    __slots__ = ('name', 'code', 'args', 'stack_before', 'stack_after',
                 'proto', 'doc')

    def __init__(self, name, code, args,
                 stack_before, stack_after, proto, doc):
        # Type-check every argument, in signature order, before storing any.
        expected_types = (
            (name, str),
            (code, str),
            (args, list),
            (stack_before, list),
            (stack_after, list),
            (proto, int),
            (doc, str),
        )
        for value, kind in expected_types:
            assert isinstance(value, kind)

        self.name = name
        self.code = code
        self.args = args
        self.stack_before = stack_before
        self.stack_after = stack_after
        self.proto = proto
        self.doc = doc
# Table of opcode descriptors.  Every entry is still a skeleton: only the
# symbolic name and the bytestream code differ between them, while the
# argument list, both stack pictures, the protocol number and the doc text
# are all placeholders waiting to be filled in.

_STUB_DOC = ("XXX One-line description goes here.\n"
             "XXX Doc body goes here.\n")


def _stub(name, code):
    """Return a placeholder OpcodeInfo for the opcode `name` / `code`."""
    return OpcodeInfo(name=name,
                      code=code,
                      args=[],
                      stack_before=[],
                      stack_after=[],
                      proto=0,
                      doc=_STUB_DOC)


opcodes = [
    _stub('MARK', '('),
    _stub('STOP', '.'),
    _stub('POP', '0'),
    _stub('POP_MARK', '1'),
    _stub('DUP', '2'),
    _stub('FLOAT', 'F'),
    _stub('INT', 'I'),
    _stub('BININT', 'J'),
    _stub('BININT1', 'K'),
    _stub('LONG', 'L'),
    _stub('BININT2', 'M'),
    _stub('NONE', 'N'),
    _stub('PERSID', 'P'),
    _stub('BINPERSID', 'Q'),
    _stub('REDUCE', 'R'),
    _stub('STRING', 'S'),
    _stub('BINSTRING', 'T'),
    _stub('SHORT_BINSTRING', 'U'),
    _stub('UNICODE', 'V'),
    _stub('BINUNICODE', 'X'),
    _stub('APPEND', 'a'),
    _stub('BUILD', 'b'),
    _stub('GLOBAL', 'c'),
    _stub('DICT', 'd'),
    _stub('EMPTY_DICT', '}'),
    _stub('APPENDS', 'e'),
    _stub('GET', 'g'),
    _stub('BINGET', 'h'),
    _stub('INST', 'i'),
    _stub('LONG_BINGET', 'j'),
    _stub('LIST', 'l'),
    _stub('EMPTY_LIST', ']'),
    _stub('OBJ', 'o'),
    _stub('PUT', 'p'),
    _stub('BINPUT', 'q'),
    _stub('LONG_BINPUT', 'r'),
    _stub('SETITEM', 's'),
    _stub('TUPLE', 't'),
    _stub('EMPTY_TUPLE', ')'),
    _stub('SETITEMS', 'u'),
    _stub('BINFLOAT', 'G'),
    _stub('TRUE', 'I01\n'),
    _stub('FALSE', 'I00\n'),
]

# The construction helpers are scaffolding only; don't leave them behind in
# the module namespace.
del _stub, _STUB_DOC
# Verify uniqueness of .name and .code members.  Each dict maps a value
# already seen to the index in `opcodes` of the entry that used it, so a
# clash can report both positions.
seen_names = {}
seen_codes = {}
for index, op in enumerate(opcodes):
    if op.name in seen_names:
        raise ValueError("repeated name %r at indices %d and %d" %
                         (op.name, seen_names[op.name], index))
    if op.code in seen_codes:
        raise ValueError("repeated code %r at indices %d and %d" %
                         (op.code, seen_codes[op.code], index))
    seen_names[op.name] = index
    seen_codes[op.code] = index

# The bookkeeping is only needed for the check above.
del seen_names, seen_codes, index, op
def _test():
    """Run doctest.testmod() and return its result."""
    from doctest import testmod
    return testmod()


if __name__ == "__main__":
    _test()