[Python-checkins] python/dist/src/Lib pickletools.py,1.21,1.22
tim_one@users.sourceforge.net
tim_one@users.sourceforge.net
Wed, 29 Jan 2003 12:12:24 -0800
Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1:/tmp/cvs-serv21312/Lib
Modified Files:
pickletools.py
Log Message:
dis(): This had a problem with proto 0 pickles, in that POP sometimes
popped a MARK, but without stack emulation the disassembler couldn't
know that, and subsequent indentation got hosed.
Now the disassembler does do enough stack emulation to catch this. While
I was at it, also added lots of sanity checks for other stack operations,
and correct use of the memo. This goes (I think) a long way toward being
a "pickle verifier" now too.
Index: pickletools.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/pickletools.py,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -d -r1.21 -r1.22
*** pickletools.py 29 Jan 2003 15:41:33 -0000 1.21
--- pickletools.py 29 Jan 2003 20:12:21 -0000 1.22
***************
*** 14,21 ****
#
# - A pickle verifier: read a pickle and check it exhaustively for
! # well-formedness.
#
# - A protocol identifier: examine a pickle and return its protocol number
# (== the highest .proto attr value among all the opcodes in the pickle).
#
# - A pickle optimizer: for example, tuple-building code is sometimes more
--- 14,22 ----
#
# - A pickle verifier: read a pickle and check it exhaustively for
! # well-formedness. dis() does a lot of this already.
#
# - A protocol identifier: examine a pickle and return its protocol number
# (== the highest .proto attr value among all the opcodes in the pickle).
+ # dis() already prints this info at the end.
#
# - A pickle optimizer: for example, tuple-building code is sometimes more
***************
*** 713,716 ****
--- 714,720 ----
self.doc = doc
+ def __repr__(self):
+ return self.name
+
pyint = StackObject(
***************
*** 1859,1866 ****
Optional arg indentlevel is the number of blanks by which to indent
a new MARK level. It defaults to 4.
"""
! markstack = []
indentchunk = ' ' * indentlevel
for opcode, arg, pos in genops(pickle):
if pos is not None:
--- 1863,1893 ----
Optional arg indentlevel is the number of blanks by which to indent
a new MARK level. It defaults to 4.
+
+ In addition to printing the disassembly, some sanity checks are made:
+
+ + All embedded opcode arguments "make sense".
+
+ + Explicit and implicit pop operations have enough items on the stack.
+
+ + When an opcode implicitly refers to a markobject, a markobject is
+ actually on the stack.
+
+ + A memo entry isn't referenced before it's defined.
+
+ + The markobject isn't stored in the memo.
+
+ + A memo entry isn't redefined.
"""
! # Most of the hair here is for sanity checks, but most of it is needed
! # anyway to detect when a protocol 0 POP takes a MARK off the stack
! # (which in turn is needed to indent MARK blocks correctly).
!
! stack = [] # crude emulation of unpickler stack
! memo = {} # crude emulation of unpickler memo
! maxproto = -1 # max protocol number seen
! markstack = [] # bytecode positions of MARK opcodes
indentchunk = ' ' * indentlevel
+ errormsg = None
for opcode, arg, pos in genops(pickle):
if pos is not None:
***************
*** 1871,1880 ****
opcode.name)
markmsg = None
! if markstack and markobject in opcode.stack_before:
! assert markobject not in opcode.stack_after
! markpos = markstack.pop()
! if markpos is not None:
! markmsg = "(MARK at %d)" % markpos
if arg is not None or markmsg:
--- 1898,1949 ----
opcode.name)
+ maxproto = max(maxproto, opcode.proto)
+
+ # See whether a MARK should be popped.
+ before = opcode.stack_before # don't mutate
+ after = opcode.stack_after # don't mutate
markmsg = None
! if markobject in before or (opcode.name == "POP" and
! stack and
! stack[-1] is markobject):
! assert markobject not in after
! if markstack:
! markpos = markstack.pop()
! if markpos is None:
! markmsg = "(MARK at unknown opcode offset)"
! else:
! markmsg = "(MARK at %d)" % markpos
! # Pop everything at and after the topmost markobject.
! while stack[-1] is not markobject:
! stack.pop()
! stack.pop()
! # Remove markobject stuff from stack_before.
! try:
! i = before.index(markobject)
! before = before[:i]
! except ValueError:
! assert opcode.name == "POP"
! assert len(before) == 1
! before = [] # stop code later from popping again
! else:
! errormsg = markmsg = "no MARK exists on stack"
!
! # Check for correct memo usage.
! if opcode.name in ("PUT", "BINPUT", "LONG_BINPUT"):
! if arg in memo:
! errormsg = "memo key %r already defined" % arg
! elif not stack:
! errormsg = "stack is empty -- can't store into memo"
! elif stack[-1] is markobject:
! errormsg = "can't store markobject in the memo"
! else:
! memo[arg] = stack[-1]
!
! elif opcode.name in ("GET", "BINGET", "LONG_BINGET"):
! if arg in memo:
! assert len(after) == 1
! after = [memo[arg]] # for better stack emulation
! else:
! errormsg = "memo key %r has never been stored into" % arg
if arg is not None or markmsg:
***************
*** 1887,1894 ****
print >> out, line
! if markobject in opcode.stack_after:
assert markobject not in opcode.stack_before
markstack.append(pos)
_dis_test = r"""
--- 1956,1980 ----
print >> out, line
! if errormsg:
! # Note that we delayed complaining until the offending opcode
! # was printed.
! raise ValueError(errormsg)
!
! # Emulate the stack effects.
! n = len(before)
! if len(stack) < n:
! raise ValueError("tried to pop %d items from stack with "
! "only %d items" % (n, len(stack)))
! if n:
! del stack[-n:]
! if markobject in after:
assert markobject not in opcode.stack_before
markstack.append(pos)
+ stack.extend(after)
+
+ print >> out, "highest protocol among opcodes =", maxproto
+ if stack:
+ raise ValueError("stack not empty after STOP: %r" % stack)
_dis_test = r"""
***************
*** 1920,1923 ****
--- 2006,2010 ----
49: a APPEND
50: . STOP
+ highest protocol among opcodes = 0
Try again with a "binary" pickle.
***************
*** 1944,1947 ****
--- 2031,2035 ----
37: e APPENDS (MARK at 3)
38: . STOP
+ highest protocol among opcodes = 1
Exercise the INST/OBJ/BUILD family.
***************
*** 1952,1955 ****
--- 2040,2044 ----
15: p PUT 0
18: . STOP
+ highest protocol among opcodes = 0
>>> x = [pickle.PicklingError()] * 2
***************
*** 1974,1977 ****
--- 2063,2067 ----
55: a APPEND
56: . STOP
+ highest protocol among opcodes = 0
>>> dis(pickle.dumps(x, 1))
***************
*** 1994,1997 ****
--- 2084,2088 ----
48: e APPENDS (MARK at 3)
49: . STOP
+ highest protocol among opcodes = 1
Try "the canonical" recursive-object test.
***************
*** 2018,2021 ****
--- 2109,2114 ----
13: a APPEND
14: . STOP
+ highest protocol among opcodes = 0
+
>>> dis(pickle.dumps(L, 1))
0: ] EMPTY_LIST
***************
*** 2027,2037 ****
9: a APPEND
10: . STOP
! The protocol 0 pickle of the tuple causes the disassembly to get confused,
! as it doesn't realize that the POP opcode at 16 gets rid of the MARK at 0
! (so the output remains indented until the end). The protocol 1 pickle
! doesn't trigger this glitch, because the disassembler realizes that
! POP_MARK gets rid of the MARK. Doing a better job on the protocol 0
! pickle would require the disassembler to emulate the stack.
>>> dis(pickle.dumps(T, 0))
--- 2120,2128 ----
9: a APPEND
10: . STOP
+ highest protocol among opcodes = 1
! Note that, in the protocol 0 pickle of the recursive tuple, the disassembler
! has to emulate the stack in order to realize that the POP opcode at 16 gets
! rid of the MARK at 0.
>>> dis(pickle.dumps(T, 0))
***************
*** 2046,2052 ****
14: a APPEND
15: 0 POP
! 16: 0 POP
! 17: g GET 1
! 20: . STOP
>>> dis(pickle.dumps(T, 1))
0: ( MARK
--- 2137,2145 ----
14: a APPEND
15: 0 POP
! 16: 0 POP (MARK at 0)
! 17: g GET 1
! 20: . STOP
! highest protocol among opcodes = 0
!
>>> dis(pickle.dumps(T, 1))
0: ( MARK
***************
*** 2061,2064 ****
--- 2154,2158 ----
12: h BINGET 1
14: . STOP
+ highest protocol among opcodes = 1
Try protocol 2.
***************
*** 2073,2076 ****
--- 2167,2171 ----
10: a APPEND
11: . STOP
+ highest protocol among opcodes = 2
>>> dis(pickle.dumps(T, 2))
***************
*** 2085,2088 ****
--- 2180,2184 ----
12: h BINGET 1
14: . STOP
+ highest protocol among opcodes = 2
"""