[Python-checkins] python/dist/src/Lib pickletools.py,1.21,1.22

tim_one@users.sourceforge.net tim_one@users.sourceforge.net
Wed, 29 Jan 2003 12:12:24 -0800


Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1:/tmp/cvs-serv21312/Lib

Modified Files:
	pickletools.py 
Log Message:
dis():  This had a problem with proto 0 pickles, in that POP sometimes
popped a MARK, but without stack emulation the disassembler couldn't
know that, and subsequent indentation got hosed.

Now the disassembler does do enough stack emulation to catch this.  While
I was at it, also added lots of sanity checks for other stack operations,
and correct use of the memo.  This goes (I think) a long way toward being
a "pickle verifier" now too.


Index: pickletools.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/pickletools.py,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -d -r1.21 -r1.22
*** pickletools.py	29 Jan 2003 15:41:33 -0000	1.21
--- pickletools.py	29 Jan 2003 20:12:21 -0000	1.22
***************
*** 14,21 ****
  #
  # - A pickle verifier:  read a pickle and check it exhaustively for
! #   well-formedness.
  #
  # - A protocol identifier:  examine a pickle and return its protocol number
  #   (== the highest .proto attr value among all the opcodes in the pickle).
  #
  # - A pickle optimizer:  for example, tuple-building code is sometimes more
--- 14,22 ----
  #
  # - A pickle verifier:  read a pickle and check it exhaustively for
! #   well-formedness.  dis() does a lot of this already.
  #
  # - A protocol identifier:  examine a pickle and return its protocol number
  #   (== the highest .proto attr value among all the opcodes in the pickle).
+ #   dis() already prints this info at the end.
  #
  # - A pickle optimizer:  for example, tuple-building code is sometimes more
***************
*** 713,716 ****
--- 714,720 ----
          self.doc = doc
  
+     def __repr__(self):
+         return self.name
+ 
  
  pyint = StackObject(
***************
*** 1859,1866 ****
      Optional arg indentlevel is the number of blanks by which to indent
      a new MARK level.  It defaults to 4.
      """
  
!     markstack = []
      indentchunk = ' ' * indentlevel
      for opcode, arg, pos in genops(pickle):
          if pos is not None:
--- 1863,1893 ----
      Optional arg indentlevel is the number of blanks by which to indent
      a new MARK level.  It defaults to 4.
+ 
+     In addition to printing the disassembly, some sanity checks are made:
+ 
+     + All embedded opcode arguments "make sense".
+ 
+     + Explicit and implicit pop operations have enough items on the stack.
+ 
+     + When an opcode implicitly refers to a markobject, a markobject is
+       actually on the stack.
+ 
+     + A memo entry isn't referenced before it's defined.
+ 
+     + The markobject isn't stored in the memo.
+ 
+     + A memo entry isn't redefined.
      """
  
!     # Most of the hair here is for sanity checks, but most of it is needed
!     # anyway to detect when a protocol 0 POP takes a MARK off the stack
!     # (which in turn is needed to indent MARK blocks correctly).
! 
!     stack = []          # crude emulation of unpickler stack
!     memo = {}           # crude emulation of unpickler memo
!     maxproto = -1       # max protocol number seen
!     markstack = []      # bytecode positions of MARK opcodes
      indentchunk = ' ' * indentlevel
+     errormsg = None
      for opcode, arg, pos in genops(pickle):
          if pos is not None:
***************
*** 1871,1880 ****
                                opcode.name)
  
          markmsg = None
!         if markstack and markobject in opcode.stack_before:
!             assert markobject not in opcode.stack_after
!             markpos = markstack.pop()
!             if markpos is not None:
!                 markmsg = "(MARK at %d)" % markpos
  
          if arg is not None or markmsg:
--- 1898,1949 ----
                                opcode.name)
  
+         maxproto = max(maxproto, opcode.proto)
+ 
+         # See whether a MARK should be popped.
+         before = opcode.stack_before    # don't mutate
+         after = opcode.stack_after      # don't mutate
          markmsg = None
!         if markobject in before or (opcode.name == "POP" and
!                                     stack and
!                                     stack[-1] is markobject):
!             assert markobject not in after
!             if markstack:
!                 markpos = markstack.pop()
!                 if markpos is None:
!                     markmsg = "(MARK at unknown opcode offset)"
!                 else:
!                     markmsg = "(MARK at %d)" % markpos
!                 # Pop everything at and after the topmost markobject.
!                 while stack[-1] is not markobject:
!                     stack.pop()
!                 stack.pop()
!                 # Remove markobject stuff from stack_before.
!                 try:
!                     i = before.index(markobject)
!                     before = before[:i]
!                 except ValueError:
!                     assert opcode.name == "POP"
!                     assert len(before) == 1
!                     before = []     # stop code later from popping again
!             else:
!                 errormsg = markmsg = "no MARK exists on stack"
! 
!         # Check for correct memo usage.
!         if opcode.name in ("PUT", "BINPUT", "LONG_BINPUT"):
!             if arg in memo:
!                 errormsg = "memo key %r already defined" % arg
!             elif not stack:
!                 errormsg = "stack is empty -- can't store into memo"
!             elif stack[-1] is markobject:
!                 errormsg = "can't store markobject in the memo"
!             else:
!                 memo[arg] = stack[-1]
! 
!         elif opcode.name in ("GET", "BINGET", "LONG_BINGET"):
!             if arg in memo:
!                 assert len(after) == 1
!                 after = [memo[arg]]     # for better stack emulation
!             else:
!                 errormsg = "memo key %r has never been stored into" % arg
  
          if arg is not None or markmsg:
***************
*** 1887,1894 ****
          print >> out, line
  
!         if markobject in opcode.stack_after:
              assert markobject not in opcode.stack_before
              markstack.append(pos)
  
  
  _dis_test = r"""
--- 1956,1980 ----
          print >> out, line
  
!         if errormsg:
!             # Note that we delayed complaining until the offending opcode
!             # was printed.
!             raise ValueError(errormsg)
! 
!         # Emulate the stack effects.
!         n = len(before)
!         if len(stack) < n:
!             raise ValueError("tried to pop %d items from stack with "
!                              "only %d items" % (n, len(stack)))
!         if n:
!             del stack[-n:]
!         if markobject in after:
              assert markobject not in opcode.stack_before
              markstack.append(pos)
  
+         stack.extend(after)
+ 
+     print >> out, "highest protocol among opcodes =", maxproto
+     if stack:
+         raise ValueError("stack not empty after STOP: %r" % stack)
  
  _dis_test = r"""
***************
*** 1920,1923 ****
--- 2006,2010 ----
     49: a    APPEND
     50: .    STOP
+ highest protocol among opcodes = 0
  
  Try again with a "binary" pickle.
***************
*** 1944,1947 ****
--- 2031,2035 ----
     37: e        APPENDS    (MARK at 3)
     38: .    STOP
+ highest protocol among opcodes = 1
  
  Exercise the INST/OBJ/BUILD family.
***************
*** 1952,1955 ****
--- 2040,2044 ----
     15: p    PUT        0
     18: .    STOP
+ highest protocol among opcodes = 0
  
  >>> x = [pickle.PicklingError()] * 2
***************
*** 1974,1977 ****
--- 2063,2067 ----
     55: a    APPEND
     56: .    STOP
+ highest protocol among opcodes = 0
  
  >>> dis(pickle.dumps(x, 1))
***************
*** 1994,1997 ****
--- 2084,2088 ----
     48: e        APPENDS    (MARK at 3)
     49: .    STOP
+ highest protocol among opcodes = 1
  
  Try "the canonical" recursive-object test.
***************
*** 2018,2021 ****
--- 2109,2114 ----
     13: a    APPEND
     14: .    STOP
+ highest protocol among opcodes = 0
+ 
  >>> dis(pickle.dumps(L, 1))
      0: ]    EMPTY_LIST
***************
*** 2027,2037 ****
      9: a    APPEND
     10: .    STOP
  
! The protocol 0 pickle of the tuple causes the disassembly to get confused,
! as it doesn't realize that the POP opcode at 16 gets rid of the MARK at 0
! (so the output remains indented until the end).  The protocol 1 pickle
! doesn't trigger this glitch, because the disassembler realizes that
! POP_MARK gets rid of the MARK.  Doing a better job on the protocol 0
! pickle would require the disassembler to emulate the stack.
  
  >>> dis(pickle.dumps(T, 0))
--- 2120,2128 ----
      9: a    APPEND
     10: .    STOP
+ highest protocol among opcodes = 1
  
! Note that, in the protocol 0 pickle of the recursive tuple, the disassembler
! has to emulate the stack in order to realize that the POP opcode at 16 gets
! rid of the MARK at 0.
  
  >>> dis(pickle.dumps(T, 0))
***************
*** 2046,2052 ****
     14: a        APPEND
     15: 0        POP
!    16: 0        POP
!    17: g        GET        1
!    20: .        STOP
  >>> dis(pickle.dumps(T, 1))
      0: (    MARK
--- 2137,2145 ----
     14: a        APPEND
     15: 0        POP
!    16: 0        POP        (MARK at 0)
!    17: g    GET        1
!    20: .    STOP
! highest protocol among opcodes = 0
! 
  >>> dis(pickle.dumps(T, 1))
      0: (    MARK
***************
*** 2061,2064 ****
--- 2154,2158 ----
     12: h    BINGET     1
     14: .    STOP
+ highest protocol among opcodes = 1
  
  Try protocol 2.
***************
*** 2073,2076 ****
--- 2167,2171 ----
     10: a    APPEND
     11: .    STOP
+ highest protocol among opcodes = 2
  
  >>> dis(pickle.dumps(T, 2))
***************
*** 2085,2088 ****
--- 2180,2184 ----
     12: h    BINGET     1
     14: .    STOP
+ highest protocol among opcodes = 2
  """