[Python-checkins] python/dist/src/Lib pickle.py,1.79,1.80 pickletools.py,1.9,1.10

Mon, 27 Jan 2003 16:13:22 -0800

Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1:/tmp/cvs-serv14951/Lib

Modified Files:
	pickle.py pickletools.py 
Log Message:
Removed the new LONG2 opcode:  it's extravagant.  If LONG1 isn't enough,
then the embedded argument consumes at least 256 bytes.  The difference
between a 3-byte prefix (LONG2 + 2 bytes) and a 5-byte prefix (LONG4 +
4 bytes) is therefore only 2 bytes out of at least 259, i.e. at worst
less than 1%.  Note that binary strings and binary Unicode strings also
come only in "size is 1 byte" or "size is 4 bytes" flavors, and I expect
for the same reason.  The only place a 2-byte
thingie was used was in BININT2, where the 2 bytes make up the *entire*
embedded argument (and now EXT2 also does this); that's a large savings
over 4 bytes, because the total opcode+argument size is so small in
the BININT2/EXT2 case.

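Replaced the single TAKEN_FROM_ARGUMENT "number of bytes" code with two
codes:  TAKEN_FROM_ARGUMENT1 (the byte count is given by a 1-byte unsigned
int) and TAKEN_FROM_ARGUMENT4 (the byte count is given by a 4-byte signed
little-endian int).  Now there's enough info in ArgumentDescriptor objects
to deduce the number of bytes consumed by each opcode.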

Rearranged the order in which proto2 opcodes are listed in pickle.py.

Index: pickle.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/pickle.py,v
retrieving revision 1.79
retrieving revision 1.80
diff -C2 -d -r1.79 -r1.80
*** pickle.py	27 Jan 2003 22:47:45 -0000	1.79
--- pickle.py	28 Jan 2003 00:13:19 -0000	1.80
***************
*** 136,152 ****
  # Protocol 2 (not yet implemented) (XXX comments will be added later)

- NEWOBJ          = '\x81'
  PROTO           = '\x80'
! EXT2            = '\x83'
  EXT1            = '\x82'
! TUPLE1          = '\x85'
  EXT4            = '\x84'
! TUPLE3          = '\x87'
  TUPLE2          = '\x86'
! NEWFALSE        = '\x89'
  NEWTRUE         = '\x88'
! LONG2           = '\x8b'
  LONG1           = '\x8a'
! LONG4           = '\x8c'

--- 136,151 ----
  # Protocol 2 (not yet implemented) (XXX comments will be added later)

  PROTO           = '\x80'
! NEWOBJ          = '\x81'
  EXT1            = '\x82'
! EXT2            = '\x83'
  EXT4            = '\x84'
! TUPLE1          = '\x85'
  TUPLE2          = '\x86'
! TUPLE3          = '\x87'
  NEWTRUE         = '\x88'
! NEWFALSE        = '\x89'
  LONG1           = '\x8a'
! LONG4           = '\x8b'

Index: pickletools.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/pickletools.py,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** pickletools.py	27 Jan 2003 23:54:04 -0000	1.9
--- pickletools.py	28 Jan 2003 00:13:19 -0000	1.10
***************
*** 126,130 ****
  # Represents the number of bytes consumed by a two-argument opcode where
  # the first argument gives the number of bytes in the second argument.
! TAKEN_FROM_ARGUMENT = -2

  class ArgumentDescriptor(object):
--- 126,131 ----
  # Represents the number of bytes consumed by a two-argument opcode where
  # the first argument gives the number of bytes in the second argument.
! TAKEN_FROM_ARGUMENT1 = -2   # num bytes is 1-byte unsigned int
! TAKEN_FROM_ARGUMENT4 = -3   # num bytes is 4-byte signed little-endian int

  class ArgumentDescriptor(object):
***************
*** 134,138 ****

          # length of argument, in bytes; an int; UP_TO_NEWLINE and
!         # TAKEN_FROM_ARGUMENT are negative values for variable-length cases
          'n',

--- 135,140 ----

          # length of argument, in bytes; an int; UP_TO_NEWLINE and
!         # TAKEN_FROM_ARGUMENT{1,4} are negative values for variable-length
!         # cases
          'n',

***************
*** 151,156 ****

          assert isinstance(n, int) and (n >= 0 or
!                                        n is UP_TO_NEWLINE or
!                                        n is TAKEN_FROM_ARGUMENT)
          self.n = n

--- 153,159 ----

          assert isinstance(n, int) and (n >= 0 or
!                                        n in (UP_TO_NEWLINE,
!                                              TAKEN_FROM_ARGUMENT1,
!                                              TAKEN_FROM_ARGUMENT4))
          self.n = n

***************
*** 342,346 ****
  string4 = ArgumentDescriptor(
                name="string4",
!               n=TAKEN_FROM_ARGUMENT,
                reader=read_string4,
                doc="""A counted string.
--- 345,349 ----
  string4 = ArgumentDescriptor(
                name="string4",
!               n=TAKEN_FROM_ARGUMENT4,
                reader=read_string4,
                doc="""A counted string.
***************
*** 371,375 ****
  string1 = ArgumentDescriptor(
                name="string1",
!               n=TAKEN_FROM_ARGUMENT,
                reader=read_string1,
                doc="""A counted string.
--- 374,378 ----
  string1 = ArgumentDescriptor(
                name="string1",
!               n=TAKEN_FROM_ARGUMENT1,
                reader=read_string1,
                doc="""A counted string.
***************
*** 435,439 ****
  unicodestring4 = ArgumentDescriptor(
                      name="unicodestring4",
!                     n=TAKEN_FROM_ARGUMENT,
                      reader=read_unicodestring4,
                      doc="""A counted Unicode string.
--- 438,442 ----
  unicodestring4 = ArgumentDescriptor(
                      name="unicodestring4",
!                     n=TAKEN_FROM_ARGUMENT4,
                      reader=read_unicodestring4,
                      doc="""A counted Unicode string.
***************
*** 627,631 ****
  long1 = ArgumentDescriptor(
      name="long1",
!     n=TAKEN_FROM_ARGUMENT,
      reader=read_long1,
      doc="""A binary long, little-endian, using 1-byte size.
--- 630,634 ----
  long1 = ArgumentDescriptor(
      name="long1",
!     n=TAKEN_FROM_ARGUMENT1,
      reader=read_long1,
      doc="""A binary long, little-endian, using 1-byte size.
***************
*** 635,668 ****
      """)

- def read_long2(f):
-     r"""
-     >>> import StringIO
-     >>> read_long2(StringIO.StringIO("\x02\x00\xff\x00"))
-     255L
-     >>> read_long2(StringIO.StringIO("\x02\x00\xff\x7f"))
-     32767L
-     >>> read_long2(StringIO.StringIO("\x02\x00\x00\xff"))
-     -256L
-     >>> read_long2(StringIO.StringIO("\x02\x00\x00\x80"))
-     -32768L
-     >>>
-     """
- 
-     n = read_uint2(f)
-     data = f.read(n)
-     if len(data) != n:
-         raise ValueError("not enough data in stream to read long2")
-     return decode_long(data)
- 
- long2 = ArgumentDescriptor(
-     name="long2",
-     n=TAKEN_FROM_ARGUMENT,
-     reader=read_long2,
-     doc="""A binary long, little-endian, using 2-byte size.
- 
-     This first reads two byte as an unsigned size, then reads that
-     many bytes and interprets them as a little-endian 2's-complement long.
-     """)
- 
  def read_long4(f):
      r"""
--- 638,641 ----
***************
*** 689,693 ****
  long4 = ArgumentDescriptor(
      name="long4",
!     n=TAKEN_FROM_ARGUMENT,
      reader=read_long4,
      doc="""A binary representation of a long, little-endian.
--- 662,666 ----
  long4 = ArgumentDescriptor(
      name="long4",
!     n=TAKEN_FROM_ARGUMENT4,
      reader=read_long4,
      doc="""A binary representation of a long, little-endian.
***************
*** 1706,1722 ****
        says it all."""),

-     I(name="LONG2",
-       code='\x8b',
-       arg=long2,
-       stack_before=[],
-       stack_after=[pylong],
-       proto=2,
-       doc="""Long integer using two-byte length.
- 
-       A more efficient encoding of a Python long; the long2 encoding
-       says it all."""),
- 
      I(name="LONG4",
!       code='\x8c',
        arg=long4,
        stack_before=[],
--- 1679,1684 ----
        says it all."""),

      I(name="LONG4",
!       code='\x8b',
        arg=long4,
        stack_before=[],