[Python-checkins] cpython (merge 3.3 -> 3.3): Merge heads.

alexandre.vassalotti python-checkins at python.org
Sun Apr 14 12:45:25 CEST 2013


http://hg.python.org/cpython/rev/fdb8e5028c0d
changeset:   83361:fdb8e5028c0d
branch:      3.3
parent:      83357:ca5fc67e0ad1
parent:      83360:4ced30417300
user:        Alexandre Vassalotti <alexandre at peadrop.com>
date:        Sun Apr 14 03:31:40 2013 -0700
summary:
  Merge heads.

files:
  Lib/pickletools.py |  127 +++++++++++++++++++++++++++-----
  Misc/NEWS          |    3 +
  2 files changed, 109 insertions(+), 21 deletions(-)


diff --git a/Lib/pickletools.py b/Lib/pickletools.py
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -13,6 +13,7 @@
 import codecs
 import pickle
 import re
+import sys
 
 __all__ = ['dis', 'genops', 'optimize']
 
@@ -165,8 +166,9 @@
 
 # Represents the number of bytes consumed by a two-argument opcode where
 # the first argument gives the number of bytes in the second argument.
-TAKEN_FROM_ARGUMENT1 = -2   # num bytes is 1-byte unsigned int
-TAKEN_FROM_ARGUMENT4 = -3   # num bytes is 4-byte signed little-endian int
+TAKEN_FROM_ARGUMENT1  = -2   # num bytes is 1-byte unsigned int
+TAKEN_FROM_ARGUMENT4  = -3   # num bytes is 4-byte signed little-endian int
+TAKEN_FROM_ARGUMENT4U = -4   # num bytes is 4-byte unsigned little-endian int
 
 class ArgumentDescriptor(object):
     __slots__ = (
@@ -194,7 +196,8 @@
         assert isinstance(n, int) and (n >= 0 or
                                        n in (UP_TO_NEWLINE,
                                              TAKEN_FROM_ARGUMENT1,
-                                             TAKEN_FROM_ARGUMENT4))
+                                             TAKEN_FROM_ARGUMENT4,
+                                             TAKEN_FROM_ARGUMENT4U))
         self.n = n
 
         self.reader = reader
@@ -265,6 +268,27 @@
            doc="Four-byte signed integer, little-endian, 2's complement.")
 
 
+def read_uint4(f):
+    r"""
+    >>> import io
+    >>> read_uint4(io.BytesIO(b'\xff\x00\x00\x00'))
+    255
+    >>> read_uint4(io.BytesIO(b'\x00\x00\x00\x80')) == 2**31
+    True
+    """
+
+    data = f.read(4)
+    if len(data) == 4:
+        return _unpack("<I", data)[0]
+    raise ValueError("not enough data in stream to read uint4")
+
+uint4 = ArgumentDescriptor(
+            name='uint4',
+            n=4,
+            reader=read_uint4,
+            doc="Four-byte unsigned integer, little-endian.")
+
+
 def read_stringnl(f, decode=True, stripquotes=True):
     r"""
     >>> import io
@@ -421,6 +445,67 @@
               """)
 
 
+def read_bytes1(f):
+    r"""
+    >>> import io
+    >>> read_bytes1(io.BytesIO(b"\x00"))
+    b''
+    >>> read_bytes1(io.BytesIO(b"\x03abcdef"))
+    b'abc'
+    """
+
+    n = read_uint1(f)
+    assert n >= 0
+    data = f.read(n)
+    if len(data) == n:
+        return data
+    raise ValueError("expected %d bytes in a bytes1, but only %d remain" %
+                     (n, len(data)))
+
+bytes1 = ArgumentDescriptor(
+              name="bytes1",
+              n=TAKEN_FROM_ARGUMENT1,
+              reader=read_bytes1,
+              doc="""A counted bytes string.
+
+              The first argument is a 1-byte unsigned int giving the number
+              of bytes, and the second argument is that many bytes.
+              """)
+
+
+def read_bytes4(f):
+    r"""
+    >>> import io
+    >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x00abc"))
+    b''
+    >>> read_bytes4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
+    b'abc'
+    >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
+    Traceback (most recent call last):
+    ...
+    ValueError: expected 50331648 bytes in a bytes4, but only 6 remain
+    """
+
+    n = read_uint4(f)
+    if n > sys.maxsize:
+        raise ValueError("bytes4 byte count > sys.maxsize: %d" % n)
+    data = f.read(n)
+    if len(data) == n:
+        return data
+    raise ValueError("expected %d bytes in a bytes4, but only %d remain" %
+                     (n, len(data)))
+
+bytes4 = ArgumentDescriptor(
+              name="bytes4",
+              n=TAKEN_FROM_ARGUMENT4U,
+              reader=read_bytes4,
+              doc="""A counted bytes string.
+
+              The first argument is a 4-byte little-endian unsigned int giving
+              the number of bytes, and the second argument is that many bytes.
+              """)
+
+
 def read_unicodestringnl(f):
     r"""
     >>> import io
@@ -464,9 +549,9 @@
     ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
     """
 
-    n = read_int4(f)
-    if n < 0:
-        raise ValueError("unicodestring4 byte count < 0: %d" % n)
+    n = read_uint4(f)
+    if n > sys.maxsize:
+        raise ValueError("unicodestring4 byte count > sys.maxsize: %d" % n)
     data = f.read(n)
     if len(data) == n:
         return str(data, 'utf-8', 'surrogatepass')
@@ -475,7 +560,7 @@
 
 unicodestring4 = ArgumentDescriptor(
                     name="unicodestring4",
-                    n=TAKEN_FROM_ARGUMENT4,
+                    n=TAKEN_FROM_ARGUMENT4U,
                     reader=read_unicodestring4,
                     doc="""A counted Unicode string.
 
@@ -872,7 +957,7 @@
             assert isinstance(x, StackObject)
         self.stack_after = stack_after
 
-        assert isinstance(proto, int) and 0 <= proto <= 3
+        assert isinstance(proto, int) and 0 <= proto <= pickle.HIGHEST_PROTOCOL
         self.proto = proto
 
         assert isinstance(doc, str)
@@ -1038,28 +1123,28 @@
 
     I(name='BINBYTES',
       code='B',
-      arg=string4,
+      arg=bytes4,
       stack_before=[],
       stack_after=[pybytes],
       proto=3,
       doc="""Push a Python bytes object.
 
-      There are two arguments:  the first is a 4-byte little-endian signed int
-      giving the number of bytes in the string, and the second is that many
-      bytes, which are taken literally as the bytes content.
+      There are two arguments:  the first is a 4-byte little-endian unsigned int
+      giving the number of bytes, and the second is that many bytes, which are
+      taken literally as the bytes content.
       """),
 
     I(name='SHORT_BINBYTES',
       code='C',
-      arg=string1,
+      arg=bytes1,
       stack_before=[],
       stack_after=[pybytes],
       proto=3,
-      doc="""Push a Python string object.
+      doc="""Push a Python bytes object.
 
       There are two arguments:  the first is a 1-byte unsigned int giving
-      the number of bytes in the string, and the second is that many bytes,
-      which are taken literally as the string content.
+      the number of bytes, and the second is that many bytes, which are taken
+      literally as the bytes content.
       """),
 
     # Ways to spell None.
@@ -1118,7 +1203,7 @@
       proto=1,
       doc="""Push a Python Unicode string object.
 
-      There are two arguments:  the first is a 4-byte little-endian signed int
+      There are two arguments:  the first is a 4-byte little-endian unsigned int
       giving the number of bytes in the string.  The second is that many
       bytes, and is the UTF-8 encoding of the Unicode string.
       """),
@@ -1422,13 +1507,13 @@
 
     I(name='LONG_BINGET',
       code='j',
-      arg=int4,
+      arg=uint4,
       stack_before=[],
       stack_after=[anyobject],
       proto=1,
       doc="""Read an object from the memo and push it on the stack.
 
-      The index of the memo object to push is given by the 4-byte signed
+      The index of the memo object to push is given by the 4-byte unsigned
       little-endian integer following.
       """),
 
@@ -1459,14 +1544,14 @@
 
     I(name='LONG_BINPUT',
       code='r',
-      arg=int4,
+      arg=uint4,
       stack_before=[],
       stack_after=[],
       proto=1,
       doc="""Store the stack top into the memo.  The stack is not popped.
 
       The index of the memo location to write into is given by the 4-byte
-      signed little-endian integer following.
+      unsigned little-endian integer following.
       """),
 
     # Access the extension registry (predefined objects).  Akin to the GET
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -58,6 +58,9 @@
 - Issue #17526: fix an IndexError raised while passing code without filename to
   inspect.findsource().  Initial patch by Tyler Doyle.
 
+- Issue #16550: Update the opcode descriptions of pickletools to use unsigned
+  integers where appropriate.  Initial patch by Serhiy Storchaka.
+
 IDLE
 ----
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list