[Python-checkins] cpython: Make C and Python implementations of pickle load STRING opcodes the same way.

alexandre.vassalotti python-checkins at python.org
Tue Apr 16 08:15:37 CEST 2013


http://hg.python.org/cpython/rev/4a6b8f86b081
changeset:   83404:4a6b8f86b081
user:        Alexandre Vassalotti <alexandre at peadrop.com>
date:        Mon Apr 15 23:14:55 2013 -0700
summary:
  Make C and Python implementations of pickle load STRING opcodes the same way.

The C version tried to remove trailing whitespace between the last quote and
the newline character. I am not sure why it did this, because pickle never
generated such pickles---for this to happen, repr(some_string) would need to
return trailing whitespace. It may have been there to make it easier for people
to write pickles in text editors. Anyhow, the Python version doesn't do this,
so there is no point keeping this around anymore.

Also, I've changed the exception raised when a bad pickle is encountered.
Again, this is unlikely to make much difference to anyone, though it does make
testing slightly nicer for us.

files:
  Lib/pickle.py            |  11 ++---
  Lib/test/pickletester.py |  53 +++++++++++++++------------
  Modules/_pickle.c        |  29 ++++-----------
  3 files changed, 42 insertions(+), 51 deletions(-)


diff --git a/Lib/pickle.py b/Lib/pickle.py
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -900,14 +900,13 @@
     dispatch[BINFLOAT[0]] = load_binfloat
 
     def load_string(self):
-        orig = self.readline()
-        rep = orig[:-1]
+        data = self.readline()[:-1]
         # Strip outermost quotes
-        if len(rep) >= 2 and rep[0] == rep[-1] and rep[0] in b'"\'':
-            rep = rep[1:-1]
+        if len(data) >= 2 and data[0] == data[-1] and data[0] in b'"\'':
+            data = data[1:-1]
         else:
-            raise ValueError("insecure string pickle")
-        self.append(codecs.escape_decode(rep)[0]
+            raise UnpicklingError("the STRING opcode argument must be quoted")
+        self.append(codecs.escape_decode(data)[0]
                     .decode(self.encoding, self.errors))
     dispatch[STRING[0]] = load_string
 
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -601,30 +601,6 @@
         self.assertRaises(KeyError, self.loads, b'g0\np0')
         self.assertEqual(self.loads(b'((Kdtp0\nh\x00l.))'), [(100,), (100,)])
 
-    def test_insecure_strings(self):
-        # XXX Some of these tests are temporarily disabled
-        insecure = [b"abc", b"2 + 2", # not quoted
-                    ## b"'abc' + 'def'", # not a single quoted string
-                    b"'abc", # quote is not closed
-                    b"'abc\"", # open quote and close quote don't match
-                    b"'abc'   ?", # junk after close quote
-                    b"'\\'", # trailing backslash
-                    # Variations on issue #17710
-                    b"'",
-                    b'"',
-                    b"' ",
-                    b"'  ",
-                    b"'   ",
-                    b"'    ",
-                    b'"    ',
-                    # some tests of the quoting rules
-                    ## b"'abc\"\''",
-                    ## b"'\\\\a\'\'\'\\\'\\\\\''",
-                    ]
-        for b in insecure:
-            buf = b"S" + b + b"\012p0\012."
-            self.assertRaises(ValueError, self.loads, buf)
-
     def test_unicode(self):
         endcases = ['', '<\\u>', '<\\\u1234>', '<\n>',
                     '<\\>', '<\\\U00012345>',
@@ -1214,6 +1190,35 @@
         dumped = b'\x80\x03X\x01\x00\x00\x00ar\xff\xff\xff\xff.'
         self.assertRaises(ValueError, self.loads, dumped)
 
+    def test_badly_escaped_string(self):
+        self.assertRaises(ValueError, self.loads, b"S'\\'\n.")
+
+    def test_badly_quoted_string(self):
+        # Issue #17710
+        badpickles = [b"S'\n.",
+                      b'S"\n.',
+                      b'S\' \n.',
+                      b'S" \n.',
+                      b'S\'"\n.',
+                      b'S"\'\n.',
+                      b"S' ' \n.",
+                      b'S" " \n.',
+                      b"S ''\n.",
+                      b'S ""\n.',
+                      b'S \n.',
+                      b'S\n.',
+                      b'S.']
+        for p in badpickles:
+            self.assertRaises(pickle.UnpicklingError, self.loads, p)
+
+    def test_correctly_quoted_string(self):
+        goodpickles = [(b"S''\n.", ''),
+                       (b'S""\n.', ''),
+                       (b'S"\\n"\n.', '\n'),
+                       (b"S'\\n'\n.", '\n')]
+        for p, expected in goodpickles:
+            self.assertEqual(self.loads(p), expected)
+
 
 class BigmemPickleTests(unittest.TestCase):
 
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -4205,36 +4205,23 @@
 
     if ((len = _Unpickler_Readline(self, &s)) < 0)
         return -1;
-    if (len < 2)
-        return bad_readline();
-    if ((s = strdup(s)) == NULL) {
-        PyErr_NoMemory();
-        return -1;
-    }
-
+    /* Strip the newline */
+    len--;
     /* Strip outermost quotes */
-    while (len > 0 && s[len - 1] <= ' ')
-        len--;
-    if (len > 1 && s[0] == '"' && s[len - 1] == '"') {
-        s[len - 1] = '\0';
+    if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
         p = s + 1;
         len -= 2;
     }
-    else if (len > 1 && s[0] == '\'' && s[len - 1] == '\'') {
-        s[len - 1] = '\0';
-        p = s + 1;
-        len -= 2;
-    }
     else {
-        free(s);
-        PyErr_SetString(PyExc_ValueError, "insecure string pickle");
-        return -1;
-    }
+        PyErr_SetString(UnpicklingError,
+                        "the STRING opcode argument must be quoted");
+        return -1;
+    }
+    assert(len >= 0);
 
     /* Use the PyBytes API to decode the string, since that is what is used
        to encode, and then coerce the result to Unicode. */
     bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
-    free(s);
     if (bytes == NULL)
         return -1;
     str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list