[Python-checkins] gh-99240: Fix double-free bug in Argument Clinic str_converter generated code (GH-99241)

miss-islington webhook-mailer at python.org
Thu Nov 24 09:01:57 EST 2022


https://github.com/python/cpython/commit/8dbe08eb7c807f484fe9870f5b7f5ae2881fd966
commit: 8dbe08eb7c807f484fe9870f5b7f5ae2881fd966
branch: main
author: colorfulappl <colorfulappl at qq.com>
committer: miss-islington <31488909+miss-islington at users.noreply.github.com>
date: 2022-11-24T06:01:26-08:00
summary:

gh-99240: Fix double-free bug in Argument Clinic str_converter generated code (GH-99241)



Fix double-free bug mentioned at https://github.com/python/cpython/issues/99240,
by moving memory clean up out of "exit" label.

Automerge-Triggered-By: GH:erlend-aasland

files:
A Misc/NEWS.d/next/Library/2022-11-08-15-54-43.gh-issue-99240.MhYwcz.rst
M Lib/test/clinic.test
M Lib/test/test_clinic.py
M Modules/_testclinic.c
M Modules/clinic/_testclinic.c.h
M Tools/clinic/clinic.py

diff --git a/Lib/test/clinic.test b/Lib/test/clinic.test
index 7b804e8576ae..f4842cc962f1 100644
--- a/Lib/test/clinic.test
+++ b/Lib/test/clinic.test
@@ -1740,29 +1740,18 @@ test_str_converter_encoding(PyObject *module, PyObject *const *args, Py_ssize_t
         goto exit;
     }
     return_value = test_str_converter_encoding_impl(module, a, b, c, d, d_length, e, e_length);
+    /* Post parse cleanup for a */
+    PyMem_FREE(a);
+    /* Post parse cleanup for b */
+    PyMem_FREE(b);
+    /* Post parse cleanup for c */
+    PyMem_FREE(c);
+    /* Post parse cleanup for d */
+    PyMem_FREE(d);
+    /* Post parse cleanup for e */
+    PyMem_FREE(e);
 
 exit:
-    /* Cleanup for a */
-    if (a) {
-       PyMem_FREE(a);
-    }
-    /* Cleanup for b */
-    if (b) {
-       PyMem_FREE(b);
-    }
-    /* Cleanup for c */
-    if (c) {
-       PyMem_FREE(c);
-    }
-    /* Cleanup for d */
-    if (d) {
-       PyMem_FREE(d);
-    }
-    /* Cleanup for e */
-    if (e) {
-       PyMem_FREE(e);
-    }
-
     return return_value;
 }
 
@@ -1770,7 +1759,7 @@ static PyObject *
 test_str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c,
                                  char *d, Py_ssize_t d_length, char *e,
                                  Py_ssize_t e_length)
-/*[clinic end generated code: output=8acb886a3843f3bc input=eb4c38e1f898f402]*/
+/*[clinic end generated code: output=999c1deecfa15b0a input=eb4c38e1f898f402]*/
 
 
 /*[clinic input]
diff --git a/Lib/test/test_clinic.py b/Lib/test/test_clinic.py
index a590fa50aab0..890beeb9efe2 100644
--- a/Lib/test/test_clinic.py
+++ b/Lib/test/test_clinic.py
@@ -1045,6 +1045,17 @@ def test_str_converter(self):
         self.assertEqual(ac_tester.str_converter('a', b'b', b'c'), ('a', 'b', 'c'))
         self.assertEqual(ac_tester.str_converter('a', b'b', 'c\0c'), ('a', 'b', 'c\0c'))
 
+    def test_str_converter_encoding(self):
+        with self.assertRaises(TypeError):
+            ac_tester.str_converter_encoding(1)
+        self.assertEqual(ac_tester.str_converter_encoding('a', 'b', 'c'), ('a', 'b', 'c'))
+        with self.assertRaises(TypeError):
+            ac_tester.str_converter_encoding('a', b'b\0b', 'c')
+        self.assertEqual(ac_tester.str_converter_encoding('a', b'b', bytearray([ord('c')])), ('a', 'b', 'c'))
+        self.assertEqual(ac_tester.str_converter_encoding('a', b'b', bytearray([ord('c'), 0, ord('c')])),
+                         ('a', 'b', 'c\x00c'))
+        self.assertEqual(ac_tester.str_converter_encoding('a', b'b', b'c\x00c'), ('a', 'b', 'c\x00c'))
+
     def test_py_buffer_converter(self):
         with self.assertRaises(TypeError):
             ac_tester.py_buffer_converter('a', 'b')
@@ -1225,6 +1236,10 @@ def test_gh_99233_refcount(self):
         arg_refcount_after = sys.getrefcount(arg)
         self.assertEqual(arg_refcount_origin, arg_refcount_after)
 
+    def test_gh_99240_double_free(self):
+        expected_error = r'gh_99240_double_free\(\) argument 2 must be encoded string without null bytes, not str'
+        with self.assertRaisesRegex(TypeError, expected_error):
+            ac_tester.gh_99240_double_free('a', '\0b')
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2022-11-08-15-54-43.gh-issue-99240.MhYwcz.rst b/Misc/NEWS.d/next/Library/2022-11-08-15-54-43.gh-issue-99240.MhYwcz.rst
new file mode 100644
index 000000000000..0a4db052755f
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-11-08-15-54-43.gh-issue-99240.MhYwcz.rst
@@ -0,0 +1,2 @@
+Fix double-free bug in Argument Clinic ``str_converter`` by
+extracting memory clean up to a new ``post_parsing`` section.
diff --git a/Modules/_testclinic.c b/Modules/_testclinic.c
index a23ece2ae035..56eddfd6fdbf 100644
--- a/Modules/_testclinic.c
+++ b/Modules/_testclinic.c
@@ -551,6 +551,64 @@ str_converter_impl(PyObject *module, const char *a, const char *b,
 }
 
 
+/*[clinic input]
+str_converter_encoding
+
+    a: str(encoding="idna")
+    b: str(encoding="idna", accept={bytes, bytearray, str})
+    c: str(encoding="idna", accept={bytes, bytearray, str}, zeroes=True)
+    /
+
+[clinic start generated code]*/
+
+static PyObject *
+str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c,
+                            Py_ssize_t c_length)
+/*[clinic end generated code: output=af68766049248a1c input=0c5cf5159d0e870d]*/
+{
+    assert(!PyErr_Occurred());
+    PyObject *out[3] = {NULL,};
+    int i = 0;
+    PyObject *arg;
+
+    arg = PyUnicode_FromString(a);
+    assert(arg || PyErr_Occurred());
+    if (!arg) {
+        goto error;
+    }
+    out[i++] = arg;
+
+    arg = PyUnicode_FromString(b);
+    assert(arg || PyErr_Occurred());
+    if (!arg) {
+        goto error;
+    }
+    out[i++] = arg;
+
+    arg = PyUnicode_FromStringAndSize(c, c_length);
+    assert(arg || PyErr_Occurred());
+    if (!arg) {
+        goto error;
+    }
+    out[i++] = arg;
+
+    PyObject *tuple = PyTuple_New(3);
+    if (!tuple) {
+        goto error;
+    }
+    for (int j = 0; j < 3; j++) {
+        PyTuple_SET_ITEM(tuple, j, out[j]);
+    }
+    return tuple;
+
+error:
+    for (int j = 0; j < i; j++) {
+        Py_DECREF(out[j]);
+    }
+    return NULL;
+}
+
+
 static PyObject *
 bytes_from_buffer(Py_buffer *buf)
 {
@@ -927,6 +985,25 @@ gh_99233_refcount_impl(PyObject *module, PyObject *args)
 }
 
 
+/*[clinic input]
+gh_99240_double_free
+
+    a: str(encoding="idna")
+    b: str(encoding="idna")
+    /
+
+Proof-of-concept of GH-99240 double-free bug.
+
+[clinic start generated code]*/
+
+static PyObject *
+gh_99240_double_free_impl(PyObject *module, char *a, char *b)
+/*[clinic end generated code: output=586dc714992fe2ed input=23db44aa91870fc7]*/
+{
+    Py_RETURN_NONE;
+}
+
+
 static PyMethodDef tester_methods[] = {
     TEST_EMPTY_FUNCTION_METHODDEF
     OBJECTS_CONVERTER_METHODDEF
@@ -951,6 +1028,7 @@ static PyMethodDef tester_methods[] = {
     DOUBLE_CONVERTER_METHODDEF
     PY_COMPLEX_CONVERTER_METHODDEF
     STR_CONVERTER_METHODDEF
+    STR_CONVERTER_ENCODING_METHODDEF
     PY_BUFFER_CONVERTER_METHODDEF
     KEYWORDS_METHODDEF
     KEYWORDS_KWONLY_METHODDEF
@@ -970,6 +1048,7 @@ static PyMethodDef tester_methods[] = {
     KEYWORD_ONLY_PARAMETER_METHODDEF
     VARARG_AND_POSONLY_METHODDEF
     GH_99233_REFCOUNT_METHODDEF
+    GH_99240_DOUBLE_FREE_METHODDEF
     {NULL, NULL}
 };
 
diff --git a/Modules/clinic/_testclinic.c.h b/Modules/clinic/_testclinic.c.h
index eb425821e9cb..9aad44566bcd 100644
--- a/Modules/clinic/_testclinic.c.h
+++ b/Modules/clinic/_testclinic.c.h
@@ -1165,6 +1165,43 @@ str_converter(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
     return return_value;
 }
 
+PyDoc_STRVAR(str_converter_encoding__doc__,
+"str_converter_encoding($module, a, b, c, /)\n"
+"--\n"
+"\n");
+
+#define STR_CONVERTER_ENCODING_METHODDEF    \
+    {"str_converter_encoding", _PyCFunction_CAST(str_converter_encoding), METH_FASTCALL, str_converter_encoding__doc__},
+
+static PyObject *
+str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c,
+                            Py_ssize_t c_length);
+
+static PyObject *
+str_converter_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
+{
+    PyObject *return_value = NULL;
+    char *a = NULL;
+    char *b = NULL;
+    char *c = NULL;
+    Py_ssize_t c_length;
+
+    if (!_PyArg_ParseStack(args, nargs, "esetet#:str_converter_encoding",
+        "idna", &a, "idna", &b, "idna", &c, &c_length)) {
+        goto exit;
+    }
+    return_value = str_converter_encoding_impl(module, a, b, c, c_length);
+    /* Post parse cleanup for a */
+    PyMem_FREE(a);
+    /* Post parse cleanup for b */
+    PyMem_FREE(b);
+    /* Post parse cleanup for c */
+    PyMem_FREE(c);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(py_buffer_converter__doc__,
 "py_buffer_converter($module, a, b, /)\n"
 "--\n"
@@ -2353,4 +2390,37 @@ gh_99233_refcount(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
     Py_XDECREF(__clinic_args);
     return return_value;
 }
-/*[clinic end generated code: output=a5c9f181f3a32d85 input=a9049054013a1b77]*/
+
+PyDoc_STRVAR(gh_99240_double_free__doc__,
+"gh_99240_double_free($module, a, b, /)\n"
+"--\n"
+"\n"
+"Proof-of-concept of GH-99240 double-free bug.");
+
+#define GH_99240_DOUBLE_FREE_METHODDEF    \
+    {"gh_99240_double_free", _PyCFunction_CAST(gh_99240_double_free), METH_FASTCALL, gh_99240_double_free__doc__},
+
+static PyObject *
+gh_99240_double_free_impl(PyObject *module, char *a, char *b);
+
+static PyObject *
+gh_99240_double_free(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
+{
+    PyObject *return_value = NULL;
+    char *a = NULL;
+    char *b = NULL;
+
+    if (!_PyArg_ParseStack(args, nargs, "eses:gh_99240_double_free",
+        "idna", &a, "idna", &b)) {
+        goto exit;
+    }
+    return_value = gh_99240_double_free_impl(module, a, b);
+    /* Post parse cleanup for a */
+    PyMem_FREE(a);
+    /* Post parse cleanup for b */
+    PyMem_FREE(b);
+
+exit:
+    return return_value;
+}
+/*[clinic end generated code: output=49dced2c99bcd0fb input=a9049054013a1b77]*/
diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py
index 94e17ee9c7df..0117a50725da 100755
--- a/Tools/clinic/clinic.py
+++ b/Tools/clinic/clinic.py
@@ -348,6 +348,12 @@ def __init__(self):
         # "goto exit" if there are any.
         self.return_conversion = []
 
+        # The C statements required to do some operations
+        # after the end of parsing but before cleaning up.
+        # These operations may be, for example, memory deallocations which
+        # can only be done without any error happening during argument parsing.
+        self.post_parsing = []
+
         # The C statements required to clean up after the impl call.
         self.cleanup = []
 
@@ -820,6 +826,7 @@ def parser_body(prototype, *fields, declarations=''):
                     {modifications}
                     {return_value} = {c_basename}_impl({impl_arguments});
                     {return_conversion}
+                    {post_parsing}
 
                 {exit_label}
                     {cleanup}
@@ -1460,6 +1467,7 @@ def render_function(self, clinic, f):
         template_dict['impl_parameters'] = ", ".join(data.impl_parameters)
         template_dict['impl_arguments'] = ", ".join(data.impl_arguments)
         template_dict['return_conversion'] = format_escape("".join(data.return_conversion).rstrip())
+        template_dict['post_parsing'] = format_escape("".join(data.post_parsing).rstrip())
         template_dict['cleanup'] = format_escape("".join(data.cleanup))
         template_dict['return_value'] = data.return_value
 
@@ -1484,6 +1492,7 @@ def render_function(self, clinic, f):
                 return_conversion=template_dict['return_conversion'],
                 initializers=template_dict['initializers'],
                 modifications=template_dict['modifications'],
+                post_parsing=template_dict['post_parsing'],
                 cleanup=template_dict['cleanup'],
                 )
 
@@ -2725,6 +2734,10 @@ def _render_non_self(self, parameter, data):
         # parse_arguments
         self.parse_argument(data.parse_arguments)
 
+        # post_parsing
+        if post_parsing := self.post_parsing():
+            data.post_parsing.append('/* Post parse cleanup for ' + name + ' */\n' + post_parsing.rstrip() + '\n')
+
         # cleanup
         cleanup = self.cleanup()
         if cleanup:
@@ -2820,6 +2833,14 @@ def modify(self):
         """
         return ""
 
+    def post_parsing(self):
+        """
+        The C statements required to do some operations after the end of parsing but before cleaning up.
+        Return a string containing this code indented at column 0.
+        If no operation is necessary, return an empty string.
+        """
+        return ""
+
     def cleanup(self):
         """
         The C statements required to clean up after this variable.
@@ -3416,10 +3437,10 @@ def converter_init(self, *, accept={str}, encoding=None, zeroes=False):
         if NoneType in accept and self.c_default == "Py_None":
             self.c_default = "NULL"
 
-    def cleanup(self):
+    def post_parsing(self):
         if self.encoding:
             name = self.name
-            return "".join(["if (", name, ") {\n   PyMem_FREE(", name, ");\n}\n"])
+            return f"PyMem_FREE({name});\n"
 
     def parse_arg(self, argname, displayname):
         if self.format_unit == 's':



More information about the Python-checkins mailing list