[Python-checkins] [3.10] gh-99240: Fix double-free bug in Argument Clinic str_converter generated code (GH-99241) (#100353)
kumaraditya303
webhook-mailer at python.org
Tue Dec 20 05:20:48 EST 2022
https://github.com/python/cpython/commit/53063b7ffa8f8edf13dd7c81f89f034a3b3b71b6
commit: 53063b7ffa8f8edf13dd7c81f89f034a3b3b71b6
branch: 3.10
author: colorfulappl <colorfulappl at qq.com>
committer: kumaraditya303 <59607654+kumaraditya303 at users.noreply.github.com>
date: 2022-12-20T15:50:42+05:30
summary:
[3.10] gh-99240: Fix double-free bug in Argument Clinic str_converter generated code (GH-99241) (#100353)
(cherry picked from commit 8dbe08eb7c807f484fe9870f5b7f5ae2881fd966)
Fix double-free bug mentioned at GH-99240, by moving memory clean up out of "exit" label.
files:
A Misc/NEWS.d/next/Library/2022-11-08-15-54-43.gh-issue-99240.MhYwcz.rst
M Lib/test/clinic.test
M Lib/test/test_clinic.py
M Modules/_testclinic.c
M Modules/clinic/_testclinic.c.h
M Tools/clinic/clinic.py
diff --git a/Lib/test/clinic.test b/Lib/test/clinic.test
index 3a3fbd174d86..af111dbee2b8 100644
--- a/Lib/test/clinic.test
+++ b/Lib/test/clinic.test
@@ -1740,29 +1740,18 @@ test_str_converter_encoding(PyObject *module, PyObject *const *args, Py_ssize_t
goto exit;
}
return_value = test_str_converter_encoding_impl(module, a, b, c, d, d_length, e, e_length);
+ /* Post parse cleanup for a */
+ PyMem_FREE(a);
+ /* Post parse cleanup for b */
+ PyMem_FREE(b);
+ /* Post parse cleanup for c */
+ PyMem_FREE(c);
+ /* Post parse cleanup for d */
+ PyMem_FREE(d);
+ /* Post parse cleanup for e */
+ PyMem_FREE(e);
exit:
- /* Cleanup for a */
- if (a) {
- PyMem_FREE(a);
- }
- /* Cleanup for b */
- if (b) {
- PyMem_FREE(b);
- }
- /* Cleanup for c */
- if (c) {
- PyMem_FREE(c);
- }
- /* Cleanup for d */
- if (d) {
- PyMem_FREE(d);
- }
- /* Cleanup for e */
- if (e) {
- PyMem_FREE(e);
- }
-
return return_value;
}
@@ -1770,7 +1759,7 @@ static PyObject *
test_str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c,
char *d, Py_ssize_clean_t d_length, char *e,
Py_ssize_clean_t e_length)
-/*[clinic end generated code: output=f579dd9e795a364e input=eb4c38e1f898f402]*/
+/*[clinic end generated code: output=2d7e8b6203db31aa input=eb4c38e1f898f402]*/
/*[clinic input]
diff --git a/Lib/test/test_clinic.py b/Lib/test/test_clinic.py
index 86c4f94ac028..feebb6add09a 100644
--- a/Lib/test/test_clinic.py
+++ b/Lib/test/test_clinic.py
@@ -1045,6 +1045,17 @@ def test_str_converter(self):
self.assertEqual(ac_tester.str_converter('a', b'b', b'c'), ('a', 'b', 'c'))
self.assertEqual(ac_tester.str_converter('a', b'b', 'c\0c'), ('a', 'b', 'c\0c'))
+ def test_str_converter_encoding(self):
+ with self.assertRaises(TypeError):
+ ac_tester.str_converter_encoding(1)
+ self.assertEqual(ac_tester.str_converter_encoding('a', 'b', 'c'), ('a', 'b', 'c'))
+ with self.assertRaises(TypeError):
+ ac_tester.str_converter_encoding('a', b'b\0b', 'c')
+ self.assertEqual(ac_tester.str_converter_encoding('a', b'b', bytearray([ord('c')])), ('a', 'b', 'c'))
+ self.assertEqual(ac_tester.str_converter_encoding('a', b'b', bytearray([ord('c'), 0, ord('c')])),
+ ('a', 'b', 'c\x00c'))
+ self.assertEqual(ac_tester.str_converter_encoding('a', b'b', b'c\x00c'), ('a', 'b', 'c\x00c'))
+
def test_py_buffer_converter(self):
with self.assertRaises(TypeError):
ac_tester.py_buffer_converter('a', 'b')
@@ -1211,6 +1222,11 @@ def test_keyword_only_parameter(self):
ac_tester.keyword_only_parameter(1)
self.assertEqual(ac_tester.keyword_only_parameter(a=1), (1,))
+ def test_gh_99240_double_free(self):
+ expected_error = r'gh_99240_double_free\(\) argument 2 must be encoded string without null bytes, not str'
+ with self.assertRaisesRegex(TypeError, expected_error):
+ ac_tester.gh_99240_double_free('a', '\0b')
+
if __name__ == "__main__":
unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2022-11-08-15-54-43.gh-issue-99240.MhYwcz.rst b/Misc/NEWS.d/next/Library/2022-11-08-15-54-43.gh-issue-99240.MhYwcz.rst
new file mode 100644
index 000000000000..0a4db052755f
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-11-08-15-54-43.gh-issue-99240.MhYwcz.rst
@@ -0,0 +1,2 @@
+Fix double-free bug in Argument Clinic ``str_converter`` by
+extracting memory clean up to a new ``post_parsing`` section.
diff --git a/Modules/_testclinic.c b/Modules/_testclinic.c
index 63427ae6373e..079a40130b1d 100644
--- a/Modules/_testclinic.c
+++ b/Modules/_testclinic.c
@@ -551,6 +551,64 @@ str_converter_impl(PyObject *module, const char *a, const char *b,
}
+/*[clinic input]
+str_converter_encoding
+
+ a: str(encoding="idna")
+ b: str(encoding="idna", accept={bytes, bytearray, str})
+ c: str(encoding="idna", accept={bytes, bytearray, str}, zeroes=True)
+ /
+
+[clinic start generated code]*/
+
+static PyObject *
+str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c,
+ Py_ssize_clean_t c_length)
+/*[clinic end generated code: output=ebef1a3f9b730a24 input=0c5cf5159d0e870d]*/
+{
+ assert(!PyErr_Occurred());
+ PyObject *out[3] = {NULL,};
+ int i = 0;
+ PyObject *arg;
+
+ arg = PyUnicode_FromString(a);
+ assert(arg || PyErr_Occurred());
+ if (!arg) {
+ goto error;
+ }
+ out[i++] = arg;
+
+ arg = PyUnicode_FromString(b);
+ assert(arg || PyErr_Occurred());
+ if (!arg) {
+ goto error;
+ }
+ out[i++] = arg;
+
+ arg = PyUnicode_FromStringAndSize(c, c_length);
+ assert(arg || PyErr_Occurred());
+ if (!arg) {
+ goto error;
+ }
+ out[i++] = arg;
+
+ PyObject *tuple = PyTuple_New(3);
+ if (!tuple) {
+ goto error;
+ }
+ for (int j = 0; j < 3; j++) {
+ PyTuple_SET_ITEM(tuple, j, out[j]);
+ }
+ return tuple;
+
+error:
+ for (int j = 0; j < i; j++) {
+ Py_DECREF(out[j]);
+ }
+ return NULL;
+}
+
+
static PyObject *
bytes_from_buffer(Py_buffer *buf)
{
@@ -892,6 +950,25 @@ keyword_only_parameter_impl(PyObject *module, PyObject *a)
}
+/*[clinic input]
+gh_99240_double_free
+
+ a: str(encoding="idna")
+ b: str(encoding="idna")
+ /
+
+Proof-of-concept of GH-99240 double-free bug.
+
+[clinic start generated code]*/
+
+static PyObject *
+gh_99240_double_free_impl(PyObject *module, char *a, char *b)
+/*[clinic end generated code: output=586dc714992fe2ed input=23db44aa91870fc7]*/
+{
+ Py_RETURN_NONE;
+}
+
+
static PyMethodDef tester_methods[] = {
TEST_EMPTY_FUNCTION_METHODDEF
OBJECTS_CONVERTER_METHODDEF
@@ -916,6 +993,7 @@ static PyMethodDef tester_methods[] = {
DOUBLE_CONVERTER_METHODDEF
PY_COMPLEX_CONVERTER_METHODDEF
STR_CONVERTER_METHODDEF
+ STR_CONVERTER_ENCODING_METHODDEF
PY_BUFFER_CONVERTER_METHODDEF
KEYWORDS_METHODDEF
KEYWORDS_KWONLY_METHODDEF
@@ -933,6 +1011,7 @@ static PyMethodDef tester_methods[] = {
POSONLY_KEYWORDS_OPT_KWONLY_OPT_METHODDEF
POSONLY_OPT_KEYWORDS_OPT_KWONLY_OPT_METHODDEF
KEYWORD_ONLY_PARAMETER_METHODDEF
+ GH_99240_DOUBLE_FREE_METHODDEF
{NULL, NULL}
};
diff --git a/Modules/clinic/_testclinic.c.h b/Modules/clinic/_testclinic.c.h
index a8273a5e6e2f..ed79fac50869 100644
--- a/Modules/clinic/_testclinic.c.h
+++ b/Modules/clinic/_testclinic.c.h
@@ -1159,6 +1159,43 @@ str_converter(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
return return_value;
}
+PyDoc_STRVAR(str_converter_encoding__doc__,
+"str_converter_encoding($module, a, b, c, /)\n"
+"--\n"
+"\n");
+
+#define STR_CONVERTER_ENCODING_METHODDEF \
+ {"str_converter_encoding", (PyCFunction)(void(*)(void))str_converter_encoding, METH_FASTCALL, str_converter_encoding__doc__},
+
+static PyObject *
+str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c,
+ Py_ssize_clean_t c_length);
+
+static PyObject *
+str_converter_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
+{
+ PyObject *return_value = NULL;
+ char *a = NULL;
+ char *b = NULL;
+ char *c = NULL;
+ Py_ssize_clean_t c_length;
+
+ if (!_PyArg_ParseStack(args, nargs, "esetet#:str_converter_encoding",
+ "idna", &a, "idna", &b, "idna", &c, &c_length)) {
+ goto exit;
+ }
+ return_value = str_converter_encoding_impl(module, a, b, c, c_length);
+ /* Post parse cleanup for a */
+ PyMem_FREE(a);
+ /* Post parse cleanup for b */
+ PyMem_FREE(b);
+ /* Post parse cleanup for c */
+ PyMem_FREE(c);
+
+exit:
+ return return_value;
+}
+
PyDoc_STRVAR(py_buffer_converter__doc__,
"py_buffer_converter($module, a, b, /)\n"
"--\n"
@@ -1914,4 +1951,37 @@ keyword_only_parameter(PyObject *module, PyObject *const *args, Py_ssize_t nargs
exit:
return return_value;
}
-/*[clinic end generated code: output=2d5ac6b41daadf6f input=a9049054013a1b77]*/
+
+PyDoc_STRVAR(gh_99240_double_free__doc__,
+"gh_99240_double_free($module, a, b, /)\n"
+"--\n"
+"\n"
+"Proof-of-concept of GH-99240 double-free bug.");
+
+#define GH_99240_DOUBLE_FREE_METHODDEF \
+ {"gh_99240_double_free", (PyCFunction)(void(*)(void))gh_99240_double_free, METH_FASTCALL, gh_99240_double_free__doc__},
+
+static PyObject *
+gh_99240_double_free_impl(PyObject *module, char *a, char *b);
+
+static PyObject *
+gh_99240_double_free(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
+{
+ PyObject *return_value = NULL;
+ char *a = NULL;
+ char *b = NULL;
+
+ if (!_PyArg_ParseStack(args, nargs, "eses:gh_99240_double_free",
+ "idna", &a, "idna", &b)) {
+ goto exit;
+ }
+ return_value = gh_99240_double_free_impl(module, a, b);
+ /* Post parse cleanup for a */
+ PyMem_FREE(a);
+ /* Post parse cleanup for b */
+ PyMem_FREE(b);
+
+exit:
+ return return_value;
+}
+/*[clinic end generated code: output=ecc55cbeac4adae6 input=a9049054013a1b77]*/
diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py
index 1a59fa57ac41..b0d1717596f6 100755
--- a/Tools/clinic/clinic.py
+++ b/Tools/clinic/clinic.py
@@ -346,6 +346,12 @@ def __init__(self):
# "goto exit" if there are any.
self.return_conversion = []
+ # The C statements required to do some operations
+ # after the end of parsing but before cleaning up.
+ # These operations may be, for example, memory deallocations which
+ # can only be done without any error happening during argument parsing.
+ self.post_parsing = []
+
# The C statements required to clean up after the impl call.
self.cleanup = []
@@ -751,6 +757,7 @@ def parser_body(prototype, *fields, declarations=''):
{modifications}
{return_value} = {c_basename}_impl({impl_arguments});
{return_conversion}
+ {post_parsing}
{exit_label}
{cleanup}
@@ -1343,6 +1350,7 @@ def render_function(self, clinic, f):
template_dict['impl_parameters'] = ", ".join(data.impl_parameters)
template_dict['impl_arguments'] = ", ".join(data.impl_arguments)
template_dict['return_conversion'] = format_escape("".join(data.return_conversion).rstrip())
+ template_dict['post_parsing'] = format_escape("".join(data.post_parsing).rstrip())
template_dict['cleanup'] = format_escape("".join(data.cleanup))
template_dict['return_value'] = data.return_value
@@ -1367,6 +1375,7 @@ def render_function(self, clinic, f):
return_conversion=template_dict['return_conversion'],
initializers=template_dict['initializers'],
modifications=template_dict['modifications'],
+ post_parsing=template_dict['post_parsing'],
cleanup=template_dict['cleanup'],
)
@@ -2595,6 +2604,10 @@ def _render_non_self(self, parameter, data):
# parse_arguments
self.parse_argument(data.parse_arguments)
+ # post_parsing
+ if post_parsing := self.post_parsing():
+ data.post_parsing.append('/* Post parse cleanup for ' + name + ' */\n' + post_parsing.rstrip() + '\n')
+
# cleanup
cleanup = self.cleanup()
if cleanup:
@@ -2686,6 +2699,14 @@ def modify(self):
"""
return ""
+ def post_parsing(self):
+ """
+ The C statements required to do some operations after the end of parsing but before cleaning up.
+ Return a string containing this code indented at column 0.
+ If no operation is necessary, return an empty string.
+ """
+ return ""
+
def cleanup(self):
"""
The C statements required to clean up after this variable.
@@ -3278,10 +3299,10 @@ def converter_init(self, *, accept={str}, encoding=None, zeroes=False):
if NoneType in accept and self.c_default == "Py_None":
self.c_default = "NULL"
- def cleanup(self):
+ def post_parsing(self):
if self.encoding:
name = self.name
- return "".join(["if (", name, ") {\n PyMem_FREE(", name, ");\n}\n"])
+ return f"PyMem_FREE({name});\n"
def parse_arg(self, argname, displayname):
if self.format_unit == 's':
More information about the Python-checkins
mailing list