[Python-checkins] [3.12] gh-106242: Fix path truncation in os.path.normpath (GH-106816) (#107981)
Yhg1s
webhook-mailer at python.org
Wed Aug 16 19:19:51 EDT 2023
https://github.com/python/cpython/commit/ede98958810b76694cf756d305b564cd6adc1a48
commit: ede98958810b76694cf756d305b564cd6adc1a48
branch: 3.12
author: Steve Dower <steve.dower at python.org>
committer: Yhg1s <thomas at python.org>
date: 2023-08-17T01:19:48+02:00
summary:
[3.12] gh-106242: Fix path truncation in os.path.normpath (GH-106816) (#107981)
* gh-106242: Fix path truncation in os.path.normpath (GH-106816)
* gh-106242: Minor fixup to avoid compiler warnings
---------
Co-authored-by: Finn Womack <flan313 at gmail.com>
Co-authored-by: Serhiy Storchaka <storchaka at gmail.com>
files:
A Misc/NEWS.d/next/Library/2023-08-14-23-11-11.gh-issue-106242.71HMym.rst
M Include/internal/pycore_fileutils.h
M Lib/test/test_genericpath.py
M Modules/posixmodule.c
M Python/fileutils.c
diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h
index ef6642d00f1b5..7c2b6ec0bffef 100644
--- a/Include/internal/pycore_fileutils.h
+++ b/Include/internal/pycore_fileutils.h
@@ -252,7 +252,8 @@ extern int _Py_add_relfile(wchar_t *dirname,
const wchar_t *relfile,
size_t bufsize);
extern size_t _Py_find_basename(const wchar_t *filename);
-PyAPI_FUNC(wchar_t *) _Py_normpath(wchar_t *path, Py_ssize_t size);
+PyAPI_FUNC(wchar_t*) _Py_normpath(wchar_t *path, Py_ssize_t size);
+extern wchar_t *_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *length);
// The Windows Games API family does not provide these functions
// so provide our own implementations. Remove them in case they get added
diff --git a/Lib/test/test_genericpath.py b/Lib/test/test_genericpath.py
index 489044f8090d3..4f311c2d498e9 100644
--- a/Lib/test/test_genericpath.py
+++ b/Lib/test/test_genericpath.py
@@ -460,6 +460,10 @@ def test_normpath_issue5827(self):
for path in ('', '.', '/', '\\', '///foo/.//bar//'):
self.assertIsInstance(self.pathmodule.normpath(path), str)
+ def test_normpath_issue106242(self):
+ for path in ('\x00', 'foo\x00bar', '\x00\x00', '\x00foo', 'foo\x00'):
+ self.assertEqual(self.pathmodule.normpath(path), path)
+
def test_abspath_issue3426(self):
# Check that abspath returns unicode when the arg is unicode
# with both ASCII and non-ASCII cwds.
diff --git a/Misc/NEWS.d/next/Library/2023-08-14-23-11-11.gh-issue-106242.71HMym.rst b/Misc/NEWS.d/next/Library/2023-08-14-23-11-11.gh-issue-106242.71HMym.rst
new file mode 100644
index 0000000000000..44237a9f15708
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-08-14-23-11-11.gh-issue-106242.71HMym.rst
@@ -0,0 +1 @@
+Fixes :func:`os.path.normpath` to handle embedded null characters without truncating the path.
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
index 342f393b1f0f9..b9f45c0ce5543 100644
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -5275,7 +5275,9 @@ os__path_normpath_impl(PyObject *module, PyObject *path)
if (!buffer) {
return NULL;
}
- PyObject *result = PyUnicode_FromWideChar(_Py_normpath(buffer, len), -1);
+ Py_ssize_t norm_len;
+ wchar_t *norm_path = _Py_normpath_and_size(buffer, len, &norm_len);
+ PyObject *result = PyUnicode_FromWideChar(norm_path, norm_len);
PyMem_Free(buffer);
return result;
}
diff --git a/Python/fileutils.c b/Python/fileutils.c
index f137ee936502c..268ffa3d61a47 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -2377,12 +2377,14 @@ _Py_find_basename(const wchar_t *filename)
path, which will be within the original buffer. Guaranteed to not
make the path longer, and will not fail. 'size' is the length of
the path, if known. If -1, the first null character will be assumed
- to be the end of the path. */
+ to be the end of the path. 'normsize' will be set to contain the
+ length of the resulting normalized path. */
wchar_t *
-_Py_normpath(wchar_t *path, Py_ssize_t size)
+_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize)
{
assert(path != NULL);
- if (!path[0] || size == 0) {
+ if ((size < 0 && !path[0]) || size == 0) {
+ *normsize = 0;
return path;
}
wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
@@ -2431,11 +2433,7 @@ _Py_normpath(wchar_t *path, Py_ssize_t size)
*p2++ = lastC = *p1;
}
}
- if (sepCount) {
- minP2 = p2; // Invalid path
- } else {
- minP2 = p2 - 1; // Absolute path has SEP at minP2
- }
+ minP2 = p2 - 1;
}
#else
// Skip past two leading SEPs
@@ -2495,13 +2493,28 @@ _Py_normpath(wchar_t *path, Py_ssize_t size)
while (--p2 != minP2 && *p2 == SEP) {
*p2 = L'\0';
}
+ } else {
+ --p2;
}
+ *normsize = p2 - path + 1;
#undef SEP_OR_END
#undef IS_SEP
#undef IS_END
return path;
}
+/* In-place path normalisation. Returns the start of the normalized
+ path, which will be within the original buffer. Guaranteed to not
+ make the path longer, and will not fail. 'size' is the length of
+ the path, if known. If -1, the first null character will be assumed
+ to be the end of the path. */
+wchar_t *
+_Py_normpath(wchar_t *path, Py_ssize_t size)
+{
+ Py_ssize_t norm_length;
+ return _Py_normpath_and_size(path, size, &norm_length);
+}
+
/* Get the current directory. buflen is the buffer size in wide characters
including the null character. Decode the path from the locale encoding.
More information about the Python-checkins mailing list