[Python-checkins] gh-94360: Fix a tokenizer crash when reading encoded files with syntax errors from stdin (#94386)
pablogsal
webhook-mailer at python.org
Tue Jul 5 12:40:01 EDT 2022
https://github.com/python/cpython/commit/36fcde61ba48c4e918830691ecf4092e4e3b9b99
commit: 36fcde61ba48c4e918830691ecf4092e4e3b9b99
branch: main
author: Pablo Galindo Salgado <Pablogsal at gmail.com>
committer: pablogsal <Pablogsal at gmail.com>
date: 2022-07-05T17:39:21+01:00
summary:
gh-94360: Fix a tokenizer crash when reading encoded files with syntax errors from stdin (#94386)
* gh-94360: Fix a tokenizer crash when reading encoded files with syntax errors from stdin
Signed-off-by: Pablo Galindo <pablogsal at gmail.com>
* nitty nit
Co-authored-by: Łukasz Langa <lukasz at langa.pl>
files:
A Misc/NEWS.d/next/Core and Builtins/2022-06-28-14-20-36.gh-issue-94360.DiEnen.rst
M Parser/pegen_errors.c
M Parser/tokenizer.c
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-06-28-14-20-36.gh-issue-94360.DiEnen.rst b/Misc/NEWS.d/next/Core and Builtins/2022-06-28-14-20-36.gh-issue-94360.DiEnen.rst
new file mode 100644
index 0000000000000..0a74ba38b0ac4
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-06-28-14-20-36.gh-issue-94360.DiEnen.rst
@@ -0,0 +1,2 @@
+Fixed a tokenizer crash when reading encoded files with syntax errors from
+``stdin`` with non-UTF-8 encoded text. Patch by Pablo Galindo.
diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c
index 489699679633e..5703088443ede 100644
--- a/Parser/pegen_errors.c
+++ b/Parser/pegen_errors.c
@@ -259,15 +259,15 @@ get_error_line_from_tokenizer_buffers(Parser *p, Py_ssize_t lineno)
const char* buf_end = p->tok->fp_interactive ? p->tok->interactive_src_end : p->tok->inp;
for (int i = 0; i < relative_lineno - 1; i++) {
- char *new_line = strchr(cur_line, '\n') + 1;
+ char *new_line = strchr(cur_line, '\n');
// The assert is here for debug builds but the conditional that
// follows is there so in release builds we do not crash at the cost
// to report a potentially wrong line.
- assert(new_line != NULL && new_line <= buf_end);
- if (new_line == NULL || new_line > buf_end) {
+ assert(new_line != NULL && new_line + 1 < buf_end);
+ if (new_line == NULL || new_line + 1 > buf_end) {
break;
}
- cur_line = new_line;
+ cur_line = new_line + 1;
}
char *next_newline;
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 952265eb923f9..f2606f17d1463 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -308,6 +308,10 @@ tok_concatenate_interactive_new_line(struct tok_state *tok, const char *line) {
Py_ssize_t current_size = tok->interactive_src_end - tok->interactive_src_start;
Py_ssize_t line_size = strlen(line);
+ char last_char = line[line_size > 0 ? line_size - 1 : line_size];
+ if (last_char != '\n') {
+ line_size += 1;
+ }
char* new_str = tok->interactive_src_start;
new_str = PyMem_Realloc(new_str, current_size + line_size + 1);
@@ -321,7 +325,11 @@ tok_concatenate_interactive_new_line(struct tok_state *tok, const char *line) {
return -1;
}
strcpy(new_str + current_size, line);
-
+ if (last_char != '\n') {
+ /* Last line does not end in \n, fake one */
+ new_str[current_size + line_size - 1] = '\n';
+ new_str[current_size + line_size] = '\0';
+ }
tok->interactive_src_start = new_str;
tok->interactive_src_end = new_str + current_size + line_size;
return 0;
More information about the Python-checkins mailing list