[Python-checkins] cpython (3.2): Issue #14629: Raise SyntaxError in tokenize.detect_encoding
martin.v.loewis
python-checkins at python.org
Fri Apr 20 14:37:25 CEST 2012
http://hg.python.org/cpython/rev/b07488490001
changeset: 76425:b07488490001
branch: 3.2
parent: 76421:41c64c700e1e
user: Martin v. Löwis <martin at v.loewis.de>
date: Fri Apr 20 14:36:47 2012 +0200
summary:
Issue #14629: Raise SyntaxError in tokenize.detect_encoding
if the first two lines contain non-UTF-8 characters and there is no encoding declaration.
files:
Lib/test/test_tokenize.py | 10 ++++++++++
Lib/tokenize.py | 7 +++++--
Misc/NEWS | 3 +++
3 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -825,6 +825,16 @@
found, consumed_lines = detect_encoding(rl)
self.assertEqual(found, "iso-8859-1")
+ def test_syntaxerror_latin1(self):
+ # Issue 14629: need to raise SyntaxError if the first
+ # line(s) have non-UTF-8 characters
+ lines = (
+ b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
+ )
+ readline = self.get_readline(lines)
+ self.assertRaises(SyntaxError, detect_encoding, readline)
+
+
def test_utf8_normalization(self):
# See get_normal_name() in tokenizer.c.
encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -292,9 +292,12 @@
def find_cookie(line):
try:
- line_string = line.decode('ascii')
+ # Decode as UTF-8. Either the line is an encoding declaration,
+ # in which case it should be pure ASCII, or it must be UTF-8
+ # per default encoding.
+ line_string = line.decode('utf-8')
except UnicodeDecodeError:
- return None
+ raise SyntaxError("invalid or missing encoding declaration")
matches = cookie_re.findall(line_string)
if not matches:
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -47,6 +47,9 @@
Library
-------
+- Issue #14629: Raise SyntaxError in tokenizer.detect_encoding if the
+ first two lines have non-UTF-8 characters without an encoding declaration.
+
- Issue #14308: Fix an exception when a "dummy" thread is in the threading
module's active list after a fork().
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list