[Python-checkins] cpython: Issue #14629: Mention the filename in SyntaxError exceptions from
brett.cannon
python-checkins at python.org
Fri Apr 20 19:24:33 CEST 2012
http://hg.python.org/cpython/rev/1b57de8a8383
changeset: 76430:1b57de8a8383
user: Brett Cannon <brett at python.org>
date: Fri Apr 20 13:23:54 2012 -0400
summary:
Issue #14629: Mention the filename in SyntaxError exceptions from
tokenize.detect_encoding() (when available).
files:
Lib/test/test_tokenize.py | 29 +++++++++++++++++++++++++++
Lib/tokenize.py | 22 +++++++++++++++++--
Misc/NEWS | 3 ++
3 files changed, 51 insertions(+), 3 deletions(-)
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -904,6 +904,35 @@
self.assertEqual(fp.encoding, 'utf-8-sig')
self.assertEqual(fp.mode, 'r')
+ def test_filename_in_exception(self):
+ # When possible, include the file name in the exception.
+ path = 'some_file_path'
+ lines = (
+ b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
+ )
+ class Bunk:
+ def __init__(self, lines, path):
+ self.name = path
+ self._lines = lines
+ self._index = 0
+
+ def readline(self):
+ if self._index == len(lines):
+ raise StopIteration
+ line = lines[self._index]
+ self._index += 1
+ return line
+
+ with self.assertRaises(SyntaxError):
+ ins = Bunk(lines, path)
+ # Make sure lacking a name isn't an issue.
+ del ins.name
+ detect_encoding(ins.readline)
+ with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
+ ins = Bunk(lines, path)
+ detect_encoding(ins.readline)
+
+
class TestTokenize(TestCase):
def test_tokenize(self):
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -353,6 +353,10 @@
If no encoding is specified, then the default of 'utf-8' will be returned.
"""
+ try:
+ filename = readline.__self__.name
+ except AttributeError:
+ filename = None
bom_found = False
encoding = None
default = 'utf-8'
@@ -369,7 +373,10 @@
# per default encoding.
line_string = line.decode('utf-8')
except UnicodeDecodeError:
- raise SyntaxError("invalid or missing encoding declaration")
+ msg = "invalid or missing encoding declaration"
+ if filename is not None:
+ msg = '{} for {!r}'.format(msg, filename)
+ raise SyntaxError(msg)
matches = cookie_re.findall(line_string)
if not matches:
@@ -379,12 +386,21 @@
codec = lookup(encoding)
except LookupError:
# This behaviour mimics the Python interpreter
- raise SyntaxError("unknown encoding: " + encoding)
+ if filename is None:
+ msg = "unknown encoding: " + encoding
+ else:
+ msg = "unknown encoding for {!r}: {}".format(filename,
+ encoding)
+ raise SyntaxError(msg)
if bom_found:
if codec.name != 'utf-8':
# This behaviour mimics the Python interpreter
- raise SyntaxError('encoding problem: utf-8')
+ if filename is None:
+ msg = 'encoding problem: utf-8'
+ else:
+ msg = 'encoding problem for {!r}: utf-8'.format(filename)
+ raise SyntaxError(msg)
encoding += '-sig'
return encoding
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -55,6 +55,9 @@
Library
-------
+- Issue #14629: tokenizer.detect_encoding will specify the filename in the
+ SyntaxError exception if found at readline.__self__.name.
+
- Issue #14629: Raise SyntaxError in tokenizer.detect_encoding if the
first two lines have non-UTF-8 characters without an encoding declaration.
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list