[Python-checkins] cpython (2.7): complain if the codec doesn't return unicode

benjamin.peterson python-checkins at python.org
Sat Dec 28 17:37:34 CET 2013


http://hg.python.org/cpython/rev/990d7647ea51
changeset:   88215:990d7647ea51
branch:      2.7
user:        Benjamin Peterson <benjamin at python.org>
date:        Sat Dec 28 10:33:58 2013 -0600
summary:
  complain if the codec doesn't return unicode

files:
  Lib/test/bad_coding3.py |  2 ++
  Lib/test/test_pep263.py |  5 +++++
  Misc/NEWS               |  3 +++
  Parser/tokenizer.c      |  6 ++++++
  4 files changed, 16 insertions(+), 0 deletions(-)


diff --git a/Lib/test/bad_coding3.py b/Lib/test/bad_coding3.py
new file mode 100644
--- /dev/null
+++ b/Lib/test/bad_coding3.py
@@ -0,0 +1,2 @@
+# coding: string-escape
+\x70\x72\x69\x6e\x74\x20\x32\x2b\x32\x0a
diff --git a/Lib/test/test_pep263.py b/Lib/test/test_pep263.py
--- a/Lib/test/test_pep263.py
+++ b/Lib/test/test_pep263.py
@@ -58,6 +58,11 @@
         with self.assertRaisesRegexp(SyntaxError, 'BOM'):
             compile('\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec')
 
+    def test_non_unicode_codec(self):
+        with self.assertRaisesRegexp(SyntaxError,
+                                     'codec did not return a unicode'):
+            from test import bad_coding3
+
 
 def test_main():
     test_support.run_unittest(PEP263Test)
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -9,6 +9,9 @@
 Core and Builtins
 -----------------
 
+- Raise a better error when non-unicode codecs are used for a file's coding
+  cookie.
+
 - Issue #17976: Fixed potential problem with file.write() not detecting IO error
   by inspecting the return value of fwrite().  Based on patches by Jaakko Moisio
   and Victor Stinner.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -400,6 +400,12 @@
         buf = PyObject_CallObject(tok->decoding_readline, NULL);
         if (buf == NULL)
             return error_ret(tok);
+        if (!PyUnicode_Check(buf)) {
+            Py_DECREF(buf);
+            PyErr_SetString(PyExc_SyntaxError,
+                            "codec did not return a unicode object");
+            return error_ret(tok);
+        }
     } else {
         tok->decoding_buffer = NULL;
         if (PyString_CheckExact(buf))

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list