[Python-checkins] cpython: Issue #25788: fileinput.hook_encoded() now supports an "errors" argument

serhiy.storchaka python-checkins at python.org
Wed Apr 27 16:14:20 EDT 2016


https://hg.python.org/cpython/rev/8ab8f5259f09
changeset:   101167:8ab8f5259f09
parent:      101165:cb5645b36713
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Wed Apr 27 23:13:46 2016 +0300
summary:
  Issue #25788: fileinput.hook_encoded() now supports an "errors" argument
that is passed through to open().  Original patch by Joseph Hackman.

files:
  Doc/library/fileinput.rst  |  10 +++++++---
  Doc/whatsnew/3.6.rst       |   7 +++++++
  Lib/fileinput.py           |   4 ++--
  Lib/test/test_fileinput.py |  21 ++++++++++++++++++++-
  Misc/ACKS                  |   1 +
  Misc/NEWS                  |   3 +++
  6 files changed, 40 insertions(+), 6 deletions(-)


diff --git a/Doc/library/fileinput.rst b/Doc/library/fileinput.rst
--- a/Doc/library/fileinput.rst
+++ b/Doc/library/fileinput.rst
@@ -193,10 +193,14 @@
    Usage example:  ``fi = fileinput.FileInput(openhook=fileinput.hook_compressed)``
 
 
-.. function:: hook_encoded(encoding)
+.. function:: hook_encoded(encoding, errors=None)
 
    Returns a hook which opens each file with :func:`open`, using the given
-   *encoding* to read the file.
+   *encoding* and *errors* to read the file.
 
    Usage example: ``fi =
-   fileinput.FileInput(openhook=fileinput.hook_encoded("iso-8859-1"))``
+   fileinput.FileInput(openhook=fileinput.hook_encoded("utf-8",
+   "surrogateescape"))``
+
+   .. versionchanged:: 3.6
+      Added the optional *errors* parameter.
diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst
--- a/Doc/whatsnew/3.6.rst
+++ b/Doc/whatsnew/3.6.rst
@@ -358,6 +358,13 @@
 (Contributed by Aviv Palivoda in :issue:`26243`.)
 
 
+fileinput
+---------
+
+:func:`~fileinput.hook_encoded` now supports the *errors* argument.
+(Contributed by Joseph Hackman in :issue:`25788`.)
+
+
 Optimizations
 =============
 
diff --git a/Lib/fileinput.py b/Lib/fileinput.py
--- a/Lib/fileinput.py
+++ b/Lib/fileinput.py
@@ -400,9 +400,9 @@
         return open(filename, mode)
 
 
-def hook_encoded(encoding):
+def hook_encoded(encoding, errors=None):
     def openhook(filename, mode):
-        return open(filename, mode, encoding=encoding)
+        return open(filename, mode, encoding=encoding, errors=errors)
     return openhook
 
 
diff --git a/Lib/test/test_fileinput.py b/Lib/test/test_fileinput.py
--- a/Lib/test/test_fileinput.py
+++ b/Lib/test/test_fileinput.py
@@ -945,7 +945,8 @@
 
     def test(self):
         encoding = object()
-        result = fileinput.hook_encoded(encoding)
+        errors = object()
+        result = fileinput.hook_encoded(encoding, errors=errors)
 
         fake_open = InvocationRecorder()
         original_open = builtins.open
@@ -963,8 +964,26 @@
         self.assertIs(args[0], filename)
         self.assertIs(args[1], mode)
         self.assertIs(kwargs.pop('encoding'), encoding)
+        self.assertIs(kwargs.pop('errors'), errors)
         self.assertFalse(kwargs)
 
+    def test_errors(self):
+        with open(TESTFN, 'wb') as f:
+            f.write(b'\x80abc')
+        self.addCleanup(safe_unlink, TESTFN)
+
+        def check(errors, expected_lines):
+            with FileInput(files=TESTFN, mode='r',
+                           openhook=hook_encoded('utf-8', errors=errors)) as fi:
+                lines = list(fi)
+            self.assertEqual(lines, expected_lines)
+
+        check('ignore', ['abc'])
+        with self.assertRaises(UnicodeDecodeError):
+            check('strict', ['abc'])
+        check('replace', ['\ufffdabc'])
+        check('backslashreplace', ['\\x80abc'])
+
     def test_modes(self):
         with open(TESTFN, 'wb') as f:
             # UTF-7 is a convenient, seldom used encoding
diff --git a/Misc/ACKS b/Misc/ACKS
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -538,6 +538,7 @@
 Lars Gustäbel
 Thomas Güttler
 Jonas H.
+Joseph Hackman
 Barry Haddow
 Philipp Hagemeister
 Paul ten Hagen
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -256,6 +256,9 @@
 Library
 -------
 
+- Issue #25788: fileinput.hook_encoded() now supports an "errors" argument
+  for passing to open.  Original patch by Joseph Hackman.
+
 - Issue #26634: recursive_repr() now sets __qualname__ of wrapper.  Patch by
   Xiang Zhang.
 

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list