[Python-checkins] cpython (merge 3.2 -> default): Fixes issue #12268: File readline, readlines and read() or readall() methods

gregory.p.smith python-checkins at python.org
Sun Jun 24 09:24:48 CEST 2012


http://hg.python.org/cpython/rev/19a6bef57490
changeset:   77673:19a6bef57490
parent:      77666:02bcfb7aa2ca
parent:      77672:781b95159954
user:        Gregory P. Smith <greg at krypto.org>
date:        Sun Jun 24 00:23:47 2012 -0700
summary:
  Fixes issue #12268: File readline, readlines and read() or readall() methods
no longer lose data when an underlying read system call is interrupted.
IOError is no longer raised due to a read system call returning EINTR
from within these methods.

files:
  Lib/test/test_file_eintr.py |  236 ++++++++++++++++++++++++
  Lib/test/test_io.py         |   10 +-
  Misc/NEWS                   |    5 +
  Modules/_io/_iomodule.h     |    5 +
  Modules/_io/bufferedio.c    |    8 +-
  Modules/_io/fileio.c        |    7 +
  Modules/_io/iobase.c        |   23 +-
  Modules/_io/textio.c        |   16 +-
  8 files changed, 295 insertions(+), 15 deletions(-)


diff --git a/Lib/test/test_file_eintr.py b/Lib/test/test_file_eintr.py
new file mode 100644
--- /dev/null
+++ b/Lib/test/test_file_eintr.py
@@ -0,0 +1,236 @@
+# Written to test interrupted system calls interfering with our many buffered
+# IO implementations.  http://bugs.python.org/issue12268
+#
+# It was suggested that this code could be merged into test_io and the tests
+# made to work using the same method as the existing signal tests in test_io.
+# I was unable to get single process tests using alarm or setitimer that way
+# to reproduce the EINTR problems.  This process based test suite reproduces
+# the problems prior to the issue12268 patch reliably on Linux and OSX.
+#  - gregory.p.smith
+
+import os
+import select
+import signal
+import subprocess
+import sys
+from test.support import run_unittest
+import time
+import unittest
+
+# Test import all of the things we're about to try testing up front.
+from _io import FileIO
+
+
+ at unittest.skipUnless(os.name == 'posix', 'tests requires a posix system.')
+class TestFileIOSignalInterrupt(unittest.TestCase):
+    def setUp(self):
+        self._process = None
+
+    def tearDown(self):
+        if self._process and self._process.poll() is None:
+            try:
+                self._process.kill()
+            except OSError:
+                pass
+
+    def _generate_infile_setup_code(self):
+        """Returns the infile = ... line of code for the reader process.
+
+        subclasseses should override this to test different IO objects.
+        """
+        return ('import _io ;'
+                'infile = _io.FileIO(sys.stdin.fileno(), "rb")')
+
+    def fail_with_process_info(self, why, stdout=b'', stderr=b'',
+                               communicate=True):
+        """A common way to cleanup and fail with useful debug output.
+
+        Kills the process if it is still running, collects remaining output
+        and fails the test with an error message including the output.
+
+        Args:
+            why: Text to go after "Error from IO process" in the message.
+            stdout, stderr: standard output and error from the process so
+                far to include in the error message.
+            communicate: bool, when True we call communicate() on the process
+                after killing it to gather additional output.
+        """
+        if self._process.poll() is None:
+            time.sleep(0.1)  # give it time to finish printing the error.
+            try:
+                self._process.terminate()  # Ensure it dies.
+            except OSError:
+                pass
+        if communicate:
+            stdout_end, stderr_end = self._process.communicate()
+            stdout += stdout_end
+            stderr += stderr_end
+        self.fail('Error from IO process %s:\nSTDOUT:\n%sSTDERR:\n%s\n' %
+                  (why, stdout.decode(), stderr.decode()))
+
+    def _test_reading(self, data_to_write, read_and_verify_code):
+        """Generic buffered read method test harness to validate EINTR behavior.
+
+        Also validates that Python signal handlers are run during the read.
+
+        Args:
+            data_to_write: String to write to the child process for reading
+                before sending it a signal, confirming the signal was handled,
+                writing a final newline and closing the infile pipe.
+            read_and_verify_code: Single "line" of code to read from a file
+                object named 'infile' and validate the result.  This will be
+                executed as part of a python subprocess fed data_to_write.
+        """
+        infile_setup_code = self._generate_infile_setup_code()
+        # Total pipe IO in this function is smaller than the minimum posix OS
+        # pipe buffer size of 512 bytes.  No writer should block.
+        assert len(data_to_write) < 512, 'data_to_write must fit in pipe buf.'
+
+        # Start a subprocess to call our read method while handling a signal.
+        self._process = subprocess.Popen(
+                [sys.executable, '-u', '-c',
+                 'import signal, sys ;'
+                 'signal.signal(signal.SIGINT, '
+                               'lambda s, f: sys.stderr.write("$\\n")) ;'
+                 + infile_setup_code + ' ;' +
+                 'sys.stderr.write("Worm Sign!\\n") ;'
+                 + read_and_verify_code + ' ;' +
+                 'infile.close()'
+                ],
+                stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE)
+
+        # Wait for the signal handler to be installed.
+        worm_sign = self._process.stderr.read(len(b'Worm Sign!\n'))
+        if worm_sign != b'Worm Sign!\n':  # See also, Dune by Frank Herbert.
+            self.fail_with_process_info('while awaiting a sign',
+                                        stderr=worm_sign)
+        self._process.stdin.write(data_to_write)
+
+        signals_sent = 0
+        rlist = []
+        # We don't know when the read_and_verify_code in our child is actually
+        # executing within the read system call we want to interrupt.  This
+        # loop waits for a bit before sending the first signal to increase
+        # the likelihood of that.  Implementations without correct EINTR
+        # and signal handling usually fail this test.
+        while not rlist:
+            rlist, _, _ = select.select([self._process.stderr], (), (), 0.05)
+            self._process.send_signal(signal.SIGINT)
+            signals_sent += 1
+            if signals_sent > 200:
+                self._process.kill()
+                self.fail('reader process failed to handle our signals.')
+        # This assumes anything unexpected that writes to stderr will also
+        # write a newline.  That is true of the traceback printing code.
+        signal_line = self._process.stderr.readline()
+        if signal_line != b'$\n':
+            self.fail_with_process_info('while awaiting signal',
+                                        stderr=signal_line)
+
+        # We append a newline to our input so that a readline call can
+        # end on its own before the EOF is seen and so that we're testing
+        # the read call that was interrupted by a signal before the end of
+        # the data stream has been reached.
+        stdout, stderr = self._process.communicate(input=b'\n')
+        if self._process.returncode:
+            self.fail_with_process_info(
+                    'exited rc=%d' % self._process.returncode,
+                    stdout, stderr, communicate=False)
+        # PASS!
+
+    # String format for the read_and_verify_code used by read methods.
+    _READING_CODE_TEMPLATE = (
+            'got = infile.{read_method_name}() ;'
+            'expected = {expected!r} ;'
+            'assert got == expected, ('
+                    '"{read_method_name} returned wrong data.\\n"'
+                    '"got data %r\\nexpected %r" % (got, expected))'
+            )
+
+    def test_readline(self):
+        """readline() must handle signals and not lose data."""
+        self._test_reading(
+                data_to_write=b'hello, world!',
+                read_and_verify_code=self._READING_CODE_TEMPLATE.format(
+                        read_method_name='readline',
+                        expected=b'hello, world!\n'))
+
+    def test_readlines(self):
+        """readlines() must handle signals and not lose data."""
+        self._test_reading(
+                data_to_write=b'hello\nworld!',
+                read_and_verify_code=self._READING_CODE_TEMPLATE.format(
+                        read_method_name='readlines',
+                        expected=[b'hello\n', b'world!\n']))
+
+    def test_readall(self):
+        """readall() must handle signals and not lose data."""
+        self._test_reading(
+                data_to_write=b'hello\nworld!',
+                read_and_verify_code=self._READING_CODE_TEMPLATE.format(
+                        read_method_name='readall',
+                        expected=b'hello\nworld!\n'))
+        # read() is the same thing as readall().
+        self._test_reading(
+                data_to_write=b'hello\nworld!',
+                read_and_verify_code=self._READING_CODE_TEMPLATE.format(
+                        read_method_name='read',
+                        expected=b'hello\nworld!\n'))
+
+
+class TestBufferedIOSignalInterrupt(TestFileIOSignalInterrupt):
+    def _generate_infile_setup_code(self):
+        """Returns the infile = ... line of code to make a BufferedReader."""
+        return ('infile = open(sys.stdin.fileno(), "rb") ;'
+                'import _io ;assert isinstance(infile, _io.BufferedReader)')
+
+    def test_readall(self):
+        """BufferedReader.read() must handle signals and not lose data."""
+        self._test_reading(
+                data_to_write=b'hello\nworld!',
+                read_and_verify_code=self._READING_CODE_TEMPLATE.format(
+                        read_method_name='read',
+                        expected=b'hello\nworld!\n'))
+
+
+class TestTextIOSignalInterrupt(TestFileIOSignalInterrupt):
+    def _generate_infile_setup_code(self):
+        """Returns the infile = ... line of code to make a TextIOWrapper."""
+        return ('infile = open(sys.stdin.fileno(), "rt", newline=None) ;'
+                'import _io ;assert isinstance(infile, _io.TextIOWrapper)')
+
+    def test_readline(self):
+        """readline() must handle signals and not lose data."""
+        self._test_reading(
+                data_to_write=b'hello, world!',
+                read_and_verify_code=self._READING_CODE_TEMPLATE.format(
+                        read_method_name='readline',
+                        expected='hello, world!\n'))
+
+    def test_readlines(self):
+        """readlines() must handle signals and not lose data."""
+        self._test_reading(
+                data_to_write=b'hello\r\nworld!',
+                read_and_verify_code=self._READING_CODE_TEMPLATE.format(
+                        read_method_name='readlines',
+                        expected=['hello\n', 'world!\n']))
+
+    def test_readall(self):
+        """read() must handle signals and not lose data."""
+        self._test_reading(
+                data_to_write=b'hello\nworld!',
+                read_and_verify_code=self._READING_CODE_TEMPLATE.format(
+                        read_method_name='read',
+                        expected="hello\nworld!\n"))
+
+
+def test_main():
+    test_cases = [
+            tc for tc in globals().values()
+            if isinstance(tc, type) and issubclass(tc, unittest.TestCase)]
+    run_unittest(*test_cases)
+
+
+if __name__ == '__main__':
+    test_main()
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -2912,7 +2912,7 @@
         try:
             wio = self.io.open(w, **fdopen_kwargs)
             t.start()
-            signal.alarm(1)
+            signal.setitimer(signal.ITIMER_REAL, 0.1)
             # Fill the pipe enough that the write will be blocking.
             # It will be interrupted by the timer armed above.  Since the
             # other thread has read one byte, the low-level write will
@@ -2957,7 +2957,7 @@
         r, w = os.pipe()
         wio = self.io.open(w, **fdopen_kwargs)
         try:
-            signal.alarm(1)
+            signal.setitimer(signal.ITIMER_REAL, 0.1)
             # Either the reentrant call to wio.write() fails with RuntimeError,
             # or the signal handler raises ZeroDivisionError.
             with self.assertRaises((ZeroDivisionError, RuntimeError)) as cm:
@@ -2992,7 +2992,7 @@
         try:
             rio = self.io.open(r, **fdopen_kwargs)
             os.write(w, b"foo")
-            signal.alarm(1)
+            signal.setitimer(signal.ITIMER_REAL, 0.1)
             # Expected behaviour:
             # - first raw read() returns partial b"foo"
             # - second raw read() returns EINTR
@@ -3036,13 +3036,13 @@
         t.daemon = True
         def alarm1(sig, frame):
             signal.signal(signal.SIGALRM, alarm2)
-            signal.alarm(1)
+            signal.setitimer(signal.ITIMER_REAL, 0.1)
         def alarm2(sig, frame):
             t.start()
         signal.signal(signal.SIGALRM, alarm1)
         try:
             wio = self.io.open(w, **fdopen_kwargs)
-            signal.alarm(1)
+            signal.setitimer(signal.ITIMER_REAL, 0.1)
             # Expected behaviour:
             # - first raw write() is partial (because of the limited pipe buffer
             #   and the first alarm)
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,11 @@
 Core and Builtins
 -----------------
 
+- Issue #12268: File readline, readlines and read() or readall() methods
+  no longer lose data when an underlying read system call is interrupted.
+  IOError is no longer raised due to a read system call returning EINTR
+  from within these methods.
+
 - Issue #11626: Add _SizeT functions to stable ABI.
 
 - Issue #15146: Add PyType_FromSpecWithBases. Patch by Robin Schreiber.
diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h
--- a/Modules/_io/_iomodule.h
+++ b/Modules/_io/_iomodule.h
@@ -57,6 +57,11 @@
     int translated, int universal, PyObject *readnl,
     int kind, char *start, char *end, Py_ssize_t *consumed);
 
+/* Return 1 if an EnvironmentError with errno == EINTR is set (and then
+   clears the error indicator), 0 otherwise.
+   Should only be called when PyErr_Occurred() is true.
+*/
+extern int _PyIO_trap_eintr(void);
 
 #define DEFAULT_BUFFER_SIZE (8 * 1024)  /* bytes */
 
diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c
--- a/Modules/_io/bufferedio.c
+++ b/Modules/_io/bufferedio.c
@@ -746,8 +746,8 @@
    clears the error indicator), 0 otherwise.
    Should only be called when PyErr_Occurred() is true.
 */
-static int
-_trap_eintr(void)
+int
+_PyIO_trap_eintr(void)
 {
     static PyObject *eintr_int = NULL;
     PyObject *typ, *val, *tb;
@@ -1396,7 +1396,7 @@
     */
     do {
         res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_readinto, memobj, NULL);
-    } while (res == NULL && _trap_eintr());
+    } while (res == NULL && _PyIO_trap_eintr());
     Py_DECREF(memobj);
     if (res == NULL)
         return -1;
@@ -1850,7 +1850,7 @@
         errno = 0;
         res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_write, memobj, NULL);
         errnum = errno;
-    } while (res == NULL && _trap_eintr());
+    } while (res == NULL && _PyIO_trap_eintr());
     Py_DECREF(memobj);
     if (res == NULL)
         return -1;
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -670,6 +670,13 @@
         if (n == 0)
             break;
         if (n < 0) {
+            if (errno == EINTR) {
+                if (PyErr_CheckSignals()) {
+                    Py_DECREF(result);
+                    return NULL;
+                }
+                continue;
+            }
             if (total > 0)
                 break;
             if (errno == EAGAIN) {
diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c
--- a/Modules/_io/iobase.c
+++ b/Modules/_io/iobase.c
@@ -474,11 +474,15 @@
         PyObject *b;
 
         if (has_peek) {
-            _Py_IDENTIFIER(peek);
             PyObject *readahead = _PyObject_CallMethodId(self, &PyId_peek, "i", 1);
-
-            if (readahead == NULL)
+            if (readahead == NULL) {
+                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
+                   when EINTR occurs so we needn't do it ourselves. */
+                if (_PyIO_trap_eintr()) {
+                    continue;
+                }
                 goto fail;
+            }
             if (!PyBytes_Check(readahead)) {
                 PyErr_Format(PyExc_IOError,
                              "peek() should have returned a bytes object, "
@@ -511,8 +515,14 @@
         }
 
         b = _PyObject_CallMethodId(self, &PyId_read, "n", nreadahead);
-        if (b == NULL)
+        if (b == NULL) {
+            /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
+               when EINTR occurs so we needn't do it ourselves. */
+            if (_PyIO_trap_eintr()) {
+                continue;
+            }
             goto fail;
+        }
         if (!PyBytes_Check(b)) {
             PyErr_Format(PyExc_IOError,
                          "read() should have returned a bytes object, "
@@ -827,6 +837,11 @@
         PyObject *data = _PyObject_CallMethodId(self, &PyId_read,
                                                 "i", DEFAULT_BUFFER_SIZE);
         if (!data) {
+            /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
+               when EINTR occurs so we needn't do it ourselves. */
+            if (_PyIO_trap_eintr()) {
+                continue;
+            }
             Py_DECREF(chunks);
             return NULL;
         }
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -1560,8 +1560,14 @@
         /* Keep reading chunks until we have n characters to return */
         while (remaining > 0) {
             res = textiowrapper_read_chunk(self, remaining);
-            if (res < 0)
+            if (res < 0) {
+                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
+                   when EINTR occurs so we needn't do it ourselves. */
+                if (_PyIO_trap_eintr()) {
+                    continue;
+                }
                 goto fail;
+            }
             if (res == 0)  /* EOF */
                 break;
             if (chunks == NULL) {
@@ -1728,8 +1734,14 @@
         while (!self->decoded_chars ||
                !PyUnicode_GET_LENGTH(self->decoded_chars)) {
             res = textiowrapper_read_chunk(self, 0);
-            if (res < 0)
+            if (res < 0) {
+                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
+                   when EINTR occurs so we needn't do it ourselves. */
+                if (_PyIO_trap_eintr()) {
+                    continue;
+                }
                 goto error;
+            }
             if (res == 0)
                 break;
         }

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list