[Python-checkins] GH-87235: Make sure "python /dev/fd/9 9</path/to/script.py" works on macOS (#99768)

Sun Nov 27 05:56:19 EST 2022

https://github.com/python/cpython/commit/d08fb257698e3475d6f69bb808211d39e344e5b2
commit: d08fb257698e3475d6f69bb808211d39e344e5b2
branch: main
author: Ronald Oussoren <ronaldoussoren at mac.com>
committer: ronaldoussoren <ronaldoussoren at mac.com>
date: 2022-11-27T11:56:14+01:00
summary:

GH-87235: Make sure "python /dev/fd/9 9</path/to/script.py" works on macOS (#99768)

On macOS all file descriptors for a particular file in /dev/fd
share the same file offset, that is ``open("/dev/fd/9", "r")`` behaves
more like ``dup(9)`` than a regular open.

This causes problems when a user tries to run "/dev/fd/9" as a script
because zipimport changes the file offset to try to read a zipfile
directory. Therefore change zipimport to reset the file offset after
trying to read the zipfile directory.

files:
A Misc/NEWS.d/next/macOS/2022-11-25-09-23-20.gh-issue-87235.SifjCD.rst
M Lib/test/test_cmd_line_script.py
M Lib/zipimport.py

diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py
index 1ee3acd7076c..c838e95ad554 100644
--- a/Lib/test/test_cmd_line_script.py
+++ b/Lib/test/test_cmd_line_script.py
@@ -752,6 +752,20 @@ def test_nonexisting_script(self):
         self.assertIn(": can't open file ", err)
         self.assertNotEqual(proc.returncode, 0)
 
+    @unittest.skipUnless(os.path.exists('/dev/fd/0'), 'requires /dev/fd platform')
+    def test_script_as_dev_fd(self):
+        # GH-87235: On macOS passing a non-trivial script to /dev/fd/N can cause
+        # problems because all open /dev/fd/N file descriptors share the same
+        # offset.
+        script = 'print("12345678912345678912345")'
+        with os_helper.temp_dir() as work_dir:
+            script_name = _make_test_script(work_dir, 'script.py', script)
+            with open(script_name, "r") as fp:
+                p = spawn_python(f"/dev/fd/{fp.fileno()}", close_fds=False, pass_fds=(0,1,2,fp.fileno()))
+                out, err = p.communicate()
+                self.assertEqual(out, b"12345678912345678912345\n")
+
+
 
 def tearDownModule():
     support.reap_children()
diff --git a/Lib/zipimport.py b/Lib/zipimport.py
index 016f1b8a7979..a7333a4c4906 100644
--- a/Lib/zipimport.py
+++ b/Lib/zipimport.py
@@ -347,114 +347,121 @@ def _read_directory(archive):
         raise ZipImportError(f"can't open Zip file: {archive!r}", path=archive)
 
     with fp:
+        # GH-87235: On macOS all file descriptors for /dev/fd/N share the same
+        # file offset, reset the file offset after scanning the zipfile diretory
+        # to not cause problems when some runs 'python3 /dev/fd/9 9<some_script'
+        start_offset = fp.tell()
         try:
-            fp.seek(-END_CENTRAL_DIR_SIZE, 2)
-            header_position = fp.tell()
-            buffer = fp.read(END_CENTRAL_DIR_SIZE)
-        except OSError:
-            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
-        if len(buffer) != END_CENTRAL_DIR_SIZE:
-            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
-        if buffer[:4] != STRING_END_ARCHIVE:
-            # Bad: End of Central Dir signature
-            # Check if there's a comment.
             try:
-                fp.seek(0, 2)
-                file_size = fp.tell()
-            except OSError:
-                raise ZipImportError(f"can't read Zip file: {archive!r}",
-                                     path=archive)
-            max_comment_start = max(file_size - MAX_COMMENT_LEN -
-                                    END_CENTRAL_DIR_SIZE, 0)
-            try:
-                fp.seek(max_comment_start)
-                data = fp.read()
-            except OSError:
-                raise ZipImportError(f"can't read Zip file: {archive!r}",
-                                     path=archive)
-            pos = data.rfind(STRING_END_ARCHIVE)
-            if pos < 0:
-                raise ZipImportError(f'not a Zip file: {archive!r}',
-                                     path=archive)
-            buffer = data[pos:pos+END_CENTRAL_DIR_SIZE]
-            if len(buffer) != END_CENTRAL_DIR_SIZE:
-                raise ZipImportError(f"corrupt Zip file: {archive!r}",
-                                     path=archive)
-            header_position = file_size - len(data) + pos
-
-        header_size = _unpack_uint32(buffer[12:16])
-        header_offset = _unpack_uint32(buffer[16:20])
-        if header_position < header_size:
-            raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
-        if header_position < header_offset:
-            raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
-        header_position -= header_size
-        arc_offset = header_position - header_offset
-        if arc_offset < 0:
-            raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)
-
-        files = {}
-        # Start of Central Directory
-        count = 0
-        try:
-            fp.seek(header_position)
-        except OSError:
-            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
-        while True:
-            buffer = fp.read(46)
-            if len(buffer) < 4:
-                raise EOFError('EOF read where not expected')
-            # Start of file header
-            if buffer[:4] != b'PK\x01\x02':
-                break                                # Bad: Central Dir File Header
-            if len(buffer) != 46:
-                raise EOFError('EOF read where not expected')
-            flags = _unpack_uint16(buffer[8:10])
-            compress = _unpack_uint16(buffer[10:12])
-            time = _unpack_uint16(buffer[12:14])
-            date = _unpack_uint16(buffer[14:16])
-            crc = _unpack_uint32(buffer[16:20])
-            data_size = _unpack_uint32(buffer[20:24])
-            file_size = _unpack_uint32(buffer[24:28])
-            name_size = _unpack_uint16(buffer[28:30])
-            extra_size = _unpack_uint16(buffer[30:32])
-            comment_size = _unpack_uint16(buffer[32:34])
-            file_offset = _unpack_uint32(buffer[42:46])
-            header_size = name_size + extra_size + comment_size
-            if file_offset > header_offset:
-                raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
-            file_offset += arc_offset
-
-            try:
-                name = fp.read(name_size)
+                fp.seek(-END_CENTRAL_DIR_SIZE, 2)
+                header_position = fp.tell()
+                buffer = fp.read(END_CENTRAL_DIR_SIZE)
             except OSError:
                 raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
-            if len(name) != name_size:
+            if len(buffer) != END_CENTRAL_DIR_SIZE:
                 raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
-            # On Windows, calling fseek to skip over the fields we don't use is
-            # slower than reading the data because fseek flushes stdio's
-            # internal buffers.    See issue #8745.
+            if buffer[:4] != STRING_END_ARCHIVE:
+                # Bad: End of Central Dir signature
+                # Check if there's a comment.
+                try:
+                    fp.seek(0, 2)
+                    file_size = fp.tell()
+                except OSError:
+                    raise ZipImportError(f"can't read Zip file: {archive!r}",
+                                         path=archive)
+                max_comment_start = max(file_size - MAX_COMMENT_LEN -
+                                        END_CENTRAL_DIR_SIZE, 0)
+                try:
+                    fp.seek(max_comment_start)
+                    data = fp.read()
+                except OSError:
+                    raise ZipImportError(f"can't read Zip file: {archive!r}",
+                                         path=archive)
+                pos = data.rfind(STRING_END_ARCHIVE)
+                if pos < 0:
+                    raise ZipImportError(f'not a Zip file: {archive!r}',
+                                         path=archive)
+                buffer = data[pos:pos+END_CENTRAL_DIR_SIZE]
+                if len(buffer) != END_CENTRAL_DIR_SIZE:
+                    raise ZipImportError(f"corrupt Zip file: {archive!r}",
+                                         path=archive)
+                header_position = file_size - len(data) + pos
+
+            header_size = _unpack_uint32(buffer[12:16])
+            header_offset = _unpack_uint32(buffer[16:20])
+            if header_position < header_size:
+                raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
+            if header_position < header_offset:
+                raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
+            header_position -= header_size
+            arc_offset = header_position - header_offset
+            if arc_offset < 0:
+                raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)
+
+            files = {}
+            # Start of Central Directory
+            count = 0
             try:
-                if len(fp.read(header_size - name_size)) != header_size - name_size:
-                    raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
+                fp.seek(header_position)
             except OSError:
                 raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
+            while True:
+                buffer = fp.read(46)
+                if len(buffer) < 4:
+                    raise EOFError('EOF read where not expected')
+                # Start of file header
+                if buffer[:4] != b'PK\x01\x02':
+                    break                                # Bad: Central Dir File Header
+                if len(buffer) != 46:
+                    raise EOFError('EOF read where not expected')
+                flags = _unpack_uint16(buffer[8:10])
+                compress = _unpack_uint16(buffer[10:12])
+                time = _unpack_uint16(buffer[12:14])
+                date = _unpack_uint16(buffer[14:16])
+                crc = _unpack_uint32(buffer[16:20])
+                data_size = _unpack_uint32(buffer[20:24])
+                file_size = _unpack_uint32(buffer[24:28])
+                name_size = _unpack_uint16(buffer[28:30])
+                extra_size = _unpack_uint16(buffer[30:32])
+                comment_size = _unpack_uint16(buffer[32:34])
+                file_offset = _unpack_uint32(buffer[42:46])
+                header_size = name_size + extra_size + comment_size
+                if file_offset > header_offset:
+                    raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
+                file_offset += arc_offset
 
-            if flags & 0x800:
-                # UTF-8 file names extension
-                name = name.decode()
-            else:
-                # Historical ZIP filename encoding
                 try:
-                    name = name.decode('ascii')
-                except UnicodeDecodeError:
-                    name = name.decode('latin1').translate(cp437_table)
-
-            name = name.replace('/', path_sep)
-            path = _bootstrap_external._path_join(archive, name)
-            t = (path, compress, data_size, file_size, file_offset, time, date, crc)
-            files[name] = t
-            count += 1
+                    name = fp.read(name_size)
+                except OSError:
+                    raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
+                if len(name) != name_size:
+                    raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
+                # On Windows, calling fseek to skip over the fields we don't use is
+                # slower than reading the data because fseek flushes stdio's
+                # internal buffers.    See issue #8745.
+                try:
+                    if len(fp.read(header_size - name_size)) != header_size - name_size:
+                        raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
+                except OSError:
+                    raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
+
+                if flags & 0x800:
+                    # UTF-8 file names extension
+                    name = name.decode()
+                else:
+                    # Historical ZIP filename encoding
+                    try:
+                        name = name.decode('ascii')
+                    except UnicodeDecodeError:
+                        name = name.decode('latin1').translate(cp437_table)
+
+                name = name.replace('/', path_sep)
+                path = _bootstrap_external._path_join(archive, name)
+                t = (path, compress, data_size, file_size, file_offset, time, date, crc)
+                files[name] = t
+                count += 1
+        finally:
+            fp.seek(start_offset)
     _bootstrap._verbose_message('zipimport: found {} names in {!r}', count, archive)
     return files
 
diff --git a/Misc/NEWS.d/next/macOS/2022-11-25-09-23-20.gh-issue-87235.SifjCD.rst b/Misc/NEWS.d/next/macOS/2022-11-25-09-23-20.gh-issue-87235.SifjCD.rst
new file mode 100644
index 000000000000..3111e4975e87
--- /dev/null
+++ b/Misc/NEWS.d/next/macOS/2022-11-25-09-23-20.gh-issue-87235.SifjCD.rst
@@ -0,0 +1 @@
+On macOS ``python3 /dev/fd/9 9</path/to/script.py`` failed for any script longer than a couple of bytes.