[Python-checkins] bpo-43785: Improve BZ2File performance by removing RLock (GH-25299)

methane webhook-mailer at python.org
Mon Apr 12 01:47:01 EDT 2021


https://github.com/python/cpython/commit/cc2ffcdfd78df3a18edae60df81b2f1b044b1634
commit: cc2ffcdfd78df3a18edae60df81b2f1b044b1634
branch: master
author: Inada Naoki <songofacandy at gmail.com>
committer: methane <songofacandy at gmail.com>
date: 2021-04-12T14:46:53+09:00
summary:

bpo-43785: Improve BZ2File performance by removing RLock (GH-25299)

Remove `RLock` from `BZ2File`. It makes `BZ2File` to thread unsafe, but
gzip and lzma don't use it too.

Co-authored-by: Gregory P. Smith <greg at krypto.org>

files:
A Misc/NEWS.d/next/Library/2021-04-09-14-51-58.bpo-43785.1mM5xE.rst
M Lib/bz2.py

diff --git a/Lib/bz2.py b/Lib/bz2.py
index 1da3ce65c81b7..43f321ae85239 100644
--- a/Lib/bz2.py
+++ b/Lib/bz2.py
@@ -13,7 +13,6 @@
 import io
 import os
 import _compression
-from threading import RLock
 
 from _bz2 import BZ2Compressor, BZ2Decompressor
 
@@ -53,9 +52,6 @@ def __init__(self, filename, mode="r", *, compresslevel=9):
         If mode is 'r', the input file may be the concatenation of
         multiple compressed streams.
         """
-        # This lock must be recursive, so that BufferedIOBase's
-        # writelines() does not deadlock.
-        self._lock = RLock()
         self._fp = None
         self._closefp = False
         self._mode = _MODE_CLOSED
@@ -104,24 +100,23 @@ def close(self):
         May be called more than once without error. Once the file is
         closed, any other operation on it will raise a ValueError.
         """
-        with self._lock:
-            if self._mode == _MODE_CLOSED:
-                return
+        if self._mode == _MODE_CLOSED:
+            return
+        try:
+            if self._mode == _MODE_READ:
+                self._buffer.close()
+            elif self._mode == _MODE_WRITE:
+                self._fp.write(self._compressor.flush())
+                self._compressor = None
+        finally:
             try:
-                if self._mode == _MODE_READ:
-                    self._buffer.close()
-                elif self._mode == _MODE_WRITE:
-                    self._fp.write(self._compressor.flush())
-                    self._compressor = None
+                if self._closefp:
+                    self._fp.close()
             finally:
-                try:
-                    if self._closefp:
-                        self._fp.close()
-                finally:
-                    self._fp = None
-                    self._closefp = False
-                    self._mode = _MODE_CLOSED
-                    self._buffer = None
+                self._fp = None
+                self._closefp = False
+                self._mode = _MODE_CLOSED
+                self._buffer = None
 
     @property
     def closed(self):
@@ -153,12 +148,11 @@ def peek(self, n=0):
         Always returns at least one byte of data, unless at EOF.
         The exact number of bytes returned is unspecified.
         """
-        with self._lock:
-            self._check_can_read()
-            # Relies on the undocumented fact that BufferedReader.peek()
-            # always returns at least one byte (except at EOF), independent
-            # of the value of n
-            return self._buffer.peek(n)
+        self._check_can_read()
+        # Relies on the undocumented fact that BufferedReader.peek()
+        # always returns at least one byte (except at EOF), independent
+        # of the value of n
+        return self._buffer.peek(n)
 
     def read(self, size=-1):
         """Read up to size uncompressed bytes from the file.
@@ -166,9 +160,8 @@ def read(self, size=-1):
         If size is negative or omitted, read until EOF is reached.
         Returns b'' if the file is already at EOF.
         """
-        with self._lock:
-            self._check_can_read()
-            return self._buffer.read(size)
+        self._check_can_read()
+        return self._buffer.read(size)
 
     def read1(self, size=-1):
         """Read up to size uncompressed bytes, while trying to avoid
@@ -177,20 +170,18 @@ def read1(self, size=-1):
 
         Returns b'' if the file is at EOF.
         """
-        with self._lock:
-            self._check_can_read()
-            if size < 0:
-                size = io.DEFAULT_BUFFER_SIZE
-            return self._buffer.read1(size)
+        self._check_can_read()
+        if size < 0:
+            size = io.DEFAULT_BUFFER_SIZE
+        return self._buffer.read1(size)
 
     def readinto(self, b):
         """Read bytes into b.
 
         Returns the number of bytes read (0 for EOF).
         """
-        with self._lock:
-            self._check_can_read()
-            return self._buffer.readinto(b)
+        self._check_can_read()
+        return self._buffer.readinto(b)
 
     def readline(self, size=-1):
         """Read a line of uncompressed bytes from the file.
@@ -203,9 +194,8 @@ def readline(self, size=-1):
             if not hasattr(size, "__index__"):
                 raise TypeError("Integer argument expected")
             size = size.__index__()
-        with self._lock:
-            self._check_can_read()
-            return self._buffer.readline(size)
+        self._check_can_read()
+        return self._buffer.readline(size)
 
     def readlines(self, size=-1):
         """Read a list of lines of uncompressed bytes from the file.
@@ -218,9 +208,8 @@ def readlines(self, size=-1):
             if not hasattr(size, "__index__"):
                 raise TypeError("Integer argument expected")
             size = size.__index__()
-        with self._lock:
-            self._check_can_read()
-            return self._buffer.readlines(size)
+        self._check_can_read()
+        return self._buffer.readlines(size)
 
     def write(self, data):
         """Write a byte string to the file.
@@ -229,12 +218,11 @@ def write(self, data):
         always len(data). Note that due to buffering, the file on disk
         may not reflect the data written until close() is called.
         """
-        with self._lock:
-            self._check_can_write()
-            compressed = self._compressor.compress(data)
-            self._fp.write(compressed)
-            self._pos += len(data)
-            return len(data)
+        self._check_can_write()
+        compressed = self._compressor.compress(data)
+        self._fp.write(compressed)
+        self._pos += len(data)
+        return len(data)
 
     def writelines(self, seq):
         """Write a sequence of byte strings to the file.
@@ -244,8 +232,7 @@ def writelines(self, seq):
 
         Line separators are not added between the written byte strings.
         """
-        with self._lock:
-            return _compression.BaseStream.writelines(self, seq)
+        return _compression.BaseStream.writelines(self, seq)
 
     def seek(self, offset, whence=io.SEEK_SET):
         """Change the file position.
@@ -262,17 +249,15 @@ def seek(self, offset, whence=io.SEEK_SET):
         Note that seeking is emulated, so depending on the parameters,
         this operation may be extremely slow.
         """
-        with self._lock:
-            self._check_can_seek()
-            return self._buffer.seek(offset, whence)
+        self._check_can_seek()
+        return self._buffer.seek(offset, whence)
 
     def tell(self):
         """Return the current file position."""
-        with self._lock:
-            self._check_not_closed()
-            if self._mode == _MODE_READ:
-                return self._buffer.tell()
-            return self._pos
+        self._check_not_closed()
+        if self._mode == _MODE_READ:
+            return self._buffer.tell()
+        return self._pos
 
 
 def open(filename, mode="rb", compresslevel=9,
diff --git a/Misc/NEWS.d/next/Library/2021-04-09-14-51-58.bpo-43785.1mM5xE.rst b/Misc/NEWS.d/next/Library/2021-04-09-14-51-58.bpo-43785.1mM5xE.rst
new file mode 100644
index 0000000000000..b4ed5e51e2247
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-04-09-14-51-58.bpo-43785.1mM5xE.rst
@@ -0,0 +1,4 @@
+Improve ``bz2.BZ2File`` performance by removing the RLock from BZ2File.
+This makes BZ2File thread unsafe in the face of multiple simultaneous
+readers or writers, just like its equivalent classes in :mod:`gzip` and
+:mod:`lzma` have always been.  Patch by Inada Naoki.



More information about the Python-checkins mailing list