Skip to content

Commit cc2ffcd

Browse files
methane and gpshead authored
bpo-43785: Improve BZ2File performance by removing RLock (GH-25299)
Remove `RLock` from `BZ2File`. This makes `BZ2File` thread-unsafe, but gzip and lzma don't use a lock either. Co-authored-by: Gregory P. Smith <[email protected]>
1 parent 553ee27 commit cc2ffcd

File tree

2 files changed

+48
-59
lines changed

2 files changed

+48
-59
lines changed

Lib/bz2.py

Lines changed: 44 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@
1313
import io
1414
import os
1515
import _compression
16-
from threading import RLock
1716

1817
from _bz2 import BZ2Compressor, BZ2Decompressor
1918

@@ -53,9 +52,6 @@ def __init__(self, filename, mode="r", *, compresslevel=9):
5352
If mode is 'r', the input file may be the concatenation of
5453
multiple compressed streams.
5554
"""
56-
# This lock must be recursive, so that BufferedIOBase's
57-
# writelines() does not deadlock.
58-
self._lock = RLock()
5955
self._fp = None
6056
self._closefp = False
6157
self._mode = _MODE_CLOSED
@@ -104,24 +100,23 @@ def close(self):
104100
May be called more than once without error. Once the file is
105101
closed, any other operation on it will raise a ValueError.
106102
"""
107-
with self._lock:
108-
if self._mode == _MODE_CLOSED:
109-
return
103+
if self._mode == _MODE_CLOSED:
104+
return
105+
try:
106+
if self._mode == _MODE_READ:
107+
self._buffer.close()
108+
elif self._mode == _MODE_WRITE:
109+
self._fp.write(self._compressor.flush())
110+
self._compressor = None
111+
finally:
110112
try:
111-
if self._mode == _MODE_READ:
112-
self._buffer.close()
113-
elif self._mode == _MODE_WRITE:
114-
self._fp.write(self._compressor.flush())
115-
self._compressor = None
113+
if self._closefp:
114+
self._fp.close()
116115
finally:
117-
try:
118-
if self._closefp:
119-
self._fp.close()
120-
finally:
121-
self._fp = None
122-
self._closefp = False
123-
self._mode = _MODE_CLOSED
124-
self._buffer = None
116+
self._fp = None
117+
self._closefp = False
118+
self._mode = _MODE_CLOSED
119+
self._buffer = None
125120

126121
@property
127122
def closed(self):
@@ -153,22 +148,20 @@ def peek(self, n=0):
153148
Always returns at least one byte of data, unless at EOF.
154149
The exact number of bytes returned is unspecified.
155150
"""
156-
with self._lock:
157-
self._check_can_read()
158-
# Relies on the undocumented fact that BufferedReader.peek()
159-
# always returns at least one byte (except at EOF), independent
160-
# of the value of n
161-
return self._buffer.peek(n)
151+
self._check_can_read()
152+
# Relies on the undocumented fact that BufferedReader.peek()
153+
# always returns at least one byte (except at EOF), independent
154+
# of the value of n
155+
return self._buffer.peek(n)
162156

163157
def read(self, size=-1):
164158
"""Read up to size uncompressed bytes from the file.
165159
166160
If size is negative or omitted, read until EOF is reached.
167161
Returns b'' if the file is already at EOF.
168162
"""
169-
with self._lock:
170-
self._check_can_read()
171-
return self._buffer.read(size)
163+
self._check_can_read()
164+
return self._buffer.read(size)
172165

173166
def read1(self, size=-1):
174167
"""Read up to size uncompressed bytes, while trying to avoid
@@ -177,20 +170,18 @@ def read1(self, size=-1):
177170
178171
Returns b'' if the file is at EOF.
179172
"""
180-
with self._lock:
181-
self._check_can_read()
182-
if size < 0:
183-
size = io.DEFAULT_BUFFER_SIZE
184-
return self._buffer.read1(size)
173+
self._check_can_read()
174+
if size < 0:
175+
size = io.DEFAULT_BUFFER_SIZE
176+
return self._buffer.read1(size)
185177

186178
def readinto(self, b):
187179
"""Read bytes into b.
188180
189181
Returns the number of bytes read (0 for EOF).
190182
"""
191-
with self._lock:
192-
self._check_can_read()
193-
return self._buffer.readinto(b)
183+
self._check_can_read()
184+
return self._buffer.readinto(b)
194185

195186
def readline(self, size=-1):
196187
"""Read a line of uncompressed bytes from the file.
@@ -203,9 +194,8 @@ def readline(self, size=-1):
203194
if not hasattr(size, "__index__"):
204195
raise TypeError("Integer argument expected")
205196
size = size.__index__()
206-
with self._lock:
207-
self._check_can_read()
208-
return self._buffer.readline(size)
197+
self._check_can_read()
198+
return self._buffer.readline(size)
209199

210200
def readlines(self, size=-1):
211201
"""Read a list of lines of uncompressed bytes from the file.
@@ -218,9 +208,8 @@ def readlines(self, size=-1):
218208
if not hasattr(size, "__index__"):
219209
raise TypeError("Integer argument expected")
220210
size = size.__index__()
221-
with self._lock:
222-
self._check_can_read()
223-
return self._buffer.readlines(size)
211+
self._check_can_read()
212+
return self._buffer.readlines(size)
224213

225214
def write(self, data):
226215
"""Write a byte string to the file.
@@ -229,12 +218,11 @@ def write(self, data):
229218
always len(data). Note that due to buffering, the file on disk
230219
may not reflect the data written until close() is called.
231220
"""
232-
with self._lock:
233-
self._check_can_write()
234-
compressed = self._compressor.compress(data)
235-
self._fp.write(compressed)
236-
self._pos += len(data)
237-
return len(data)
221+
self._check_can_write()
222+
compressed = self._compressor.compress(data)
223+
self._fp.write(compressed)
224+
self._pos += len(data)
225+
return len(data)
238226

239227
def writelines(self, seq):
240228
"""Write a sequence of byte strings to the file.
@@ -244,8 +232,7 @@ def writelines(self, seq):
244232
245233
Line separators are not added between the written byte strings.
246234
"""
247-
with self._lock:
248-
return _compression.BaseStream.writelines(self, seq)
235+
return _compression.BaseStream.writelines(self, seq)
249236

250237
def seek(self, offset, whence=io.SEEK_SET):
251238
"""Change the file position.
@@ -262,17 +249,15 @@ def seek(self, offset, whence=io.SEEK_SET):
262249
Note that seeking is emulated, so depending on the parameters,
263250
this operation may be extremely slow.
264251
"""
265-
with self._lock:
266-
self._check_can_seek()
267-
return self._buffer.seek(offset, whence)
252+
self._check_can_seek()
253+
return self._buffer.seek(offset, whence)
268254

269255
def tell(self):
270256
"""Return the current file position."""
271-
with self._lock:
272-
self._check_not_closed()
273-
if self._mode == _MODE_READ:
274-
return self._buffer.tell()
275-
return self._pos
257+
self._check_not_closed()
258+
if self._mode == _MODE_READ:
259+
return self._buffer.tell()
260+
return self._pos
276261

277262

278263
def open(filename, mode="rb", compresslevel=9,
Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
Improve ``bz2.BZ2File`` performance by removing the RLock from BZ2File.
2+
This makes BZ2File thread unsafe in the face of multiple simultaneous
3+
readers or writers, just like its equivalent classes in :mod:`gzip` and
4+
:mod:`lzma` have always been. Patch by Inada Naoki.

0 commit comments

Comments
 (0)