Skip to content

Commit 8bc26d8

Browse files
Ma Lin and marcoffee authored
bpo-44439: BZ2File.write()/LZMAFile.write() handle length correctly (GH-26846)
No longer use len() to get the length of the input data. For some buffer protocol objects, the length obtained by using len() is wrong.

Co-authored-by: Marco Ribeiro <[email protected]>
1 parent 0ff487b commit 8bc26d8

File tree

5 files changed

+45
-8
lines changed

5 files changed

+45
-8
lines changed

Lib/bz2.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -226,15 +226,23 @@ def write(self, data):
226226
"""Write a byte string to the file.
227227
228228
Returns the number of uncompressed bytes written, which is
229-
always len(data). Note that due to buffering, the file on disk
230-
may not reflect the data written until close() is called.
229+
always the length of data in bytes. Note that due to buffering,
230+
the file on disk may not reflect the data written until close()
231+
is called.
231232
"""
232233
with self._lock:
233234
self._check_can_write()
235+
if isinstance(data, (bytes, bytearray)):
236+
length = len(data)
237+
else:
238+
# accept any data that supports the buffer protocol
239+
data = memoryview(data)
240+
length = data.nbytes
241+
234242
compressed = self._compressor.compress(data)
235243
self._fp.write(compressed)
236-
self._pos += len(data)
237-
return len(data)
244+
self._pos += length
245+
return length
238246

239247
def writelines(self, seq):
240248
"""Write a sequence of byte strings to the file.

Lib/lzma.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -225,14 +225,22 @@ def write(self, data):
225225
"""Write a bytes object to the file.
226226
227227
Returns the number of uncompressed bytes written, which is
228-
always len(data). Note that due to buffering, the file on disk
229-
may not reflect the data written until close() is called.
228+
always the length of data in bytes. Note that due to buffering,
229+
the file on disk may not reflect the data written until close()
230+
is called.
230231
"""
231232
self._check_can_write()
233+
if isinstance(data, (bytes, bytearray)):
234+
length = len(data)
235+
else:
236+
# accept any data that supports the buffer protocol
237+
data = memoryview(data)
238+
length = data.nbytes
239+
232240
compressed = self._compressor.compress(data)
233241
self._fp.write(compressed)
234-
self._pos += len(data)
235-
return len(data)
242+
self._pos += length
243+
return length
236244

237245
def seek(self, offset, whence=io.SEEK_SET):
238246
"""Change the file position.

Lib/test/test_bz2.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from test import support
22
from test.support import bigmemtest, _4G
33

4+
import array
45
import unittest
56
from io import BytesIO, DEFAULT_BUFFER_SIZE
67
import os
@@ -618,6 +619,14 @@ def test_read_truncated(self):
618619
with BZ2File(BytesIO(truncated[:i])) as f:
619620
self.assertRaises(EOFError, f.read, 1)
620621

622+
def test_issue44439(self):
623+
q = array.array('Q', [1, 2, 3, 4, 5])
624+
LENGTH = len(q) * q.itemsize
625+
626+
with BZ2File(BytesIO(), 'w') as f:
627+
self.assertEqual(f.write(q), LENGTH)
628+
self.assertEqual(f.tell(), LENGTH)
629+
621630

622631
class BZ2CompressorTest(BaseTest):
623632
def testCompress(self):

Lib/test/test_lzma.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import _compression
2+
import array
23
from io import BytesIO, UnsupportedOperation, DEFAULT_BUFFER_SIZE
34
import os
45
import pathlib
@@ -1227,6 +1228,14 @@ def test_issue21872(self):
12271228
self.assertTrue(d2.eof)
12281229
self.assertEqual(out1 + out2, entire)
12291230

1231+
def test_issue44439(self):
1232+
q = array.array('Q', [1, 2, 3, 4, 5])
1233+
LENGTH = len(q) * q.itemsize
1234+
1235+
with LZMAFile(BytesIO(), 'w') as f:
1236+
self.assertEqual(f.write(q), LENGTH)
1237+
self.assertEqual(f.tell(), LENGTH)
1238+
12301239

12311240
class OpenTestCase(unittest.TestCase):
12321241

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix in :meth:`bz2.BZ2File.write` / :meth:`lzma.LZMAFile.write` methods, when
2+
the input data is an object that supports the buffer protocol, the file length
3+
may be wrong.

0 commit comments

Comments
 (0)