Skip to content

Commit d24c6a4

Browse files
committed
Fix edge cases in checking if zip64 extensions are required
Fixes a bug where data requiring zip64 extensions, written to an unseekable stream without `force_zip64=True`, was stored without zip64 extensions and no RuntimeError was raised when the file was closed (even though the size was known by that point). The result was a corrupt zip file written without any error. The decision on whether zip64 extensions are required is now made outside the `FileHeader` function, so `FileHeader` and `_ZipWriteFile` always stay in sync. Previously, `FileHeader` could enable zip64 extensions without propagating that decision to the `_ZipWriteFile` class, which then neither wrote the data descriptor record correctly nor checked for errors on close.
1 parent c42700d commit d24c6a4

File tree

2 files changed

+96
-15
lines changed

2 files changed

+96
-15
lines changed

Lib/test/test_zipfile/test_core.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,6 +1115,92 @@ def test_force_zip64(self):
11151115
self.assertEqual(len(zinfos), 1)
11161116
self.assertGreaterEqual(zinfos[0].extract_version, zipfile.ZIP64_VERSION) # requires zip64 to extract
11171117

1118+
def test_unseekable_zip_unknown_filesize(self):
    """Oversized data written without zip64 must raise RuntimeError.

    The entry is opened with force_zip64=False and no size hint, then more
    than ZIP64_LIMIT bytes are written to it.  A RuntimeError is expected
    for a seekable target as well as for an unseekable one.
    """
    def make_zip(fp):
        with zipfile.ZipFile(fp, mode="w", allowZip64=True) as archive:
            with archive.open("text.txt", mode="w", force_zip64=False) as entry:
                entry.write(b"_" * (zipfile.ZIP64_LIMIT + 1))

    # Seekable target.
    with self.assertRaises(RuntimeError):
        make_zip(io.BytesIO())

    # Unseekable target.
    with self.assertRaises(RuntimeError):
        make_zip(Unseekable(io.BytesIO()))
1128+
1129+
def test_zip64_required_not_allowed_fail(self):
    """Opening an oversized entry must fail when zip64 is not allowed.

    With allowZip64=False, opening an entry whose declared file_size is
    above ZIP64_LIMIT is expected to raise LargeZipFile, for a seekable
    target as well as for an unseekable one.
    """
    def make_zip(fp):
        with zipfile.ZipFile(fp, mode="w", allowZip64=False) as archive:
            # Stand-in for zipfile.ZipInfo.from_file(): only the name and
            # the file size are filled in.
            entry_info = zipfile.ZipInfo("text.txt")
            entry_info.file_size = zipfile.ZIP64_LIMIT + 1
            archive.open(entry_info, mode="w")

    # Seekable target.
    with self.assertRaises(zipfile.LargeZipFile):
        make_zip(io.BytesIO())

    # Unseekable target.
    with self.assertRaises(zipfile.LargeZipFile):
        make_zip(Unseekable(io.BytesIO()))
1140+
1141+
def test_unseekable_zip_known_filesize(self):
    """Test that zip64 extensions are used if the file size is provided up-front.

    The same archive is written to a seekable and to an unseekable target,
    and the raw bytes of the local file header (plus, for the unseekable
    target, the data descriptor) are unpacked and checked field by field.
    """

    file_size = zipfile.ZIP64_LIMIT + 1

    def make_zip(fp):
        with zipfile.ZipFile(fp, mode="w", allowZip64=True) as zf:
            # pretend zipfile.ZipInfo.from_file was used to get the name and filesize
            info = zipfile.ZipInfo("text.txt")
            info.file_size = file_size
            with zf.open(info, mode="w", force_zip64=False) as zi:
                zi.write(b"_" * file_size)
        return fp

    # check seekable file information
    seekable_data = make_zip(io.BytesIO()).getvalue()
    # unpack_from reads in place; slicing first would copy > 4 GiB of data
    (
        header, vers, os_compat, flags, comp, csize, usize, fn_len,
        ex_total_len, filename, ex_id, ex_len, ex_usize, ex_csize,
        cd_sig
    ) = struct.unpack_from("<4sBBHH8xIIHH8shhQQ{}x4s".format(file_size), seekable_data)

    self.assertEqual(header, b"PK\x03\x04")
    self.assertGreaterEqual(vers, zipfile.ZIP64_VERSION)  # requires zip64 to extract
    self.assertEqual(os_compat, 0)  # compatible with MS-DOS
    self.assertEqual(flags, 0)  # no flags set
    self.assertEqual(comp, 0)  # compression method = stored
    self.assertEqual(csize, 0xFFFFFFFF)  # sizes are in zip64 extra
    self.assertEqual(usize, 0xFFFFFFFF)
    self.assertEqual(fn_len, 8)  # filename len
    self.assertEqual(ex_total_len, 20)  # size of extra records
    self.assertEqual(filename, b"text.txt")  # name given to ZipInfo above
    self.assertEqual(ex_id, 1)  # Zip64 extra record
    self.assertEqual(ex_len, 16)  # 16 bytes of data
    self.assertEqual(ex_usize, file_size)  # uncompressed size
    self.assertEqual(ex_csize, file_size)  # compressed size
    self.assertEqual(cd_sig, b"PK\x01\x02")  # ensure the central directory header is next

    # check unseekable file information
    unseekable_data = make_zip(Unseekable(io.BytesIO())).fp.getvalue()
    (
        header, vers, os_compat, flags, comp, csize, usize, fn_len,
        ex_total_len, filename, ex_id, ex_len, ex_usize, ex_csize,
        dd_header, dd_usize, dd_csize, cd_sig
    ) = struct.unpack_from("<4sBBHH8xIIHH8shhQQ{}x4s4xQQ4s".format(file_size), unseekable_data)

    self.assertEqual(header, b"PK\x03\x04")
    self.assertGreaterEqual(vers, zipfile.ZIP64_VERSION)  # requires zip64 to extract
    self.assertEqual(os_compat, 0)  # compatible with MS-DOS
    self.assertEqual(flags, 0b1000)  # streaming flag set
    self.assertEqual(comp, 0)  # compression method = stored
    self.assertEqual(csize, 0xFFFFFFFF)  # sizes are in zip64 extra
    self.assertEqual(usize, 0xFFFFFFFF)
    self.assertEqual(fn_len, 8)  # filename len
    self.assertEqual(ex_total_len, 20)  # size of extra records
    self.assertEqual(filename, b"text.txt")  # name given to ZipInfo above
    self.assertEqual(ex_id, 1)  # Zip64 extra record
    self.assertEqual(ex_len, 16)  # 16 bytes of data
    self.assertEqual(ex_usize, 0)  # uncompressed size - 0 to defer to data descriptor
    self.assertEqual(ex_csize, 0)  # compressed size - 0 to defer to data descriptor
    self.assertEqual(dd_header, b"PK\x07\x08")  # data descriptor
    self.assertEqual(dd_usize, file_size)  # file size (8 bytes because zip64)
    self.assertEqual(dd_csize, file_size)  # compressed size (8 bytes because zip64)
    self.assertEqual(cd_sig, b"PK\x01\x02")  # ensure the central directory header is next
1203+
11181204

11191205
@requires_zlib()
11201206
class DeflateTestZip64InSmallFiles(AbstractTestZip64InSmallFiles,

Lib/zipfile/__init__.py

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ def __repr__(self):
439439
result.append('>')
440440
return ''.join(result)
441441

442-
def FileHeader(self, zip64=None):
442+
def FileHeader(self, zip64):
443443
"""Return the per-file header as a bytes object."""
444444
dt = self.date_time
445445
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
@@ -455,11 +455,6 @@ def FileHeader(self, zip64=None):
455455
extra = self.extra
456456

457457
min_version = 0
458-
if (file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT):
459-
if zip64 is None:
460-
zip64 = True
461-
elif not zip64:
462-
raise LargeZipFile("Filesize would require ZIP64 extensions")
463458
if zip64:
464459
fmt = '<HHQQ'
465460
extra = extra + struct.pack(fmt,
@@ -1215,6 +1210,12 @@ def close(self):
12151210
self._zinfo.CRC = self._crc
12161211
self._zinfo.file_size = self._file_size
12171212

1213+
if not self._zip64:
1214+
if self._file_size > ZIP64_LIMIT:
1215+
raise RuntimeError("File size too large, try using force_zip64")
1216+
if self._compress_size > ZIP64_LIMIT:
1217+
raise RuntimeError("Compressed size too large, try using force_zip64")
1218+
12181219
# Write updated header info
12191220
if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
12201221
# Write CRC and file sizes after the file data
@@ -1223,13 +1224,6 @@ def close(self):
12231224
self._zinfo.compress_size, self._zinfo.file_size))
12241225
self._zipfile.start_dir = self._fileobj.tell()
12251226
else:
1226-
if not self._zip64:
1227-
if self._file_size > ZIP64_LIMIT:
1228-
raise RuntimeError(
1229-
'File size too large, try using force_zip64')
1230-
if self._compress_size > ZIP64_LIMIT:
1231-
raise RuntimeError(
1232-
'Compressed size too large, try using force_zip64')
12331227
# Seek backwards and write file header (which will now include
12341228
# correct CRC and file sizes)
12351229

@@ -1668,8 +1662,9 @@ def _open_to_write(self, zinfo, force_zip64=False):
16681662
zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
16691663

16701664
# Compressed size can be larger than uncompressed size
1671-
zip64 = self._allowZip64 and \
1672-
(force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1665+
zip64 = force_zip64 or (zinfo.file_size * 1.05 > ZIP64_LIMIT)
1666+
if not self._allowZip64 and zip64:
1667+
raise LargeZipFile("Filesize would require ZIP64 extensions")
16731668

16741669
if self._seekable:
16751670
self.fp.seek(self.start_dir)

0 commit comments

Comments
 (0)