Skip to content

Commit 182d7cd

Browse files
Issue #9720: zipfile now writes correct local headers for files larger than 4 GiB.
Parent: 03530b9 · Commit: 182d7cd

File tree

2 files changed

35 additions and 15 deletions (lines changed)

2 files changed

35 additions and 15 deletions (lines changed)

Lib/zipfile.py

Lines changed: 32 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -326,7 +326,7 @@ def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
326326
# compress_size Size of the compressed file
327327
# file_size Size of the uncompressed file
328328

329-
def FileHeader(self):
329+
def FileHeader(self, zip64=None):
330330
"""Return the per-file header as a string."""
331331
dt = self.date_time
332332
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
@@ -341,12 +341,17 @@ def FileHeader(self):
341341

342342
extra = self.extra
343343

344-
if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
345-
# File is larger than what fits into a 4 byte integer,
346-
# fall back to the ZIP64 extension
344+
if zip64 is None:
345+
zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
346+
if zip64:
347347
fmt = '<HHQQ'
348348
extra = extra + struct.pack(fmt,
349349
1, struct.calcsize(fmt)-4, file_size, compress_size)
350+
if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
351+
if not zip64:
352+
raise LargeZipFile("Filesize would require ZIP64 extensions")
353+
# File is larger than what fits into a 4 byte integer,
354+
# fall back to the ZIP64 extension
350355
file_size = 0xffffffff
351356
compress_size = 0xffffffff
352357
self.extract_version = max(45, self.extract_version)
@@ -1135,20 +1140,23 @@ def write(self, filename, arcname=None, compress_type=None):
11351140
zinfo.CRC = 0
11361141
self.filelist.append(zinfo)
11371142
self.NameToInfo[zinfo.filename] = zinfo
1138-
self.fp.write(zinfo.FileHeader())
1143+
self.fp.write(zinfo.FileHeader(False))
11391144
return
11401145

11411146
with open(filename, "rb") as fp:
11421147
# Must overwrite CRC and sizes with correct data later
11431148
zinfo.CRC = CRC = 0
11441149
zinfo.compress_size = compress_size = 0
1145-
zinfo.file_size = file_size = 0
1146-
self.fp.write(zinfo.FileHeader())
1150+
# Compressed size can be larger than uncompressed size
1151+
zip64 = self._allowZip64 and \
1152+
zinfo.file_size * 1.05 > ZIP64_LIMIT
1153+
self.fp.write(zinfo.FileHeader(zip64))
11471154
if zinfo.compress_type == ZIP_DEFLATED:
11481155
cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
11491156
zlib.DEFLATED, -15)
11501157
else:
11511158
cmpr = None
1159+
file_size = 0
11521160
while 1:
11531161
buf = fp.read(1024 * 8)
11541162
if not buf:
@@ -1168,11 +1176,16 @@ def write(self, filename, arcname=None, compress_type=None):
11681176
zinfo.compress_size = file_size
11691177
zinfo.CRC = CRC
11701178
zinfo.file_size = file_size
1171-
# Seek backwards and write CRC and file sizes
1179+
if not zip64 and self._allowZip64:
1180+
if file_size > ZIP64_LIMIT:
1181+
raise RuntimeError('File size has increased during compressing')
1182+
if compress_size > ZIP64_LIMIT:
1183+
raise RuntimeError('Compressed size larger than uncompressed size')
1184+
# Seek backwards and write file header (which will now include
1185+
# correct CRC and file sizes)
11721186
position = self.fp.tell() # Preserve current position in file
1173-
self.fp.seek(zinfo.header_offset + 14, 0)
1174-
self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
1175-
zinfo.file_size))
1187+
self.fp.seek(zinfo.header_offset, 0)
1188+
self.fp.write(zinfo.FileHeader(zip64))
11761189
self.fp.seek(position, 0)
11771190
self.filelist.append(zinfo)
11781191
self.NameToInfo[zinfo.filename] = zinfo
@@ -1212,14 +1225,18 @@ def writestr(self, zinfo_or_arcname, data, compress_type=None):
12121225
zinfo.compress_size = len(data) # Compressed size
12131226
else:
12141227
zinfo.compress_size = zinfo.file_size
1215-
zinfo.header_offset = self.fp.tell() # Start of header data
1216-
self.fp.write(zinfo.FileHeader())
1228+
zip64 = zinfo.file_size > ZIP64_LIMIT or \
1229+
zinfo.compress_size > ZIP64_LIMIT
1230+
if zip64 and not self._allowZip64:
1231+
raise LargeZipFile("Filesize would require ZIP64 extensions")
1232+
self.fp.write(zinfo.FileHeader(zip64))
12171233
self.fp.write(data)
1218-
self.fp.flush()
12191234
if zinfo.flag_bits & 0x08:
12201235
# Write CRC and file sizes after the file data
1221-
self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
1236+
fmt = '<LQQ' if zip64 else '<LLL'
1237+
self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
12221238
zinfo.file_size))
1239+
self.fp.flush()
12231240
self.filelist.append(zinfo)
12241241
self.NameToInfo[zinfo.filename] = zinfo
12251242

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -199,6 +199,9 @@ Core and Builtins
199199
Library
200200
-------
201201

202+
- Issue #9720: zipfile now writes correct local headers for files larger than
203+
4 GiB.
204+
202205
- Issue #16829: IDLE printing no longer fails if there are spaces or other
203206
special characters in the file path.
204207

0 commit comments

Comments
 (0)