Skip to content

Commit 5d99266

Browse files
authored
Further lock-down ZIP archive features (#18846)
* Further lock-down ZIP archive features * Get 100% coverage * Add unexpected error to assert message
1 parent fc9826a commit 5d99266

12 files changed

+310
-38
lines changed

tests/unit/utils/test_zipfiles.py

Lines changed: 198 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import os
55
import pathlib
66
import struct
7+
import tempfile
78

89
import pytest
910

@@ -23,7 +24,18 @@ def zippath(filename: str):
2324
("reject/8bitcomment.zip", "Filename not in central directory"),
2425
("reject/cd_extra_entry.zip", "Duplicate filename in central directory"),
2526
("reject/cd_missing_entry.zip", "Filename not in central directory"),
26-
("reject/data_descriptor_bad_crc_0.zip", "Unknown record signature"),
27+
("reject/data_descriptor.zip", "ZIP contains a data descriptor"),
28+
("reject/data_descriptor_bad_crc.zip", "ZIP contains a data descriptor"),
29+
("reject/data_descriptor_bad_crc_0.zip", "ZIP contains a data descriptor"),
30+
("reject/data_descriptor_bad_csize.zip", "ZIP contains a data descriptor"),
31+
("reject/data_descriptor_bad_usize.zip", "ZIP contains a data descriptor"),
32+
(
33+
"reject/data_descriptor_bad_usize_no_sig.zip",
34+
"ZIP contains a data descriptor",
35+
),
36+
("reject/data_descriptor_zip64.zip", "ZIP contains a data descriptor"),
37+
("reject/data_descriptor_zip64_csize.zip", "ZIP contains a data descriptor"),
38+
("reject/data_descriptor_zip64_usize.zip", "ZIP contains a data descriptor"),
2739
("reject/dupe_eocd.zip", "Truncated central directory"),
2840
(
2941
"reject/eocd64_locator_mismatch.zip",
@@ -37,10 +49,10 @@ def zippath(filename: str):
3749
("reject/non_ascii_original_name.zip", "Filename not unicode"),
3850
("reject/not.zip", "File is not a zip file"),
3951
("reject/prefix.zip", "Unknown record signature"),
40-
("reject/second_unicode_extra.zip", "Filename not in central directory"),
52+
("reject/second_unicode_extra.zip", "Invalid duplicate extra in local file"),
4153
("reject/shortextra.zip", "Corrupt extra field 7075 (size=9)"),
4254
("reject/suffix_not_comment.zip", "Trailing data"),
43-
("reject/unicode_extra_chain.zip", "Filename not in central directory"),
55+
("reject/unicode_extra_chain.zip", "Invalid duplicate extra in local file"),
4456
("reject/wheel-1.0-py3-none-any.whl", "Duplicate filename in local headers"),
4557
("reject/zip64_eocd_confusion.zip", "Filename not in central directory"),
4658
("reject/zip64_eocd_extensible_data.zip", "Bad offset for central directory"),
@@ -70,7 +82,7 @@ def test_bad_zips(filename, error):
7082
@pytest.mark.parametrize("filename", list(os.listdir(ZIPDATA_DIR / "accept")))
7183
def test_good_zips(filename):
7284
result = zipfiles.validate_zipfile(zippath(f"accept/{filename}"))
73-
assert result[0] is True
85+
assert result[0] is True, f"Expected no error, got {result[1]!r}"
7486
assert result[1] is None
7587

7688

@@ -151,3 +163,185 @@ def test_local_file_header_zip64_extra_no_compressed_size_nok_using_deflate():
151163
fp = io.BytesIO(header + b"a" + extra + b"a")
152164
with pytest.raises(zipfiles.InvalidZipFileError):
153165
zipfiles._handle_local_file_header(fp, {"a": 0})
166+
167+
168+
def test_local_file_invalid_filename():
169+
header = struct.pack("<xxHHxxxxxxxxLxxxxHxx", 0, 0, 0xFFFFFFFF, 1)
170+
fp = io.BytesIO(header + b"\x7f")
171+
with pytest.raises(zipfiles.InvalidZipFileError) as e:
172+
zipfiles._handle_local_file_header(fp, {"a": 0})
173+
assert str(e.value) == "Invalid character in filename"
174+
175+
176+
def test_local_file_invalid_filename_in_unicode_extra():
177+
extra = struct.pack("<HHxxxxxxxx", 0x7075, 8)
178+
header = struct.pack("<xxHHxxxxxxxxLxxxxHH", 0, 0, 0, 1, len(extra))
179+
fp = io.BytesIO(header + b"a" + extra)
180+
with pytest.raises(zipfiles.InvalidZipFileError) as e:
181+
zipfiles._handle_local_file_header(fp, {"a": 0})
182+
assert str(e.value) == "Invalid character in filename"
183+
184+
185+
def test_local_file_invalid_filename_utf8():
186+
extra = struct.pack("<HHxxxxx", 0x7075, 6) + b"\xf7"
187+
header = struct.pack("<xxHHxxxxxxxxLxxxxHH", 0, 0, 0, 1, len(extra))
188+
fp = io.BytesIO(header + b"a" + extra)
189+
with pytest.raises(zipfiles.InvalidZipFileError) as e:
190+
zipfiles._handle_local_file_header(fp, {"a": 0})
191+
assert str(e.value) == "Filename not valid UTF-8"
192+
193+
194+
def test_local_file_multiple_extras():
195+
extras = struct.pack("<HH", 0x7075, 1) + b"a" + struct.pack("<HHxxxx", 0x0002, 4)
196+
header = struct.pack("<xxHHxxxxxxxxLxxxxHH", 0, 0, 0, 1, len(extras))
197+
fp = io.BytesIO(header + b"a" + extras)
198+
filename = zipfiles._handle_local_file_header(fp, {"a": 0})
199+
assert filename == b"a"
200+
201+
202+
def test_cd_with_comment_rejected():
203+
data = struct.pack("<xxxxxxxxxxxxxxxxLxxxxHHHxxxxxxxxL", 0, 1, 0, 1, 0)
204+
fp = io.BytesIO(data)
205+
with pytest.raises(zipfiles.InvalidZipFileError) as e:
206+
zipfiles._handle_central_directory_header(fp)
207+
assert str(e.value) == "Comment in central directory"
208+
209+
210+
def test_cd_with_invalid_filename():
211+
data = struct.pack("<xxxxxxxxxxxxxxxxLxxxxHHHxxxxxxxxL", 0, 1, 0, 0, 0) + b"\x00"
212+
fp = io.BytesIO(data)
213+
with pytest.raises(zipfiles.InvalidZipFileError) as e:
214+
zipfiles._handle_central_directory_header(fp)
215+
assert str(e.value) == "Invalid character in filename"
216+
217+
218+
def test_eocd_mismatched_records_on_disk():
219+
data = struct.pack("<xxxxHHLLH", 100, 1, 0, 0, 0)
220+
fp = io.BytesIO(data)
221+
with pytest.raises(zipfiles.InvalidZipFileError) as e:
222+
zipfiles._handle_eocd(fp)
223+
assert str(e.value) == "Malformed zip file"
224+
225+
226+
def test_eocd64_mismatched_records_on_disk():
227+
data = struct.pack("<QxxxxxxxxxxxxQQQQ", 0, 100, 1, 0, 0)
228+
fp = io.BytesIO(data)
229+
with pytest.raises(zipfiles.InvalidZipFileError) as e:
230+
zipfiles._handle_eocd64(fp)
231+
assert str(e.value) == "Malformed zip file"
232+
233+
234+
def test_cd_and_eocd_match():
235+
data_lf = (
236+
zipfiles.RECORD_SIG_LOCAL_FILE
237+
+ struct.pack("<xxHHHxxxxxxLxxxxHH", 0, 0, 20, 0, 1, 0)
238+
+ b"a"
239+
)
240+
data_cd = (
241+
zipfiles.RECORD_SIG_CENTRAL_DIRECTORY
242+
+ struct.pack("<HHxxxxxxxxxxxxLxxxxHHHxxxxxxxxL", 20, 20, 0, 1, 0, 0, 0)
243+
+ b"a"
244+
)
245+
cd_records = 1
246+
cd_offset = len(data_lf)
247+
cd_size = len(data_cd)
248+
data_eocd = zipfiles.RECORD_SIG_EOCD + struct.pack(
249+
"<xxxxHHLLH", cd_records, cd_records, cd_size, cd_offset, 0
250+
)
251+
data = data_lf + data_cd + data_eocd
252+
with tempfile.NamedTemporaryFile(mode="wb") as tmp:
253+
tmp.write(data)
254+
tmp.flush()
255+
assert (True, None) == zipfiles.validate_zipfile(tmp.name)
256+
257+
258+
@pytest.mark.parametrize(
259+
("cd_records", "error"),
260+
[
261+
(0, "Mismatched central directory records"),
262+
(2, "Mismatched central directory records"),
263+
],
264+
)
265+
def test_cd_and_eocd_mismatch_records(cd_records, error):
266+
data_lf = (
267+
zipfiles.RECORD_SIG_LOCAL_FILE
268+
+ struct.pack("<xxHHHxxxxxxLxxxxHH", 0, 0, 20, 0, 1, 0)
269+
+ b"a"
270+
)
271+
data_cd = (
272+
zipfiles.RECORD_SIG_CENTRAL_DIRECTORY
273+
+ struct.pack("<HHxxxxxxxxxxxxLxxxxHHHxxxxxxxxL", 20, 20, 0, 1, 0, 0, 0)
274+
+ b"a"
275+
)
276+
cd_offset = len(data_lf)
277+
cd_size = len(data_cd)
278+
data_eocd = zipfiles.RECORD_SIG_EOCD + struct.pack(
279+
"<xxxxHHLLH", cd_records, cd_records, cd_size, cd_offset, 0
280+
)
281+
data = data_lf + data_cd + data_eocd
282+
with tempfile.NamedTemporaryFile(mode="wb") as tmp:
283+
tmp.write(data)
284+
tmp.flush()
285+
assert (False, error) == zipfiles.validate_zipfile(tmp.name)
286+
287+
288+
@pytest.mark.parametrize(
289+
("cd_offset_diff", "error"),
290+
[
291+
(-1, "Mismatched central directory offset"),
292+
(1, "Mismatched central directory offset"),
293+
],
294+
)
295+
def test_cd_and_eocd_mismatch_offset(cd_offset_diff, error):
296+
data_lf = (
297+
zipfiles.RECORD_SIG_LOCAL_FILE
298+
+ struct.pack("<xxHHHxxxxxxLxxxxHH", 0, 0, 20, 0, 1, 0)
299+
+ b"a"
300+
)
301+
data_cd = (
302+
zipfiles.RECORD_SIG_CENTRAL_DIRECTORY
303+
+ struct.pack("<HHxxxxxxxxxxxxLxxxxHHHxxxxxxxxL", 20, 20, 0, 1, 0, 0, 0)
304+
+ b"a"
305+
)
306+
cd_records = 1
307+
cd_offset = len(data_lf) + cd_offset_diff
308+
cd_size = len(data_cd)
309+
data_eocd = zipfiles.RECORD_SIG_EOCD + struct.pack(
310+
"<xxxxHHLLH", cd_records, cd_records, cd_size, cd_offset, 0
311+
)
312+
data = data_lf + data_cd + data_eocd
313+
with tempfile.NamedTemporaryFile(mode="wb") as tmp:
314+
tmp.write(data)
315+
tmp.flush()
316+
assert (False, error) == zipfiles.validate_zipfile(tmp.name)
317+
318+
319+
@pytest.mark.parametrize(
320+
("cd_size_diff", "error"),
321+
[
322+
(-1, "Bad magic number for central directory"),
323+
(1, "Bad magic number for central directory"),
324+
],
325+
)
326+
def test_cd_and_eocd_mismatch_size(cd_size_diff, error):
327+
data_lf = (
328+
zipfiles.RECORD_SIG_LOCAL_FILE
329+
+ struct.pack("<xxHHHxxxxxxLxxxxHH", 0, 0, 20, 0, 1, 0)
330+
+ b"a"
331+
)
332+
data_cd = (
333+
zipfiles.RECORD_SIG_CENTRAL_DIRECTORY
334+
+ struct.pack("<HHxxxxxxxxxxxxLxxxxHHHxxxxxxxxL", 20, 20, 0, 1, 0, 0, 0)
335+
+ b"a"
336+
)
337+
cd_records = 1
338+
cd_offset = len(data_lf)
339+
cd_size = len(data_cd) + cd_size_diff
340+
data_eocd = zipfiles.RECORD_SIG_EOCD + struct.pack(
341+
"<xxxxHHLLH", cd_records, cd_records, cd_size, cd_offset, 0
342+
)
343+
data = data_lf + data_cd + data_eocd
344+
with tempfile.NamedTemporaryFile(mode="wb") as tmp:
345+
tmp.write(data)
346+
tmp.flush()
347+
assert (False, error) == zipfiles.validate_zipfile(tmp.name)
22 Bytes
Binary file not shown.
113 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)