44import os
55import pathlib
66import struct
7+ import tempfile
78
89import pytest
910
@@ -23,7 +24,18 @@ def zippath(filename: str):
2324 ("reject/8bitcomment.zip" , "Filename not in central directory" ),
2425 ("reject/cd_extra_entry.zip" , "Duplicate filename in central directory" ),
2526 ("reject/cd_missing_entry.zip" , "Filename not in central directory" ),
26- ("reject/data_descriptor_bad_crc_0.zip" , "Unknown record signature" ),
27+ ("reject/data_descriptor.zip" , "ZIP contains a data descriptor" ),
28+ ("reject/data_descriptor_bad_crc.zip" , "ZIP contains a data descriptor" ),
29+ ("reject/data_descriptor_bad_crc_0.zip" , "ZIP contains a data descriptor" ),
30+ ("reject/data_descriptor_bad_csize.zip" , "ZIP contains a data descriptor" ),
31+ ("reject/data_descriptor_bad_usize.zip" , "ZIP contains a data descriptor" ),
32+ (
33+ "reject/data_descriptor_bad_usize_no_sig.zip" ,
34+ "ZIP contains a data descriptor" ,
35+ ),
36+ ("reject/data_descriptor_zip64.zip" , "ZIP contains a data descriptor" ),
37+ ("reject/data_descriptor_zip64_csize.zip" , "ZIP contains a data descriptor" ),
38+ ("reject/data_descriptor_zip64_usize.zip" , "ZIP contains a data descriptor" ),
2739 ("reject/dupe_eocd.zip" , "Truncated central directory" ),
2840 (
2941 "reject/eocd64_locator_mismatch.zip" ,
@@ -37,10 +49,10 @@ def zippath(filename: str):
3749 ("reject/non_ascii_original_name.zip" , "Filename not unicode" ),
3850 ("reject/not.zip" , "File is not a zip file" ),
3951 ("reject/prefix.zip" , "Unknown record signature" ),
40- ("reject/second_unicode_extra.zip" , "Filename not in central directory " ),
52+ ("reject/second_unicode_extra.zip" , "Invalid duplicate extra in local file " ),
4153 ("reject/shortextra.zip" , "Corrupt extra field 7075 (size=9)" ),
4254 ("reject/suffix_not_comment.zip" , "Trailing data" ),
43- ("reject/unicode_extra_chain.zip" , "Filename not in central directory " ),
55+ ("reject/unicode_extra_chain.zip" , "Invalid duplicate extra in local file " ),
4456 ("reject/wheel-1.0-py3-none-any.whl" , "Duplicate filename in local headers" ),
4557 ("reject/zip64_eocd_confusion.zip" , "Filename not in central directory" ),
4658 ("reject/zip64_eocd_extensible_data.zip" , "Bad offset for central directory" ),
@@ -70,7 +82,7 @@ def test_bad_zips(filename, error):
7082@pytest .mark .parametrize ("filename" , list (os .listdir (ZIPDATA_DIR / "accept" )))
7183def test_good_zips (filename ):
7284 result = zipfiles .validate_zipfile (zippath (f"accept/{ filename } " ))
73- assert result [0 ] is True
85+ assert result [0 ] is True , f"Expected no error, got { result [ 1 ]!r } "
7486 assert result [1 ] is None
7587
7688
@@ -151,3 +163,185 @@ def test_local_file_header_zip64_extra_no_compressed_size_nok_using_deflate():
151163 fp = io .BytesIO (header + b"a" + extra + b"a" )
152164 with pytest .raises (zipfiles .InvalidZipFileError ):
153165 zipfiles ._handle_local_file_header (fp , {"a" : 0 })
166+
167+
168+ def test_local_file_invalid_filename ():
169+ header = struct .pack ("<xxHHxxxxxxxxLxxxxHxx" , 0 , 0 , 0xFFFFFFFF , 1 )
170+ fp = io .BytesIO (header + b"\x7f " )
171+ with pytest .raises (zipfiles .InvalidZipFileError ) as e :
172+ zipfiles ._handle_local_file_header (fp , {"a" : 0 })
173+ assert str (e .value ) == "Invalid character in filename"
174+
175+
176+ def test_local_file_invalid_filename_in_unicode_extra ():
177+ extra = struct .pack ("<HHxxxxxxxx" , 0x7075 , 8 )
178+ header = struct .pack ("<xxHHxxxxxxxxLxxxxHH" , 0 , 0 , 0 , 1 , len (extra ))
179+ fp = io .BytesIO (header + b"a" + extra )
180+ with pytest .raises (zipfiles .InvalidZipFileError ) as e :
181+ zipfiles ._handle_local_file_header (fp , {"a" : 0 })
182+ assert str (e .value ) == "Invalid character in filename"
183+
184+
185+ def test_local_file_invalid_filename_utf8 ():
186+ extra = struct .pack ("<HHxxxxx" , 0x7075 , 6 ) + b"\xf7 "
187+ header = struct .pack ("<xxHHxxxxxxxxLxxxxHH" , 0 , 0 , 0 , 1 , len (extra ))
188+ fp = io .BytesIO (header + b"a" + extra )
189+ with pytest .raises (zipfiles .InvalidZipFileError ) as e :
190+ zipfiles ._handle_local_file_header (fp , {"a" : 0 })
191+ assert str (e .value ) == "Filename not valid UTF-8"
192+
193+
194+ def test_local_file_multiple_extras ():
195+ extras = struct .pack ("<HH" , 0x7075 , 1 ) + b"a" + struct .pack ("<HHxxxx" , 0x0002 , 4 )
196+ header = struct .pack ("<xxHHxxxxxxxxLxxxxHH" , 0 , 0 , 0 , 1 , len (extras ))
197+ fp = io .BytesIO (header + b"a" + extras )
198+ filename = zipfiles ._handle_local_file_header (fp , {"a" : 0 })
199+ assert filename == b"a"
200+
201+
202+ def test_cd_with_comment_rejected ():
203+ data = struct .pack ("<xxxxxxxxxxxxxxxxLxxxxHHHxxxxxxxxL" , 0 , 1 , 0 , 1 , 0 )
204+ fp = io .BytesIO (data )
205+ with pytest .raises (zipfiles .InvalidZipFileError ) as e :
206+ zipfiles ._handle_central_directory_header (fp )
207+ assert str (e .value ) == "Comment in central directory"
208+
209+
210+ def test_cd_with_invalid_filename ():
211+ data = struct .pack ("<xxxxxxxxxxxxxxxxLxxxxHHHxxxxxxxxL" , 0 , 1 , 0 , 0 , 0 ) + b"\x00 "
212+ fp = io .BytesIO (data )
213+ with pytest .raises (zipfiles .InvalidZipFileError ) as e :
214+ zipfiles ._handle_central_directory_header (fp )
215+ assert str (e .value ) == "Invalid character in filename"
216+
217+
218+ def test_eocd_mismatched_records_on_disk ():
219+ data = struct .pack ("<xxxxHHLLH" , 100 , 1 , 0 , 0 , 0 )
220+ fp = io .BytesIO (data )
221+ with pytest .raises (zipfiles .InvalidZipFileError ) as e :
222+ zipfiles ._handle_eocd (fp )
223+ assert str (e .value ) == "Malformed zip file"
224+
225+
226+ def test_eocd64_mismatched_records_on_disk ():
227+ data = struct .pack ("<QxxxxxxxxxxxxQQQQ" , 0 , 100 , 1 , 0 , 0 )
228+ fp = io .BytesIO (data )
229+ with pytest .raises (zipfiles .InvalidZipFileError ) as e :
230+ zipfiles ._handle_eocd64 (fp )
231+ assert str (e .value ) == "Malformed zip file"
232+
233+
234+ def test_cd_and_eocd_match ():
235+ data_lf = (
236+ zipfiles .RECORD_SIG_LOCAL_FILE
237+ + struct .pack ("<xxHHHxxxxxxLxxxxHH" , 0 , 0 , 20 , 0 , 1 , 0 )
238+ + b"a"
239+ )
240+ data_cd = (
241+ zipfiles .RECORD_SIG_CENTRAL_DIRECTORY
242+ + struct .pack ("<HHxxxxxxxxxxxxLxxxxHHHxxxxxxxxL" , 20 , 20 , 0 , 1 , 0 , 0 , 0 )
243+ + b"a"
244+ )
245+ cd_records = 1
246+ cd_offset = len (data_lf )
247+ cd_size = len (data_cd )
248+ data_eocd = zipfiles .RECORD_SIG_EOCD + struct .pack (
249+ "<xxxxHHLLH" , cd_records , cd_records , cd_size , cd_offset , 0
250+ )
251+ data = data_lf + data_cd + data_eocd
252+ with tempfile .NamedTemporaryFile (mode = "wb" ) as tmp :
253+ tmp .write (data )
254+ tmp .flush ()
255+ assert (True , None ) == zipfiles .validate_zipfile (tmp .name )
256+
257+
258+ @pytest .mark .parametrize (
259+ ("cd_records" , "error" ),
260+ [
261+ (0 , "Mismatched central directory records" ),
262+ (2 , "Mismatched central directory records" ),
263+ ],
264+ )
265+ def test_cd_and_eocd_mismatch_records (cd_records , error ):
266+ data_lf = (
267+ zipfiles .RECORD_SIG_LOCAL_FILE
268+ + struct .pack ("<xxHHHxxxxxxLxxxxHH" , 0 , 0 , 20 , 0 , 1 , 0 )
269+ + b"a"
270+ )
271+ data_cd = (
272+ zipfiles .RECORD_SIG_CENTRAL_DIRECTORY
273+ + struct .pack ("<HHxxxxxxxxxxxxLxxxxHHHxxxxxxxxL" , 20 , 20 , 0 , 1 , 0 , 0 , 0 )
274+ + b"a"
275+ )
276+ cd_offset = len (data_lf )
277+ cd_size = len (data_cd )
278+ data_eocd = zipfiles .RECORD_SIG_EOCD + struct .pack (
279+ "<xxxxHHLLH" , cd_records , cd_records , cd_size , cd_offset , 0
280+ )
281+ data = data_lf + data_cd + data_eocd
282+ with tempfile .NamedTemporaryFile (mode = "wb" ) as tmp :
283+ tmp .write (data )
284+ tmp .flush ()
285+ assert (False , error ) == zipfiles .validate_zipfile (tmp .name )
286+
287+
288+ @pytest .mark .parametrize (
289+ ("cd_offset_diff" , "error" ),
290+ [
291+ (- 1 , "Mismatched central directory offset" ),
292+ (1 , "Mismatched central directory offset" ),
293+ ],
294+ )
295+ def test_cd_and_eocd_mismatch_offset (cd_offset_diff , error ):
296+ data_lf = (
297+ zipfiles .RECORD_SIG_LOCAL_FILE
298+ + struct .pack ("<xxHHHxxxxxxLxxxxHH" , 0 , 0 , 20 , 0 , 1 , 0 )
299+ + b"a"
300+ )
301+ data_cd = (
302+ zipfiles .RECORD_SIG_CENTRAL_DIRECTORY
303+ + struct .pack ("<HHxxxxxxxxxxxxLxxxxHHHxxxxxxxxL" , 20 , 20 , 0 , 1 , 0 , 0 , 0 )
304+ + b"a"
305+ )
306+ cd_records = 1
307+ cd_offset = len (data_lf ) + cd_offset_diff
308+ cd_size = len (data_cd )
309+ data_eocd = zipfiles .RECORD_SIG_EOCD + struct .pack (
310+ "<xxxxHHLLH" , cd_records , cd_records , cd_size , cd_offset , 0
311+ )
312+ data = data_lf + data_cd + data_eocd
313+ with tempfile .NamedTemporaryFile (mode = "wb" ) as tmp :
314+ tmp .write (data )
315+ tmp .flush ()
316+ assert (False , error ) == zipfiles .validate_zipfile (tmp .name )
317+
318+
319+ @pytest .mark .parametrize (
320+ ("cd_size_diff" , "error" ),
321+ [
322+ (- 1 , "Bad magic number for central directory" ),
323+ (1 , "Bad magic number for central directory" ),
324+ ],
325+ )
326+ def test_cd_and_eocd_mismatch_size (cd_size_diff , error ):
327+ data_lf = (
328+ zipfiles .RECORD_SIG_LOCAL_FILE
329+ + struct .pack ("<xxHHHxxxxxxLxxxxHH" , 0 , 0 , 20 , 0 , 1 , 0 )
330+ + b"a"
331+ )
332+ data_cd = (
333+ zipfiles .RECORD_SIG_CENTRAL_DIRECTORY
334+ + struct .pack ("<HHxxxxxxxxxxxxLxxxxHHHxxxxxxxxL" , 20 , 20 , 0 , 1 , 0 , 0 , 0 )
335+ + b"a"
336+ )
337+ cd_records = 1
338+ cd_offset = len (data_lf )
339+ cd_size = len (data_cd ) + cd_size_diff
340+ data_eocd = zipfiles .RECORD_SIG_EOCD + struct .pack (
341+ "<xxxxHHLLH" , cd_records , cd_records , cd_size , cd_offset , 0
342+ )
343+ data = data_lf + data_cd + data_eocd
344+ with tempfile .NamedTemporaryFile (mode = "wb" ) as tmp :
345+ tmp .write (data )
346+ tmp .flush ()
347+ assert (False , error ) == zipfiles .validate_zipfile (tmp .name )
0 commit comments