From 3a5c1d0a06a28b148ddfee8c0b440553c8b71d58 Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Tue, 13 Sep 2022 09:35:28 -0400 Subject: [PATCH 1/4] TEST: Validate issue gh-1137 --- nibabel/tests/test_loadsave.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/nibabel/tests/test_loadsave.py b/nibabel/tests/test_loadsave.py index ad7c34cbcf..721c8a414b 100644 --- a/nibabel/tests/test_loadsave.py +++ b/nibabel/tests/test_loadsave.py @@ -13,6 +13,7 @@ from ..loadsave import load, read_img_data, _signature_matches_extension from ..filebasedimages import ImageFileError from ..tmpdirs import InTemporaryDirectory, TemporaryDirectory +from ..openers import Opener from ..optpkg import optional_package _, have_scipy, _ = optional_package('scipy') @@ -82,6 +83,15 @@ def test_load_bad_compressed_extension(tmp_path, extension): load(file_path) +@pytest.mark.parametrize("extension", [".gz", ".bz2"]) +def test_load_good_extension_with_bad_data(tmp_path, extension): + file_path = tmp_path / f"img.nii{extension}" + with Opener(file_path, "wb") as fobj: + fobj.write(b"bad") + with pytest.raises(ImageFileError, match="Cannot work out file type of .*"): + load(file_path) + + def test_signature_matches_extension(tmp_path): gz_signature = b"\x1f\x8b" good_file = tmp_path / "good.gz" From c0ab75580542106fbee7a2ece664e00c36d3c923 Mon Sep 17 00:00:00 2001 From: "Christopher J. 
Markiewicz" Date: Tue, 13 Sep 2022 09:39:53 -0400 Subject: [PATCH 2/4] FIX: Do not attempt to pass sniffed bytes to _signature_matches_extension --- nibabel/loadsave.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/nibabel/loadsave.py b/nibabel/loadsave.py index 04fee7b6a2..a75e0e664b 100644 --- a/nibabel/loadsave.py +++ b/nibabel/loadsave.py @@ -22,7 +22,7 @@ _compressed_suffixes = ('.gz', '.bz2', '.zst') -def _signature_matches_extension(filename, sniff): +def _signature_matches_extension(filename): """Check if signature aka magic number matches filename extension. Parameters @@ -30,10 +30,6 @@ def _signature_matches_extension(filename, sniff): filename : str or os.PathLike Path to the file to check - sniff : bytes or None - First bytes of the file. If not `None` and long enough to contain the - signature, avoids having to read the start of the file. - Returns ------- matches : bool @@ -56,12 +52,11 @@ def _signature_matches_extension(filename, sniff): if ext not in signatures: return True, "" expected_signature = signatures[ext]["signature"] - if sniff is None or len(sniff) < len(expected_signature): - try: - with open(filename, "rb") as fh: - sniff = fh.read(len(expected_signature)) - except OSError: - return False, f"Could not read file: {filename}" + try: + with open(filename, "rb") as fh: + sniff = fh.read(len(expected_signature)) + except OSError: + return False, f"Could not read file: {filename}" if sniff.startswith(expected_signature): return True, "" format_name = signatures[ext]["format_name"] @@ -100,7 +95,7 @@ def load(filename, **kwargs): img = image_klass.from_filename(filename, **kwargs) return img - matches, msg = _signature_matches_extension(filename, sniff) + matches, msg = _signature_matches_extension(filename) if not matches: raise ImageFileError(msg) From 8bad78fa979c493a443877a9c91266b4fca7461e Mon Sep 17 00:00:00 2001 From: "Christopher J. 
Markiewicz" Date: Tue, 13 Sep 2022 09:40:09 -0400 Subject: [PATCH 3/4] TEST: Update/remove _signature_matches_extension tests --- nibabel/tests/test_loadsave.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/nibabel/tests/test_loadsave.py b/nibabel/tests/test_loadsave.py index 721c8a414b..21717df0e6 100644 --- a/nibabel/tests/test_loadsave.py +++ b/nibabel/tests/test_loadsave.py @@ -98,27 +98,19 @@ def test_signature_matches_extension(tmp_path): good_file.write_bytes(gz_signature) bad_file = tmp_path / "bad.gz" bad_file.write_bytes(b"bad") - matches, msg = _signature_matches_extension( - tmp_path / "uncompressed.nii", None) + matches, msg = _signature_matches_extension(tmp_path / "uncompressed.nii") assert matches assert msg == "" - matches, msg = _signature_matches_extension(tmp_path / "missing.gz", None) + matches, msg = _signature_matches_extension(tmp_path / "missing.gz") assert not matches assert msg.startswith("Could not read") - matches, msg = _signature_matches_extension(bad_file, None) + matches, msg = _signature_matches_extension(bad_file) assert not matches assert "is not a" in msg - matches, msg = _signature_matches_extension(bad_file, gz_signature + b"abc") + matches, msg = _signature_matches_extension(good_file) assert matches assert msg == "" - matches, msg = _signature_matches_extension( - good_file, gz_signature + b"abc") - assert matches - assert msg == "" - matches, msg = _signature_matches_extension(good_file, gz_signature[:1]) - assert matches - assert msg == "" - matches, msg = _signature_matches_extension(good_file, None) + matches, msg = _signature_matches_extension(tmp_path / "missing.nii") assert matches assert msg == "" From fcd489ba770e539fcf18e1878038eb031a6c262a Mon Sep 17 00:00:00 2001 From: "Christopher J. 
Markiewicz" Date: Tue, 13 Sep 2022 09:58:22 -0400 Subject: [PATCH 4/4] ENH: Throw in .zst while here --- nibabel/filebasedimages.py | 2 +- nibabel/filename_parser.py | 2 +- nibabel/loadsave.py | 3 ++- nibabel/tests/test_loadsave.py | 9 +++++++-- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/nibabel/filebasedimages.py b/nibabel/filebasedimages.py index b08f5e74d4..4a194576b3 100644 --- a/nibabel/filebasedimages.py +++ b/nibabel/filebasedimages.py @@ -430,7 +430,7 @@ def _sniff_meta_for(klass, filename, sniff_nbytes, sniff=None): try: with ImageOpener(meta_fname, 'rb') as fobj: binaryblock = fobj.read(sniff_nbytes) - except IOError: + except (IOError, EOFError): return None return (binaryblock, meta_fname) diff --git a/nibabel/filename_parser.py b/nibabel/filename_parser.py index d8ed87c38a..e254019883 100644 --- a/nibabel/filename_parser.py +++ b/nibabel/filename_parser.py @@ -253,7 +253,7 @@ def _iendswith(whole, end): def splitext_addext(filename, - addexts=('.gz', '.bz2'), + addexts=('.gz', '.bz2', '.zst'), match_case=False): """ Split ``/pth/fname.ext.gz`` into ``/pth/fname, .ext, .gz`` diff --git a/nibabel/loadsave.py b/nibabel/loadsave.py index a75e0e664b..ff176f541d 100644 --- a/nibabel/loadsave.py +++ b/nibabel/loadsave.py @@ -44,7 +44,8 @@ def _signature_matches_extension(filename): """ signatures = { ".gz": {"signature": b"\x1f\x8b", "format_name": "gzip"}, - ".bz2": {"signature": b"BZh", "format_name": "bzip2"} + ".bz2": {"signature": b"BZh", "format_name": "bzip2"}, + ".zst": {"signature": b"\x28\xb5\x2f\xfd", "format_name": "zstd"}, } filename = _stringify_path(filename) *_, ext = splitext_addext(filename) diff --git a/nibabel/tests/test_loadsave.py b/nibabel/tests/test_loadsave.py index 21717df0e6..f2cf0242d5 100644 --- a/nibabel/tests/test_loadsave.py +++ b/nibabel/tests/test_loadsave.py @@ -17,6 +17,7 @@ from ..optpkg import optional_package _, have_scipy, _ = optional_package('scipy') +_, have_pyzstd, _ = optional_package('pyzstd') 
from numpy.testing import (assert_almost_equal, assert_array_equal) @@ -75,16 +76,20 @@ def test_load_empty_image(): assert str(err.value).startswith('Empty file: ') -@pytest.mark.parametrize("extension", [".gz", ".bz2"]) +@pytest.mark.parametrize("extension", [".gz", ".bz2", ".zst"]) def test_load_bad_compressed_extension(tmp_path, extension): + if extension == ".zst" and not have_pyzstd: + pytest.skip() file_path = tmp_path / f"img.nii{extension}" file_path.write_bytes(b"bad") with pytest.raises(ImageFileError, match=".*is not a .* file"): load(file_path) -@pytest.mark.parametrize("extension", [".gz", ".bz2"]) +@pytest.mark.parametrize("extension", [".gz", ".bz2", ".zst"]) def test_load_good_extension_with_bad_data(tmp_path, extension): + if extension == ".zst" and not have_pyzstd: + pytest.skip() file_path = tmp_path / f"img.nii{extension}" with Opener(file_path, "wb") as fobj: fobj.write(b"bad")