diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index ab27fda8dcdf5..11c22c00482b4 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -86,12 +86,12 @@ def test_stringify_path_fspath(self):
         result = icom.stringify_path(p)
         assert result == "foo/bar.csv"
 
-    def test_stringify_file_and_path_like(self):
+    def test_stringify_file_and_path_like(self, temp_file):
         # GH 38125: do not stringify file objects that are also path-like
         fsspec = pytest.importorskip("fsspec")
-        with tm.ensure_clean() as path:
-            with fsspec.open(f"file://{path}", mode="wb") as fsspec_obj:
-                assert fsspec_obj == icom.stringify_path(fsspec_obj)
+        path = temp_file
+        with fsspec.open(f"file://{path}", mode="wb") as fsspec_obj:
+            assert fsspec_obj == icom.stringify_path(fsspec_obj)
 
     @pytest.mark.parametrize("path_type", [str, CustomFSPath, Path])
     def test_infer_compression_from_path(self, compression_format, path_type):
@@ -338,11 +338,11 @@ def test_read_fspath_all(self, reader, module, path, datapath):
             ("to_stata", {"time_stamp": pd.to_datetime("2019-01-01 00:00")}, "os"),
         ],
     )
-    def test_write_fspath_all(self, writer_name, writer_kwargs, module):
+    def test_write_fspath_all(self, writer_name, writer_kwargs, module, tmp_path):
         if writer_name in ["to_latex"]:  # uses Styler implementation
             pytest.importorskip("jinja2")
-        p1 = tm.ensure_clean("string")
-        p2 = tm.ensure_clean("fspath")
+        p1 = tmp_path / "string"
+        p2 = tmp_path / "fspath"
 
         df = pd.DataFrame({"A": [1, 2]})
-        with p1 as string, p2 as fspath:
+        # pathlib.Path objects are not context managers; use the paths directly
+        string, fspath = str(p1), str(p2)
@@ -364,15 +364,15 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module):
                 expected = f_path.read()
                 assert result == expected
 
-    def test_write_fspath_hdf5(self):
+    def test_write_fspath_hdf5(self, tmp_path):
         # Same test as write_fspath_all, except HDF5 files aren't
         # necessarily byte-for-byte identical for a given dataframe, so we'll
         # have to read and compare equality
         pytest.importorskip("tables")
 
         df = pd.DataFrame({"A": [1, 2]})
-        p1 = tm.ensure_clean("string")
-        p2 = tm.ensure_clean("fspath")
+        p1 = tmp_path / "string"
+        p2 = tmp_path / "fspath"
 
-        with p1 as string, p2 as fspath:
-            mypath = CustomFSPath(fspath)
+        # pathlib.Path objects are not context managers; use the paths directly
+        string, fspath = str(p1), str(p2)
+        mypath = CustomFSPath(fspath)
@@ -432,35 +432,35 @@ def test_next(self, mmap_file):
         with pytest.raises(StopIteration, match=r"^$"):
             next(wrapper)
 
-    def test_unknown_engine(self):
-        with tm.ensure_clean() as path:
-            df = pd.DataFrame(
-                1.1 * np.arange(120).reshape((30, 4)),
-                columns=pd.Index(list("ABCD")),
-                index=pd.Index([f"i-{i}" for i in range(30)]),
-            )
-            df.to_csv(path)
-            with pytest.raises(ValueError, match="Unknown engine"):
-                pd.read_csv(path, engine="pyt")
-
-    def test_binary_mode(self):
+    def test_unknown_engine(self, temp_file):
+        path = temp_file
+        df = pd.DataFrame(
+            1.1 * np.arange(120).reshape((30, 4)),
+            columns=pd.Index(list("ABCD")),
+            index=pd.Index([f"i-{i}" for i in range(30)]),
+        )
+        df.to_csv(path)
+        with pytest.raises(ValueError, match="Unknown engine"):
+            pd.read_csv(path, engine="pyt")
+
+    def test_binary_mode(self, temp_file):
         """
         'encoding' shouldn't be passed to 'open' in binary mode.
 
        
GH 35058 """ - with tm.ensure_clean() as path: - df = pd.DataFrame( - 1.1 * np.arange(120).reshape((30, 4)), - columns=pd.Index(list("ABCD")), - index=pd.Index([f"i-{i}" for i in range(30)]), - ) - df.to_csv(path, mode="w+b") - tm.assert_frame_equal(df, pd.read_csv(path, index_col=0)) + path = temp_file + df = pd.DataFrame( + 1.1 * np.arange(120).reshape((30, 4)), + columns=pd.Index(list("ABCD")), + index=pd.Index([f"i-{i}" for i in range(30)]), + ) + df.to_csv(path, mode="w+b") + tm.assert_frame_equal(df, pd.read_csv(path, index_col=0)) @pytest.mark.parametrize("encoding", ["utf-16", "utf-32"]) @pytest.mark.parametrize("compression_", ["bz2", "xz"]) - def test_warning_missing_utf_bom(self, encoding, compression_): + def test_warning_missing_utf_bom(self, encoding, compression_, temp_file): """ bz2 and xz do not write the byte order mark (BOM) for utf-16/32. @@ -473,17 +473,17 @@ def test_warning_missing_utf_bom(self, encoding, compression_): columns=pd.Index(list("ABCD")), index=pd.Index([f"i-{i}" for i in range(30)]), ) - with tm.ensure_clean() as path: - with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"): - df.to_csv(path, compression=compression_, encoding=encoding) - - # reading should fail (otherwise we wouldn't need the warning) - msg = ( - r"UTF-\d+ stream does not start with BOM|" - r"'utf-\d+' codec can't decode byte" - ) - with pytest.raises(UnicodeError, match=msg): - pd.read_csv(path, compression=compression_, encoding=encoding) + path = temp_file + with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"): + df.to_csv(path, compression=compression_, encoding=encoding) + + # reading should fail (otherwise we wouldn't need the warning) + msg = ( + r"UTF-\d+ stream does not start with BOM|" + r"'utf-\d+' codec can't decode byte" + ) + with pytest.raises(UnicodeError, match=msg): + pd.read_csv(path, compression=compression_, encoding=encoding) def test_is_fsspec_url(): @@ -514,38 +514,38 @@ def test_is_fsspec_url_chained(): @pytest.mark.parametrize("format", ["csv", "json"]) -def test_codecs_encoding(format): +def test_codecs_encoding(format, temp_file): # GH39247 expected = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), columns=pd.Index(list("ABCD")), index=pd.Index([f"i-{i}" for i in range(30)]), ) - with tm.ensure_clean() as path: - with open(path, mode="w", encoding="utf-8") as handle: - getattr(expected, f"to_{format}")(handle) - with open(path, encoding="utf-8") as handle: - if format == "csv": - df = pd.read_csv(handle, index_col=0) - else: - df = pd.read_json(handle) + path = temp_file + with open(path, mode="w", encoding="utf-8") as handle: + getattr(expected, f"to_{format}")(handle) + with open(path, encoding="utf-8") as handle: + if format == "csv": + df = pd.read_csv(handle, index_col=0) + else: + df = pd.read_json(handle) tm.assert_frame_equal(expected, df) -def test_codecs_get_writer_reader(): +def test_codecs_get_writer_reader(temp_file): # GH39247 expected = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), columns=pd.Index(list("ABCD")), index=pd.Index([f"i-{i}" for i in range(30)]), ) - with tm.ensure_clean() as path: - with open(path, "wb") as handle: - with codecs.getwriter("utf-8")(handle) as encoded: - expected.to_csv(encoded) - with open(path, "rb") as handle: - with codecs.getreader("utf-8")(handle) as encoded: - df = pd.read_csv(encoded, index_col=0) + path = temp_file + with open(path, "wb") as handle: + with codecs.getwriter("utf-8")(handle) as encoded: + expected.to_csv(encoded) + with open(path, "rb") as 
handle:
+        with codecs.getreader("utf-8")(handle) as encoded:
+            df = pd.read_csv(encoded, index_col=0)
 
     tm.assert_frame_equal(expected, df)
 
 
@@ -572,7 +572,7 @@ def test_explicit_encoding(io_class, mode, msg):
 
 @pytest.mark.parametrize("encoding_errors", ["strict", "replace"])
 @pytest.mark.parametrize("format", ["csv", "json"])
-def test_encoding_errors(encoding_errors, format):
+def test_encoding_errors(encoding_errors, format, temp_file):
     # GH39450
     msg = "'utf-8' codec can't decode byte"
     bad_encoding = b"\xe4"
@@ -591,18 +591,18 @@ def test_encoding_errors(encoding_errors, format):
         + b'"}}'
     )
     reader = partial(pd.read_json, orient="index")
-    with tm.ensure_clean() as path:
-        file = Path(path)
-        file.write_bytes(content)
+    path = temp_file
+    # temp_file is already a pathlib.Path; no need to wrap it in Path() again
+    path.write_bytes(content)
 
-        if encoding_errors != "replace":
-            with pytest.raises(UnicodeDecodeError, match=msg):
-                reader(path, encoding_errors=encoding_errors)
-        else:
-            df = reader(path, encoding_errors=encoding_errors)
-            decoded = bad_encoding.decode(errors=encoding_errors)
-            expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2])
-            tm.assert_frame_equal(df, expected)
+    if encoding_errors != "replace":
+        with pytest.raises(UnicodeDecodeError, match=msg):
+            reader(path, encoding_errors=encoding_errors)
+    else:
+        df = reader(path, encoding_errors=encoding_errors)
+        decoded = bad_encoding.decode(errors=encoding_errors)
+        expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2])
+        tm.assert_frame_equal(df, expected)
 
 
 @pytest.mark.parametrize("encoding_errors", [0, None])
@@ -616,11 +616,11 @@ def test_encoding_errors_badtype(encoding_errors):
         reader(content)
 
 
-def test_bad_encdoing_errors():
+def test_bad_encoding_errors(temp_file):
     # GH 39777
-    with tm.ensure_clean() as path:
-        with pytest.raises(LookupError, match="unknown error handler name"):
-            icom.get_handle(path, "w", errors="bad")
+    path = temp_file
+    with pytest.raises(LookupError, match="unknown error handler name"):
+        icom.get_handle(path, "w", errors="bad")
 
 
 @pytest.mark.skipif(WASM, reason="limited file system access on WASM")
diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py
index 904c3a047bab2..93b2fb99662f0 100644
--- a/pandas/tests/io/test_feather.py
+++ b/pandas/tests/io/test_feather.py
@@ -26,34 +26,34 @@
 
 @pytest.mark.single_cpu
 class TestFeather:
-    def check_error_on_write(self, df, exc, err_msg):
+    def check_error_on_write(self, df, exc, err_msg, temp_file):
         # check that we are raising the exception
         # on writing
 
         with pytest.raises(exc, match=err_msg):
-            with tm.ensure_clean() as path:
-                to_feather(df, path)
+            to_feather(df, temp_file)
 
-    def check_external_error_on_write(self, df):
+    def check_external_error_on_write(self, df, temp_file):
         # check that we are raising the exception
         # on writing
 
         with tm.external_error_raised(Exception):
-            with tm.ensure_clean() as path:
-                to_feather(df, path)
+            to_feather(df, temp_file)
 
-    def check_round_trip(self, df, expected=None, write_kwargs=None, **read_kwargs):
+    def check_round_trip(
+        self, df, temp_file, expected=None, write_kwargs=None, **read_kwargs
+    ):
         if write_kwargs is None:
             write_kwargs = {}
         if expected is None:
             expected = df.copy()
 
-        with tm.ensure_clean() as path:
-            to_feather(df, path, **write_kwargs)
+        path = temp_file
+        to_feather(df, path, **write_kwargs)
 
-            result = read_feather(path, **read_kwargs)
+        result = read_feather(path, **read_kwargs)
 
-            tm.assert_frame_equal(result, expected)
+        tm.assert_frame_equal(result, expected)
 
-    def test_error(self):
+    def test_error(self, temp_file):
         msg = "feather only
 support IO with DataFrames"
@@ -168,7 +168,7 @@ def test_http_path(self, feather_file, httpserver):
         tm.assert_frame_equal(expected, res)
 
     def test_read_feather_dtype_backend(
-        self, string_storage, dtype_backend, using_infer_string
+        self, string_storage, dtype_backend, using_infer_string, temp_file
     ):
         # GH#50765
         df = pd.DataFrame(
@@ -184,10 +184,10 @@ def test_read_feather_dtype_backend(
             }
         )
 
-        with tm.ensure_clean() as path:
-            to_feather(df, path)
-            with pd.option_context("mode.string_storage", string_storage):
-                result = read_feather(path, dtype_backend=dtype_backend)
+        path = temp_file
+        to_feather(df, path)
+        with pd.option_context("mode.string_storage", string_storage):
+            result = read_feather(path, dtype_backend=dtype_backend)
 
         if dtype_backend == "pyarrow":
             pa = pytest.importorskip("pyarrow")
@@ -230,17 +230,17 @@ class TestFeather:
-    def test_int_columns_and_index(self):
+    def test_int_columns_and_index(self, temp_file):
         df = pd.DataFrame({"a": [1, 2, 3]}, index=pd.Index([3, 4, 5], name="test"))
-        self.check_round_trip(df)
+        self.check_round_trip(df, temp_file)
 
-    def test_invalid_dtype_backend(self):
+    def test_invalid_dtype_backend(self, tmp_path):
         msg = (
             "dtype_backend numpy is invalid, only 'numpy_nullable' and "
             "'pyarrow' are allowed."
         )
         df = pd.DataFrame({"int": list(range(1, 4))})
-        with tm.ensure_clean("tmp.feather") as path:
-            df.to_feather(path)
-            with pytest.raises(ValueError, match=msg):
-                read_feather(path, dtype_backend="numpy")
+        path = tmp_path / "tmp.feather"
+        df.to_feather(path)
+        with pytest.raises(ValueError, match=msg):
+            read_feather(path, dtype_backend="numpy")
 
     def test_string_inference(self, tmp_path, using_infer_string):
         # GH#54431
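
Note on fixtures: these changes lean on the `temp_file` fixture from pandas' top-level `conftest.py` (pytest's `tmp_path` is built in). For reviewers unfamiliar with it, below is a minimal sketch of what such a fixture provides, assuming the conftest convention of a uuid-named file created under `tmp_path`; the exact body (the uuid naming and the `touch()` call) is an assumption, not part of this diff:

```python
import uuid

import pytest


@pytest.fixture
def temp_file(tmp_path):
    """Return a unique, pre-created file path under pytest's tmp_path.

    tmp_path is a per-test pathlib.Path directory that pytest creates and
    cleans up automatically, which is what lets the tests above drop the
    tm.ensure_clean() context managers.
    """
    file_path = tmp_path / str(uuid.uuid4())
    file_path.touch()
    return file_path
```

Unlike `tm.ensure_clean()`, nothing here needs a `with` block: the fixture hands the test a plain `pathlib.Path`, and cleanup happens through pytest's `tmp_path` machinery rather than on context-manager exit.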