150 changes: 75 additions & 75 deletions pandas/tests/io/test_common.py
@@ -86,12 +86,12 @@ def test_stringify_path_fspath(self):
         result = icom.stringify_path(p)
         assert result == "foo/bar.csv"

-    def test_stringify_file_and_path_like(self):
+    def test_stringify_file_and_path_like(self, temp_file):
         # GH 38125: do not stringify file objects that are also path-like
         fsspec = pytest.importorskip("fsspec")
-        with tm.ensure_clean() as path:
-            with fsspec.open(f"file://{path}", mode="wb") as fsspec_obj:
-                assert fsspec_obj == icom.stringify_path(fsspec_obj)
+        path = temp_file
+        with fsspec.open(f"file://{path}", mode="wb") as fsspec_obj:
+            assert fsspec_obj == icom.stringify_path(fsspec_obj)

     @pytest.mark.parametrize("path_type", [str, CustomFSPath, Path])
     def test_infer_compression_from_path(self, compression_format, path_type):
@@ -338,11 +338,11 @@ def test_read_fspath_all(self, reader, module, path, datapath):
             ("to_stata", {"time_stamp": pd.to_datetime("2019-01-01 00:00")}, "os"),
         ],
     )
-    def test_write_fspath_all(self, writer_name, writer_kwargs, module):
+    def test_write_fspath_all(self, writer_name, writer_kwargs, module, tmp_path):
         if writer_name in ["to_latex"]:  # uses Styler implementation
             pytest.importorskip("jinja2")
-        p1 = tm.ensure_clean("string")
-        p2 = tm.ensure_clean("fspath")
+        p1 = tmp_path / "string"
+        p2 = tmp_path / "fspath"
         df = pd.DataFrame({"A": [1, 2]})

         with p1 as string, p2 as fspath:
@@ -364,15 +364,15 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module):
                 expected = f_path.read()
             assert result == expected

-    def test_write_fspath_hdf5(self):
+    def test_write_fspath_hdf5(self, tmp_path):
         # Same test as write_fspath_all, except HDF5 files aren't
         # necessarily byte-for-byte identical for a given dataframe, so we'll
         # have to read and compare equality
         pytest.importorskip("tables")

         df = pd.DataFrame({"A": [1, 2]})
-        p1 = tm.ensure_clean("string")
-        p2 = tm.ensure_clean("fspath")
+        p1 = tmp_path / "string"
+        p2 = tmp_path / "fspath"

         with p1 as string, p2 as fspath:
             mypath = CustomFSPath(fspath)
@@ -432,35 +432,35 @@ def test_next(self, mmap_file):
         with pytest.raises(StopIteration, match=r"^$"):
             next(wrapper)

-    def test_unknown_engine(self):
-        with tm.ensure_clean() as path:
-            df = pd.DataFrame(
-                1.1 * np.arange(120).reshape((30, 4)),
-                columns=pd.Index(list("ABCD")),
-                index=pd.Index([f"i-{i}" for i in range(30)]),
-            )
-            df.to_csv(path)
-            with pytest.raises(ValueError, match="Unknown engine"):
-                pd.read_csv(path, engine="pyt")
-
-    def test_binary_mode(self):
+    def test_unknown_engine(self, temp_file):
+        path = temp_file
+        df = pd.DataFrame(
+            1.1 * np.arange(120).reshape((30, 4)),
+            columns=pd.Index(list("ABCD")),
+            index=pd.Index([f"i-{i}" for i in range(30)]),
+        )
+        df.to_csv(path)
+        with pytest.raises(ValueError, match="Unknown engine"):
+            pd.read_csv(path, engine="pyt")
+
+    def test_binary_mode(self, temp_file):
         """
         'encoding' shouldn't be passed to 'open' in binary mode.

         GH 35058
         """
-        with tm.ensure_clean() as path:
-            df = pd.DataFrame(
-                1.1 * np.arange(120).reshape((30, 4)),
-                columns=pd.Index(list("ABCD")),
-                index=pd.Index([f"i-{i}" for i in range(30)]),
-            )
-            df.to_csv(path, mode="w+b")
-            tm.assert_frame_equal(df, pd.read_csv(path, index_col=0))
+        path = temp_file
+        df = pd.DataFrame(
+            1.1 * np.arange(120).reshape((30, 4)),
+            columns=pd.Index(list("ABCD")),
+            index=pd.Index([f"i-{i}" for i in range(30)]),
+        )
+        df.to_csv(path, mode="w+b")
+        tm.assert_frame_equal(df, pd.read_csv(path, index_col=0))

     @pytest.mark.parametrize("encoding", ["utf-16", "utf-32"])
     @pytest.mark.parametrize("compression_", ["bz2", "xz"])
-    def test_warning_missing_utf_bom(self, encoding, compression_):
+    def test_warning_missing_utf_bom(self, encoding, compression_, temp_file):
         """
         bz2 and xz do not write the byte order mark (BOM) for utf-16/32.

@@ -473,17 +473,17 @@ def test_warning_missing_utf_bom(self, encoding, compression_):
             columns=pd.Index(list("ABCD")),
             index=pd.Index([f"i-{i}" for i in range(30)]),
         )
-        with tm.ensure_clean() as path:
-            with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"):
-                df.to_csv(path, compression=compression_, encoding=encoding)
-
-            # reading should fail (otherwise we wouldn't need the warning)
-            msg = (
-                r"UTF-\d+ stream does not start with BOM|"
-                r"'utf-\d+' codec can't decode byte"
-            )
-            with pytest.raises(UnicodeError, match=msg):
-                pd.read_csv(path, compression=compression_, encoding=encoding)
+        path = temp_file
+        with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"):
+            df.to_csv(path, compression=compression_, encoding=encoding)
+
+        # reading should fail (otherwise we wouldn't need the warning)
+        msg = (
+            r"UTF-\d+ stream does not start with BOM|"
+            r"'utf-\d+' codec can't decode byte"
+        )
+        with pytest.raises(UnicodeError, match=msg):
+            pd.read_csv(path, compression=compression_, encoding=encoding)


 def test_is_fsspec_url():
@@ -514,38 +514,38 @@ def test_is_fsspec_url_chained():


 @pytest.mark.parametrize("format", ["csv", "json"])
-def test_codecs_encoding(format):
+def test_codecs_encoding(format, temp_file):
     # GH39247
     expected = pd.DataFrame(
         1.1 * np.arange(120).reshape((30, 4)),
         columns=pd.Index(list("ABCD")),
         index=pd.Index([f"i-{i}" for i in range(30)]),
     )
-    with tm.ensure_clean() as path:
-        with open(path, mode="w", encoding="utf-8") as handle:
-            getattr(expected, f"to_{format}")(handle)
-        with open(path, encoding="utf-8") as handle:
-            if format == "csv":
-                df = pd.read_csv(handle, index_col=0)
-            else:
-                df = pd.read_json(handle)
+    path = temp_file
+    with open(path, mode="w", encoding="utf-8") as handle:
+        getattr(expected, f"to_{format}")(handle)
+    with open(path, encoding="utf-8") as handle:
+        if format == "csv":
+            df = pd.read_csv(handle, index_col=0)
+        else:
+            df = pd.read_json(handle)
     tm.assert_frame_equal(expected, df)


-def test_codecs_get_writer_reader():
+def test_codecs_get_writer_reader(temp_file):
     # GH39247
     expected = pd.DataFrame(
         1.1 * np.arange(120).reshape((30, 4)),
         columns=pd.Index(list("ABCD")),
         index=pd.Index([f"i-{i}" for i in range(30)]),
     )
-    with tm.ensure_clean() as path:
-        with open(path, "wb") as handle:
-            with codecs.getwriter("utf-8")(handle) as encoded:
-                expected.to_csv(encoded)
-        with open(path, "rb") as handle:
-            with codecs.getreader("utf-8")(handle) as encoded:
-                df = pd.read_csv(encoded, index_col=0)
+    path = temp_file
+    with open(path, "wb") as handle:
+        with codecs.getwriter("utf-8")(handle) as encoded:
+            expected.to_csv(encoded)
+    with open(path, "rb") as handle:
+        with codecs.getreader("utf-8")(handle) as encoded:
+            df = pd.read_csv(encoded, index_col=0)
     tm.assert_frame_equal(expected, df)

@@ -572,7 +572,7 @@ def test_explicit_encoding(io_class, mode, msg):

 @pytest.mark.parametrize("encoding_errors", ["strict", "replace"])
 @pytest.mark.parametrize("format", ["csv", "json"])
-def test_encoding_errors(encoding_errors, format):
+def test_encoding_errors(encoding_errors, format, temp_file):
     # GH39450
     msg = "'utf-8' codec can't decode byte"
     bad_encoding = b"\xe4"
@@ -591,18 +591,18 @@ def test_encoding_errors(encoding_errors, format):
             + b'"}}'
         )
         reader = partial(pd.read_json, orient="index")
-    with tm.ensure_clean() as path:
-        file = Path(path)
-        file.write_bytes(content)
+    path = temp_file
+    file = Path(path)
+    file.write_bytes(content)

-        if encoding_errors != "replace":
-            with pytest.raises(UnicodeDecodeError, match=msg):
-                reader(path, encoding_errors=encoding_errors)
-        else:
-            df = reader(path, encoding_errors=encoding_errors)
-            decoded = bad_encoding.decode(errors=encoding_errors)
-            expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2])
-            tm.assert_frame_equal(df, expected)
+    if encoding_errors != "replace":
+        with pytest.raises(UnicodeDecodeError, match=msg):
+            reader(path, encoding_errors=encoding_errors)
+    else:
+        df = reader(path, encoding_errors=encoding_errors)
+        decoded = bad_encoding.decode(errors=encoding_errors)
+        expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2])
+        tm.assert_frame_equal(df, expected)


 @pytest.mark.parametrize("encoding_errors", [0, None])
@@ -616,11 +616,11 @@ def test_encoding_errors_badtype(encoding_errors):
         reader(content)


-def test_bad_encdoing_errors():
+def test_bad_encdoing_errors(temp_file):
     # GH 39777
-    with tm.ensure_clean() as path:
-        with pytest.raises(LookupError, match="unknown error handler name"):
-            icom.get_handle(path, "w", errors="bad")
+    path = temp_file
+    with pytest.raises(LookupError, match="unknown error handler name"):
+        icom.get_handle(path, "w", errors="bad")


 @pytest.mark.skipif(WASM, reason="limited file system access on WASM")
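Both files swap tm.ensure_clean for pytest-managed paths. The temp_file fixture these tests request comes from pandas' own conftest.py; a minimal sketch of such a fixture, assuming a uuid-based filename (the exact pandas definition may differ):

import uuid

import pytest


@pytest.fixture
def temp_file(tmp_path):
    # Build a unique file inside pytest's built-in per-test tmp_path
    # directory; touch() creates it so tests that only probe error paths
    # or open the path for reading still see an existing file.
    file_path = tmp_path / str(uuid.uuid4())
    file_path.touch()
    return file_path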
42 changes: 21 additions & 21 deletions pandas/tests/io/test_feather.py
@@ -26,34 +26,34 @@

 @pytest.mark.single_cpu
 class TestFeather:
-    def check_error_on_write(self, df, exc, err_msg):
+    def check_error_on_write(self, df, exc, err_msg, temp_file):
         # check that we are raising the exception
         # on writing

         with pytest.raises(exc, match=err_msg):
-            with tm.ensure_clean() as path:
-                to_feather(df, path)
+            to_feather(df, temp_file)

-    def check_external_error_on_write(self, df):
+    def check_external_error_on_write(self, df, temp_file):
         # check that we are raising the exception
         # on writing

         with tm.external_error_raised(Exception):
-            with tm.ensure_clean() as path:
-                to_feather(df, path)
+            to_feather(df, temp_file)

-    def check_round_trip(self, df, expected=None, write_kwargs=None, **read_kwargs):
+    def check_round_trip(
+        self, df, temp_file, expected=None, write_kwargs=None, **read_kwargs
+    ):
         if write_kwargs is None:
             write_kwargs = {}
         if expected is None:
             expected = df.copy()

-        with tm.ensure_clean() as path:
-            to_feather(df, path, **write_kwargs)
+        path = temp_file
+        to_feather(df, path, **write_kwargs)

-            result = read_feather(path, **read_kwargs)
+        result = read_feather(path, **read_kwargs)

-            tm.assert_frame_equal(result, expected)
+        tm.assert_frame_equal(result, expected)

     def test_error(self):
         msg = "feather only support IO with DataFrames"
@@ -168,7 +168,7 @@ def test_http_path(self, feather_file, httpserver):
         tm.assert_frame_equal(expected, res)

     def test_read_feather_dtype_backend(
-        self, string_storage, dtype_backend, using_infer_string
+        self, string_storage, dtype_backend, using_infer_string, temp_file
     ):
         # GH#50765
         df = pd.DataFrame(
@@ -184,10 +184,10 @@ def test_read_feather_dtype_backend(
             }
         )

-        with tm.ensure_clean() as path:
-            to_feather(df, path)
-            with pd.option_context("mode.string_storage", string_storage):
-                result = read_feather(path, dtype_backend=dtype_backend)
+        path = temp_file
+        to_feather(df, path)
+        with pd.option_context("mode.string_storage", string_storage):
+            result = read_feather(path, dtype_backend=dtype_backend)

         if dtype_backend == "pyarrow":
             pa = pytest.importorskip("pyarrow")
@@ -231,16 +231,16 @@ def test_int_columns_and_index(self):
         df = pd.DataFrame({"a": [1, 2, 3]}, index=pd.Index([3, 4, 5], name="test"))
         self.check_round_trip(df)

-    def test_invalid_dtype_backend(self):
+    def test_invalid_dtype_backend(self, tmp_path):
         msg = (
             "dtype_backend numpy is invalid, only 'numpy_nullable' and "
             "'pyarrow' are allowed."
         )
         df = pd.DataFrame({"int": list(range(1, 4))})
-        with tm.ensure_clean("tmp.feather") as path:
-            df.to_feather(path)
-            with pytest.raises(ValueError, match=msg):
-                read_feather(path, dtype_backend="numpy")
+        path = tmp_path / "tmp.feather"
+        df.to_feather(path)
+        with pytest.raises(ValueError, match=msg):
+            read_feather(path, dtype_backend="numpy")

     def test_string_inference(self, tmp_path, using_infer_string):
         # GH#54431
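The same mechanical rewrite repeats throughout both files: the with tm.ensure_clean() as path: context manager disappears and its body dedents one level, because pytest now owns the file's lifetime. A before/after sketch of the pattern (test names and the DataFrame are illustrative, not from the diff):

import pandas as pd
import pandas._testing as tm


# Before: tm.ensure_clean() yields a temporary path and deletes it on exit,
# so every statement touching the file must sit inside the with-block.
def test_roundtrip_old():
    df = pd.DataFrame({"A": [1, 2]})
    with tm.ensure_clean() as path:
        df.to_csv(path)
        tm.assert_frame_equal(df, pd.read_csv(path, index_col=0))


# After: the fixture hands the test a ready-made path under tmp_path and
# pytest cleans the directory up later, so no context manager is needed.
def test_roundtrip_new(temp_file):
    df = pd.DataFrame({"A": [1, 2]})
    df.to_csv(temp_file)
    tm.assert_frame_equal(df, pd.read_csv(temp_file, index_col=0))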