Skip to content

Commit 4dfad67

Browse files
Fix `str` type cast in parquet IO tests
1 parent c9ee1c1 commit 4dfad67

File tree

1 file changed

+46
-66
lines changed

1 file changed

+46
-66
lines changed

pandas/tests/io/test_parquet.py

Lines changed: 46 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ def compare(repeat):
224224
)
225225

226226
if path is None:
227-
path = str(temp_file)
227+
path = temp_file
228228
compare(repeat)
229229
else:
230230
compare(repeat)
@@ -342,27 +342,25 @@ def test_cross_engine_pa_fp(df_cross_compat, pa, fp, temp_file):
342342
# cross-compat with differing reading/writing engines
343343

344344
df = df_cross_compat
345-
path = str(temp_file)
346-
df.to_parquet(path, engine=pa, compression=None)
345+
df.to_parquet(temp_file, engine=pa, compression=None)
347346

348-
result = read_parquet(path, engine=fp)
347+
result = read_parquet(temp_file, engine=fp)
349348
tm.assert_frame_equal(result, df)
350349

351-
result = read_parquet(path, engine=fp, columns=["a", "d"])
350+
result = read_parquet(temp_file, engine=fp, columns=["a", "d"])
352351
tm.assert_frame_equal(result, df[["a", "d"]])
353352

354353

355354
def test_cross_engine_fp_pa(df_cross_compat, pa, fp, temp_file):
356355
# cross-compat with differing reading/writing engines
357356
df = df_cross_compat
358-
path = str(temp_file)
359357

360-
df.to_parquet(path, engine=fp, compression=None)
358+
df.to_parquet(temp_file, engine=fp, compression=None)
361359

362-
result = read_parquet(path, engine=pa)
360+
result = read_parquet(temp_file, engine=pa)
363361
tm.assert_frame_equal(result, df)
364362

365-
result = read_parquet(path, engine=pa, columns=["a", "d"])
363+
result = read_parquet(temp_file, engine=pa, columns=["a", "d"])
366364
tm.assert_frame_equal(result, df[["a", "d"]])
367365

368366

@@ -388,8 +386,7 @@ def test_error(self, engine, temp_file):
388386
np.array([1, 2, 3]),
389387
]:
390388
msg = "to_parquet only supports IO with DataFrames"
391-
path = str(temp_file)
392-
self.check_error_on_write(obj, engine, ValueError, msg, path)
389+
self.check_error_on_write(obj, engine, ValueError, msg, temp_file)
393390

394391
def test_columns_dtypes(self, engine, temp_file):
395392
df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))})
@@ -535,10 +532,9 @@ def test_write_column_multiindex(self, engine, temp_file):
535532
np.random.default_rng(2).standard_normal((4, 3)), columns=mi_columns
536533
)
537534

538-
path = str(temp_file)
539535
if engine == "fastparquet":
540536
self.check_error_on_write(
541-
df, engine, TypeError, "Column name must be a string", path
537+
df, engine, TypeError, "Column name must be a string", temp_file
542538
)
543539
elif engine == "pyarrow":
544540
check_round_trip(df, temp_file, engine)
@@ -555,9 +551,8 @@ def test_write_column_multiindex_nonstring(self, engine, temp_file):
555551
np.random.default_rng(2).standard_normal((8, 8)), columns=arrays
556552
)
557553
df.columns.names = ["Level1", "Level2"]
558-
path = str(temp_file)
559554
if engine == "fastparquet":
560-
self.check_error_on_write(df, engine, ValueError, "Column name", path)
555+
self.check_error_on_write(df, engine, ValueError, "Column name", temp_file)
561556
elif engine == "pyarrow":
562557
check_round_trip(df, temp_file, engine)
563558

@@ -601,10 +596,9 @@ def test_write_column_index_nonstring(self, engine, temp_file):
601596
np.random.default_rng(2).standard_normal((8, 4)), columns=arrays
602597
)
603598
df.columns.name = "NonStringCol"
604-
path = str(temp_file)
605599
if engine == "fastparquet":
606600
self.check_error_on_write(
607-
df, engine, TypeError, "Column name must be a string", path
601+
df, engine, TypeError, "Column name must be a string", temp_file
608602
)
609603
else:
610604
check_round_trip(df, temp_file, engine)
@@ -633,11 +627,10 @@ def test_dtype_backend(self, engine, request, temp_file):
633627
"g": pyarrow.array([1.0, 2.0, 3.0, None], "float64"),
634628
}
635629
)
636-
path = str(temp_file)
637630
# write manually with pyarrow to write integers
638-
pq.write_table(table, path)
639-
result1 = read_parquet(path, engine=engine)
640-
result2 = read_parquet(path, engine=engine, dtype_backend="numpy_nullable")
631+
pq.write_table(table, temp_file)
632+
result1 = read_parquet(temp_file, engine=engine)
633+
result2 = read_parquet(temp_file, engine=engine, dtype_backend="numpy_nullable")
641634

642635
assert result1["a"].dtype == np.dtype("float64")
643636
expected = pd.DataFrame(
@@ -756,9 +749,8 @@ def test_to_bytes_without_path_or_buf_provided(self, pa, df_full):
756749
def test_duplicate_columns(self, pa, temp_file):
757750
# not currently able to handle duplicate columns
758751
df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=list("aaa")).copy()
759-
path = str(temp_file)
760752
self.check_error_on_write(
761-
df, pa, ValueError, "Duplicate column names found", path
753+
df, pa, ValueError, "Duplicate column names found", temp_file
762754
)
763755

764756
def test_timedelta(self, pa, temp_file):
@@ -770,17 +762,17 @@ def test_unsupported(self, pa, temp_file):
770762
df = pd.DataFrame({"a": ["a", 1, 2.0]})
771763
# pyarrow 0.11 raises ArrowTypeError
772764
# older pyarrows raise ArrowInvalid
773-
path = str(temp_file)
774-
self.check_external_error_on_write(df, pa, pyarrow.ArrowException, path)
765+
self.check_external_error_on_write(df, pa, pyarrow.ArrowException, temp_file)
775766

776767
def test_unsupported_float16(self, pa, temp_file):
777768
# #44847, #44914
778769
# Not able to write float 16 column using pyarrow.
779770
data = np.arange(2, 10, dtype=np.float16)
780771
df = pd.DataFrame(data=data, columns=["fp16"])
781-
path = str(temp_file)
782772
if pa_version_under15p0:
783-
self.check_external_error_on_write(df, pa, pyarrow.ArrowException, path)
773+
self.check_external_error_on_write(
774+
df, pa, pyarrow.ArrowException, temp_file
775+
)
784776
else:
785777
check_round_trip(df, temp_file, pa)
786778

@@ -800,8 +792,7 @@ def test_unsupported_float16_cleanup(self, pa, path_type, temp_file):
800792
data = np.arange(2, 10, dtype=np.float16)
801793
df = pd.DataFrame(data=data, columns=["fp16"])
802794

803-
path_str = str(temp_file)
804-
path = path_type(path_str)
795+
path = path_type(temp_file)
805796
with tm.external_error_raised(pyarrow.ArrowException):
806797
df.to_parquet(path=path, engine=pa)
807798
assert not os.path.isfile(path)
@@ -1046,9 +1037,8 @@ def test_filter_row_groups(self, pa, temp_file):
10461037
# https://github.com/pandas-dev/pandas/issues/26551
10471038
pytest.importorskip("pyarrow")
10481039
df = pd.DataFrame({"a": list(range(3))})
1049-
path = str(temp_file)
1050-
df.to_parquet(path, engine=pa)
1051-
result = read_parquet(path, pa, filters=[("a", "==", 0)])
1040+
df.to_parquet(temp_file, engine=pa)
1041+
result = read_parquet(temp_file, pa, filters=[("a", "==", 0)])
10521042
assert len(result) == 1
10531043

10541044
@pytest.mark.filterwarnings("ignore:make_block is deprecated:DeprecationWarning")
@@ -1251,29 +1241,27 @@ def test_columns_dtypes_invalid(self, fp, temp_file):
12511241

12521242
err = TypeError
12531243
msg = "Column name must be a string"
1254-
path = str(temp_file)
12551244

12561245
# numeric
12571246
df.columns = [0, 1]
1258-
self.check_error_on_write(df, fp, err, msg, path)
1247+
self.check_error_on_write(df, fp, err, msg, temp_file)
12591248

12601249
# bytes
12611250
df.columns = [b"foo", b"bar"]
1262-
self.check_error_on_write(df, fp, err, msg, path)
1251+
self.check_error_on_write(df, fp, err, msg, temp_file)
12631252

12641253
# python object
12651254
df.columns = [
12661255
datetime.datetime(2011, 1, 1, 0, 0),
12671256
datetime.datetime(2011, 1, 1, 1, 1),
12681257
]
1269-
self.check_error_on_write(df, fp, err, msg, path)
1258+
self.check_error_on_write(df, fp, err, msg, temp_file)
12701259

12711260
def test_duplicate_columns(self, fp, temp_file):
12721261
# not currently able to handle duplicate columns
12731262
df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=list("aaa")).copy()
12741263
msg = "Cannot create parquet dataset with duplicate column names"
1275-
path = str(temp_file)
1276-
self.check_error_on_write(df, fp, ValueError, msg, path)
1264+
self.check_error_on_write(df, fp, ValueError, msg, temp_file)
12771265

12781266
def test_bool_with_none(self, fp, request, temp_file):
12791267
df = pd.DataFrame({"a": [True, None, False]})
@@ -1286,13 +1274,12 @@ def test_unsupported(self, fp, temp_file):
12861274
# period
12871275
df = pd.DataFrame({"a": pd.period_range("2013", freq="M", periods=3)})
12881276
# error from fastparquet -> don't check exact error message
1289-
path = str(temp_file)
1290-
self.check_error_on_write(df, fp, ValueError, None, path)
1277+
self.check_error_on_write(df, fp, ValueError, None, temp_file)
12911278

12921279
# mixed
12931280
df = pd.DataFrame({"a": ["a", 1, 2.0]})
12941281
msg = "Can't infer object conversion type"
1295-
self.check_error_on_write(df, fp, ValueError, msg, path)
1282+
self.check_error_on_write(df, fp, ValueError, msg, temp_file)
12961283

12971284
def test_categorical(self, fp, temp_file):
12981285
df = pd.DataFrame({"a": pd.Categorical(list("abc"))})
@@ -1301,9 +1288,8 @@ def test_categorical(self, fp, temp_file):
13011288
def test_filter_row_groups(self, fp, temp_file):
13021289
d = {"a": list(range(3))}
13031290
df = pd.DataFrame(d)
1304-
path = str(temp_file)
1305-
df.to_parquet(path, engine=fp, compression=None, row_group_offsets=1)
1306-
result = read_parquet(path, fp, filters=[("a", "==", 0)])
1291+
df.to_parquet(temp_file, engine=fp, compression=None, row_group_offsets=1)
1292+
result = read_parquet(temp_file, fp, filters=[("a", "==", 0)])
13071293
assert len(result) == 1
13081294

13091295
@pytest.mark.single_cpu
@@ -1403,73 +1389,68 @@ def test_timezone_aware_index(
14031389
check_round_trip(df, temp_file, fp, expected=expected)
14041390

14051391
def test_close_file_handle_on_read_error(self, temp_file):
1406-
path = str(temp_file)
1407-
pathlib.Path(path).write_bytes(b"breakit")
1392+
pathlib.Path(temp_file).write_bytes(b"breakit")
14081393
with tm.external_error_raised(Exception): # Not important which exception
1409-
read_parquet(path, engine="fastparquet")
1394+
read_parquet(temp_file, engine="fastparquet")
14101395
# The next line raises an error on Windows if the file is still open
1411-
pathlib.Path(path).unlink(missing_ok=False)
1396+
pathlib.Path(temp_file).unlink(missing_ok=False)
14121397

14131398
def test_bytes_file_name(self, engine, temp_file):
14141399
# GH#48944
14151400
df = pd.DataFrame(data={"A": [0, 1], "B": [1, 0]})
1416-
path = str(temp_file)
1417-
with open(path.encode(), "wb") as f:
1401+
with open(temp_file, "wb") as f:
14181402
df.to_parquet(f)
14191403

1420-
result = read_parquet(path, engine=engine)
1404+
result = read_parquet(temp_file, engine=engine)
14211405
tm.assert_frame_equal(result, df)
14221406

14231407
def test_filesystem_notimplemented(self, temp_file):
14241408
pytest.importorskip("fastparquet")
14251409
df = pd.DataFrame(data={"A": [0, 1], "B": [1, 0]})
1426-
path = str(temp_file)
14271410
with pytest.raises(NotImplementedError, match="filesystem is not implemented"):
1428-
df.to_parquet(path, engine="fastparquet", filesystem="foo")
1411+
df.to_parquet(temp_file, engine="fastparquet", filesystem="foo")
14291412

1430-
pathlib.Path(path).write_bytes(b"foo")
1413+
pathlib.Path(temp_file).write_bytes(b"foo")
14311414
with pytest.raises(NotImplementedError, match="filesystem is not implemented"):
1432-
read_parquet(path, engine="fastparquet", filesystem="foo")
1415+
read_parquet(temp_file, engine="fastparquet", filesystem="foo")
14331416

14341417
def test_invalid_filesystem(self, temp_file):
14351418
pytest.importorskip("pyarrow")
14361419
df = pd.DataFrame(data={"A": [0, 1], "B": [1, 0]})
1437-
path = str(temp_file)
14381420

14391421
with pytest.raises(
14401422
ValueError, match="filesystem must be a pyarrow or fsspec FileSystem"
14411423
):
1442-
df.to_parquet(path, engine="pyarrow", filesystem="foo")
1424+
df.to_parquet(temp_file, engine="pyarrow", filesystem="foo")
14431425

1444-
pathlib.Path(path).write_bytes(b"foo")
1426+
pathlib.Path(temp_file).write_bytes(b"foo")
14451427
with pytest.raises(
14461428
ValueError, match="filesystem must be a pyarrow or fsspec FileSystem"
14471429
):
1448-
read_parquet(path, engine="pyarrow", filesystem="foo")
1430+
read_parquet(temp_file, engine="pyarrow", filesystem="foo")
14491431

14501432
def test_unsupported_pa_filesystem_storage_options(self, temp_file):
14511433
pa_fs = pytest.importorskip("pyarrow.fs")
14521434
df = pd.DataFrame(data={"A": [0, 1], "B": [1, 0]})
1453-
path = str(temp_file)
14541435

14551436
with pytest.raises(
14561437
NotImplementedError,
14571438
match="storage_options not supported with a pyarrow FileSystem.",
14581439
):
14591440
df.to_parquet(
1460-
path,
1441+
temp_file,
14611442
engine="pyarrow",
14621443
filesystem=pa_fs.LocalFileSystem(),
14631444
storage_options={"foo": "bar"},
14641445
)
14651446

1466-
pathlib.Path(path).write_bytes(b"foo")
1447+
pathlib.Path(temp_file).write_bytes(b"foo")
14671448
with pytest.raises(
14681449
NotImplementedError,
14691450
match="storage_options not supported with a pyarrow FileSystem.",
14701451
):
14711452
read_parquet(
1472-
path,
1453+
temp_file,
14731454
engine="pyarrow",
14741455
filesystem=pa_fs.LocalFileSystem(),
14751456
storage_options={"foo": "bar"},
@@ -1481,7 +1462,6 @@ def test_invalid_dtype_backend(self, engine, temp_file):
14811462
"'pyarrow' are allowed."
14821463
)
14831464
df = pd.DataFrame({"int": list(range(1, 4))})
1484-
path = str(temp_file)
1485-
df.to_parquet(path)
1465+
df.to_parquet(temp_file)
14861466
with pytest.raises(ValueError, match=msg):
1487-
read_parquet(path, dtype_backend="numpy")
1467+
read_parquet(temp_file, dtype_backend="numpy")

0 commit comments

Comments (0)