Skip to content

Commit 8ea1832

Browse files
committed
BUG: validate path type in read_parquet; reject non-path/file-like (gh-62922)
1 parent f4851e5 commit 8ea1832

File tree

2 files changed

+45
-0
lines changed

2 files changed

+45
-0
lines changed

pandas/io/parquet.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
)
2828
from pandas.util._validators import check_dtype_backend
2929

30+
from pandas.core.dtypes.common import is_file_like
31+
3032
from pandas import (
3133
DataFrame,
3234
get_option,
@@ -656,6 +658,13 @@ def read_parquet(
656658
0 3 8
657659
1 4 9
658660
"""
661+
# gh-62922: validate path type early to match documented API expectations
662+
# and provide a consistent, clear user error immediately.
663+
if not (isinstance(path, (str, os.PathLike)) or is_file_like(path)):
664+
raise TypeError(
665+
f"read_parquet expected str/os.PathLike or file-like object, "
666+
f"got {type(path).__name__} type"
667+
)
659668

660669
impl = get_engine(engine)
661670
check_dtype_backend(dtype_backend)

pandas/tests/io/test_parquet.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,42 @@ def check_partition_names(path, expected):
248248
assert dataset.partitioning.schema.names == expected
249249

250250

251+
def test_read_parquet_invalid_path_types(tmp_path, engine):
    """Check that read_parquet rejects list, tuple and bytes arguments.

    Each rejected input must raise ``TypeError`` whose message names the
    type of the offending argument.
    """
    # GH #62922
    frame = pd.DataFrame({"a": [1]})
    parquet_file = tmp_path / "test_read_parquet.parquet"
    frame.to_parquet(parquet_file, engine=engine)

    file_name = str(parquet_file)
    rejected_inputs = (
        [file_name],  # list
        (file_name,),  # tuple
        b"raw-bytes",  # bytes
    )
    for candidate in rejected_inputs:
        type_name = type(candidate).__name__
        # The message is passed to pytest.raises as a regex pattern.
        expected_msg = (
            f"read_parquet expected str/os.PathLike or file-like object, "
            f"got {type_name} type"
        )
        with pytest.raises(TypeError, match=expected_msg):
            read_parquet(candidate, engine=engine)
269+
270+
271+
def test_read_parquet_valid_path_types(tmp_path, engine):
    """Check that read_parquet accepts a str, a pathlib.Path and a buffer.

    The test passes as long as none of the calls raises; the returned
    frames are not inspected.
    """
    # GH #62922
    frame = pd.DataFrame({"a": [1]})
    parquet_file = tmp_path / "test_read_parquet.parquet"
    frame.to_parquet(parquet_file, engine=engine)

    # str first, then os.PathLike
    for location in (str(parquet_file), pathlib.Path(parquet_file)):
        read_parquet(location, engine=engine)

    # file-like object: write into memory, rewind, then read back
    stream = BytesIO()
    frame.to_parquet(stream, engine=engine)
    stream.seek(0)
    read_parquet(stream, engine=engine)
285+
286+
251287
def test_invalid_engine(df_compat, temp_file):
252288
msg = "engine must be one of 'pyarrow', 'fastparquet'"
253289
with pytest.raises(ValueError, match=msg):

0 commit comments

Comments
 (0)