diff --git a/src/datasets/load.py b/src/datasets/load.py
index ae3b9825970..0398c4cad8a 100644
--- a/src/datasets/load.py
+++ b/src/datasets/load.py
@@ -25,8 +25,8 @@
 from collections.abc import Mapping, Sequence
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Optional, Union
+from typing import Any, Literal, Optional, Union, overload
 
 import fsspec
 import httpx
 import requests
@@ -1187,6 +1187,101 @@ def load_dataset_builder(
     return builder_instance
 
 
+@overload  # typing-only signature: split=None and streaming=False -> DatasetDict
+def load_dataset(
+    path: str,
+    name: Optional[str] = None,
+    data_dir: Optional[str] = None,
+    data_files: Optional[Union[str, Sequence[str], Mapping[str, Union[str, Sequence[str]]]]] = None,
+    split: None = None,  # this overload covers calls that do not select a split
+    cache_dir: Optional[str] = None,
+    features: Optional[Features] = None,
+    download_config: Optional[DownloadConfig] = None,
+    download_mode: Optional[Union[DownloadMode, str]] = None,
+    verification_mode: Optional[Union[VerificationMode, str]] = None,
+    keep_in_memory: Optional[bool] = None,
+    save_infos: bool = False,
+    revision: Optional[Union[str, Version]] = None,
+    token: Optional[Union[bool, str]] = None,
+    streaming: Literal[False] = False,  # Literal lets type checkers pick this overload when streaming is off
+    num_proc: Optional[int] = None,
+    storage_options: Optional[dict] = None,
+    **config_kwargs: Any,
+) -> DatasetDict: ...
+
+
+@overload  # typing-only signature: explicit split and streaming=False -> Dataset
+def load_dataset(
+    path: str,
+    name: Optional[str] = None,
+    data_dir: Optional[str] = None,
+    data_files: Optional[Union[str, Sequence[str], Mapping[str, Union[str, Sequence[str]]]]] = None,
+    *,  # keyword-only from here: `split` has no default, so it cannot follow defaulted positionals
+    split: Union[str, Split, list[str], list[Split]],  # NOTE(review): list-valued split also maps to Dataset - confirm
+    cache_dir: Optional[str] = None,
+    features: Optional[Features] = None,
+    download_config: Optional[DownloadConfig] = None,
+    download_mode: Optional[Union[DownloadMode, str]] = None,
+    verification_mode: Optional[Union[VerificationMode, str]] = None,
+    keep_in_memory: Optional[bool] = None,
+    save_infos: bool = False,
+    revision: Optional[Union[Version, str]] = None,
+    token: Optional[Union[bool, str]] = None,
+    streaming: Literal[False] = False,  # Literal lets type checkers pick this overload when streaming is off
+    num_proc: Optional[int] = None,
+    storage_options: Optional[dict] = None,
+    **config_kwargs: Any,
+) -> Dataset: ...
+
+
+@overload  # typing-only signature: split=None and streaming=True -> IterableDatasetDict
+def load_dataset(
+    path: str,
+    name: Optional[str] = None,
+    data_dir: Optional[str] = None,
+    data_files: Optional[Union[str, Sequence[str], Mapping[str, Union[str, Sequence[str]]]]] = None,
+    split: None = None,  # this overload covers calls that do not select a split
+    cache_dir: Optional[str] = None,
+    features: Optional[Features] = None,
+    download_config: Optional[DownloadConfig] = None,
+    download_mode: Optional[Union[DownloadMode, str]] = None,
+    verification_mode: Optional[Union[VerificationMode, str]] = None,
+    keep_in_memory: Optional[bool] = None,
+    save_infos: bool = False,
+    revision: Optional[Union[Version, str]] = None,
+    token: Optional[Union[bool, str]] = None,
+    *,  # keyword-only from here: `streaming` is required (no default) in this overload
+    streaming: Literal[True],
+    num_proc: Optional[int] = None,
+    storage_options: Optional[dict] = None,
+    **config_kwargs: Any,
+) -> IterableDatasetDict: ...
+
+
+@overload  # typing-only signature: explicit split and streaming=True -> IterableDataset
+def load_dataset(
+    path: str,
+    name: Optional[str] = None,
+    data_dir: Optional[str] = None,
+    data_files: Optional[Union[str, Sequence[str], Mapping[str, Union[str, Sequence[str]]]]] = None,
+    *,  # keyword-only from here: `split` and `streaming` are required and have no defaults
+    split: Union[str, Split, list[str], list[Split]],  # NOTE(review): list-valued split maps to IterableDataset - confirm
+    cache_dir: Optional[str] = None,
+    features: Optional[Features] = None,
+    download_config: Optional[DownloadConfig] = None,
+    download_mode: Optional[Union[DownloadMode, str]] = None,
+    verification_mode: Optional[Union[VerificationMode, str]] = None,
+    keep_in_memory: Optional[bool] = None,
+    save_infos: bool = False,
+    revision: Optional[Union[Version, str]] = None,
+    token: Optional[Union[bool, str]] = None,
+    streaming: Literal[True],  # required: callers must pass streaming=True to match this overload
+    num_proc: Optional[int] = None,
+    storage_options: Optional[dict] = None,
+    **config_kwargs: Any,
+) -> IterableDataset: ...
+
+
 def load_dataset(
     path: str,
     name: Optional[str] = None,