Skip to content

Commit 5afdc02

Browse files
fineguy and The TensorFlow Datasets Authors
authored and committed
Refactor download_and_prepare script.
PiperOrigin-RevId: 795187236
1 parent aca83df commit 5afdc02

File tree

3 files changed

+27
-45
lines changed

3 files changed

+27
-45
lines changed

tensorflow_datasets/scripts/cli/build.py

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -33,42 +33,42 @@
3333

3434
@dataclasses.dataclass(frozen=True, kw_only=True)
3535
class Args(cli_utils.Args):
36-
"""Commands for downloading and preparing datasets.
37-
38-
Attributes:
39-
positional_datasets: Name(s) of the dataset(s) to build. Default to current
40-
dir. See https://www.tensorflow.org/datasets/cli for accepted values.
41-
datasets: Datasets can also be provided as keyword argument.
42-
debug: Debug & tests options. Use --pdb to enter post-mortem debugging mode
43-
if an exception is raised.
44-
paths: Path options.
45-
generation: Generation options.
46-
publishing: Publishing options.
47-
automation: Automation options.
48-
"""
36+
"""Commands for downloading and preparing datasets."""
4937

50-
positional_datasets: list[str] = simple_parsing.field(
38+
# Name(s) of the dataset(s) to build. Default to current dir. See
39+
# https://www.tensorflow.org/datasets/cli for accepted values.
40+
positional_datasets: list[str] = simple_parsing.list_field(
5141
positional=True,
52-
nargs='*',
53-
default_factory=list,
5442
# Need to explicitly set metavar for command-line help.
5543
metavar='datasets',
5644
)
57-
datasets: list[str] = simple_parsing.field(nargs='*', default_factory=list)
45+
46+
datasets: list[str] = simple_parsing.list_field(alias='--dataset')
47+
"""Datasets can also be provided as keyword argument."""
5848

5949
debug: cli_utils.DebugOptions = cli_utils.DebugOptions()
50+
"""Debug & tests options. Use --pdb to enter post-mortem debugging mode if an
51+
exception is raised."""
52+
6053
paths: cli_utils.PathOptions = simple_parsing.field(
6154
default_factory=cli_utils.PathOptions
6255
)
56+
"""Path options."""
57+
6358
generation: cli_utils.GenerationOptions = simple_parsing.field(
6459
default_factory=cli_utils.GenerationOptions
6560
)
61+
"""Generation options."""
62+
6663
publishing: cli_utils.PublishingOptions = simple_parsing.field(
6764
default_factory=cli_utils.PublishingOptions
6865
)
66+
"""Publishing options."""
67+
6968
automation: cli_utils.AutomationOptions = simple_parsing.field(
7069
default_factory=cli_utils.AutomationOptions
7170
)
71+
"""Automation options."""
7272

7373
def execute(self) -> None:
7474
"""Build the given datasets."""

tensorflow_datasets/scripts/cli/cli_utils.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -243,10 +243,14 @@ class GenerationOptions:
243243

244244
download_only: bool = simple_parsing.flag(default=False)
245245
config: str | None = simple_parsing.field(default=None, alias='-c')
246-
config_idx: int | None = None
246+
config_idx: int | None = simple_parsing.field(
247+
default=None, alias='--builder_config_id'
248+
)
247249
update_metadata_only: bool = simple_parsing.flag(default=False)
248250
download_config: str | None = None
249-
imports: str | None = simple_parsing.field(default=None, alias='-i')
251+
imports: str | None = simple_parsing.field(
252+
default=None, alias=['-i', '--module_import']
253+
)
250254
register_checksums: bool = simple_parsing.flag(default=False)
251255
force_checksums_validation: bool = simple_parsing.flag(default=False)
252256
beam_pipeline_options: str | None = None

tensorflow_datasets/scripts/download_and_prepare.py

Lines changed: 4 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -15,43 +15,21 @@
1515

1616
r"""Wrapper around `tfds build`."""
1717

18-
import typing
19-
2018
from absl import app
2119
from absl import flags
22-
from absl import logging
23-
from tensorflow_datasets.scripts.cli import build
2420
from tensorflow_datasets.scripts.cli import main as main_cli
2521

26-
module_import = flags.DEFINE_string('module_import', None, '`--imports` flag.')
27-
dataset = flags.DEFINE_string('dataset', None, 'singleton `--datasets` flag.')
28-
29-
builder_config_id = flags.DEFINE_integer(
30-
'builder_config_id', None, '`--config_idx` flag'
31-
)
32-
33-
3422

3523
def _parse_flags(argv: list[str]) -> main_cli.Args:
3624
"""Command lines flag parsing."""
3725
return main_cli._parse_flags([argv[0], 'build'] + argv[1:]) # pylint: disable=protected-access
3826

3927

40-
_display_warning = True
41-
42-
4328
def main(args: main_cli.Args) -> None:
44-
if _display_warning:
45-
logging.warning(
46-
'***`tfds build` should be used instead of `download_and_prepare`.***'
47-
)
48-
cmd_args = typing.cast(build.Args, args.command)
49-
if module_import.value:
50-
cmd_args.generation.imports = module_import.value
51-
if dataset.value:
52-
cmd_args.datasets = [dataset.value]
53-
if builder_config_id.value is not None:
54-
cmd_args.generation.config_idx = builder_config_id.value
29+
from absl import logging
30+
logging.warning(
31+
'***`tfds build` should be used instead of `download_and_prepare`.***'
32+
)
5533
main_cli.main(args)
5634

5735

0 commit comments

Comments
 (0)