 from tensorflow_datasets.core.utils import file_utils
 from tensorflow_datasets.core.utils import gcs_utils
 from tensorflow_datasets.core.utils import read_config as read_config_lib
+from tensorflow_datasets.core.utils import retry
 from tensorflow_datasets.core.utils import type_utils
 # pylint: enable=g-import-not-at-top

@@ -290,7 +291,8 @@ def __init__(
     # Compute the base directory (for download) and dataset/version directory.
     self._data_dir_root, self._data_dir = self._build_data_dir(data_dir)
     # If the dataset info is available, use it.
-    if dataset_info.dataset_info_path(self.data_path).exists():
+    dataset_info_path = dataset_info.dataset_info_path(self.data_path)
+    if retry.retry(dataset_info_path.exists):
       self.info.read_from_directory(self._data_dir)
     else:  # Use the code version (do not restore data)
       self.info.initialize_from_bucket()
@@ -466,8 +468,8 @@ def _checksums_path(cls) -> epath.Path | None:
         # zipfile.Path does not have `.parts`. Additionally, `os.fspath`
         # will extract the file, so use `str`.
         "tensorflow_datasets" in str(new_path)
-        and legacy_path.exists()
-        and not new_path.exists()
+        and retry.retry(legacy_path.exists)
+        and not retry.retry(new_path.exists)
     ):
       return legacy_path
     else:
@@ -484,7 +486,7 @@ def url_infos(cls) -> dict[str, download.checksums.UrlInfo] | None:
     # Search for the url_info file.
     checksums_path = cls._checksums_path
     # If url_info file is found, load the urls
-    if checksums_path and checksums_path.exists():
+    if checksums_path and retry.retry(checksums_path.exists):
       return download.checksums.load_url_infos(checksums_path)
     else:
       return None
@@ -624,7 +626,7 @@ def download_and_prepare(

     download_config = download_config or download.DownloadConfig()
     data_path = self.data_path
-    data_exists = data_path.exists()
+    data_exists = retry.retry(data_path.exists)

     # Saving nondeterministic_order in the DatasetInfo for documentation.
     if download_config.nondeterministic_order:
@@ -640,7 +642,7 @@ def download_and_prepare(
640642 "Deleting pre-existing dataset %s (%s)" , self .name , self .data_dir
641643 )
642644 data_path .rmtree () # Delete pre-existing data.
643- data_exists = data_path .exists ( )
645+ data_exists = retry . retry ( data_path .exists )
644646 else :
645647 logging .info ("Reusing dataset %s (%s)" , self .name , self .data_dir )
646648 return
@@ -805,7 +807,7 @@ def _post_download_and_prepare_hook(self) -> None:
   def _update_dataset_info(self) -> None:
     """Updates the `dataset_info.json` file in the dataset dir."""
     info_file = self.data_path / constants.DATASET_INFO_FILENAME
-    if not info_file.exists():
+    if not retry.retry(info_file.exists):
       raise AssertionError(f"To update {info_file}, it must already exist.")
     new_info = self.info
     new_info.read_from_directory(self.data_path)
@@ -1020,7 +1022,7 @@ def as_dataset(
     self.assert_is_not_blocked()

     # pylint: enable=line-too-long
-    if not self.data_path.exists():
+    if not retry.retry(self.data_path.exists):
       raise AssertionError(
           "Dataset %s: could not find data in %s. Please make sure to call "
           "dataset_builder.download_and_prepare(), or pass download=True to "
@@ -1817,7 +1819,7 @@ def read_text_file(
18171819 """Returns the text in the given file and records the lineage."""
18181820 filename = epath .Path (filename )
18191821 self .info .add_file_data_source_access (filename )
1820- return filename .read_text ( encoding = encoding )
1822+ return retry . retry ( filename .read_text , encoding = encoding )
18211823
18221824 def read_tfrecord_as_dataset (
18231825 self ,
@@ -2057,9 +2059,9 @@ def _save_default_config_name(
 def load_default_config_name(builder_dir: epath.Path) -> str | None:
   """Load `builder_cls` metadata (common to all builder configs)."""
   config_path = builder_dir / ".config" / constants.METADATA_FILENAME
-  if not config_path.exists():
+  if not retry.retry(config_path.exists):
     return None
-  data = json.loads(config_path.read_text())
+  data = json.loads(retry.retry(config_path.read_text))
   return data.get("default_config_name")


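The pattern throughout this diff is the same: instead of calling a filesystem method directly (`path.exists()`, `path.read_text()`), the callable and its arguments are handed to `retry.retry(fn, *args, **kwargs)`, so transient I/O failures (for example a flaky network filesystem or GCS read) can be retried rather than failing the builder outright. Below is a minimal sketch of what such a helper could look like; the attempt count, backoff policy, and exception handling are illustrative assumptions, not the actual `tensorflow_datasets.core.utils.retry` implementation.

```python
import time
from typing import Callable, TypeVar

T = TypeVar("T")


def retry(fn: Callable[..., T], *args, **kwargs) -> T:
  """Calls `fn(*args, **kwargs)`, retrying a few times on transient errors.

  Sketch only: real retry helpers typically make the attempt count, backoff,
  and retryable exception types configurable.
  """
  max_attempts = 3
  delay_s = 1.0
  for attempt in range(1, max_attempts + 1):
    try:
      return fn(*args, **kwargs)
    except OSError:
      # Filesystem hiccups are often transient; give up only on the last try.
      if attempt == max_attempts:
        raise
      time.sleep(delay_s)
      delay_s *= 2  # Exponential backoff between attempts.
```

Passing the bound method itself (e.g. `retry.retry(config_path.read_text)`) rather than its result is what lets the helper re-invoke the operation on failure.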