@@ -368,6 +368,61 @@ def list_dataset_versions(
368368 return sorted (found_versions )
369369
370370
def is_valid_variant_dir(
    variant_dir: Path,
    matched_files: set[str] | None = None,
    include_old_tfds_version: bool = False,
) -> bool:
  """Checks whether a directory looks like a usable dataset variant.

  The checks run in this order, and the first failure is logged as a warning
  before `False` is returned:
    1. The directory name must be a valid version string.
    2. The dataset info file (`constants.DATASET_INFO_FILENAME`) must be among
       the matched files.
    3. Unless `include_old_tfds_version` is set, the features file
       (`constants.FEATURES_FILENAME`) must be among the matched files too.

  Args:
    variant_dir: The variant directory to check. Its final path component is
      interpreted as the version.
    matched_files: Names of the files known to be in the variant directory. If
      None, the names of all `*.json` files directly inside `variant_dir` are
      used instead.
    include_old_tfds_version: Whether to accept datasets generated with TFDS
      before 4.0.0. When True, the features file is not required.

  Returns:
    True if every applicable check passes, False otherwise.
  """
  version_name = variant_dir.name
  if not version_lib.Version.is_valid(version_name):
    logging.warning(
        'Variant folder %s has invalid version %s',
        variant_dir,
        version_name,
    )
    return False

  # Only touch the filesystem when the caller did not supply the file names.
  if matched_files is None:
    matched_files = {json_path.name for json_path in variant_dir.glob('*.json')}

  # Required files, in the order in which a missing one is reported.
  required_filenames = [constants.DATASET_INFO_FILENAME]
  if not include_old_tfds_version:
    required_filenames.append(constants.FEATURES_FILENAME)

  for required_filename in required_filenames:
    if required_filename not in matched_files:
      logging.warning(
          'Variant folder %s has no %s',
          variant_dir,
          required_filename,
      )
      return False

  return True
424+
425+
371426def list_dataset_variants (
372427 dataset_dir : Path ,
373428 namespace : str | None = None ,
@@ -401,36 +456,17 @@ def list_dataset_variants(
401456 matched_files_by_variant_dir [file .parent ].add (file .name )
402457
403458 for variant_dir , matched_files in matched_files_by_variant_dir .items ():
404- if constants .DATASET_INFO_FILENAME not in matched_files :
405- logging .warning (
406- 'Ignoring variant folder %s, which has no %s' ,
407- variant_dir ,
408- constants .DATASET_INFO_FILENAME ,
409- )
410- continue
411-
412- if (
413- not include_old_tfds_version
414- and constants .FEATURES_FILENAME not in matched_files
459+ if not is_valid_variant_dir (
460+ variant_dir = variant_dir ,
461+ matched_files = matched_files ,
462+ include_old_tfds_version = include_old_tfds_version ,
415463 ):
416- logging .info (
417- 'Ignoring variant folder %s, which has no %s' ,
418- variant_dir ,
419- constants .FEATURES_FILENAME ,
420- )
421- continue
422-
423- version = variant_dir .name
424- if not version_lib .Version .is_valid (version ):
425- logging .warning (
426- 'Ignoring variant folder %s, which has invalid version %s' ,
427- variant_dir ,
428- version ,
429- )
464+ logging .warning ('Skipping invalid variant directory: %s' , variant_dir )
430465 continue
431466
432467 config_dir = variant_dir .parent
433468 config = config_dir .name if config_dir != dataset_dir else None
469+ version = variant_dir .name
434470
435471 yield naming .DatasetReference (
436472 namespace = namespace ,
0 commit comments