@@ -226,17 +226,21 @@ def __init__(
226226 )
227227 self .VERSION = version_lib .Version (version ) # pylint: disable=invalid-name
228228 self .name = conversion_utils .to_tfds_name (hf_repo_id )
229+ self .homepage = f'https://huggingface.co/datasets/{ hf_repo_id } '
229230 self ._hf_hub_token = hf_hub_token
230231 self ._hf_num_proc = hf_num_proc
231232 self ._tfds_num_proc = tfds_num_proc
232233 self ._verification_mode = (
233234 'no_checks' if ignore_verifications else 'all_checks'
234235 )
235236 if self ._hf_config :
237+ description = self ._get_text_field ('description' )
238+ if self ._is_gated ():
239+ description = self ._gated_text + '\n ' + description
236240 self ._converted_builder_config = dataset_builder .BuilderConfig (
237241 name = tfds_config ,
238242 version = self .VERSION ,
239- description = self . _get_text_field ( ' description' ) ,
243+ description = description ,
240244 )
241245 else :
242246 self ._converted_builder_config = None
@@ -277,6 +281,48 @@ def _hf_hub_info(self) -> huggingface_hub.hf_api.DatasetInfo:
277281 self ._hf_repo_id , token = self ._hf_hub_token
278282 )
279283
def _is_gated(self) -> bool:
  """Tells whether the Hugging Face Hub reports this dataset as gated.

  Returns:
    True when the `gated` attribute of the Hub info is a string, False for
    any other value.
  """
  # Per the original note, a gated dataset carries its gating mode as a string
  # ('manual' or 'automatic'); a non-string value is treated as "not gated".
  return isinstance(self._hf_hub_info.gated, str)
290+
@property
def _gated_dataset_warning(self) -> str:
  """Warning for a gated dataset, pointing the user at `self.homepage`."""
  conditions_url = self.homepage
  # The fragments are concatenated without a separator; each one carries its
  # own leading space.
  fragments = (
      'WARNING: This dataset is gated. Before using it, make sure to sign',
      f' the conditions at: {conditions_url} . Important: access requests are',
      ' always granted to individual users rather than to entire',
      ' organizations.',
  )
  return ''.join(fragments)
300+
301+ @property
302+ def _gated_text (self ) -> str | None :
303+ """Returns the conditions for a dataset, if it is gated.
304+
305+ All datasets share the same default conditions. Extra conditions are stored
306+ in the dataset card:
307+ https://huggingface.co/docs/hub/en/datasets-gated
308+
309+ Returns:
310+ The gated text if the dataset is gated. None otherwise.
311+ """
312+ if self ._is_gated ():
313+ # This condition is the same for all gated datasets.
314+ conditions = (
315+ 'The conditions consist of:\n By agreeing you accept to share your'
316+ ' contact information (email and username) with the repository'
317+ ' authors.'
318+ )
319+ if dataset_card := self ._hf_hub_info .card_data :
320+ gated_text = dataset_card .get ('extra_gated_prompt' , None )
321+ if gated_text :
322+ conditions = conditions + '\n ' + gated_text
323+ return self ._gated_dataset_warning + '\n ' + conditions
324+ return None
325+
280326 def _hf_features (self ) -> hf_datasets .Features :
281327 if not self ._hf_info .features :
282328 # We need to download and prepare the data to know its features.
@@ -285,13 +331,19 @@ def _hf_features(self) -> hf_datasets.Features:
285331 return self ._hf_info .features
286332
def _info(self) -> dataset_info_lib.DatasetInfo:
  """Builds the `DatasetInfo` describing this dataset.

  The description comes from `_get_text_field('description')` and the license
  from `_get_license()`. When `_is_gated()` is true, the gated text is
  prepended to the description and the gated-dataset warning is appended to
  the license.

  Returns:
    A `DatasetInfo` carrying the description, converted features, citation,
    license, supervised keys and homepage.
  """
  ds_description = self._get_text_field('description')
  ds_license = self._get_license()
  if self._is_gated():
    gated_text = self._gated_text
    gated_warning = self._gated_dataset_warning
    # NOTE(review): the two helpers above are not visible from here; if either
    # can return None (or an empty string), plain `+` concatenation would
    # raise TypeError (or leave a dangling separator). A missing value is
    # therefore replaced by the gated text / warning alone.
    ds_description = (
        gated_text + '\n ' + ds_description if ds_description else gated_text
    )
    ds_license = (
        ds_license + ' ' + gated_warning if ds_license else gated_warning
    )
  return dataset_info_lib.DatasetInfo(
      builder=self,
      description=ds_description,
      features=huggingface_utils.convert_hf_features(self._hf_features()),
      citation=self._get_text_field('citation'),
      license=ds_license,
      supervised_keys=_extract_supervised_keys(self._hf_info),
      homepage=self.homepage,
  )
296348
297349 def _split_generators (
0 commit comments