@@ -186,6 +186,7 @@ def __init__(
186186 features : feature_lib .FeatureConnector | None = None ,
187187 supervised_keys : SupervisedKeysType | None = None ,
188188 disable_shuffling : bool = False ,
189+ nondeterministic_order : bool = False ,
189190 homepage : str | None = None ,
190191 citation : str | None = None ,
191192 metadata : Metadata | None = None ,
@@ -228,7 +229,11 @@ def __init__(
228229
229230 Note that selecting features in nested `tfds.features.FeaturesDict`
230231 objects is not supported.
231- disable_shuffling: `bool`, specify whether to shuffle the examples.
232+ disable_shuffling: `bool`, specifies whether to disable shuffling of the examples.
233+ nondeterministic_order: `bool`, if True and the dataset uses beam, it will
234+ use `NoShuffleBeamWriter` which does not guarantee deterministic
235+ shuffling when writing examples to disk. This might result in quicker
236+ dataset preparation.
232237 homepage: `str`, optional, the homepage for this dataset.
233238 citation: `str`, optional, the citation to use for this dataset.
234239 metadata: `tfds.core.Metadata`, additional object which will be
@@ -268,6 +273,7 @@ def __init__(
268273 version = str (self ._identity .version ),
269274 release_notes = self ._identity .release_notes ,
270275 disable_shuffling = disable_shuffling ,
276+ nondeterministic_order = nondeterministic_order ,
271277 config_name = self ._identity .config_name ,
272278 config_description = self ._identity .config_description ,
273279 config_tags = self ._identity .config_tags ,
@@ -342,6 +348,7 @@ def from_proto(
342348 features = features ,
343349 supervised_keys = supervised_keys ,
344350 disable_shuffling = proto .disable_shuffling ,
351+ nondeterministic_order = proto .nondeterministic_order ,
345352 citation = proto .citation ,
346353 license = proto .redistribution_info .license ,
347354 split_dict = splits_lib .SplitDict .from_proto (
@@ -400,6 +407,13 @@ def release_notes(self) -> dict[str, str] | None:
400407 def disable_shuffling (self ) -> bool :
401408 return self .as_proto .disable_shuffling
402409
# Read-only accessor for the `nondeterministic_order` flag stored on the
# underlying `_info_proto` message.
# NOTE(review): the neighbouring accessors (`disable_shuffling`, `homepage`)
# read through `self.as_proto`, while this one reads `self._info_proto`
# directly -- confirm both refer to the same proto object.
410+ @property
411+ def nondeterministic_order (self ) -> bool :
412+ return self ._info_proto .nondeterministic_order
413+
# Overwrites the `nondeterministic_order` field on `_info_proto` in place with
# the given value; returns nothing.
414+ def set_nondeterministic_order (self , nondeterministic_order : bool ) -> None :
415+ self ._info_proto .nondeterministic_order = nondeterministic_order
416+
403417 @property
404418 def homepage (self ) -> str :
405419 urls = self .as_proto .location .urls
@@ -923,6 +937,7 @@ def __repr__(self):
923937 ("features" , _indent (repr (self .features ))),
924938 ("supervised_keys" , self .supervised_keys ),
925939 ("disable_shuffling" , self .disable_shuffling ),
940+ ("nondeterministic_order" , self .nondeterministic_order ),
926941 ("splits" , splits ),
927942 ("citation" , _indent (f'"""{ self .citation } """' )),
928943 # Proto add a \n that we strip.
@@ -940,6 +955,7 @@ def __getstate__(self):
940955 "features" : self .features ,
941956 "supervised_keys" : self .supervised_keys ,
942957 "disable_shuffling" : self .disable_shuffling ,
958+ "nondeterministic_order" : self .nondeterministic_order ,
943959 "homepage" : self .homepage ,
944960 "citation" : self .citation ,
945961 "metadata" : self .metadata ,
@@ -956,6 +972,7 @@ def __setstate__(self, state):
956972 features = state ["features" ],
957973 supervised_keys = state ["supervised_keys" ],
958974 disable_shuffling = state ["disable_shuffling" ],
975+ nondeterministic_order = state ["nondeterministic_order" ],
959976 homepage = state ["homepage" ],
960977 citation = state ["citation" ],
961978 metadata = state ["metadata" ],
0 commit comments