@@ -74,34 +74,37 @@ def create_dataset(
7474 seed : int = 42 ,
7575 repeats : int = 0 ,
7676 input_img_mode : str = 'RGB' ,
77+ trust_remote_code : bool = False ,
7778 ** kwargs ,
7879):
7980 """ Dataset factory method
8081
8182 In parentheses after each arg are the type of dataset supported for each arg, one of:
82- * folder - default, timm folder (or tar) based ImageDataset
83- * torch - torchvision based datasets
83+ * Folder - default, timm folder (or tar) based ImageDataset
84+ * Torch - torchvision based datasets
8485 * HFDS - Hugging Face Datasets
86+ * HFIDS - Hugging Face Datasets Iterable (streaming mode, with IterableDataset)
8587 * TFDS - Tensorflow-datasets wrapper in IterableDataset interface via IterableImageDataset
8688 * WDS - Webdataset
87- * all - any of the above
89+ * All - any of the above
8890
8991 Args:
90- name: dataset name, empty is okay for folder based datasets
91- root: root folder of dataset (all)
92- split: dataset split (all)
93- search_split: search for split specific child fold from root so one can specify
94- `imagenet/` instead of `/imagenet/val`, etc on cmd line / config. (folder, torch/folder)
95- class_map: specify class -> index mapping via text file or dict (folder)
96- load_bytes: load data, return images as undecoded bytes (folder)
97- download: download dataset if not present and supported (HFDS, TFDS, torch)
98- is_training: create dataset in train mode, this is different from the split.
99- For Iterable / TDFS it enables shuffle, ignored for other datasets. (TFDS, WDS)
100- batch_size: batch size hint for (TFDS, WDS)
101- seed: seed for iterable datasets (TFDS, WDS)
102- repeats: dataset repeats per iteration i.e. epoch (TFDS, WDS)
103- input_img_mode: Input image color conversion mode e.g. 'RGB', 'L' (folder, TFDS, WDS, HFDS)
104- **kwargs: other args to pass to dataset
92+ name: Dataset name, empty is okay for folder based datasets
93+ root: Root folder of dataset (All)
94+ split: Dataset split (All)
95+ search_split: Search for split specific child folder from root so one can specify
96+ `imagenet/` instead of `/imagenet/val`, etc on cmd line / config. (Folder, Torch)
97+ class_map: Specify class -> index mapping via text file or dict (Folder)
98+ load_bytes: Load data, return images as undecoded bytes (Folder)
99+ download: Download dataset if not present and supported (HFIDS, TFDS, Torch)
100+ is_training: Create dataset in train mode, this is different from the split.
101+ For Iterable / TFDS it enables shuffle, ignored for other datasets. (TFDS, WDS, HFIDS)
102+ batch_size: Batch size hint for iterable datasets (TFDS, WDS, HFIDS)
103+ seed: Seed for iterable datasets (TFDS, WDS, HFIDS)
104+ repeats: Dataset repeats per iteration i.e. epoch (TFDS, WDS, HFIDS)
105+ input_img_mode: Input image color conversion mode e.g. 'RGB', 'L' (Folder, TFDS, WDS, HFDS, HFIDS)
106+ trust_remote_code: Trust remote code in Hugging Face Datasets if True (HFDS, HFIDS)
107+ **kwargs: Other args to pass through to underlying Dataset and/or Reader classes
105108
106109 Returns:
107110 Dataset object
@@ -162,6 +165,7 @@ def create_dataset(
162165 split = split ,
163166 class_map = class_map ,
164167 input_img_mode = input_img_mode ,
168+ trust_remote_code = trust_remote_code ,
165169 ** kwargs ,
166170 )
167171 elif name .startswith ('hfids/' ):
@@ -177,7 +181,8 @@ def create_dataset(
177181 repeats = repeats ,
178182 seed = seed ,
179183 input_img_mode = input_img_mode ,
180- ** kwargs
184+ trust_remote_code = trust_remote_code ,
185+ ** kwargs ,
181186 )
182187 elif name .startswith ('tfds/' ):
183188 ds = IterableImageDataset (
0 commit comments