@@ -130,5 +130,146 @@ def test_list_dataset_variants_without_configs(mock_fs: testing.MockFs):
130130 ]
131131
132132
def test_list_datasets_in_data_dir(mock_fs: testing.MockFs):
  """Checks which dataset folders under `/a` are returned as references.

  Files are registered for `ds1` (configs `config1` and `config2`) and for
  `ds2` (no config folder), plus three problematic folders. The sorted output
  of `file_utils.list_datasets_in_data_dir` must contain exactly the four
  references built from the non-problematic folders.
  """
  data_dir = epath.Path('/a')
  listed_files = (
      'ds1/config1/1.0.0/dataset_info.json',
      'ds1/config1/1.0.0/features.json',
      'ds1/config1/2.0.0/dataset_info.json',
      'ds1/config1/2.0.0/features.json',
      'ds1/config2/1.0.0/dataset_info.json',
      'ds1/config2/1.0.0/features.json',
      'ds2/1.0.0/dataset_info.json',
      'ds2/1.0.0/features.json',
  )
  for relative_path in listed_files:
    mock_fs.add_file(data_dir / relative_path)

  # The following are problematic and should thus be ignored.
  ignored_files = (
      'invalid-name/1.0.0/features.json',
      'invalid_version1/1.a.b/features.json',
      'invalid_version2/1.2.3.4/features.json',
  )
  for relative_path in ignored_files:
    mock_fs.add_file(os.path.join(data_dir, relative_path), content='x')

  listed = file_utils.list_datasets_in_data_dir(data_dir=epath.Path(data_dir))
  references = sorted(listed)

  expected_dir = epath.Path('/a')
  ds1_variants = (
      ('config1', '1.0.0'),
      ('config1', '2.0.0'),
      ('config2', '1.0.0'),
  )
  expected = [
      naming.DatasetReference(
          dataset_name='ds1',
          config=config,
          version=version,
          data_dir=expected_dir,
      )
      for config, version in ds1_variants
  ]
  # `ds2` has no config folder, so its reference is built without `config`.
  expected.append(
      naming.DatasetReference(
          dataset_name='ds2', version='1.0.0', data_dir=expected_dir
      )
  )
  assert references == expected
184+
185+
def test_list_datasets_in_data_dir_with_namespace(mock_fs: testing.MockFs):
  """Checks that the requested namespace appears on the listed reference.

  A single `ds1/config1/1.0.0` folder is registered and listed with
  `namespace='ns'`, `include_configs=True` and `include_versions=True`.
  """
  namespace = 'ns'
  data_dir = epath.Path('/a')
  for file_name in ('dataset_info.json', 'features.json'):
    mock_fs.add_file(data_dir / f'ds1/config1/1.0.0/{file_name}')

  listed = file_utils.list_datasets_in_data_dir(
      data_dir=epath.Path(data_dir),
      namespace=namespace,
      include_configs=True,
      include_versions=True,
  )
  references = sorted(listed)

  expected_reference = naming.DatasetReference(
      dataset_name='ds1',
      namespace=namespace,
      config='config1',
      version='1.0.0',
      data_dir=epath.Path('/a'),
  )
  assert references == [expected_reference]
210+
211+
def test_find_files_without_glob(mock_fs: testing.MockFs):
  """Checks that only files named `x` at the requested depths are found.

  Calls `file_utils._find_files_without_glob` on `/` with the globs `*/*` and
  `*/*/*` and the file name `x`, then compares the sorted string paths.
  """
  folder = epath.Path('/')
  files_to_create = [
      folder / 'a' / 'b' / 'x',
      folder / 'a' / 'c' / 'x',
      folder / 'b' / 'd' / 'x',
      folder / 'b' / 'd' / 'y',  # Should be ignored.
      folder / 'b' / '.config' / 'x',  # Should be ignored.
      folder / 'b' / 'x',
      folder / 'b' / 'y',  # Should be ignored.
  ]
  for file_path in files_to_create:
    mock_fs.add_file(file_path)

  found = file_utils._find_files_without_glob(
      folder, globs=['*/*', '*/*/*'], file_names=['x']
  )
  found_paths = sorted(os.fspath(p) for p in found)
  assert found_paths == ['/a/b/x', '/a/c/x', '/b/d/x', '/b/x']
226+
227+
@pytest.mark.parametrize(
    ['filename', 'result'],
    [
        ('abc', False),
        ('dataset_info.json', True),
        ('features.json', True),
        ('mnist-test.tfrecord-00000-of-00001', True),
        ('mnist-test.arrayrecord-00000-of-00001', True),
    ],
)
def test_looks_like_a_tfds_file(filename, result):
  """Checks `file_utils._looks_like_a_tfds_file` for each sample file name."""
  looks_like_tfds = file_utils._looks_like_a_tfds_file(filename)
  assert looks_like_tfds == result
240+
241+
@pytest.mark.parametrize(
    ['path', 'glob_result', 'expected'],
    [
        ('/a/*', ['/a/b', '/a/c'], ['/a/b', '/a/c']),
        ('/a/b', None, ['/a/b']),
        ('a/*', None, ['a/*']),
        ('/a/b@*', None, ['/a/b@*']),
    ],
)
def test_expand_glob(path, glob_result, expected):
  """Checks `file_utils.expand_glob` against a mocked `epath.Path`.

  `glob_result` is the value the mocked `glob` returns. A `glob_result` of
  `None` marks the cases in which `glob` must not be called at all.
  """
  with mock.patch.object(epath, 'Path') as mock_epath:
    # Every `epath.Path(...)` call inside the patch yields this same mock.
    path_mock = mock_epath.return_value
    path_mock.expanduser.return_value = path
    path_mock.glob.return_value = glob_result

    actual = file_utils.expand_glob(path)

    if glob_result is None:
      path_mock.glob.assert_not_called()
    else:
      # `glob` must receive the pattern without its first character.
      path_mock.glob.assert_called_once_with(path[1:])

    actual = [os.fspath(p) for p in actual]
    assert actual == expected
262+
263+
def test_publish_data(mock_fs: testing.MockFs):
  """Checks that a file published from one data dir is readable in the other.

  A `dataset_info.json` with content `'a'` is registered under
  `/tmp/dummy_mnist/3.0.1`; after `file_utils.publish_data` the same content
  must be readable under `/a/b`.
  """
  filename = 'dataset_info.json'
  content = 'a'
  from_data_dir = epath.Path('/tmp') / 'dummy_mnist/3.0.1'
  to_data_dir = epath.Path('/a/b')
  mock_fs.add_file(path=from_data_dir / filename, content=content)

  file_utils.publish_data(from_data_dir=from_data_dir, to_data_dir=to_data_dir)

  published = mock_fs.read_file(to_data_dir / filename)
  assert published == content
272+
273+
# When this module is executed directly (not imported), hand control to
# `testing.test_main()`.
if __name__ == '__main__':
  testing.test_main()
0 commit comments