55
66import labelbox as lb
77from labelbox .data .annotation_types .data .video import VideoData
8- from labelbox .schema .data_row import DataRow
98from labelbox .schema .media_type import MediaType
109import labelbox .types as lb_types
1110from labelbox .data .annotation_types .data import (
7069]
7170
7271
73- def remove_keys_recursive (d , keys ):
74- for k in keys :
75- if k in d :
76- del d [k ]
77- for k , v in d .items ():
78- if isinstance (v , dict ):
79- remove_keys_recursive (v , keys )
80- elif isinstance (v , list ):
81- for i in v :
82- if isinstance (i , dict ):
83- remove_keys_recursive (i , keys )
84-
85-
86- # NOTE this uses quite a primitive check for cuids but I do not think it is worth coming up with a better one
87- # Also this function is NOT written with performance in mind, good for small to mid size dicts like we have in our test
88- def rename_cuid_key_recursive (d ):
89- new_key = "<cuid>"
90- for k in list (d .keys ()):
91- if len (k ) == 25 and not k .isalpha (): # primitive check for cuid
92- d [new_key ] = d .pop (k )
93- for k , v in d .items ():
94- if isinstance (v , dict ):
95- rename_cuid_key_recursive (v )
96- elif isinstance (v , list ):
97- for i in v :
98- if isinstance (i , dict ):
99- rename_cuid_key_recursive (i )
100-
101-
10272def get_annotation_comparison_dicts_from_labels (labels ):
10373 labels_ndjson = list (NDJsonConverter .serialize (labels ))
10474 for annotation in labels_ndjson :
@@ -198,12 +168,13 @@ def test_import_data_types(
198168 data_row_json_by_data_type ,
199169 annotations_by_data_type ,
200170 data_type_class ,
171+ helpers ,
201172):
202173 project = configured_project
203174 project_id = project .uid
204175 dataset = initial_dataset
205176
206- set_project_media_type_from_data_type (project , data_type_class )
177+ helpers . set_project_media_type_from_data_type (project , data_type_class )
207178
208179 data_type_string = data_type_class .__name__ [:- 4 ].lower ()
209180 data_row_ndjson = data_row_json_by_data_type [data_type_string ]
@@ -241,12 +212,13 @@ def test_import_data_types_by_global_key(
241212 rand_gen ,
242213 data_row_json_by_data_type ,
243214 annotations_by_data_type ,
215+ helpers ,
244216):
245217 project = configured_project
246218 project_id = project .uid
247219 dataset = initial_dataset
248220 data_type_class = ImageData
249- set_project_media_type_from_data_type (project , data_type_class )
221+ helpers . set_project_media_type_from_data_type (project , data_type_class )
250222
251223 data_row_ndjson = data_row_json_by_data_type ["image" ]
252224 data_row_ndjson ["global_key" ] = str (uuid .uuid4 ())
@@ -287,24 +259,6 @@ def validate_iso_format(date_string: str):
287259 assert parsed_t .second is not None
288260
289261
290- def to_pascal_case (name : str ) -> str :
291- return "" .join ([word .capitalize () for word in name .split ("_" )])
292-
293-
294- def set_project_media_type_from_data_type (project , data_type_class ):
295- data_type_string = data_type_class .__name__ [:- 4 ].lower ()
296- media_type = to_pascal_case (data_type_string )
297- if media_type == "Conversation" :
298- media_type = "Conversational"
299- elif media_type == "Llmpromptcreation" :
300- media_type = "LLMPromptCreation"
301- elif media_type == "Llmpromptresponsecreation" :
302- media_type = "LLMPromptResponseCreation"
303- elif media_type == "Llmresponsecreation" :
304- media_type = "Text"
305- project .update (media_type = MediaType [media_type ])
306-
307-
308262@pytest .mark .parametrize (
309263 "data_type_class" ,
310264 [
@@ -331,12 +285,13 @@ def test_import_data_types_v2(
331285 exports_v2_by_data_type ,
332286 export_v2_test_helpers ,
333287 rand_gen ,
288+ helpers ,
334289):
335290 project = configured_project
336291 dataset = initial_dataset
337292 project_id = project .uid
338293
339- set_project_media_type_from_data_type (project , data_type_class )
294+ helpers . set_project_media_type_from_data_type (project , data_type_class )
340295
341296 data_type_string = data_type_class .__name__ [:- 4 ].lower ()
342297 data_row_ndjson = data_row_json_by_data_type [data_type_string ]
@@ -381,9 +336,9 @@ def test_import_data_types_v2(
381336 exported_project_labels = exported_project ["labels" ][0 ]
382337 exported_annotations = exported_project_labels ["annotations" ]
383338
384- remove_keys_recursive (exported_annotations ,
385- ["feature_id" , "feature_schema_id" ])
386- rename_cuid_key_recursive (exported_annotations )
339+ helpers . remove_keys_recursive (exported_annotations ,
340+ ["feature_id" , "feature_schema_id" ])
341+ helpers . rename_cuid_key_recursive (exported_annotations )
387342 assert exported_annotations == exports_v2_by_data_type [data_type_string ]
388343
389344 data_row = client .get_data_row (data_row .uid )
@@ -400,10 +355,11 @@ def test_import_label_annotations(
400355 data_class ,
401356 annotations ,
402357 rand_gen ,
358+ helpers ,
403359):
404360 project = configured_project_with_one_data_row
405361 dataset = initial_dataset
406- set_project_media_type_from_data_type (project , data_class )
362+ helpers . set_project_media_type_from_data_type (project , data_class )
407363
408364 data_row_json = data_row_json_by_data_type [data_type ]
409365 data_row = create_data_row_for_project (project , dataset , data_row_json ,
@@ -471,10 +427,11 @@ def test_import_mal_annotations(
471427 annotations ,
472428 rand_gen ,
473429 one_datarow ,
430+ helpers ,
474431):
475432 data_row = one_datarow
476- set_project_media_type_from_data_type (configured_project_with_one_data_row ,
477- data_class )
433+ helpers . set_project_media_type_from_data_type (
434+ configured_project_with_one_data_row , data_class )
478435
479436 configured_project_with_one_data_row .create_batch (
480437 rand_gen (str ),
@@ -500,12 +457,13 @@ def test_import_mal_annotations(
500457
501458def test_import_mal_annotations_global_key (client ,
502459 configured_project_with_one_data_row ,
503- rand_gen , one_datarow_global_key ):
460+ rand_gen , one_datarow_global_key ,
461+ helpers ):
504462 data_class = lb_types .VideoData
505463 data_row = one_datarow_global_key
506464 annotations = [video_mask_annotation ]
507- set_project_media_type_from_data_type (configured_project_with_one_data_row ,
508- data_class )
465+ helpers . set_project_media_type_from_data_type (
466+ configured_project_with_one_data_row , data_class )
509467
510468 configured_project_with_one_data_row .create_batch (
511469 rand_gen (str ),
0 commit comments