@@ -296,6 +296,25 @@ def record_process_end(
296296 self .generate_output_prov (outputs , process_run_id , process_name )
297297 self .document .wasEndedBy (process_run_id , None , self .workflow_run_uri , when )
298298
299+
300+
301+ # def _add_nested_annotations(dataset, e: ProvEntity) -> ProvEntity:
302+ # for annotation in dataset:
303+ # if isinstance(dataset[annotation], (str, bool, int, float)): # check if these are all allowed types
304+ # e.add_attributes({annotation: dataset[annotation]})
305+ # else:
306+ # nested_id = uuid.uuid4().urn
307+ # # e.add_attributes({annotation: nested_id})
308+ # nested_entity = self.document.entity(nested_id)
309+ # e.add_attributes({annotation: nested_entity.identifier})
310+ # nested_entity = _add_nested_annotations(dataset[annotation], nested_entity)
311+ # return e
312+
313+ # def _propagate_input_annotations(entity):
314+ # entity.add_attributes( {PROV_TYPE: SCHEMA["Dataset"]})
315+ # entity = _add_nested_annotations(value[SCHEMA["Dataset"].uri], entity)
316+ # return entity
317+
299318 def declare_file (self , value : CWLObjectType ) -> Tuple [ProvEntity , ProvEntity , str ]:
300319 if value ["class" ] != "File" :
301320 raise ValueError ("Must have class:File: %s" % value )
@@ -350,7 +369,9 @@ def declare_file(self, value: CWLObjectType) -> Tuple[ProvEntity, ProvEntity, st
350369 file_entity .add_attributes ({CWLPROV ["nameext" ]: value ["nameext" ]})
351370 self .document .specializationOf (file_entity , entity )
352371
353- def recursive_function (dataset , e : ProvEntity ) -> ProvEntity :
372+
373+
374+ def _add_nested_annotations (dataset , e : ProvEntity ) -> ProvEntity :
354375 for annotation in dataset :
355376 if isinstance (dataset [annotation ], (str , bool , int , float )): # check if these are all allowed types
356377 e .add_attributes ({annotation : dataset [annotation ]})
@@ -359,14 +380,17 @@ def recursive_function(dataset, e: ProvEntity) -> ProvEntity:
359380 # e.add_attributes({annotation: nested_id})
360381 nested_entity = self .document .entity (nested_id )
361382 e .add_attributes ({annotation : nested_entity .identifier })
362- nested_entity = recursive_function (dataset [annotation ], nested_entity )
383+ nested_entity = _add_nested_annotations (dataset [annotation ], nested_entity )
363384 return e
364385
365386 # Transfer input data annotations to provenance:
366- if SCHEMA ["Dataset" ].uri in value :
387+ if SCHEMA ["Dataset" ].uri in value : # TODO: modify so both http:// and https:// are recognized
367388 entity .add_attributes ( {PROV_TYPE : SCHEMA ["Dataset" ]})
368- entity = recursive_function (value [SCHEMA ["Dataset" ].uri ], entity )
389+ entity = _add_nested_annotations (value [SCHEMA ["Dataset" ].uri ], entity )
369390
391+ # Transfer format annotations to provenance:
392+ if "format" in value :
393+ entity .add_attributes ({SCHEMA ["encodingFormat" ]: value ["format" ]})
370394
371395 # Check for secondaries
372396 for sec in cast (
@@ -413,6 +437,7 @@ def declare_directory(self, value: CWLObjectType) -> ProvEntity:
413437 (PROV_TYPE , RO ["Folder" ]),
414438 ],
415439 )
440+
416441 # ORE description of ro:Folder, saved separately
417442 coll_b = dir_bundle .entity (
418443 dir_id ,
@@ -473,6 +498,20 @@ def declare_directory(self, value: CWLObjectType) -> ProvEntity:
473498 coll .add_attributes (coll_attribs )
474499 coll_b .add_attributes (coll_b_attribs )
475500
501+ # Propagate input data annotations
502+ if SCHEMA ["Dataset" ].uri in value :
503+ # coll_annotations = [ (PROV_TYPE, SCHEMA["Dataset"]) ]
504+ coll .add_attributes ([ (PROV_TYPE , SCHEMA ["Dataset" ]) ])
505+
506+ dataset = value [SCHEMA ["Dataset" ].uri ]
507+
508+ for annotation in dataset :
509+ if isinstance (dataset [annotation ], (str , bool , int , float )): # check if these are all allowed types
510+ coll .add_attributes ({annotation : dataset [annotation ]})
511+
512+ if "format" in value :
513+ coll .add_attributes ({SCHEMA ["encodingFormat" ]: value ["format" ]})
514+
476515 # Also Save ORE Folder as annotation metadata
477516 ore_doc = ProvDocument ()
478517 ore_doc .add_namespace (ORE )
0 commit comments