@@ -76,6 +76,7 @@ def copy_job_order(
7676 return customised_job
7777
7878
79+
7980class ProvenanceProfile :
8081 """
8182 Provenance profile.
@@ -296,24 +297,23 @@ def record_process_end(
296297 self .generate_output_prov (outputs , process_run_id , process_name )
297298 self .document .wasEndedBy (process_run_id , None , self .workflow_run_uri , when )
298299
299-
300-
301- # def _add_nested_annotations(dataset, e: ProvEntity) -> ProvEntity:
302- # for annotation in dataset:
303- # if isinstance(dataset[annotation], (str, bool, int, float)): # check if these are all allowed types
304- # e.add_attributes({annotation: dataset[annotation]})
305- # else:
306- # nested_id = uuid.uuid4().urn
307- # # e.add_attributes({annotation: nested_id})
308- # nested_entity = self.document.entity(nested_id)
309- # e.add_attributes({annotation: nested_entity.identifier})
310- # nested_entity = _add_nested_annotations(dataset[annotation], nested_entity)
311- # return e
312-
313- # def _propagate_input_annotations(entity):
314- # entity.add_attributes( {PROV_TYPE: SCHEMA["Dataset"]})
315- # entity = _add_nested_annotations(value[SCHEMA["Dataset"].uri], entity)
316- # return entity
def _add_nested_annotations(self, annotation_key: str, annotation_value, e: ProvEntity) -> ProvEntity:
    """
    Propagate one input data annotation onto the provenance entity ``e``.

    :param annotation_key: attribute name of the annotation. Keys that
        contain ``https://schema.org/`` are rewritten to the URI of the
        matching term in the ``SCHEMA`` namespace.
    :param annotation_value: the annotation value. Anything that is not a
        mutable sequence or mutable mapping is stored as ``str(value)``.
        Each item of a sequence is recorded under the same key. A mapping
        becomes a new entity (identified by a fresh UUID URN) that ``e``
        references and that receives the mapping's items recursively.
    :param e: the entity that receives the attribute(s).
    :return: the same entity ``e`` that was passed in.
    """
    # Map https://schema.org/ keys onto the SCHEMA namespace URI.
    schema2_uri = "https://schema.org/"
    if schema2_uri in annotation_key:
        annotation_key = SCHEMA[annotation_key.replace(schema2_uri, "")].uri

    if not isinstance(annotation_value, (MutableSequence, MutableMapping)):
        # Scalar value: store its string form directly on the entity.
        e.add_attributes({annotation_key: str(annotation_value)})
    elif isinstance(annotation_value, MutableSequence):
        # A sequence has no keys of its own: attach every item under the
        # same key. Indexing the list by its own elements (as the mapping
        # branch does) would raise or pick the wrong item.
        for item_value in annotation_value:
            e = self._add_nested_annotations(annotation_key, item_value, e)
    else:
        # Mapping: create a separate entity, reference it from ``e`` and
        # attach the mapping's items to that nested entity.
        nested_id = uuid.uuid4().urn
        nested_entity = self.document.entity(nested_id)
        e.add_attributes({annotation_key: nested_entity.identifier})
        for nested_key, nested_value in annotation_value.items():
            nested_entity = self._add_nested_annotations(
                nested_key, nested_value, nested_entity
            )
    return e
317317
318318 def declare_file (self , value : CWLObjectType ) -> Tuple [ProvEntity , ProvEntity , str ]:
319319 if value ["class" ] != "File" :
@@ -369,24 +369,16 @@ def declare_file(self, value: CWLObjectType) -> Tuple[ProvEntity, ProvEntity, st
369369 file_entity .add_attributes ({CWLPROV ["nameext" ]: value ["nameext" ]})
370370 self .document .specializationOf (file_entity , entity )
371371
372-
372+ # Identify all schema annotations
373+ schema_annotations = dict ([(v , value [v ]) for v in value .keys () if 'schema.org' in v ])
373374
374- def _add_nested_annotations (dataset , e : ProvEntity ) -> ProvEntity :
375- for annotation in dataset :
376- if isinstance (dataset [annotation ], (str , bool , int , float )): # check if these are all allowed types
377- e .add_attributes ({annotation : dataset [annotation ]})
378- else :
379- nested_id = uuid .uuid4 ().urn
380- # e.add_attributes({annotation: nested_id})
381- nested_entity = self .document .entity (nested_id )
382- e .add_attributes ({annotation : nested_entity .identifier })
383- nested_entity = _add_nested_annotations (dataset [annotation ], nested_entity )
384- return e
385-
386- # Transfer input data annotations to provenance:
387- if SCHEMA ["Dataset" ].uri in value : # TODO: modify so both http:/ and https:/ are recognized
388- entity .add_attributes ( {PROV_TYPE : SCHEMA ["Dataset" ]})
389- entity = _add_nested_annotations (value [SCHEMA ["Dataset" ].uri ], entity )
375+ # Transfer SCHEMA annotations to provenance
376+ for s in schema_annotations :
377+ if "additionalType" in s :
378+ additional_type = schema_annotations [s ].split (sep = '/' )[- 1 ] # find better method?
379+ entity .add_attributes ( {PROV_TYPE : SCHEMA [additional_type ]})
380+ else :
381+ entity = self ._add_nested_annotations (s , schema_annotations [s ], entity )
390382
391383 # Transfer format annotations to provenance:
392384 if "format" in value :
0 commit comments