@@ -27,6 +27,66 @@ def _add_deleted_status_to_filter(base_filter):
2727 return base_filter
2828
2929
30+ def ignore_missing_files (f ):
31+ def _ignore_missing_files (path , * args , ** kwargs ):
32+ try :
33+ return f (path , * args , ** kwargs )
34+ except NoFile :
35+ logger .error ("Could not find file %s" , path )
36+ return ""
37+
38+ return _ignore_missing_files
39+
40+
@ignore_missing_files
def read_file(result_data_store, path, is_json=False):
    """Read the latest version of ``path`` from GridFS.

    Returns "" for an empty file, a parsed object when ``is_json``, a utf-8
    string when the bytes decode cleanly, and the raw bytes otherwise.
    """
    raw = result_data_store.get_last_version(path).read()
    if not raw:
        return ""
    try:
        if is_json:
            return json.loads(raw)
        return raw.decode("utf8")
    except UnicodeDecodeError:
        # Not decodable text -- hand back the raw bytes unchanged.
        return raw
48+
49+
@ignore_missing_files
def read_bytes_file(result_data_store, path):
    """Return the raw bytes of the latest GridFS version of ``path``."""
    latest_version = result_data_store.get_last_version(path)
    return latest_version.read()
53+
54+
def load_files_from_gridfs(result_data_store: "gridfs.GridFS", result: Dict, do_read=True) -> List[str]:
    """Collect (and optionally read) every GridFS file belonging to ``result``.

    Walks the known per-job artefacts -- HTML resource outputs, the PDF, the
    raw ipynb JSON, the raw HTML, the email HTML and the CSS-inlining data --
    appending each GridFS filename to the returned list.  When ``do_read`` is
    True the file contents are also loaded into ``result`` in place; when it is
    False (e.g. when gathering filenames for deletion) GridFS is never read.

    :param result_data_store: GridFS store holding the job's files.
    :param result: mutable result document; enriched in place when ``do_read``.
    :param do_read: when False, only filenames are gathered.
    :return: list of every GridFS filename associated with this result.
    """
    gridfs_filenames = []

    all_html_output_paths = result.get("raw_html_resources", {}).get("outputs", [])
    gridfs_filenames.extend(all_html_output_paths)
    # Guard on the key so an absent "raw_html_resources" dict does not KeyError.
    if do_read and "raw_html_resources" in result:
        outputs = {path: read_file(result_data_store, path) for path in all_html_output_paths}
        result["raw_html_resources"]["outputs"] = outputs

    if result.get("generate_pdf_output"):
        pdf_filename = _pdf_filename(result["job_id"])
        if do_read:
            result["pdf"] = read_bytes_file(result_data_store, pdf_filename)
        gridfs_filenames.append(pdf_filename)

    if not result.get("raw_ipynb_json"):
        json_filename = _raw_json_filename(result["job_id"])
        if do_read:
            result["raw_ipynb_json"] = read_file(result_data_store, json_filename, is_json=True)
        gridfs_filenames.append(json_filename)

    if not result.get("raw_html"):
        html_filename = _raw_html_filename(result["job_id"])
        if do_read:
            result["raw_html"] = read_file(result_data_store, html_filename)
        gridfs_filenames.append(html_filename)

    if not result.get("email_html"):
        email_filename = _raw_email_html_filename(result["job_id"])
        # Bug fix: this read was previously unconditional, so a do_read=False
        # caller (e.g. delete_result) still hit GridFS for the email HTML.
        if do_read:
            result["email_html"] = read_file(result_data_store, email_filename)
        gridfs_filenames.append(email_filename)

    if result.get("raw_html_resources") and not result.get("raw_html_resources", {}).get("inlining"):
        css_inlining_filename = _css_inlining_filename(result["job_id"])
        if do_read:
            result["raw_html_resources"]["inlining"] = read_file(
                result_data_store, css_inlining_filename, is_json=True
            )
        gridfs_filenames.append(css_inlining_filename)

    return gridfs_filenames
88+
89+
3090class MongoResultSerializer (ABC ):
3191 # This class is the interface between Mongo and the rest of the application
3292
@@ -212,47 +272,12 @@ def _convert_result(
212272 if cls is None :
213273 return None
214274
215- def ignore_missing_files (f ):
216- def _ignore_missing_files (path , * args , ** kwargs ):
217- try :
218- return f (path , * args , ** kwargs )
219- except NoFile :
220- logger .error ("Could not find file %s in %s" , path , self .result_data_store )
221- return ""
222- return _ignore_missing_files
223-
224- @ignore_missing_files
225- def read_file (path , is_json = False ):
226- r = self .result_data_store .get_last_version (path ).read ()
227- try :
228- return "" if not r else json .loads (r ) if is_json else r .decode ("utf8" )
229- except UnicodeDecodeError :
230- return r
231-
232- @ignore_missing_files
233- def read_bytes_file (path ):
234- return self .result_data_store .get_last_version (path ).read ()
235-
236275 if not load_payload :
237276 result .pop ("stdout" , None )
238277
239278 if cls == NotebookResultComplete :
240279 if load_payload :
241- outputs = {path : read_file (path ) for path in result .get ("raw_html_resources" , {}).get ("outputs" , [])}
242- result ["raw_html_resources" ]["outputs" ] = outputs
243- if result .get ("generate_pdf_output" ):
244- pdf_filename = _pdf_filename (result ["job_id" ])
245- result ["pdf" ] = read_bytes_file (pdf_filename )
246- if not result .get ("raw_ipynb_json" ):
247- result ["raw_ipynb_json" ] = read_file (_raw_json_filename (result ["job_id" ]), is_json = True )
248- if not result .get ("raw_html" ):
249- result ["raw_html" ] = read_file (_raw_html_filename (result ["job_id" ]))
250- if not result .get ("email_html" ):
251- result ["email_html" ] = read_file (_raw_email_html_filename (result ["job_id" ]))
252- if result .get ("raw_html_resources" ) and not result .get ("raw_html_resources" , {}).get ("inlining" ):
253- result ["raw_html_resources" ]["inlining" ] = read_file (
254- _css_inlining_filename (result ["job_id" ]), is_json = True
255- )
280+ load_files_from_gridfs (self .result_data_store , result , do_read = True )
256281 else :
257282 result .pop ("raw_html" , None )
258283 result .pop ("raw_ipynb_json" , None )
@@ -298,7 +323,7 @@ def read_bytes_file(path):
298323 elif cls == NotebookResultError :
299324 if load_payload :
300325 if not result .get ("error_info" ):
301- result ["error_info" ] = read_file (_error_info_filename (result ["job_id" ]))
326+ result ["error_info" ] = read_file (self . result_data_store , _error_info_filename (result ["job_id" ]))
302327 else :
303328 result .pop ("error_info" , None )
304329 return NotebookResultError (
@@ -319,11 +344,14 @@ def read_bytes_file(path):
319344 else :
320345 raise ValueError ("Could not deserialise {} into result object." .format (result ))
321346
347+ def _get_raw_check_result (self , job_id : str ):
348+ return self .library .find_one ({"job_id" : job_id }, {"_id" : 0 })
349+
322350 def get_check_result (
323- self , job_id : AnyStr
351+ self , job_id : AnyStr , load_payload : bool = True
324352 ) -> Optional [Union [NotebookResultError , NotebookResultComplete , NotebookResultPending ]]:
325- result = self .library . find_one ({ " job_id" : job_id }, { "_id" : 0 } )
326- return self ._convert_result (result )
353+ result = self ._get_raw_check_result ( job_id )
354+ return self ._convert_result (result , load_payload = load_payload )
327355
328356 def _get_raw_results (self , base_filter , projection , limit ):
329357 base_filter = _add_deleted_status_to_filter (base_filter )
@@ -462,8 +490,19 @@ def get_latest_successful_job_ids_for_name_all_params(self, report_name: str) ->
462490 def n_all_results_for_report_name (self , report_name : str ) -> int :
463491 return self ._get_result_count ({"report_name" : report_name })
464492
465- def delete_result (self , job_id : AnyStr ) -> None :
493+ def delete_result (self , job_id : AnyStr ) -> Dict [str , Any ]:
494+ result = self ._get_raw_check_result (job_id )
495+ status = JobStatus .from_string (result ["status" ])
496+ gridfs_filenames = []
497+ if status == JobStatus .DONE :
498+ gridfs_filenames = load_files_from_gridfs (self .result_data_store , result , do_read = False )
499+ elif status in (JobStatus .ERROR , JobStatus .TIMEOUT , JobStatus .CANCELLED ):
500+ gridfs_filenames = [_error_info_filename (job_id )]
466501 self .update_check_status (job_id , JobStatus .DELETED )
502+ for filename in gridfs_filenames :
503+ logger .info (f"Deleting { filename } " )
504+ self .result_data_store .delete (filename )
505+ return {"deleted_result_document" : result , "gridfs_filenames" : gridfs_filenames }
467506
468507
469508def _pdf_filename (job_id : str ) -> str :
0 commit comments