22import os
33from pathlib import Path , PurePath
44import shutil
5+ import uuid
56
67from pycrdt import Map
8+ import nbformat
79
810from traitlets .config import LoggingConfigurable
911from traitlets import Dict , Instance , Int , default
@@ -112,18 +114,18 @@ def get_stream(self, file_id, cell_id):
112114 output = f .read ()
113115 return output
114116
def write(self, file_id, cell_id, output, display_id=None, asdict: bool = False) -> "Map | dict | None":
    """Write a new output for file_id and cell_id.

    The output is always handed to ``write_output`` first. When it is a
    stream output and ``self.stream_limit`` is set, it is additionally
    passed through ``write_stream``, whose result replaces the placeholder
    produced by ``write_output``.

    Args:
        file_id: The file identifier.
        cell_id: The cell identifier.
        output: The output to write; must contain an ``"output_type"`` key.
        display_id: Optional display id, forwarded to ``write_output``.
        asdict: Forwarded to ``write_output`` and ``write_stream``. When
            True the placeholder is returned as a plain dict instead of
            being wrapped in a ``pycrdt.Map``.

    Returns:
        The placeholder output (``pycrdt.Map`` or dict), or None if no
        placeholder output should be written to the ydoc.
    """
    # The return annotation is quoted so it is not evaluated at definition
    # time; the value really can be None (see write_stream).
    placeholder = self.write_output(file_id, cell_id, output, display_id, asdict=asdict)
    if output["output_type"] == "stream" and self.stream_limit is not None:
        placeholder = self.write_stream(file_id, cell_id, output, placeholder, asdict=asdict)
    return placeholder
125127
126- def write_output (self , file_id , cell_id , output , display_id = None ) :
128+ def write_output (self , file_id , cell_id , output , display_id = None , asdict : bool = False ) -> Map | dict :
127129 self ._ensure_path (file_id , cell_id )
128130 index = self ._compute_output_index (cell_id , display_id )
129131 path = self ._build_path (file_id , cell_id , index )
@@ -132,9 +134,12 @@ def write_output(self, file_id, cell_id, output, display_id=None):
132134 f .write (data )
133135 url = create_output_url (file_id , cell_id , index )
134136 self .log .info (f"Wrote output: { url } " )
135- return create_placeholder_output (output ["output_type" ], url )
137+ placeholder = create_placeholder_dict (output ["output_type" ], url )
138+ if not asdict :
139+ placeholder = Map (placeholder )
140+ return placeholder
136141
137- def write_stream (self , file_id , cell_id , output , placeholder ) -> Map :
142+ def write_stream (self , file_id , cell_id , output , placeholder , asdict : bool = False ) -> Map | dict :
138143 # How many stream outputs have been written for this cell previously
139144 count = self ._stream_count .get (cell_id , 0 )
140145
@@ -156,7 +161,9 @@ def write_stream(self, file_id, cell_id, output, placeholder) -> Map:
156161 placeholder = placeholder
157162 elif count == self .stream_limit :
158163 # Return a link to the full stream output
159- placeholder = create_placeholder_output ("display_data" , url , full = True )
164+ placeholder = create_placeholder_dict ("display_data" , url , full = True )
165+ if not asdict :
166+ placeholder = Map (placeholder )
160167 elif count > self .stream_limit :
161168 # Return None to indicate that no placeholder should be written to the ydoc
162169 placeholder = None
@@ -180,10 +187,152 @@ def clear(self, file_id, cell_id=None):
180187 except FileNotFoundError :
181188 pass
182189
def process_loaded_notebook(self, file_id: str, file_data: dict) -> dict:
    """Process a loaded notebook and handle outputs through the outputs manager.

    This method processes a notebook that has been loaded from disk.

    * If the notebook metadata has ``placeholder_outputs`` set to exactly
      ``True``, the notebook is passed to ``_process_loaded_placeholders``.
    * Otherwise it is passed to ``_process_loaded_no_placeholders``.

    The result of the chosen helper is stored back into
    ``file_data['content']``; ``file_data`` itself is modified in place.

    Args:
        file_id (str): The file identifier
        file_data (dict): The file data containing the notebook content
            from calling ContentsManager.get()

    Returns:
        dict: The same ``file_data`` object with processed outputs. If
        ``file_data['content']`` is None, it is returned unchanged.
    """
    self.log.info(f"Processing loaded notebook: {file_id}")

    # Notebook content is a tree of nbformat.NotebookNode objects,
    # which are a subclass of dict.
    nb = file_data['content']
    if nb is None:
        # Nothing to process (e.g. a model fetched without content);
        # avoid failing on nb.get below.
        return file_data

    # Only the literal True enables the placeholder path; truthy values
    # such as "yes" or 1 do not.
    if nb.get('metadata', {}).get('placeholder_outputs') is True:
        nb = self._process_loaded_placeholders(file_id=file_id, nb=nb)
    else:
        nb = self._process_loaded_no_placeholders(file_id=file_id, nb=nb)

    file_data['content'] = nb
    return file_data
219+
def _process_loaded_placeholders(self, file_id: str, nb: dict) -> dict:
    """Process a notebook with placeholder_outputs metadata set to True.

    For every code cell, the outputs stored for ``(file_id, cell_id)`` are
    read via ``self.get_outputs``; each one is parsed as JSON and a
    placeholder dict is built from its ``output_type``. The cell's
    ``outputs`` list is replaced with those placeholders. If
    ``get_outputs`` raises ``FileNotFoundError`` the cell's outputs are
    set to an empty list. Non-code cells are left untouched.

    Args:
        file_id (str): The file identifier
        nb (dict): The notebook dictionary; modified in place

    Returns:
        dict: The same notebook object, with placeholder outputs
        derived from the outputs stored on disk
    """
    for cell in nb.get('cells', []):
        if cell.get('cell_type') != 'code':
            continue

        # Only generate a fresh id when the cell has none.
        # NOTE(review): a generated id is not stored on the cell, and
        # presumably has no outputs on disk -- confirm this is intended.
        cell_id = cell['id'] if 'id' in cell else str(uuid.uuid4())
        try:
            # Try to get outputs from disk
            output_strings = self.get_outputs(file_id=file_id, cell_id=cell_id)
            outputs = []
            for output_string in output_strings:
                output_dict = json.loads(output_string)
                # NOTE(review): create_output_url is called without an
                # output index, so every placeholder of this cell gets
                # the same URL -- confirm whether a per-output index
                # should be passed here.
                placeholder = create_placeholder_dict(
                    output_dict["output_type"],
                    url=create_output_url(file_id, cell_id),
                )
                outputs.append(placeholder)
            cell['outputs'] = outputs
        except FileNotFoundError:
            # No outputs on disk for this cell, set empty outputs
            cell['outputs'] = []
    return nb
254+
def _process_loaded_no_placeholders(self, file_id: str, nb: dict) -> dict:
    """Process a notebook that doesn't have placeholder_outputs metadata.

    This method processes notebooks with actual output data in the cells.
    Each output of each code cell is written through ``self.write`` and
    replaced by the placeholder that call returns. Outputs that already
    have a ``url`` in their metadata are left as-is, and an output whose
    write fails is kept unchanged (the error is logged). Outputs for
    which ``self.write`` returns None are dropped from the cell.

    Code cells without an ``outputs`` key and non-code cells are skipped.

    Args:
        file_id (str): The file identifier
        nb (dict): The notebook dictionary; modified in place

    Returns:
        dict: The same notebook object, with outputs saved to disk and
        replaced with placeholders
    """
    for cell in nb.get('cells', []):
        if cell.get('cell_type') != 'code' or 'outputs' not in cell:
            continue

        # Only generate a fresh id when the cell has none.
        # NOTE(review): a generated id is not stored on the cell, so it
        # survives only inside the placeholder returned by write() --
        # confirm this is intended.
        cell_id = cell['id'] if 'id' in cell else str(uuid.uuid4())
        processed_outputs = []
        for output in cell['outputs']:
            metadata = output.get('metadata', {})
            display_id = metadata.get('display_id')
            if metadata.get('url') is None:
                # Save output to disk and replace with placeholder
                try:
                    placeholder = self.write(
                        file_id,
                        cell_id,
                        output,
                        display_id,
                        asdict=True,
                    )
                except Exception as e:
                    # Deliberately broad: loading the notebook must not
                    # fail because one output could not be persisted.
                    # exc_info keeps the traceback in the log.
                    self.log.error(f"Error writing output: {e}", exc_info=True)
                    # If we can't write the output to disk, keep the original
                    placeholder = output
            else:
                # In this case, there is a placeholder already so keep it
                placeholder = output

            if placeholder is not None:
                # write() returns None when no placeholder should be
                # emitted for this output, so such outputs are omitted.
                processed_outputs.append(nbformat.from_dict(placeholder))

        # Replace the outputs with processed ones
        cell['outputs'] = processed_outputs
    return nb
304+
def process_saving_notebook(self, nb: dict) -> dict:
    """Process a notebook before saving to disk.

    This method is called when the yroom_file_api saves notebooks.
    It sets the ``placeholder_outputs`` key to True in the notebook
    metadata (creating the ``metadata`` dict if it is missing) and
    replaces the outputs of every code cell with an empty list. Non-code
    cells are left untouched.

    Args:
        nb (dict): The notebook dict; modified in place

    Returns:
        dict: The same notebook object, with ``placeholder_outputs`` set
        to True and empty outputs lists on all code cells
    """
    # Ensure metadata exists, then flag the notebook as using placeholders
    nb.setdefault('metadata', {})['placeholder_outputs'] = True

    # Clear outputs for all code cells, as they are saved to disk
    for cell in nb.get('cells', []):
        if cell.get('cell_type') == 'code':
            cell['outputs'] = []

    return nb
332+
333+
334+ def create_output_url (file_id : str , cell_id : str , output_index : int = None ) -> str :
335+ """Create the URL for an output or stream.
187336
188337 Parameters:
189338 - file_id (str): The ID of the file.
@@ -198,9 +347,9 @@ def create_output_url(file_id: str, cell_id: str, output_index: int = None) -> s
198347 else :
199348 return f"/api/outputs/{ file_id } /{ cell_id } /{ output_index } .output"
200349
201- def create_placeholder_dict (output_type : str , url : str , full : bool = False ):
202- """
203- Build a placeholder output dict for the given output_type and url.
350+ def create_placeholder_dict (output_type : str , url : str , full : bool = False ) -> dict :
351+ """Build a placeholder output dict for the given output_type and url.
352+
204353 If full is True and output_type is "display_data", returns a display_data output
205354 with an HTML link to the full stream output.
206355
@@ -234,18 +383,3 @@ def create_placeholder_dict(output_type: str, url: str, full: bool = False):
234383 else :
235384 raise ValueError (f"Unknown output_type: { output_type } " )
236385
237- def create_placeholder_output (output_type : str , url : str , full : bool = False ):
238- """
239- Creates a placeholder output Map for the given output_type and url.
240- If full is True and output_type is "display_data", creates a display_data output with an HTML link.
241-
242- Parameters:
243- - output_type (str): The type of the output.
244- - url (str): The URL associated with the output.
245- - full (bool): Whether to create a full output placeholder with a link.
246-
247- Returns:
248- - Map: The placeholder output `ycrdt.Map`.
249- """
250- output_dict = create_placeholder_dict (output_type , url , full = full )
251- return Map (output_dict )
0 commit comments