From bd8843dd356bfc5dde5baebef221b25b5e9f7553 Mon Sep 17 00:00:00 2001 From: Parikshit Bhatia <189966490+pabhatia-ms@users.noreply.github.com> Date: Mon, 10 Nov 2025 16:34:50 +0530 Subject: [PATCH 1/4] adding aml consumption for rf3 model ipynb --- .../rf3-modelforge/aml-rf3-modelforge.ipynb | 478 ++++++++++++++++++ 1 file changed, 478 insertions(+) create mode 100644 sdk/python/foundation-models/system/inference/rf3-modelforge/aml-rf3-modelforge.ipynb diff --git a/sdk/python/foundation-models/system/inference/rf3-modelforge/aml-rf3-modelforge.ipynb b/sdk/python/foundation-models/system/inference/rf3-modelforge/aml-rf3-modelforge.ipynb new file mode 100644 index 000000000..e1160ca42 --- /dev/null +++ b/sdk/python/foundation-models/system/inference/rf3-modelforge/aml-rf3-modelforge.ipynb @@ -0,0 +1,478 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a6320f26", + "metadata": {}, + "source": [ + "# Extract Files from JSON\n", + "\n", + "This notebook processes JSON files containing base64-encoded data and extracts them as files to a specified output directory. It handles both .gz compressed files (like .cif.gz) and CSV files.\n", + "\n", + "## Overview\n", + "The JSON structure is expected to have an \"outputs\" section with file names as keys and base64-encoded file contents as values." + ] + }, + { + "cell_type": "markdown", + "id": "9bd9270a", + "metadata": {}, + "source": [ + "## 1. Import Required Libraries\n", + "\n", + "Import all necessary libraries for file operations, JSON processing, and base64 decoding." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "562ffbd4", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import base64\n", + "import gzip\n", + "import os\n", + "from pathlib import Path\n", + "import shutil" + ] + }, + { + "cell_type": "markdown", + "id": "78bfecf4", + "metadata": {}, + "source": [ + "## 2. Configuration\n", + "\n", + "Set up file paths and configuration parameters." 
def load_json_data(json_file_path):
    """Load and parse the JSON file containing base64-encoded outputs.

    Parameters
    ----------
    json_file_path : str or Path
        Path to the JSON file produced by the model endpoint.

    Returns
    -------
    dict or None
        The parsed JSON, or None when the file is missing or malformed.
    """
    try:
        # JSON is UTF-8 by specification; don't depend on the platform
        # default encoding. Keep the try body minimal so only the lines
        # that can actually raise are guarded.
        with open(json_file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except FileNotFoundError:
        print(f"Error: File {json_file_path} not found")
        return None
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in {json_file_path}")
        return None

    print(f"Successfully loaded JSON file: {json_file_path}")
    # Sanity-check the expected structure; return the data either way so
    # the caller can decide what to do with a partial payload.
    if 'outputs' in data:
        print(f"Found 'outputs' section with {len(data['outputs'])} items")
    else:
        print("Warning: 'outputs' section not found in JSON")
    return data


def setup_output_directory(output_dir):
    """Create the output directory (and any parents) if it doesn't exist.

    Returns the directory as a Path.
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    print(f"Output directory ready: {output_path.absolute()}")
    return output_path
def _decode_and_save(filename, base64_data, output_path, label):
    """Decode *base64_data* and write the raw bytes to output_path/filename.

    *label* is only used in the success message (".gz file", "CSV file",
    or "file").  Returns True on success, False on any failure.
    """
    try:
        decoded_data = base64.b64decode(base64_data)
        file_path = output_path / filename
        # Always write raw bytes: .gz payloads must not be re-compressed
        # or text-decoded, and CSV/score payloads are stored verbatim.
        with open(file_path, 'wb') as f:
            f.write(decoded_data)
        # BUG FIX: the original printed the literal "(unknown)" instead of
        # the actual filename; interpolate {filename} as the recorded cell
        # outputs show.
        print(f"✓ Saved {label}: {filename} ({len(decoded_data)} bytes)")
        return True
    except Exception as e:
        # Report and continue; one bad entry must not stop the batch.
        print(f"✗ Error saving {filename}: {e}")
        return False


def decode_and_save_gz_file(filename, base64_data, output_path):
    """Decode base64 data and save it as a .gz file (e.g. .cif.gz)."""
    return _decode_and_save(filename, base64_data, output_path, ".gz file")


def decode_and_save_csv_file(filename, base64_data, output_path):
    """Decode base64 data and save it as a CSV file."""
    return _decode_and_save(filename, base64_data, output_path, "CSV file")


def decode_and_save_generic_file(filename, base64_data, output_path):
    """Decode base64 data and save it as a generic file."""
    return _decode_and_save(filename, base64_data, output_path, "file")


def process_files(json_data, output_path):
    """Extract every base64-encoded entry in json_data['outputs'].

    Files are dispatched on extension (.gz / .csv / other) so the final
    summary can report each category separately.
    """
    if not json_data or 'outputs' not in json_data:
        print("No outputs found in JSON data")
        return

    outputs = json_data['outputs']

    # Per-category counters for the summary.
    gz_files = 0
    csv_files = 0
    other_files = 0
    successful_saves = 0

    print(f"\nProcessing {len(outputs)} files...")
    print("-" * 50)

    for filename, base64_data in outputs.items():
        if filename.endswith('.gz'):
            # .gz files (including .cif.gz)
            if decode_and_save_gz_file(filename, base64_data, output_path):
                gz_files += 1
                successful_saves += 1
        elif filename.endswith('.csv'):
            if decode_and_save_csv_file(filename, base64_data, output_path):
                csv_files += 1
                successful_saves += 1
        else:
            if decode_and_save_generic_file(filename, base64_data, output_path):
                other_files += 1
                successful_saves += 1

    print("-" * 50)
    print("Extraction Summary:")
    print(f"  .gz files saved: {gz_files}")
    print(f"  .csv files saved: {csv_files}")
    print(f"  Other files saved: {other_files}")
    print(f"  Total successful: {successful_saves}/{len(outputs)}")
def verify_extracted_files(output_path):
    """List the files extracted into *output_path*, grouped by type."""
    if not output_path.exists():
        print(f"Output directory {output_path} does not exist")
        return

    entries = list(output_path.iterdir())
    if not entries:
        print(f"No files found in {output_path}")
        return

    print(f"\nFiles in output directory ({output_path}):")
    print("-" * 60)

    # Bucket by extension so related files are listed together.
    compressed = [e for e in entries if e.name.endswith('.gz')]
    tabular = [e for e in entries if e.name.endswith('.csv')]
    leftovers = [e for e in entries
                 if not e.name.endswith('.gz') and not e.name.endswith('.csv')]

    def _show(heading, group):
        # Shared printer so every category is formatted identically.
        if not group:
            return
        print(f"\n  {heading}:")
        for entry in sorted(group):
            print(f"    {entry.name:<40} {entry.stat().st_size:>10,} bytes")

    _show("Compressed (.gz) files", compressed)
    _show("CSV files", tabular)
    _show("Other files", leftovers)

    print(f"\nTotal files extracted: {len(entries)}")
@pabhatia-ms /sdk/python/assets/data/versioning.ipynb @ShakutaiGit /sdk/python/jobs/finetuning @amltres @sasum @marici /sdk/python/jobs/grpo @sharvin2187 @rtanase @gpenumetsa-msft @yeshsurya @babu-namburi diff --git a/sdk/python/foundation-models/rf3-modelforge/webrequest-rf3.ipynb b/sdk/python/foundation-models/rf3-modelforge/webrequest-rf3.ipynb new file mode 100644 index 000000000..e1160ca42 --- /dev/null +++ b/sdk/python/foundation-models/rf3-modelforge/webrequest-rf3.ipynb @@ -0,0 +1,478 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a6320f26", + "metadata": {}, + "source": [ + "# Extract Files from JSON\n", + "\n", + "This notebook processes JSON files containing base64-encoded data and extracts them as files to a specified output directory. It handles both .gz compressed files (like .cif.gz) and CSV files.\n", + "\n", + "## Overview\n", + "The JSON structure is expected to have an \"outputs\" section with file names as keys and base64-encoded file contents as values." + ] + }, + { + "cell_type": "markdown", + "id": "9bd9270a", + "metadata": {}, + "source": [ + "## 1. Import Required Libraries\n", + "\n", + "Import all necessary libraries for file operations, JSON processing, and base64 decoding." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "562ffbd4", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import base64\n", + "import gzip\n", + "import os\n", + "from pathlib import Path\n", + "import shutil" + ] + }, + { + "cell_type": "markdown", + "id": "78bfecf4", + "metadata": {}, + "source": [ + "## 2. Configuration\n", + "\n", + "Set up file paths and configuration parameters." 
def load_json_data(json_file_path):
    """Load and parse the JSON file containing base64-encoded outputs.

    Parameters
    ----------
    json_file_path : str or Path
        Path to the JSON file produced by the model endpoint.

    Returns
    -------
    dict or None
        The parsed JSON, or None when the file is missing or malformed.
    """
    try:
        # JSON is UTF-8 by specification; don't depend on the platform
        # default encoding. Keep the try body minimal so only the lines
        # that can actually raise are guarded.
        with open(json_file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except FileNotFoundError:
        print(f"Error: File {json_file_path} not found")
        return None
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in {json_file_path}")
        return None

    print(f"Successfully loaded JSON file: {json_file_path}")
    # Sanity-check the expected structure; return the data either way so
    # the caller can decide what to do with a partial payload.
    if 'outputs' in data:
        print(f"Found 'outputs' section with {len(data['outputs'])} items")
    else:
        print("Warning: 'outputs' section not found in JSON")
    return data


def setup_output_directory(output_dir):
    """Create the output directory (and any parents) if it doesn't exist.

    Returns the directory as a Path.
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    print(f"Output directory ready: {output_path.absolute()}")
    return output_path
def _decode_and_save(filename, base64_data, output_path, label):
    """Decode *base64_data* and write the raw bytes to output_path/filename.

    *label* is only used in the success message (".gz file", "CSV file",
    or "file").  Returns True on success, False on any failure.
    """
    try:
        decoded_data = base64.b64decode(base64_data)
        file_path = output_path / filename
        # Always write raw bytes: .gz payloads must not be re-compressed
        # or text-decoded, and CSV/score payloads are stored verbatim.
        with open(file_path, 'wb') as f:
            f.write(decoded_data)
        # BUG FIX: the original printed the literal "(unknown)" instead of
        # the actual filename; interpolate {filename} as the recorded cell
        # outputs show.
        print(f"✓ Saved {label}: {filename} ({len(decoded_data)} bytes)")
        return True
    except Exception as e:
        # Report and continue; one bad entry must not stop the batch.
        print(f"✗ Error saving {filename}: {e}")
        return False


def decode_and_save_gz_file(filename, base64_data, output_path):
    """Decode base64 data and save it as a .gz file (e.g. .cif.gz)."""
    return _decode_and_save(filename, base64_data, output_path, ".gz file")


def decode_and_save_csv_file(filename, base64_data, output_path):
    """Decode base64 data and save it as a CSV file."""
    return _decode_and_save(filename, base64_data, output_path, "CSV file")


def decode_and_save_generic_file(filename, base64_data, output_path):
    """Decode base64 data and save it as a generic file."""
    return _decode_and_save(filename, base64_data, output_path, "file")


def process_files(json_data, output_path):
    """Extract every base64-encoded entry in json_data['outputs'].

    Files are dispatched on extension (.gz / .csv / other) so the final
    summary can report each category separately.
    """
    if not json_data or 'outputs' not in json_data:
        print("No outputs found in JSON data")
        return

    outputs = json_data['outputs']

    # Per-category counters for the summary.
    gz_files = 0
    csv_files = 0
    other_files = 0
    successful_saves = 0

    print(f"\nProcessing {len(outputs)} files...")
    print("-" * 50)

    for filename, base64_data in outputs.items():
        if filename.endswith('.gz'):
            # .gz files (including .cif.gz)
            if decode_and_save_gz_file(filename, base64_data, output_path):
                gz_files += 1
                successful_saves += 1
        elif filename.endswith('.csv'):
            if decode_and_save_csv_file(filename, base64_data, output_path):
                csv_files += 1
                successful_saves += 1
        else:
            if decode_and_save_generic_file(filename, base64_data, output_path):
                other_files += 1
                successful_saves += 1

    print("-" * 50)
    print("Extraction Summary:")
    print(f"  .gz files saved: {gz_files}")
    print(f"  .csv files saved: {csv_files}")
    print(f"  Other files saved: {other_files}")
    print(f"  Total successful: {successful_saves}/{len(outputs)}")
def verify_extracted_files(output_path):
    """List the files extracted into *output_path*, grouped by type."""
    if not output_path.exists():
        print(f"Output directory {output_path} does not exist")
        return

    entries = list(output_path.iterdir())
    if not entries:
        print(f"No files found in {output_path}")
        return

    print(f"\nFiles in output directory ({output_path}):")
    print("-" * 60)

    # Bucket by extension so related files are listed together.
    compressed = [e for e in entries if e.name.endswith('.gz')]
    tabular = [e for e in entries if e.name.endswith('.csv')]
    leftovers = [e for e in entries
                 if not e.name.endswith('.gz') and not e.name.endswith('.csv')]

    def _show(heading, group):
        # Shared printer so every category is formatted identically.
        if not group:
            return
        print(f"\n  {heading}:")
        for entry in sorted(group):
            print(f"    {entry.name:<40} {entry.stat().st_size:>10,} bytes")

    _show("Compressed (.gz) files", compressed)
    _show("CSV files", tabular)
    _show("Other files", leftovers)

    print(f"\nTotal files extracted: {len(entries)}")
8e863893a39d3c977f325d42f49870fe80bd7aae Mon Sep 17 00:00:00 2001 From: Parikshit Bhatia <189966490+pabhatia-ms@users.noreply.github.com> Date: Mon, 10 Nov 2025 16:51:53 +0530 Subject: [PATCH 4/4] delete files in system folder --- .../rf3-modelforge/aml-rf3-modelforge.ipynb | 478 ------------------ 1 file changed, 478 deletions(-) delete mode 100644 sdk/python/foundation-models/system/inference/rf3-modelforge/aml-rf3-modelforge.ipynb diff --git a/sdk/python/foundation-models/system/inference/rf3-modelforge/aml-rf3-modelforge.ipynb b/sdk/python/foundation-models/system/inference/rf3-modelforge/aml-rf3-modelforge.ipynb deleted file mode 100644 index e1160ca42..000000000 --- a/sdk/python/foundation-models/system/inference/rf3-modelforge/aml-rf3-modelforge.ipynb +++ /dev/null @@ -1,478 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "a6320f26", - "metadata": {}, - "source": [ - "# Extract Files from JSON\n", - "\n", - "This notebook processes JSON files containing base64-encoded data and extracts them as files to a specified output directory. It handles both .gz compressed files (like .cif.gz) and CSV files.\n", - "\n", - "## Overview\n", - "The JSON structure is expected to have an \"outputs\" section with file names as keys and base64-encoded file contents as values." - ] - }, - { - "cell_type": "markdown", - "id": "9bd9270a", - "metadata": {}, - "source": [ - "## 1. Import Required Libraries\n", - "\n", - "Import all necessary libraries for file operations, JSON processing, and base64 decoding." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "562ffbd4", - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import base64\n", - "import gzip\n", - "import os\n", - "from pathlib import Path\n", - "import shutil" - ] - }, - { - "cell_type": "markdown", - "id": "78bfecf4", - "metadata": {}, - "source": [ - "## 2. Configuration\n", - "\n", - "Set up file paths and configuration parameters." 
def load_json_data(json_file_path):
    """Load and parse the JSON file containing base64-encoded outputs.

    Parameters
    ----------
    json_file_path : str or Path
        Path to the JSON file produced by the model endpoint.

    Returns
    -------
    dict or None
        The parsed JSON, or None when the file is missing or malformed.
    """
    try:
        # JSON is UTF-8 by specification; don't depend on the platform
        # default encoding. Keep the try body minimal so only the lines
        # that can actually raise are guarded.
        with open(json_file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except FileNotFoundError:
        print(f"Error: File {json_file_path} not found")
        return None
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in {json_file_path}")
        return None

    print(f"Successfully loaded JSON file: {json_file_path}")
    # Sanity-check the expected structure; return the data either way so
    # the caller can decide what to do with a partial payload.
    if 'outputs' in data:
        print(f"Found 'outputs' section with {len(data['outputs'])} items")
    else:
        print("Warning: 'outputs' section not found in JSON")
    return data


def setup_output_directory(output_dir):
    """Create the output directory (and any parents) if it doesn't exist.

    Returns the directory as a Path.
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    print(f"Output directory ready: {output_path.absolute()}")
    return output_path
def _decode_and_save(filename, base64_data, output_path, label):
    """Decode *base64_data* and write it to ``output_path / filename``.

    Shared implementation for every output file type: the payload is
    base64 text and is written back verbatim in binary mode, so .gz
    payloads stay compressed and CSV/score payloads round-trip unchanged.

    Returns True on success, False on any decode or write failure.
    """
    try:
        # Decode base64 data
        decoded_data = base64.b64decode(base64_data)

        # Create full file path
        file_path = output_path / filename

        # Write the decoded bytes exactly as received
        with open(file_path, 'wb') as f:
            f.write(decoded_data)

        print(f"✓ Saved {label}: {filename} ({len(decoded_data)} bytes)")
        return True

    except Exception as e:
        print(f"✗ Error saving {filename}: {e}")
        return False


def decode_and_save_gz_file(filename, base64_data, output_path):
    """Decode base64 data and save it as a .gz file (including .cif.gz)."""
    return _decode_and_save(filename, base64_data, output_path, ".gz file")


def decode_and_save_csv_file(filename, base64_data, output_path):
    """Decode base64 data and save it as a CSV file."""
    return _decode_and_save(filename, base64_data, output_path, "CSV file")


def decode_and_save_generic_file(filename, base64_data, output_path):
    """Decode base64 data and save it as a generic file."""
    return _decode_and_save(filename, base64_data, output_path, "file")


def process_files(json_data, output_path):
    """Extract every base64-encoded entry of ``json_data['outputs']``.

    Each entry is routed by extension (.gz / .csv / other) to the
    matching saver and written into *output_path*; a per-type summary is
    printed at the end. Returns None; progress is reported via stdout.
    """
    if not json_data or 'outputs' not in json_data:
        print("No outputs found in JSON data")
        return

    outputs = json_data['outputs']

    # Counters for different file types
    gz_files = 0
    csv_files = 0
    other_files = 0
    successful_saves = 0

    print(f"\nProcessing {len(outputs)} files...")
    print("-" * 50)

    for filename, base64_data in outputs.items():
        # Check file extension and process accordingly
        if filename.endswith('.gz'):
            # Handle .gz files (including .cif.gz)
            if decode_and_save_gz_file(filename, base64_data, output_path):
                gz_files += 1
                successful_saves += 1

        elif filename.endswith('.csv'):
            # Handle CSV files
            if decode_and_save_csv_file(filename, base64_data, output_path):
                csv_files += 1
                successful_saves += 1

        else:
            # Handle other file types
            if decode_and_save_generic_file(filename, base64_data, output_path):
                other_files += 1
                successful_saves += 1

    # Print summary
    print("-" * 50)
    print(f"Extraction Summary:")
    print(f"  .gz files saved: {gz_files}")
    print(f"  .csv files saved: {csv_files}")
    print(f"  Other files saved: {other_files}")
    print(f"  Total successful: {successful_saves}/{len(outputs)}")


# Process the files if JSON data is available. Guard via globals() so this
# cell also degrades gracefully when the loading cell has not been run yet.
if globals().get("json_data"):
    process_files(json_data, output_path)
else:
    print("Cannot process files - JSON data not loaded")
def verify_extracted_files(output_path):
    """Print a categorized listing of the files in the output directory.

    Files are grouped into .gz, .csv and everything else, each group is
    shown sorted by name with its byte size, and a total count follows.
    Missing or empty directories are reported instead of listed.
    """
    if not output_path.exists():
        print(f"Output directory {output_path} does not exist")
        return

    files = list(output_path.iterdir())

    if not files:
        print(f"No files found in {output_path}")
        return

    print(f"\nFiles in output directory ({output_path}):")
    print("-" * 60)

    # Bucket the directory entries by extension in a single pass.
    gz_files, csv_files, other_files = [], [], []
    for entry in files:
        if entry.name.endswith('.gz'):
            gz_files.append(entry)
        elif entry.name.endswith('.csv'):
            csv_files.append(entry)
        else:
            other_files.append(entry)

    def _show(title, group):
        # One labelled, name-sorted listing with right-aligned sizes;
        # silently skipped when the group is empty.
        if group:
            print(title)
            for entry in sorted(group):
                print(f"  {entry.name:<40} {entry.stat().st_size:>10,} bytes")

    _show("\n Compressed (.gz) files:", gz_files)
    _show("\n CSV files:", csv_files)
    _show("\n Other files:", other_files)

    print(f"\nTotal files extracted: {len(files)}")


# Verify the extracted files
verify_extracted_files(output_path)