From bd8843dd356bfc5dde5baebef221b25b5e9f7553 Mon Sep 17 00:00:00 2001 From: Parikshit Bhatia <189966490+pabhatia-ms@users.noreply.github.com> Date: Mon, 10 Nov 2025 16:34:50 +0530 Subject: [PATCH 1/4] adding aml consumption for rf3 model ipynb --- .../rf3-modelforge/aml-rf3-modelforge.ipynb | 478 ++++++++++++++++++ 1 file changed, 478 insertions(+) create mode 100644 sdk/python/foundation-models/system/inference/rf3-modelforge/aml-rf3-modelforge.ipynb diff --git a/sdk/python/foundation-models/system/inference/rf3-modelforge/aml-rf3-modelforge.ipynb b/sdk/python/foundation-models/system/inference/rf3-modelforge/aml-rf3-modelforge.ipynb new file mode 100644 index 000000000..e1160ca42 --- /dev/null +++ b/sdk/python/foundation-models/system/inference/rf3-modelforge/aml-rf3-modelforge.ipynb @@ -0,0 +1,478 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a6320f26", + "metadata": {}, + "source": [ + "# Extract Files from JSON\n", + "\n", + "This notebook processes JSON files containing base64-encoded data and extracts them as files to a specified output directory. It handles both .gz compressed files (like .cif.gz) and CSV files.\n", + "\n", + "## Overview\n", + "The JSON structure is expected to have an \"outputs\" section with file names as keys and base64-encoded file contents as values." + ] + }, + { + "cell_type": "markdown", + "id": "9bd9270a", + "metadata": {}, + "source": [ + "## 1. Import Required Libraries\n", + "\n", + "Import all necessary libraries for file operations, JSON processing, and base64 decoding." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "562ffbd4", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import base64\n", + "import gzip\n", + "import os\n", + "from pathlib import Path\n", + "import shutil" + ] + }, + { + "cell_type": "markdown", + "id": "78bfecf4", + "metadata": {}, + "source": [ + "## 2. Configuration\n", + "\n", + "Set up file paths and configuration parameters." 
def load_json_data(json_file_path):
    """Load and parse the JSON file containing base64-encoded outputs.

    Parameters
    ----------
    json_file_path : str or Path
        Path to the JSON file produced by the model endpoint.

    Returns
    -------
    dict or None
        The parsed JSON, or None when the file is missing or malformed.
    """
    try:
        # JSON is UTF-8 by specification; don't depend on the platform
        # default encoding. Keep the try body minimal so only the lines
        # that can actually raise are guarded.
        with open(json_file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except FileNotFoundError:
        print(f"Error: File {json_file_path} not found")
        return None
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in {json_file_path}")
        return None

    print(f"Successfully loaded JSON file: {json_file_path}")
    # Sanity-check the expected structure; return the data either way so
    # the caller can decide what to do with a partial payload.
    if 'outputs' in data:
        print(f"Found 'outputs' section with {len(data['outputs'])} items")
    else:
        print("Warning: 'outputs' section not found in JSON")
    return data


def setup_output_directory(output_dir):
    """Create the output directory (and any parents) if it doesn't exist.

    Returns the directory as a Path.
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    print(f"Output directory ready: {output_path.absolute()}")
    return output_path
def _decode_and_save(filename, base64_data, output_path, label):
    """Decode *base64_data* and write the raw bytes to output_path/filename.

    *label* is only used in the success message (".gz file", "CSV file",
    or "file").  Returns True on success, False on any failure.
    """
    try:
        decoded_data = base64.b64decode(base64_data)
        file_path = output_path / filename
        # Always write raw bytes: .gz payloads must not be re-compressed
        # or text-decoded, and CSV/score payloads are stored verbatim.
        with open(file_path, 'wb') as f:
            f.write(decoded_data)
        # BUG FIX: the original printed the literal "(unknown)" instead of
        # the actual filename; interpolate {filename} as the recorded cell
        # outputs show.
        print(f"✓ Saved {label}: {filename} ({len(decoded_data)} bytes)")
        return True
    except Exception as e:
        # Report and continue; one bad entry must not stop the batch.
        print(f"✗ Error saving {filename}: {e}")
        return False


def decode_and_save_gz_file(filename, base64_data, output_path):
    """Decode base64 data and save it as a .gz file (e.g. .cif.gz)."""
    return _decode_and_save(filename, base64_data, output_path, ".gz file")


def decode_and_save_csv_file(filename, base64_data, output_path):
    """Decode base64 data and save it as a CSV file."""
    return _decode_and_save(filename, base64_data, output_path, "CSV file")


def decode_and_save_generic_file(filename, base64_data, output_path):
    """Decode base64 data and save it as a generic file."""
    return _decode_and_save(filename, base64_data, output_path, "file")


def process_files(json_data, output_path):
    """Extract every base64-encoded entry in json_data['outputs'].

    Files are dispatched on extension (.gz / .csv / other) so the final
    summary can report each category separately.
    """
    if not json_data or 'outputs' not in json_data:
        print("No outputs found in JSON data")
        return

    outputs = json_data['outputs']

    # Per-category counters for the summary.
    gz_files = 0
    csv_files = 0
    other_files = 0
    successful_saves = 0

    print(f"\nProcessing {len(outputs)} files...")
    print("-" * 50)

    for filename, base64_data in outputs.items():
        if filename.endswith('.gz'):
            # .gz files (including .cif.gz)
            if decode_and_save_gz_file(filename, base64_data, output_path):
                gz_files += 1
                successful_saves += 1
        elif filename.endswith('.csv'):
            if decode_and_save_csv_file(filename, base64_data, output_path):
                csv_files += 1
                successful_saves += 1
        else:
            if decode_and_save_generic_file(filename, base64_data, output_path):
                other_files += 1
                successful_saves += 1

    print("-" * 50)
    print("Extraction Summary:")
    print(f"  .gz files saved: {gz_files}")
    print(f"  .csv files saved: {csv_files}")
    print(f"  Other files saved: {other_files}")
    print(f"  Total successful: {successful_saves}/{len(outputs)}")
def verify_extracted_files(output_path):
    """List the files extracted into *output_path*, grouped by type."""
    if not output_path.exists():
        print(f"Output directory {output_path} does not exist")
        return

    entries = list(output_path.iterdir())
    if not entries:
        print(f"No files found in {output_path}")
        return

    print(f"\nFiles in output directory ({output_path}):")
    print("-" * 60)

    # Bucket by extension so related files are listed together.
    compressed = [e for e in entries if e.name.endswith('.gz')]
    tabular = [e for e in entries if e.name.endswith('.csv')]
    leftovers = [e for e in entries
                 if not e.name.endswith('.gz') and not e.name.endswith('.csv')]

    def _show(heading, group):
        # Shared printer so every category is formatted identically.
        if not group:
            return
        print(f"\n  {heading}:")
        for entry in sorted(group):
            print(f"    {entry.name:<40} {entry.stat().st_size:>10,} bytes")

    _show("Compressed (.gz) files", compressed)
    _show("CSV files", tabular)
    _show("Other files", leftovers)

    print(f"\nTotal files extracted: {len(entries)}")
@pabhatia-ms /sdk/python/assets/data/versioning.ipynb @ShakutaiGit /sdk/python/jobs/finetuning @amltres @sasum @marici /sdk/python/jobs/grpo @sharvin2187 @rtanase @gpenumetsa-msft @yeshsurya @babu-namburi diff --git a/sdk/python/foundation-models/rf3-modelforge/webrequest-rf3.ipynb b/sdk/python/foundation-models/rf3-modelforge/webrequest-rf3.ipynb new file mode 100644 index 000000000..e1160ca42 --- /dev/null +++ b/sdk/python/foundation-models/rf3-modelforge/webrequest-rf3.ipynb @@ -0,0 +1,478 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a6320f26", + "metadata": {}, + "source": [ + "# Extract Files from JSON\n", + "\n", + "This notebook processes JSON files containing base64-encoded data and extracts them as files to a specified output directory. It handles both .gz compressed files (like .cif.gz) and CSV files.\n", + "\n", + "## Overview\n", + "The JSON structure is expected to have an \"outputs\" section with file names as keys and base64-encoded file contents as values." + ] + }, + { + "cell_type": "markdown", + "id": "9bd9270a", + "metadata": {}, + "source": [ + "## 1. Import Required Libraries\n", + "\n", + "Import all necessary libraries for file operations, JSON processing, and base64 decoding." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "562ffbd4", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import base64\n", + "import gzip\n", + "import os\n", + "from pathlib import Path\n", + "import shutil" + ] + }, + { + "cell_type": "markdown", + "id": "78bfecf4", + "metadata": {}, + "source": [ + "## 2. Configuration\n", + "\n", + "Set up file paths and configuration parameters." 
def load_json_data(json_file_path):
    """Load and parse the JSON file containing base64-encoded outputs.

    Parameters
    ----------
    json_file_path : str or Path
        Path to the JSON file produced by the model endpoint.

    Returns
    -------
    dict or None
        The parsed JSON, or None when the file is missing or malformed.
    """
    try:
        # JSON is UTF-8 by specification; don't depend on the platform
        # default encoding. Keep the try body minimal so only the lines
        # that can actually raise are guarded.
        with open(json_file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except FileNotFoundError:
        print(f"Error: File {json_file_path} not found")
        return None
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in {json_file_path}")
        return None

    print(f"Successfully loaded JSON file: {json_file_path}")
    # Sanity-check the expected structure; return the data either way so
    # the caller can decide what to do with a partial payload.
    if 'outputs' in data:
        print(f"Found 'outputs' section with {len(data['outputs'])} items")
    else:
        print("Warning: 'outputs' section not found in JSON")
    return data


def setup_output_directory(output_dir):
    """Create the output directory (and any parents) if it doesn't exist.

    Returns the directory as a Path.
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    print(f"Output directory ready: {output_path.absolute()}")
    return output_path
def _decode_and_save(filename, base64_data, output_path, label):
    """Decode *base64_data* and write the raw bytes to output_path/filename.

    *label* is only used in the success message (".gz file", "CSV file",
    or "file").  Returns True on success, False on any failure.
    """
    try:
        decoded_data = base64.b64decode(base64_data)
        file_path = output_path / filename
        # Always write raw bytes: .gz payloads must not be re-compressed
        # or text-decoded, and CSV/score payloads are stored verbatim.
        with open(file_path, 'wb') as f:
            f.write(decoded_data)
        # BUG FIX: the original printed the literal "(unknown)" instead of
        # the actual filename; interpolate {filename} as the recorded cell
        # outputs show.
        print(f"✓ Saved {label}: {filename} ({len(decoded_data)} bytes)")
        return True
    except Exception as e:
        # Report and continue; one bad entry must not stop the batch.
        print(f"✗ Error saving {filename}: {e}")
        return False


def decode_and_save_gz_file(filename, base64_data, output_path):
    """Decode base64 data and save it as a .gz file (e.g. .cif.gz)."""
    return _decode_and_save(filename, base64_data, output_path, ".gz file")


def decode_and_save_csv_file(filename, base64_data, output_path):
    """Decode base64 data and save it as a CSV file."""
    return _decode_and_save(filename, base64_data, output_path, "CSV file")


def decode_and_save_generic_file(filename, base64_data, output_path):
    """Decode base64 data and save it as a generic file."""
    return _decode_and_save(filename, base64_data, output_path, "file")


def process_files(json_data, output_path):
    """Extract every base64-encoded entry in json_data['outputs'].

    Files are dispatched on extension (.gz / .csv / other) so the final
    summary can report each category separately.
    """
    if not json_data or 'outputs' not in json_data:
        print("No outputs found in JSON data")
        return

    outputs = json_data['outputs']

    # Per-category counters for the summary.
    gz_files = 0
    csv_files = 0
    other_files = 0
    successful_saves = 0

    print(f"\nProcessing {len(outputs)} files...")
    print("-" * 50)

    for filename, base64_data in outputs.items():
        if filename.endswith('.gz'):
            # .gz files (including .cif.gz)
            if decode_and_save_gz_file(filename, base64_data, output_path):
                gz_files += 1
                successful_saves += 1
        elif filename.endswith('.csv'):
            if decode_and_save_csv_file(filename, base64_data, output_path):
                csv_files += 1
                successful_saves += 1
        else:
            if decode_and_save_generic_file(filename, base64_data, output_path):
                other_files += 1
                successful_saves += 1

    print("-" * 50)
    print("Extraction Summary:")
    print(f"  .gz files saved: {gz_files}")
    print(f"  .csv files saved: {csv_files}")
    print(f"  Other files saved: {other_files}")
    print(f"  Total successful: {successful_saves}/{len(outputs)}")
def verify_extracted_files(output_path):
    """List the files extracted into *output_path*, grouped by type."""
    if not output_path.exists():
        print(f"Output directory {output_path} does not exist")
        return

    entries = list(output_path.iterdir())
    if not entries:
        print(f"No files found in {output_path}")
        return

    print(f"\nFiles in output directory ({output_path}):")
    print("-" * 60)

    # Bucket by extension so related files are listed together.
    compressed = [e for e in entries if e.name.endswith('.gz')]
    tabular = [e for e in entries if e.name.endswith('.csv')]
    leftovers = [e for e in entries
                 if not e.name.endswith('.gz') and not e.name.endswith('.csv')]

    def _show(heading, group):
        # Shared printer so every category is formatted identically.
        if not group:
            return
        print(f"\n  {heading}:")
        for entry in sorted(group):
            print(f"    {entry.name:<40} {entry.stat().st_size:>10,} bytes")

    _show("Compressed (.gz) files", compressed)
    _show("CSV files", tabular)
    _show("Other files", leftovers)

    print(f"\nTotal files extracted: {len(entries)}")
8e863893a39d3c977f325d42f49870fe80bd7aae Mon Sep 17 00:00:00 2001 From: Parikshit Bhatia <189966490+pabhatia-ms@users.noreply.github.com> Date: Mon, 10 Nov 2025 16:51:53 +0530 Subject: [PATCH 4/4] delete files in system folder --- .../rf3-modelforge/aml-rf3-modelforge.ipynb | 478 ------------------ 1 file changed, 478 deletions(-) delete mode 100644 sdk/python/foundation-models/system/inference/rf3-modelforge/aml-rf3-modelforge.ipynb diff --git a/sdk/python/foundation-models/system/inference/rf3-modelforge/aml-rf3-modelforge.ipynb b/sdk/python/foundation-models/system/inference/rf3-modelforge/aml-rf3-modelforge.ipynb deleted file mode 100644 index e1160ca42..000000000 --- a/sdk/python/foundation-models/system/inference/rf3-modelforge/aml-rf3-modelforge.ipynb +++ /dev/null @@ -1,478 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "a6320f26", - "metadata": {}, - "source": [ - "# Extract Files from JSON\n", - "\n", - "This notebook processes JSON files containing base64-encoded data and extracts them as files to a specified output directory. It handles both .gz compressed files (like .cif.gz) and CSV files.\n", - "\n", - "## Overview\n", - "The JSON structure is expected to have an \"outputs\" section with file names as keys and base64-encoded file contents as values." - ] - }, - { - "cell_type": "markdown", - "id": "9bd9270a", - "metadata": {}, - "source": [ - "## 1. Import Required Libraries\n", - "\n", - "Import all necessary libraries for file operations, JSON processing, and base64 decoding." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "562ffbd4", - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import base64\n", - "import gzip\n", - "import os\n", - "from pathlib import Path\n", - "import shutil" - ] - }, - { - "cell_type": "markdown", - "id": "78bfecf4", - "metadata": {}, - "source": [ - "## 2. Configuration\n", - "\n", - "Set up file paths and configuration parameters." 
def load_json_data(json_file_path):
    """Load and parse the JSON file containing base64-encoded outputs.

    Parameters
    ----------
    json_file_path : str or Path
        Path to the JSON file produced by the model endpoint.

    Returns
    -------
    dict or None
        The parsed JSON, or None when the file is missing or malformed.
    """
    try:
        # JSON is UTF-8 by specification; don't depend on the platform
        # default encoding. Keep the try body minimal so only the lines
        # that can actually raise are guarded.
        with open(json_file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except FileNotFoundError:
        print(f"Error: File {json_file_path} not found")
        return None
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in {json_file_path}")
        return None

    print(f"Successfully loaded JSON file: {json_file_path}")
    # Sanity-check the expected structure; return the data either way so
    # the caller can decide what to do with a partial payload.
    if 'outputs' in data:
        print(f"Found 'outputs' section with {len(data['outputs'])} items")
    else:
        print("Warning: 'outputs' section not found in JSON")
    return data


def setup_output_directory(output_dir):
    """Create the output directory (and any parents) if it doesn't exist.

    Returns the directory as a Path.
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    print(f"Output directory ready: {output_path.absolute()}")
    return output_path
def _decode_and_save(filename, base64_data, output_path, label):
    """Decode *base64_data* and write it to ``output_path / filename``.

    Shared implementation for every output file type: the payload is
    base64 text and is written back verbatim in binary mode, so .gz
    payloads stay compressed and CSV/score payloads round-trip unchanged.

    Returns True on success, False on any decode or write failure.
    """
    try:
        # Decode base64 data
        decoded_data = base64.b64decode(base64_data)

        # Create full file path
        file_path = output_path / filename

        # Write the decoded bytes exactly as received
        with open(file_path, 'wb') as f:
            f.write(decoded_data)

        print(f"✓ Saved {label}: {filename} ({len(decoded_data)} bytes)")
        return True

    except Exception as e:
        print(f"✗ Error saving {filename}: {e}")
        return False


def decode_and_save_gz_file(filename, base64_data, output_path):
    """Decode base64 data and save it as a .gz file (including .cif.gz)."""
    return _decode_and_save(filename, base64_data, output_path, ".gz file")


def decode_and_save_csv_file(filename, base64_data, output_path):
    """Decode base64 data and save it as a CSV file."""
    return _decode_and_save(filename, base64_data, output_path, "CSV file")


def decode_and_save_generic_file(filename, base64_data, output_path):
    """Decode base64 data and save it as a generic file."""
    return _decode_and_save(filename, base64_data, output_path, "file")


def process_files(json_data, output_path):
    """Extract every base64-encoded entry of ``json_data['outputs']``.

    Each entry is routed by extension (.gz / .csv / other) to the
    matching saver and written into *output_path*; a per-type summary is
    printed at the end. Returns None; progress is reported via stdout.
    """
    if not json_data or 'outputs' not in json_data:
        print("No outputs found in JSON data")
        return

    outputs = json_data['outputs']

    # Counters for different file types
    gz_files = 0
    csv_files = 0
    other_files = 0
    successful_saves = 0

    print(f"\nProcessing {len(outputs)} files...")
    print("-" * 50)

    for filename, base64_data in outputs.items():
        # Check file extension and process accordingly
        if filename.endswith('.gz'):
            # Handle .gz files (including .cif.gz)
            if decode_and_save_gz_file(filename, base64_data, output_path):
                gz_files += 1
                successful_saves += 1

        elif filename.endswith('.csv'):
            # Handle CSV files
            if decode_and_save_csv_file(filename, base64_data, output_path):
                csv_files += 1
                successful_saves += 1

        else:
            # Handle other file types
            if decode_and_save_generic_file(filename, base64_data, output_path):
                other_files += 1
                successful_saves += 1

    # Print summary
    print("-" * 50)
    print(f"Extraction Summary:")
    print(f"  .gz files saved: {gz_files}")
    print(f"  .csv files saved: {csv_files}")
    print(f"  Other files saved: {other_files}")
    print(f"  Total successful: {successful_saves}/{len(outputs)}")


# Process the files if JSON data is available. Guard via globals() so this
# cell also degrades gracefully when the loading cell has not been run yet.
if globals().get("json_data"):
    process_files(json_data, output_path)
else:
    print("Cannot process files - JSON data not loaded")
def verify_extracted_files(output_path):
    """Print a categorized listing of the files in the output directory.

    Files are grouped into .gz, .csv and everything else, each group is
    shown sorted by name with its byte size, and a total count follows.
    Missing or empty directories are reported instead of listed.
    """
    if not output_path.exists():
        print(f"Output directory {output_path} does not exist")
        return

    files = list(output_path.iterdir())

    if not files:
        print(f"No files found in {output_path}")
        return

    print(f"\nFiles in output directory ({output_path}):")
    print("-" * 60)

    # Bucket the directory entries by extension in a single pass.
    gz_files, csv_files, other_files = [], [], []
    for entry in files:
        if entry.name.endswith('.gz'):
            gz_files.append(entry)
        elif entry.name.endswith('.csv'):
            csv_files.append(entry)
        else:
            other_files.append(entry)

    def _show(title, group):
        # One labelled, name-sorted listing with right-aligned sizes;
        # silently skipped when the group is empty.
        if group:
            print(title)
            for entry in sorted(group):
                print(f"  {entry.name:<40} {entry.stat().st_size:>10,} bytes")

    _show("\n Compressed (.gz) files:", gz_files)
    _show("\n CSV files:", csv_files)
    _show("\n Other files:", other_files)

    print(f"\nTotal files extracted: {len(files)}")


# Verify the extracted files
verify_extracted_files(output_path)