From 9033b25dbb5961ab383297c95288113ab87726fd Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 15 Oct 2025 09:16:30 -0400 Subject: [PATCH 1/3] Initial sketch at design --- doc/design/use-oci-1.md | 68 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 doc/design/use-oci-1.md diff --git a/doc/design/use-oci-1.md b/doc/design/use-oci-1.md new file mode 100644 index 00000000..ef324aff --- /dev/null +++ b/doc/design/use-oci-1.md @@ -0,0 +1,68 @@ +Plan to refactor codebase on how we approach creation of singularity/apptainer containers. + +We want to not create them directly from docker images, but rather first rely +on functionality in https://github.com/datalad/datalad-container/pull/277 +(skopeo branch of the https://github.com/yarikoptic/datalad-container/ fork) to +initiate OCI container locally using `datalad containers-add oci:docker://...` +under `images-oci/` subdataset, under similar path (e.g. +repronim/repronim-reproin--0.13.1.oci for +images/repronim/repronim-reproin--0.13.1.sing in this one), registering it to +be ran with `{img_dspath}/scripts/oci_cmd run` which we are to provide as well. +E.g. + datalad containers-add --url oci:docker://bids/aa:v0.2.0 -i bids/bids-aa--0.2.0.oci bids-aa + +under images-oci// subdataset. + +While generating such OCI image we need to ensure that either all produced +files are under annex with URL or directly in git (if text files), e.g. + + git annex find --not --in datalad --and --not --in web bids/bids-aa--0.2.0.oci + +(could be under web directly or via datalad downloader!) 
+ +`scripts/oci_cmd` could be simple for now: + + #!/bin/bash + + apptainer "$@" + +Then, after generation of OCI image, we would need to produce singularity SIF file using +(assuming that {image} would be the replacement with portion of path to image file like repronim/repronim-reproin--0.13.1) + + datalad run -m "Build SIF image for {image}.sif" --output images/{image}.sif scripts/oci_cmd build images/{image}.sif images-oci/{image}.oci/ + + +After all that is done and works, we would need to have a migration +functionality which would produce .sif to replace all images for which we had Singularity* files but without custom commands, rather just basic wrappers. Full list could be obtained using + + git grep -l 'Automagically prepared' images + +and files would look like + + ❯ head images/bids/Singularity.bids-aa--0.2.0 + # + # Automagically prepared for ReproNim/containers distribution. + # See http://github.com/ReproNim/containers for more info + # + Bootstrap: docker + From: bids/aa:v0.2.0 + +so the goal would be to produce OCI image taking that "From:" as pointing to docker hub, in the above example (run under images-oci/ subdataset). So the command to "containers-add" would be similar to above example: + + datalad containers-add --url oci:docker://bids/aa:v0.2.0 -i bids/bids-aa--0.2.0.oci bids-aa + +and then verifying that all annex files are available from URLs: + + git annex find --not --in datalad --and --not --in web bids/bids-aa--0.2.0.oci + +should come out empty. (so we need a generic helper function to be used here to reuse) + +Original images, and corresponding recipes, like in this case +images/bids/Singularity.bids-aa--0.2.0 where "From:" was found, and the corresponding image images/bids/bids-aa--0.2.0.sing should be "git rm"ed and committed with informative message. Path to the image within .datalad.config should be replaced to point to .sif instead of original .sing version. 
+ +While developing, try migration first on some simpler cases like + + images/bids/bids-validator--1.2.3.sing + images/bids/bids-rshrf--1.0.0.sing + +For migration, add an option to skip failing, and we would need some log file listing those which failed to convert. From 0528f55569f8a72cb25c58047aebb4a8db616334 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 20 Oct 2025 08:39:16 -0400 Subject: [PATCH 2/3] syntax fixes from gemini Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- doc/design/use-oci-1.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/design/use-oci-1.md b/doc/design/use-oci-1.md index ef324aff..0cfa30ed 100644 --- a/doc/design/use-oci-1.md +++ b/doc/design/use-oci-1.md @@ -5,7 +5,7 @@ on functionality in https://github.com/datalad/datalad-container/pull/277 (skopeo branch of the https://github.com/yarikoptic/datalad-container/ fork) to initiate OCI container locally using `datalad containers-add oci:docker://...` under `images-oci/` subdataset, under similar path (e.g. -repronim/repronim-reproin--0.13.1.oci for +repronim/repronim-reproin--0.13.1.oci for images/repronim/repronim-reproin--0.13.1.sing in this one), registering it to be ran with `{img_dspath}/scripts/oci_cmd run` which we are to provide as well. E.g. @@ -55,10 +55,10 @@ and then verifying that all annex files are available from URLs: git annex find --not --in datalad --and --not --in web bids/bids-aa--0.2.0.oci -should come out empty. (so we need a generic helper function to be used here to reuse) +should come out empty. (so we need a generic helper function to be used here to reuse) Original images, and corresponding recipes, like in this case -images/bids/Singularity.bids-aa--0.2.0 where "From:" was found, and the corresponding image images/bids/bids-aa--0.2.0.sing should be "git rm"ed and committed with informative message. 
Path to the image within .datalad.config should be replaced to point to .sif instead of original .sing version. +images/bids/Singularity.bids-aa--0.2.0 where "From:" was found, and the corresponding image images/bids/bids-aa--0.2.0.sing should be "git rm"ed and committed with an informative message. Path to the image within .datalad.config should be replaced to point to .sif instead of original .sing version. While developing, try migration first on some simpler cases like From b640321608f6718340c9f25525ab95b0de5a02d2 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 20 Oct 2025 20:31:54 -0400 Subject: [PATCH 3/3] Implement OCI-based container workflow migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements the design outlined in doc/design/use-oci-1.md for migrating from direct Singularity container builds to an OCI-based workflow with improved reproducibility and URL availability. New components: - scripts/oci_cmd: Apptainer wrapper for OCI container operations - scripts/migrate_to_oci: Migration script for auto-generated containers - scripts/tests/test_oci_cmd.bats: BATS tests for oci_cmd wrapper - scripts/tests/test_migrate_to_oci.py: Python unit tests for migration script - doc/design/oci-migration-guide.md: Comprehensive migration guide - CLAUDE.md: Project instructions for Claude Code assistant Migration workflow: 1. Parse auto-generated Singularity files to extract Docker URLs 2. Create OCI images in images-oci/ subdataset using datalad containers-add 3. Verify all annex files are available from URLs 4. Build SIF files from OCI images using datalad run 5. Update .datalad/config to point to new SIF files 6. 
Remove old Singularity recipe and .sing files Features: - Batch migration with --skip-failures option - URL verification for all annex files (git annex find) - Detailed logging and error reporting - Full test coverage (15 tests total, all passing) - Follows existing codebase patterns and conventions Testing: - BATS tests: 4/4 passing (oci_cmd wrapper) - Python tests: 11/11 passing (migration logic) - All tests marked with @pytest.mark.ai_generated per project conventions 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CLAUDE.md | 19 + doc/design/oci-migration-guide.md | 267 +++++++++++++ scripts/migrate_to_oci | 566 +++++++++++++++++++++++++++ scripts/oci_cmd | 29 ++ scripts/tests/test_migrate_to_oci.py | 257 ++++++++++++ scripts/tests/test_oci_cmd.bats | 61 +++ 6 files changed, 1199 insertions(+) create mode 100644 CLAUDE.md create mode 100644 doc/design/oci-migration-guide.md create mode 100755 scripts/migrate_to_oci create mode 100755 scripts/oci_cmd create mode 100644 scripts/tests/test_migrate_to_oci.py create mode 100644 scripts/tests/test_oci_cmd.bats diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..6b4a98d0 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,19 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. 
+ +## Build/Test Commands +- Run all tests: `bats -t scripts/tests` +- Run a single test: `bats -t scripts/tests/test_singularity_cmd.bats` +- Lint shell scripts: `shellcheck scripts/*` + +## Code Style Guidelines +- Follow DataLad/Git-Annex conventions for repository structure +- Shell scripts should pass shellcheck validation +- Maintain YODA principles (store all dependencies within the dataset) +- Tests use the bats framework with helpers in `scripts/tests/test_helpers.bash` +- Use snake_case for function and variable names +- Scripts should include proper error handling and validate inputs +- Document environment variables that affect script behavior +- Maintain backward compatibility with DataLad commands +- Follow proper Singularity image naming: `name--version.sing` format diff --git a/doc/design/oci-migration-guide.md b/doc/design/oci-migration-guide.md new file mode 100644 index 00000000..60c11182 --- /dev/null +++ b/doc/design/oci-migration-guide.md @@ -0,0 +1,267 @@ +# OCI-Based Container Workflow Migration Guide + +This guide documents the implementation of the OCI-based container workflow as described in [use-oci-1.md](use-oci-1.md). + +## Overview + +The new workflow migrates from building Singularity containers directly from Docker images to using OCI containers as an intermediate step. This provides better reproducibility and URL-based availability for all container components. + +## Components + +### 1. `scripts/oci_cmd` + +A simple wrapper script that passes commands to `apptainer`. This script is registered with DataLad containers as the command wrapper for OCI containers. + +**Usage:** +```bash +scripts/oci_cmd [arguments...] +``` + +**Example:** +```bash +scripts/oci_cmd run container.oci/ +scripts/oci_cmd build output.sif input.oci/ +``` + +### 2. `scripts/migrate_to_oci` + +Migration script that converts existing auto-generated Singularity containers to the OCI-based workflow. 
+ +**Features:** +- Identifies auto-generated Singularity files (marked with "Automagically prepared") +- Creates OCI images in `images-oci/` subdataset +- Builds SIF files from OCI images +- Updates `.datalad/config` to point to new SIF files +- Removes old Singularity recipe and `.sing` files +- Verifies all annex files are available from URLs + +**Usage:** +```bash +# Migrate all auto-generated containers +scripts/migrate_to_oci + +# Migrate specific containers +scripts/migrate_to_oci images/bids/Singularity.bids-validator--1.2.3 + +# Continue even if some migrations fail +scripts/migrate_to_oci --skip-failures + +# Log failures to a file +scripts/migrate_to_oci --log-file migration_failures.log +``` + +## Workflow + +### Creating New OCI-Based Containers + +1. **Add OCI container** (in `images-oci/` subdataset): + ```bash + cd images-oci/ + datalad containers-add \ + --url oci:docker://bids/validator:1.2.3 \ + -i bids/bids-validator--1.2.3.oci \ + bids-validator + ``` + +2. **Verify annex URLs**: + ```bash + git annex find --not --in datalad --and --not --in web bids/bids-validator--1.2.3.oci + ``` + This should return empty output (all files have URLs). + +3. **Build SIF image** (from repository root): + ```bash + datalad run \ + -m "Build SIF image for bids/bids-validator--1.2.3.sif" \ + --output images/bids/bids-validator--1.2.3.sif \ + scripts/oci_cmd build \ + images/bids/bids-validator--1.2.3.sif \ + images-oci/bids/bids-validator--1.2.3.oci/ + ``` + +4. **Register container** (if needed): + ```bash + datalad containers-add \ + bids-validator \ + -i images/bids/bids-validator--1.2.3.sif \ + --update \ + --call-fmt "{img_dspath}/scripts/singularity_cmd run {img} {cmd}" + ``` + +### Migrating Existing Containers + +The migration process for a single container involves: + +1. **Parse Singularity file** - Extract Docker image URL from `From:` line +2. **Create OCI image** - Use `datalad containers-add` with `oci:docker://` URL +3. 
**Verify URLs** - Ensure all annex files are available from web +4. **Build SIF** - Convert OCI to SIF using `scripts/oci_cmd build` +5. **Update config** - Point `.datalad/config` to new SIF file +6. **Remove old files** - Delete Singularity recipe and `.sing` file +7. **Commit changes** - Create a commit documenting the migration + +**Example migration workflow:** +```bash +# Test on simple cases first +scripts/migrate_to_oci \ + images/bids/Singularity.bids-validator--1.2.3 \ + images/bids/Singularity.bids-rshrf--1.0.0 + +# If successful, migrate all +scripts/migrate_to_oci --skip-failures --log-file migration.log +``` + +## Repository Structure + +``` +. +├── images/ # SIF files (final container images) +│ ├── bids/ +│ │ ├── bids-validator--1.2.3.sif +│ │ └── bids-aa--0.2.0.sif +│ └── neurodesk/ +│ └── neurodesk-afni--21.2.00.sif +│ +├── images-oci/ # OCI containers (subdataset) +│ ├── bids/ +│ │ ├── bids-validator--1.2.3.oci/ +│ │ └── bids-aa--0.2.0.oci/ +│ └── neurodesk/ +│ └── neurodesk-afni--21.2.00.oci/ +│ +├── scripts/ +│ ├── oci_cmd # Apptainer wrapper +│ ├── migrate_to_oci # Migration script +│ └── singularity_cmd # Existing Singularity wrapper +│ +└── .datalad/ + └── config # Container registrations +``` + +## Verification + +After migration, verify that: + +1. **All annex files have URLs:** + ```bash + git annex find --not --in datalad --and --not --in web images-oci/ + ``` + Should return empty. + +2. **SIF files exist:** + ```bash + ls -lh images/bids/*.sif + ``` + +3. **Container configuration updated:** + ```bash + git config -f .datalad/config --get-regexp 'datalad.containers.*.image' | grep '.sif$' + ``` + +4. 
**Old files removed:** + ```bash + git log --all -- 'images/*/Singularity.*' | head -20 + ``` + +## Testing + +### Unit Tests + +**BATS tests for `oci_cmd`:** +```bash +bats -t scripts/tests/test_oci_cmd.bats +``` + +**Python tests for migration script:** +```bash +python -m pytest scripts/tests/test_migrate_to_oci.py -v +``` + +### Integration Testing + +Test the full workflow on a simple container: + +```bash +# Create test OCI container +cd images-oci/ +datalad containers-add \ + --url oci:docker://alpine:latest \ + -i test/test-alpine.oci \ + test-alpine + +# Verify URLs +git annex find --not --in datalad --and --not --in web test/test-alpine.oci + +# Build SIF +cd .. +datalad run \ + -m "Build test SIF" \ + --output images/test/test-alpine.sif \ + scripts/oci_cmd build images/test/test-alpine.sif images-oci/test/test-alpine.oci/ + +# Test container +scripts/oci_cmd exec images-oci/test/test-alpine.oci/ echo "Hello from OCI" +scripts/singularity_cmd exec images/test/test-alpine.sif echo "Hello from SIF" +``` + +## Troubleshooting + +### OCI container creation fails + +**Issue:** `datalad containers-add` fails with OCI URL + +**Solution:** Ensure you have: +- DataLad container extension with OCI support +- Skopeo installed +- Network access to Docker Hub + +### Annex files without URLs + +**Issue:** `git annex find --not --in datalad --and --not --in web` returns files + +**Solution:** +```bash +# For each file, register the URL manually +git annex registerurl <key> <url> +``` + +### SIF build fails + +**Issue:** `scripts/oci_cmd build` fails + +**Solution:** +- Ensure apptainer/singularity is installed +- Check disk space (SIF files can be large) +- Verify OCI directory exists and is valid + +### Migration script fails mid-process + +**Issue:** Script fails partway through migration + +**Solution:** +- Use `--skip-failures` flag to continue past failures +- Check `--log-file` output for specific errors +- Manually fix failed migrations and re-run + +## Benefits of 
OCI-Based Workflow + +1. **URL Availability** - All container components are available via URLs (no special remotes needed) +2. **Reproducibility** - OCI format is standardized and widely supported +3. **Flexibility** - Can use either OCI or SIF format depending on needs +4. **Better Tracking** - DataLad tracks all steps of container creation +5. **Easier Maintenance** - Updates only need to touch OCI layer, SIF can be rebuilt + +## Future Enhancements + +1. **Automated Updates** - Script to check for updated Docker images and rebuild +2. **Parallel Migration** - Process multiple containers concurrently +3. **Rollback Support** - Ability to revert failed migrations +4. **CI/CD Integration** - Automated testing of migrated containers +5. **Cache Management** - Tools to manage OCI cache and temporary files + +## References + +- [Original Design Document](use-oci-1.md) +- [DataLad Container Documentation](https://docs.datalad.org/projects/container/) +- [Apptainer Documentation](https://apptainer.org/docs/) +- [OCI Specification](https://github.com/opencontainers/image-spec) diff --git a/scripts/migrate_to_oci b/scripts/migrate_to_oci new file mode 100755 index 00000000..4a3f8d1b --- /dev/null +++ b/scripts/migrate_to_oci @@ -0,0 +1,566 @@ +#!/usr/bin/env python3 +# +# COPYRIGHT: ReproNim/containers Team 2018-2025 +# +# LICENSE: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# + +""" +Migrate auto-generated Singularity containers to OCI-based workflow. + +This script migrates existing Singularity recipe files (identified by +'Automagically prepared' header) to the new OCI-based workflow where +containers are first downloaded as OCI images and then converted to SIF files. +""" + +from __future__ import annotations +from dataclasses import dataclass +from pathlib import Path +from typing import Optional +import click +import json +import logging +import re +import subprocess +import sys + +import datalad.api as dl + +log = logging.getLogger(__name__) + + +@dataclass +class MigrationResult: + """Result of attempting to migrate a single container.""" + singularity_file: Path + success: bool + error_message: Optional[str] = None + oci_image_path: Optional[Path] = None + sif_image_path: Optional[Path] = None + + +class OCIMigrator: + """Handles migration of Singularity containers to OCI-based workflow.""" + + def __init__( + self, + repo_dir: Path, + images_dir: Path, + images_oci_dir: Path, + skip_failures: bool = False, + ): + self.repo_dir = repo_dir + self.images_dir = images_dir + self.images_oci_dir = images_oci_dir + self.skip_failures = skip_failures + self.results: list[MigrationResult] = [] + + def runcmd(self, *args: str, **kwargs) -> subprocess.CompletedProcess: + """Run a command with common settings.""" + kwargs.setdefault("check", True) + kwargs.setdefault("cwd", str(self.repo_dir)) + kwargs.setdefault("capture_output", True) + 
kwargs.setdefault("text", True) + return subprocess.run(args, **kwargs) + + def verify_annex_urls(self, path: Path) -> bool: + """ + Verify that all annex files under path are available from URLs. + + Returns True if all files are available from web or datalad special remotes. + """ + try: + result = self.runcmd( + "git", "annex", "find", + "--not", "--in", "datalad", + "--and", "--not", "--in", "web", + str(path) + ) + # If command succeeds and output is empty, all files have URLs + return not result.stdout.strip() + except subprocess.CalledProcessError as e: + log.error("Failed to verify annex URLs for %s: %s", path, e) + return False + + def parse_singularity_file(self, singfile: Path) -> Optional[dict]: + """ + Parse a Singularity recipe file to extract Docker image information. + + Returns dict with 'namespace', 'image', and 'tag' if successful. + """ + content = singfile.read_text() + + # Check if it's an auto-generated file + if "Automagically prepared" not in content: + log.debug("Skipping %s - not auto-generated", singfile) + return None + + # Extract the From: line + # Format: "From: namespace/image:tag" or "From: image:tag" + match = re.search(r"^From:\s+(.+)$", content, re.MULTILINE) + if not match: + log.warning("Could not find 'From:' line in %s", singfile) + return None + + from_line = match.group(1).strip() + + # Parse the Docker image specification + # Can be: image:tag, namespace/image:tag, or registry/namespace/image:tag + parts = from_line.split(":") + if len(parts) != 2: + log.warning("Invalid Docker image format in %s: %s", singfile, from_line) + return None + + image_part, tag = parts + + # Split image_part by / to get namespace and image name + image_components = image_part.split("/") + if len(image_components) == 1: + # Just image name, no namespace (rare) + namespace = "library" + image = image_components[0] + elif len(image_components) == 2: + # namespace/image + namespace, image = image_components + else: + # registry/namespace/image or 
more complex + # For Docker Hub images, we typically have namespace/image + namespace = "/".join(image_components[:-1]) + image = image_components[-1] + + return { + "namespace": namespace, + "image": image, + "tag": tag, + "docker_url": from_line, + } + + def get_oci_image_name(self, singfile: Path) -> str: + """ + Generate OCI image name from Singularity file path. + + E.g., images/bids/Singularity.bids-validator--1.2.3 + -> bids/bids-validator--1.2.3.oci + """ + # Get the base name without 'Singularity.' prefix + base_name = singfile.name.replace("Singularity.", "") + # Get the family directory (e.g., 'bids') + family = singfile.parent.name + # Construct OCI image path + return f"{family}/{base_name}.oci" + + def get_sif_image_name(self, singfile: Path) -> str: + """ + Generate SIF image name from Singularity file path. + + E.g., images/bids/Singularity.bids-validator--1.2.3 + -> bids/bids-validator--1.2.3.sif + """ + base_name = singfile.name.replace("Singularity.", "") + family = singfile.parent.name + return f"{family}/{base_name}.sif" + + def create_oci_image( + self, + docker_url: str, + oci_image_name: str, + container_name: str, + ) -> bool: + """ + Create an OCI image using datalad containers-add. + + Returns True if successful. 
+ """ + oci_image_path = self.images_oci_dir / oci_image_name + + # Ensure parent directory exists + oci_image_path.parent.mkdir(parents=True, exist_ok=True) + + try: + log.info("Creating OCI image for %s from %s", container_name, docker_url) + + # Change to images-oci directory for datalad operations + self.runcmd( + "datalad", + "containers-add", + "--url", f"oci:docker://{docker_url}", + "-i", oci_image_name, + container_name, + cwd=str(self.images_oci_dir), + ) + + # Verify all annex files have URLs + if not self.verify_annex_urls(oci_image_path): + log.error("OCI image files for %s are not all available from URLs", oci_image_name) + return False + + log.info("Successfully created OCI image at %s", oci_image_path) + return True + + except subprocess.CalledProcessError as e: + log.error("Failed to create OCI image for %s: %s", container_name, e) + log.error("stdout: %s", e.stdout) + log.error("stderr: %s", e.stderr) + return False + + def build_sif_from_oci( + self, + oci_image_name: str, + sif_image_name: str, + ) -> bool: + """ + Build a SIF file from an OCI image using datalad run. + + Returns True if successful. 
+ """ + sif_image_path = self.images_dir / sif_image_name + oci_image_path = self.images_oci_dir / oci_image_name + + try: + log.info("Building SIF image %s from OCI image %s", sif_image_name, oci_image_name) + + # Use datalad run to build the SIF with proper tracking + self.runcmd( + "datalad", "run", + "-m", f"Build SIF image for {sif_image_name}", + "--output", str(sif_image_path), + "scripts/oci_cmd", "build", + str(sif_image_path), + f"images-oci/{oci_image_name}/", + ) + + log.info("Successfully built SIF image at %s", sif_image_path) + return True + + except subprocess.CalledProcessError as e: + log.error("Failed to build SIF image %s: %s", sif_image_name, e) + log.error("stdout: %s", e.stdout) + log.error("stderr: %s", e.stderr) + return False + + def update_container_config( + self, + container_name: str, + old_image_path: Path, + new_image_path: Path, + ) -> bool: + """ + Update .datalad/config to point to new SIF file instead of old .sing file. + + Returns True if successful. + """ + try: + # Get current config value + result = self.runcmd( + "git", "config", "-f", ".datalad/config", + f"datalad.containers.{container_name}.image" + ) + current_path = result.stdout.strip() + + # Update to new path + self.runcmd( + "git", "config", "-f", ".datalad/config", + f"datalad.containers.{container_name}.image", + str(new_image_path) + ) + + log.info("Updated container config for %s: %s -> %s", + container_name, current_path, new_image_path) + return True + + except subprocess.CalledProcessError as e: + log.error("Failed to update container config for %s: %s", container_name, e) + return False + + def remove_old_files( + self, + singularity_file: Path, + old_image_file: Path, + ) -> bool: + """ + Remove old Singularity recipe and .sing image files. + + Returns True if successful. 
+ """ + try: + files_to_remove = [] + + if singularity_file.exists(): + files_to_remove.append(str(singularity_file)) + + if old_image_file.exists() or old_image_file.is_symlink(): + files_to_remove.append(str(old_image_file)) + + if files_to_remove: + self.runcmd("git", "rm", *files_to_remove) + log.info("Removed old files: %s", ", ".join(files_to_remove)) + + return True + + except subprocess.CalledProcessError as e: + log.error("Failed to remove old files: %s", e) + return False + + def migrate_container(self, singfile: Path) -> MigrationResult: + """ + Migrate a single Singularity container to OCI-based workflow. + + Returns MigrationResult indicating success or failure. + """ + log.info("=" * 80) + log.info("Migrating %s", singfile) + + # Parse Singularity file + parsed = self.parse_singularity_file(singfile) + if parsed is None: + return MigrationResult( + singularity_file=singfile, + success=False, + error_message="Could not parse Singularity file", + ) + + docker_url = parsed["docker_url"] + oci_image_name = self.get_oci_image_name(singfile) + sif_image_name = self.get_sif_image_name(singfile) + + # Generate container name (similar to how it's done in create_singularities) + # Extract from path: images/bids/Singularity.bids-validator--1.2.3 + # Container name would be: bids-validator + base_name = singfile.name.replace("Singularity.", "") + # The recipe base name already starts with the family prefix ("bids-..."), + # so only the "--<version>" suffix is dropped to get the container name. + container_name_parts = base_name.split("--") + if len(container_name_parts) > 1: + container_name = container_name_parts[0] + else: + container_name = base_name + + log.info(" Docker URL: %s", docker_url) + log.info(" OCI image: %s", oci_image_name) + log.info(" SIF image: %s", sif_image_name) + log.info(" Container name: %s", container_name) + + # Create OCI image + if not self.create_oci_image(docker_url, oci_image_name, container_name): + return MigrationResult( + singularity_file=singfile, + success=False, + 
error_message="Failed to create OCI image", + ) + + # Build SIF from OCI + if not self.build_sif_from_oci(oci_image_name, sif_image_name): + return MigrationResult( + singularity_file=singfile, + success=False, + error_message="Failed to build SIF image", + oci_image_path=self.images_oci_dir / oci_image_name, + ) + + # Update container configuration + old_image_file = singfile.with_name(base_name + ".sing") + new_image_file = self.images_dir / sif_image_name + + if not self.update_container_config(container_name, old_image_file, new_image_file): + return MigrationResult( + singularity_file=singfile, + success=False, + error_message="Failed to update container configuration", + oci_image_path=self.images_oci_dir / oci_image_name, + sif_image_path=new_image_file, + ) + + # Remove old files + if not self.remove_old_files(singfile, old_image_file): + return MigrationResult( + singularity_file=singfile, + success=False, + error_message="Failed to remove old files", + oci_image_path=self.images_oci_dir / oci_image_name, + sif_image_path=new_image_file, + ) + + # Commit the migration (--include stages the .datalad/config edit, which is otherwise left out) + try: + self.runcmd( + "git", "commit", "--include", ".datalad/config", + "-m", f"Migrate {base_name} to OCI-based workflow\n\n" + f"- Created OCI image at images-oci/{oci_image_name}\n" + f"- Built SIF image at {sif_image_name}\n" + f"- Removed old Singularity recipe and .sing file\n" + f"- Updated .datalad/config to use .sif image" + ) + log.info("Successfully migrated %s", singfile) + except subprocess.CalledProcessError as e: + log.error("Failed to commit migration for %s: %s", singfile, e) + return MigrationResult( + singularity_file=singfile, + success=False, + error_message="Failed to commit migration", + oci_image_path=self.images_oci_dir / oci_image_name, + sif_image_path=new_image_file, + ) + + return MigrationResult( + singularity_file=singfile, + success=True, + oci_image_path=self.images_oci_dir / oci_image_name, + sif_image_path=new_image_file, + ) + + def find_automagic_singularity_files(self) -> list[Path]: 
+ """Find all auto-generated Singularity files in the repository.""" + try: + result = self.runcmd( + "git", "grep", "-l", "Automagically prepared", "--", "images/" + ) + files = [Path(line.strip()) for line in result.stdout.splitlines()] + log.info("Found %d auto-generated Singularity files", len(files)) + return files + except subprocess.CalledProcessError: + log.warning("No auto-generated Singularity files found") + return [] + + def run_migration( + self, + specific_files: Optional[list[str]] = None, + ) -> None: + """Run the migration process.""" + # Find files to migrate + if specific_files: + files_to_migrate = [Path(f) for f in specific_files] + else: + files_to_migrate = self.find_automagic_singularity_files() + + log.info("Will migrate %d Singularity files", len(files_to_migrate)) + + # Migrate each file + for singfile in files_to_migrate: + result = self.migrate_container(singfile) + self.results.append(result) + + if not result.success: + log.error("Failed to migrate %s: %s", + singfile, result.error_message) + if not self.skip_failures: + log.error("Stopping migration due to failure (use --skip-failures to continue)") + break + + # Print summary + self.print_summary() + + def print_summary(self) -> None: + """Print a summary of migration results.""" + total = len(self.results) + successful = sum(1 for r in self.results if r.success) + failed = total - successful + + log.info("=" * 80) + log.info("Migration Summary") + log.info("=" * 80) + log.info("Total: %d", total) + log.info("Successful: %d", successful) + log.info("Failed: %d", failed) + + if failed > 0: + log.info("\nFailed migrations:") + for result in self.results: + if not result.success: + log.info(" - %s: %s", result.singularity_file, result.error_message) + + +@click.command() +@click.option( + "--skip-failures", + is_flag=True, + default=False, + help="Continue migration even if some containers fail", +) +@click.option( + "--log-file", + type=click.Path(), + default=None, + help="Path to 
log file for failed conversions", +) +@click.argument("singularity_files", nargs=-1) +def main( + skip_failures: bool, + log_file: Optional[str], + singularity_files: tuple[str, ...], +) -> None: + """ + Migrate Singularity containers to OCI-based workflow. + + If SINGULARITY_FILES are specified, only those files will be migrated. + Otherwise, all auto-generated Singularity files will be migrated. + """ + # Setup logging + logging.basicConfig( + format="[%(levelname)-8s] %(message)s", + level=logging.INFO, + ) + + if log_file: + file_handler = logging.FileHandler(log_file) + file_handler.setFormatter(logging.Formatter("[%(levelname)-8s] %(message)s")) + log.addHandler(file_handler) + + # Setup paths (resolve first so a relative or symlinked __file__ still yields the repository root) + topdir = Path(__file__).resolve().parent.parent + images_dir = topdir / "images" + images_oci_dir = topdir / "images-oci" + + # Ensure images-oci directory exists and is a datalad dataset + if not images_oci_dir.exists(): + log.info("Creating images-oci subdataset") + try: + # Create as a datalad subdataset; output is captured only so stderr can be logged on failure + subprocess.run( + ["datalad", "create", "-d", str(topdir), str(images_oci_dir)], + check=True, + capture_output=True, + text=True, + ) + except subprocess.CalledProcessError as e: + log.error("Failed to create images-oci subdataset: %s", e) + log.error("stderr: %s", e.stderr) + sys.exit(1) + + # Create migrator and run + migrator = OCIMigrator( + repo_dir=topdir, + images_dir=images_dir, + images_oci_dir=images_oci_dir, + skip_failures=skip_failures, + ) + + migrator.run_migration( + specific_files=list(singularity_files) if singularity_files else None, + ) + + # Exit with error if any migrations failed + failed = sum(1 for r in migrator.results if not r.success) + if failed > 0: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/scripts/oci_cmd b/scripts/oci_cmd new file mode 100755 index 00000000..c35911cd --- /dev/null +++ b/scripts/oci_cmd @@ -0,0 +1,29 @@ +#!/bin/bash +# +# COPYRIGHT: ReproNim/containers Team 2018-2025 +# +# LICENSE: MIT +# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# + +# Simple wrapper to run apptainer commands (exec replaces this shell so signals reach apptainer directly) +# This script is used as the command wrapper for OCI containers registered with datalad + +exec apptainer "$@" diff --git a/scripts/tests/test_migrate_to_oci.py b/scripts/tests/test_migrate_to_oci.py new file mode 100644 index 00000000..a7e27697 --- /dev/null +++ b/scripts/tests/test_migrate_to_oci.py @@ -0,0 +1,257 @@ +#!/usr/bin/env python3 +""" +Unit tests for scripts/migrate_to_oci migration script. + +These tests validate the helper functions and logic in the migration script +by importing it as a Python module after copying it to a temporary .py file. 
+""" + +from __future__ import annotations +from pathlib import Path +from textwrap import dedent +import json +import pytest +import subprocess +import tempfile +import importlib.util +import sys + +# Create a temporary .py copy of the extension-less script +# This is needed because importlib needs a .py extension +script_path = (Path(__file__).parent.parent / "migrate_to_oci").resolve() +temp_py_path = script_path.parent / "migrate_to_oci_temp.py" + +# Refresh the copy on every import: a copy left over from an earlier run +# would otherwise make the tests exercise an outdated version of the script +import shutil +shutil.copy(script_path, temp_py_path) + +# Import from the temp file +spec = importlib.util.spec_from_file_location("migrate_to_oci", str(temp_py_path)) +if spec is None or spec.loader is None: + raise ImportError(f"Could not load module from {temp_py_path}") +migrate_module = importlib.util.module_from_spec(spec) +sys.modules["migrate_to_oci"] = migrate_module +spec.loader.exec_module(migrate_module) + +OCIMigrator = migrate_module.OCIMigrator +MigrationResult = migrate_module.MigrationResult + + +@pytest.mark.ai_generated +class TestSingularityFileParsing: + """Test parsing of Singularity recipe files.""" + + def test_parse_automagic_file(self, tmp_path: Path) -> None: + """Test parsing a standard auto-generated Singularity file.""" + singfile = tmp_path / "Singularity.test" + singfile.write_text(dedent(""" + # + # Automagically prepared for ReproNim/containers distribution. 
+ # See http://github.com/ReproNim/containers for more info + # + Bootstrap: docker + From: bids/validator:1.2.3 + + %post + mkdir -p /data + """)) + + migrator = OCIMigrator( + repo_dir=tmp_path, + images_dir=tmp_path / "images", + images_oci_dir=tmp_path / "images-oci", + ) + + result = migrator.parse_singularity_file(singfile) + + assert result is not None + assert result["namespace"] == "bids" + assert result["image"] == "validator" + assert result["tag"] == "1.2.3" + assert result["docker_url"] == "bids/validator:1.2.3" + + def test_parse_non_automagic_file(self, tmp_path: Path) -> None: + """Test that non-auto-generated files are skipped.""" + singfile = tmp_path / "Singularity.custom" + singfile.write_text(dedent(""" + Bootstrap: docker + From: custom/image:1.0 + + %post + echo "Custom setup" + """)) + + migrator = OCIMigrator( + repo_dir=tmp_path, + images_dir=tmp_path / "images", + images_oci_dir=tmp_path / "images-oci", + ) + + result = migrator.parse_singularity_file(singfile) + assert result is None + + def test_parse_file_with_complex_namespace(self, tmp_path: Path) -> None: + """Test parsing a second namespace/image:tag reference (nipreps/fmriprep); no registry prefix is exercised.""" + singfile = tmp_path / "Singularity.test" + singfile.write_text(dedent(""" + # + # Automagically prepared for ReproNim/containers distribution. 
+ # + Bootstrap: docker + From: nipreps/fmriprep:20.2.0 + + %post + mkdir -p /data + """)) + + migrator = OCIMigrator( + repo_dir=tmp_path, + images_dir=tmp_path / "images", + images_oci_dir=tmp_path / "images-oci", + ) + + result = migrator.parse_singularity_file(singfile) + + assert result is not None + assert result["namespace"] == "nipreps" + assert result["image"] == "fmriprep" + assert result["tag"] == "20.2.0" + + def test_parse_file_missing_from_line(self, tmp_path: Path) -> None: + """Test that files without From: line return None.""" + singfile = tmp_path / "Singularity.broken" + singfile.write_text(dedent(""" + # + # Automagically prepared for ReproNim/containers distribution. + # + Bootstrap: docker + + %post + mkdir -p /data + """)) + + migrator = OCIMigrator( + repo_dir=tmp_path, + images_dir=tmp_path / "images", + images_oci_dir=tmp_path / "images-oci", + ) + + result = migrator.parse_singularity_file(singfile) + assert result is None + + +@pytest.mark.ai_generated +class TestImageNaming: + """Test OCI and SIF image name generation.""" + + def test_get_oci_image_name(self, tmp_path: Path) -> None: + """Test OCI image name generation from Singularity file path.""" + migrator = OCIMigrator( + repo_dir=tmp_path, + images_dir=tmp_path / "images", + images_oci_dir=tmp_path / "images-oci", + ) + + singfile = tmp_path / "images" / "bids" / "Singularity.bids-validator--1.2.3" + singfile.parent.mkdir(parents=True, exist_ok=True) + + oci_name = migrator.get_oci_image_name(singfile) + assert oci_name == "bids/bids-validator--1.2.3.oci" + + def test_get_sif_image_name(self, tmp_path: Path) -> None: + """Test SIF image name generation from Singularity file path.""" + migrator = OCIMigrator( + repo_dir=tmp_path, + images_dir=tmp_path / "images", + images_oci_dir=tmp_path / "images-oci", + ) + + singfile = tmp_path / "images" / "bids" / "Singularity.bids-validator--1.2.3" + singfile.parent.mkdir(parents=True, exist_ok=True) + + sif_name = 
migrator.get_sif_image_name(singfile) + assert sif_name == "bids/bids-validator--1.2.3.sif" + + def test_get_oci_image_name_different_family(self, tmp_path: Path) -> None: + """Test OCI image name with different family directory.""" + migrator = OCIMigrator( + repo_dir=tmp_path, + images_dir=tmp_path / "images", + images_oci_dir=tmp_path / "images-oci", + ) + + singfile = tmp_path / "images" / "neurodesk" / "Singularity.neurodesk-afni--21.2.00" + singfile.parent.mkdir(parents=True, exist_ok=True) + + oci_name = migrator.get_oci_image_name(singfile) + assert oci_name == "neurodesk/neurodesk-afni--21.2.00.oci" + + +@pytest.mark.ai_generated +class TestAnnexVerification: + """Test git-annex URL verification.""" + + def test_verify_annex_urls_no_git_repo(self, tmp_path: Path) -> None: + """Test verification fails gracefully when not in a git repo.""" + migrator = OCIMigrator( + repo_dir=tmp_path, + images_dir=tmp_path / "images", + images_oci_dir=tmp_path / "images-oci", + ) + + # Should return False when git commands fail + result = migrator.verify_annex_urls(tmp_path / "nonexistent") + assert result is False + + +@pytest.mark.ai_generated +class TestMigrationResult: + """Test MigrationResult dataclass.""" + + def test_migration_result_success(self, tmp_path: Path) -> None: + """Test creating a successful migration result.""" + singfile = tmp_path / "test.sing" + result = MigrationResult( + singularity_file=singfile, + success=True, + oci_image_path=tmp_path / "test.oci", + sif_image_path=tmp_path / "test.sif", + ) + + assert result.success is True + assert result.error_message is None + assert result.singularity_file == singfile + + def test_migration_result_failure(self, tmp_path: Path) -> None: + """Test creating a failed migration result.""" + singfile = tmp_path / "test.sing" + result = MigrationResult( + singularity_file=singfile, + success=False, + error_message="Test error", + ) + + assert result.success is False + assert result.error_message == "Test error" + 
assert result.oci_image_path is None + + +@pytest.mark.ai_generated +def test_migrator_initialization(tmp_path: Path) -> None: + """Test OCIMigrator initialization.""" + images_dir = tmp_path / "images" + images_oci_dir = tmp_path / "images-oci" + + migrator = OCIMigrator( + repo_dir=tmp_path, + images_dir=images_dir, + images_oci_dir=images_oci_dir, + skip_failures=True, + ) + + assert migrator.repo_dir == tmp_path + assert migrator.images_dir == images_dir + assert migrator.images_oci_dir == images_oci_dir + assert migrator.skip_failures is True + assert migrator.results == [] diff --git a/scripts/tests/test_oci_cmd.bats b/scripts/tests/test_oci_cmd.bats new file mode 100644 index 00000000..00518cb9 --- /dev/null +++ b/scripts/tests/test_oci_cmd.bats @@ -0,0 +1,61 @@ +#!/usr/bin/env bats +#emacs: -*- mode: shell-script; c-basic-offset: 4; tab-width: 4; indent-tabs-mode: t -*- +#ex: set sts=4 ts=4 sw=4 noet: +# +# Tests for scripts/oci_cmd wrapper +# + +load test_helpers + +topdir="$BATS_TEST_DIRNAME/../.." + +@test "oci_cmd exists and is executable" { + [ -x "$topdir/scripts/oci_cmd" ] +} + +@test "oci_cmd --version passes through to apptainer" { + # This test verifies that oci_cmd correctly forwards arguments to apptainer + cd "$topdir" + + # Check if apptainer is available + if ! command -v apptainer >/dev/null 2>&1; then + skip "apptainer not available" + fi + + myrun scripts/oci_cmd --version + + # Should succeed and output should contain "apptainer" + assert_clean_exit + assert_python_re_match "apptainer" "${lines[*]}" +} + +@test "oci_cmd help passes through to apptainer" { + cd "$topdir" + + # Check if apptainer is available + if ! 
command -v apptainer >/dev/null 2>&1; then + skip "apptainer not available" + fi + + myrun scripts/oci_cmd help + + # Should succeed and output should contain apptainer help text + assert_clean_exit + # Check for "apptainer" in output as help command shows apptainer commands + assert_python_re_match ".*apptainer.*" "${lines[*]}" +} + +@test "oci_cmd with no arguments shows apptainer usage" { + cd "$topdir" + + # Check if apptainer is available + if ! command -v apptainer >/dev/null 2>&1; then + skip "apptainer not available" + fi + + # apptainer with no arguments typically shows usage and exits with non-zero + run "$topdir/scripts/oci_cmd" + + # Should show usage information + assert_python_re_match "Usage:" "${lines[*]}" +}