Commit 944d861

Add unique index for submitted jobs (#3044)
1 parent c2f8fd6 commit 944d861

File tree

11 files changed (+219, -121 lines)

CHANGELOG.md

Lines changed: 3 additions & 0 deletions

@@ -4,6 +4,8 @@
 
 > NOTE: This version requires running a data-migration script (`fractalctl update-db-data`).
 
+> WARNING: Before upgrading to this version, make sure that no jobs are marked as submitted in the current database tables.
+
 The main contents of this release are the introduction of the project sharing and a review of the authorization scheme for [`fractal-data`](https://github.com/fractal-analytics-platform/fractal-data).
 
 * API:
@@ -24,6 +26,7 @@ The main contents of this release are the introduction of the project sharing an
 * Database:
     * Add project-sharing-related `LinkUserProjectV2` columns (\#2999).
    * Move `UserOAuth.project_dir` to `.project_dirs` and drop `UserGrop.viewer_paths` (\#3031).
+    * Enforce max one submitted `JobV2` per `DatasetV2` (\#3044).
 * Settings:
     * Drop `DataSettings` (\#3031).
 * Reduce API logging level for some endpoints (\#3010).

fractal_server/app/models/v2/job.py

Lines changed: 11 additions & 0 deletions

@@ -6,7 +6,9 @@
 from sqlalchemy.dialects.postgresql import JSONB
 from sqlalchemy.types import DateTime
 from sqlmodel import Field
+from sqlmodel import Index
 from sqlmodel import SQLModel
+from sqlmodel import text
 
 from fractal_server.app.schemas.v2 import JobStatusType
 from fractal_server.utils import get_timestamp
@@ -66,3 +68,12 @@ class JobV2(SQLModel, table=True):
     type_filters: dict[str, bool] = Field(
         sa_column=Column(JSONB, nullable=False, server_default="{}")
     )
+
+    __table_args__ = (
+        Index(
+            "ix_jobv2_one_submitted_job_per_dataset",
+            "dataset_id",
+            unique=True,
+            postgresql_where=text(f"status = '{JobStatusType.SUBMITTED}'"),
+        ),
+    )
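
The `__table_args__` entry above declares a PostgreSQL partial unique index: uniqueness of `dataset_id` is enforced only over rows whose status is `submitted`, so any number of finished or failed jobs per dataset remain legal. As a minimal sketch (not part of this commit; the stripped-down `jobv2` table stub is an assumption for illustration), the DDL this definition emits can be rendered like so:

    # Illustrative sketch: compile the partial unique index to PostgreSQL DDL.
    # Only the indexed/filtered columns of "jobv2" are modeled here.
    import sqlalchemy as sa
    from sqlalchemy.dialects import postgresql
    from sqlalchemy.schema import CreateIndex

    metadata = sa.MetaData()
    jobv2 = sa.Table(
        "jobv2",
        metadata,
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("dataset_id", sa.Integer),
        sa.Column("status", sa.String),
    )
    index = sa.Index(
        "ix_jobv2_one_submitted_job_per_dataset",
        jobv2.c.dataset_id,
        unique=True,
        postgresql_where=sa.text("status = 'submitted'"),
    )
    print(CreateIndex(index).compile(dialect=postgresql.dialect()))
    # -> CREATE UNIQUE INDEX ix_jobv2_one_submitted_job_per_dataset
    #    ON jobv2 (dataset_id) WHERE status = 'submitted'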

fractal_server/app/routes/api/v2/_aux_functions.py

Lines changed: 0 additions & 36 deletions

@@ -7,7 +7,6 @@
 
 from fastapi import HTTPException
 from fastapi import status
-from sqlalchemy.exc import MultipleResultsFound
 from sqlalchemy.orm.attributes import flag_modified
 from sqlmodel import select
 from sqlmodel.sql.expression import SelectOfScalar
@@ -554,41 +553,6 @@ async def _get_workflowtask_or_404(
     return wftask
 
 
-async def _get_submitted_job_or_none(
-    *,
-    dataset_id: int,
-    workflow_id: int,
-    db: AsyncSession,
-) -> JobV2 | None:
-    """
-    Get the submitted job for given dataset/workflow, if any.
-
-    This function also handles the invalid branch where more than one job
-    is found.
-
-    Args:
-        dataset_id:
-        workflow_id:
-        db:
-    """
-    res = await db.execute(
-        _get_submitted_jobs_statement()
-        .where(JobV2.dataset_id == dataset_id)
-        .where(JobV2.workflow_id == workflow_id)
-    )
-    try:
-        return res.scalars().one_or_none()
-    except MultipleResultsFound as e:
-        error_msg = (
-            f"Multiple running jobs found for {dataset_id=} and {workflow_id=}."
-        )
-        logger.error(f"{error_msg} Original error: {str(e)}.")
-        raise HTTPException(
-            status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
-            detail=error_msg,
-        )
-
-
 async def _get_user_resource_id(user_id: int, db: AsyncSession) -> int | None:
     res = await db.execute(
         select(Resource.id)
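
The deleted helper existed mainly to convert `MultipleResultsFound` into an HTTP 422. With the partial unique index guaranteeing at most one submitted job per dataset, that branch is unreachable, so the two router diffs below inline the query and call `one_or_none()` directly. A self-contained sketch (SQLite in-memory; table and values are hypothetical) of the exception the helper used to guard against:

    # Sketch of why the try/except was needed before the index existed:
    # ScalarResult.one_or_none() raises MultipleResultsFound when the query
    # matches two or more rows.
    import sqlalchemy as sa
    from sqlalchemy.exc import MultipleResultsFound

    engine = sa.create_engine("sqlite://")
    with engine.begin() as conn:
        conn.execute(sa.text("CREATE TABLE job (id INTEGER, dataset_id INTEGER)"))
        # Two "submitted" jobs for the same dataset: legal without the index.
        conn.execute(sa.text("INSERT INTO job VALUES (1, 7), (2, 7)"))

    with engine.connect() as conn:
        res = conn.execute(sa.text("SELECT id FROM job WHERE dataset_id = 7"))
        try:
            res.scalars().one_or_none()
        except MultipleResultsFound:
            print("multiple rows -> the old helper mapped this to HTTP 422")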

fractal_server/app/routes/api/v2/history.py

Lines changed: 7 additions & 5 deletions

@@ -33,7 +33,7 @@
 from fractal_server.logger import set_logger
 
 from ._aux_functions import _get_dataset_check_access
-from ._aux_functions import _get_submitted_job_or_none
+from ._aux_functions import _get_submitted_jobs_statement
 from ._aux_functions import _get_workflow_check_access
 from ._aux_functions_history import _verify_workflow_and_dataset_access
 from ._aux_functions_history import get_history_run_or_404
@@ -90,11 +90,13 @@ async def get_workflow_tasks_statuses(
         db=db,
     )
 
-    running_job = await _get_submitted_job_or_none(
-        db=db,
-        dataset_id=dataset_id,
-        workflow_id=workflow_id,
+    res = await db.execute(
+        _get_submitted_jobs_statement()
+        .where(JobV2.dataset_id == dataset_id)
+        .where(JobV2.workflow_id == workflow_id)
     )
+    running_job = res.scalars().one_or_none()
+
     if running_job is not None:
         running_wftasks = workflow.task_list[
             running_job.first_task_index : running_job.last_task_index + 1

fractal_server/app/routes/api/v2/status_legacy.py

Lines changed: 6 additions & 20 deletions

@@ -1,7 +1,5 @@
 from fastapi import APIRouter
 from fastapi import Depends
-from fastapi import HTTPException
-from fastapi import status
 
 from fractal_server.app.db import AsyncSession
 from fractal_server.app.db import get_async_db
@@ -64,24 +62,12 @@ async def get_workflowtask_status(
     # Check whether there exists a submitted job associated to this
     # workflow/dataset pair. If it does exist, it will be used later.
     # If there are multiple jobs, raise an error.
-    stm = _get_submitted_jobs_statement()
-    stm = stm.where(JobV2.dataset_id == dataset_id)
-    stm = stm.where(JobV2.workflow_id == workflow_id)
-    res = await db.execute(stm)
-    running_jobs = res.scalars().all()
-    if len(running_jobs) == 0:
-        running_job = None
-    elif len(running_jobs) == 1:
-        running_job = running_jobs[0]
-    else:
-        string_ids = str([job.id for job in running_jobs])[1:-1]
-        raise HTTPException(
-            status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
-            detail=(
-                f"Cannot get WorkflowTaskV2 statuses as DatasetV2 {dataset.id}"
-                f" is linked to multiple active jobs: {string_ids}."
-            ),
-        )
+    res = await db.execute(
+        _get_submitted_jobs_statement()
+        .where(JobV2.dataset_id == dataset_id)
+        .where(JobV2.workflow_id == workflow_id)
+    )
+    running_job = res.scalars().one_or_none()
 
     # Initialize empty dictionary for WorkflowTaskV2 status
     workflow_tasks_status_dict: dict = {}

New file: Alembic migration

Lines changed: 40 additions & 0 deletions

@@ -0,0 +1,40 @@
+"""One submitted Job per Dataset
+
+Revision ID: f0702066b007
+Revises: 7910eed4cf97
+Create Date: 2025-12-01 20:54:03.137093
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "f0702066b007"
+down_revision = "7910eed4cf97"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("jobv2", schema=None) as batch_op:
+        batch_op.create_index(
+            "ix_jobv2_one_submitted_job_per_dataset",
+            ["dataset_id"],
+            unique=True,
+            postgresql_where=sa.text("status = 'submitted'"),
+        )
+
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("jobv2", schema=None) as batch_op:
+        batch_op.drop_index(
+            "ix_jobv2_one_submitted_job_per_dataset",
+            postgresql_where=sa.text("status = 'submitted'"),
+        )
+
+    # ### end Alembic commands ###
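
Per the CHANGELOG warning, `upgrade()` fails if the table already violates the new invariant, i.e. if any dataset currently holds more than one submitted job. A hedged pre-upgrade check one could run beforehand (the connection URL is a placeholder; the query simply mirrors the index predicate):

    # Illustrative pre-upgrade check: list datasets that would break the new
    # unique index because they hold more than one submitted job.
    import sqlalchemy as sa

    engine = sa.create_engine("postgresql://localhost/fractal")  # placeholder URL
    with engine.connect() as conn:
        offenders = conn.execute(
            sa.text(
                "SELECT dataset_id, COUNT(*) AS n FROM jobv2 "
                "WHERE status = 'submitted' "
                "GROUP BY dataset_id HAVING COUNT(*) > 1"
            )
        ).all()
    for dataset_id, n in offenders:
        print(f"dataset {dataset_id}: {n} submitted jobs (resolve before upgrading)")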

New file: test for the unique index

Lines changed: 137 additions & 0 deletions

@@ -0,0 +1,137 @@
+import pytest
+from sqlalchemy.exc import IntegrityError
+from sqlalchemy.orm.attributes import flag_modified
+
+from fractal_server.app.models.v2 import DatasetV2
+from fractal_server.app.models.v2 import JobV2
+from fractal_server.app.models.v2 import ProjectV2
+from fractal_server.app.models.v2 import WorkflowV2
+from fractal_server.app.schemas.v2.job import JobStatusType
+
+
+async def test_unique_job_submitted_per_dataset(db, local_resource_profile_db):
+    resource, _ = local_resource_profile_db
+
+    project = ProjectV2(name="Project", resource_id=resource.id)
+    db.add(project)
+    await db.commit()
+    await db.refresh(project)
+
+    workflow = WorkflowV2(name="Workflow", project_id=project.id)
+    dataset1 = DatasetV2(
+        name="Dataset1", project_id=project.id, zarr_dir="/fake"
+    )
+    dataset2 = DatasetV2(
+        name="Dataset2", project_id=project.id, zarr_dir="/fake"
+    )
+    db.add_all([workflow, dataset1, dataset2])
+    await db.commit()
+    await db.refresh(workflow)
+    await db.refresh(dataset1)
+    await db.refresh(dataset2)
+
+    dataset1_id = dataset1.id
+    dataset2_id = dataset2.id
+
+    common_args = dict(
+        project_id=project.id,
+        workflow_id=workflow.id,
+        user_email="user@example.org",
+        dataset_dump={},
+        workflow_dump={},
+        project_dump={},
+        first_task_index=0,
+        last_task_index=0,
+        attribute_filters={},
+        type_filters={},
+    )
+
+    # Dataset 1, SUBMITTED -> OK
+    db.add(
+        JobV2(
+            dataset_id=dataset1_id,
+            status=JobStatusType.SUBMITTED,
+            **common_args,
+        )
+    )
+    await db.commit()
+
+    # Dataset 1, NON SUBMITTED -> OK
+    db.add(
+        JobV2(
+            dataset_id=dataset1_id,
+            status=JobStatusType.FAILED,
+            **common_args,
+        )
+    )
+    await db.commit()
+
+    # Dataset 1, SUBMITTED -> FAIL
+    db.add(
+        JobV2(
+            dataset_id=dataset1_id,
+            status=JobStatusType.SUBMITTED,
+            **common_args,
+        )
+    )
+    with pytest.raises(IntegrityError) as e:
+        await db.commit()
+    assert "ix_jobv2_one_submitted_job_per_dataset" in e.value.args[0]
+    await db.rollback()
+
+    # Dataset 2, SUBMITTED -> OK
+    db.add(
+        JobV2(
+            dataset_id=dataset2_id,
+            status=JobStatusType.SUBMITTED,
+            **common_args,
+        )
+    )
+    await db.commit()
+
+    # NOTE: the following tests a situation that should never happen,
+    # i.e. dataset_id=None, status="submitted"
+
+    # Dataset NULL, SUBMITTED -> OK
+    db.add(
+        JobV2(
+            dataset_id=None,
+            status=JobStatusType.SUBMITTED,
+            **common_args,
+        )
+    )
+    await db.commit()
+
+    # Dataset NULL, SUBMITTED -> OK
+    db.add(
+        JobV2(
+            dataset_id=None,
+            status=JobStatusType.SUBMITTED,
+            **common_args,
+        )
+    )
+    await db.commit()
+
+    # Dataset 2, test PATCH
+    job_to_patch = JobV2(
+        dataset_id=dataset2_id,
+        status=JobStatusType.FAILED,
+        **common_args,
+    )
+    db.add(job_to_patch)
+    await db.commit()
+
+    # PATCH status to DONE -> OK
+    job_to_patch.status = JobStatusType.DONE
+    flag_modified(job_to_patch, "status")
+    await db.commit()
+    await db.refresh(job_to_patch)
+    assert job_to_patch.status == JobStatusType.DONE
+
+    # PATCH status to SUBMITTED -> FAIL
+    job_to_patch.status = JobStatusType.SUBMITTED
+    flag_modified(job_to_patch, "status")
+    with pytest.raises(IntegrityError) as e:
+        await db.commit()
+    assert "ix_jobv2_one_submitted_job_per_dataset" in e.value.args[0]
+    await db.rollback()
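
The two `dataset_id=None` inserts above both succeed because, by default, a PostgreSQL unique index treats NULL values as distinct from one another (the index here is not created with `NULLS NOT DISTINCT`), so NULL-dataset rows never collide. A tiny standalone demonstration (hypothetical table `t`, placeholder connection URL):

    # Demonstration that NULL dataset_id rows never collide on the partial
    # unique index: NULL != NULL for uniqueness purposes in PostgreSQL.
    import sqlalchemy as sa

    engine = sa.create_engine("postgresql://localhost/scratch")  # placeholder URL
    with engine.begin() as conn:
        conn.execute(sa.text("CREATE TABLE t (dataset_id INT, status TEXT)"))
        conn.execute(
            sa.text(
                "CREATE UNIQUE INDEX one_submitted ON t (dataset_id) "
                "WHERE status = 'submitted'"
            )
        )
        # Both rows insert cleanly, mirroring the two dataset_id=None jobs above.
        conn.execute(sa.text("INSERT INTO t VALUES (NULL, 'submitted')"))
        conn.execute(sa.text("INSERT INTO t VALUES (NULL, 'submitted')"))
    print("both NULL-dataset submitted rows accepted")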

tests/v2/test_03_api/admin/test_admin_job.py

Lines changed: 5 additions & 1 deletion

@@ -66,6 +66,7 @@ async def test_view_job(
         project_id=project.id,
         log="log-b",
         dataset_id=dataset.id,
+        status=JobStatusType.DONE,
         workflow_id=workflow2.id,
         start_timestamp=datetime(2023, 1, 1, tzinfo=timezone.utc),
         end_timestamp=datetime(2023, 11, 9, tzinfo=timezone.utc),
@@ -134,7 +135,10 @@ async def test_view_job(
     assert len(res.json()["items"]) == 0
     res = await client.get(f"{PREFIX}/job/?status=submitted")
     assert res.status_code == 200
-    assert len(res.json()["items"]) == 2
+    assert len(res.json()["items"]) == 1
+    res = await client.get(f"{PREFIX}/job/?status=done")
+    assert res.status_code == 200
+    assert len(res.json()["items"]) == 1
 
     # get jobs by [start/end]_timestamp_[min/max]
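
The assertion change above follows from the new constraint: the fixture presumably created two submitted jobs sharing one dataset, which the partial unique index now forbids, so one of them is created as `DONE` instead and the former `status=submitted` count of 2 splits into one submitted and one done job.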
