From 30588874c6f139d41a1ffc71bad49d35656e48f7 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Mon, 3 Nov 2025 17:01:25 +0000 Subject: [PATCH 01/10] Ensure OOD app installs work for site image builds --- .../common/inventory/group_vars/all/openondemand.yml | 9 +++++---- .../common/inventory/group_vars/builder/defaults.yml | 6 ++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/environments/common/inventory/group_vars/all/openondemand.yml b/environments/common/inventory/group_vars/all/openondemand.yml index 84a6c2055..b5af263e7 100644 --- a/environments/common/inventory/group_vars/all/openondemand.yml +++ b/environments/common/inventory/group_vars/all/openondemand.yml @@ -12,11 +12,12 @@ openondemand_servername: "{{ hostvars[groups['openondemand'].0].ansible_host if openondemand_auth: basic_pam -openondemand_jupyter_partition: "{{ openhpc_partitions[0]['name'] }}" -openondemand_desktop_partition: "{{ openhpc_partitions[0]['name'] }}" -openondemand_rstudio_partition: "{{ openhpc_partitions[0]['name'] }}" +# Manages case where openhpc_partitions are not defined e.g. for site image builds +openondemand_jupyter_partition: "{{ openhpc_partitions[0].name if (openhpc_partitions | default([])) else '' }}" +openondemand_desktop_partition: "{{ openhpc_partitions[0].name if (openhpc_partitions | default([])) else '' }}" +openondemand_rstudio_partition: "{{ openhpc_partitions[0].name if (openhpc_partitions | default([])) else '' }}" openondemand_matlab_partition: '' # Requires target site to already have MATLAB so set to empty -openondemand_codeserver_partition: "{{ openhpc_partitions[0]['name'] }}" +openondemand_codeserver_partition: "{{ openhpc_partitions[0].name if (openhpc_partitions | default([])) else '' }}" # Regex defining hosts which openondemand can proxy; the default regex is compute nodes (for apps) and grafana host, # e.g. if the group `compute` has hosts `compute-{0,1,2,..}` this will be '(compute-\d+)|(control)'. 
diff --git a/environments/common/inventory/group_vars/builder/defaults.yml b/environments/common/inventory/group_vars/builder/defaults.yml index dc28e44f5..617a8de4f 100644 --- a/environments/common/inventory/group_vars/builder/defaults.yml +++ b/environments/common/inventory/group_vars/builder/defaults.yml @@ -27,3 +27,9 @@ sssd_enabled: false slurm_exporter_state: stopped appliances_mode: build proxy_remove: true +# for image build ood partition var is just truthy as no cluster_groups defined +openondemand_jupyter_partition: true +openondemand_desktop_partition: true +openondemand_rstudio_partition: false +openondemand_matlab_partition: false +openondemand_codeserver_partition: false \ No newline at end of file From 9e60834b36850c49cee25ccad5c857c0114e41d2 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Tue, 4 Nov 2025 10:02:00 +0000 Subject: [PATCH 02/10] add ood app image build docs --- docs/openondemand.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/openondemand.md b/docs/openondemand.md index cd33cd54a..91e31c76e 100644 --- a/docs/openondemand.md +++ b/docs/openondemand.md @@ -63,6 +63,10 @@ The appliance automatically configures Open OnDemand to proxy Grafana and adds a [^1]: Note that if `openondemand_auth` is `basic_pam` and anonymous Grafana login is enabled, the appliance will (by default) configure Open OnDemand's Apache server to remove the Authorisation header from proxying of all `node/` addresses. This is done as otherwise Grafana tries to use this header to authenticate, which fails with the default configuration where only the admin Grafana user `grafana` is created. Note that the removal of this header in this configuration means it cannot be used to authenticate proxied interactive applications - however the appliance-deployed remote desktop and Jupyter Notebook server applications use other authentication methods. 
An alternative if using `basic_pam` is not to enable anonymous Grafana login and to create Grafana users matching the local users (e.g. in `environments//hooks/post.yml`). +## Image Build + +For local site image builds, the preferred method of installing ood apps in the image is by toggling the `openondemand__partition` variables in `environments/common/inventory/group_vars/all/builder/defaults.yml`. In this case the variables are not strings and are instead simply truthy i.e. they do not describe cluster partition groups but just whether those apps will be installed in the image or not. + ## Access By default the appliance authenticates against OOD with basic auth through PAM. When creating a new environment, a new user with username `demo_user` will be created. From 690f6b63d844b52c4a0f4e0241ca4167bae45328 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 7 Nov 2025 17:27:32 +0000 Subject: [PATCH 03/10] fix partition defaults and document --- ansible/roles/openondemand/README.md | 4 ++-- docs/openondemand.md | 5 +++-- environments/common/inventory/group_vars/all/openhpc.yml | 1 + .../common/inventory/group_vars/all/openondemand.yml | 8 ++++---- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/ansible/roles/openondemand/README.md b/ansible/roles/openondemand/README.md index a215173b1..0e44f6fc9 100644 --- a/ansible/roles/openondemand/README.md +++ b/ansible/roles/openondemand/README.md @@ -69,10 +69,10 @@ This role enables SSL on the Open Ondemand server, using the following self-sign - `new_window`: Optional. Whether to open link in new window. Bool, default `false`. - `app_name`: Optional. Unique name for app appended to `/var/www/ood/apps/sys/`. Default is `name`, useful if that is not unique or not suitable as a path component. - `openondemand_dashboard_support_url`: Optional. URL or email etc to show as support contact under Help in dashboard. Default `(undefined)`. -- `openondemand_desktop_partition`: Optional. 
Name of Slurm partition to use for remote desktops. Requires a corresponding group named "openondemand_desktop" and entry in openhpc_partitions. +- `openondemand_desktop_partition`: Optional. Name of Slurm partition to use for remote desktops, by default supplied with `openhpc_partitions` entry. During image build, with `openondemand` group, setting this partition as a boolean determines if app installed in image. - `openondemand_desktop_screensaver`: Optional. Whether to enable screen locking/screensaver. **NB:** Users must have passwords if this is enabled. Bool, default `false`. - `openondemand_filesapp_paths`: List of paths (in addition to $HOME, which is always added) to include shortcuts to within the Files dashboard app. -- `openondemand_jupyter_partition`: Required. Name of Slurm partition to use for Jupyter Notebook servers. Requires a corresponding group named "openondemand_jupyter" and entry in openhpc_partitions. +- `openondemand_jupyter_partition`: Required. Name of Slurm partition to use for Jupyter Notebook servers, by default supplied with `openhpc_partitions` entry. During image build, with `openondemand` group, setting this partition as a boolean determines if app installed in image. - `openondemand_gres_options`: Optional. A list of `[label, value]` items used to provide a drop-down for resource/GRES selection in application forms. The default constructs a list from all GRES definitions in the cluster. See the diff --git a/docs/openondemand.md b/docs/openondemand.md index 91e31c76e..a0829bb68 100644 --- a/docs/openondemand.md +++ b/docs/openondemand.md @@ -63,9 +63,10 @@ The appliance automatically configures Open OnDemand to proxy Grafana and adds a [^1]: Note that if `openondemand_auth` is `basic_pam` and anonymous Grafana login is enabled, the appliance will (by default) configure Open OnDemand's Apache server to remove the Authorisation header from proxying of all `node/` addresses. 
This is done as otherwise Grafana tries to use this header to authenticate, which fails with the default configuration where only the admin Grafana user `grafana` is created. Note that the removal of this header in this configuration means it cannot be used to authenticate proxied interactive applications - however the appliance-deployed remote desktop and Jupyter Notebook server applications use other authentication methods. An alternative if using `basic_pam` is not to enable anonymous Grafana login and to create Grafana users matching the local users (e.g. in `environments//hooks/post.yml`). -## Image Build +## Image Build +By default, most ondemand apps are installed in image builds when the build includes the inventory group `openondemand` (which is the default for "fatimage" builds). The apps installed are defined by the `openondemand__partition` variables in `environments/common/inventory/group_vars/all/builder/defaults.yml`. Note that in this case the values are not strings and are instead simply truthy, i.e. they do not describe cluster partition groups but just whether those apps will be installed in the image or not. -For local site image builds, the preferred method of installing ood apps in the image is by toggling the `openondemand__partition` variables in `environments/common/inventory/group_vars/all/builder/defaults.yml`. In this case the variables are not strings and are instead simply truthy i.e. they do not describe cluster partition groups but just whether those apps will be installed in the image or not. +For e.g. site-specific image builds where different app installs are required, due to precedence rules these must be overridden in a `builder`-groupvars file e.g. `environments/site/inventory/group_vars/all/builder/defaults.yml`. 
## Access diff --git a/environments/common/inventory/group_vars/all/openhpc.yml b/environments/common/inventory/group_vars/all/openhpc.yml index bf212cb80..daccc947a 100644 --- a/environments/common/inventory/group_vars/all/openhpc.yml +++ b/environments/common/inventory/group_vars/all/openhpc.yml @@ -15,6 +15,7 @@ openhpc_slurmdbd_mysql_password: "{{ vault_mysql_slurm_password }}" openhpc_slurmdbd_mysql_username: slurm openhpc_slurm_control_host: "{{ groups['control'] | first }}" # avoid using hostvars for compute-init openhpc_slurmdbd_host: "{{ openhpc_slurm_control_host }}" +cluster_compute_groups: [] # without any nodes deployed/inventory file we get empty openhpc_nodegroups and empty openhpc_partitions. openhpc_rebuild_partition: # not a role var - could actually add more indirection here for things we're expecting to be modified, e.g. groups and maxtime name: rebuild nodegroups: "{{ cluster_compute_groups | default([]) }}" diff --git a/environments/common/inventory/group_vars/all/openondemand.yml b/environments/common/inventory/group_vars/all/openondemand.yml index b5af263e7..4337e0d89 100644 --- a/environments/common/inventory/group_vars/all/openondemand.yml +++ b/environments/common/inventory/group_vars/all/openondemand.yml @@ -13,11 +13,11 @@ openondemand_servername: "{{ hostvars[groups['openondemand'].0].ansible_host if openondemand_auth: basic_pam # Manages case where openhpc_partitions are not defined e.g. 
for site image builds -openondemand_jupyter_partition: "{{ openhpc_partitions[0].name if (openhpc_partitions | default([])) else '' }}" -openondemand_desktop_partition: "{{ openhpc_partitions[0].name if (openhpc_partitions | default([])) else '' }}" -openondemand_rstudio_partition: "{{ openhpc_partitions[0].name if (openhpc_partitions | default([])) else '' }}" +openondemand_jupyter_partition: "{{ openhpc_partitions[0].name | default('') }}" +openondemand_desktop_partition: "{{ openhpc_partitions[0].name | default('') }}" +openondemand_rstudio_partition: "{{ openhpc_partitions[0].name | default('') }}" openondemand_matlab_partition: '' # Requires target site to already have MATLAB so set to empty -openondemand_codeserver_partition: "{{ openhpc_partitions[0].name if (openhpc_partitions | default([])) else '' }}" +openondemand_codeserver_partition: "{{ openhpc_partitions[0].name | default('') }}" # Regex defining hosts which openondemand can proxy; the default regex is compute nodes (for apps) and grafana host, # e.g. if the group `compute` has hosts `compute-{0,1,2,..}` this will be '(compute-\d+)|(control)'. From 5052ce2b1ff53108a15d84217f754ddd2207540c Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 7 Nov 2025 17:29:31 +0000 Subject: [PATCH 04/10] Add detail to docs --- ansible/roles/openondemand/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/openondemand/README.md b/ansible/roles/openondemand/README.md index 0e44f6fc9..3727d21b2 100644 --- a/ansible/roles/openondemand/README.md +++ b/ansible/roles/openondemand/README.md @@ -69,10 +69,10 @@ This role enables SSL on the Open Ondemand server, using the following self-sign - `new_window`: Optional. Whether to open link in new window. Bool, default `false`. - `app_name`: Optional. Unique name for app appended to `/var/www/ood/apps/sys/`. Default is `name`, useful if that is not unique or not suitable as a path component. 
- `openondemand_dashboard_support_url`: Optional. URL or email etc to show as support contact under Help in dashboard. Default `(undefined)`. -- `openondemand_desktop_partition`: Optional. Name of Slurm partition to use for remote desktops, by default supplied with `openhpc_partitions` entry. During image build, with `openondemand` group, setting this partition as a boolean determines if app installed in image. +- `openondemand_desktop_partition`: Optional. Name of Slurm partition to use for remote desktops, by default supplied with `openhpc_partitions` entry. During open ondemand config the string is used to provide a default partition in the UX. During image build, with `openondemand` group, setting this partition as a boolean determines if app installed in image. - `openondemand_desktop_screensaver`: Optional. Whether to enable screen locking/screensaver. **NB:** Users must have passwords if this is enabled. Bool, default `false`. - `openondemand_filesapp_paths`: List of paths (in addition to $HOME, which is always added) to include shortcuts to within the Files dashboard app. -- `openondemand_jupyter_partition`: Required. Name of Slurm partition to use for Jupyter Notebook servers, by default supplied with `openhpc_partitions` entry. During image build, with `openondemand` group, setting this partition as a boolean determines if app installed in image. +- `openondemand_jupyter_partition`: Required. Name of Slurm partition to use for Jupyter Notebook servers, by default supplied with `openhpc_partitions` entry. During open ondemand config the string is used to provide a default partition in the UX. During image build, with `openondemand` group, setting this partition as a boolean determines if app installed in image. - `openondemand_gres_options`: Optional. A list of `[label, value]` items used to provide a drop-down for resource/GRES selection in application forms. The default constructs a list from all GRES definitions in the cluster. 
See the From 353e113f85da91d97c3cfab94aefbaeb9d10b770 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 7 Nov 2025 17:31:58 +0000 Subject: [PATCH 05/10] add newlines --- environments/common/inventory/group_vars/builder/defaults.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments/common/inventory/group_vars/builder/defaults.yml b/environments/common/inventory/group_vars/builder/defaults.yml index 617a8de4f..b7000c4a6 100644 --- a/environments/common/inventory/group_vars/builder/defaults.yml +++ b/environments/common/inventory/group_vars/builder/defaults.yml @@ -32,4 +32,4 @@ openondemand_jupyter_partition: true openondemand_desktop_partition: true openondemand_rstudio_partition: false openondemand_matlab_partition: false -openondemand_codeserver_partition: false \ No newline at end of file +openondemand_codeserver_partition: false From 38e53802ba03613605e57c3e3292db018996a4ef Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 7 Nov 2025 20:08:41 +0000 Subject: [PATCH 06/10] fix linting --- docs/openondemand.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/openondemand.md b/docs/openondemand.md index a0829bb68..6a17a7a86 100644 --- a/docs/openondemand.md +++ b/docs/openondemand.md @@ -64,7 +64,9 @@ The appliance automatically configures Open OnDemand to proxy Grafana and adds a [^1]: Note that if `openondemand_auth` is `basic_pam` and anonymous Grafana login is enabled, the appliance will (by default) configure Open OnDemand's Apache server to remove the Authorisation header from proxying of all `node/` addresses. This is done as otherwise Grafana tries to use this header to authenticate, which fails with the default configuration where only the admin Grafana user `grafana` is created. 
Note that the removal of this header in this configuration means it cannot be used to authenticate proxied interactive applications - however the appliance-deployed remote desktop and Jupyter Notebook server applications use other authentication methods. An alternative if using `basic_pam` is not to enable anonymous Grafana login and to create Grafana users matching the local users (e.g. in `environments//hooks/post.yml`). ## Image Build -By default, most ondemand apps are installed in image builds when the build includes the inventory group `openondemand` (which is the default for "fatimage" builds). The apps installed are defined by the `openondemand__partition` variables in `environments/common/inventory/group_vars/all/builder/defaults.yml`. Note that in this case the values are not strings and are instead simply truthy, i.e. they do not describe cluster partition groups but just whether those apps will be installed in the image or not. +By default, most ondemand apps are installed in image builds when the build includes the inventory group `openondemand` (which is the default for "fatimage" builds). The apps installed are +defined by the `openondemand__partition` variables in `environments/common/inventory/group_vars/all/builder/defaults.yml`. Note that in this case the values are not strings and are instead +simply truthy, i.e. they do not describe cluster partition groups but just whether those apps will be installed in the image or not. For e.g. site-specific image builds where different app installs are required, due to precedence rules these must be overridden in a `builder`-groupvars file e.g. `environments/site/inventory/group_vars/all/builder/defaults.yml`. 
From e5a77ac2916eb57854cea219596a8aa019756251 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 7 Nov 2025 20:17:37 +0000 Subject: [PATCH 07/10] markdown prettier --- docs/openondemand.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/openondemand.md b/docs/openondemand.md index 6a17a7a86..77de1b54f 100644 --- a/docs/openondemand.md +++ b/docs/openondemand.md @@ -64,6 +64,7 @@ The appliance automatically configures Open OnDemand to proxy Grafana and adds a [^1]: Note that if `openondemand_auth` is `basic_pam` and anonymous Grafana login is enabled, the appliance will (by default) configure Open OnDemand's Apache server to remove the Authorisation header from proxying of all `node/` addresses. This is done as otherwise Grafana tries to use this header to authenticate, which fails with the default configuration where only the admin Grafana user `grafana` is created. Note that the removal of this header in this configuration means it cannot be used to authenticate proxied interactive applications - however the appliance-deployed remote desktop and Jupyter Notebook server applications use other authentication methods. An alternative if using `basic_pam` is not to enable anonymous Grafana login and to create Grafana users matching the local users (e.g. in `environments//hooks/post.yml`). ## Image Build + By default, most ondemand apps are installed in image builds when the build includes the inventory group `openondemand` (which is the default for "fatimage" builds). The apps installed are defined by the `openondemand__partition` variables in `environments/common/inventory/group_vars/all/builder/defaults.yml`. Note that in this case the values are not strings and are instead simply truthy, i.e. they do not describe cluster partition groups but just whether those apps will be installed in the image or not. 
From e8c1a39da7e5a8eae4de98a721ccd410f9d1d918 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Sat, 8 Nov 2025 12:54:20 +0000 Subject: [PATCH 08/10] bump CI images --- environments/.stackhpc/tofu/cluster_image.auto.tfvars.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json index 0be7322ec..45a76b85b 100644 --- a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json @@ -1,6 +1,6 @@ { "cluster_image": { - "RL8": "openhpc-RL8-251027-1123-d389c00b", - "RL9": "openhpc-RL9-251027-1123-d389c00b" + "RL8": "openhpc-RL8-251108-0123-e5a77ac2", + "RL9": "openhpc-RL9-251108-0123-e5a77ac2" } } From e306e163591922dd69ac648185b7d60042e71640 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Mon, 10 Nov 2025 09:37:04 +0000 Subject: [PATCH 09/10] Fix trivyscan action version --- .github/workflows/trivyscan.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/trivyscan.yml b/.github/workflows/trivyscan.yml index 1898d8558..df449451b 100644 --- a/.github/workflows/trivyscan.yml +++ b/.github/workflows/trivyscan.yml @@ -102,7 +102,7 @@ jobs: run: sudo guestmount -a /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 -i --ro -o allow_other './${{ steps.manifest.outputs.image-name }}' - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@v0.33.1 + uses: aquasecurity/trivy-action@0.33.1 with: scan-type: fs scan-ref: "${{ steps.manifest.outputs.image-name }}" @@ -122,7 +122,7 @@ jobs: category: "${{ matrix.build }}" - name: Fail if scan has CRITICAL vulnerabilities - uses: aquasecurity/trivy-action@v0.33.1 + uses: aquasecurity/trivy-action@0.33.1 with: scan-type: fs scan-ref: "${{ steps.manifest.outputs.image-name }}" From 7cf46100d91ff6cd8036127618aa9ea39e629108 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: 
Mon, 10 Nov 2025 17:55:33 +0000 Subject: [PATCH 10/10] fix cluster_compute_group group_var --- environments/common/inventory/group_vars/all/openhpc.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/environments/common/inventory/group_vars/all/openhpc.yml b/environments/common/inventory/group_vars/all/openhpc.yml index daccc947a..075a2a53d 100644 --- a/environments/common/inventory/group_vars/all/openhpc.yml +++ b/environments/common/inventory/group_vars/all/openhpc.yml @@ -15,10 +15,10 @@ openhpc_slurmdbd_mysql_password: "{{ vault_mysql_slurm_password }}" openhpc_slurmdbd_mysql_username: slurm openhpc_slurm_control_host: "{{ groups['control'] | first }}" # avoid using hostvars for compute-init openhpc_slurmdbd_host: "{{ openhpc_slurm_control_host }}" -cluster_compute_groups: [] # without any nodes deployed/inventory file we get empty openhpc_nodegroups and empty openhpc_partitions. +_cluster_compute_groups_safe: "{{ cluster_compute_groups | default([]) }}" # Safe default for undefined cluster_compute_groups in site-image builds openhpc_rebuild_partition: # not a role var - could actually add more indirection here for things we're expecting to be modified, e.g. 
groups and maxtime name: rebuild - nodegroups: "{{ cluster_compute_groups | default([]) }}" + nodegroups: "{{ _cluster_compute_groups_safe }}" default: false maxtime: 30 partition_params: @@ -28,7 +28,7 @@ openhpc_rebuild_partition: # not a role var - could actually add more indirectio DisableRootJobs: false PreemptMode: "OFF" OverSubscribe: EXCLUSIVE -openhpc_nodegroups: "{{ cluster_compute_groups | map('community.general.dict_kv', 'name') }}" # create nodegroup for each compute group +openhpc_nodegroups: "{{ _cluster_compute_groups_safe | map('community.general.dict_kv', 'name') }}" # create nodegroup for each compute group openhpc_user_partitions: "{{ openhpc_nodegroups }}" # create partition for each nodegroup (actually role default) - this is what we'd expect to be changed # yamllint disable-line rule:line-length openhpc_partitions: "{{ openhpc_user_partitions + ([openhpc_rebuild_partition] if groups['rebuild'] | length > 0 else []) }}" # auto-create rebuild partition if reqd.