From 2c45c6cffa9b5068196c22c360ff0071e197f074 Mon Sep 17 00:00:00 2001 From: Claudia Watson Date: Tue, 28 Oct 2025 15:27:31 +0000 Subject: [PATCH 1/4] Adding tasks to eessi/configure.yml to make eessi configure gpu node automatically --- .../roles/compute_init/files/compute-init.yml | 16 ++++++++++++++++ ansible/roles/eessi/tasks/configure.yml | 17 +++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/ansible/roles/compute_init/files/compute-init.yml b/ansible/roles/compute_init/files/compute-init.yml index 81dedf8fb..0a21303a6 100644 --- a/ansible/roles/compute_init/files/compute-init.yml +++ b/ansible/roles/compute_init/files/compute-init.yml @@ -294,6 +294,22 @@ ansible.builtin.command: cmd: "cvmfs_config setup" + # configure gpus + - name: Check for NVIDIA driver + ansible.builtin.stat: + path: /dev/nvidia0 + register: nvidia_driver + + - name: Set fact if NVIDIA driver is present + ansible.builtin.set_fact: + has_nvidia_driver: "{{ nvidia_driver.stat.exists | default(false) }}" + + - name: Expose GPU drivers + ansible.builtin.shell: | + source /cvmfs/software.eessi.io/versions/2023.06/init/bash + /cvmfs/software.eessi.io/versions/2023.06/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh + when: has_nvidia_driver + - name: Configure VGPUs ansible.builtin.include_role: name: stackhpc.linux.vgpu diff --git a/ansible/roles/eessi/tasks/configure.yml b/ansible/roles/eessi/tasks/configure.yml index 2c765d20c..23030c3e5 100644 --- a/ansible/roles/eessi/tasks/configure.yml +++ b/ansible/roles/eessi/tasks/configure.yml @@ -15,3 +15,20 @@ - name: Ensure CVMFS config is setup # noqa: no-changed-when ansible.builtin.command: cmd: "cvmfs_config setup" + +# configure gpus +- name: Check for NVIDIA driver + ansible.builtin.stat: + path: /dev/nvidia0 + register: nvidia_driver + +- name: Set fact if NVIDIA driver is present + ansible.builtin.set_fact: + has_nvidia_driver: "{{ nvidia_driver.stat.exists | default(false) }}" + +- name: Expose GPU drivers + 
+ ansible.builtin.shell: | + source /cvmfs/software.eessi.io/versions/2023.06/init/bash + /cvmfs/software.eessi.io/versions/2023.06/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh + when: has_nvidia_driver + changed_when: true From fa78672bf8267abe189a842d6791464d6dec1903 Mon Sep 17 00:00:00 2001 From: Claudia Watson Date: Tue, 28 Oct 2025 15:27:31 +0000 Subject: [PATCH 2/4] Adding tasks to eessi/configure.yml to make eessi configure gpu node automatically --- .../roles/compute_init/files/compute-init.yml | 16 ++++++++++++++++ ansible/roles/eessi/tasks/configure.yml | 17 +++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/ansible/roles/compute_init/files/compute-init.yml b/ansible/roles/compute_init/files/compute-init.yml index 81dedf8fb..0a21303a6 100644 --- a/ansible/roles/compute_init/files/compute-init.yml +++ b/ansible/roles/compute_init/files/compute-init.yml @@ -294,6 +294,22 @@ ansible.builtin.command: cmd: "cvmfs_config setup" + # configure gpus + - name: Check for NVIDIA driver + ansible.builtin.stat: + path: /dev/nvidia0 + register: nvidia_driver + + - name: Set fact if NVIDIA driver is present + ansible.builtin.set_fact: + has_nvidia_driver: "{{ nvidia_driver.stat.exists | default(false) }}" + + - name: Expose GPU drivers + ansible.builtin.shell: | + source /cvmfs/software.eessi.io/versions/2023.06/init/bash + /cvmfs/software.eessi.io/versions/2023.06/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh + when: has_nvidia_driver + - name: Configure VGPUs ansible.builtin.include_role: name: stackhpc.linux.vgpu diff --git a/ansible/roles/eessi/tasks/configure.yml b/ansible/roles/eessi/tasks/configure.yml index 2c765d20c..23030c3e5 100644 --- a/ansible/roles/eessi/tasks/configure.yml +++ b/ansible/roles/eessi/tasks/configure.yml @@ -15,3 +15,20 @@ - name: Ensure CVMFS config is setup # noqa: no-changed-when ansible.builtin.command: cmd: "cvmfs_config setup" + +# configure gpus +- name: Check for NVIDIA driver + ansible.builtin.stat: 
+ path: /dev/nvidia0 + register: nvidia_driver + +- name: Set fact if NVIDIA driver is present + ansible.builtin.set_fact: + has_nvidia_driver: "{{ nvidia_driver.stat.exists | default(false) }}" + +- name: Expose GPU drivers + ansible.builtin.shell: | + source /cvmfs/software.eessi.io/versions/2023.06/init/bash + /cvmfs/software.eessi.io/versions/2023.06/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh + when: has_nvidia_driver + changed_when: true From d5ac3f76d0f856a6f0334a11f44ab93e47f023f7 Mon Sep 17 00:00:00 2001 From: Claudia Watson Date: Tue, 4 Nov 2025 11:55:57 +0000 Subject: [PATCH 3/4] applying edits to task names --- ansible/roles/eessi/tasks/configure.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/eessi/tasks/configure.yml b/ansible/roles/eessi/tasks/configure.yml index 23030c3e5..c5949ba76 100644 --- a/ansible/roles/eessi/tasks/configure.yml +++ b/ansible/roles/eessi/tasks/configure.yml @@ -17,12 +17,12 @@ cmd: "cvmfs_config setup" # configure gpus -- name: Check for NVIDIA driver +- name: Check for NVIDIA GPU ansible.builtin.stat: path: /dev/nvidia0 register: nvidia_driver -- name: Set fact if NVIDIA driver is present +- name: Set fact if NVIDIA GPU is present ansible.builtin.set_fact: has_nvidia_driver: "{{ nvidia_driver.stat.exists | default(false) }}" From 5f1dddeb797a7753d19074be6cecf289a399d657 Mon Sep 17 00:00:00 2001 From: Claudia Watson Date: Fri, 7 Nov 2025 16:54:46 +0000 Subject: [PATCH 4/4] replacing EESSI block with running the configure.yml task directly --- .../roles/compute_init/files/compute-init.yml | 33 ++----------------- ansible/roles/compute_init/tasks/install.yml | 2 ++ 2 files changed, 5 insertions(+), 30 deletions(-) diff --git a/ansible/roles/compute_init/files/compute-init.yml b/ansible/roles/compute_init/files/compute-init.yml index 0a21303a6..4d8c26072 100644 --- a/ansible/roles/compute_init/files/compute-init.yml +++ b/ansible/roles/compute_init/files/compute-init.yml @@ 
-277,38 +277,11 @@ name: basic_users when: enable_basic_users - - name: EESSI - when: enable_eessi # NB: don't need conditional block on enable_compute as have already exited # if not the case - block: - - name: Copy cvmfs config - ansible.builtin.copy: - src: /var/tmp/cluster/cvmfs/default.local - dest: /etc/cvmfs/default.local - owner: root - group: root - mode: "0644" - - - name: Ensure CVMFS config is setup # noqa: no-changed-when - ansible.builtin.command: - cmd: "cvmfs_config setup" - - # configure gpus - - name: Check for NVIDIA driver - ansible.builtin.stat: - path: /dev/nvidia0 - register: nvidia_driver - - - name: Set fact if NVIDIA driver is present - ansible.builtin.set_fact: - has_nvidia_driver: "{{ nvidia_driver.stat.exists | default(false) }}" - - - name: Expose GPU drivers - ansible.builtin.shell: | - source /cvmfs/software.eessi.io/versions/2023.06/init/bash - /cvmfs/software.eessi.io/versions/2023.06/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh - when: has_nvidia_driver + - name: Configure EESSI + ansible.builtin.include_tasks: tasks/eessi.yml + when: enable_eessi - name: Configure VGPUs ansible.builtin.include_role: diff --git a/ansible/roles/compute_init/tasks/install.yml b/ansible/roles/compute_init/tasks/install.yml index f7ee87645..b239877b1 100644 --- a/ansible/roles/compute_init/tasks/install.yml +++ b/ansible/roles/compute_init/tasks/install.yml @@ -54,6 +54,8 @@ dest: roles/ - src: ../../nhc dest: roles/ + - src: ../../eessi/tasks/configure.yml + dest: tasks/eessi.yml - name: Add filter_plugins to ansible.cfg ansible.builtin.lineinfile: