Skip to content

Commit 06503bd

Browse files
authored
Make eessi configure gpu node automatically (#841)
* Adding tasks to eessi/configure.yml to make eessi configure gpu node automatically
* Adding tasks to eessi/configure.yml to make eessi configure gpu node automatically
* applying edits to task names
* replacing EESSI block with running the configure.yml task directly
* Bump CI image
* removed v from trivyscan.yml
* fix eessi compute-init tasks to include role defaults
* removed eessi tasks from compute-init/tasks/export.yml
* bump CI image
1 parent c7054bd commit 06503bd

File tree

6 files changed

+28
-29
lines changed

6 files changed

+28
-29
lines changed

.github/workflows/trivyscan.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ jobs:
102102
run: sudo guestmount -a /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 -i --ro -o allow_other './${{ steps.manifest.outputs.image-name }}'
103103

104104
- name: Run Trivy vulnerability scanner
105-
uses: aquasecurity/trivy-action@v0.33.1
105+
uses: aquasecurity/trivy-action@0.33.1
106106
with:
107107
scan-type: fs
108108
scan-ref: "${{ steps.manifest.outputs.image-name }}"
@@ -122,7 +122,7 @@ jobs:
122122
category: "${{ matrix.build }}"
123123

124124
- name: Fail if scan has CRITICAL vulnerabilities
125-
uses: aquasecurity/trivy-action@v0.33.1
125+
uses: aquasecurity/trivy-action@0.33.1
126126
with:
127127
scan-type: fs
128128
scan-ref: "${{ steps.manifest.outputs.image-name }}"

ansible/roles/compute_init/files/compute-init.yml

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -277,22 +277,13 @@
277277
name: basic_users
278278
when: enable_basic_users
279279

280-
- name: EESSI
281-
when: enable_eessi
282280
# NB: we don't need a conditional block on enable_compute as we have already exited
283281
# if that is not the case
284-
block:
285-
- name: Copy cvmfs config
286-
ansible.builtin.copy:
287-
src: /var/tmp/cluster/cvmfs/default.local
288-
dest: /etc/cvmfs/default.local
289-
owner: root
290-
group: root
291-
mode: "0644"
292-
293-
- name: Ensure CVMFS config is setup # noqa: no-changed-when
294-
ansible.builtin.command:
295-
cmd: "cvmfs_config setup"
282+
- name: Configure EESSI
283+
ansible.builtin.include_role:
284+
name: eessi
285+
tasks_from: configure.yml
286+
when: enable_eessi
296287

297288
- name: Configure VGPUs
298289
ansible.builtin.include_role:

ansible/roles/compute_init/tasks/export.yml

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -62,17 +62,6 @@
6262
run_once: true
6363
delegate_to: "{{ groups['control'] | first }}"
6464

65-
- name: Copy EESSI CVMFS config to /exports/cluster
66-
ansible.builtin.copy:
67-
src: /etc/cvmfs/default.local
68-
dest: /exports/cluster/cvmfs/default.local
69-
owner: slurm
70-
group: root
71-
mode: "0644"
72-
remote_src: true
73-
run_once: true
74-
delegate_to: "{{ groups['control'] | first }}"
75-
7665
- name: Export cacerts
7766
ansible.builtin.include_role:
7867
name: cacerts

ansible/roles/compute_init/tasks/install.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@
5454
dest: roles/
5555
- src: ../../nhc
5656
dest: roles/
57+
- src: ../../eessi
58+
dest: roles/
5759

5860
- name: Add filter_plugins to ansible.cfg
5961
ansible.builtin.lineinfile:

ansible/roles/eessi/tasks/configure.yml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,20 @@
1515
- name: Ensure CVMFS config is setup # noqa: no-changed-when
1616
ansible.builtin.command:
1717
cmd: "cvmfs_config setup"
18+
19+
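# Configure GPUs: if /dev/nvidia0 exists, run EESSI's link_nvidia_host_libraries.sh to expose the host NVIDIA libraries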
20+
- name: Check for NVIDIA GPU
21+
ansible.builtin.stat:
22+
path: /dev/nvidia0
23+
register: nvidia_driver
24+
25+
- name: Set fact if NVIDIA GPU is present
26+
ansible.builtin.set_fact:
27+
has_nvidia_driver: "{{ nvidia_driver.stat.exists | default(false) }}"
28+
29+
- name: Expose GPU drivers
30+
ansible.builtin.shell: |
31+
source /cvmfs/software.eessi.io/versions/2023.06/init/bash
32+
/cvmfs/software.eessi.io/versions/2023.06/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh
33+
when: has_nvidia_driver
34+
changed_when: true
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"cluster_image": {
3-
"RL8": "openhpc-RL8-251027-1123-d389c00b",
4-
"RL9": "openhpc-RL9-251027-1123-d389c00b"
3+
"RL8": "openhpc-RL8-251119-1202-332ac921",
4+
"RL9": "openhpc-RL9-251119-1202-332ac921"
55
}
66
}

0 commit comments

Comments
 (0)