Skip to content

Commit 2f2a6fb

Browse files
authored
Merge pull request #1348 from elezar/systemd_e2e_rework
Add tests for CDI refresh systemd unit
2 parents c905502 + 30ad4de commit 2f2a6fb

File tree

7 files changed

+522
-225
lines changed

7 files changed

+522
-225
lines changed

tests/e2e/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ GINKGO_BIN := $(CURDIR)/bin/ginkgo
2424
# current available tests:
2525
# - nvidia-container-cli
2626
# - docker
27+
# - nvidia-cdi-refresh
2728
GINKGO_FOCUS ?=
2829

2930
test: $(GINKGO_BIN)

tests/e2e/e2e_test.go

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"errors"
2323
"os"
2424
"strconv"
25+
"strings"
2526
"testing"
2627

2728
. "github.com/onsi/ginkgo/v2"
@@ -30,17 +31,21 @@ import (
3031

3132
// Test context
3233
var (
34+
runner Runner
35+
3336
ctx context.Context
3437

3538
installCTK bool
3639

37-
imageName string
38-
imageTag string
40+
nvidiaContainerToolkitImage string
3941

4042
sshKey string
4143
sshUser string
4244
sshHost string
4345
sshPort string
46+
47+
localCacheDir string
48+
toolkitInstaller *ToolkitInstaller
4449
)
4550

4651
func TestMain(t *testing.T) {
@@ -49,31 +54,66 @@ func TestMain(t *testing.T) {
4954
RegisterFailHandler(Fail)
5055

5156
ctx = context.Background()
52-
getTestEnv()
5357

5458
RunSpecs(t,
5559
suiteName,
5660
)
5761
}
5862

63+
var _ = BeforeSuite(func() {
64+
getTestEnv()
65+
66+
runner = NewRunner(
67+
WithHost(sshHost),
68+
WithPort(sshPort),
69+
WithSshKey(sshKey),
70+
WithSshUser(sshUser),
71+
)
72+
73+
// Create a tempdir on the runner.
74+
tmpdir, _, err := runner.Run("mktemp -d --tmpdir=/tmp nvctk-e2e-test-cacheXXX")
75+
Expect(err).ToNot(HaveOccurred())
76+
Expect(strings.TrimSpace(tmpdir)).ToNot(BeEmpty())
77+
78+
localCacheDir = strings.TrimSpace(tmpdir)
79+
80+
toolkitInstaller, err = NewToolkitInstaller(
81+
WithToolkitImage(nvidiaContainerToolkitImage),
82+
WithCacheDir(localCacheDir),
83+
)
84+
Expect(err).ToNot(HaveOccurred())
85+
86+
_, _, err = toolkitInstaller.PrepareCache(runner)
87+
Expect(err).ToNot(HaveOccurred())
88+
89+
if installCTK {
90+
_, _, err := toolkitInstaller.Install(runner)
91+
Expect(err).ToNot(HaveOccurred())
92+
93+
_, _, err = runner.Run(`sudo nvidia-ctk runtime configure --runtime=docker`)
94+
Expect(err).ToNot(HaveOccurred())
95+
96+
_, _, err = runner.Run(`sudo systemctl restart docker`)
97+
Expect(err).ToNot(HaveOccurred())
98+
}
99+
})
100+
59101
// getTestEnv gets the test environment variables
60102
func getTestEnv() {
61103
defer GinkgoRecover()
62104

63105
installCTK = getEnvVarOrDefault("E2E_INSTALL_CTK", false)
64106

65-
if installCTK {
66-
imageName = getRequiredEnvvar[string]("E2E_IMAGE_NAME")
67-
imageTag = getRequiredEnvvar[string]("E2E_IMAGE_TAG")
68-
}
107+
imageName := getRequiredEnvvar[string]("E2E_IMAGE_NAME")
108+
imageTag := getRequiredEnvvar[string]("E2E_IMAGE_TAG")
109+
nvidiaContainerToolkitImage = imageName + ":" + imageTag
69110

70111
sshHost = getEnvVarOrDefault("E2E_SSH_HOST", "")
71112
if sshHost != "" {
72113
sshKey = getRequiredEnvvar[string]("E2E_SSH_KEY")
73114
sshUser = getRequiredEnvvar[string]("E2E_SSH_USER")
74115
sshPort = getEnvVarOrDefault("E2E_SSH_PORT", "22")
75116
}
76-
77117
}
78118

79119
// getRequiredEnvvar returns the specified envvar if set or raises an error.

tests/e2e/installer.go

Lines changed: 69 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -19,107 +19,114 @@ package e2e
1919
import (
2020
"bytes"
2121
"fmt"
22+
"strings"
2223
"text/template"
2324
)
2425

25-
// dockerInstallTemplate is a template for installing the NVIDIA Container Toolkit
26-
// on a host using Docker.
27-
var dockerInstallTemplate = `
28-
#! /usr/bin/env bash
26+
var prepareInstallerCacheTemplate = `
2927
set -xe
3028
31-
# if the TEMP_DIR is already set, use it
32-
if [ -f /tmp/ctk_e2e_temp_dir.txt ]; then
33-
TEMP_DIR=$(cat /tmp/ctk_e2e_temp_dir.txt)
34-
else
35-
TEMP_DIR="/tmp/ctk_e2e.$(date +%s)_$RANDOM"
36-
echo "$TEMP_DIR" > /tmp/ctk_e2e_temp_dir.txt
37-
fi
38-
39-
# if TEMP_DIR does not exist, create it
40-
if [ ! -d "$TEMP_DIR" ]; then
41-
mkdir -p "$TEMP_DIR"
42-
fi
43-
44-
# Given that docker has an init function that checks for the existence of the
45-
# nvidia-container-toolkit, we need to create a symlink to the nvidia-container-runtime-hook
46-
# in the /usr/bin directory.
47-
# See https://github.com/moby/moby/blob/20a05dabf44934447d1a66cdd616cc803b81d4e2/daemon/nvidia_linux.go#L32-L46
48-
sudo rm -f /usr/bin/nvidia-container-runtime-hook
49-
sudo ln -s "$TEMP_DIR/toolkit/nvidia-container-runtime-hook" /usr/bin/nvidia-container-runtime-hook
50-
51-
docker run --pid=host --rm -i --privileged \
52-
-v /:/host \
53-
-v /var/run/docker.sock:/var/run/docker.sock \
54-
-v "$TEMP_DIR:$TEMP_DIR" \
55-
-v /etc/docker:/config-root \
56-
{{.Image}} \
57-
--root "$TEMP_DIR" \
58-
--runtime=docker \
59-
--config=/config-root/daemon.json \
60-
--driver-root=/ \
61-
--no-daemon \
62-
--restart-mode=systemd
29+
mkdir -p {{.CacheDir}}
30+
31+
docker run --rm -v {{.CacheDir}}:/cache --entrypoint="sh" {{.ToolkitImage}}-packaging -c "cp -p -R /artifacts/* /cache/"
6332
`
6433

65-
type ToolkitInstaller struct {
66-
runner Runner
67-
template string
34+
var installFromImageTemplate = `
35+
set -xe
6836
69-
Image string
70-
}
37+
cd {{.CacheDir}}/packages/ubuntu18.04/amd64
7138
72-
type installerOption func(*ToolkitInstaller)
39+
{{if .WithSudo }}sudo {{end}}dpkg -i libnvidia-container1_*_amd64.deb \
40+
libnvidia-container-tools_*_amd64.deb \
41+
nvidia-container-toolkit-base_*_amd64.deb \
42+
nvidia-container-toolkit_*_amd64.deb
7343
74-
func WithRunner(r Runner) installerOption {
75-
return func(i *ToolkitInstaller) {
76-
i.runner = r
77-
}
44+
cd -
45+
46+
nvidia-container-cli --version
47+
`
48+
49+
type ToolkitInstaller struct {
50+
ToolkitImage string
51+
CacheDir string
7852
}
7953

80-
func WithImage(image string) installerOption {
54+
type installerOption func(*ToolkitInstaller)
55+
56+
func WithToolkitImage(image string) installerOption {
8157
return func(i *ToolkitInstaller) {
82-
i.Image = image
58+
i.ToolkitImage = image
8359
}
8460
}
8561

86-
func WithTemplate(template string) installerOption {
62+
func WithCacheDir(cacheDir string) installerOption {
8763
return func(i *ToolkitInstaller) {
88-
i.template = template
64+
i.CacheDir = cacheDir
8965
}
9066
}
9167

9268
func NewToolkitInstaller(opts ...installerOption) (*ToolkitInstaller, error) {
93-
i := &ToolkitInstaller{
94-
runner: localRunner{},
95-
template: dockerInstallTemplate,
96-
}
69+
i := &ToolkitInstaller{}
9770

9871
for _, opt := range opts {
9972
opt(i)
10073
}
10174

102-
if i.Image == "" {
75+
if i.ToolkitImage == "" {
10376
return nil, fmt.Errorf("image is required")
10477
}
10578

10679
return i, nil
10780
}
10881

109-
func (i *ToolkitInstaller) Install() error {
82+
// PrepareCache ensures that the installer (package) cache is created on the runner.
83+
// This can be used to ensure that docker is not REQUIRED in an inner container.
84+
func (i *ToolkitInstaller) PrepareCache(runner Runner) (string, string, error) {
85+
renderedScript, err := i.renderScript(prepareInstallerCacheTemplate, false)
86+
if err != nil {
87+
return "", "", err
88+
}
89+
90+
return runner.Run(renderedScript)
91+
}
92+
93+
func (i *ToolkitInstaller) Install(runner Runner) (string, string, error) {
94+
uid, _, err := runner.Run("id -u")
95+
if err != nil {
96+
return "", "", err
97+
}
98+
withSudo := false
99+
if strings.TrimSpace(uid) != "0" {
100+
withSudo = true
101+
}
102+
renderedScript, err := i.renderScript(installFromImageTemplate, withSudo)
103+
if err != nil {
104+
return "", "", err
105+
}
106+
107+
return runner.Run(renderedScript)
108+
}
109+
110+
func (i *ToolkitInstaller) renderScript(scriptTemplate string, withSudo bool) (string, error) {
110111
// Parse the supplied script template
111-
tmpl, err := template.New("installScript").Parse(i.template)
112+
tmpl, err := template.New("template").Parse(scriptTemplate)
112113
if err != nil {
113-
return fmt.Errorf("error parsing template: %w", err)
114+
return "", fmt.Errorf("error parsing template: %w", err)
114115
}
115116

117+
templateInfo := struct {
118+
*ToolkitInstaller
119+
WithSudo bool
120+
}{
121+
ToolkitInstaller: i,
122+
WithSudo: withSudo,
123+
}
116124
// Execute the template
117125
var renderedScript bytes.Buffer
118-
err = tmpl.Execute(&renderedScript, i)
126+
err = tmpl.Execute(&renderedScript, templateInfo)
119127
if err != nil {
120-
return fmt.Errorf("error executing template: %w", err)
128+
return "", fmt.Errorf("error executing template: %w", err)
121129
}
122130

123-
_, _, err = i.runner.Run(renderedScript.String())
124-
return err
131+
return renderedScript.String(), nil
125132
}

0 commit comments

Comments
 (0)