Skip to content

Commit 4d5bc4f

Browse files
authored
Merge pull request #1432 from elezar/enable-arm64-testing
Enable arm64 testing for contianer toolkit
2 parents e36e976 + 088234b commit 4d5bc4f

File tree

4 files changed

+38
-23
lines changed

4 files changed

+38
-23
lines changed

tests/e2e/installer.go

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,20 @@ docker run --rm -v {{.CacheDir}}:/cache --entrypoint="sh" {{.ToolkitImage}}-pack
3434
var installFromImageTemplate = `
3535
set -xe
3636
37-
cd {{.CacheDir}}/packages/ubuntu18.04/amd64
38-
39-
{{if .WithSudo }}sudo {{end}}dpkg -i libnvidia-container1_*_amd64.deb \
40-
libnvidia-container-tools_*_amd64.deb \
41-
nvidia-container-toolkit-base_*_amd64.deb \
42-
nvidia-container-toolkit_*_amd64.deb
37+
arch="$(uname -m)"
38+
case "${arch##*-}" in
39+
x86_64 | amd64) ARCH='amd64' ;;
40+
ppc64el | ppc64le) ARCH='ppc64le' ;;
41+
aarch64 | arm64) ARCH='arm64' ;;
42+
*) echo "unsupported architecture" ; exit 1 ;;
43+
esac
44+
45+
cd {{.CacheDir}}/packages/ubuntu18.04/${ARCH}
46+
47+
{{if .WithSudo }}sudo {{end}}dpkg -i libnvidia-container1_*_${ARCH}.deb \
48+
libnvidia-container-tools_*_${ARCH}.deb \
49+
nvidia-container-toolkit-base_*_${ARCH}.deb \
50+
nvidia-container-toolkit_*_${ARCH}.deb
4351
4452
cd -
4553

tests/e2e/nvidia-cdi-refresh_test.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,7 @@ var _ = Describe("nvidia-cdi-refresh", Ordered, ContinueOnFailure, Label("system
132132

133133
BeforeAll(func(ctx context.Context) {
134134
var err error
135-
// TODO: We set installCTK to true here to SKIP the mounting of the files from the host.
136-
// The test here does NOT require the host toolkit.
137-
systemdRunner, err = NewNestedContainerRunner(runner, outerContainerImage, true, containerName, localCacheDir)
135+
systemdRunner, err = NewNestedContainerRunner(runner, outerContainerImage, false, containerName, localCacheDir, true)
138136
Expect(err).ToNot(HaveOccurred())
139137
for range 10 {
140138
state, _, err := systemdRunner.Run(getSystemStateScript)

tests/e2e/nvidia-container-cli_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ var _ = Describe("nvidia-container-cli", Ordered, ContinueOnFailure, Label("libn
7878

7979
BeforeAll(func(ctx context.Context) {
8080
var err error
81-
nestedContainerRunner, err = NewNestedContainerRunner(runner, "ubuntu", installCTK, containerName, localCacheDir)
81+
nestedContainerRunner, err = NewNestedContainerRunner(runner, "ubuntu", !installCTK, containerName, localCacheDir, true)
8282
Expect(err).ToNot(HaveOccurred())
8383

8484
if installCTK {

tests/e2e/runner.go

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,12 @@ import (
3030

3131
const (
3232
installPrerequisitesScript = `
33-
export DEBIAN_FRONTEND=noninteractive
34-
apt-get update && apt-get install -y curl gnupg2
35-
`
33+
set -e
34+
export DEBIAN_FRONTEND=noninteractive
35+
# Install prerequisites
36+
apt-get update
37+
apt-get install -y curl gnupg2
38+
`
3639
)
3740

3841
type localRunner struct{}
@@ -96,7 +99,7 @@ func NewRunner(opts ...runnerOption) Runner {
9699
// NewNestedContainerRunner creates a new nested container runner.
97100
// A nested container runs a container inside another container based on a
98101
// given runner (remote or local).
99-
func NewNestedContainerRunner(runner Runner, baseImage string, installCTK bool, containerName string, cacheDir string) (Runner, error) {
102+
func NewNestedContainerRunner(runner Runner, baseImage string, mountToolkitFromHost bool, containerName string, cacheDir string, requiresGPUs bool) (Runner, error) {
100103
// If a container with the same name exists from a previous test run, remove it first.
101104
// Ignore errors as container might not exist
102105
_, _, err := runner.Run(fmt.Sprintf("docker rm -f %s 2>/dev/null || true", containerName))
@@ -106,13 +109,24 @@ func NewNestedContainerRunner(runner Runner, baseImage string, installCTK bool,
106109

107110
var additionalContainerArguments []string
108111

112+
if requiresGPUs {
113+
// If the container requires access to GPUs we explicitly add the nvidia
114+
// runtime and set `NVIDIA_VISIBLE_DEVICES` to trigger jit-cdi spec
115+
// generation.
116+
additionalContainerArguments = append(additionalContainerArguments,
117+
"--runtime=nvidia",
118+
"-e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all",
119+
)
120+
}
121+
109122
if cacheDir != "" {
110123
additionalContainerArguments = append(additionalContainerArguments,
111124
"-v "+cacheDir+":"+cacheDir+":ro",
112125
)
113126
}
114127

115-
if !installCTK {
128+
if mountToolkitFromHost {
129+
// TODO: This is actually ONLY needed for the CLI tests.
116130
// If installCTK is false, we use the preinstalled toolkit.
117131
// This means we need to add toolkit libraries and binaries from the "host"
118132

@@ -179,6 +193,7 @@ func NewNestedContainerRunner(runner Runner, baseImage string, installCTK bool,
179193
if err != nil {
180194
return nil, err
181195
}
196+
182197
_, _, err = runner.Run(script)
183198
if err != nil {
184199
return nil, fmt.Errorf("failed to run start container script: %w", err)
@@ -191,7 +206,7 @@ func NewNestedContainerRunner(runner Runner, baseImage string, installCTK bool,
191206

192207
_, _, err = inContainer.Run(installPrerequisitesScript)
193208
if err != nil {
194-
return nil, fmt.Errorf("failed to install docker: %w", err)
209+
return nil, fmt.Errorf("failed to install prerequisites: %w", err)
195210
}
196211

197212
return inContainer, nil
@@ -296,20 +311,14 @@ func connectOrDie(sshKey, sshUser, host, port string) (*ssh.Client, error) {
296311

297312
// outerContainerTemplate represents a template to start a container with
298313
// a name specified.
299-
// The container is given access to all NVIDIA gpus by explicitly using the
300-
// nvidia runtime and the `runtime.nvidia.com/gpu=all` device to trigger JIT
301-
// CDI spec generation.
302-
// The template also allows for additional arguments to be specified.
303314
type outerContainer struct {
304315
Name string
305316
BaseImage string
306317
AdditionalArguments []string
307318
}
308319

309320
func (o *outerContainer) Render() (string, error) {
310-
tmpl, err := template.New("startContainer").Parse(`docker run -d --name {{.Name}} --privileged --runtime=nvidia \
311-
-e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all \
312-
-e NVIDIA_DRIVER_CAPABILITIES=all \
321+
tmpl, err := template.New("startContainer").Parse(`docker run -d --name {{.Name}} --privileged \
313322
{{ range $i, $a := .AdditionalArguments -}}
314323
{{ $a }} \
315324
{{ end -}}

0 commit comments

Comments
 (0)