Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions tests/e2e/installer.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,20 @@ docker run --rm -v {{.CacheDir}}:/cache --entrypoint="sh" {{.ToolkitImage}}-pack
var installFromImageTemplate = `
set -xe

cd {{.CacheDir}}/packages/ubuntu18.04/amd64

{{if .WithSudo }}sudo {{end}}dpkg -i libnvidia-container1_*_amd64.deb \
libnvidia-container-tools_*_amd64.deb \
nvidia-container-toolkit-base_*_amd64.deb \
nvidia-container-toolkit_*_amd64.deb
arch="$(uname -m)"
case "${arch##*-}" in
x86_64 | amd64) ARCH='amd64' ;;
ppc64el | ppc64le) ARCH='ppc64le' ;;
aarch64 | arm64) ARCH='arm64' ;;
*) echo "unsupported architecture" ; exit 1 ;;
esac

cd {{.CacheDir}}/packages/ubuntu18.04/${ARCH}

{{if .WithSudo }}sudo {{end}}dpkg -i libnvidia-container1_*_${ARCH}.deb \
libnvidia-container-tools_*_${ARCH}.deb \
nvidia-container-toolkit-base_*_${ARCH}.deb \
nvidia-container-toolkit_*_${ARCH}.deb

cd -

Expand Down
4 changes: 1 addition & 3 deletions tests/e2e/nvidia-cdi-refresh_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,7 @@ var _ = Describe("nvidia-cdi-refresh", Ordered, ContinueOnFailure, Label("system

BeforeAll(func(ctx context.Context) {
var err error
// TODO: We set installCTK to true here to SKIP the mounting of the files from the host.
// The test here does NOT require the host toolkit.
systemdRunner, err = NewNestedContainerRunner(runner, outerContainerImage, true, containerName, localCacheDir)
systemdRunner, err = NewNestedContainerRunner(runner, outerContainerImage, false, containerName, localCacheDir, true)
Expect(err).ToNot(HaveOccurred())
for range 10 {
state, _, err := systemdRunner.Run(getSystemStateScript)
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/nvidia-container-cli_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ var _ = Describe("nvidia-container-cli", Ordered, ContinueOnFailure, Label("libn

BeforeAll(func(ctx context.Context) {
var err error
nestedContainerRunner, err = NewNestedContainerRunner(runner, "ubuntu", installCTK, containerName, localCacheDir)
nestedContainerRunner, err = NewNestedContainerRunner(runner, "ubuntu", !installCTK, containerName, localCacheDir, true)
Expect(err).ToNot(HaveOccurred())

if installCTK {
Expand Down
35 changes: 22 additions & 13 deletions tests/e2e/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,12 @@ import (

const (
installPrerequisitesScript = `
export DEBIAN_FRONTEND=noninteractive
apt-get update && apt-get install -y curl gnupg2
`
set -e
export DEBIAN_FRONTEND=noninteractive
# Install prerequisites
apt-get update
apt-get install -y curl gnupg2
`
)

type localRunner struct{}
Expand Down Expand Up @@ -96,7 +99,7 @@ func NewRunner(opts ...runnerOption) Runner {
// NewNestedContainerRunner creates a new nested container runner.
// A nested container runs a container inside another container based on a
// given runner (remote or local).
func NewNestedContainerRunner(runner Runner, baseImage string, installCTK bool, containerName string, cacheDir string) (Runner, error) {
func NewNestedContainerRunner(runner Runner, baseImage string, mountToolkitFromHost bool, containerName string, cacheDir string, requiresGPUs bool) (Runner, error) {
// If a container with the same name exists from a previous test run, remove it first.
// Ignore errors as container might not exist
_, _, err := runner.Run(fmt.Sprintf("docker rm -f %s 2>/dev/null || true", containerName))
Expand All @@ -106,13 +109,24 @@ func NewNestedContainerRunner(runner Runner, baseImage string, installCTK bool,

var additionalContainerArguments []string

if requiresGPUs {
// If the container requires access to GPUs we explicitly add the nvidia
// runtime and set `NVIDIA_VISIBLE_DEVICES` to trigger jit-cdi spec
// generation.
additionalContainerArguments = append(additionalContainerArguments,
"--runtime=nvidia",
"-e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all",
)
}

if cacheDir != "" {
additionalContainerArguments = append(additionalContainerArguments,
"-v "+cacheDir+":"+cacheDir+":ro",
)
}

if !installCTK {
if mountToolkitFromHost {
// TODO: This is actually ONLY needed for the CLI tests.
// If mountToolkitFromHost is true, we use the preinstalled toolkit.
// This means we need to add toolkit libraries and binaries from the "host"

Expand Down Expand Up @@ -179,6 +193,7 @@ func NewNestedContainerRunner(runner Runner, baseImage string, installCTK bool,
if err != nil {
return nil, err
}

_, _, err = runner.Run(script)
if err != nil {
return nil, fmt.Errorf("failed to run start container script: %w", err)
Expand All @@ -191,7 +206,7 @@ func NewNestedContainerRunner(runner Runner, baseImage string, installCTK bool,

_, _, err = inContainer.Run(installPrerequisitesScript)
if err != nil {
return nil, fmt.Errorf("failed to install docker: %w", err)
return nil, fmt.Errorf("failed to install prerequisites: %w", err)
}

return inContainer, nil
Expand Down Expand Up @@ -296,20 +311,14 @@ func connectOrDie(sshKey, sshUser, host, port string) (*ssh.Client, error) {

// outerContainerTemplate represents a template to start a container with
// a name specified.
// The container is given access to all NVIDIA gpus by explicitly using the
// nvidia runtime and the `runtime.nvidia.com/gpu=all` device to trigger JIT
// CDI spec generation.
// The template also allows for additional arguments to be specified.
type outerContainer struct {
// Name is the container name; Render passes it to `docker run --name`.
Name string
// BaseImage is presumably the image the container is started from; the part
// of the template that consumes it is not visible here (TODO confirm).
BaseImage string
// AdditionalArguments are extra `docker run` arguments; Render emits each
// entry on its own backslash-continued line of the command.
AdditionalArguments []string
}

func (o *outerContainer) Render() (string, error) {
tmpl, err := template.New("startContainer").Parse(`docker run -d --name {{.Name}} --privileged --runtime=nvidia \
-e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all \
-e NVIDIA_DRIVER_CAPABILITIES=all \
tmpl, err := template.New("startContainer").Parse(`docker run -d --name {{.Name}} --privileged \
{{ range $i, $a := .AdditionalArguments -}}
{{ $a }} \
{{ end -}}
Expand Down