Skip to content

Commit 72a0400

Browse files
committed
Only allow host-relative LDConfig paths
This change only allows host-relative LDConfig paths. An allow-ldconfig-from-container feature flag is added to allow for this the default behaviour to be changed. Signed-off-by: Evan Lezar <elezar@nvidia.com>
1 parent 6385bd5 commit 72a0400

File tree

6 files changed

+230
-66
lines changed

6 files changed

+230
-66
lines changed

internal/config/config.go

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ package config
1818

1919
import (
2020
"bufio"
21+
"errors"
22+
"fmt"
2123
"os"
2224
"path/filepath"
2325
"strings"
@@ -51,6 +53,8 @@ var (
5153
NVIDIAContainerToolkitExecutable = "nvidia-container-toolkit"
5254
)
5355

56+
var errInvalidConfig = errors.New("invalid config value")
57+
5458
// Config represents the contents of the config.toml file for the NVIDIA Container Toolkit
5559
// Note: This is currently duplicated by the HookConfig in cmd/nvidia-container-toolkit/hook_config.go
5660
type Config struct {
@@ -127,7 +131,18 @@ func GetDefault() (*Config, error) {
127131
return &d, nil
128132
}
129133

130-
func getLdConfigPath() string {
134+
// assertValid checks for a valid config.
135+
func (c *Config) assertValid() error {
136+
if !c.Features.AllowLDConfigFromContainer.IsEnabled() && !strings.HasPrefix(c.NVIDIAContainerCLIConfig.Ldconfig, "@") {
137+
return fmt.Errorf("%w: nvidia-container-cli.ldconfig value %q is not host-relative (does not start with a '@')", errInvalidConfig, c.NVIDIAContainerCLIConfig.Ldconfig)
138+
}
139+
return nil
140+
}
141+
142+
// getLdConfigPath allows us to override this function for testing.
143+
var getLdConfigPath = getLdConfigPathStub
144+
145+
func getLdConfigPathStub() string {
131146
return NormalizeLDConfigPath("@/sbin/ldconfig")
132147
}
133148

internal/config/config_test.go

Lines changed: 131 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -44,23 +44,21 @@ func TestGetConfigWithCustomConfig(t *testing.T) {
4444

4545
func TestGetConfig(t *testing.T) {
4646
testCases := []struct {
47-
description string
48-
contents []string
49-
expectedError error
50-
inspectLdconfig bool
51-
distIdsLike []string
52-
expectedConfig *Config
47+
description string
48+
contents []string
49+
expectedError error
50+
distIdsLike []string
51+
expectedConfig *Config
5352
}{
5453
{
55-
description: "empty config is default",
56-
inspectLdconfig: true,
54+
description: "empty config is default",
5755
expectedConfig: &Config{
5856
AcceptEnvvarUnprivileged: true,
5957
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
6058
NVIDIAContainerCLIConfig: ContainerCLIConfig{
6159
Root: "",
6260
LoadKmods: true,
63-
Ldconfig: "WAS_CHECKED",
61+
Ldconfig: "@/test/ld/config/path",
6462
},
6563
NVIDIAContainerRuntimeConfig: RuntimeConfig{
6664
DebugFilePath: "/dev/null",
@@ -93,7 +91,7 @@ func TestGetConfig(t *testing.T) {
9391
"supported-driver-capabilities = \"compute,utility\"",
9492
"nvidia-container-cli.root = \"/bar/baz\"",
9593
"nvidia-container-cli.load-kmods = false",
96-
"nvidia-container-cli.ldconfig = \"/foo/bar/ldconfig\"",
94+
"nvidia-container-cli.ldconfig = \"@/foo/bar/ldconfig\"",
9795
"nvidia-container-cli.user = \"foo:bar\"",
9896
"nvidia-container-runtime.debug = \"/foo/bar\"",
9997
"nvidia-container-runtime.discover-mode = \"not-legacy\"",
@@ -113,7 +111,7 @@ func TestGetConfig(t *testing.T) {
113111
NVIDIAContainerCLIConfig: ContainerCLIConfig{
114112
Root: "/bar/baz",
115113
LoadKmods: false,
116-
Ldconfig: "/foo/bar/ldconfig",
114+
Ldconfig: "@/foo/bar/ldconfig",
117115
User: "foo:bar",
118116
},
119117
NVIDIAContainerRuntimeConfig: RuntimeConfig{
@@ -146,6 +144,53 @@ func TestGetConfig(t *testing.T) {
146144
},
147145
},
148146
},
147+
{
148+
description: "feature allows ldconfig to be overridden",
149+
contents: []string{
150+
"[nvidia-container-cli]",
151+
"ldconfig = \"/foo/bar/ldconfig\"",
152+
"[features]",
153+
"allow-ldconfig-from-container = true",
154+
},
155+
expectedConfig: &Config{
156+
AcceptEnvvarUnprivileged: true,
157+
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
158+
NVIDIAContainerCLIConfig: ContainerCLIConfig{
159+
Ldconfig: "/foo/bar/ldconfig",
160+
LoadKmods: true,
161+
},
162+
NVIDIAContainerRuntimeConfig: RuntimeConfig{
163+
DebugFilePath: "/dev/null",
164+
LogLevel: "info",
165+
Runtimes: []string{"docker-runc", "runc", "crun"},
166+
Mode: "auto",
167+
Modes: modesConfig{
168+
CSV: csvModeConfig{
169+
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
170+
},
171+
CDI: cdiModeConfig{
172+
DefaultKind: "nvidia.com/gpu",
173+
AnnotationPrefixes: []string{
174+
"cdi.k8s.io/",
175+
},
176+
SpecDirs: []string{
177+
"/etc/cdi",
178+
"/var/run/cdi",
179+
},
180+
},
181+
},
182+
},
183+
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
184+
Path: "nvidia-container-runtime-hook",
185+
},
186+
NVIDIACTKConfig: CTKConfig{
187+
Path: "nvidia-ctk",
188+
},
189+
Features: features{
190+
AllowLDConfigFromContainer: ptr(feature(true)),
191+
},
192+
},
193+
},
149194
{
150195
description: "config options set in section",
151196
contents: []string{
@@ -154,7 +199,7 @@ func TestGetConfig(t *testing.T) {
154199
"[nvidia-container-cli]",
155200
"root = \"/bar/baz\"",
156201
"load-kmods = false",
157-
"ldconfig = \"/foo/bar/ldconfig\"",
202+
"ldconfig = \"@/foo/bar/ldconfig\"",
158203
"user = \"foo:bar\"",
159204
"[nvidia-container-runtime]",
160205
"debug = \"/foo/bar\"",
@@ -179,7 +224,7 @@ func TestGetConfig(t *testing.T) {
179224
NVIDIAContainerCLIConfig: ContainerCLIConfig{
180225
Root: "/bar/baz",
181226
LoadKmods: false,
182-
Ldconfig: "/foo/bar/ldconfig",
227+
Ldconfig: "@/foo/bar/ldconfig",
183228
User: "foo:bar",
184229
},
185230
NVIDIAContainerRuntimeConfig: RuntimeConfig{
@@ -213,16 +258,15 @@ func TestGetConfig(t *testing.T) {
213258
},
214259
},
215260
{
216-
description: "suse config",
217-
distIdsLike: []string{"suse", "opensuse"},
218-
inspectLdconfig: true,
261+
description: "suse config",
262+
distIdsLike: []string{"suse", "opensuse"},
219263
expectedConfig: &Config{
220264
AcceptEnvvarUnprivileged: true,
221265
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
222266
NVIDIAContainerCLIConfig: ContainerCLIConfig{
223267
Root: "",
224268
LoadKmods: true,
225-
Ldconfig: "WAS_CHECKED",
269+
Ldconfig: "@/test/ld/config/path",
226270
User: "root:video",
227271
},
228272
NVIDIAContainerRuntimeConfig: RuntimeConfig{
@@ -250,9 +294,8 @@ func TestGetConfig(t *testing.T) {
250294
},
251295
},
252296
{
253-
description: "suse config overrides user",
254-
distIdsLike: []string{"suse", "opensuse"},
255-
inspectLdconfig: true,
297+
description: "suse config overrides user",
298+
distIdsLike: []string{"suse", "opensuse"},
256299
contents: []string{
257300
"nvidia-container-cli.user = \"foo:bar\"",
258301
},
@@ -262,7 +305,7 @@ func TestGetConfig(t *testing.T) {
262305
NVIDIAContainerCLIConfig: ContainerCLIConfig{
263306
Root: "",
264307
LoadKmods: true,
265-
Ldconfig: "WAS_CHECKED",
308+
Ldconfig: "@/test/ld/config/path",
266309
User: "foo:bar",
267310
},
268311
NVIDIAContainerRuntimeConfig: RuntimeConfig{
@@ -293,6 +336,7 @@ func TestGetConfig(t *testing.T) {
293336

294337
for _, tc := range testCases {
295338
t.Run(tc.description, func(t *testing.T) {
339+
defer setGetLdConfigPathForTest()()
296340
defer setGetDistIDLikeForTest(tc.distIdsLike)()
297341
reader := strings.NewReader(strings.Join(tc.contents, "\n"))
298342

@@ -305,17 +349,59 @@ func TestGetConfig(t *testing.T) {
305349
cfg, err := tomlCfg.Config()
306350
require.NoError(t, err)
307351

308-
// We first handle the ldconfig path since this is currently system-dependent.
309-
if tc.inspectLdconfig {
310-
ldconfig := cfg.NVIDIAContainerCLIConfig.Ldconfig
311-
require.True(t, strings.HasPrefix(ldconfig, "@/sbin/ldconfig"))
312-
remaining := strings.TrimPrefix(ldconfig, "@/sbin/ldconfig")
313-
require.True(t, remaining == ".real" || remaining == "")
352+
require.EqualValues(t, tc.expectedConfig, cfg)
353+
})
354+
}
355+
}
314356

315-
cfg.NVIDIAContainerCLIConfig.Ldconfig = "WAS_CHECKED"
316-
}
357+
func TestAssertValid(t *testing.T) {
358+
defer setGetLdConfigPathForTest()()
317359

318-
require.EqualValues(t, tc.expectedConfig, cfg)
360+
testCases := []struct {
361+
description string
362+
config *Config
363+
expectedError error
364+
}{
365+
{
366+
description: "default is valid",
367+
config: func() *Config {
368+
config, _ := GetDefault()
369+
return config
370+
}(),
371+
},
372+
{
373+
description: "alternative host ldconfig path is valid",
374+
config: &Config{
375+
NVIDIAContainerCLIConfig: ContainerCLIConfig{
376+
Ldconfig: "@/some/host/path",
377+
},
378+
},
379+
},
380+
{
381+
description: "non-host path is invalid",
382+
config: &Config{
383+
NVIDIAContainerCLIConfig: ContainerCLIConfig{
384+
Ldconfig: "/non/host/path",
385+
},
386+
},
387+
expectedError: errInvalidConfig,
388+
},
389+
{
390+
description: "feature flag allows non-host path",
391+
config: &Config{
392+
NVIDIAContainerCLIConfig: ContainerCLIConfig{
393+
Ldconfig: "/non/host/path",
394+
},
395+
Features: features{
396+
AllowLDConfigFromContainer: ptr(feature(true)),
397+
},
398+
},
399+
},
400+
}
401+
402+
for _, tc := range testCases {
403+
t.Run(tc.description, func(t *testing.T) {
404+
require.ErrorIs(t, tc.config.assertValid(), tc.expectedError)
319405
})
320406
}
321407
}
@@ -335,3 +421,18 @@ func setGetDistIDLikeForTest(ids []string) func() {
335421
getDistIDLike = original
336422
}
337423
}
424+
425+
// prt returns a reference to whatever type is passed into it
426+
func ptr[T any](x T) *T {
427+
return &x
428+
}
429+
430+
func setGetLdConfigPathForTest() func() {
431+
previous := getLdConfigPath
432+
getLdConfigPath = func() string {
433+
return "@/test/ld/config/path"
434+
}
435+
return func() {
436+
getLdConfigPath = previous
437+
}
438+
}

internal/config/features.go

Lines changed: 36 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,11 @@ package config
1919
type featureName string
2020

2121
const (
22-
FeatureGDS = featureName("gds")
23-
FeatureMOFED = featureName("mofed")
24-
FeatureNVSWITCH = featureName("nvswitch")
25-
FeatureGDRCopy = featureName("gdrcopy")
22+
FeatureGDS = featureName("gds")
23+
FeatureMOFED = featureName("mofed")
24+
FeatureNVSWITCH = featureName("nvswitch")
25+
FeatureGDRCopy = featureName("gdrcopy")
26+
FeatureAllowLDConfigFromContainer = featureName("allow-ldconfig-from-container")
2627
)
2728

2829
// features specifies a set of named features.
@@ -31,53 +32,58 @@ type features struct {
3132
MOFED *feature `toml:"mofed,omitempty"`
3233
NVSWITCH *feature `toml:"nvswitch,omitempty"`
3334
GDRCopy *feature `toml:"gdrcopy,omitempty"`
35+
// AllowLDConfigFromContainer allows non-host ldconfig paths to be used.
36+
// If this feature flag is not set to 'true' only host-rooted config paths
37+
// (i.e. paths starting with an '@' are considered valid)
38+
AllowLDConfigFromContainer *feature `toml:"allow-ldconfig-from-container,omitempty"`
3439
}
3540

3641
type feature bool
3742

38-
// IsEnabled checks whether a specified named feature is enabled.
43+
// IsEnabledInEnvironment checks whether a specified named feature is enabled.
3944
// An optional list of environments to check for feature-specific environment
4045
// variables can also be supplied.
41-
func (fs features) IsEnabled(n featureName, in ...getenver) bool {
42-
featureEnvvars := map[featureName]string{
43-
FeatureGDS: "NVIDIA_GDS",
44-
FeatureMOFED: "NVIDIA_MOFED",
45-
FeatureNVSWITCH: "NVIDIA_NVSWITCH",
46-
FeatureGDRCopy: "NVIDIA_GDRCOPY",
47-
}
48-
49-
envvar := featureEnvvars[n]
46+
func (fs features) IsEnabledInEnvironment(n featureName, in ...getenver) bool {
5047
switch n {
48+
// Features with envvar overrides
5149
case FeatureGDS:
52-
return fs.GDS.isEnabled(envvar, in...)
50+
return fs.GDS.isEnabledWithEnvvarOverride("NVIDIA_GDS", in...)
5351
case FeatureMOFED:
54-
return fs.MOFED.isEnabled(envvar, in...)
52+
return fs.MOFED.isEnabledWithEnvvarOverride("NVIDIA_MOFED", in...)
5553
case FeatureNVSWITCH:
56-
return fs.NVSWITCH.isEnabled(envvar, in...)
54+
return fs.NVSWITCH.isEnabledWithEnvvarOverride("NVIDIA_NVSWITCH", in...)
5755
case FeatureGDRCopy:
58-
return fs.GDRCopy.isEnabled(envvar, in...)
56+
return fs.GDRCopy.isEnabledWithEnvvarOverride("NVIDIA_GDRCOPY", in...)
57+
// Features without envvar overrides
58+
case FeatureAllowLDConfigFromContainer:
59+
return fs.AllowLDConfigFromContainer.IsEnabled()
5960
default:
6061
return false
6162
}
6263
}
6364

64-
// isEnabled checks whether a feature is enabled.
65-
// If the enabled value is explicitly set, this is returned, otherwise the
66-
// associated envvar is checked in the specified getenver for the string "enabled"
67-
// A CUDA container / image can be passed here.
68-
func (f *feature) isEnabled(envvar string, ins ...getenver) bool {
65+
// IsEnabled checks whether a feature is enabled.
66+
func (f *feature) IsEnabled() bool {
6967
if f != nil {
7068
return bool(*f)
7169
}
72-
if envvar == "" {
73-
return false
74-
}
75-
for _, in := range ins {
76-
if in.Getenv(envvar) == "enabled" {
77-
return true
70+
return false
71+
}
72+
73+
// isEnabledWithEnvvarOverride checks whether a feature is enabled and allows an envvar to overide the feature.
74+
// If the enabled value is explicitly set, this is returned, otherwise the
75+
// associated envvar is checked in the specified getenver for the string "enabled"
76+
// A CUDA container / image can be passed here.
77+
func (f *feature) isEnabledWithEnvvarOverride(envvar string, ins ...getenver) bool {
78+
if envvar != "" {
79+
for _, in := range ins {
80+
if in.Getenv(envvar) == "enabled" {
81+
return true
82+
}
7883
}
7984
}
80-
return false
85+
86+
return f.IsEnabled()
8187
}
8288

8389
type getenver interface {

0 commit comments

Comments
 (0)