Skip to content

Commit c09799f

Browse files
authored
Merge pull request #1010 from elezar/add-imex-init-container
Add init container to handle imex nodes config mount
2 parents 6decc15 + 7fc6642 commit c09799f

File tree

6 files changed

+54
-29
lines changed

6 files changed

+54
-29
lines changed

api/config/v1/config.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ func NewConfig(c *cli.Context, flags []cli.Flag) (*Config, error) {
6161
if c.IsSet("imex-required") {
6262
config.Imex.Required = c.Bool("imex-required")
6363
}
64-
updateFromCLIFlag(&config.Imex.NodesConfigFile, c, "imex-nodes-config-file")
6564

6665
// If nvidiaDevRoot (the path to the device nodes on the host) is not set,
6766
// we default to using the driver root on the host.

api/config/v1/imex.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,6 @@ type Imex struct {
3939
// If it is not required its injection is skipped if the device nodes do not exist or if its
4040
// existence cannot be queried.
4141
Required bool `json:"required,omitempty" yaml:"required,omitempty"`
42-
// NodesConfigFile defines the location to the IMEX nodes config file.
43-
// Such a nodes config file contains the IP addresses of nodes that are part of the IMEX domain.
44-
// Note that this is the absolute path to the file in the device plugin container.
45-
NodesConfigFile *string `json:"nodesConfigFile,omitempty" yaml:"nodesConfigFile,omitempty"`
4642
}
4743

4844
// AssertChannelIDsIsValid checks whether the specified list of channel IDs is valid.

cmd/gpu-feature-discovery/main.go

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -86,12 +86,6 @@ func main() {
8686
Value: "/etc/kubernetes/node-feature-discovery/features.d/gfd",
8787
EnvVars: []string{"GFD_OUTPUT_FILE"},
8888
},
89-
&cli.StringFlag{
90-
Name: "imex-nodes-config-file",
91-
Usage: "Path to the IMEX nodes config file. This file contains a list of IP addresses of the nodes in the IMEX domain.",
92-
Value: "/etc/nvidia-imex/nodes_config.cfg",
93-
EnvVars: []string{"GFD_IMEX_NODES_CONFIG_FILE"},
94-
},
9589
&cli.StringFlag{
9690
Name: "machine-type-file",
9791
Value: "/sys/class/dmi/id/product_name",

deployments/helm/nvidia-device-plugin/templates/daemonset-gfd.yml

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,35 @@ spec:
5757
{{- end }}
5858
{{- if $options.hasConfigMap }}
5959
shareProcessNamespace: true
60+
{{- end }}
6061
initContainers:
62+
- image: {{ include "nvidia-device-plugin.fullimage" . }}
63+
name: gpu-feature-discovery-imex-init
64+
command: ["/bin/bash", "-c"]
65+
args:
66+
- |
67+
IMEX_NODES_CONFIG_FILE=/etc/nvidia-imex/nodes_config.cfg
68+
if [[ -f /config/${IMEX_NODES_CONFIG_FILE} ]]; then
69+
echo "Removing cached IMEX nodes config"
70+
rm -f /config/${IMEX_NODES_CONFIG_FILE}
71+
fi
72+
73+
if [[ ! -f /driver-root/${IMEX_NODES_CONFIG_FILE} ]]; then
74+
echo "No IMEX nodes config path detected; Skipping"
75+
exit 0
76+
fi
77+
78+
echo "Copying IMEX nodes config"
79+
mkdir -p $(dirname /config/${IMEX_NODES_CONFIG_FILE})
80+
cp /driver-root/${IMEX_NODES_CONFIG_FILE} /config/${IMEX_NODES_CONFIG_FILE}
81+
volumeMounts:
82+
- name: config
83+
mountPath: /config
84+
- name: driver-root
85+
mountPath: /driver-root/etc
86+
subPath: etc
87+
readOnly: true
88+
{{- if $options.hasConfigMap }}
6189
- image: {{ include "nvidia-device-plugin.fullimage" . }}
6290
name: gpu-feature-discovery-init
6391
command: ["config-manager"]
@@ -182,14 +210,12 @@ spec:
182210
mountPath: "/etc/kubernetes/node-feature-discovery/features.d"
183211
- name: host-sys
184212
mountPath: "/sys"
185-
- name: nvidia-imex-dir
186-
mountPath: "/etc/nvidia-imex"
187213
{{- if $options.hasConfigMap }}
188214
- name: available-configs
189215
mountPath: /available-configs
216+
{{- end }}
190217
- name: config
191218
mountPath: /config
192-
{{- end }}
193219
{{- with .Values.resources }}
194220
resources:
195221
{{- toYaml . | nindent 10 }}
@@ -201,17 +227,17 @@ spec:
201227
- name: host-sys
202228
hostPath:
203229
path: "/sys"
204-
- name: nvidia-imex-dir
205-
type: DirectoryOrCreate
230+
- name: driver-root
206231
hostPath:
207-
path: {{ clean ( join "/" ( list "/" .Values.nvidiaDriverRoot "/etc/nvidia-imex" ) ) | quote }}
232+
path: {{ clean ( join "/" ( list "/" .Values.nvidiaDriverRoot ) ) | quote }}
233+
type: Directory
208234
{{- if $options.hasConfigMap }}
209235
- name: available-configs
210236
configMap:
211237
name: {{ $configMapName }}
238+
{{- end }}
212239
- name: config
213240
emptyDir: {}
214-
{{- end }}
215241
{{- with .Values.nodeSelector }}
216242
nodeSelector:
217243
{{- toYaml . | nindent 8 }}

internal/lm/fabric.go renamed to internal/lm/imex.go

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,19 +34,16 @@ import (
3434
"github.com/NVIDIA/k8s-device-plugin/internal/resource"
3535
)
3636

37-
func newImexLabeler(config *spec.Config, devices []resource.Device) (Labeler, error) {
38-
if config.Imex.NodesConfigFile == nil || *config.Imex.NodesConfigFile == "" {
39-
// No imex config file, return empty labels
40-
return empty{}, nil
41-
}
42-
43-
nodesConfigFiles := []string{*config.Imex.NodesConfigFile}
44-
if root := config.Flags.Plugin.ContainerDriverRoot; root != nil && *root != "" {
45-
nodesConfigFiles = append(nodesConfigFiles, filepath.Join(*root, *config.Imex.NodesConfigFile))
46-
}
37+
// ImexNodesConfigFilePath is the path of the IMEX nodes config file, which
// lists the IP addresses of the nodes that are part of the IMEX domain.
const ImexNodesConfigFilePath = "/etc/nvidia-imex/nodes_config.cfg"
4742

43+
func newImexLabeler(config *spec.Config, devices []resource.Device) (Labeler, error) {
4844
var errs error
49-
for _, configFilePath := range nodesConfigFiles {
45+
for _, root := range imexNodesConfigFilePathSearchRoots(config) {
46+
configFilePath := filepath.Join(root, ImexNodesConfigFilePath)
5047
imexLabeler, err := imexLabelerForConfigFile(configFilePath, devices)
5148
if err != nil {
5249
errs = errors.Join(errs, err)
@@ -64,6 +61,19 @@ func newImexLabeler(config *spec.Config, devices []resource.Device) (Labeler, er
6461
return empty{}, nil
6562
}
6663

64+
// imexNodesConfigFilePathSearchRoots returns a list of roots to search for the IMEX nodes config file.
65+
func imexNodesConfigFilePathSearchRoots(config *spec.Config) []string {
66+
// By default, search / and /config for config files.
67+
roots := []string{"/", "/config"}
68+
69+
if config == nil || config.Flags.Plugin == nil || config.Flags.Plugin.ContainerDriverRoot == nil {
70+
return roots
71+
}
72+
73+
// If a driver root is specified, it is also searched.
74+
return append(roots, *config.Flags.Plugin.ContainerDriverRoot)
75+
}
76+
6777
func imexLabelerForConfigFile(configFilePath string, devices []resource.Device) (Labeler, error) {
6878
imexConfigFile, err := os.Open(configFilePath)
6979
if os.IsNotExist(err) {
File renamed without changes.

0 commit comments

Comments
 (0)