@@ -29,7 +29,7 @@ import (
2929)
3030
3131const (
32- startTestContainerTemplate = `docker run -d --name {{.ContainerName}} --privileged --runtime=nvidia \
32+ outerContainerTemplate = `docker run -d --name {{.ContainerName}} --privileged --runtime=nvidia \
3333 -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all \
3434 -e NVIDIA_DRIVER_CAPABILITIES=all \
3535 {{ range $i, $a := .AdditionalArguments -}}
@@ -156,6 +156,8 @@ func NewNestedContainerRunner(runner Runner, installCTK bool, image string, cont
156156 return nil , fmt .Errorf ("failed to remove container: %w" , err )
157157 }
158158
159+ // If installCTK is true, install the toolkit on the host before creating
160+ // the nested container.
159161 if installCTK {
160162 installer , err := NewToolkitInstaller (
161163 WithRunner (runner ),
@@ -172,7 +174,6 @@ func NewNestedContainerRunner(runner Runner, installCTK bool, image string, cont
172174 }
173175 } else {
174176 // If installCTK is false, we use the preinstalled toolkit.
175- // TODO: This should be updated for other distributions and other components of the toolkit.
176177 output , _ , err := runner .Run ("ls /lib/**/libnvidia-container*.so.*.*" )
177178 if err != nil {
178179 return nil , fmt .Errorf ("failed to list toolkit libraries: %w" , err )
@@ -186,16 +187,48 @@ func NewNestedContainerRunner(runner Runner, installCTK bool, image string, cont
186187 for _ , lib := range strings .Split (output , "\n " ) {
187188 additionalContainerArguments = append (additionalContainerArguments , "-v " + lib + ":" + lib )
188189 }
189- additionalContainerArguments = append (additionalContainerArguments , "-v /usr/bin/nvidia-container-cli:/usr/bin/nvidia-container-cli" )
190+
191+ // Look for NVIDIA binaries in standard locations and mount them as volumes
192+ nvidiaBinaries := []string {
193+ "nvidia-container-cli" ,
194+ "nvidia-container-runtime" ,
195+ "nvidia-container-runtime-hook" ,
196+ "nvidia-ctk" ,
197+ "nvidia-cdi-hook" ,
198+ "nvidia-container-runtime.cdi" ,
199+ "nvidia-container-runtime.legacy" ,
200+ }
201+
202+ searchPaths := []string {
203+ "/usr/bin" ,
204+ "/usr/sbin" ,
205+ "/usr/local/bin" ,
206+ "/usr/local/sbin" ,
207+ }
208+
209+ for _ , binary := range nvidiaBinaries {
210+ for _ , searchPath := range searchPaths {
211+ binaryPath := searchPath + "/" + binary
212+ // Check if the binary exists at this path
213+ checkCmd := fmt .Sprintf ("test -f %s && echo 'exists'" , binaryPath )
214+ output , _ , err := runner .Run (checkCmd )
215+ if err == nil && strings .TrimSpace (output ) == "exists" {
216+ // Binary found, add it as a volume mount
217+ additionalContainerArguments = append (additionalContainerArguments ,
218+ fmt .Sprintf ("-v %s:%s" , binaryPath , binaryPath ))
219+ break // Move to the next binary once found
220+ }
221+ }
222+ }
190223 }
191224
192225 // Launch the container in detached mode.
193- var startContainerScriptBuilder strings.Builder
194- startContainerTemplate , err := template .New ("startContainer " ).Parse (startTestContainerTemplate )
226+ var outerContainerScriptBuilder strings.Builder
227+ outerContainerTemplate , err := template .New ("outerContainer " ).Parse (outerContainerTemplate )
195228 if err != nil {
196229 return nil , fmt .Errorf ("failed to parse start container template: %w" , err )
197230 }
198- err = startContainerTemplate .Execute (& startContainerScriptBuilder , struct {
231+ err = outerContainerTemplate .Execute (& outerContainerScriptBuilder , struct {
199232 ContainerName string
200233 AdditionalArguments []string
201234 }{
@@ -206,8 +239,8 @@ func NewNestedContainerRunner(runner Runner, installCTK bool, image string, cont
206239 return nil , fmt .Errorf ("failed to execute start container template: %w" , err )
207240 }
208241
209- startContainerScript := startContainerScriptBuilder .String ()
210- _ , _ , err = runner .Run (startContainerScript )
242+ outerContainerScript := outerContainerScriptBuilder .String ()
243+ _ , _ , err = runner .Run (outerContainerScript )
211244 if err != nil {
212245 return nil , fmt .Errorf ("failed to run start container script: %w" , err )
213246 }
0 commit comments