Skip to content

Commit fa16d83

Browse files
committed
[no-relnotes] Add initial unit test for MIG CDI spec generation
Signed-off-by: Evan Lezar <elezar@nvidia.com>
1 parent c5c124b commit fa16d83

File tree

4 files changed

+127
-21
lines changed

4 files changed

+127
-21
lines changed

internal/platform-support/dgpu/dgpu.go

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,24 +21,29 @@ import (
2121

2222
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
2323
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
24+
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
2425
)
2526

2627
// NewForDevice creates a discoverer for the specified Device.
2728
func NewForDevice(d device.Device, opts ...Option) (discover.Discover, error) {
28-
o := &options{}
29-
for _, opt := range opts {
30-
opt(o)
31-
}
32-
33-
if o.logger == nil {
34-
o.logger = logger.New()
35-
}
29+
o := new(opts...)
3630

3731
return o.newNvmlDGPUDiscoverer(&toRequiredInfo{d})
3832
}
3933

4034
// NewForMigDevice creates a discoverer for the specified device and its associated MIG device.
4135
func NewForMigDevice(d device.Device, mig device.MigDevice, opts ...Option) (discover.Discover, error) {
36+
o := new(opts...)
37+
38+
return o.newNvmlMigDiscoverer(
39+
&toRequiredMigInfo{
40+
MigDevice: mig,
41+
parent: &toRequiredInfo{d},
42+
},
43+
)
44+
}
45+
46+
func new(opts ...Option) *options {
4247
o := &options{}
4348
for _, opt := range opts {
4449
opt(o)
@@ -48,10 +53,15 @@ func NewForMigDevice(d device.Device, mig device.MigDevice, opts ...Option) (dis
4853
o.logger = logger.New()
4954
}
5055

51-
return o.newNvmlMigDiscoverer(
52-
&toRequiredMigInfo{
53-
MigDevice: mig,
54-
parent: &toRequiredInfo{d},
55-
},
56-
)
56+
if o.migCaps == nil {
57+
migCaps, err := nvcaps.NewMigCaps()
58+
if err != nil {
59+
o.logger.Debugf("ignoring error getting MIG capability device paths: %v", err)
60+
o.migCapsError = err
61+
} else {
62+
o.migCaps = migCaps
63+
}
64+
}
65+
66+
return o
5767
}

internal/platform-support/dgpu/nvml.go

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -78,24 +78,23 @@ type requiredMigInfo interface {
7878
}
7979

8080
func (o *options) newNvmlMigDiscoverer(d requiredMigInfo) (discover.Discover, error) {
81-
gpu, gi, ci, err := d.getPlacementInfo()
82-
if err != nil {
83-
return nil, fmt.Errorf("error getting placement info: %w", err)
81+
if o.migCaps == nil || o.migCapsError != nil {
82+
return nil, fmt.Errorf("error getting MIG capability device paths: %v", o.migCapsError)
8483
}
8584

86-
migCaps, err := nvcaps.NewMigCaps()
85+
gpu, gi, ci, err := d.getPlacementInfo()
8786
if err != nil {
88-
return nil, fmt.Errorf("error getting MIG capability device paths: %v", err)
87+
return nil, fmt.Errorf("error getting placement info: %w", err)
8988
}
9089

9190
giCap := nvcaps.NewGPUInstanceCap(gpu, gi)
92-
giCapDevicePath, err := migCaps.GetCapDevicePath(giCap)
91+
giCapDevicePath, err := o.migCaps.GetCapDevicePath(giCap)
9392
if err != nil {
9493
return nil, fmt.Errorf("failed to get GI cap device path: %v", err)
9594
}
9695

9796
ciCap := nvcaps.NewComputeInstanceCap(gpu, gi, ci)
98-
ciCapDevicePath, err := migCaps.GetCapDevicePath(ciCap)
97+
ciCapDevicePath, err := o.migCaps.GetCapDevicePath(ciCap)
9998
if err != nil {
10099
return nil, fmt.Errorf("failed to get CI cap device path: %v", err)
101100
}

internal/platform-support/dgpu/nvml_test.go

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"github.com/stretchr/testify/require"
2727

2828
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
29+
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
2930
)
3031

3132
// TODO: In order to properly test this, we need a mechanism to inject /
@@ -85,3 +86,86 @@ func TestNewNvmlDGPUDiscoverer(t *testing.T) {
8586
})
8687
}
8788
}
89+
90+
func TestNewNvmlMIGDiscoverer(t *testing.T) {
91+
logger, _ := testlog.NewNullLogger()
92+
93+
nvmllib := &mock.Interface{}
94+
devicelib := device.New(
95+
nvmllib,
96+
)
97+
98+
testCases := []struct {
99+
description string
100+
mig *mock.Device
101+
parent nvml.Device
102+
migCaps nvcaps.MigCaps
103+
expectedError error
104+
expectedDevices []discover.Device
105+
expectedHooks []discover.Hook
106+
expectedMounts []discover.Mount
107+
}{
108+
{
109+
description: "",
110+
mig: &mock.Device{
111+
IsMigDeviceHandleFunc: func() (bool, nvml.Return) {
112+
return true, nvml.SUCCESS
113+
},
114+
GetGpuInstanceIdFunc: func() (int, nvml.Return) {
115+
return 1, nvml.SUCCESS
116+
},
117+
GetComputeInstanceIdFunc: func() (int, nvml.Return) {
118+
return 2, nvml.SUCCESS
119+
},
120+
},
121+
parent: &mock.Device{
122+
GetMinorNumberFunc: func() (int, nvml.Return) {
123+
return 3, nvml.SUCCESS
124+
},
125+
GetPciInfoFunc: func() (nvml.PciInfo, nvml.Return) {
126+
var busID [32]int8
127+
for i, b := range []byte("00000000:45:00:00") {
128+
busID[i] = int8(b)
129+
}
130+
info := nvml.PciInfo{
131+
BusId: busID,
132+
}
133+
return info, nvml.SUCCESS
134+
},
135+
},
136+
migCaps: nvcaps.MigCaps{
137+
"gpu3/gi1/access": 31,
138+
"gpu3/gi1/ci2/access": 312,
139+
},
140+
expectedDevices: nil,
141+
expectedMounts: nil,
142+
expectedHooks: []discover.Hook{},
143+
},
144+
}
145+
for _, tc := range testCases {
146+
t.Run(tc.description, func(t *testing.T) {
147+
148+
tc.mig.GetDeviceHandleFromMigDeviceHandleFunc = func() (nvml.Device, nvml.Return) {
149+
return tc.parent, nvml.SUCCESS
150+
}
151+
parent, err := devicelib.NewDevice(tc.parent)
152+
require.NoError(t, err)
153+
154+
mig, err := devicelib.NewMigDevice(tc.mig)
155+
require.NoError(t, err)
156+
157+
d, err := NewForMigDevice(parent, mig,
158+
WithLogger(logger),
159+
WithMIGCaps(tc.migCaps),
160+
)
161+
require.ErrorIs(t, err, tc.expectedError)
162+
163+
devices, _ := d.Devices()
164+
require.EqualValues(t, tc.expectedDevices, devices)
165+
hooks, _ := d.Hooks()
166+
require.EqualValues(t, tc.expectedHooks, hooks)
167+
mounts, _ := d.Mounts()
168+
require.EqualValues(t, tc.expectedMounts, mounts)
169+
})
170+
}
171+
}

internal/platform-support/dgpu/options.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,18 @@ package dgpu
1818

1919
import (
2020
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
21+
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
2122
)
2223

2324
type options struct {
2425
logger logger.Interface
2526
devRoot string
2627
nvidiaCDIHookPath string
28+
29+
// migCaps stores the MIG capabilities for the system.
30+
// If MIG is not available, this is nil.
31+
migCaps nvcaps.MigCaps
32+
migCapsError error
2733
}
2834

2935
type Option func(*options)
@@ -48,3 +54,10 @@ func WithNVIDIACDIHookPath(path string) Option {
4854
l.nvidiaCDIHookPath = path
4955
}
5056
}
57+
58+
// WithMIGCaps sets the MIG capabilities.
59+
func WithMIGCaps(migCaps nvcaps.MigCaps) Option {
60+
return func(l *options) {
61+
l.migCaps = migCaps
62+
}
63+
}

0 commit comments

Comments
 (0)