Skip to content

Commit 390a6b7

Browse files
authored
Disallow AMD GPU instance types (e.g. g4ad) (#2155)
1 parent b3aa326 commit 390a6b7

File tree

3 files changed

+38
-2
lines changed

3 files changed

+38
-2
lines changed

pkg/lib/aws/ec2.go

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,10 @@ import (
3030
s "github.com/cortexlabs/cortex/pkg/lib/strings"
3131
)
3232

33-
var _digitsRegex = regexp.MustCompile(`[0-9]+`)
33+
var (
34+
// _digitsRegex matches a run of one or more ASCII digits.
_digitsRegex = regexp.MustCompile(`[0-9]+`)
35+
// _gpuInstanceFamilies is the set of instance family names ("g", "p") that
// IsAMDGPUInstance treats as GPU families.
_gpuInstanceFamilies = strset.New("g", "p")
36+
)
3437

3538
type ParsedInstanceType struct {
3639
Family string
@@ -117,6 +120,23 @@ func IsARMInstance(instanceType string) (bool, error) {
117120
return false, nil
118121
}
119122

123+
// IsAMDGPUInstance reports whether instanceType is in one of the GPU instance
// families listed in _gpuInstanceFamilies and also carries the "a" capability.
// It returns false with the underlying error when ParseInstanceType cannot
// parse instanceType.
//
// NOTE(review): the "a" capability is presumably the AMD marker in the
// instance type name (the commit title cites g4ad as an example) — confirm
// against ParseInstanceType.
func IsAMDGPUInstance(instanceType string) (bool, error) {
124+
parsedType, err := ParseInstanceType(instanceType)
125+
if err != nil {
126+
return false, err
127+
}
128+
129+
// Families outside the GPU set are never reported, whatever their capabilities.
if !_gpuInstanceFamilies.Has(parsedType.Family) {
130+
return false, nil
131+
}
132+
133+
if parsedType.Capabilities.Has("a") {
134+
return true, nil
135+
}
136+
137+
return false, nil
138+
}
139+
120140
func (c *Client) SpotInstancePrice(instanceType string) (float64, error) {
121141
result, err := c.EC2().DescribeSpotPriceHistory(&ec2.DescribeSpotPriceHistoryInput{
122142
InstanceTypes: []*string{aws.String(instanceType)},

pkg/types/clusterconfig/cluster_config.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1269,6 +1269,14 @@ func validateInstanceType(instanceType string) (string, error) {
12691269
return "", ErrorARMInstancesNotSupported(instanceType)
12701270
}
12711271

1272+
isAMDGPU, err := aws.IsAMDGPUInstance(instanceType)
1273+
if err != nil {
1274+
return "", err
1275+
}
1276+
if isAMDGPU {
1277+
return "", ErrorAMDGPUInstancesNotSupported(instanceType)
1278+
}
1279+
12721280
if err := checkCNISupport(instanceType); err != nil {
12731281
return "", err
12741282
}

pkg/types/clusterconfig/errors.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ const (
4747
ErrSpotPriceGreaterThanMaxPrice = "clusterconfig.spot_price_greater_than_max_price"
4848
ErrInstanceTypeNotSupportedByCortex = "clusterconfig.instance_type_not_supported_by_cortex"
4949
ErrARMInstancesNotSupported = "clusterconfig.arm_instances_not_supported"
50+
ErrAMDGPUInstancesNotSupported = "clusterconfig.amd_gpu_instances_not_supported"
5051
ErrAtLeastOneInstanceDistribution = "clusterconfig.at_least_one_instance_distribution"
5152
ErrNoCompatibleSpotInstanceFound = "clusterconfig.no_compatible_spot_instance_found"
5253
ErrConfiguredWhenSpotIsNotEnabled = "clusterconfig.configured_when_spot_is_not_enabled"
@@ -203,7 +204,14 @@ func ErrorInstanceTypeNotSupportedByCortex(instanceType string) error {
203204
// ErrorARMInstancesNotSupported builds an error of kind
// ErrARMInstancesNotSupported whose message names the rejected instanceType.
// The error is passed through errors.WithStack before being returned.
func ErrorARMInstancesNotSupported(instanceType string) error {
204205
return errors.WithStack(&errors.Error{
205206
Kind: ErrARMInstancesNotSupported,
206-
Message: fmt.Sprintf("ARM-based instances (including %s) are not supported", instanceType),
207+
Message: fmt.Sprintf("ARM-based instances (including %s) are not supported by cortex", instanceType),
208+
})
209+
}
210+
211+
// ErrorAMDGPUInstancesNotSupported builds an error of kind
// ErrAMDGPUInstancesNotSupported whose message names the rejected instanceType.
// The error is passed through errors.WithStack before being returned.
func ErrorAMDGPUInstancesNotSupported(instanceType string) error {
212+
return errors.WithStack(&errors.Error{
213+
Kind: ErrAMDGPUInstancesNotSupported,
214+
Message: fmt.Sprintf("AMD GPU instances (including %s) are not supported by cortex", instanceType),
207215
})
208216
}
209217

0 commit comments

Comments
 (0)