|
| 1 | +/* |
| 2 | +Copyright 2024 The CloudPilot AI Authors. |
| 3 | +
|
| 4 | +Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +you may not use this file except in compliance with the License. |
| 6 | +You may obtain a copy of the License at |
| 7 | +
|
| 8 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +
|
| 10 | +Unless required by applicable law or agreed to in writing, software |
| 11 | +distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +See the License for the specific language governing permissions and |
| 14 | +limitations under the License. |
| 15 | +*/ |
| 16 | + |
| 17 | +package cache |
| 18 | + |
| 19 | +import ( |
| 20 | + "context" |
| 21 | + "fmt" |
| 22 | + "sync/atomic" |
| 23 | + "time" |
| 24 | + |
| 25 | + "github.com/patrickmn/go-cache" |
| 26 | + "knative.dev/pkg/logging" |
| 27 | + "sigs.k8s.io/karpenter/pkg/apis/v1beta1" |
| 28 | +) |
| 29 | + |
| 30 | +var ( |
| 31 | + spotKey = key("", "", v1beta1.CapacityTypeSpot) |
| 32 | +) |
| 33 | + |
| 34 | +// UnavailableOfferings stores any offerings that return ICE (insufficient capacity errors) when |
| 35 | +// attempting to launch the capacity. These offerings are ignored as long as they are in the cache on |
| 36 | +// GetInstanceTypes responses |
| 37 | +type UnavailableOfferings struct { |
| 38 | + // key: <capacityType>:<instanceType>:<zone>, value: struct{}{} |
| 39 | + cache *cache.Cache |
| 40 | + SeqNum uint64 |
| 41 | +} |
| 42 | + |
| 43 | +func NewUnavailableOfferingsWithCache(c *cache.Cache) *UnavailableOfferings { |
| 44 | + uo := &UnavailableOfferings{ |
| 45 | + cache: c, |
| 46 | + SeqNum: 0, |
| 47 | + } |
| 48 | + uo.cache.OnEvicted(func(_ string, _ interface{}) { |
| 49 | + atomic.AddUint64(&uo.SeqNum, 1) |
| 50 | + }) |
| 51 | + return uo |
| 52 | +} |
| 53 | + |
| 54 | +func NewUnavailableOfferings() *UnavailableOfferings { |
| 55 | + return NewUnavailableOfferingsWithCache( |
| 56 | + cache.New(UnavailableOfferingsTTL, UnavailableOfferingsCleanupInterval)) |
| 57 | +} |
| 58 | + |
| 59 | +// IsUnavailable returns true if the offering appears in the cache |
| 60 | +func (u *UnavailableOfferings) IsUnavailable(instanceType, zone, capacityType string) bool { |
| 61 | + if capacityType == v1beta1.CapacityTypeSpot { |
| 62 | + if _, found := u.cache.Get(spotKey); found { |
| 63 | + return true |
| 64 | + } |
| 65 | + } |
| 66 | + _, found := u.cache.Get(key(instanceType, zone, capacityType)) |
| 67 | + return found |
| 68 | +} |
| 69 | + |
| 70 | +// MarkUnavailableWithTTL allows us to mark an offering unavailable with a custom TTL |
| 71 | +func (u *UnavailableOfferings) MarkUnavailableWithTTL(ctx context.Context, unavailableReason, instanceType, zone, capacityType string, ttl time.Duration) { |
| 72 | + // even if the key is already in the cache, we still need to call Set to extend the cached entry's TTL |
| 73 | + logging.FromContext(ctx).With( |
| 74 | + "unavailable", unavailableReason, |
| 75 | + "instance-type", instanceType, |
| 76 | + "zone", zone, |
| 77 | + "capacity-type", capacityType, |
| 78 | + "ttl", ttl).Debugf("removing offering from offerings") |
| 79 | + u.cache.Set(key(instanceType, zone, capacityType), struct{}{}, ttl) |
| 80 | + atomic.AddUint64(&u.SeqNum, 1) |
| 81 | +} |
| 82 | + |
| 83 | +// MarkUnavailable communicates recently observed temporary capacity shortages in the provided offerings |
| 84 | +func (u *UnavailableOfferings) MarkUnavailable(ctx context.Context, unavailableReason, instanceType, zone, capacityType string) { |
| 85 | + u.MarkUnavailableWithTTL(ctx, unavailableReason, instanceType, zone, capacityType, UnavailableOfferingsTTL) |
| 86 | +} |
| 87 | + |
| 88 | +func (u *UnavailableOfferings) Flush() { |
| 89 | + u.cache.Flush() |
| 90 | + atomic.AddUint64(&u.SeqNum, 1) |
| 91 | +} |
| 92 | + |
// key builds the cache key for an offering. The parts are joined with ':' in the order
// <capacityType>:<instanceType>:<zone>, which differs from the parameter order.
func key(instanceType string, zone string, capacityType string) string {
	const layout = "%s:%s:%s"
	return fmt.Sprintf(layout, capacityType, instanceType, zone)
}
0 commit comments