Skip to content

Commit 3151f47

Browse files
committed
Bump kv-cache-manager to v0.4.0-rc1
Signed-off-by: Pierangelo Di Pilato <pierdipi@redhat.com>
1 parent f712dfa commit 3151f47

File tree

4 files changed

+27
-10
lines changed

4 files changed

+27
-10
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ require (
1010
github.com/google/uuid v1.6.0
1111
github.com/hashicorp/golang-lru/v2 v2.0.7
1212
github.com/jellydator/ttlcache/v3 v3.4.0
13-
github.com/llm-d/llm-d-kv-cache-manager v0.3.2
13+
github.com/llm-d/llm-d-kv-cache-manager v0.4.0-rc1
1414
github.com/onsi/ginkgo/v2 v2.27.2
1515
github.com/onsi/gomega v1.38.2
1616
github.com/openai/openai-go v1.12.0

go.sum

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,10 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0
183183
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
184184
github.com/llm-d/llm-d-kv-cache-manager v0.3.2 h1:omSTXtuII3ol37CaoI9h+2VxE0m8EoeVOor+CkQh99I=
185185
github.com/llm-d/llm-d-kv-cache-manager v0.3.2/go.mod h1:q6u7LnzMxNcHHb5/LRdHNNeZzzGMSENFSP1NGfsJEmA=
186+
github.com/llm-d/llm-d-kv-cache-manager v0.3.3-0.20251119172839-f8bb3049d991 h1:zGC/uDL4ytR4idUKd4iP/Doto0HNdxuJtgR6mn9w2Ro=
187+
github.com/llm-d/llm-d-kv-cache-manager v0.3.3-0.20251119172839-f8bb3049d991/go.mod h1:oEmDhEjW1pEoOSlEFy8CKoMc7ixQmSKEbhLt9CoH/a0=
188+
github.com/llm-d/llm-d-kv-cache-manager v0.4.0-rc1 h1:gWkZ9yp7sU5j1vbNB7eO95lxbvgJV+qd/60LnPfNk9w=
189+
github.com/llm-d/llm-d-kv-cache-manager v0.4.0-rc1/go.mod h1:oEmDhEjW1pEoOSlEFy8CKoMc7ixQmSKEbhLt9CoH/a0=
186190
github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4=
187191
github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
188192
github.com/maruel/natural v1.1.1 h1:Hja7XhhmvEFhcByqDoHz9QZbkWey+COd9xWfCfn1ioo=

pkg/plugins/scorer/precise_prefix_cache.go

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,23 @@ var _ framework.Scorer = &PrecisePrefixCacheScorer{}
4141
// a new instance of the PrefixCacheTrackingPlugin.
4242
func PrecisePrefixCachePluginFactory(name string, rawParameters json.RawMessage,
4343
handle plugins.Handle) (plugins.Plugin, error) {
44+
45+
indexerConfig, err := kvcache.NewDefaultConfig()
46+
if err != nil {
47+
return nil, fmt.Errorf("failed to initialize indexer config: %w", err)
48+
}
49+
4450
parameters := PrecisePrefixCachePluginConfig{
45-
IndexerConfig: kvcache.NewDefaultConfig(),
51+
IndexerConfig: indexerConfig,
4652
KVEventsConfig: kvevents.DefaultConfig(),
4753
}
4854

4955
// read hugging face token from environment variable if set
50-
if token := os.Getenv("HF_TOKEN"); token != "" {
51-
parameters.IndexerConfig.TokenizersPoolConfig.HuggingFaceToken = token
56+
if token := os.Getenv("HF_TOKEN"); token != "" &&
57+
parameters.IndexerConfig != nil &&
58+
parameters.IndexerConfig.TokenizersPoolConfig != nil &&
59+
parameters.IndexerConfig.TokenizersPoolConfig.HFTokenizerConfig != nil {
60+
parameters.IndexerConfig.TokenizersPoolConfig.HFTokenizerConfig.HuggingFaceToken = token
5261
}
5362

5463
if rawParameters != nil {

pkg/plugins/scorer/utils.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
package scorer
22

3-
import "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
3+
import (
4+
"math"
5+
6+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
7+
)
48

59
// podToKeyFunc is a function type that converts a Pod to a string key.
610
// It returns the key and a boolean indicating success.
@@ -11,7 +15,7 @@ type podToKeyFunc func(pod types.Pod) (string, bool)
1115
// a pod to a key, and a map of scores indexed by those keys. It returns a map
1216
// of pods to their normalized scores.
1317
func indexedScoresToNormalizedScoredPods(pods []types.Pod, podToKey podToKeyFunc,
14-
scores map[string]int) map[types.Pod]float64 {
18+
scores map[string]float64) map[types.Pod]float64 {
1519
scoredPods := make(map[types.Pod]float64)
1620
minScore, maxScore := getMinMax(scores)
1721

@@ -27,7 +31,7 @@ func indexedScoresToNormalizedScoredPods(pods []types.Pod, podToKey podToKeyFunc
2731
continue
2832
}
2933

30-
scoredPods[pod] = float64(score-minScore) / float64(maxScore-minScore)
34+
scoredPods[pod] = (score - minScore) / (maxScore - minScore)
3135
} else {
3236
scoredPods[pod] = 0.0
3337
}
@@ -36,9 +40,9 @@ func indexedScoresToNormalizedScoredPods(pods []types.Pod, podToKey podToKeyFunc
3640
return scoredPods
3741
}
3842

39-
func getMinMax(scores map[string]int) (int, int) {
40-
minScore := int(^uint(0) >> 1) // max int
41-
maxScore := -1
43+
func getMinMax(scores map[string]float64) (float64, float64) {
44+
minScore := math.MaxFloat64
45+
maxScore := float64(-1)
4246

4347
for _, score := range scores {
4448
if score < minScore {

0 commit comments

Comments
 (0)