Skip to content

Commit cf0d629

Browse files
committed
feat: Add initial metrics and update dependencies
This commit introduces Prometheus metrics to the scheduler, starting with a request counter.
1 parent f2048dc commit cf0d629

File tree

6 files changed

+96
-3
lines changed

6 files changed

+96
-3
lines changed

cmd/epp/main.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,17 @@ import (
3030
ctrl "sigs.k8s.io/controller-runtime"
3131
"sigs.k8s.io/gateway-api-inference-extension/cmd/epp/runner"
3232

33+
"github.com/llm-d/llm-d-inference-scheduler/pkg/metrics"
3334
"github.com/llm-d/llm-d-inference-scheduler/pkg/plugins"
3435
)
3536

3637
func main() {
3738
// Register llm-d-inference-scheduler plugins
3839
plugins.RegisterAllPlugins()
3940

40-
if err := runner.NewRunner().Run(ctrl.SetupSignalHandler()); err != nil {
41+
if err := runner.NewRunner().
42+
WithCustomCollectors(metrics.GetCollectors()...).
43+
Run(ctrl.SetupSignalHandler()); err != nil {
4144
os.Exit(1)
4245
}
4346
}

docs/metrics.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Metrics
2+
3+
The `llm-d-inference-scheduler` exposes the following Prometheus metrics to monitor its behavior and performance, particularly concerning Prefill/Decode Disaggregation.
4+
5+
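All metrics are in the `llm_d_inference_scheduler` subsystem, so each metric name below is exported with that prefix (for example, `llm_d_inference_scheduler_pd_decision_total`).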
6+
7+
## Scraping and viewing the metrics
8+
9+
Metrics defined in the scheduler plugin are an extension of the Inference Gateway metrics. For details on how to scrape and view them, see the [Inference Gateway metrics and observability guide](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/site-src/guides/metrics-and-observability.md).
10+
11+
## Metric Details
12+
13+
### `pd_decision_total`
14+
15+
* **Type:** Counter
16+
* **Labels:**
17+
* `decision_type`: string ("decode-only" or "prefill-decode")
18+
* **Release Stage:** ALPHA
19+
* **Description:** Counts the number of requests processed, broken down by the Prefill/Decode disaggregation decision.
20+
* `prefill-decode`: The request was split into separate Prefill and Decode stages.
21+
* `decode-only`: The request used the Decode-only path.
22+
* **Usage:** Provides a high-level view of how many requests are utilizing the disaggregated (`prefill-decode`) path versus the unified (`decode-only`) path.
23+
* **Actionability:**
24+
* Monitor the ratio of "prefill-decode" to "decode-only" to understand the P/D engagement rate.
25+
* Sudden changes in this ratio might indicate configuration issues, changes in workload patterns, or problems with the decision logic.

go.mod

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,15 @@ require (
1414
github.com/onsi/ginkgo/v2 v2.27.2
1515
github.com/onsi/gomega v1.38.2
1616
github.com/openai/openai-go v1.12.0
17+
github.com/prometheus/client_golang v1.23.2
1718
github.com/stretchr/testify v1.11.1
1819
golang.org/x/sync v0.18.0
1920
google.golang.org/grpc v1.76.0
2021
k8s.io/api v0.34.2
2122
k8s.io/apiextensions-apiserver v0.34.2
2223
k8s.io/apimachinery v0.34.2
2324
k8s.io/client-go v0.34.2
25+
k8s.io/component-base v0.34.2
2426
k8s.io/klog/v2 v2.130.1
2527
k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d
2628
sigs.k8s.io/controller-runtime v0.22.4
@@ -67,6 +69,7 @@ require (
6769
github.com/inconshreveable/mousetrap v1.1.0 // indirect
6870
github.com/josharian/intern v1.0.0 // indirect
6971
github.com/json-iterator/go v1.1.12 // indirect
72+
github.com/kylelemons/godebug v1.1.0 // indirect
7073
github.com/mailru/easyjson v0.9.0 // indirect
7174
github.com/moby/spdystream v0.5.0 // indirect
7275
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
@@ -76,7 +79,6 @@ require (
7679
github.com/pebbe/zmq4 v1.4.0 // indirect
7780
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
7881
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
79-
github.com/prometheus/client_golang v1.23.2 // indirect
8082
github.com/prometheus/client_model v0.6.2 // indirect
8183
github.com/prometheus/common v0.67.2 // indirect
8284
github.com/prometheus/procfs v0.17.0 // indirect
@@ -124,7 +126,6 @@ require (
124126
gopkg.in/inf.v0 v0.9.1 // indirect
125127
gopkg.in/yaml.v3 v3.0.1 // indirect
126128
k8s.io/apiserver v0.34.2 // indirect
127-
k8s.io/component-base v0.34.2 // indirect
128129
k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3 // indirect
129130
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect
130131
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect

pkg/metrics/metrics.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package metrics
2+
3+
import (
4+
"github.com/prometheus/client_golang/prometheus"
5+
compbasemetrics "k8s.io/component-base/metrics"
6+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/metrics"
7+
)
8+
9+
const (
10+
SchedulerSubsystem = "llm_d_inference_scheduler"
11+
12+
// Decision types for pd_decision_total metric
13+
DecisionTypeDecodeOnly = "decode-only"
14+
DecisionTypePrefillDecode = "prefill-decode"
15+
)
16+
17+
var (
18+
SchedulerPDDecisionCount = prometheus.NewCounterVec(
19+
prometheus.CounterOpts{
20+
Subsystem: SchedulerSubsystem,
21+
Name: "pd_decision_total",
22+
Help: metrics.HelpMsgWithStability("Total number of P/D disaggregation decisions made", compbasemetrics.ALPHA),
23+
},
24+
[]string{"decision_type"}, // "decode-only" or "prefill-decode"
25+
)
26+
)
27+
28+
// GetCollectors returns all custom collectors for the llm-d-inference-scheduler.
29+
func GetCollectors() []prometheus.Collector {
30+
return []prometheus.Collector{
31+
SchedulerPDDecisionCount,
32+
}
33+
}
34+
35+
// RecordPDDecisionCounter records the type of P/D disaggregation decision made.
36+
func RecordPDDecisionCounter(decisionType string) {
37+
SchedulerPDDecisionCount.WithLabelValues(decisionType).Inc()
38+
}

pkg/metrics/metrics_test.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package metrics
2+
3+
import (
4+
"strings"
5+
"testing"
6+
7+
"github.com/prometheus/client_golang/prometheus/testutil"
8+
)
9+
10+
func TestSchedulerPDDecisionCount(t *testing.T) {
11+
RecordPDDecisionCounter(DecisionTypePrefillDecode)
12+
RecordPDDecisionCounter(DecisionTypeDecodeOnly)
13+
RecordPDDecisionCounter(DecisionTypePrefillDecode)
14+
if err := testutil.CollectAndCompare(SchedulerPDDecisionCount, strings.NewReader(`
15+
# HELP llm_d_inference_scheduler_pd_decision_total [ALPHA] Total number of P/D disaggregation decisions made
16+
# TYPE llm_d_inference_scheduler_pd_decision_total counter
17+
llm_d_inference_scheduler_pd_decision_total{decision_type="decode-only"} 1
18+
llm_d_inference_scheduler_pd_decision_total{decision_type="prefill-decode"} 2
19+
`), "decision_type"); err != nil {
20+
t.Errorf("RecordPDDecisionCounter() failed: %v", err)
21+
}
22+
}

pkg/plugins/profile/pd_profile_handler.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ import (
99
"net"
1010
"strconv"
1111

12+
"github.com/llm-d/llm-d-inference-scheduler/pkg/metrics"
13+
1214
"sigs.k8s.io/controller-runtime/pkg/log"
1315
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
1416
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
@@ -156,10 +158,12 @@ func (h *PdProfileHandler) Pick(ctx context.Context, cycleState *types.CycleStat
156158

157159
if (1.0-hitPercentagePrefix)*float64(len(userInput)) < float64(h.pdThreshold) {
158160
log.FromContext(ctx).Info("Non-cached suffix is smaller than threshold, using decode profile only", "hitPercentage", hitPercentagePrefix)
161+
metrics.RecordPDDecisionCounter(metrics.DecisionTypeDecodeOnly)
159162
return map[string]*framework.SchedulerProfile{} // do not run prefill
160163
}
161164
}
162165

166+
metrics.RecordPDDecisionCounter(metrics.DecisionTypePrefillDecode)
163167
// run the prefill profile
164168
return map[string]*framework.SchedulerProfile{
165169
h.prefillProfile: profiles[h.prefillProfile],

0 commit comments

Comments
 (0)