Skip to content

Commit 2c3e179

Browse files
committed
feat: Add initial metrics and update dependencies
This commit introduces Prometheus metrics to the scheduler, starting with a counter of Prefill/Decode disaggregation decisions. It also updates several Go dependencies and adjusts the Dockerfiles to build from the vendored dependencies.
1 parent 12e5e7c commit 2c3e179

File tree

10 files changed

+165
-39
lines changed

10 files changed

+165
-39
lines changed

Dockerfile.epp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ WORKDIR /workspace
1414
# Copy the Go Modules manifests
1515
COPY go.mod go.mod
1616
COPY go.sum go.sum
17+
COPY vendor/ vendor/
1718

1819
# Copy the go source
1920
COPY cmd/ cmd/
@@ -36,7 +37,7 @@ ENV GOOS=${TARGETOS:-linux}
3637
ENV GOARCH=${TARGETARCH}
3738
ARG COMMIT_SHA=unknown
3839
ARG BUILD_REF
39-
RUN go build -a -o bin/epp -ldflags="-extldflags '-L$(pwd)/lib' -X sigs.k8s.io/gateway-api-inference-extension/version.CommitSHA=${COMMIT_SHA} -X sigs.k8s.io/gateway-api-inference-extension/version.BuildRef=${BUILD_REF}" cmd/epp/main.go
40+
RUN go build -mod=vendor -a -o bin/epp -ldflags="-extldflags '-L$(pwd)/lib' -X sigs.k8s.io/gateway-api-inference-extension/version.CommitSHA=${COMMIT_SHA} -X sigs.k8s.io/gateway-api-inference-extension/version.BuildRef=${BUILD_REF}" cmd/epp/main.go
4041

4142
# Use ubi9 as a minimal base image to package the manager binary
4243
# Refer to https://catalog.redhat.com/software/containers/ubi9/ubi-minimal/615bd9b4075b022acc111bf5 for more details

Dockerfile.sidecar

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,7 @@ WORKDIR /workspace
99
# Copy the Go Modules manifests
1010
COPY go.mod go.mod
1111
COPY go.sum go.sum
12-
# cache deps before building and copying source so that we don't need to re-download as much
13-
# and so that source changes don't invalidate our downloaded layer
14-
RUN go mod download
12+
COPY vendor/ vendor/
1513

1614
# Copy the go source
1715
COPY cmd/pd-sidecar/main.go cmd/cmd.go
@@ -26,7 +24,7 @@ COPY pkg/common pkg/common
2624
ENV CGO_ENABLED=0
2725
ENV GOOS=${TARGETOS:-linux}
2826
ENV GOARCH=${TARGETARCH}
29-
RUN go build -a -o bin/pd-sidecar \
27+
RUN go build -mod=vendor -a -o bin/pd-sidecar \
3028
-ldflags="-X github.com/llm-d/llm-d-inference-scheduler/pkg/sidecar/version.CommitSHA=${COMMIT_SHA} -X github.com/llm-d/llm-d-inference-scheduler/pkg/sidecar/version.BuildRef=${BUILD_REF}" \
3129
cmd/cmd.go
3230

cmd/epp/main.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,17 @@ import (
3030
ctrl "sigs.k8s.io/controller-runtime"
3131
"sigs.k8s.io/gateway-api-inference-extension/cmd/epp/runner"
3232

33+
"github.com/llm-d/llm-d-inference-scheduler/pkg/metrics"
3334
"github.com/llm-d/llm-d-inference-scheduler/pkg/plugins"
3435
)
3536

3637
func main() {
3738
// Register llm-d-inference-scheduler plugins
3839
plugins.RegisterAllPlugins()
3940

40-
if err := runner.NewRunner().Run(ctrl.SetupSignalHandler()); err != nil {
41+
if err := runner.NewRunner().
42+
WithCustomCollectors(metrics.GetCollectors()...).
43+
Run(ctrl.SetupSignalHandler()); err != nil {
4144
os.Exit(1)
4245
}
4346
}

docs/metrics.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Metrics
2+
3+
The `llm-d-inference-scheduler` exposes the following Prometheus metrics to monitor its behavior and performance, particularly concerning Prefill/Decode Disaggregation.
4+
5+
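All metrics are in the `llm_d_inference_scheduler` subsystem, so each metric name listed below is exported with that prefix (for example, `llm_d_inference_scheduler_pd_decision_total`).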
6+
7+
## Scraping and Viewing the Metrics
8+
9+
Metrics defined in the scheduler plugin are an extension of the Inference Gateway metrics. For details on how to scrape and view them, see the [Inference Gateway metrics and observability guide](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/site-src/guides/metrics-and-observability.md).
10+
11+
## Metric Details
12+
13+
### `pd_decision_total`
14+
15+
* **Type:** Counter
16+
* **Labels:**
17+
* `decision_type`: string ("decode-only" or "prefill-decode")
18+
* **Release Stage:** ALPHA
19+
* **Description:** Counts the number of requests processed, broken down by the Prefill/Decode disaggregation decision.
20+
* `prefill-decode`: The request was split into separate Prefill and Decode stages.
21+
* `decode-only`: The request used the Decode-only path.
22+
* **Usage:** Provides a high-level view of how many requests use the disaggregated (`prefill-decode`) path versus the decode-only path.
23+
* **Actionability:**
24+
* Monitor the ratio of "prefill-decode" to "decode-only" to understand the P/D engagement rate.
25+
* Sudden changes in this ratio might indicate configuration issues, changes in workload patterns, or problems with the decision logic.

go.mod

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,24 @@ require (
1414
github.com/onsi/ginkgo/v2 v2.27.2
1515
github.com/onsi/gomega v1.38.2
1616
github.com/openai/openai-go v1.12.0
17+
github.com/prometheus/client_golang v1.23.2
1718
github.com/stretchr/testify v1.11.1
1819
golang.org/x/sync v0.18.0
1920
google.golang.org/grpc v1.76.0
2021
k8s.io/api v0.34.1
2122
k8s.io/apiextensions-apiserver v0.34.1
2223
k8s.io/apimachinery v0.34.1
2324
k8s.io/client-go v0.34.1
25+
k8s.io/component-base v0.34.1
2426
k8s.io/klog/v2 v2.130.1
2527
k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d
2628
sigs.k8s.io/controller-runtime v0.22.4
2729
sigs.k8s.io/gateway-api v1.4.0
2830
sigs.k8s.io/gateway-api-inference-extension v1.1.0
2931
)
3032

33+
replace sigs.k8s.io/gateway-api-inference-extension v1.1.0 => ../gateway-api-inference-extension
34+
3135
require (
3236
cel.dev/expr v0.24.0 // indirect
3337
github.com/Masterminds/semver/v3 v3.4.0 // indirect
@@ -44,7 +48,7 @@ require (
4448
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
4549
github.com/dustin/go-humanize v1.0.1 // indirect
4650
github.com/emicklei/go-restful/v3 v3.13.0 // indirect
47-
github.com/envoyproxy/go-control-plane/envoy v1.35.0 // indirect
51+
github.com/envoyproxy/go-control-plane/envoy v1.36.0 // indirect
4852
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
4953
github.com/evanphx/json-patch/v5 v5.9.11 // indirect
5054
github.com/felixge/httpsnoop v1.0.4 // indirect
@@ -67,6 +71,7 @@ require (
6771
github.com/inconshreveable/mousetrap v1.1.0 // indirect
6872
github.com/josharian/intern v1.0.0 // indirect
6973
github.com/json-iterator/go v1.1.12 // indirect
74+
github.com/kylelemons/godebug v1.1.0 // indirect
7075
github.com/mailru/easyjson v0.9.0 // indirect
7176
github.com/moby/spdystream v0.5.0 // indirect
7277
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
@@ -76,12 +81,11 @@ require (
7681
github.com/pebbe/zmq4 v1.4.0 // indirect
7782
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
7883
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
79-
github.com/prometheus/client_golang v1.23.2 // indirect
8084
github.com/prometheus/client_model v0.6.2 // indirect
81-
github.com/prometheus/common v0.67.1 // indirect
85+
github.com/prometheus/common v0.67.2 // indirect
8286
github.com/prometheus/procfs v0.17.0 // indirect
83-
github.com/prometheus/prometheus v0.307.1 // indirect
84-
github.com/redis/go-redis/v9 v9.11.0 // indirect
87+
github.com/prometheus/prometheus v0.307.3 // indirect
88+
github.com/redis/go-redis/v9 v9.8.0 // indirect
8589
github.com/spf13/cobra v1.9.1 // indirect
8690
github.com/spf13/pflag v1.0.7 // indirect
8791
github.com/stoewer/go-strcase v1.3.0 // indirect
@@ -109,11 +113,11 @@ require (
109113
go.yaml.in/yaml/v3 v3.0.4 // indirect
110114
golang.org/x/exp v0.0.0-20250808145144-a408d31f581a // indirect
111115
golang.org/x/mod v0.28.0 // indirect
112-
golang.org/x/net v0.44.0 // indirect
113-
golang.org/x/oauth2 v0.31.0 // indirect
114-
golang.org/x/sys v0.36.0 // indirect
115-
golang.org/x/term v0.35.0 // indirect
116-
golang.org/x/text v0.29.0 // indirect
116+
golang.org/x/net v0.46.0 // indirect
117+
golang.org/x/oauth2 v0.32.0 // indirect
118+
golang.org/x/sys v0.37.0 // indirect
119+
golang.org/x/term v0.36.0 // indirect
120+
golang.org/x/text v0.30.0 // indirect
117121
golang.org/x/time v0.13.0 // indirect
118122
golang.org/x/tools v0.37.0 // indirect
119123
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
@@ -124,7 +128,6 @@ require (
124128
gopkg.in/inf.v0 v0.9.1 // indirect
125129
gopkg.in/yaml.v3 v3.0.1 // indirect
126130
k8s.io/apiserver v0.34.1 // indirect
127-
k8s.io/component-base v0.34.1 // indirect
128131
k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3 // indirect
129132
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect
130133
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect

go.sum

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkp
8585
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
8686
github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes=
8787
github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
88-
github.com/envoyproxy/go-control-plane/envoy v1.35.0 h1:ixjkELDE+ru6idPxcHLj8LBVc2bFP7iBytj353BoHUo=
89-
github.com/envoyproxy/go-control-plane/envoy v1.35.0/go.mod h1:09qwbGVuSWWAyN5t/b3iyVfz5+z8QWGrzkoqm/8SbEs=
88+
github.com/envoyproxy/go-control-plane/envoy v1.36.0 h1:yg/JjO5E7ubRyKX3m07GF3reDNEnfOboJ0QySbH736g=
89+
github.com/envoyproxy/go-control-plane/envoy v1.36.0/go.mod h1:ty89S1YCCVruQAm9OtKeEkQLTb+Lkz0k8v9W0Oxsv98=
9090
github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8=
9191
github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU=
9292
github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k=
@@ -227,18 +227,18 @@ github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h
227227
github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
228228
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
229229
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
230-
github.com/prometheus/common v0.67.1 h1:OTSON1P4DNxzTg4hmKCc37o4ZAZDv0cfXLkOt0oEowI=
231-
github.com/prometheus/common v0.67.1/go.mod h1:RpmT9v35q2Y+lsieQsdOh5sXZ6ajUGC8NjZAmr8vb0Q=
230+
github.com/prometheus/common v0.67.2 h1:PcBAckGFTIHt2+L3I33uNRTlKTplNzFctXcWhPyAEN8=
231+
github.com/prometheus/common v0.67.2/go.mod h1:63W3KZb1JOKgcjlIr64WW/LvFGAqKPj0atm+knVGEko=
232232
github.com/prometheus/otlptranslator v1.0.0 h1:s0LJW/iN9dkIH+EnhiD3BlkkP5QVIUVEoIwkU+A6qos=
233233
github.com/prometheus/otlptranslator v1.0.0/go.mod h1:vRYWnXvI6aWGpsdY/mOT/cbeVRBlPWtBNDb7kGR3uKM=
234234
github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0=
235235
github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw=
236-
github.com/prometheus/prometheus v0.307.1 h1:Hh3kRMFn+xpQGLe/bR6qpUfW4GXQO0spuYeY7f2JZs4=
237-
github.com/prometheus/prometheus v0.307.1/go.mod h1:/7YQG/jOLg7ktxGritmdkZvezE1fa6aWDj0MGDIZvcY=
236+
github.com/prometheus/prometheus v0.307.3 h1:zGIN3EpiKacbMatcUL2i6wC26eRWXdoXfNPjoBc2l34=
237+
github.com/prometheus/prometheus v0.307.3/go.mod h1:sPbNW+KTS7WmzFIafC3Inzb6oZVaGLnSvwqTdz2jxRQ=
238238
github.com/prometheus/sigv4 v0.2.1 h1:hl8D3+QEzU9rRmbKIRwMKRwaFGyLkbPdH5ZerglRHY0=
239239
github.com/prometheus/sigv4 v0.2.1/go.mod h1:ySk6TahIlsR2sxADuHy4IBFhwEjRGGsfbbLGhFYFj6Q=
240-
github.com/redis/go-redis/v9 v9.11.0 h1:E3S08Gl/nJNn5vkxd2i78wZxWAPNZgUNTp8WIJUAiIs=
241-
github.com/redis/go-redis/v9 v9.11.0/go.mod h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw=
240+
github.com/redis/go-redis/v9 v9.8.0 h1:q3nRvjrlge/6UD7eTu/DSg2uYiU2mCL0G/uzBWqhicI=
241+
github.com/redis/go-redis/v9 v9.8.0/go.mod h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw=
242242
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
243243
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
244244
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
@@ -317,8 +317,8 @@ go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
317317
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
318318
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
319319
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
320-
golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI=
321-
golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8=
320+
golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04=
321+
golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0=
322322
golang.org/x/exp v0.0.0-20250808145144-a408d31f581a h1:Y+7uR/b1Mw2iSXZ3G//1haIiSElDQZ8KWh0h+sZPG90=
323323
golang.org/x/exp v0.0.0-20250808145144-a408d31f581a/go.mod h1:rT6SFzZ7oxADUDx58pcaKFTcZ+inxAa9fTrYx/uVYwg=
324324
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
@@ -329,10 +329,10 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn
329329
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
330330
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
331331
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
332-
golang.org/x/net v0.44.0 h1:evd8IRDyfNBMBTTY5XRF1vaZlD+EmWx6x8PkhR04H/I=
333-
golang.org/x/net v0.44.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY=
334-
golang.org/x/oauth2 v0.31.0 h1:8Fq0yVZLh4j4YA47vHKFTa9Ew5XIrCP8LC6UeNZnLxo=
335-
golang.org/x/oauth2 v0.31.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
332+
golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4=
333+
golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210=
334+
golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY=
335+
golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
336336
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
337337
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
338338
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -341,14 +341,14 @@ golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
341341
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
342342
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
343343
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
344-
golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k=
345-
golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
346-
golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ=
347-
golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA=
344+
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
345+
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
346+
golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q=
347+
golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss=
348348
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
349349
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
350-
golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk=
351-
golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4=
350+
golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
351+
golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
352352
golang.org/x/time v0.13.0 h1:eUlYslOIt32DgYD6utsuUeHs4d7AsEYLuIAdg7FlYgI=
353353
golang.org/x/time v0.13.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
354354
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@@ -409,8 +409,6 @@ sigs.k8s.io/controller-runtime v0.22.4 h1:GEjV7KV3TY8e+tJ2LCTxUTanW4z/FmNB7l327U
409409
sigs.k8s.io/controller-runtime v0.22.4/go.mod h1:+QX1XUpTXN4mLoblf4tqr5CQcyHPAki2HLXqQMY6vh8=
410410
sigs.k8s.io/gateway-api v1.4.0 h1:ZwlNM6zOHq0h3WUX2gfByPs2yAEsy/EenYJB78jpQfQ=
411411
sigs.k8s.io/gateway-api v1.4.0/go.mod h1:AR5RSqciWP98OPckEjOjh2XJhAe2Na4LHyXD2FUY7Qk=
412-
sigs.k8s.io/gateway-api-inference-extension v1.1.0 h1:MqRYk+3LNUWB0MbTgTZVhmJGNDTvm8l3ze4MOlzR7MU=
413-
sigs.k8s.io/gateway-api-inference-extension v1.1.0/go.mod h1:BmJy8Hvc2EHl3Oa/Ka8/4RqwVHCCbX7BLndLdMNtugI=
414412
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg=
415413
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
416414
sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=

pkg/metrics/metrics.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package metrics
2+
3+
import (
4+
"github.com/prometheus/client_golang/prometheus"
5+
compbasemetrics "k8s.io/component-base/metrics"
6+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/metrics"
7+
)
8+
9+
const (
10+
SchedulerSubsystem = "llm_d_inference_scheduler"
11+
12+
// Decision types for pd_decision_total metric
13+
DecisionTypeDecodeOnly = "decode-only"
14+
DecisionTypePrefillDecode = "prefill-decode"
15+
)
16+
17+
var (
18+
SchedulerPDDecisionCount = prometheus.NewCounterVec(
19+
prometheus.CounterOpts{
20+
Subsystem: SchedulerSubsystem,
21+
Name: "pd_decision_total",
22+
Help: metrics.HelpMsgWithStability("Total number of P/D disaggregation decisions made", compbasemetrics.ALPHA),
23+
},
24+
[]string{"decision_type"}, // "decode-only" or "prefill-decode"
25+
)
26+
)
27+
28+
// GetCollectors returns all custom collectors for the llm-d-inference-scheduler.
29+
func GetCollectors() []prometheus.Collector {
30+
return []prometheus.Collector{
31+
SchedulerPDDecisionCount,
32+
}
33+
}
34+
35+
// RecordPDDecisionCounter records the type of P/D disaggregation decision made.
36+
func RecordPDDecisionCounter(decisionType string) {
37+
SchedulerPDDecisionCount.WithLabelValues(decisionType).Inc()
38+
}

pkg/metrics/metrics_test.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package metrics
2+
3+
import (
4+
"strings"
5+
"testing"
6+
7+
"github.com/prometheus/client_golang/prometheus/testutil"
8+
)
9+
10+
func TestSchedulerPDDecisionCount(t *testing.T) {
11+
RecordPDDecisionCounter(DecisionTypePrefillDecode)
12+
RecordPDDecisionCounter(DecisionTypeDecodeOnly)
13+
RecordPDDecisionCounter(DecisionTypePrefillDecode)
14+
if err := testutil.CollectAndCompare(SchedulerPDDecisionCount, strings.NewReader(`
15+
# HELP llm_d_inference_scheduler_pd_decision_total [ALPHA] Total number of P/D disaggregation decisions made
16+
# TYPE llm_d_inference_scheduler_pd_decision_total counter
17+
llm_d_inference_scheduler_pd_decision_total{decision_type="decode-only"} 1
18+
llm_d_inference_scheduler_pd_decision_total{decision_type="prefill-decode"} 2
19+
`), "decision_type"); err != nil {
20+
t.Errorf("RecordPDDecisionCounter() failed: %v", err)
21+
}
22+
}

pkg/plugins/profile/pd_profile_handler.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ import (
99
"net"
1010
"strconv"
1111

12+
"github.com/llm-d/llm-d-inference-scheduler/pkg/metrics"
13+
1214
"sigs.k8s.io/controller-runtime/pkg/log"
1315
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
1416
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
@@ -142,10 +144,12 @@ func (h *PdProfileHandler) Pick(ctx context.Context, cycleState *types.CycleStat
142144

143145
if (1.0-hitPercentagePrefix)*float64(len(userInput)) < float64(h.pdThreshold) {
144146
log.FromContext(ctx).Info("Non-cached suffix is smaller than threshold, using decode profile only", "hitPercentage", hitPercentagePrefix)
147+
metrics.RecordPDDecisionCounter(metrics.DecisionTypeDecodeOnly)
145148
return map[string]*framework.SchedulerProfile{} // do not run prefill
146149
}
147150
}
148151

152+
metrics.RecordPDDecisionCounter(metrics.DecisionTypePrefillDecode)
149153
// run the prefill profile
150154
return map[string]*framework.SchedulerProfile{
151155
h.prefillProfile: profiles[h.prefillProfile],

version/version.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package version
18+
19+
var (
20+
// CommitSHA is the git hash of the latest commit in the build.
21+
CommitSHA string
22+
23+
// BuildRef is the build ref from the _PULL_BASE_REF from cloud build trigger.
24+
BuildRef string
25+
)
26+
27+
const (
28+
// BundleVersionAnnotation is the annotation key used in the Gateway API inference extension CRDs to specify
29+
// the installed Gateway API inference extension version.
30+
BundleVersionAnnotation = "inference.networking.k8s.io/bundle-version"
31+
32+
// BundleVersion is the value used for labeling the version of the gateway-api-inference-extension.
33+
BundleVersion = "main-dev"
34+
)

0 commit comments

Comments
 (0)