Skip to content

Commit 6e2bb8a

Browse files
authored
feat: Moved the Routing Sidecar from its own repo to the inference-scheduler repo (#379)
* Moved prefill header definition to common import Signed-off-by: Shmuel Kallner <kallner@il.ibm.com> * Moved Routing Sidecar into this repo Signed-off-by: Shmuel Kallner <kallner@il.ibm.com> * Moved Routing Sidecar tests into this repo Signed-off-by: Shmuel Kallner <kallner@il.ibm.com> * Moved Routing Sidecar Dockerfile into this repo Signed-off-by: Shmuel Kallner <kallner@il.ibm.com> * Added Routing Sidecar to Makefile Signed-off-by: Shmuel Kallner <kallner@il.ibm.com> * Added Routing Sidecar to CI stream Signed-off-by: Shmuel Kallner <kallner@il.ibm.com> * Fixed lint error Signed-off-by: Shmuel Kallner <kallner@il.ibm.com> * Review fixes and added version info Signed-off-by: Shmuel Kallner <kallner@il.ibm.com> * Test Nixl V2 instead of the deleted Nixl V1 Signed-off-by: Shmuel Kallner <kallner@il.ibm.com> * Fixed lint errors Signed-off-by: Shmuel Kallner <kallner@il.ibm.com> --------- Signed-off-by: Shmuel Kallner <kallner@il.ibm.com>
1 parent 8d73dc4 commit 6e2bb8a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+2991
-9
lines changed

.github/actions/docker-build-and-push/action.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
name: Docker Build - ghcr
22
description: Build image using buildx
33
inputs:
4+
docker-file:
5+
required: true
6+
description: Dockerfile name
47
image-name:
58
required: true
69
description: Image name
@@ -43,5 +46,5 @@ runs:
4346
docker buildx build \
4447
--platform linux/amd64,linux/arm64 \
4548
-t ${{ inputs.registry }}/${{ inputs.image-name }}:${{ inputs.tag }} \
46-
${LATEST_TAG} --push .
49+
${LATEST_TAG} -f ${{ inputs.docker-file }} --push .
4750
shell: bash

.github/workflows/ci-pr-checks.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,9 @@ jobs:
4545
- name: Run make test
4646
shell: bash
4747
run: |
48-
make test
48+
make test sidecar-test
4949
5050
- name: Run make build
5151
shell: bash
5252
run: |
53-
make build
53+
make build sidecar-build

.github/workflows/ci-release.yaml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,26 @@ jobs:
3737
echo "prerelease=${PRE_RELEASE}" >> "$GITHUB_OUTPUT"
3838
shell: bash
3939

40-
- name: Build and push image
40+
- name: Build and push EPP image
4141
uses: ./.github/actions/docker-build-and-push
4242
with:
43+
docker-file: Dockerfile
4344
tag: ${{ steps.tag.outputs.tag }}
4445
image-name: ${{ steps.version.outputs.project_name }}
4546
registry: ghcr.io/llm-d
4647
github-token: ${{ secrets.GHCR_TOKEN }}
4748
prerelease: ${{ steps.tag.outputs.prerelease }}
4849

50+
- name: Build and push sidecar image
51+
uses: ./.github/actions/docker-build-and-push
52+
with:
53+
docker-file: Dockerfile.sidecar
54+
tag: ${{ steps.tag.outputs.tag }}
55+
image-name: llm-d-routing-sidecar
56+
registry: ghcr.io/llm-d
57+
github-token: ${{ secrets.GHCR_TOKEN }}
58+
prerelease: ${{ steps.tag.outputs.prerelease }}
59+
4960
- name: Run Trivy scan
5061
uses: ./.github/actions/trivy-scan
5162
with:

Dockerfile.sidecar

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Build Stage: using Go 1.25 image
2+
FROM quay.io/projectquay/golang:1.25 AS builder
3+
ARG TARGETOS
4+
ARG TARGETARCH
5+
ARG COMMIT_SHA=unknown
6+
ARG BUILD_REF
7+
8+
WORKDIR /workspace
9+
# Copy the Go Modules manifests
10+
COPY go.mod go.mod
11+
COPY go.sum go.sum
12+
# cache deps before building and copying source so that we don't need to re-download as much
13+
# and so that source changes don't invalidate our downloaded layer
14+
RUN go mod download
15+
16+
# Copy the go source
17+
COPY cmd/pd-sidecar/main.go cmd/cmd.go
18+
COPY pkg/sidecar pkg/sidecar
19+
COPY pkg/common pkg/common
20+
21+
# Build
22+
# GOARCH deliberately has no default value, so that the binary is built for the host where the command
23+
# was called. For example, if we call make sidecar-image-build in a local env running on Apple Silicon (M1),
24+
# the docker BUILDPLATFORM arg will be linux/arm64, whereas for Apple x86 it will be linux/amd64. Therefore,
25+
# by leaving it empty we can ensure that the container and the binary shipped in it will have the same platform.
26+
ENV CGO_ENABLED=0
27+
ENV GOOS=${TARGETOS:-linux}
28+
ENV GOARCH=${TARGETARCH}
29+
RUN go build -a -o bin/pd-sidecar \
30+
-ldflags="-X github.com/llm-d/llm-d-inference-scheduler/pkg/sidecar/version.CommitSHA=${COMMIT_SHA} -X github.com/llm-d/llm-d-inference-scheduler/pkg/sidecar/version.BuildRef=${BUILD_REF}" \
31+
cmd/cmd.go
32+
33+
FROM registry.access.redhat.com/ubi9/ubi-micro:latest
34+
WORKDIR /
35+
COPY --from=builder /workspace/bin/pd-sidecar /app/pd-sidecar
36+
USER 65532:65532
37+
38+
ENTRYPOINT ["/app/pd-sidecar"]

Makefile

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,15 @@ SHELL := /usr/bin/env bash
77
TARGETOS ?= $(shell go env GOOS)
88
TARGETARCH ?= $(shell go env GOARCH)
99
PROJECT_NAME ?= llm-d-inference-scheduler
10+
SIDECAR_IMAGE_NAME ?= llm-d-routing-sidecar
11+
SIDECAR_NAME ?= pd-sidecar
1012
IMAGE_REGISTRY ?= ghcr.io/llm-d
1113
IMAGE_TAG_BASE ?= $(IMAGE_REGISTRY)/$(PROJECT_NAME)
1214
EPP_TAG ?= dev
1315
IMG = $(IMAGE_TAG_BASE):$(EPP_TAG)
16+
SIDECAR_TAG ?= dev
17+
SIDECAR_IMAGE_TAG_BASE ?= $(IMAGE_REGISTRY)/$(SIDECAR_IMAGE_NAME)
18+
SIDECAR_IMG = $(SIDECAR_IMAGE_TAG_BASE):$(SIDECAR_TAG)
1419
NAMESPACE ?= hc4ai-operator
1520

1621
# Map go arch to typos arch
@@ -103,6 +108,14 @@ post-deploy-test: ## Run post deployment tests
103108
echo Success!
104109
@echo "Post-deployment tests passed."
105110

111+
.PHONY: sidecar-test
112+
sidecar-test: sidecar-test-unit ## Run Sidecar tests
113+
114+
.PHONY: sidecar-test-unit
115+
sidecar-test-unit: ## Run Sidecar unit tests
116+
@printf "\033[33;1m==== Running tests ====\033[0m\n"
117+
go test -v $$(echo $$(go list ./pkg/sidecar/...)) -ginkgo.v
118+
106119
.PHONY: lint
107120
lint: check-golangci-lint check-typos ## Run lint
108121
@printf "\033[33;1m==== Running linting ====\033[0m\n"
@@ -116,6 +129,11 @@ build: check-go install-dependencies download-tokenizer ## Build the project
116129
@printf "\033[33;1m==== Building ====\033[0m\n"
117130
go build -ldflags="$(LDFLAGS)" -o bin/epp cmd/epp/main.go
118131

132+
.PHONY: sidecar-build
133+
sidecar-build: check-go ## Build the Sidecar
134+
@printf "\033[33;1m==== Building the Sidecar ====\033[0m\n"
135+
go build -o bin/$(SIDECAR_NAME) cmd/$(SIDECAR_NAME)/main.go
136+
119137
##@ Container Build/Push
120138

121139
.PHONY: image-build
@@ -134,6 +152,21 @@ image-push: check-container-tool ## Push Docker image $(IMG) to registry
134152
@printf "\033[33;1m==== Pushing Docker image $(IMG) ====\033[0m\n"
135153
$(CONTAINER_TOOL) push $(IMG)
136154

155+
.PHONY: sidecar-image-build
156+
sidecar-image-build: check-container-tool ## Build Sidecar Docker image using $(CONTAINER_TOOL)
157+
@printf "\033[33;1m==== Building Sidecar Docker image $(SIDECAR_IMG) ====\033[0m\n"
158+
$(CONTAINER_TOOL) build \
159+
--build-arg TARGETOS=linux \
160+
--build-arg TARGETARCH=$(TARGETARCH) \
161+
--build-arg COMMIT_SHA=${GIT_COMMIT_SHA} \
162+
--build-arg BUILD_REF=${BUILD_REF} \
163+
-t $(SIDECAR_IMG) -f Dockerfile.sidecar .
164+
165+
.PHONY: sidecar-image-push
166+
sidecar-image-push: check-container-tool load-version-json ## Push Sidecar Docker image $(SIDECAR_IMG) to registry
167+
@printf "\033[33;1m==== Pushing Sidecar Docker image $(SIDECAR_IMG) ====\033[0m\n"
168+
$(CONTAINER_TOOL) push $(SIDECAR_IMG)
169+
137170
##@ Install/Uninstall Targets
138171

139172
# Default install/uninstall (Docker)

cmd/pd-sidecar/main.go

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/*
2+
Copyright 2025 The llm-d Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
package main
17+
18+
import (
19+
"flag"
20+
"net/url"
21+
"os"
22+
23+
"k8s.io/klog/v2"
24+
ctrl "sigs.k8s.io/controller-runtime"
25+
26+
"github.com/llm-d/llm-d-inference-scheduler/pkg/sidecar/proxy"
27+
"github.com/llm-d/llm-d-inference-scheduler/pkg/sidecar/version"
28+
)
29+
30+
func main() {
31+
port := flag.String("port", "8000", "the port the sidecar is listening on")
32+
vLLMPort := flag.String("vllm-port", "8001", "the port vLLM is listening on")
33+
connector := flag.String("connector", "nixlv2", "the P/D connector being used. Either nixl, nixlv2 or lmcache")
34+
prefillerUseTLS := flag.Bool("prefiller-use-tls", false, "whether to use TLS when sending requests to prefillers")
35+
decoderUseTLS := flag.Bool("decoder-use-tls", false, "whether to use TLS when sending requests to the decoder")
36+
prefillerInsecureSkipVerify := flag.Bool("prefiller-tls-insecure-skip-verify", false, "configures the proxy to skip TLS verification for requests to prefiller")
37+
decoderInsecureSkipVerify := flag.Bool("decoder-tls-insecure-skip-verify", false, "configures the proxy to skip TLS verification for requests to decoder")
38+
secureProxy := flag.Bool("secure-proxy", true, "Enables secure proxy. Defaults to true.")
39+
certPath := flag.String(
40+
"cert-path", "", "The path to the certificate for secure proxy. The certificate and private key files "+
41+
"are assumed to be named tls.crt and tls.key, respectively. If not set, and secureProxy is enabled, "+
42+
"then a self-signed certificate is used (for testing).")
43+
enableSSRFProtection := flag.Bool("enable-ssrf-protection", false, "enable SSRF protection using InferencePool allowlisting")
44+
inferencePoolNamespace := flag.String("inference-pool-namespace", os.Getenv("INFERENCE_POOL_NAMESPACE"), "the Kubernetes namespace to watch for InferencePool resources (defaults to INFERENCE_POOL_NAMESPACE env var)")
45+
inferencePoolName := flag.String("inference-pool-name", os.Getenv("INFERENCE_POOL_NAME"), "the specific InferencePool name to watch (defaults to INFERENCE_POOL_NAME env var)")
46+
47+
klog.InitFlags(nil)
48+
flag.Parse()
49+
50+
// make sure to flush logs before exiting
51+
defer klog.Flush()
52+
53+
ctx := ctrl.SetupSignalHandler()
54+
logger := klog.FromContext(ctx)
55+
56+
logger.Info("Proxy starting", "Built on", version.BuildRef, "From Git SHA", version.CommitSHA)
57+
58+
if *connector != proxy.ConnectorNIXLV2 && *connector != proxy.ConnectorLMCache {
59+
logger.Info("Error: --connector must either be 'nixlv2' or 'lmcache'")
60+
return
61+
}
62+
logger.Info("p/d connector validated", "connector", connector)
63+
64+
// Determine namespace and pool name for SSRF protection
65+
if *enableSSRFProtection {
66+
if *inferencePoolNamespace == "" {
67+
logger.Info("Error: --inference-pool-namespace or INFERENCE_POOL_NAMESPACE environment variable is required when --enable-ssrf-protection is true")
68+
return
69+
}
70+
if *inferencePoolName == "" {
71+
logger.Info("Error: --inference-pool-name or INFERENCE_POOL_NAME environment variable is required when --enable-ssrf-protection is true")
72+
return
73+
}
74+
75+
logger.Info("SSRF protection enabled", "namespace", inferencePoolNamespace, "poolName", inferencePoolName)
76+
}
77+
78+
// start reverse proxy HTTP server
79+
scheme := "http"
80+
if *decoderUseTLS {
81+
scheme = "https"
82+
}
83+
targetURL, err := url.Parse(scheme + "://localhost:" + *vLLMPort)
84+
if err != nil {
85+
logger.Error(err, "failed to create targetURL")
86+
return
87+
}
88+
89+
config := proxy.Config{
90+
Connector: *connector,
91+
PrefillerUseTLS: *prefillerUseTLS,
92+
SecureProxy: *secureProxy,
93+
CertPath: *certPath,
94+
PrefillerInsecureSkipVerify: *prefillerInsecureSkipVerify,
95+
DecoderInsecureSkipVerify: *decoderInsecureSkipVerify,
96+
EnableSSRFProtection: *enableSSRFProtection,
97+
InferencePoolNamespace: *inferencePoolNamespace,
98+
InferencePoolName: *inferencePoolName,
99+
}
100+
101+
proxy, err := proxy.NewProxy(*port, targetURL, config)
102+
if err != nil {
103+
logger.Error(err, "Failed to create proxy")
104+
}
105+
if err := proxy.Start(ctx); err != nil {
106+
logger.Error(err, "failed to start proxy server")
107+
}
108+
}

pkg/common/common.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
// Package common contains items common to both the
2+
// EPP/Inference-Scheduler and the Routing Sidecar
3+
package common
4+
5+
const (
6+
// PrefillPodHeader is the header name used to indicate Prefill worker <ip:port>
7+
PrefillPodHeader = "x-prefiller-host-port"
8+
)

pkg/plugins/pre-request/pd_prerequest.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@ import (
1111
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
1212
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/requestcontrol"
1313
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
14+
15+
"github.com/llm-d/llm-d-inference-scheduler/pkg/common"
1416
)
1517

1618
const (
1719
// PrefillHeaderHandlerType is the type of the PrefillHeaderHandler
1820
PrefillHeaderHandlerType = "prefill-header-handler"
19-
// prefillPodHeader is the header name used to indicate Prefill worker <ip:port>
20-
prefillPodHeader = "x-prefiller-host-port"
2121

2222
defaultPrefillProfile = "prefill"
2323
)
@@ -69,8 +69,8 @@ func (p *PrefillHeaderHandler) WithName(name string) *PrefillHeaderHandler {
6969

7070
// PreRequest wires prefill SchedulerProfile result into a header to indicate prefill worker
7171
func (p *PrefillHeaderHandler) PreRequest(_ context.Context, request *types.LLMRequest, schedulingResult *types.SchedulingResult, targetPort int) {
72-
if _, found := request.Headers[prefillPodHeader]; found {
73-
request.Headers[prefillPodHeader] = "" // clear header, if already set
72+
if _, found := request.Headers[common.PrefillPodHeader]; found {
73+
request.Headers[common.PrefillPodHeader] = "" // clear header, if already set
7474
}
7575

7676
prefillProfileRunResult, exists := schedulingResult.ProfileResults[p.prefillProfile]
@@ -79,5 +79,5 @@ func (p *PrefillHeaderHandler) PreRequest(_ context.Context, request *types.LLMR
7979
}
8080

8181
prefillHostPort := net.JoinHostPort(prefillProfileRunResult.TargetPods[0].GetPod().Address, strconv.Itoa(targetPort))
82-
request.Headers[prefillPodHeader] = prefillHostPort // in the form of <ip:port>
82+
request.Headers[common.PrefillPodHeader] = prefillHostPort // in the form of <ip:port>
8383
}

0 commit comments

Comments
 (0)