Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions Dockerfile.activator
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Build stage: compile the activator binary using the Go 1.24 toolchain image.
FROM quay.io/projectquay/golang:1.24 AS builder
ARG TARGETOS
ARG TARGETARCH

WORKDIR /workspace

# Go module manifests
COPY go.mod go.sum ./

# Activator sources
COPY cmd/activator ./cmd/activator
COPY pkg/activator ./pkg/activator

# Build
# GOARCH deliberately has no default, so the binary is built for the platform of the
# host that invoked the build. For example, running `make image-build` locally on
# Apple Silicon (M1) yields a docker BUILDPLATFORM of linux/arm64, whereas on an
# Intel Mac it is linux/amd64. Leaving GOARCH empty therefore keeps the container
# and the binary shipped in it on the same platform.
ENV CGO_ENABLED=0 \
    GOOS=${TARGETOS:-linux} \
    GOARCH=${TARGETARCH}
# Version metadata stamped into the binary through -ldflags below.
ARG COMMIT_SHA=unknown
ARG BUILD_REF
RUN go build -a -o bin/activator \
    -ldflags="-X sigs.k8s.io/gateway-api-inference-extension/version.CommitSHA=${COMMIT_SHA} -X sigs.k8s.io/gateway-api-inference-extension/version.BuildRef=${BUILD_REF}" \
    cmd/activator/main.go


# Runtime stage: ubi9-minimal is used as a small base image to package the activator binary.
# Refer to https://catalog.redhat.com/software/containers/ubi9/ubi-minimal/615bd9b4075b022acc111bf5 for more details
FROM registry.access.redhat.com/ubi9/ubi-minimal:latest
WORKDIR /
COPY --from=builder /workspace/bin/activator /app/activator

# Expose gRPC, health and metrics ports.
# NOTE(review): the charts/activator README documents ext_proc/health defaults of
# 9004/9005 - confirm that these EXPOSE values match what the activator listens on.
EXPOSE 9002 9003 9090

# Run as a fixed non-root UID:GID.
USER 65532:65532

ENTRYPOINT ["/app/activator"]
6 changes: 2 additions & 4 deletions Dockerfile.epp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
## Minimal runtime Dockerfile (microdnf-only, no torch, wrapper in site-packages)
# Build Stage: using Go 1.25 image
FROM quay.io/projectquay/golang:1.25 AS builder

# Build Stage: using Go 1.24 image
FROM quay.io/projectquay/golang:1.24 AS builder
ARG TARGETOS
ARG TARGETARCH
ARG KVCACHE_MANAGER_VERSION=v0.3.2
Expand Down Expand Up @@ -105,4 +104,3 @@ EXPOSE 9090
EXPOSE 5557

ENTRYPOINT ["/app/epp"]

19 changes: 14 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ NAMESPACE ?= hc4ai-operator
VLLM_SIMULATOR_TAG ?= v0.5.0
export VLLM_SIMULATOR_TAG

# Activator image coordinates. Every `?=` value is only a default and can be
# overridden from the environment or the make command line.
ACTIVATOR_IMAGE_NAME ?= llm-d-activator
ACTIVATOR_NAME ?= activator
ACTIVATOR_TAG ?= dev
# Image repository without the tag; IMAGE_REGISTRY is defined outside this section.
ACTIVATOR_IMAGE_TAG_BASE ?= $(IMAGE_REGISTRY)/$(ACTIVATOR_IMAGE_NAME)
# Full image reference (<tag base>:<tag>). Recursively expanded (`=`), so it picks up
# the values of the variables above at the point where it is used.
ACTIVATOR_IMG = $(ACTIVATOR_IMAGE_TAG_BASE):$(ACTIVATOR_TAG)

# Map go arch to typos arch
ifeq ($(TARGETARCH),amd64)
TYPOS_TARGET_ARCH = x86_64
Expand Down Expand Up @@ -59,10 +65,13 @@ SRC = $(shell find . -type f -name '*.go')
# Internal variables for generic targets.
# Pattern rules look these up by stem (for example, image-build-% reads $($*_IMAGE)),
# so each component (epp, sidecar, activator) provides its own <stem>_<KEY> entries.

# Container image reference per component.
epp_IMAGE = $(IMG)
sidecar_IMAGE = $(SIDECAR_IMG)
activator_IMAGE = $(ACTIVATOR_IMG)
# Component name per stem.
epp_NAME = epp
sidecar_NAME = $(SIDECAR_NAME)
activator_NAME = $(ACTIVATOR_NAME)
# Linker flags per component; the sidecar entry is empty.
epp_LDFLAGS = -ldflags="$(LDFLAGS)"
sidecar_LDFLAGS =
activator_LDFLAGS = -ldflags="$(LDFLAGS)"
# Shell commands that list the Go packages belonging to each component.
# NOTE(review): there is no activator_TEST_FILES entry, and epp_TEST_FILES filters out
# only /test/ and ./pkg/sidecar/ - confirm whether the activator needs its own entry.
epp_TEST_FILES = go list ./... | grep -v /test/ | grep -v ./pkg/sidecar/
sidecar_TEST_FILES = go list ./pkg/sidecar/...

Expand Down Expand Up @@ -135,7 +144,7 @@ lint: check-golangci-lint check-typos ## Run lint
##@ Build

.PHONY: build
build: build-epp build-sidecar ## Build the project
build: build-epp build-sidecar build-activator ## Build the project

.PHONY: build-%
build-%: check-go install-dependencies download-tokenizer ## Build the project
Expand All @@ -145,7 +154,7 @@ build-%: check-go install-dependencies download-tokenizer ## Build the project
##@ Container Build/Push

.PHONY: image-build
image-build: image-build-epp image-build-sidecar ## Build Docker image
image-build: image-build-epp image-build-sidecar image-build-activator ## Build Docker image

.PHONY: image-build-%
image-build-%: check-container-tool ## Build Docker image ## Build Docker image using $(CONTAINER_RUNTIME)
Expand All @@ -159,7 +168,7 @@ image-build-%: check-container-tool ## Build Docker image ## Build Docker image
-t $($*_IMAGE) -f Dockerfile.$* .

.PHONY: image-push
image-push: image-push-epp image-push-sidecar ## Push container images to registry
image-push: image-push-epp image-push-sidecar image-push-activator ## Push container images to registry

.PHONY: image-push-%
image-push-%: check-container-tool ## Push container image to registry
Expand Down Expand Up @@ -287,7 +296,7 @@ check-typos: $(TYPOS) ## Check for spelling errors using typos (exits with error
echo "$$TYPOS_OUTPUT"; \
exit 1; \
fi

##@ Tools

.PHONY: check-tools
Expand Down Expand Up @@ -336,7 +345,7 @@ check-container-tool:
else \
echo "✅ Container tool '$(CONTAINER_RUNTIME)' found."; \
fi


.PHONY: check-kubectl
check-kubectl:
Expand Down
23 changes: 23 additions & 0 deletions charts/activator-filter/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
9 changes: 9 additions & 0 deletions charts/activator-filter/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Chart metadata for the activator HTTP filter chart.
apiVersion: v2
# Keep the chart name identical to its directory (charts/activator-filter), as Helm
# expects the directory name to be the chart name.
name: activator-filter
description: A Helm chart for the activator extension

type: application

# Version of the chart itself.
version: 0.1.0

# Version of the application deployed by the chart.
appVersion: "0.2.0"
33 changes: 33 additions & 0 deletions charts/activator-filter/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Activator Chart

A chart to deploy the activator HTTP filter for an InferenceGateway, together with the RBAC resources shared by all per-route activator deployments.

## Install

To install an activator-filter named `activator-filter`, you can run the following command:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

whoever is going to use activator isn't necessarily familiar with what is filter. we can call it just activator.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there is already a chart named activator. What about activator-istio?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a use case for deploying one chart without the other?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

both are needed. The only difference is one is applied once per namespace, the other one once per HTTPRoute.


```txt
$ helm install activator-filter ./charts/activator-filter
```

> **Note:** This chart should be deployed before the [Body Based Routing](https://github.com/kubernetes-sigs/gateway-api-inference-extension/tree/main/config/charts/body-based-routing) chart for optimal functionality.

## Uninstall

Run the following command to uninstall the chart:

```txt
$ helm uninstall activator-filter
```

## Configuration

The following table lists the configurable parameters of the chart.

| **Parameter Name** | **Description** |
|---------------------------------------------|----------------------------------------------------------------------------------------------------|
| `name` | Name of the activator RBAC resources; also used as the prefix of the EnvoyFilter name (`<name>-ext-proc`). Defaults to `activator`. |

## Notes

This chart should only be deployed once per namespace.
1 change: 1 addition & 0 deletions charts/activator-filter/templates/NOTES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Activator HTTP Filter deployed.
25 changes: 25 additions & 0 deletions charts/activator-filter/templates/istio.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Istio EnvoyFilter that inserts the activator ext_proc HTTP filter at the head of
# the HTTP filter chain.
apiVersion: networking.istio.io/v1alpha3
kind: EnvoyFilter
metadata:
  name: {{ .Values.name }}-ext-proc
  namespace: {{ .Release.Namespace }}
spec:
  configPatches:
    - applyTo: HTTP_FILTER
      match:
        # context omitted so that this applies to both sidecars and gateways
        listener:
          filterChain:
            filter:
              name: envoy.filters.network.http_connection_manager
      patch:
        # Place the filter ahead of the other HTTP filters.
        operation: INSERT_FIRST
        value:
          name: envoy.filters.http.activator.ext_proc
          typed_config:
            '@type': type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
            failure_mode_allow: true
            grpc_service:
              envoy_grpc:
                # NOTE(review): "no-op" looks like a placeholder cluster name that is
                # presumably overridden elsewhere (e.g. per route) - confirm.
                cluster_name: no-op
            message_timeout: 120s
82 changes: 82 additions & 0 deletions charts/activator-filter/templates/rbac.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# ServiceAccount, Role and RoleBinding for the activator, all created in the release
# namespace and all named after .Values.name.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ .Values.name }}
  namespace: {{ .Release.Namespace }}
---
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: {{ .Values.name }}
  namespace: {{ .Release.Namespace }}
rules: # TODO: These can probably be trimmed down
  # Read access to InferencePools in both API groups.
  - apiGroups: ["inference.networking.x-k8s.io", "inference.networking.k8s.io"]
    resources: ["inferencepools"]
    verbs: ["get", "watch", "list"]
  # Read access to Pods (core API group).
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "watch", "list"]
  # Read access to EndpointSlices.
  - apiGroups: ["discovery.k8s.io"]
    resources: ["endpointslices"]
    verbs: ["get", "watch", "list"]
  # Creation of TokenReviews.
  - apiGroups: ["authentication.k8s.io"]
    resources: ["tokenreviews"]
    verbs: ["create"]
  # Creation of SubjectAccessReviews.
  - apiGroups: ["authorization.k8s.io"]
    resources: ["subjectaccessreviews"]
    verbs: ["create"]
  # Full management of Deployments.
  - apiGroups: ["apps"]
    resources: ["deployments"]
    verbs: ["create", "get", "list", "watch", "update", "patch", "delete"]
  # Read and update the scale subresource of Deployments.
  - apiGroups: ["apps"]
    resources: ["deployments/scale"]
    verbs: ["get", "update"]
---
# Grants the Role above to the ServiceAccount above.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: {{ .Values.name }}
  namespace: {{ .Release.Namespace }}
subjects:
  - kind: ServiceAccount
    name: {{ .Values.name }}
    namespace: {{ .Release.Namespace }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: {{ .Values.name }}
1 change: 1 addition & 0 deletions charts/activator-filter/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Base name for the resources rendered by this chart: the ServiceAccount, Role and
# RoleBinding use it as-is, and the EnvoyFilter is named "<name>-ext-proc".
name: activator
23 changes: 23 additions & 0 deletions charts/activator/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
9 changes: 9 additions & 0 deletions charts/activator/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Chart metadata for the per-HTTPRoute activator chart.
apiVersion: v2
# NOTE(review): the chart name is `activator-route` while the chart directory is
# charts/activator - confirm whether the mismatch is intended.
name: activator-route
description: A Helm chart for the activator extension

type: application

# Version of the chart itself.
version: 0.1.0

# Version of the application deployed by the chart.
appVersion: "0.2.0"
44 changes: 44 additions & 0 deletions charts/activator/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Activator Chart

A chart to deploy the activator deployment and service per HTTPRoute.

## Install

To install an activator named `<route.name>-activator`, you can run the following command:

```txt
$ helm install activator ./charts/activator \
--set route.name=http-route-name \
--set inferencePool.name=inference-pool-name

```

## Uninstall

Run the following command to uninstall the chart:

```txt
$ helm uninstall activator
```

## Configuration

The following table lists the configurable parameters of the chart.

| **Parameter Name** | **Description** |
|---------------------------------------------|----------------------------------------------------------------------------------------------------|
| `activator.suffix` | Suffix to append to the name of the activator deployment and service. Defaults to `-activator`. |
| `activator.port` | Port serving ext_proc. Defaults to `9004`. |
| `activator.healthCheckPort` | Port for health checks. Defaults to `9005`. |
| `activator.image.name` | Name of the container image used. |
| `activator.image.registry` | Registry URL and namespace where the image is hosted. |
| `activator.image.tag` | Image tag. |
| `activator.image.pullPolicy` | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. |
| `inferenceGateway.port` | The port of the Gateway. Defaults to `80`. |
| `inferencePool.name` | The name of the InferencePool to target. |
| `inferencePool.apiVersion` | The API version of the InferencePool. Defaults to `inference.networking.k8s.io`. |
| `route.name` | The name of the HTTPRoute to attach the activator to. |

## Notes

This chart should only be deployed once per HTTPRoute.
1 change: 1 addition & 0 deletions charts/activator/templates/NOTES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Activator extension deployed for HTTPRoute {{ .Values.route.name }}
Loading