From 31f6e4a37120bdb6a9757e9bdb8b192dda7de4c7 Mon Sep 17 00:00:00 2001
From: Braulio Dumba <Braulio.Dumba@ibm.com>
Date: Wed, 5 Nov 2025 16:09:13 -0500
Subject: [PATCH] Activator E2E Tests

Signed-off-by: Braulio Dumba <Braulio.Dumba@ibm.com>
---
 go.mod                                     |   3 +
 go.sum                                     |   4 +
 test/activator/e2e_suite_test.go           | 200 +++++++++++++++++++++
 test/activator/e2e_test.go                 | 138 ++++++++++++++
 test/activator/utils_test.go               |  63 +++++++
 test/activator/yaml/activator-filters.yaml |  56 ++++++
 test/activator/yaml/activator.yaml         |  36 ++++
 test/activator/yaml/epp-configmap.yaml     |  18 ++
 test/activator/yaml/epp.yaml               |  74 ++++++++
 test/activator/yaml/inference-pools.yaml   |  20 +++
 test/activator/yaml/istio.helmfile.yaml    |  30 ++++
 test/activator/yaml/network-config.yaml    |  59 ++++++
 test/activator/yaml/rbacs.yaml             | 118 ++++++++++++
 test/activator/yaml/service-accounts.yaml  |  10 ++
 test/activator/yaml/services.yaml          |  62 +++++++
 test/activator/yaml/vllm-sim-1.yaml        |  46 +++++
 test/activator/yaml/vllm-sim-2.yaml        |  46 +++++
 17 files changed, 983 insertions(+)
 create mode 100644 test/activator/e2e_suite_test.go
 create mode 100644 test/activator/e2e_test.go
 create mode 100644 test/activator/utils_test.go
 create mode 100644 test/activator/yaml/activator-filters.yaml
 create mode 100644 test/activator/yaml/activator.yaml
 create mode 100644 test/activator/yaml/epp-configmap.yaml
 create mode 100644 test/activator/yaml/epp.yaml
 create mode 100644 test/activator/yaml/inference-pools.yaml
 create mode 100644 test/activator/yaml/istio.helmfile.yaml
 create mode 100644 test/activator/yaml/network-config.yaml
 create mode 100644 test/activator/yaml/rbacs.yaml
 create mode 100644 test/activator/yaml/service-accounts.yaml
 create mode 100644 test/activator/yaml/services.yaml
 create mode 100644 test/activator/yaml/vllm-sim-1.yaml
 create mode 100644 test/activator/yaml/vllm-sim-2.yaml

diff --git a/go.mod b/go.mod
index 3409a248..9e4065e7 100644
--- a/go.mod
+++ b/go.mod
@@ -17,6 +17,7 @@ require (
 	github.com/stretchr/testify v1.11.1
 	golang.org/x/sync v0.17.0
 	google.golang.org/grpc v1.76.0
+	istio.io/client-go v1.28.0
 	k8s.io/api v0.34.1
 	k8s.io/apiextensions-apiserver v0.34.1
 	k8s.io/apimachinery v0.34.1
@@ -57,6 +58,7 @@ require (
 	github.com/go-openapi/swag v0.23.1 // indirect
 	github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
+	github.com/golang/protobuf v1.5.4 // indirect
 	github.com/google/btree v1.1.3 // indirect
 	github.com/google/cel-go v0.26.0 // indirect
 	github.com/google/gnostic-models v0.7.0 // indirect
@@ -123,6 +125,7 @@ require (
 	gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
+	istio.io/api v1.28.0 // indirect
 	k8s.io/apiserver v0.34.1 // indirect
 	k8s.io/component-base v0.34.1 // indirect
 	k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3 // indirect
diff --git a/go.sum b/go.sum
index cd221b8f..b085942e 100644
--- a/go.sum
+++ b/go.sum
@@ -385,6 +385,10 @@ gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+istio.io/api v1.28.0 h1:0fYY9G03CAdFwE/fCkpr0v7kKsy+Hz9OCCjLNBNLbnU=
+istio.io/api v1.28.0/go.mod h1:BD3qv/ekm16kvSgvSpuiDawgKhEwG97wx849CednJSg=
+istio.io/client-go v1.28.0 h1:EqP19aYNvH42VQAmS/mHXZ51PU3nlrnF6MeeGldJSas=
+istio.io/client-go v1.28.0/go.mod h1:mcFWH+wv9ltQqoDYyfLeVFyRZuD7n1Fj7TD5RGohqSU=
 k8s.io/api v0.34.1 h1:jC+153630BMdlFukegoEL8E/yT7aLyQkIVuwhmwDgJM=
 k8s.io/api v0.34.1/go.mod h1:SB80FxFtXn5/gwzCoN6QCtPD7Vbu5w2n1S0J5gFfTYk=
 k8s.io/apiextensions-apiserver v0.34.1 h1:NNPBva8FNAPt1iSVwIE0FsdrVriRXMsaWFMqJbII2CI=
diff --git a/test/activator/e2e_suite_test.go b/test/activator/e2e_suite_test.go
new file mode 100644
index 00000000..4f1dae2c
--- /dev/null
+++ b/test/activator/e2e_suite_test.go
@@ -0,0 +1,200 @@
+package e2e
+
+import (
+	"fmt"
+	"io"
+	"os/exec"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/onsi/ginkgo/v2"
+	"github.com/onsi/gomega"
+	"github.com/onsi/gomega/gexec"
+	apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
+	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
+	"sigs.k8s.io/controller-runtime/pkg/client/config"
+	k8slog "sigs.k8s.io/controller-runtime/pkg/log"
+
+	infextv1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
+	infextv1a2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/env"
+	testutils "sigs.k8s.io/gateway-api-inference-extension/test/utils"
+
+	istiov1 "istio.io/client-go/pkg/apis/networking/v1"
+	istiov1a3 "istio.io/client-go/pkg/apis/networking/v1alpha3"
+	gtwv1 "sigs.k8s.io/gateway-api/apis/v1"
+)
+
+const (
+	// gatewayCrdsKustomize is the kustomization directory for the Gateway API CRDs.
+	gatewayCrdsKustomize = "../../deploy/components/crds-gateway-api"
+	// gieCrdsKustomize is the kustomization directory for the inference pool CRD with 'inference.networking.x-k8s.io' group.
+	gieCrdsKustomize = "../../deploy/components/crds-gie"
+	// inferExtManifest is the manifest for the inference extension test resources (the InferencePool).
+	inferExtManifest = "./yaml/inference-pools.yaml"
+	// eppManifest is the manifest for the deployment of the EPP.
+	eppManifest = "./yaml/epp.yaml"
+	// eppConfigManifest is the manifest for the ConfigMap holding the EPP's plugin configuration.
+	eppConfigManifest = "./yaml/epp-configmap.yaml"
+	// activatorManifest is the manifest for the deployment of the activator.
+	activatorManifest = "./yaml/activator.yaml"
+	// activatorfilterManifest is the manifest for the Istio EnvoyFilters that configure the activator ext_proc filter.
+	activatorfilterManifest = "./yaml/activator-filters.yaml"
+	// rbacManifest is the manifest for the Roles and RoleBindings of the activator and the EPP.
+	rbacManifest = "./yaml/rbacs.yaml"
+	// serviceAccountManifest is the manifest for the service accounts of the activator and the EPP.
+	serviceAccountManifest = "./yaml/service-accounts.yaml"
+	// servicesManifest is the manifest for the EPP, activator and gateway Services.
+	servicesManifest = "./yaml/services.yaml"
+	// networkConfigurationManifest is the manifest for the Gateway, HTTPRoute and DestinationRules.
+	networkConfigurationManifest = "./yaml/network-config.yaml"
+)
+
+var (
+	port       string                // host port used to reach the gateway on localhost; set in BeforeSuite
+	testConfig *testutils.TestConfig // shared test configuration; created in BeforeSuite
+
+	eppImg       = env.GetEnvString("EPP_IMAGE", "llm-d-inference-scheduler", ginkgo.GinkgoLogr)
+	eppTag       = env.GetEnvString("EPP_TAG", "dev", ginkgo.GinkgoLogr)
+	activatorImg = env.GetEnvString("ACTIVATOR_IMAGE", "llm-d-activator", ginkgo.GinkgoLogr)
+	activatorTag = env.GetEnvString("ACTIVATOR_TAG", "dev", ginkgo.GinkgoLogr)
+	vllmImg      = env.GetEnvString("VLLM_IMAGE", "llm-d-inference-sim", ginkgo.GinkgoLogr)
+	vllmTag      = env.GetEnvString("VLLM_TAG", "dev", ginkgo.GinkgoLogr)
+	// imageRegistry is prepended to every image name above; all image settings are read from the named environment variables.
+	imageRegistry = env.GetEnvString("IMAGE_REGISTRY", "ghcr.io/llm-d", ginkgo.GinkgoLogr)
+)
+
+// TestEndToEnd is the `go test` entry point: it routes Gomega failures to
+// Ginkgo and runs every spec registered in this package.
+func TestEndToEnd(t *testing.T) {
+	gomega.RegisterFailHandler(ginkgo.Fail)
+	ginkgo.RunSpecs(t, "End To End Test Suite")
+}
+
+var _ = ginkgo.BeforeSuite(func() {
+	port = "30080"
+	// The kind cluster is created first; every later step talks to it.
+	setupK8sCluster()
+	testConfig = testutils.NewTestConfig("default")
+	setupK8sClient()
+	createCRDs(gieCrdsKustomize)
+	createCRDs(gatewayCrdsKustomize)
+	createIstio()
+	createResources()
+	loadImages()
+})
+
+// Once all specs have finished, delete the "e2e-tests" kind cluster and wait (up to 600s) for kind to exit 0.
+var _ = ginkgo.AfterSuite(func() {
+	teardown, err := gexec.Start(exec.Command("kind", "delete", "cluster", "--name", "e2e-tests"), ginkgo.GinkgoWriter, ginkgo.GinkgoWriter)
+	gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+	gomega.Eventually(teardown).WithTimeout(600 * time.Second).Should(gexec.Exit(0))
+})
+
+// setupK8sCluster creates the "e2e-tests" kind cluster for the E2E tests and waits (up to 600s) for kind to exit 0.
+// The cluster config is kindClusterConfig with ${PORT} replaced by port, written to kind's stdin ("--config -").
+func setupK8sCluster() {
+	command := exec.Command("kind", "create", "cluster", "--name", "e2e-tests", "--config", "-")
+	stdin, err := command.StdinPipe()
+	gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+	go func() {
+		// The assertions below run outside the Ginkgo goroutine; GinkgoRecover turns their failure panic into a test failure.
+		defer ginkgo.GinkgoRecover()
+		// Closing the pipe gives the child process EOF on its stdin.
+		defer func() { gomega.Expect(stdin.Close()).ShouldNot(gomega.HaveOccurred()) }()
+		_, err := io.WriteString(stdin, strings.ReplaceAll(kindClusterConfig, "${PORT}", port))
+		gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+	}()
+	session, err := gexec.Start(command, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter)
+	gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+	gomega.Eventually(session).WithTimeout(600 * time.Second).Should(gexec.Exit(0))
+}
+
+// createResources creates, in order, the static manifests shared by all specs:
+// RBAC, Services, EPP ConfigMap, ServiceAccounts, EnvoyFilters and network config.
+func createResources() {
+	for _, manifest := range []string{rbacManifest, servicesManifest, eppConfigManifest,
+		serviceAccountManifest, activatorfilterManifest, networkConfigurationManifest} {
+		ApplyYAMLFile(testConfig, manifest)
+	}
+}
+
+// loadImages loads the EPP, vLLM simulator and activator images, in that order, into the kind cluster.
+func loadImages() {
+	for _, nameAndTag := range []string{eppImg + ":" + eppTag, vllmImg + ":" + vllmTag, activatorImg + ":" + activatorTag} {
+		kindLoadImage(imageRegistry + "/" + nameAndTag)
+	}
+}
+
+// kindLoadImage runs `kind load docker-image` for image against the e2e-tests cluster and waits (up to 600s) for exit code 0.
+func kindLoadImage(image string) {
+	ginkgo.By(fmt.Sprintf("Loading %s into the cluster e2e-tests", image))
+	session, err := gexec.Start(exec.Command("kind", "--name", "e2e-tests", "load", "docker-image", image), ginkgo.GinkgoWriter, ginkgo.GinkgoWriter)
+	gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+	gomega.Eventually(session).WithTimeout(600 * time.Second).Should(gexec.Exit(0))
+}
+
+// setupK8sClient prepares testConfig for talking to the cluster: it loads the
+// cluster config, registers on testConfig.Scheme every API group the suite
+// decodes objects from, creates the client via testConfig.CreateCli and routes
+// controller-runtime logging to Ginkgo's logger.
+func setupK8sClient() {
+	k8sCfg := config.GetConfigOrDie()
+	gomega.ExpectWithOffset(1, k8sCfg).NotTo(gomega.BeNil())
+
+	scheme := testConfig.Scheme
+
+	// Built-in Kubernetes API types.
+	gomega.Expect(clientgoscheme.AddToScheme(scheme)).NotTo(gomega.HaveOccurred())
+	// Inference Extension v1 types.
+	gomega.Expect(infextv1.Install(scheme)).NotTo(gomega.HaveOccurred())
+	// CustomResourceDefinition types.
+	gomega.Expect(apiextv1.AddToScheme(scheme)).NotTo(gomega.HaveOccurred())
+	// Inference Extension apix/v1alpha2 types.
+	gomega.Expect(infextv1a2.Install(scheme)).NotTo(gomega.HaveOccurred())
+	// Gateway API v1 types (Gateway, HTTPRoute).
+	gomega.Expect(gtwv1.Install(scheme)).NotTo(gomega.HaveOccurred())
+	// Istio networking v1 types (DestinationRule).
+	gomega.Expect(istiov1.AddToScheme(scheme)).NotTo(gomega.HaveOccurred())
+	// Istio networking v1alpha3 types (EnvoyFilter).
+	gomega.Expect(istiov1a3.AddToScheme(scheme)).NotTo(gomega.HaveOccurred())
+
+	testConfig.CreateCli()
+
+	k8slog.SetLogger(ginkgo.GinkgoLogr)
+}
+
+// createCRDs renders the kustomization directory manifests with kustomize and
+// creates every resulting object (the CRDs used for testing) in the cluster.
+func createCRDs(manifests string) {
+	CreateObjsFromYaml(testConfig, runKustomize(manifests))
+}
+
+// runKustomize runs `kustomize build` on kustomizeDir, waits (up to 600s) for exit code 0 and returns its stdout split on "\n---".
+func runKustomize(kustomizeDir string) []string {
+	build, err := gexec.Start(exec.Command("kustomize", "build", kustomizeDir), nil, ginkgo.GinkgoWriter)
+	gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+	gomega.Eventually(build).WithTimeout(600 * time.Second).Should(gexec.Exit(0))
+	return strings.Split(string(build.Out.Contents()), "\n---")
+}
+
+// createIstio runs `helmfile apply` on ./yaml/istio.helmfile.yaml (the Istio releases) and waits (up to 600s) for exit code 0.
+func createIstio() {
+	apply, err := gexec.Start(exec.Command("helmfile", "apply", "-f", "./yaml/istio.helmfile.yaml"), ginkgo.GinkgoWriter, ginkgo.GinkgoWriter)
+	gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+	gomega.Eventually(apply).WithTimeout(600 * time.Second).Should(gexec.Exit(0))
+}
+// kindClusterConfig is the kind cluster definition passed to `kind create cluster`; setupK8sCluster replaces ${PORT} with port.
+const kindClusterConfig = `
+kind: Cluster
+apiVersion: kind.x-k8s.io/v1alpha4
+nodes:
+- extraPortMappings:
+  - containerPort: 30080
+    hostPort: ${PORT}
+    protocol: TCP
+  - containerPort: 30081
+    hostPort: 30081
+    protocol: TCP
+`
diff --git a/test/activator/e2e_test.go b/test/activator/e2e_test.go
new file mode 100644
index 00000000..295739c7
--- /dev/null
+++ b/test/activator/e2e_test.go
@@ -0,0 +1,138 @@
+package e2e
+
+import (
+	"fmt"
+	"net/http"
+	"strings"
+
+	"github.com/onsi/ginkgo/v2"
+	"github.com/onsi/gomega"
+	"github.com/openai/openai-go"
+	"github.com/openai/openai-go/option"
+	testutils "sigs.k8s.io/gateway-api-inference-extension/test/utils"
+)
+
+const (
+	// simplePrompt is the user message sent by the specs; simDeployment1/2 are the simulator Deployment manifests; modelserver is the target Deployment name.
+	simplePrompt   = "Hello my name is Andrew, I have a doctorate in Rocket Science, and I like interplanetary space exploration"
+	simDeployment1 = "./yaml/vllm-sim-1.yaml"
+	simDeployment2 = "./yaml/vllm-sim-2.yaml"
+	modelserver    = "granite-3-8b"
+)
+
+var (
+	modelName = "granite/granite-3-8b-instruct" // model requested in the chat completion
+	nsName    = "default"                       // namespace the specs expect in the x-inference-namespace header
+)
+
+// Both specs run the same scale-from-zero scenario and differ only in the
+// grace period written to the InferencePool annotation.
+var _ = ginkgo.Describe("Run end to end tests", ginkgo.Ordered, func() {
+	ginkgo.When("Running simple non-PD configuration", func() {
+		ginkgo.It("should run successfully", func() {
+			runActivatorScenario("30")
+		})
+	})
+
+	ginkgo.When("Running simple non-PD KV enabled configuration", func() {
+		ginkgo.It("should run successfully", func() {
+			runActivatorScenario("80")
+		})
+	})
+})
+
+// runActivatorScenario is the shared body of the specs above. It creates the
+// InferencePool, the EPP, the activator and the model server Deployment,
+// sends one chat completion through the gateway, checks the routing headers
+// of the response and then deletes everything it created.
+//
+// gracePeriod replaces ${GRACE_PERIOD} in the InferencePool manifest, i.e. the
+// value of the activator's scale-from-zero-grace-period annotation.
+//
+// The deletions at the end are only reached when every assertion passed.
+func runActivatorScenario(gracePeriod string) {
+	// Create inferencePool
+	inferencePools := createInferencePool(inferExtManifest, "apps/v1", "Deployment", modelserver, gracePeriod)
+
+	// Create workload objects: EPP and activator
+	epp := createResource(eppManifest, imageRegistry, eppImg, eppTag)
+	activator := createResource(activatorManifest, imageRegistry, activatorImg, activatorTag)
+
+	// Create model server (its manifest declares zero replicas)
+	modelServers := createResource(simDeployment1, imageRegistry, vllmImg, vllmTag)
+
+	nsHdr, podHdr := runChatCompletion(simplePrompt)
+	gomega.Expect(nsHdr).Should(gomega.Equal(nsName))
+	// NOTE(review): modelServers[0] has the "Kind/name" form returned by
+	// CreateObjsFromYaml - confirm that x-inference-pod carries the same form.
+	gomega.Expect(podHdr).Should(gomega.Equal(modelServers[0]))
+
+	// Clean up: EPP, activator, model server, then the pool.
+	for _, created := range [][]string{epp, activator, modelServers, inferencePools} {
+		testutils.DeleteObjects(testConfig, created)
+	}
+}
+
+// createInferencePool reads inferPoolManifest, replaces the ${API_VERSION},
+// ${KIND}, ${NAME} and ${GRACE_PERIOD} placeholders with the given values,
+// creates the resulting objects and returns their "Kind/name" identifiers.
+func createInferencePool(inferPoolManifest, apiVersion, kind, name, gracePeriod string) []string {
+	placeholders := map[string]string{
+		"${API_VERSION}":  apiVersion,
+		"${KIND}":         kind,
+		"${NAME}":         name,
+		"${GRACE_PERIOD}": gracePeriod,
+	}
+
+	rendered := substituteMany(testutils.ReadYaml(inferPoolManifest), placeholders)
+	return CreateObjsFromYaml(testConfig, rendered)
+}
+
+// createResource reads manifest, replaces ${IMAGE_REGISTRY}, ${IMAGE} and ${TAG} with
+// registry, img and tag, creates the objects and returns their "Kind/name" identifiers.
+func createResource(manifest, registry, img, tag string) []string {
+	ginkgo.By("Creating resource from manifest: " + manifest)
+
+	imageRef := map[string]string{
+		"${IMAGE_REGISTRY}": registry,
+		"${IMAGE}":          img,
+		"${TAG}":            tag,
+	}
+	rendered := substituteMany(testutils.ReadYaml(manifest), imageRef)
+	return CreateObjsFromYaml(testConfig, rendered)
+}
+
+// runChatCompletion sends prompt as a single user message for modelName to
+// http://localhost:<port>/v1, asserts that exactly one choice comes back with
+// finish reason "stop" and content equal to prompt, and returns the
+// x-inference-namespace and x-inference-pod response headers, in that order.
+func runChatCompletion(prompt string) (string, string) {
+	client := openai.NewClient(option.WithBaseURL(fmt.Sprintf("http://localhost:%s/v1", port)))
+
+	// rawResp receives the underlying HTTP response so its headers can be read.
+	var rawResp *http.Response
+	completion, err := client.Chat.Completions.New(testConfig.Context, openai.ChatCompletionNewParams{
+		Messages: []openai.ChatCompletionMessageParamUnion{openai.UserMessage(prompt)},
+		Model:    modelName,
+	}, option.WithResponseInto(&rawResp))
+	gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+	gomega.Expect(completion.Choices).Should(gomega.HaveLen(1))
+
+	choice := completion.Choices[0]
+	gomega.Expect(choice.FinishReason).Should(gomega.Equal("stop"))
+	gomega.Expect(choice.Message.Content).Should(gomega.Equal(prompt))
+
+	return rawResp.Header.Get("x-inference-namespace"), rawResp.Header.Get("x-inference-pod")
+}
+
+// substituteMany returns a copy of inputs with every occurrence of each key of substitutions replaced by its value.
+func substituteMany(inputs []string, substitutions map[string]string) []string {
+	outputs := make([]string, len(inputs))
+	for i, text := range inputs {
+		for placeholder, replacement := range substitutions {
+			text = strings.ReplaceAll(text, placeholder, replacement)
+		}
+		outputs[i] = text
+	}
+	return outputs
+}
diff --git a/test/activator/utils_test.go b/test/activator/utils_test.go
new file mode 100644
index 00000000..e14af58e
--- /dev/null
+++ b/test/activator/utils_test.go
@@ -0,0 +1,63 @@
+package e2e
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/onsi/ginkgo/v2"
+	"github.com/onsi/gomega"
+
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime/serializer"
+
+	testutils "sigs.k8s.io/gateway-api-inference-extension/test/utils"
+)
+
+// ApplyYAMLFile reads the (possibly multi-document) YAML file at filePath and
+// creates every object it contains using testConfig.
+func ApplyYAMLFile(testConfig *testutils.TestConfig, filePath string) {
+	docs := testutils.ReadYaml(filePath)
+	CreateObjsFromYaml(testConfig, docs)
+}
+
+// CreateObjsFromYaml decodes each non-empty YAML document in docs with
+// testConfig.Scheme, sets its namespace to testConfig.NsName and creates it
+// through testConfig.K8sClient. It returns one "Kind/name" entry per object,
+// in input order. It does not wait for the objects to become ready; any
+// decode, conversion or create error is reported through a Gomega assertion.
+func CreateObjsFromYaml(testConfig *testutils.TestConfig, docs []string) []string {
+	created := []string{}
+	deserializer := serializer.NewCodecFactory(testConfig.Scheme).UniversalDeserializer()
+
+	for _, raw := range docs {
+		doc := strings.TrimSpace(raw)
+		if doc == "" {
+			// Whitespace-only fragments carry no object; skip them.
+			continue
+		}
+
+		decoded, gvk, err := deserializer.Decode([]byte(doc), nil, nil)
+		gomega.Expect(err).NotTo(gomega.HaveOccurred(),
+			"Failed to decode YAML document to a Kubernetes object")
+		ginkgo.By(fmt.Sprintf("Decoded GVK: %s", gvk))
+
+		// Objects are created in unstructured form, so a typed result of the
+		// decoder is converted first.
+		object, isUnstructured := decoded.(*unstructured.Unstructured)
+		if !isUnstructured {
+			object = &unstructured.Unstructured{}
+			err = testConfig.Scheme.Convert(decoded, object, nil)
+			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+		}
+
+		object.SetNamespace(testConfig.NsName)
+		created = append(created, object.GetKind()+"/"+object.GetName())
+
+		err = testConfig.K8sClient.Create(testConfig.Context, object, &client.CreateOptions{})
+		gomega.Expect(err).NotTo(gomega.HaveOccurred(),
+			"Failed to create object from YAML")
+	}
+	return created
+}
diff --git a/test/activator/yaml/activator-filters.yaml b/test/activator/yaml/activator-filters.yaml
new file mode 100644
index 00000000..3423df0a
--- /dev/null
+++ b/test/activator/yaml/activator-filters.yaml
@@ -0,0 +1,56 @@
+apiVersion: networking.istio.io/v1alpha3
+kind: EnvoyFilter
+metadata:
+  name: activator-ext-proc
+spec:
+  configPatches:
+  - applyTo: HTTP_FILTER
+    match:
+      # context omitted so that this applies to both sidecars and gateways
+      listener:
+        filterChain:
+          filter:
+            name: "envoy.filters.network.http_connection_manager"
+    patch:
+      operation: INSERT_FIRST
+      value:
+        name: envoy.filters.http.activator.ext_proc
+        typed_config:
+          "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
+          failure_mode_allow: true
+          grpc_service:
+            envoy_grpc:
+              cluster_name: no-op
+          message_timeout: 120s
+
+---
+apiVersion: networking.istio.io/v1alpha3
+kind: EnvoyFilter
+metadata:
+  name: granite-3-8b-activator
+spec:
+  configPatches:
+  - applyTo: HTTP_ROUTE
+    match:
+      routeConfiguration:
+        vhost:
+          name: "*:80"
+          route:
+            name: default.granite-3-8b.0 # TODO: confirm what the trailing ".0" denotes (presumably the HTTPRoute rule index)
+    patch:
+      operation: MERGE
+      value:
+        typed_per_filter_config:
+          envoy.filters.http.activator.ext_proc:
+            "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExtProcPerRoute
+            overrides:
+              processing_mode:
+                request_header_mode: "SEND"
+                response_header_mode: "SKIP"
+                request_body_mode: "NONE"
+                response_body_mode: "NONE"
+                request_trailer_mode: "SKIP"
+                response_trailer_mode: "SKIP"
+              grpc_service:
+                envoy_grpc:
+                  cluster_name: outbound|9002||granite-3-8b-activator.default.svc.cluster.local
\ No newline at end of file
diff --git a/test/activator/yaml/activator.yaml b/test/activator/yaml/activator.yaml
new file mode 100644
index 00000000..2a031bf4
--- /dev/null
+++ b/test/activator/yaml/activator.yaml
@@ -0,0 +1,36 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: granite-3-8b-activator
+spec:
+  selector:
+    matchLabels:
+      app: granite-3-8b-activator
+  template:
+    metadata:
+      labels:
+        app: granite-3-8b-activator
+    spec:
+      containers:
+      - name: activator
+        image: ${IMAGE_REGISTRY}/${IMAGE}:${TAG}
+        imagePullPolicy: IfNotPresent
+        args:
+        - --pool-name
+        - granite-3-8b-epp
+        - --pool-namespace
+        - default
+        - --pool-group
+        - inference.networking.x-k8s.io
+        - --zap-encoder
+        - json
+        - --v
+        - "2"
+        ports:
+        - containerPort: 9002
+          name: grpc
+          protocol: TCP
+        - containerPort: 9003
+          name: grpc-health
+          protocol: TCP
+      serviceAccountName: activator
\ No newline at end of file
diff --git a/test/activator/yaml/epp-configmap.yaml b/test/activator/yaml/epp-configmap.yaml
new file mode 100644
index 00000000..72da34ba
--- /dev/null
+++ b/test/activator/yaml/epp-configmap.yaml
@@ -0,0 +1,18 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: epp
+data:
+  default-plugins.yaml: |
+    apiVersion: inference.networking.x-k8s.io/v1alpha1
+    kind: EndpointPickerConfig
+    plugins:
+    - type: queue-scorer
+    - type: kv-cache-utilization-scorer
+    - type: prefix-cache-scorer
+    schedulingProfiles:
+    - name: default
+      plugins:
+      - pluginRef: queue-scorer
+      - pluginRef: kv-cache-utilization-scorer
+      - pluginRef: prefix-cache-scorer
\ No newline at end of file
diff --git a/test/activator/yaml/epp.yaml b/test/activator/yaml/epp.yaml
new file mode 100644
index 00000000..43b45883
--- /dev/null
+++ b/test/activator/yaml/epp.yaml
@@ -0,0 +1,74 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: granite-3-8b-epp
+  labels:
+    app: granite-3-8b-epp
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      inferencepool: granite-3-8b-epp
+  template:
+    metadata:
+      labels:
+        inferencepool: granite-3-8b-epp
+    spec:
+      containers:
+      - name: epp
+        image: ${IMAGE_REGISTRY}/${IMAGE}:${TAG}
+        args:
+        - --pool-name
+        - granite-3-8b-epp
+        - --pool-namespace
+        - default
+        - --pool-group
+        - inference.networking.x-k8s.io
+        - --zap-encoder
+        - json
+        - --config-file
+        - /config/default-plugins.yaml
+        - --v
+        - "4"
+        env:
+        - name: NAMESPACE
+          valueFrom:
+            fieldRef:
+              apiVersion: v1
+              fieldPath: metadata.namespace
+        livenessProbe:
+          failureThreshold: 3
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 1
+          periodSeconds: 10
+          successThreshold: 1
+          timeoutSeconds: 1
+        ports:
+        - containerPort: 9002
+          name: grpc
+          protocol: TCP
+        - containerPort: 9003
+          name: grpc-health
+          protocol: TCP
+        - containerPort: 9090
+          name: metrics
+          protocol: TCP
+        readinessProbe:
+          failureThreshold: 3
+          grpc:
+            port: 9003
+            service: inference-extension
+          periodSeconds: 2
+          successThreshold: 1
+          timeoutSeconds: 1
+        volumeMounts:
+        - mountPath: /config
+          name: plugins-config-volume
+      serviceAccountName: epp
+      volumes:
+      - configMap:
+          defaultMode: 420
+          name: epp
+        name: plugins-config-volume
\ No newline at end of file
diff --git a/test/activator/yaml/inference-pools.yaml b/test/activator/yaml/inference-pools.yaml
new file mode 100644
index 00000000..71ea0884
--- /dev/null
+++ b/test/activator/yaml/inference-pools.yaml
@@ -0,0 +1,20 @@
+apiVersion: inference.networking.x-k8s.io/v1alpha2
+kind: InferencePool
+metadata:
+  name: granite-3-8b-epp
+  annotations:
+    activator.llm-d.ai/scale-from-zero-grace-period: "${GRACE_PERIOD}"
+    activator.llm-d.ai/target-apiversion: ${API_VERSION}
+    activator.llm-d.ai/target-kind: ${KIND}
+    activator.llm-d.ai/target-name: ${NAME}
+spec:
+  extensionRef:
+    failureMode: FailClose
+    group: ""
+    kind: Service
+    name: granite-3-8b-epp
+    portNumber: 9002
+  selector:
+    llm-d.ai/model: "granite-3-8b" # must match the pod labels in the vllm-sim Deployments
+    llm-d.ai/inferenceServing: "true"
+  targetPortNumber: 8000
\ No newline at end of file
diff --git a/test/activator/yaml/istio.helmfile.yaml b/test/activator/yaml/istio.helmfile.yaml
new file mode 100644
index 00000000..c831042c
--- /dev/null
+++ b/test/activator/yaml/istio.helmfile.yaml
@@ -0,0 +1,30 @@
+releases:
+  - name: istio-base
+    chart: oci://gcr.io/istio-testing/charts/base
+    version: 1.28-alpha.89f30b26ba71bf5e538083a4720d0bc2d8c06401
+    namespace: istio-system
+    installed: true
+    labels:
+      type: gateway-provider
+      kind: gateway-crds
+
+  - name: istiod
+    chart: oci://gcr.io/istio-testing/charts/istiod
+    version: 1.28-alpha.89f30b26ba71bf5e538083a4720d0bc2d8c06401
+    namespace: istio-system
+    installed: true
+    needs:
+      - istio-system/istio-base
+    values:
+      - meshConfig:
+          defaultConfig:
+            proxyMetadata:
+              SUPPORT_GATEWAY_API_INFERENCE_EXTENSION: "true"
+        pilot:
+          env:
+            SUPPORT_GATEWAY_API_INFERENCE_EXTENSION: "true"
+        tag: 1.28-alpha.89f30b26ba71bf5e538083a4720d0bc2d8c06401
+        hub: "gcr.io/istio-testing"
+    labels:
+      type: gateway-provider
+      kind: gateway-control-plane
\ No newline at end of file
diff --git a/test/activator/yaml/network-config.yaml b/test/activator/yaml/network-config.yaml
new file mode 100644
index 00000000..30e0bf48
--- /dev/null
+++ b/test/activator/yaml/network-config.yaml
@@ -0,0 +1,59 @@
+apiVersion: gateway.networking.k8s.io/v1
+kind: Gateway
+metadata:
+  name: sza
+  annotations:
+    networking.istio.io/service-type: NodePort
+spec:
+  gatewayClassName: istio
+  listeners:
+  - name: http
+    port: 80
+    protocol: HTTP
+    allowedRoutes:
+      namespaces:
+        from: Same
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: granite-3-8b
+spec:
+  parentRefs:
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: sza
+  rules:
+    - backendRefs:
+      - group: inference.networking.x-k8s.io
+        kind: InferencePool
+        name: granite-3-8b-epp
+        port: 8000
+      matches:
+      - path:
+          type: PathPrefix
+          value: /
+      timeouts:
+        request: 300s
+---
+apiVersion: networking.istio.io/v1
+kind: DestinationRule
+metadata:
+  name: granite-3-8b-epp
+spec:
+  host: granite-3-8b-epp.default.svc.cluster.local
+  trafficPolicy:
+    tls:
+      insecureSkipVerify: true
+      mode: SIMPLE
+---
+apiVersion: networking.istio.io/v1
+kind: DestinationRule
+metadata:
+  name: granite-3-8b-activator
+spec:
+  host: granite-3-8b-activator.default.svc.cluster.local
+  trafficPolicy:
+    tls:
+      insecureSkipVerify: true
+      mode: SIMPLE
diff --git a/test/activator/yaml/rbacs.yaml b/test/activator/yaml/rbacs.yaml
new file mode 100644
index 00000000..41769ee0
--- /dev/null
+++ b/test/activator/yaml/rbacs.yaml
@@ -0,0 +1,118 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: activator
+rules:
+- apiGroups:
+  - "inference.networking.x-k8s.io"
+  resources:
+  - "inferencepools"
+  verbs:
+  - "get"
+  - "watch"
+  - "list"
+- apiGroups:
+  - ""
+  resources:
+  - "pods"
+  verbs:
+  - "get"
+  - "watch"
+  - "list"
+- apiGroups:
+  - "discovery.k8s.io"
+  resources:
+  - "endpointslices"
+  verbs:
+  - "get"
+  - "watch"
+  - "list"
+- apiGroups:
+  - "authentication.k8s.io"
+  resources:
+  - "tokenreviews"
+  verbs:
+  - "create"
+- apiGroups:
+  - "authorization.k8s.io"
+  resources:
+  - "subjectaccessreviews"
+  verbs:
+  - "create"
+- apiGroups:
+  - "apps"
+  resources:
+  - "deployments"
+  verbs:
+  - "create"
+  - "get"
+  - "list"
+  - "watch"
+  - "update"
+  - "patch"
+  - "delete"
+- apiGroups:
+  - "apps"
+  resources:
+  - "deployments/scale"
+  verbs:
+  - "get"
+  - "update"
+
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: activator
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: activator
+subjects:
+- kind: ServiceAccount
+  name: activator
+---
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: epp
+rules:
+- apiGroups:
+  - inference.networking.x-k8s.io
+  resources:
+  - inferenceobjectives
+  verbs:
+  - get
+  - watch
+  - list
+- apiGroups:
+  - inference.networking.x-k8s.io
+  - inference.networking.k8s.io
+  resources:
+  - inferencepools
+  verbs:
+  - get
+  - watch
+  - list
+- apiGroups:
+  - ""
+  resources:
+  - pods
+  verbs:
+  - get
+  - watch
+  - list
+
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: epp
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: epp
+subjects:
+- kind: ServiceAccount
+  name: epp
\ No newline at end of file
diff --git a/test/activator/yaml/service-accounts.yaml b/test/activator/yaml/service-accounts.yaml
new file mode 100644
index 00000000..809ab88a
--- /dev/null
+++ b/test/activator/yaml/service-accounts.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: activator
+
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: epp
\ No newline at end of file
diff --git a/test/activator/yaml/services.yaml b/test/activator/yaml/services.yaml
new file mode 100644
index 00000000..286083ff
--- /dev/null
+++ b/test/activator/yaml/services.yaml
@@ -0,0 +1,62 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: granite-3-8b-epp
+spec:
+  selector:
+    inferencepool: granite-3-8b-epp
+  ports:
+  - name: grpc-ext-proc
+    port: 9002
+    protocol: TCP
+    targetPort: 9002
+  - name: http-metrics
+    port: 9090
+    protocol: TCP
+    targetPort: 9090
+  type: ClusterIP
+---
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: granite-3-8b-activator
+spec:
+  selector:
+    app: granite-3-8b-activator
+  ports:
+  - name: grpc-ext-proc
+    port: 9002
+    protocol: TCP
+    targetPort: 9002
+  type: ClusterIP
+
+---
+
+apiVersion: v1
+kind: Service
+metadata:
+  annotations:
+    networking.istio.io/service-type: NodePort
+  labels:
+    gateway.istio.io/managed: istio.io-gateway-controller
+    gateway.networking.k8s.io/gateway-name: sza
+  name: sza-istio
+spec:
+  ports:
+  - appProtocol: tcp
+    name: status-port
+    nodePort: 30868
+    port: 15021
+    protocol: TCP
+    targetPort: 15021
+  - appProtocol: http
+    name: http
+    nodePort: 30080
+    port: 80
+    protocol: TCP
+    targetPort: 80
+  selector:
+    gateway.networking.k8s.io/gateway-name: sza
+  sessionAffinity: None
+  type: NodePort
\ No newline at end of file
diff --git a/test/activator/yaml/vllm-sim-1.yaml b/test/activator/yaml/vllm-sim-1.yaml
new file mode 100644
index 00000000..4ca5ee09
--- /dev/null
+++ b/test/activator/yaml/vllm-sim-1.yaml
@@ -0,0 +1,46 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: granite-3-8b
+spec:
+  replicas: 0 # for scale from zero
+  selector:
+    matchLabels:
+      llm-d.ai/model: "granite-3-8b"
+      llm-d.ai/inferenceServing: "true"
+  template:
+    metadata:
+      labels:
+        llm-d.ai/model: "granite-3-8b"
+        llm-d.ai/inferenceServing: "true"
+    spec:
+      containers:
+      - args:
+        - --model
+        - granite/granite-3-8b-instruct
+        - --port
+        - "8000"
+        image: ${IMAGE_REGISTRY}/${IMAGE}:${TAG}
+        imagePullPolicy: IfNotPresent
+        name: vllm-sim
+        env:
+          - name: POD_NAME
+            valueFrom:
+              fieldRef:
+                apiVersion: v1
+                fieldPath: metadata.name
+          - name: POD_NAMESPACE
+            valueFrom:
+              fieldRef:
+                apiVersion: v1
+                fieldPath: metadata.namespace
+        ports:
+        - containerPort: 8000
+          name: http
+          protocol: TCP
+        readinessProbe:
+          httpGet:
+             path: /health
+             port: 8000
+          initialDelaySeconds: 10 # simulate vllm startup time (optimized)
+          periodSeconds: 5
diff --git a/test/activator/yaml/vllm-sim-2.yaml b/test/activator/yaml/vllm-sim-2.yaml
new file mode 100644
index 00000000..aaf4d94a
--- /dev/null
+++ b/test/activator/yaml/vllm-sim-2.yaml
@@ -0,0 +1,46 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: granite-3-8b
+spec:
+  replicas: 0 # for scale from zero
+  selector:
+    matchLabels:
+      llm-d.ai/model: "granite-3-8b"
+      llm-d.ai/inferenceServing: "true"
+  template:
+    metadata:
+      labels:
+        llm-d.ai/model: "granite-3-8b"
+        llm-d.ai/inferenceServing: "true"
+    spec:
+      containers:
+      - args:
+        - --model
+        - granite/granite-3-8b-instruct
+        - --port
+        - "8000"
+        image: ${IMAGE_REGISTRY}/${IMAGE}:${TAG}
+        imagePullPolicy: IfNotPresent
+        name: vllm-sim
+        env:
+          - name: POD_NAME
+            valueFrom:
+              fieldRef:
+                apiVersion: v1
+                fieldPath: metadata.name
+          - name: POD_NAMESPACE
+            valueFrom:
+              fieldRef:
+                apiVersion: v1
+                fieldPath: metadata.namespace
+        ports:
+        - containerPort: 8000
+          name: http
+          protocol: TCP
+        readinessProbe:
+          httpGet:
+             path: /health
+             port: 8000
+          initialDelaySeconds: 30 # simulate vllm startup time (optimized)
+          periodSeconds: 5