From 31f6e4a37120bdb6a9757e9bdb8b192dda7de4c7 Mon Sep 17 00:00:00 2001 From: Braulio Dumba Date: Wed, 5 Nov 2025 16:09:13 -0500 Subject: [PATCH] Activator E2E Tests Signed-off-by: Braulio Dumba --- go.mod | 3 + go.sum | 4 + test/activator/e2e_suite_test.go | 200 +++++++++++++++++++++ test/activator/e2e_test.go | 138 ++++++++++++++ test/activator/utils_test.go | 63 +++++++ test/activator/yaml/activator-filters.yaml | 56 ++++++ test/activator/yaml/activator.yaml | 36 ++++ test/activator/yaml/epp-configmap.yaml | 18 ++ test/activator/yaml/epp.yaml | 74 ++++++++ test/activator/yaml/inference-pools.yaml | 20 +++ test/activator/yaml/istio.helmfile.yaml | 30 ++++ test/activator/yaml/network-config.yaml | 59 ++++++ test/activator/yaml/rbacs.yaml | 118 ++++++++++++ test/activator/yaml/service-accounts.yaml | 10 ++ test/activator/yaml/services.yaml | 62 +++++++ test/activator/yaml/vllm-sim-1.yaml | 46 +++++ test/activator/yaml/vllm-sim-2.yaml | 46 +++++ 17 files changed, 983 insertions(+) create mode 100644 test/activator/e2e_suite_test.go create mode 100644 test/activator/e2e_test.go create mode 100644 test/activator/utils_test.go create mode 100644 test/activator/yaml/activator-filters.yaml create mode 100644 test/activator/yaml/activator.yaml create mode 100644 test/activator/yaml/epp-configmap.yaml create mode 100644 test/activator/yaml/epp.yaml create mode 100644 test/activator/yaml/inference-pools.yaml create mode 100644 test/activator/yaml/istio.helmfile.yaml create mode 100644 test/activator/yaml/network-config.yaml create mode 100644 test/activator/yaml/rbacs.yaml create mode 100644 test/activator/yaml/service-accounts.yaml create mode 100644 test/activator/yaml/services.yaml create mode 100644 test/activator/yaml/vllm-sim-1.yaml create mode 100644 test/activator/yaml/vllm-sim-2.yaml diff --git a/go.mod b/go.mod index 3409a248..9e4065e7 100644 --- a/go.mod +++ b/go.mod @@ -17,6 +17,7 @@ require ( github.com/stretchr/testify v1.11.1 golang.org/x/sync v0.17.0 
google.golang.org/grpc v1.76.0 + istio.io/client-go v1.28.0 k8s.io/api v0.34.1 k8s.io/apiextensions-apiserver v0.34.1 k8s.io/apimachinery v0.34.1 @@ -57,6 +58,7 @@ require ( github.com/go-openapi/swag v0.23.1 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.4 // indirect github.com/google/btree v1.1.3 // indirect github.com/google/cel-go v0.26.0 // indirect github.com/google/gnostic-models v0.7.0 // indirect @@ -123,6 +125,7 @@ require ( gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect + istio.io/api v1.28.0 // indirect k8s.io/apiserver v0.34.1 // indirect k8s.io/component-base v0.34.1 // indirect k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3 // indirect diff --git a/go.sum b/go.sum index cd221b8f..b085942e 100644 --- a/go.sum +++ b/go.sum @@ -385,6 +385,10 @@ gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +istio.io/api v1.28.0 h1:0fYY9G03CAdFwE/fCkpr0v7kKsy+Hz9OCCjLNBNLbnU= +istio.io/api v1.28.0/go.mod h1:BD3qv/ekm16kvSgvSpuiDawgKhEwG97wx849CednJSg= +istio.io/client-go v1.28.0 h1:EqP19aYNvH42VQAmS/mHXZ51PU3nlrnF6MeeGldJSas= +istio.io/client-go v1.28.0/go.mod h1:mcFWH+wv9ltQqoDYyfLeVFyRZuD7n1Fj7TD5RGohqSU= k8s.io/api v0.34.1 h1:jC+153630BMdlFukegoEL8E/yT7aLyQkIVuwhmwDgJM= k8s.io/api v0.34.1/go.mod h1:SB80FxFtXn5/gwzCoN6QCtPD7Vbu5w2n1S0J5gFfTYk= k8s.io/apiextensions-apiserver v0.34.1 h1:NNPBva8FNAPt1iSVwIE0FsdrVriRXMsaWFMqJbII2CI= diff --git a/test/activator/e2e_suite_test.go b/test/activator/e2e_suite_test.go new file mode 100644 index 00000000..4f1dae2c --- /dev/null +++ b/test/activator/e2e_suite_test.go @@ 
-0,0 +1,200 @@ +package e2e + +import ( + "fmt" + "io" + "os/exec" + "strings" + "testing" + "time" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + "github.com/onsi/gomega/gexec" + apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client/config" + k8slog "sigs.k8s.io/controller-runtime/pkg/log" + + infextv1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" + infextv1a2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/env" + testutils "sigs.k8s.io/gateway-api-inference-extension/test/utils" + + istiov1 "istio.io/client-go/pkg/apis/networking/v1" + istiov1a3 "istio.io/client-go/pkg/apis/networking/v1alpha3" + gtwv1 "sigs.k8s.io/gateway-api/apis/v1" +) + +const ( + // gatewayCrdsKustomize is the kustomize directory for the Gateway API CRDs. + gatewayCrdsKustomize = "../../deploy/components/crds-gateway-api" + // gieCrdsKustomize is the kustomize directory for the inference pool CRD with 'inference.networking.x-k8s.io' group. + gieCrdsKustomize = "../../deploy/components/crds-gie" + // inferExtManifest is the manifest for the inference extension test resources. + inferExtManifest = "./yaml/inference-pools.yaml" + // eppManifest is the manifest for the deployment of the EPP. + eppManifest = "./yaml/epp.yaml" + // eppConfigManifest is the manifest for the EPP's plugin ConfigMap. + eppConfigManifest = "./yaml/epp-configmap.yaml" + // activatorManifest is the manifest for the deployment of the activator. + activatorManifest = "./yaml/activator.yaml" + // activatorfilterManifest is the manifest for the activator's EnvoyFilter resources. + activatorfilterManifest = "./yaml/activator-filters.yaml" + // rbacManifest is the manifest for the activator's and EPP's RBAC resources. + rbacManifest = "./yaml/rbacs.yaml" + // serviceAccountManifest is the manifest for the activator's and EPP's service account resources. 
+ serviceAccountManifest = "./yaml/service-accounts.yaml" + // servicesManifest is the manifest for the EPP, activator and gateway Service resources. + servicesManifest = "./yaml/services.yaml" + // networkConfigurationManifest is the manifest for the Gateway, HTTPRoute and DestinationRule resources. + networkConfigurationManifest = "./yaml/network-config.yaml" +) + +var ( + port string + testConfig *testutils.TestConfig + + eppImg = env.GetEnvString("EPP_IMAGE", "llm-d-inference-scheduler", ginkgo.GinkgoLogr) + eppTag = env.GetEnvString("EPP_TAG", "dev", ginkgo.GinkgoLogr) + activatorImg = env.GetEnvString("ACTIVATOR_IMAGE", "llm-d-activator", ginkgo.GinkgoLogr) + activatorTag = env.GetEnvString("ACTIVATOR_TAG", "dev", ginkgo.GinkgoLogr) + vllmImg = env.GetEnvString("VLLM_IMAGE", "llm-d-inference-sim", ginkgo.GinkgoLogr) + vllmTag = env.GetEnvString("VLLM_TAG", "dev", ginkgo.GinkgoLogr) + + imageRegistry = env.GetEnvString("IMAGE_REGISTRY", "ghcr.io/llm-d", ginkgo.GinkgoLogr) +) + +func TestEndToEnd(t *testing.T) { + gomega.RegisterFailHandler(ginkgo.Fail) + ginkgo.RunSpecs(t, + "End To End Test Suite", + ) +} + +var _ = ginkgo.BeforeSuite(func() { + port = "30080" + + setupK8sCluster() + testConfig = testutils.NewTestConfig("default") + setupK8sClient() + createCRDs(gieCrdsKustomize) + createCRDs(gatewayCrdsKustomize) + createIstio() + createResources() + loadImages() +}) + +var _ = ginkgo.AfterSuite(func() { + command := exec.Command("kind", "delete", "cluster", "--name", "e2e-tests") + session, err := gexec.Start(command, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + gomega.Eventually(session).WithTimeout(600 * time.Second).Should(gexec.Exit(0)) +}) + +// setupK8sCluster creates the kind cluster "e2e-tests", passing the cluster config to kind via stdin +func setupK8sCluster() { + command := exec.Command("kind", "create", "cluster", "--name", "e2e-tests", "--config", "-") + stdin, err := command.StdinPipe() + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + go func() { + defer func() { + 
err := stdin.Close() + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + }() + clusterConfig := strings.ReplaceAll(kindClusterConfig, "${PORT}", port) + _, err := io.WriteString(stdin, clusterConfig) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + }() + session, err := gexec.Start(command, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + gomega.Eventually(session).WithTimeout(600 * time.Second).Should(gexec.Exit(0)) +} + +func createResources() { + ApplyYAMLFile(testConfig, rbacManifest) + ApplyYAMLFile(testConfig, servicesManifest) + ApplyYAMLFile(testConfig, eppConfigManifest) + ApplyYAMLFile(testConfig, serviceAccountManifest) + ApplyYAMLFile(testConfig, activatorfilterManifest) + ApplyYAMLFile(testConfig, networkConfigurationManifest) +} + +func loadImages() { + kindLoadImage(imageRegistry + "/" + eppImg + ":" + eppTag) + kindLoadImage(imageRegistry + "/" + vllmImg + ":" + vllmTag) + kindLoadImage(imageRegistry + "/" + activatorImg + ":" + activatorTag) +} + +func kindLoadImage(image string) { + ginkgo.By(fmt.Sprintf("Loading %s into the cluster e2e-tests", image)) + + command := exec.Command("kind", "--name", "e2e-tests", "load", "docker-image", image) + session, err := gexec.Start(command, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + gomega.Eventually(session).WithTimeout(600 * time.Second).Should(gexec.Exit(0)) +} + +func setupK8sClient() { + k8sCfg := config.GetConfigOrDie() + gomega.ExpectWithOffset(1, k8sCfg).NotTo(gomega.BeNil()) + + err := clientgoscheme.AddToScheme(testConfig.Scheme) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = infextv1.Install(testConfig.Scheme) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = apiextv1.AddToScheme(testConfig.Scheme) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = infextv1a2.Install(testConfig.Scheme) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = 
gtwv1.Install(testConfig.Scheme) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = istiov1.AddToScheme(testConfig.Scheme) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = istiov1a3.AddToScheme(testConfig.Scheme) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + testConfig.CreateCli() + + k8slog.SetLogger(ginkgo.GinkgoLogr) +} + +// createCRDs runs kustomize on the given directory and creates the resulting objects in the cluster. +func createCRDs(manifests string) { + crds := runKustomize(manifests) + CreateObjsFromYaml(testConfig, crds) +} + +func runKustomize(kustomizeDir string) []string { + command := exec.Command("kustomize", "build", kustomizeDir) + session, err := gexec.Start(command, nil, ginkgo.GinkgoWriter) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + gomega.Eventually(session).WithTimeout(600 * time.Second).Should(gexec.Exit(0)) + return strings.Split(string(session.Out.Contents()), "\n---") +} + +func createIstio() { + command := exec.Command("helmfile", "apply", "-f", "./yaml/istio.helmfile.yaml") + session, err := gexec.Start(command, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + gomega.Eventually(session).WithTimeout(600 * time.Second).Should(gexec.Exit(0)) +} + +const kindClusterConfig = ` +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: +- extraPortMappings: + - containerPort: 30080 + hostPort: ${PORT} + protocol: TCP + - containerPort: 30081 + hostPort: 30081 + protocol: TCP +` diff --git a/test/activator/e2e_test.go b/test/activator/e2e_test.go new file mode 100644 index 00000000..295739c7 --- /dev/null +++ b/test/activator/e2e_test.go @@ -0,0 +1,138 @@ +package e2e + +import ( + "fmt" + "net/http" + "strings" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + testutils "sigs.k8s.io/gateway-api-inference-extension/test/utils" +) + +const ( + // simplePrompt is the prompt sent by the tests; simDeployment1/2 reference the vLLM simulator deployment manifests + 
simplePrompt = "Hello my name is Andrew, I have a doctorate in Rocket Science, and I like interplanetary space exploration" + simDeployment1 = "./yaml/vllm-sim-1.yaml" + simDeployment2 = "./yaml/vllm-sim-2.yaml" + modelserver = "granite-3-8b" +) + +var ( + modelName = "granite/granite-3-8b-instruct" + nsName = "default" +) + +var _ = ginkgo.Describe("Run end to end tests", ginkgo.Ordered, func() { + ginkgo.When("Running simple non-PD configuration", func() { + ginkgo.It("should run successfully", func() { + // Create the InferencePool, annotated with the model server Deployment as the activator target + inferencePools := createInferencePool(inferExtManifest, "apps/v1", "Deployment", modelserver, "30") + + // Create workload objects: the EPP and the activator + epp := createResource(eppManifest, imageRegistry, eppImg, eppTag) + activator := createResource(activatorManifest, imageRegistry, activatorImg, activatorTag) + + // Create the model server Deployment (its manifest starts at 0 replicas) + modelServers := createResource(simDeployment1, imageRegistry, vllmImg, vllmTag) + + nsHdr, podHdr := runChatCompletion(simplePrompt) + gomega.Expect(nsHdr).Should(gomega.Equal(nsName)) + gomega.Expect(podHdr).Should(gomega.Equal(modelServers[0])) + + testutils.DeleteObjects(testConfig, epp) + testutils.DeleteObjects(testConfig, activator) + testutils.DeleteObjects(testConfig, modelServers) + testutils.DeleteObjects(testConfig, inferencePools) + }) + }) + + ginkgo.When("Running simple non-PD KV enabled configuration", func() { + ginkgo.It("should run successfully", func() { + // Create the InferencePool, annotated with the model server Deployment as the activator target + inferencePools := createInferencePool(inferExtManifest, "apps/v1", "Deployment", modelserver, "80") + + // Create workload objects: the EPP and the activator + epp := createResource(eppManifest, imageRegistry, eppImg, eppTag) + activator := createResource(activatorManifest, imageRegistry, activatorImg, activatorTag) + + // Create the model server Deployment (its manifest starts at 0 replicas) + modelServers := createResource(simDeployment1, imageRegistry, vllmImg, vllmTag) + + nsHdr, podHdr := runChatCompletion(simplePrompt) + 
gomega.Expect(nsHdr).Should(gomega.Equal(nsName)) + gomega.Expect(podHdr).Should(gomega.Equal(modelServers[0])) + + testutils.DeleteObjects(testConfig, epp) + testutils.DeleteObjects(testConfig, activator) + testutils.DeleteObjects(testConfig, modelServers) + testutils.DeleteObjects(testConfig, inferencePools) + }) + }) +}) + +// createInferencePool creates the objects in inferPoolManifest after substituting the target and grace period placeholders, and returns their names. +func createInferencePool(inferPoolManifest, apiVersion, kind, name, gracePeriod string) []string { + manifests := testutils.ReadYaml(inferPoolManifest) + manifests = substituteMany(manifests, + map[string]string{ + "${KIND}": kind, + "${NAME}": name, + "${GRACE_PERIOD}": gracePeriod, + "${API_VERSION}": apiVersion, + }) + objects := CreateObjsFromYaml(testConfig, manifests) + + return objects +} + +func createResource(manifest, registry, img, tag string) []string { + ginkgo.By("Creating resource from manifest: " + manifest) + objYamls := testutils.ReadYaml(manifest) + + objYamls = substituteMany(objYamls, + map[string]string{ + "${IMAGE}": img, + "${TAG}": tag, + "${IMAGE_REGISTRY}": registry, + }) + objNames := CreateObjsFromYaml(testConfig, objYamls) + return objNames +} + +func runChatCompletion(prompt string) (string, string) { + var httpResp *http.Response + openaiclient := openai.NewClient( + option.WithBaseURL(fmt.Sprintf("http://localhost:%s/v1", port))) + + params := openai.ChatCompletionNewParams{ + Messages: []openai.ChatCompletionMessageParamUnion{ + openai.UserMessage(prompt), + }, + Model: modelName, + } + resp, err := openaiclient.Chat.Completions.New(testConfig.Context, params, option.WithResponseInto(&httpResp)) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + gomega.Expect(resp.Choices).Should(gomega.HaveLen(1)) + gomega.Expect(resp.Choices[0].FinishReason).Should(gomega.Equal("stop")) + gomega.Expect(resp.Choices[0].Message.Content).Should(gomega.Equal(prompt)) + + namespaceHeader := 
httpResp.Header.Get("x-inference-namespace") + podHeader := httpResp.Header.Get("x-inference-pod") + + return namespaceHeader, podHeader +} + +func substituteMany(inputs []string, substitutions map[string]string) []string { + outputs := []string{} + for _, input := range inputs { + output := input + for key, value := range substitutions { + output = strings.ReplaceAll(output, key, value) + } + outputs = append(outputs, output) + } + return outputs +} diff --git a/test/activator/utils_test.go b/test/activator/utils_test.go new file mode 100644 index 00000000..e14af58e --- /dev/null +++ b/test/activator/utils_test.go @@ -0,0 +1,63 @@ +package e2e + +import ( + "fmt" + "strings" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + + "sigs.k8s.io/controller-runtime/pkg/client" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/serializer" + + testutils "sigs.k8s.io/gateway-api-inference-extension/test/utils" +) + +// ApplyYAMLFile reads a file containing YAML (possibly multiple docs) +// and creates each object in the cluster. 
+func ApplyYAMLFile(testConfig *testutils.TestConfig, filePath string) { + // Create the resources from the manifest file + CreateObjsFromYaml(testConfig, testutils.ReadYaml(filePath)) +} + +// CreateObjsFromYaml creates K8S objects from the YAML docs in testConfig.NsName and returns their names as kind/name +func CreateObjsFromYaml(testConfig *testutils.TestConfig, docs []string) []string { + objNames := []string{} + + // For each doc, decode and create + decoder := serializer.NewCodecFactory(testConfig.Scheme).UniversalDeserializer() + for _, doc := range docs { + trimmed := strings.TrimSpace(doc) + if trimmed == "" { + continue + } + // Decode into a runtime.Object + obj, gvk, decodeErr := decoder.Decode([]byte(trimmed), nil, nil) + gomega.Expect(decodeErr).NotTo(gomega.HaveOccurred(), + "Failed to decode YAML document to a Kubernetes object") + + ginkgo.By(fmt.Sprintf("Decoded GVK: %s", gvk)) + + unstrObj, ok := obj.(*unstructured.Unstructured) + if !ok { + // Fallback if it's a typed object + unstrObj = &unstructured.Unstructured{} + // Convert typed to unstructured + err := testConfig.Scheme.Convert(obj, unstrObj, nil) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } + + unstrObj.SetNamespace(testConfig.NsName) + kind := unstrObj.GetKind() + name := unstrObj.GetName() + objNames = append(objNames, kind+"/"+name) + + // Create the object; any error from the API server fails the test + err := testConfig.K8sClient.Create(testConfig.Context, unstrObj, &client.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred(), + "Failed to create object from YAML") + } + return objNames +} diff --git a/test/activator/yaml/activator-filters.yaml b/test/activator/yaml/activator-filters.yaml new file mode 100644 index 00000000..3423df0a --- /dev/null +++ b/test/activator/yaml/activator-filters.yaml @@ -0,0 +1,56 @@ +apiVersion: networking.istio.io/v1alpha3 +kind: EnvoyFilter +metadata: + name: activator-ext-proc +spec: + configPatches: + - applyTo: HTTP_FILTER + match: + # context omitted so that this applies to both sidecars and 
gateways + listener: + filterChain: + filter: + name: "envoy.filters.network.http_connection_manager" + patch: + operation: INSERT_FIRST + value: + name: envoy.filters.http.activator.ext_proc + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor + failure_mode_allow: true + grpc_service: + envoy_grpc: + cluster_name: no-op + message_timeout: 120s + +--- +apiVersion: networking.istio.io/v1alpha3 +kind: EnvoyFilter +metadata: + name: granite-3-8b-activator +spec: + configPatches: + - applyTo: HTTP_ROUTE + match: + routeConfiguration: + vhost: + name: "*:80" + route: + name: default.granite-3-8b.0 # TODO: what .0? + patch: + operation: MERGE + value: + typed_per_filter_config: + envoy.filters.http.activator.ext_proc: + "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExtProcPerRoute + overrides: + processing_mode: + request_header_mode: "SEND" + response_header_mode: "SKIP" + request_body_mode: "NONE" + response_body_mode: "NONE" + request_trailer_mode: "SKIP" + response_trailer_mode: "SKIP" + grpc_service: + envoy_grpc: + cluster_name: outbound|9002||granite-3-8b-activator.default.svc.cluster.local \ No newline at end of file diff --git a/test/activator/yaml/activator.yaml b/test/activator/yaml/activator.yaml new file mode 100644 index 00000000..2a031bf4 --- /dev/null +++ b/test/activator/yaml/activator.yaml @@ -0,0 +1,36 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: granite-3-8b-activator +spec: + selector: + matchLabels: + app: granite-3-8b-activator + template: + metadata: + labels: + app: granite-3-8b-activator + spec: + containers: + - name: activator + image: ${IMAGE_REGISTRY}/${IMAGE}:${TAG} + imagePullPolicy: IfNotPresent + args: + - --pool-name + - granite-3-8b-epp + - --pool-namespace + - default + - --pool-group + - inference.networking.x-k8s.io + - --zap-encoder + - json + - --v + - "2" + ports: + - containerPort: 9002 + name: grpc + protocol: TCP + - containerPort: 
9003 + name: grpc-health + protocol: TCP + serviceAccountName: activator \ No newline at end of file diff --git a/test/activator/yaml/epp-configmap.yaml b/test/activator/yaml/epp-configmap.yaml new file mode 100644 index 00000000..72da34ba --- /dev/null +++ b/test/activator/yaml/epp-configmap.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: epp +data: + default-plugins.yaml: | + apiVersion: inference.networking.x-k8s.io/v1alpha1 + kind: EndpointPickerConfig + plugins: + - type: queue-scorer + - type: kv-cache-utilization-scorer + - type: prefix-cache-scorer + schedulingProfiles: + - name: default + plugins: + - pluginRef: queue-scorer + - pluginRef: kv-cache-utilization-scorer + - pluginRef: prefix-cache-scorer \ No newline at end of file diff --git a/test/activator/yaml/epp.yaml b/test/activator/yaml/epp.yaml new file mode 100644 index 00000000..43b45883 --- /dev/null +++ b/test/activator/yaml/epp.yaml @@ -0,0 +1,74 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: granite-3-8b-epp + labels: + app: granite-3-8b-epp +spec: + replicas: 1 + selector: + matchLabels: + inferencepool: granite-3-8b-epp + template: + metadata: + labels: + inferencepool: granite-3-8b-epp + spec: + containers: + - name: epp + image: ${IMAGE_REGISTRY}/${IMAGE}:${TAG} + args: + - --pool-name + - granite-3-8b-epp + - --pool-namespace + - default + - --pool-group + - inference.networking.x-k8s.io + - --zap-encoder + - json + - --config-file + - /config/default-plugins.yaml + - --v + - "4" + env: + - name: NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + livenessProbe: + failureThreshold: 3 + grpc: + port: 9003 + service: inference-extension + initialDelaySeconds: 1 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + ports: + - containerPort: 9002 + name: grpc + protocol: TCP + - containerPort: 9003 + name: grpc-health + protocol: TCP + - containerPort: 9090 + name: metrics + protocol: TCP + readinessProbe: + 
failureThreshold: 3 + grpc: + port: 9003 + service: inference-extension + periodSeconds: 2 + successThreshold: 1 + timeoutSeconds: 1 + volumeMounts: + - mountPath: /config + name: plugins-config-volume + serviceAccountName: epp + volumes: + - configMap: + defaultMode: 420 + name: epp + name: plugins-config-volume \ No newline at end of file diff --git a/test/activator/yaml/inference-pools.yaml b/test/activator/yaml/inference-pools.yaml new file mode 100644 index 00000000..71ea0884 --- /dev/null +++ b/test/activator/yaml/inference-pools.yaml @@ -0,0 +1,20 @@ +apiVersion: inference.networking.x-k8s.io/v1alpha2 +kind: InferencePool +metadata: + name: granite-3-8b-epp + annotations: + activator.llm-d.ai/scale-from-zero-grace-period: "${GRACE_PERIOD}" + activator.llm-d.ai/target-apiversion: ${API_VERSION} + activator.llm-d.ai/target-kind: ${KIND} + activator.llm-d.ai/target-name: ${NAME} +spec: + extensionRef: + failureMode: FailClose + group: "" + kind: Service + name: granite-3-8b-epp + portNumber: 9002 + selector: + llm-d.ai/model: "granite-3-8b" + llm-d.ai/inferenceServing: "true" + targetPortNumber: 8000 \ No newline at end of file diff --git a/test/activator/yaml/istio.helmfile.yaml b/test/activator/yaml/istio.helmfile.yaml new file mode 100644 index 00000000..c831042c --- /dev/null +++ b/test/activator/yaml/istio.helmfile.yaml @@ -0,0 +1,30 @@ +releases: + - name: istio-base + chart: oci://gcr.io/istio-testing/charts/base + version: 1.28-alpha.89f30b26ba71bf5e538083a4720d0bc2d8c06401 + namespace: istio-system + installed: true + labels: + type: gateway-provider + kind: gateway-crds + + - name: istiod + chart: oci://gcr.io/istio-testing/charts/istiod + version: 1.28-alpha.89f30b26ba71bf5e538083a4720d0bc2d8c06401 + namespace: istio-system + installed: true + needs: + - istio-system/istio-base + values: + - meshConfig: + defaultConfig: + proxyMetadata: + SUPPORT_GATEWAY_API_INFERENCE_EXTENSION: "true" + pilot: + env: + SUPPORT_GATEWAY_API_INFERENCE_EXTENSION: "true" 
+ tag: 1.28-alpha.89f30b26ba71bf5e538083a4720d0bc2d8c06401 + hub: "gcr.io/istio-testing" + labels: + type: gateway-provider + kind: gateway-control-plane \ No newline at end of file diff --git a/test/activator/yaml/network-config.yaml b/test/activator/yaml/network-config.yaml new file mode 100644 index 00000000..30e0bf48 --- /dev/null +++ b/test/activator/yaml/network-config.yaml @@ -0,0 +1,59 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: sza + annotations: + networking.istio.io/service-type: NodePort +spec: + gatewayClassName: istio + listeners: + - name: http + port: 80 + protocol: HTTP + allowedRoutes: + namespaces: + from: Same +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: granite-3-8b +spec: + parentRefs: + - group: gateway.networking.k8s.io + kind: Gateway + name: sza + rules: + - backendRefs: + - group: inference.networking.x-k8s.io + kind: InferencePool + name: granite-3-8b-epp + port: 8000 + matches: + - path: + type: PathPrefix + value: / + timeouts: + request: 300s +--- +apiVersion: networking.istio.io/v1 +kind: DestinationRule +metadata: + name: granite-3-8b-epp +spec: + host: granite-3-8b-epp.default.svc.cluster.local + trafficPolicy: + tls: + insecureSkipVerify: true + mode: SIMPLE +--- +apiVersion: networking.istio.io/v1 +kind: DestinationRule +metadata: + name: granite-3-8b-activator +spec: + host: granite-3-8b-activator.default.svc.cluster.local + trafficPolicy: + tls: + mode: SIMPLE + insecureSkipVerify: true diff --git a/test/activator/yaml/rbacs.yaml b/test/activator/yaml/rbacs.yaml new file mode 100644 index 00000000..41769ee0 --- /dev/null +++ b/test/activator/yaml/rbacs.yaml @@ -0,0 +1,118 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: activator +rules: +- apiGroups: + - "inference.networking.x-k8s.io" + resources: + - "inferencepools" + verbs: + - "get" + - "watch" + - "list" +- apiGroups: + - "" + resources: + - "pods" + verbs: + - "get" + - 
"watch" + - "list" +- apiGroups: + - "discovery.k8s.io" + resources: + - "endpointslices" + verbs: + - "get" + - "watch" + - "list" +- apiGroups: + - "authentication.k8s.io" + resources: + - "tokenreviews" + verbs: + - "create" +- apiGroups: + - "authorization.k8s.io" + resources: + - "subjectaccessreviews" + verbs: + - "create" +- apiGroups: + - "apps" + resources: + - "deployments" + verbs: + - "create" + - "get" + - "list" + - "watch" + - "update" + - "patch" + - "delete" +- apiGroups: + - "apps" + resources: + - "deployments/scale" + verbs: + - "get" + - "update" + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: activator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: activator +subjects: +- kind: ServiceAccount + name: activator +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: epp +rules: +- apiGroups: + - inference.networking.x-k8s.io + resources: + - inferenceobjectives + verbs: + - get + - watch + - list +- apiGroups: + - inference.networking.x-k8s.io + - inference.networking.k8s.io + resources: + - inferencepools + verbs: + - get + - watch + - list +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - watch + - list + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: epp +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: epp +subjects: +- kind: ServiceAccount + name: epp \ No newline at end of file diff --git a/test/activator/yaml/service-accounts.yaml b/test/activator/yaml/service-accounts.yaml new file mode 100644 index 00000000..809ab88a --- /dev/null +++ b/test/activator/yaml/service-accounts.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: activator + +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: epp \ No newline at end of file diff --git a/test/activator/yaml/services.yaml b/test/activator/yaml/services.yaml new file mode 100644 index 00000000..286083ff 
--- /dev/null +++ b/test/activator/yaml/services.yaml @@ -0,0 +1,62 @@ +apiVersion: v1 +kind: Service +metadata: + name: granite-3-8b-epp +spec: + selector: + inferencepool: granite-3-8b-epp + ports: + - name: grpc-ext-proc + port: 9002 + protocol: TCP + targetPort: 9002 + - name: http-metrics + port: 9090 + protocol: TCP + targetPort: 9090 + type: ClusterIP +--- + +apiVersion: v1 +kind: Service +metadata: + name: granite-3-8b-activator +spec: + selector: + app: granite-3-8b-activator + ports: + - name: grpc-ext-proc + port: 9002 + protocol: TCP + targetPort: 9002 + type: ClusterIP + +--- + +apiVersion: v1 +kind: Service +metadata: + annotations: + networking.istio.io/service-type: NodePort + labels: + gateway.istio.io/managed: istio.io-gateway-controller + gateway.networking.k8s.io/gateway-name: sza + name: sza-istio +spec: + ports: + - appProtocol: tcp + name: status-port + nodePort: 30868 + port: 15021 + protocol: TCP + targetPort: 15021 + - appProtocol: http + name: http + nodePort: 30080 + port: 80 + protocol: TCP + targetPort: 80 + selector: + gateway.networking.k8s.io/gateway-name: sza + sessionAffinity: None + type: NodePort \ No newline at end of file diff --git a/test/activator/yaml/vllm-sim-1.yaml b/test/activator/yaml/vllm-sim-1.yaml new file mode 100644 index 00000000..4ca5ee09 --- /dev/null +++ b/test/activator/yaml/vllm-sim-1.yaml @@ -0,0 +1,46 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: granite-3-8b +spec: + replicas: 0 # for scale from zero + selector: + matchLabels: + llm-d.ai/model: "granite-3-8b" + llm-d.ai/inferenceServing: "true" + template: + metadata: + labels: + llm-d.ai/model: "granite-3-8b" + llm-d.ai/inferenceServing: "true" + spec: + containers: + - args: + - --model + - granite/granite-3-8b-instruct + - --port + - "8000" + image: ${IMAGE_REGISTRY}/${IMAGE}:${TAG} + imagePullPolicy: IfNotPresent + name: vllm-sim + env: + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: 
POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + ports: + - containerPort: 8000 + name: http + protocol: TCP + readinessProbe: + httpGet: + path: /health + port: 8000 + initialDelaySeconds: 10 # simulate vllm startup time (optimized) + periodSeconds: 5 diff --git a/test/activator/yaml/vllm-sim-2.yaml b/test/activator/yaml/vllm-sim-2.yaml new file mode 100644 index 00000000..aaf4d94a --- /dev/null +++ b/test/activator/yaml/vllm-sim-2.yaml @@ -0,0 +1,46 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: granite-3-8b +spec: + replicas: 0 # for scale from zero + selector: + matchLabels: + llm-d.ai/model: "granite-3-8b" + llm-d.ai/inferenceServing: "true" + template: + metadata: + labels: + llm-d.ai/model: "granite-3-8b" + llm-d.ai/inferenceServing: "true" + spec: + containers: + - args: + - --model + - granite/granite-3-8b-instruct + - --port + - "8000" + image: ${IMAGE_REGISTRY}/${IMAGE}:${TAG} + imagePullPolicy: IfNotPresent + name: vllm-sim + env: + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + ports: + - containerPort: 8000 + name: http + protocol: TCP + readinessProbe: + httpGet: + path: /health + port: 8000 + initialDelaySeconds: 30 # simulate vllm startup time (optimized) + periodSeconds: 5