Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ require (
github.com/stretchr/testify v1.11.1
golang.org/x/sync v0.17.0
google.golang.org/grpc v1.76.0
istio.io/client-go v1.28.0
k8s.io/api v0.34.1
k8s.io/apiextensions-apiserver v0.34.1
k8s.io/apimachinery v0.34.1
Expand Down Expand Up @@ -57,6 +58,7 @@ require (
github.com/go-openapi/swag v0.23.1 // indirect
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/btree v1.1.3 // indirect
github.com/google/cel-go v0.26.0 // indirect
github.com/google/gnostic-models v0.7.0 // indirect
Expand Down Expand Up @@ -123,6 +125,7 @@ require (
gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
istio.io/api v1.28.0 // indirect
k8s.io/apiserver v0.34.1 // indirect
k8s.io/component-base v0.34.1 // indirect
k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,10 @@ gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
istio.io/api v1.28.0 h1:0fYY9G03CAdFwE/fCkpr0v7kKsy+Hz9OCCjLNBNLbnU=
istio.io/api v1.28.0/go.mod h1:BD3qv/ekm16kvSgvSpuiDawgKhEwG97wx849CednJSg=
istio.io/client-go v1.28.0 h1:EqP19aYNvH42VQAmS/mHXZ51PU3nlrnF6MeeGldJSas=
istio.io/client-go v1.28.0/go.mod h1:mcFWH+wv9ltQqoDYyfLeVFyRZuD7n1Fj7TD5RGohqSU=
k8s.io/api v0.34.1 h1:jC+153630BMdlFukegoEL8E/yT7aLyQkIVuwhmwDgJM=
k8s.io/api v0.34.1/go.mod h1:SB80FxFtXn5/gwzCoN6QCtPD7Vbu5w2n1S0J5gFfTYk=
k8s.io/apiextensions-apiserver v0.34.1 h1:NNPBva8FNAPt1iSVwIE0FsdrVriRXMsaWFMqJbII2CI=
Expand Down
200 changes: 200 additions & 0 deletions test/activator/e2e_suite_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
package e2e

import (
"fmt"
"io"
"os/exec"
"strings"
"testing"
"time"

"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
"github.com/onsi/gomega/gexec"
apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
"sigs.k8s.io/controller-runtime/pkg/client/config"
k8slog "sigs.k8s.io/controller-runtime/pkg/log"

infextv1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
infextv1a2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/env"
testutils "sigs.k8s.io/gateway-api-inference-extension/test/utils"

istiov1 "istio.io/client-go/pkg/apis/networking/v1"
istiov1a3 "istio.io/client-go/pkg/apis/networking/v1alpha3"
gtwv1 "sigs.k8s.io/gateway-api/apis/v1"
)

const (
// gatewayCrdsKustomize is the kustomize directory for the Gateway API CRDs.
gatewayCrdsKustomize = "../../deploy/components/crds-gateway-api"
// gieCrdsKustomize is the manifest for the inference pool CRD with 'inference.networking.x-k8s.io' group.
gieCrdsKustomize = "../../deploy/components/crds-gie"
// inferExtManifest is the manifest for the inference extension test resources.
inferExtManifest = "./yaml/inference-pools.yaml"
// eppManifest is the manifest for the deployment of the EPP
eppManifest = "./yaml/epp.yaml"
// eppConfigManifest is the manifest for the EPP's ConfigMap.
eppConfigManifest = "./yaml/epp-configmap.yaml"
// activatorManifest is the manifest for the deployment of the activator.
activatorManifest = "./yaml/activator.yaml"
// activatorfilterManifest is the manifest for the activator filter resources.
activatorfilterManifest = "./yaml/activator-filters.yaml"
// rbacManifest is the manifest for the EPP's RBAC resources.
rbacManifest = "./yaml/rbacs.yaml"
// serviceAccountManifest is the manifest for the EPP's service account resources.
serviceAccountManifest = "./yaml/service-accounts.yaml"
// servicesManifest is the manifest for the EPP's service resources.
servicesManifest = "./yaml/services.yaml"
// networkConfigurationManifest is the manifest for the network configuration resources.
networkConfigurationManifest = "./yaml/network-config.yaml"
)

var (
// port is the host port used to reach the cluster; it is set in BeforeSuite
// and substituted for ${PORT} in kindClusterConfig.
port string
// testConfig is the shared test configuration, created in BeforeSuite.
testConfig *testutils.TestConfig

// Image names and tags are read from environment variables via
// env.GetEnvString (presumably falling back to the literal given as the
// second argument when the variable is unset — confirm against the helper).
eppImg = env.GetEnvString("EPP_IMAGE", "llm-d-inference-scheduler", ginkgo.GinkgoLogr)
eppTag = env.GetEnvString("EPP_TAG", "dev", ginkgo.GinkgoLogr)
activatorImg = env.GetEnvString("ACTIVATOR_IMAGE", "llm-d-activator", ginkgo.GinkgoLogr)
activatorTag = env.GetEnvString("ACTIVATOR_TAG", "dev", ginkgo.GinkgoLogr)
vllmImg = env.GetEnvString("VLLM_IMAGE", "llm-d-inference-sim", ginkgo.GinkgoLogr)
vllmTag = env.GetEnvString("VLLM_TAG", "dev", ginkgo.GinkgoLogr)

// imageRegistry is the registry prefix joined with each image name and tag
// to form the full image reference (see loadImages).
imageRegistry = env.GetEnvString("IMAGE_REGISTRY", "ghcr.io/llm-d", ginkgo.GinkgoLogr)
)

// TestEndToEnd is the `go test` entry point: it wires Gomega failures into
// Ginkgo and then runs every registered spec of the suite.
func TestEndToEnd(t *testing.T) {
	const suiteDescription = "End To End Test Suite"

	gomega.RegisterFailHandler(ginkgo.Fail)
	ginkgo.RunSpecs(t, suiteDescription)
}

// BeforeSuite prepares the whole environment once before any spec runs. The
// steps are order-dependent: the cluster must exist before a client is built,
// and testConfig must be created before any helper that uses it.
var _ = ginkgo.BeforeSuite(func() {
// port must be set before setupK8sCluster, which substitutes it into the
// kind cluster configuration.
port = "30080"

setupK8sCluster()
testConfig = testutils.NewTestConfig("default")
setupK8sClient()
createCRDs(gieCrdsKustomize)
createCRDs(gatewayCrdsKustomize)
createIstio()
createResources()
loadImages()
})

// AfterSuite tears down the "e2e-tests" kind cluster once all specs have run,
// waiting up to ten minutes for `kind delete cluster` to exit successfully.
var _ = ginkgo.AfterSuite(func() {
	teardown := exec.Command("kind", "delete", "cluster", "--name", "e2e-tests")

	session, startErr := gexec.Start(teardown, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter)
	gomega.Expect(startErr).ShouldNot(gomega.HaveOccurred())

	gomega.Eventually(session).WithTimeout(10 * time.Minute).Should(gexec.Exit(0))
})

// setupK8sCluster creates the kind cluster named "e2e-tests" used by the E2E
// tests. The cluster configuration is rendered from kindClusterConfig, with
// ${PORT} replaced by the package-level port, and streamed to kind over stdin
// (the "--config -" argument). It waits up to 600 seconds for kind to exit
// with status 0.
func setupK8sCluster() {
	command := exec.Command("kind", "create", "cluster", "--name", "e2e-tests", "--config", "-")
	stdin, err := command.StdinPipe()
	gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
	go func() {
		// The assertions below run outside the goroutine Ginkgo manages.
		// GinkgoRecover turns a failed assertion into a reported suite failure
		// instead of an unrecovered panic. It is deferred first so that it
		// runs last and also covers the Close assertion.
		defer ginkgo.GinkgoRecover()
		defer func() {
			// Closing stdin signals end-of-config to kind.
			err := stdin.Close()
			gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
		}()
		clusterConfig := strings.ReplaceAll(kindClusterConfig, "${PORT}", port)
		_, err := io.WriteString(stdin, clusterConfig)
		gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
	}()
	session, err := gexec.Start(command, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter)
	gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
	gomega.Eventually(session).WithTimeout(600 * time.Second).Should(gexec.Exit(0))
}

func createResources() {
ApplyYAMLFile(testConfig, rbacManifest)
ApplyYAMLFile(testConfig, servicesManifest)
ApplyYAMLFile(testConfig, eppConfigManifest)
ApplyYAMLFile(testConfig, serviceAccountManifest)
ApplyYAMLFile(testConfig, activatorfilterManifest)
ApplyYAMLFile(testConfig, networkConfigurationManifest)
}

func loadImages() {
kindLoadImage(imageRegistry + "/" + eppImg + ":" + eppTag)
kindLoadImage(imageRegistry + "/" + vllmImg + ":" + vllmTag)
kindLoadImage(imageRegistry + "/" + activatorImg + ":" + activatorTag)
}

// kindLoadImage runs `kind load docker-image` for the given image against the
// "e2e-tests" cluster and waits up to ten minutes for it to exit with status 0.
func kindLoadImage(image string) {
	ginkgo.By("Loading " + image + " into the cluster e2e-tests")

	load := exec.Command("kind", "--name", "e2e-tests", "load", "docker-image", image)
	session, startErr := gexec.Start(load, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter)
	gomega.Expect(startErr).ShouldNot(gomega.HaveOccurred())

	gomega.Eventually(session).WithTimeout(10 * time.Minute).Should(gexec.Exit(0))
}

// setupK8sClient registers every API group the tests use on testConfig.Scheme,
// then creates the client through testConfig.CreateCli and routes
// controller-runtime logging to Ginkgo's logger.
func setupK8sClient() {
	restCfg := config.GetConfigOrDie()
	gomega.ExpectWithOffset(1, restCfg).NotTo(gomega.BeNil())

	// mustSucceed asserts that a scheme registration returned no error; the
	// offset makes a failure point at the registration line, not the closure.
	mustSucceed := func(err error) {
		gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred())
	}

	scheme := testConfig.Scheme
	mustSucceed(clientgoscheme.AddToScheme(scheme))
	mustSucceed(infextv1.Install(scheme))
	mustSucceed(apiextv1.AddToScheme(scheme))
	mustSucceed(infextv1a2.Install(scheme))
	mustSucceed(gtwv1.Install(scheme))
	mustSucceed(istiov1.AddToScheme(scheme))
	mustSucceed(istiov1a3.AddToScheme(scheme))

	testConfig.CreateCli()

	k8slog.SetLogger(ginkgo.GinkgoLogr)
}

// createCRDs creates the Inference Extension CRDs used for testing.
func createCRDs(manifests string) {
crds := runKustomize(manifests)
CreateObjsFromYaml(testConfig, crds)
}

// runKustomize executes `kustomize build` on kustomizeDir, waits up to ten
// minutes for it to exit with status 0, and returns its standard output split
// on the "\n---" separator. Standard error goes to the Ginkgo writer.
func runKustomize(kustomizeDir string) []string {
	build := exec.Command("kustomize", "build", kustomizeDir)

	session, startErr := gexec.Start(build, nil, ginkgo.GinkgoWriter)
	gomega.Expect(startErr).ShouldNot(gomega.HaveOccurred())
	gomega.Eventually(session).WithTimeout(10 * time.Minute).Should(gexec.Exit(0))

	rendered := string(session.Out.Contents())
	return strings.Split(rendered, "\n---")
}

// createIstio runs `helmfile apply` with ./yaml/istio.helmfile.yaml and waits
// up to ten minutes for the command to exit with status 0.
func createIstio() {
	apply := exec.Command("helmfile", "apply", "-f", "./yaml/istio.helmfile.yaml")

	session, startErr := gexec.Start(apply, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter)
	gomega.Expect(startErr).ShouldNot(gomega.HaveOccurred())

	gomega.Eventually(session).WithTimeout(10 * time.Minute).Should(gexec.Exit(0))
}

// kindClusterConfig is the kind cluster configuration that setupK8sCluster
// pipes to `kind create cluster --config -`. The ${PORT} placeholder is
// replaced with the package-level port before use; it is the host port mapped
// to container port 30080. Container port 30081 is mapped to host port 30081.
const kindClusterConfig = `
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- extraPortMappings:
- containerPort: 30080
hostPort: ${PORT}
protocol: TCP
- containerPort: 30081
hostPort: 30081
protocol: TCP
`
138 changes: 138 additions & 0 deletions test/activator/e2e_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
package e2e

import (
"fmt"
"net/http"
"strings"

"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
"github.com/openai/openai-go"
"github.com/openai/openai-go/option"
testutils "sigs.k8s.io/gateway-api-inference-extension/test/utils"
)

const (
// simplePrompt is the user message sent in the chat completion request.
simplePrompt = "Hello my name is Andrew, I have a doctorate in Rocket Science, and I like interplanetary space exploration"
// simDeployment1 and simDeployment2 reference the YAML files for the model
// server deployments.
simDeployment1 = "./yaml/vllm-sim-1.yaml"
simDeployment2 = "./yaml/vllm-sim-2.yaml"
// modelserver is the name substituted for ${NAME} in the inference pool manifest.
modelserver = "granite-3-8b"
)

var (
// modelName is the model requested in the chat completion call.
modelName = "granite/granite-3-8b-instruct"
// nsName is the namespace expected in the x-inference-namespace response header.
nsName = "default"
)

// End-to-end specs for the activator. The container is declared with
// ginkgo.Ordered. Both scenarios share one spec body and differ only in the
// value substituted for ${GRACE_PERIOD} in the inference pool manifest.
var _ = ginkgo.Describe("Run end to end tests", ginkgo.Ordered, func() {
	// simpleScenario returns a spec body that creates the inference pool, the
	// EPP, the activator and the model server, sends one chat completion and
	// checks which namespace and pod served it.
	//
	// Every created object is registered with DeferCleanup immediately after
	// creation, so it is deleted even when a later assertion fails. Cleanups
	// run in reverse registration order: model server, activator, EPP and
	// finally the inference pool.
	simpleScenario := func(gracePeriod string) func() {
		return func() {
			// Create inferencePool
			inferencePools := createInferencePool(inferExtManifest, "apps/v1", "Deployment", modelserver, gracePeriod)
			ginkgo.DeferCleanup(func() { testutils.DeleteObjects(testConfig, inferencePools) })

			// Create workload objects: EPP and activator
			epp := createResource(eppManifest, imageRegistry, eppImg, eppTag)
			ginkgo.DeferCleanup(func() { testutils.DeleteObjects(testConfig, epp) })

			activator := createResource(activatorManifest, imageRegistry, activatorImg, activatorTag)
			ginkgo.DeferCleanup(func() { testutils.DeleteObjects(testConfig, activator) })

			// Create model server
			modelServers := createResource(simDeployment1, imageRegistry, vllmImg, vllmTag)
			ginkgo.DeferCleanup(func() { testutils.DeleteObjects(testConfig, modelServers) })

			nsHdr, podHdr := runChatCompletion(simplePrompt)
			gomega.Expect(nsHdr).Should(gomega.Equal(nsName))
			gomega.Expect(podHdr).Should(gomega.Equal(modelServers[0]))
		}
	}

	ginkgo.When("Running simple non-PD configuration", func() {
		ginkgo.It("should run successfully", simpleScenario("30"))
	})

	ginkgo.When("Running simple non-PD KV enabled configuration", func() {
		ginkgo.It("should run successfully", simpleScenario("80"))
	})
})

// createModelServers creates the model server resources used for testing from the given filePaths.
func createInferencePool(inferPoolManifest, apiVersion, kind, name, gracePeriod string) []string {
manifests := testutils.ReadYaml(inferPoolManifest)
manifests = substituteMany(manifests,
map[string]string{
"${KIND}": kind,
"${NAME}": name,
"${GRACE_PERIOD}": gracePeriod,
"${API_VERSION}": apiVersion,
})
objects := CreateObjsFromYaml(testConfig, manifests)

return objects
}

// createResource reads manifest, fills in the ${IMAGE}, ${TAG} and
// ${IMAGE_REGISTRY} placeholders with img, tag and registry, creates the
// resulting objects and returns what CreateObjsFromYaml reports for them.
func createResource(manifest, registry, img, tag string) []string {
	ginkgo.By("Creating resource from manifest: " + manifest)

	placeholders := map[string]string{
		"${IMAGE}":          img,
		"${TAG}":            tag,
		"${IMAGE_REGISTRY}": registry,
	}
	rendered := substituteMany(testutils.ReadYaml(manifest), placeholders)
	return CreateObjsFromYaml(testConfig, rendered)
}

func runChatCompletion(prompt string) (string, string) {
var httpResp *http.Response
openaiclient := openai.NewClient(
option.WithBaseURL(fmt.Sprintf("http://localhost:%s/v1", port)))

params := openai.ChatCompletionNewParams{
Messages: []openai.ChatCompletionMessageParamUnion{
openai.UserMessage(prompt),
},
Model: modelName,
}
resp, err := openaiclient.Chat.Completions.New(testConfig.Context, params, option.WithResponseInto(&httpResp))
gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
gomega.Expect(resp.Choices).Should(gomega.HaveLen(1))
gomega.Expect(resp.Choices[0].FinishReason).Should(gomega.Equal("stop"))
gomega.Expect(resp.Choices[0].Message.Content).Should(gomega.Equal(prompt))

namespaceHeader := httpResp.Header.Get("x-inference-namespace")
podHeader := httpResp.Header.Get("x-inference-pod")

return namespaceHeader, podHeader
}

// substituteMany returns a copy of inputs in which every occurrence of each
// key of substitutions has been replaced by its value.
//
// All replacements are applied in a single pass over each input, so text
// inserted by one substitution is never rewritten by another. This keeps the
// result independent of Go's randomized map iteration order even when a
// replacement value happens to contain another key. The only remaining
// order-sensitive case is a key that is a prefix of another key, which
// placeholder-style keys such as "${NAME}" do not produce.
//
// The returned slice is always non-nil and has the same length as inputs.
func substituteMany(inputs []string, substitutions map[string]string) []string {
	// Build the replacer once; it is reused for every input.
	oldNew := make([]string, 0, 2*len(substitutions))
	for key, value := range substitutions {
		oldNew = append(oldNew, key, value)
	}
	replacer := strings.NewReplacer(oldNew...)

	outputs := make([]string, 0, len(inputs))
	for _, input := range inputs {
		outputs = append(outputs, replacer.Replace(input))
	}
	return outputs
}
Loading