From 2aaa5b35a84df5fc657459a1ee9ffb7a911a3896 Mon Sep 17 00:00:00 2001 From: Pierangelo Di Pilato Date: Fri, 21 Nov 2025 15:55:50 +0100 Subject: [PATCH 1/5] Bump kv-cache-manager to v0.4.0-rc1 Signed-off-by: Pierangelo Di Pilato --- go.mod | 12 +++++++++--- go.sum | 16 ++++++++++++---- pkg/kv-cache/block_cache.go | 4 ++-- pkg/kv-cache/kv_cache_sender.go | 2 +- pkg/kv-cache/kv_cache_test.go | 8 ++++---- pkg/kv-cache/kv_test_helper.go | 8 ++++---- pkg/llm-d-inference-sim/simulator.go | 12 ++++++++++-- 7 files changed, 42 insertions(+), 20 deletions(-) diff --git a/go.mod b/go.mod index 51bc363f..b6a877f5 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/buaazp/fasthttprouter v0.1.1 github.com/go-logr/logr v1.4.2 github.com/google/uuid v1.6.0 - github.com/llm-d/llm-d-kv-cache-manager v0.3.0-rc1 + github.com/llm-d/llm-d-kv-cache-manager v0.4.0-rc1 github.com/onsi/ginkgo/v2 v2.23.4 github.com/onsi/gomega v1.37.0 github.com/openai/openai-go/v3 v3.6.1 @@ -23,6 +23,12 @@ require ( k8s.io/klog/v2 v2.130.1 ) +require ( + github.com/dgraph-io/ristretto/v2 v2.3.0 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect + go.uber.org/multierr v1.11.0 // indirect +) + require ( github.com/andybalholm/brotli v1.1.1 // indirect github.com/beorn7/perks v1.0.1 // indirect @@ -64,7 +70,7 @@ require ( go.uber.org/automaxprocs v1.6.0 // indirect golang.org/x/net v0.38.0 // indirect golang.org/x/oauth2 v0.27.0 // indirect - golang.org/x/sys v0.32.0 // indirect + golang.org/x/sys v0.35.0 // indirect golang.org/x/term v0.30.0 // indirect golang.org/x/text v0.23.0 // indirect golang.org/x/time v0.9.0 // indirect @@ -77,7 +83,7 @@ require ( k8s.io/client-go v0.33.0 // indirect k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 // indirect - sigs.k8s.io/controller-runtime v0.21.0 // indirect + sigs.k8s.io/controller-runtime v0.21.0 sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect sigs.k8s.io/randfill v1.0.0 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect diff --git a/go.sum b/go.sum index 0e726bbb..9e392a74 100644 --- a/go.sum +++ b/go.sum @@ -19,8 +19,14 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgraph-io/ristretto/v2 v2.3.0 h1:qTQ38m7oIyd4GAed/QkUZyPFNMnvVWyazGXRwvOt5zk= +github.com/dgraph-io/ristretto/v2 v2.3.0/go.mod h1:gpoRV3VzrEY1a9dWAYV6T1U7YzfgttXdd/ZzL1s9OZM= +github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da h1:aIftn67I1fkbMa512G+w+Pxci9hJPB8oMnkcP3iZF38= +github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= @@ -68,8 +74,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= -github.com/llm-d/llm-d-kv-cache-manager v0.3.0-rc1 h1:SDLiNrcreDcA9m9wfXAumFARDHHXpjOjHTzshTiTGxk= -github.com/llm-d/llm-d-kv-cache-manager v0.3.0-rc1/go.mod h1:tN80/D0Faf6pE2ocwFgTNoCxKPsqdsa2XnjQUqOaZ8Q= +github.com/llm-d/llm-d-kv-cache-manager v0.4.0-rc1 h1:gWkZ9yp7sU5j1vbNB7eO95lxbvgJV+qd/60LnPfNk9w= +github.com/llm-d/llm-d-kv-cache-manager v0.4.0-rc1/go.mod h1:oEmDhEjW1pEoOSlEFy8CKoMc7ixQmSKEbhLt9CoH/a0= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs= @@ -153,6 +159,8 @@ go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= @@ -174,8 +182,8 @@ golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20= -golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= +golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y= golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/pkg/kv-cache/block_cache.go b/pkg/kv-cache/block_cache.go index d9d3f5da..ba10e5fe 100644 --- a/pkg/kv-cache/block_cache.go +++ b/pkg/kv-cache/block_cache.go @@ -177,14 +177,14 @@ func (bc *blockCache) startRequest(requestID string, blocks []uint64) (int, erro delete(bc.unusedBlocks, oldestUnusedHash) common.WriteToChannel(bc.eventChan, - EventData{action: eventActionRemove, hashValues: []uint64{oldestUnusedHash}}, + EventData{action: eventActionRemove, hashValues: []any{oldestUnusedHash}}, bc.logger, "block cache eventChan") } // Add the new block bc.usedBlocks[block] = 1 common.WriteToChannel(bc.eventChan, - EventData{action: eventActionStore, hashValues: []uint64{block}}, + EventData{action: eventActionStore, hashValues: []any{block}}, bc.logger, "block cache eventChan") } diff --git a/pkg/kv-cache/kv_cache_sender.go b/pkg/kv-cache/kv_cache_sender.go index 51d0c384..67bbc6d4 100644 --- a/pkg/kv-cache/kv_cache_sender.go +++ b/pkg/kv-cache/kv_cache_sender.go @@ -37,7 +37,7 @@ const ( type EventData struct { action EventAction - hashValues []uint64 + hashValues []any } type KVEventSender struct { diff --git a/pkg/kv-cache/kv_cache_test.go b/pkg/kv-cache/kv_cache_test.go index 826eada6..6d92d88e 100644 --- a/pkg/kv-cache/kv_cache_test.go +++ b/pkg/kv-cache/kv_cache_test.go @@ -330,8 +330,8 @@ var _ = Describe("KV cache", Ordered, func() { wg.Wait() // wait for goroutine to exit }() - expectedRemovedBlocks := []uint64{2, 4} - expectedStoredBlocks := []uint64{1, 2, 3, 4, 5, 6} + expectedRemovedBlocks := []any{uint64(2), uint64(4)} + expectedStoredBlocks := []any{uint64(1), uint64(2), uint64(3), uint64(4), uint64(5), uint64(6)} go func() { // Make sure that the subscriber listens before the events are published @@ -371,8 +371,8 @@ var _ = Describe("KV cache", Ordered, func() { Expect(alreadyInCache).To(Equal(0)) }() - removedBlocks := make([]uint64, 0) - storedBlocks := make([]uint64, 0) + removedBlocks := make([]any, 0) + storedBlocks := make([]any, 0) count := uint64(1) for { parts, err := sub.RecvMessageBytes(0) diff --git a/pkg/kv-cache/kv_test_helper.go b/pkg/kv-cache/kv_test_helper.go index c124d0f7..2b50dc47 100644 --- a/pkg/kv-cache/kv_test_helper.go +++ b/pkg/kv-cache/kv_test_helper.go @@ -21,11 +21,11 @@ import ( "github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvevents" "github.com/onsi/ginkgo/v2" - gomega "github.com/onsi/gomega" + "github.com/onsi/gomega" "github.com/vmihailenco/msgpack/v5" ) -func ParseKVEvent(parts [][]byte, expectedTopic string, expectedSeq uint64) ([]uint64, []uint64, bool) { +func ParseKVEvent(parts [][]byte, expectedTopic string, expectedSeq uint64) ([]any, []any, bool) { // The message should be [topic, seq, payload] gomega.Expect(parts).To(gomega.HaveLen(3)) @@ -34,8 +34,8 @@ func ParseKVEvent(parts [][]byte, expectedTopic string, expectedSeq uint64) ([]u seq := binary.BigEndian.Uint64(parts[1]) gomega.Expect(seq).To(gomega.Equal(expectedSeq)) - removed := make([]uint64, 0) - stored := make([]uint64, 0) + removed := make([]any, 0) + stored := make([]any, 0) allCleared := false var eventBatch kvevents.EventBatch diff --git a/pkg/llm-d-inference-sim/simulator.go b/pkg/llm-d-inference-sim/simulator.go index c58f6e9f..001a34b6 100644 --- a/pkg/llm-d-inference-sim/simulator.go +++ b/pkg/llm-d-inference-sim/simulator.go @@ -333,10 +333,18 @@ func (s *VllmSimulator) initializeSim(ctx context.Context) error { return err } - tokenizationConfig := tokenization.DefaultConfig() + tokenizationConfig, err := tokenization.DefaultConfig() + if err != nil { + return fmt.Errorf("failed to create default tokenization configuration: %w", err) + } + if s.config.TokenizersCacheDir != "" { - tokenizationConfig.TokenizersCacheDir = s.config.TokenizersCacheDir + if tokenizationConfig.HFTokenizerConfig == nil { + tokenizationConfig.HFTokenizerConfig = &tokenization.HFTokenizerConfig{} + } + tokenizationConfig.HFTokenizerConfig.TokenizersCacheDir = s.config.TokenizersCacheDir } + s.tokenizer, err = tokenization.NewCachedHFTokenizer(tokenizationConfig.HFTokenizerConfig) if err != nil { return fmt.Errorf("failed to create tokenizer: %w", err) From 97e7e80fd5fb08fbac2d8274872aee502ad74298 Mon Sep 17 00:00:00 2001 From: Pierangelo Di Pilato Date: Mon, 24 Nov 2025 08:51:33 +0100 Subject: [PATCH 2/5] rc2 Signed-off-by: Pierangelo Di Pilato --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index b6a877f5..45f48666 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/buaazp/fasthttprouter v0.1.1 github.com/go-logr/logr v1.4.2 github.com/google/uuid v1.6.0 - github.com/llm-d/llm-d-kv-cache-manager v0.4.0-rc1 + github.com/llm-d/llm-d-kv-cache-manager v0.4.0-rc2 github.com/onsi/ginkgo/v2 v2.23.4 github.com/onsi/gomega v1.37.0 github.com/openai/openai-go/v3 v3.6.1 diff --git a/go.sum b/go.sum index 9e392a74..61de9f21 100644 --- a/go.sum +++ b/go.sum @@ -74,8 +74,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= -github.com/llm-d/llm-d-kv-cache-manager v0.4.0-rc1 h1:gWkZ9yp7sU5j1vbNB7eO95lxbvgJV+qd/60LnPfNk9w= -github.com/llm-d/llm-d-kv-cache-manager v0.4.0-rc1/go.mod h1:oEmDhEjW1pEoOSlEFy8CKoMc7ixQmSKEbhLt9CoH/a0= +github.com/llm-d/llm-d-kv-cache-manager v0.4.0-rc2 h1:l2Sm8W6SRg4TAme4RsndwZ++5+4aQvDI4vnf8TKrhww= +github.com/llm-d/llm-d-kv-cache-manager v0.4.0-rc2/go.mod h1:ZlK7MCuz5D/weLeHyNKEmVF/eJZDyYn3XyRowTihq9o= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs= From 6300e50964d540fa074d94a5985e3dd523dae4b7 Mon Sep 17 00:00:00 2001 From: Pierangelo Di Pilato Date: Mon, 24 Nov 2025 09:45:46 +0100 Subject: [PATCH 3/5] Install python3-dev and pkg-config Extract template wrapper from kv-cache-manager Signed-off-by: Pierangelo Di Pilato --- .github/workflows/ci-pr-checks.yaml | 37 +++++++++++++++++++++++--- Dockerfile | 39 +++++++++++++++++++++++----- pkg/llm-d-inference-sim/simulator.go | 2 +- 3 files changed, 66 insertions(+), 12 deletions(-) diff --git a/.github/workflows/ci-pr-checks.yaml b/.github/workflows/ci-pr-checks.yaml index 35ed2316..44095891 100644 --- a/.github/workflows/ci-pr-checks.yaml +++ b/.github/workflows/ci-pr-checks.yaml @@ -21,22 +21,51 @@ jobs: go-version-file: ./go.mod cache-dependency-path: ./go.sum - - name: Install libzmq dependencies (kvcache/kvevents) + - name: Install libzmq and Python dependencies (kvcache/kvevents) run: | sudo apt-get update + sudo apt-get install -y pkg-config python3-dev python3-pip make download-zmq + pip3 install transformers --break-system-packages - - name: Set PKG_CONFIG_PATH - run: echo "PKG_CONFIG_PATH=/usr/lib/pkgconfig" >> $GITHUB_ENV + - name: Configure CGO for Python + run: | + PYTHON_INCLUDE=$(python3 -c "import sysconfig; print(sysconfig.get_path('include'))") + echo "CPATH=${PYTHON_INCLUDE}:${CPATH}" >> $GITHUB_ENV + echo "CGO_ENABLED=1" >> $GITHUB_ENV + echo "CGO_CFLAGS=$(python3-config --cflags --embed)" >> $GITHUB_ENV + echo "CGO_LDFLAGS=$(python3-config --ldflags --embed)" >> $GITHUB_ENV + + - name: Set PKG_CONFIG_PATH and PYTHONPATH + run: | + echo "PKG_CONFIG_PATH=/usr/lib/pkgconfig" >> $GITHUB_ENV + GOMODCACHE=$(go env GOMODCACHE) + # Extract kv-cache-manager version from go.mod + KV_CACHE_MGR_VERSION=$(go list -m -f '{{.Version}}' github.com/llm-d/llm-d-kv-cache-manager) + KV_CACHE_MGR_PATH="${GOMODCACHE}/github.com/llm-d/llm-d-kv-cache-manager@${KV_CACHE_MGR_VERSION}/pkg/preprocessing/chat_completions" + echo "PYTHONPATH=${KV_CACHE_MGR_PATH}:${PYTHONPATH}" >> $GITHUB_ENV - name: Run lint checks uses: golangci/golangci-lint-action@v8 with: version: 'v2.4.0' args: "--config=./.golangci.yml" + env: + CGO_ENABLED: ${{ env.CGO_ENABLED }} + CGO_CFLAGS: ${{ env.CGO_CFLAGS }} + CGO_LDFLAGS: ${{ env.CGO_LDFLAGS }} + CPATH: ${{ env.CPATH }} + PKG_CONFIG_PATH: ${{ env.PKG_CONFIG_PATH }} - name: Run go test - shell: bash + shell: bash run: | echo "Running tests with Ginkgo..." make test + env: + CGO_ENABLED: ${{ env.CGO_ENABLED }} + CGO_CFLAGS: ${{ env.CGO_CFLAGS }} + CGO_LDFLAGS: ${{ env.CGO_LDFLAGS }} + CPATH: ${{ env.CPATH }} + PKG_CONFIG_PATH: ${{ env.PKG_CONFIG_PATH }} + PYTHONPATH: ${{ env.PYTHONPATH }} diff --git a/Dockerfile b/Dockerfile index 9d87f5bd..35411cc6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ ARG TARGETARCH # Install build tools # The builder is based on UBI8, so we need epel-release-8. RUN dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm' && \ - dnf install -y gcc-c++ libstdc++ libstdc++-devel clang zeromq-devel pkgconfig && \ + dnf install -y gcc-c++ libstdc++ libstdc++-devel clang zeromq-devel pkgconfig python3-devel && \ dnf clean all WORKDIR /workspace @@ -28,6 +28,13 @@ ARG TOKENIZER_VERSION=v1.22.1 RUN curl -L https://github.com/daulet/tokenizers/releases/download/${TOKENIZER_VERSION}/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib RUN ranlib lib/*.a +# Copy Python wrapper from kv-cache-manager dependency +# Extract version dynamically and copy to a known location +RUN KV_CACHE_MGR_VERSION=$(go list -m -f '{{.Version}}' github.com/llm-d/llm-d-kv-cache-manager) && \ + mkdir -p /workspace/kv-cache-manager-wrapper && \ + cp /go/pkg/mod/github.com/llm-d/llm-d-kv-cache-manager@${KV_CACHE_MGR_VERSION}/pkg/preprocessing/chat_completions/render_jinja_template_wrapper.py \ + /workspace/kv-cache-manager-wrapper/ + # Build # the GOARCH has not a default value to allow the binary be built according to the host where the command # was called. For example, if we call make image-build in a local env which has the Apple Silicon M1 SO @@ -38,20 +45,38 @@ ENV GOOS=${TARGETOS:-linux} ENV GOARCH=${TARGETARCH} RUN go build -a -o bin/llm-d-inference-sim -ldflags="-extldflags '-L$(pwd)/lib'" cmd/cmd.go +# Runtime stage # Use ubi9 as a minimal base image to package the manager binary # Refer to https://catalog.redhat.com/software/containers/ubi9/ubi-minimal/615bd9b4075b022acc111bf5 for more details FROM registry.access.redhat.com/ubi9/ubi-minimal:latest WORKDIR / -# Install zeromq runtime library needed by the manager. +# Install zeromq runtime library and Python runtime needed by the manager. # The final image is UBI9, so we need epel-release-9. +# Using microdnf for minimal image size USER root -RUN microdnf install -y dnf && \ - dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm' && \ - dnf install -y zeromq && \ - dnf clean all && \ - rm -rf /var/cache/dnf /var/lib/dnf +RUN curl -L -o /tmp/epel-release.rpm https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \ + rpm -i /tmp/epel-release.rpm && \ + rm /tmp/epel-release.rpm && \ + microdnf install -y --setopt=install_weak_deps=0 zeromq python3 python3-libs python3-pip && \ + microdnf clean all && \ + rm -rf /var/cache/yum /var/lib/yum + +# Install wrapper as a module in site-packages +# Extract the kv-cache-manager version dynamically from go.mod in the builder stage +RUN mkdir -p /usr/local/lib/python3.9/site-packages/ +COPY --from=builder /workspace/kv-cache-manager-wrapper/render_jinja_template_wrapper.py /usr/local/lib/python3.9/site-packages/ + +# Python deps (no cache, single target) – install transformers +ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 +RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel && \ + python3 -m pip install --no-cache-dir --target /usr/local/lib/python3.9/site-packages transformers && \ + rm -rf /root/.cache/pip + +# Python env +ENV PYTHONPATH="/usr/local/lib/python3.9/site-packages:/usr/lib/python3.9/site-packages" +ENV PYTHON=python3 COPY --from=builder /workspace/bin/llm-d-inference-sim /app/llm-d-inference-sim diff --git a/pkg/llm-d-inference-sim/simulator.go b/pkg/llm-d-inference-sim/simulator.go index 001a34b6..fae80f4a 100644 --- a/pkg/llm-d-inference-sim/simulator.go +++ b/pkg/llm-d-inference-sim/simulator.go @@ -347,7 +347,7 @@ func (s *VllmSimulator) initializeSim(ctx context.Context) error { s.tokenizer, err = tokenization.NewCachedHFTokenizer(tokenizationConfig.HFTokenizerConfig) if err != nil { - return fmt.Errorf("failed to create tokenizer: %w", err) + return fmt.Errorf("failed to create hf tokenizer: %w", err) } if s.config.EnableKVCache { From dfa54b200d24a2e260e8e55b34bc3b430cb79532 Mon Sep 17 00:00:00 2001 From: Pierangelo Di Pilato Date: Mon, 24 Nov 2025 12:35:47 +0100 Subject: [PATCH 4/5] Build image in CI checks Signed-off-by: Pierangelo Di Pilato --- .github/workflows/ci-pr-checks.yaml | 4 +++ Dockerfile | 38 +++++++++++++++++++---------- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/.github/workflows/ci-pr-checks.yaml b/.github/workflows/ci-pr-checks.yaml index 44095891..83408f0d 100644 --- a/.github/workflows/ci-pr-checks.yaml +++ b/.github/workflows/ci-pr-checks.yaml @@ -57,6 +57,10 @@ jobs: CPATH: ${{ env.CPATH }} PKG_CONFIG_PATH: ${{ env.PKG_CONFIG_PATH }} + - name: Build Container image + run: | + make image-build SIM_TAG=pr-check + - name: Run go test shell: bash run: | diff --git a/Dockerfile b/Dockerfile index 35411cc6..1e69dd3d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,8 +6,9 @@ ARG TARGETARCH # Install build tools # The builder is based on UBI8, so we need epel-release-8. RUN dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm' && \ - dnf install -y gcc-c++ libstdc++ libstdc++-devel clang zeromq-devel pkgconfig python3-devel && \ + dnf install -y gcc-c++ libstdc++ libstdc++-devel clang zeromq-devel pkgconfig python3.12-devel python3.12-pip git && \ dnf clean all +# python3.12-devel needed for CGO compilation (Python headers and python3.12-config for linker flags) WORKDIR /workspace # Copy the Go Modules manifests @@ -28,11 +29,13 @@ ARG TOKENIZER_VERSION=v1.22.1 RUN curl -L https://github.com/daulet/tokenizers/releases/download/${TOKENIZER_VERSION}/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib RUN ranlib lib/*.a -# Copy Python wrapper from kv-cache-manager dependency +# Copy Python wrapper and requirements from kv-cache-manager dependency # Extract version dynamically and copy to a known location RUN KV_CACHE_MGR_VERSION=$(go list -m -f '{{.Version}}' github.com/llm-d/llm-d-kv-cache-manager) && \ mkdir -p /workspace/kv-cache-manager-wrapper && \ cp /go/pkg/mod/github.com/llm-d/llm-d-kv-cache-manager@${KV_CACHE_MGR_VERSION}/pkg/preprocessing/chat_completions/render_jinja_template_wrapper.py \ + /workspace/kv-cache-manager-wrapper/ && \ + cp /go/pkg/mod/github.com/llm-d/llm-d-kv-cache-manager@${KV_CACHE_MGR_VERSION}/pkg/preprocessing/chat_completions/requirements.txt \ /workspace/kv-cache-manager-wrapper/ # Build @@ -43,7 +46,12 @@ RUN KV_CACHE_MGR_VERSION=$(go list -m -f '{{.Version}}' github.com/llm-d/llm-d-k ENV CGO_ENABLED=1 ENV GOOS=${TARGETOS:-linux} ENV GOARCH=${TARGETARCH} -RUN go build -a -o bin/llm-d-inference-sim -ldflags="-extldflags '-L$(pwd)/lib'" cmd/cmd.go +ENV PYTHON=python3.12 +ENV PYTHONPATH=/usr/lib64/python3.12/site-packages:/usr/lib/python3.12/site-packages + +RUN export CGO_CFLAGS="$(python3.12-config --cflags) -I/workspace/lib" && \ + export CGO_LDFLAGS="$(python3.12-config --ldflags --embed) -L/workspace/lib -ltokenizers -ldl -lm" && \ + go build -a -o bin/llm-d-inference-sim -ldflags="-extldflags '-L$(pwd)/lib'" cmd/cmd.go # Runtime stage # Use ubi9 as a minimal base image to package the manager binary @@ -59,24 +67,28 @@ USER root RUN curl -L -o /tmp/epel-release.rpm https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \ rpm -i /tmp/epel-release.rpm && \ rm /tmp/epel-release.rpm && \ - microdnf install -y --setopt=install_weak_deps=0 zeromq python3 python3-libs python3-pip && \ + microdnf install -y --setopt=install_weak_deps=0 zeromq python3.12 python3.12-libs python3.12-pip && \ microdnf clean all && \ - rm -rf /var/cache/yum /var/lib/yum + rm -rf /var/cache/yum /var/lib/yum && \ + ln -sf /usr/bin/python3.12 /usr/bin/python3 && \ + ln -sf /usr/bin/python3.12 /usr/bin/python # Install wrapper as a module in site-packages -# Extract the kv-cache-manager version dynamically from go.mod in the builder stage -RUN mkdir -p /usr/local/lib/python3.9/site-packages/ -COPY --from=builder /workspace/kv-cache-manager-wrapper/render_jinja_template_wrapper.py /usr/local/lib/python3.9/site-packages/ +RUN mkdir -p /usr/local/lib/python3.12/site-packages/ +COPY --from=builder /workspace/kv-cache-manager-wrapper/render_jinja_template_wrapper.py /usr/local/lib/python3.12/site-packages/ -# Python deps (no cache, single target) – install transformers +# Python deps (no cache, single target) – filter out torch ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 -RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel && \ - python3 -m pip install --no-cache-dir --target /usr/local/lib/python3.9/site-packages transformers && \ +COPY --from=builder /workspace/kv-cache-manager-wrapper/requirements.txt /tmp/requirements.txt +RUN sed '/^torch\b/d' /tmp/requirements.txt > /tmp/requirements.notorch.txt && \ + python3.12 -m pip install --no-cache-dir --upgrade pip setuptools wheel && \ + python3.12 -m pip install --no-cache-dir --target /usr/local/lib/python3.12/site-packages -r /tmp/requirements.notorch.txt && \ + rm /tmp/requirements.txt /tmp/requirements.notorch.txt && \ rm -rf /root/.cache/pip # Python env -ENV PYTHONPATH="/usr/local/lib/python3.9/site-packages:/usr/lib/python3.9/site-packages" -ENV PYTHON=python3 +ENV PYTHONPATH="/usr/local/lib/python3.12/site-packages:/usr/lib/python3.12/site-packages" +ENV PYTHON=python3.12 COPY --from=builder /workspace/bin/llm-d-inference-sim /app/llm-d-inference-sim From f9714432f9c28be7c7d08339833223288bdc587e Mon Sep 17 00:00:00 2001 From: Pierangelo Di Pilato Date: Fri, 28 Nov 2025 14:01:08 +0100 Subject: [PATCH 5/5] Move setup to Makefile Signed-off-by: Pierangelo Di Pilato --- .github/workflows/ci-pr-checks.yaml | 39 ++-------------- Makefile | 72 ++++++++++++++++++++--------- 2 files changed, 52 insertions(+), 59 deletions(-) diff --git a/.github/workflows/ci-pr-checks.yaml b/.github/workflows/ci-pr-checks.yaml index 83408f0d..12f9d828 100644 --- a/.github/workflows/ci-pr-checks.yaml +++ b/.github/workflows/ci-pr-checks.yaml @@ -25,37 +25,12 @@ jobs: run: | sudo apt-get update sudo apt-get install -y pkg-config python3-dev python3-pip - make download-zmq + make install-dependencies pip3 install transformers --break-system-packages - - name: Configure CGO for Python - run: | - PYTHON_INCLUDE=$(python3 -c "import sysconfig; print(sysconfig.get_path('include'))") - echo "CPATH=${PYTHON_INCLUDE}:${CPATH}" >> $GITHUB_ENV - echo "CGO_ENABLED=1" >> $GITHUB_ENV - echo "CGO_CFLAGS=$(python3-config --cflags --embed)" >> $GITHUB_ENV - echo "CGO_LDFLAGS=$(python3-config --ldflags --embed)" >> $GITHUB_ENV - - - name: Set PKG_CONFIG_PATH and PYTHONPATH - run: | - echo "PKG_CONFIG_PATH=/usr/lib/pkgconfig" >> $GITHUB_ENV - GOMODCACHE=$(go env GOMODCACHE) - # Extract kv-cache-manager version from go.mod - KV_CACHE_MGR_VERSION=$(go list -m -f '{{.Version}}' github.com/llm-d/llm-d-kv-cache-manager) - KV_CACHE_MGR_PATH="${GOMODCACHE}/github.com/llm-d/llm-d-kv-cache-manager@${KV_CACHE_MGR_VERSION}/pkg/preprocessing/chat_completions" - echo "PYTHONPATH=${KV_CACHE_MGR_PATH}:${PYTHONPATH}" >> $GITHUB_ENV - - name: Run lint checks - uses: golangci/golangci-lint-action@v8 - with: - version: 'v2.4.0' - args: "--config=./.golangci.yml" - env: - CGO_ENABLED: ${{ env.CGO_ENABLED }} - CGO_CFLAGS: ${{ env.CGO_CFLAGS }} - CGO_LDFLAGS: ${{ env.CGO_LDFLAGS }} - CPATH: ${{ env.CPATH }} - PKG_CONFIG_PATH: ${{ env.PKG_CONFIG_PATH }} + run: | + make lint - name: Build Container image run: | @@ -64,12 +39,4 @@ jobs: - name: Run go test shell: bash run: | - echo "Running tests with Ginkgo..." make test - env: - CGO_ENABLED: ${{ env.CGO_ENABLED }} - CGO_CFLAGS: ${{ env.CGO_CFLAGS }} - CGO_LDFLAGS: ${{ env.CGO_LDFLAGS }} - CPATH: ${{ env.CPATH }} - PKG_CONFIG_PATH: ${{ env.PKG_CONFIG_PATH }} - PYTHONPATH: ${{ env.PYTHONPATH }} diff --git a/Makefile b/Makefile index a71b78c6..57c1e6af 100644 --- a/Makefile +++ b/Makefile @@ -53,6 +53,23 @@ SRC = $(shell find . -type f -name '*.go') help: ## Print help @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) +PYTHON_INCLUDE := $(shell python3 -c "import sysconfig; print(sysconfig.get_path('include'))") +CGO_CFLAGS := $(shell python3-config --cflags --embed) +CGO_LDFLAGS := $(shell python3-config --ldflags --embed) + +export PKG_CONFIG_PATH=/usr/lib/pkgconfig + +GOMODCACHE := $(shell go env GOMODCACHE) +KV_CACHE_MGR_VERSION := $(shell go list -m -f '{{.Version}}' github.com/llm-d/llm-d-kv-cache-manager) +KV_CACHE_MGR_PATH := $(GOMODCACHE)/github.com/llm-d/llm-d-kv-cache-manager@$(KV_CACHE_MGR_VERSION)/pkg/preprocessing/chat_completions +export PYTHONPATH := $(KV_CACHE_MGR_PATH):$(PYTHONPATH) + +# Export them for all targets (optional) +export CGO_ENABLED=1 +export CGO_CFLAGS +export CGO_LDFLAGS +export CPATH := $(PYTHON_INCLUDE):$(CPATH) + GO_LDFLAGS := -extldflags '-L$(shell pwd)/lib $(LDFLAGS)' CGO_ENABLED=1 TOKENIZER_LIB = lib/libtokenizers.a @@ -82,7 +99,7 @@ format: ## Format Go source files @gofmt -l -w $(SRC) .PHONY: test -test: $(GINKGO) download-tokenizer download-zmq ## Run tests +test: $(GINKGO) install-dependencies ## Run tests @printf "\033[33;1m==== Running tests ====\033[0m\n" ifdef GINKGO_FOCUS CGO_ENABLED=1 ginkgo -ldflags="$(GO_LDFLAGS)" -v -r -- -ginkgo.v -ginkgo.focus="$(GINKGO_FOCUS)" @@ -103,7 +120,7 @@ lint: $(GOLANGCI_LINT) ## Run lint ##@ Build .PHONY: build -build: check-go download-tokenizer download-zmq +build: check-go install-dependencies @printf "\033[33;1m==== Building ====\033[0m\n" go build -ldflags="$(GO_LDFLAGS)" -o $(LOCALBIN)/$(PROJECT_NAME) cmd/$(PROJECT_NAME)/main.go @@ -217,32 +234,41 @@ install-hooks: ## Install git hooks ##@ ZMQ Setup -.PHONY: download-zmq -download-zmq: ## Install ZMQ dependencies based on OS/ARCH - @echo "⏳ Checking if ZMQ is already installed..." - @if pkg-config --exists libzmq; then \ - echo "✅ ZMQ is already installed."; \ - else \ - echo "⏳ Installing ZMQ dependencies..."; \ - if [ "$(TARGETOS)" = "linux" ]; then \ - if command -v apt >/dev/null 2>&1; then \ - sudo apt update && sudo apt install -y libzmq3-dev; \ - elif command -v dnf >/dev/null 2>&1; then \ - sudo dnf install -y zeromq-devel; \ +.PHONY: install-dependencies +install-dependencies: download-tokenizer ## Install development dependencies based on OS/ARCH + @echo "Checking and installing development dependencies..." + @if [ "$(TARGETOS)" = "linux" ]; then \ + if [ -x "$$(command -v apt)" ]; then \ + if ! dpkg -s libzmq3-dev >/dev/null 2>&1 || ! dpkg -s g++ >/dev/null 2>&1; then \ + echo "Installing dependencies with apt..."; \ + sudo apt-get update && sudo apt-get install -y libzmq3-dev g++; \ else \ - echo -e "⚠️ Unsupported Linux package manager. Follow installation guides: https://github.com/zeromq/libzmq#installation-of-binary-packages-\n"; \ - exit 1; \ + echo "✅ ZMQ and g++ are already installed."; \ fi; \ - elif [ "$(TARGETOS)" = "darwin" ]; then \ - if command -v brew >/dev/null 2>&1; then \ - brew install zeromq; \ + elif [ -x "$$(command -v dnf)" ]; then \ + if ! dnf -q list installed zeromq-devel >/dev/null 2>&1 || ! dnf -q list installed gcc-c++ >/dev/null 2>&1; then \ + echo "Installing dependencies with dnf..."; \ + sudo dnf install -y zeromq-devel gcc-c++; \ else \ - echo "⚠️ Homebrew is not installed and is required to install zeromq. Install it from https://brew.sh/"; \ - exit 1; \ + echo "✅ ZMQ and gcc-c++ are already installed."; \ fi; \ else \ - echo "⚠️ Unsupported OS: $(TARGETOS). Install libzmq manually - see https://zeromq.org/download/"; \ + echo "Unsupported Linux package manager. Install libzmq and g++/gcc-c++ manually."; \ exit 1; \ fi; \ - echo "✅ ZMQ dependencies installed."; \ + elif [ "$(TARGETOS)" = "darwin" ]; then \ + if [ -x "$$(command -v brew)" ]; then \ + if ! brew list zeromq pkg-config >/dev/null 2>&1; then \ + echo "Installing dependencies with brew..."; \ + brew install zeromq pkg-config; \ + else \ + echo "✅ ZeroMQ and pkgconf are already installed."; \ + fi; \ + else \ + echo "Homebrew is not installed and is required to install zeromq. Install it from https://brew.sh/"; \ + exit 1; \ + fi; \ + else \ + echo "Unsupported OS: $(TARGETOS). Install development dependencies manually."; \ + exit 1; \ fi