diff --git a/.github/workflows/ci-pr-checks.yaml b/.github/workflows/ci-pr-checks.yaml index 35ed2316..12f9d828 100644 --- a/.github/workflows/ci-pr-checks.yaml +++ b/.github/workflows/ci-pr-checks.yaml @@ -21,22 +21,22 @@ jobs: go-version-file: ./go.mod cache-dependency-path: ./go.sum - - name: Install libzmq dependencies (kvcache/kvevents) + - name: Install libzmq and Python dependencies (kvcache/kvevents) run: | sudo apt-get update - make download-zmq - - - name: Set PKG_CONFIG_PATH - run: echo "PKG_CONFIG_PATH=/usr/lib/pkgconfig" >> $GITHUB_ENV + sudo apt-get install -y pkg-config python3-dev python3-pip + make install-dependencies + pip3 install transformers --break-system-packages - name: Run lint checks - uses: golangci/golangci-lint-action@v8 - with: - version: 'v2.4.0' - args: "--config=./.golangci.yml" + run: | + make lint + + - name: Build Container image + run: | + make image-build SIM_TAG=pr-check - name: Run go test - shell: bash + shell: bash run: | - echo "Running tests with Ginkgo..." make test diff --git a/Dockerfile b/Dockerfile index 9d87f5bd..1e69dd3d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,8 +6,9 @@ ARG TARGETARCH # Install build tools # The builder is based on UBI8, so we need epel-release-8. RUN dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm' && \ - dnf install -y gcc-c++ libstdc++ libstdc++-devel clang zeromq-devel pkgconfig && \ + dnf install -y gcc-c++ libstdc++ libstdc++-devel clang zeromq-devel pkgconfig python3.12-devel python3.12-pip git && \ dnf clean all +# python3.12-devel needed for CGO compilation (Python headers and python3.12-config for linker flags) WORKDIR /workspace # Copy the Go Modules manifests @@ -28,6 +29,15 @@ ARG TOKENIZER_VERSION=v1.22.1 RUN curl -L https://github.com/daulet/tokenizers/releases/download/${TOKENIZER_VERSION}/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib RUN ranlib lib/*.a +# Copy Python wrapper and requirements from kv-cache-manager dependency +# Extract version dynamically and copy to a known location +RUN KV_CACHE_MGR_VERSION=$(go list -m -f '{{.Version}}' github.com/llm-d/llm-d-kv-cache-manager) && \ + mkdir -p /workspace/kv-cache-manager-wrapper && \ + cp /go/pkg/mod/github.com/llm-d/llm-d-kv-cache-manager@${KV_CACHE_MGR_VERSION}/pkg/preprocessing/chat_completions/render_jinja_template_wrapper.py \ + /workspace/kv-cache-manager-wrapper/ && \ + cp /go/pkg/mod/github.com/llm-d/llm-d-kv-cache-manager@${KV_CACHE_MGR_VERSION}/pkg/preprocessing/chat_completions/requirements.txt \ + /workspace/kv-cache-manager-wrapper/ + # Build # the GOARCH has not a default value to allow the binary be built according to the host where the command # was called. For example, if we call make image-build in a local env which has the Apple Silicon M1 SO @@ -36,22 +46,49 @@ RUN ranlib lib/*.a ENV CGO_ENABLED=1 ENV GOOS=${TARGETOS:-linux} ENV GOARCH=${TARGETARCH} -RUN go build -a -o bin/llm-d-inference-sim -ldflags="-extldflags '-L$(pwd)/lib'" cmd/cmd.go +ENV PYTHON=python3.12 +ENV PYTHONPATH=/usr/lib64/python3.12/site-packages:/usr/lib/python3.12/site-packages + +RUN export CGO_CFLAGS="$(python3.12-config --cflags) -I/workspace/lib" && \ + export CGO_LDFLAGS="$(python3.12-config --ldflags --embed) -L/workspace/lib -ltokenizers -ldl -lm" && \ + go build -a -o bin/llm-d-inference-sim -ldflags="-extldflags '-L$(pwd)/lib'" cmd/cmd.go +# Runtime stage # Use ubi9 as a minimal base image to package the manager binary # Refer to https://catalog.redhat.com/software/containers/ubi9/ubi-minimal/615bd9b4075b022acc111bf5 for more details FROM registry.access.redhat.com/ubi9/ubi-minimal:latest WORKDIR / -# Install zeromq runtime library needed by the manager. +# Install zeromq runtime library and Python runtime needed by the manager. # The final image is UBI9, so we need epel-release-9. +# Using microdnf for minimal image size USER root -RUN microdnf install -y dnf && \ - dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm' && \ - dnf install -y zeromq && \ - dnf clean all && \ - rm -rf /var/cache/dnf /var/lib/dnf +RUN curl -L -o /tmp/epel-release.rpm https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \ + rpm -i /tmp/epel-release.rpm && \ + rm /tmp/epel-release.rpm && \ + microdnf install -y --setopt=install_weak_deps=0 zeromq python3.12 python3.12-libs python3.12-pip && \ + microdnf clean all && \ + rm -rf /var/cache/yum /var/lib/yum && \ + ln -sf /usr/bin/python3.12 /usr/bin/python3 && \ + ln -sf /usr/bin/python3.12 /usr/bin/python + +# Install wrapper as a module in site-packages +RUN mkdir -p /usr/local/lib/python3.12/site-packages/ +COPY --from=builder /workspace/kv-cache-manager-wrapper/render_jinja_template_wrapper.py /usr/local/lib/python3.12/site-packages/ + +# Python deps (no cache, single target) – filter out torch +ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 +COPY --from=builder /workspace/kv-cache-manager-wrapper/requirements.txt /tmp/requirements.txt +RUN sed '/^torch\b/d' /tmp/requirements.txt > /tmp/requirements.notorch.txt && \ + python3.12 -m pip install --no-cache-dir --upgrade pip setuptools wheel && \ + python3.12 -m pip install --no-cache-dir --target /usr/local/lib/python3.12/site-packages -r /tmp/requirements.notorch.txt && \ + rm /tmp/requirements.txt /tmp/requirements.notorch.txt && \ + rm -rf /root/.cache/pip + +# Python env +ENV PYTHONPATH="/usr/local/lib/python3.12/site-packages:/usr/lib/python3.12/site-packages" +ENV PYTHON=python3.12 COPY --from=builder /workspace/bin/llm-d-inference-sim /app/llm-d-inference-sim diff --git a/Makefile b/Makefile index a71b78c6..57c1e6af 100644 --- a/Makefile +++ b/Makefile @@ -53,6 +53,23 @@ SRC = $(shell find . -type f -name '*.go') help: ## Print help @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) +PYTHON_INCLUDE := $(shell python3 -c "import sysconfig; print(sysconfig.get_path('include'))") +CGO_CFLAGS := $(shell python3-config --cflags --embed) +CGO_LDFLAGS := $(shell python3-config --ldflags --embed) + +export PKG_CONFIG_PATH=/usr/lib/pkgconfig + +GOMODCACHE := $(shell go env GOMODCACHE) +KV_CACHE_MGR_VERSION := $(shell go list -m -f '{{.Version}}' github.com/llm-d/llm-d-kv-cache-manager) +KV_CACHE_MGR_PATH := $(GOMODCACHE)/github.com/llm-d/llm-d-kv-cache-manager@$(KV_CACHE_MGR_VERSION)/pkg/preprocessing/chat_completions +export PYTHONPATH := $(KV_CACHE_MGR_PATH):$(PYTHONPATH) + +# Export them for all targets (optional) +export CGO_ENABLED=1 +export CGO_CFLAGS +export CGO_LDFLAGS +export CPATH := $(PYTHON_INCLUDE):$(CPATH) + GO_LDFLAGS := -extldflags '-L$(shell pwd)/lib $(LDFLAGS)' CGO_ENABLED=1 TOKENIZER_LIB = lib/libtokenizers.a @@ -82,7 +99,7 @@ format: ## Format Go source files @gofmt -l -w $(SRC) .PHONY: test -test: $(GINKGO) download-tokenizer download-zmq ## Run tests +test: $(GINKGO) install-dependencies ## Run tests @printf "\033[33;1m==== Running tests ====\033[0m\n" ifdef GINKGO_FOCUS CGO_ENABLED=1 ginkgo -ldflags="$(GO_LDFLAGS)" -v -r -- -ginkgo.v -ginkgo.focus="$(GINKGO_FOCUS)" @@ -103,7 +120,7 @@ lint: $(GOLANGCI_LINT) ## Run lint ##@ Build .PHONY: build -build: check-go download-tokenizer download-zmq +build: check-go install-dependencies @printf "\033[33;1m==== Building ====\033[0m\n" go build -ldflags="$(GO_LDFLAGS)" -o $(LOCALBIN)/$(PROJECT_NAME) cmd/$(PROJECT_NAME)/main.go @@ -217,32 +234,41 @@ install-hooks: ## Install git hooks ##@ ZMQ Setup -.PHONY: download-zmq -download-zmq: ## Install ZMQ dependencies based on OS/ARCH - @echo "⏳ Checking if ZMQ is already installed..." - @if pkg-config --exists libzmq; then \ - echo "✅ ZMQ is already installed."; \ - else \ - echo "⏳ Installing ZMQ dependencies..."; \ - if [ "$(TARGETOS)" = "linux" ]; then \ - if command -v apt >/dev/null 2>&1; then \ - sudo apt update && sudo apt install -y libzmq3-dev; \ - elif command -v dnf >/dev/null 2>&1; then \ - sudo dnf install -y zeromq-devel; \ +.PHONY: install-dependencies +install-dependencies: download-tokenizer ## Install development dependencies based on OS/ARCH + @echo "Checking and installing development dependencies..." + @if [ "$(TARGETOS)" = "linux" ]; then \ + if [ -x "$$(command -v apt)" ]; then \ + if ! dpkg -s libzmq3-dev >/dev/null 2>&1 || ! dpkg -s g++ >/dev/null 2>&1; then \ + echo "Installing dependencies with apt..."; \ + sudo apt-get update && sudo apt-get install -y libzmq3-dev g++; \ else \ - echo -e "⚠️ Unsupported Linux package manager. Follow installation guides: https://github.com/zeromq/libzmq#installation-of-binary-packages-\n"; \ - exit 1; \ + echo "✅ ZMQ and g++ are already installed."; \ fi; \ - elif [ "$(TARGETOS)" = "darwin" ]; then \ - if command -v brew >/dev/null 2>&1; then \ - brew install zeromq; \ + elif [ -x "$$(command -v dnf)" ]; then \ + if ! dnf -q list installed zeromq-devel >/dev/null 2>&1 || ! dnf -q list installed gcc-c++ >/dev/null 2>&1; then \ + echo "Installing dependencies with dnf..."; \ + sudo dnf install -y zeromq-devel gcc-c++; \ else \ - echo "⚠️ Homebrew is not installed and is required to install zeromq. Install it from https://brew.sh/"; \ - exit 1; \ + echo "✅ ZMQ and gcc-c++ are already installed."; \ fi; \ else \ - echo "⚠️ Unsupported OS: $(TARGETOS). Install libzmq manually - see https://zeromq.org/download/"; \ + echo "Unsupported Linux package manager. Install libzmq and g++/gcc-c++ manually."; \ exit 1; \ fi; \ - echo "✅ ZMQ dependencies installed."; \ + elif [ "$(TARGETOS)" = "darwin" ]; then \ + if [ -x "$$(command -v brew)" ]; then \ + if ! brew list zeromq pkg-config >/dev/null 2>&1; then \ + echo "Installing dependencies with brew..."; \ + brew install zeromq pkg-config; \ + else \ + echo "✅ ZeroMQ and pkgconf are already installed."; \ + fi; \ + else \ + echo "Homebrew is not installed and is required to install zeromq. Install it from https://brew.sh/"; \ + exit 1; \ + fi; \ + else \ + echo "Unsupported OS: $(TARGETOS). Install development dependencies manually."; \ + exit 1; \ fi diff --git a/go.mod b/go.mod index 51bc363f..45f48666 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/buaazp/fasthttprouter v0.1.1 github.com/go-logr/logr v1.4.2 github.com/google/uuid v1.6.0 - github.com/llm-d/llm-d-kv-cache-manager v0.3.0-rc1 + github.com/llm-d/llm-d-kv-cache-manager v0.4.0-rc2 github.com/onsi/ginkgo/v2 v2.23.4 github.com/onsi/gomega v1.37.0 github.com/openai/openai-go/v3 v3.6.1 @@ -23,6 +23,12 @@ require ( k8s.io/klog/v2 v2.130.1 ) +require ( + github.com/dgraph-io/ristretto/v2 v2.3.0 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect + go.uber.org/multierr v1.11.0 // indirect +) + require ( github.com/andybalholm/brotli v1.1.1 // indirect github.com/beorn7/perks v1.0.1 // indirect @@ -64,7 +70,7 @@ require ( go.uber.org/automaxprocs v1.6.0 // indirect golang.org/x/net v0.38.0 // indirect golang.org/x/oauth2 v0.27.0 // indirect - golang.org/x/sys v0.32.0 // indirect + golang.org/x/sys v0.35.0 // indirect golang.org/x/term v0.30.0 // indirect golang.org/x/text v0.23.0 // indirect golang.org/x/time v0.9.0 // indirect @@ -77,7 +83,7 @@ require ( k8s.io/client-go v0.33.0 // indirect k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 // indirect - sigs.k8s.io/controller-runtime v0.21.0 // indirect + sigs.k8s.io/controller-runtime v0.21.0 sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect sigs.k8s.io/randfill v1.0.0 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect diff --git a/go.sum b/go.sum index 0e726bbb..61de9f21 100644 --- a/go.sum +++ b/go.sum @@ -19,8 +19,14 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgraph-io/ristretto/v2 v2.3.0 h1:qTQ38m7oIyd4GAed/QkUZyPFNMnvVWyazGXRwvOt5zk= +github.com/dgraph-io/ristretto/v2 v2.3.0/go.mod h1:gpoRV3VzrEY1a9dWAYV6T1U7YzfgttXdd/ZzL1s9OZM= +github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da h1:aIftn67I1fkbMa512G+w+Pxci9hJPB8oMnkcP3iZF38= +github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= @@ -68,8 +74,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= -github.com/llm-d/llm-d-kv-cache-manager v0.3.0-rc1 h1:SDLiNrcreDcA9m9wfXAumFARDHHXpjOjHTzshTiTGxk= -github.com/llm-d/llm-d-kv-cache-manager v0.3.0-rc1/go.mod h1:tN80/D0Faf6pE2ocwFgTNoCxKPsqdsa2XnjQUqOaZ8Q= +github.com/llm-d/llm-d-kv-cache-manager v0.4.0-rc2 h1:l2Sm8W6SRg4TAme4RsndwZ++5+4aQvDI4vnf8TKrhww= +github.com/llm-d/llm-d-kv-cache-manager v0.4.0-rc2/go.mod h1:ZlK7MCuz5D/weLeHyNKEmVF/eJZDyYn3XyRowTihq9o= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs= @@ -153,6 +159,8 @@ go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= @@ -174,8 +182,8 @@ golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20= -golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= +golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y= golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/pkg/kv-cache/block_cache.go b/pkg/kv-cache/block_cache.go index d9d3f5da..ba10e5fe 100644 --- a/pkg/kv-cache/block_cache.go +++ b/pkg/kv-cache/block_cache.go @@ -177,14 +177,14 @@ func (bc *blockCache) startRequest(requestID string, blocks []uint64) (int, erro delete(bc.unusedBlocks, oldestUnusedHash) common.WriteToChannel(bc.eventChan, - EventData{action: eventActionRemove, hashValues: []uint64{oldestUnusedHash}}, + EventData{action: eventActionRemove, hashValues: []any{oldestUnusedHash}}, bc.logger, "block cache eventChan") } // Add the new block bc.usedBlocks[block] = 1 common.WriteToChannel(bc.eventChan, - EventData{action: eventActionStore, hashValues: []uint64{block}}, + EventData{action: eventActionStore, hashValues: []any{block}}, bc.logger, "block cache eventChan") } diff --git a/pkg/kv-cache/kv_cache_sender.go b/pkg/kv-cache/kv_cache_sender.go index 51d0c384..67bbc6d4 100644 --- a/pkg/kv-cache/kv_cache_sender.go +++ b/pkg/kv-cache/kv_cache_sender.go @@ -37,7 +37,7 @@ const ( type EventData struct { action EventAction - hashValues []uint64 + hashValues []any } type KVEventSender struct { diff --git a/pkg/kv-cache/kv_cache_test.go b/pkg/kv-cache/kv_cache_test.go index 826eada6..6d92d88e 100644 --- a/pkg/kv-cache/kv_cache_test.go +++ b/pkg/kv-cache/kv_cache_test.go @@ -330,8 +330,8 @@ var _ = Describe("KV cache", Ordered, func() { wg.Wait() // wait for goroutine to exit }() - expectedRemovedBlocks := []uint64{2, 4} - expectedStoredBlocks := []uint64{1, 2, 3, 4, 5, 6} + expectedRemovedBlocks := []any{uint64(2), uint64(4)} + expectedStoredBlocks := []any{uint64(1), uint64(2), uint64(3), uint64(4), uint64(5), uint64(6)} go func() { // Make sure that the subscriber listens before the events are published @@ -371,8 +371,8 @@ var _ = Describe("KV cache", Ordered, func() { Expect(alreadyInCache).To(Equal(0)) }() - removedBlocks := make([]uint64, 0) - storedBlocks := make([]uint64, 0) + removedBlocks := make([]any, 0) + storedBlocks := make([]any, 0) count := uint64(1) for { parts, err := sub.RecvMessageBytes(0) diff --git a/pkg/kv-cache/kv_test_helper.go b/pkg/kv-cache/kv_test_helper.go index c124d0f7..2b50dc47 100644 --- a/pkg/kv-cache/kv_test_helper.go +++ b/pkg/kv-cache/kv_test_helper.go @@ -21,11 +21,11 @@ import ( "github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvevents" "github.com/onsi/ginkgo/v2" - gomega "github.com/onsi/gomega" + "github.com/onsi/gomega" "github.com/vmihailenco/msgpack/v5" ) -func ParseKVEvent(parts [][]byte, expectedTopic string, expectedSeq uint64) ([]uint64, []uint64, bool) { +func ParseKVEvent(parts [][]byte, expectedTopic string, expectedSeq uint64) ([]any, []any, bool) { // The message should be [topic, seq, payload] gomega.Expect(parts).To(gomega.HaveLen(3)) @@ -34,8 +34,8 @@ func ParseKVEvent(parts [][]byte, expectedTopic string, expectedSeq uint64) ([]u seq := binary.BigEndian.Uint64(parts[1]) gomega.Expect(seq).To(gomega.Equal(expectedSeq)) - removed := make([]uint64, 0) - stored := make([]uint64, 0) + removed := make([]any, 0) + stored := make([]any, 0) allCleared := false var eventBatch kvevents.EventBatch diff --git a/pkg/llm-d-inference-sim/simulator.go b/pkg/llm-d-inference-sim/simulator.go index c58f6e9f..fae80f4a 100644 --- a/pkg/llm-d-inference-sim/simulator.go +++ b/pkg/llm-d-inference-sim/simulator.go @@ -333,13 +333,21 @@ func (s *VllmSimulator) initializeSim(ctx context.Context) error { return err } - tokenizationConfig := tokenization.DefaultConfig() + tokenizationConfig, err := tokenization.DefaultConfig() + if err != nil { + return fmt.Errorf("failed to create default tokenization configuration: %w", err) + } + if s.config.TokenizersCacheDir != "" { - tokenizationConfig.TokenizersCacheDir = s.config.TokenizersCacheDir + if tokenizationConfig.HFTokenizerConfig == nil { + tokenizationConfig.HFTokenizerConfig = &tokenization.HFTokenizerConfig{} + } + tokenizationConfig.HFTokenizerConfig.TokenizersCacheDir = s.config.TokenizersCacheDir } + s.tokenizer, err = tokenization.NewCachedHFTokenizer(tokenizationConfig.HFTokenizerConfig) if err != nil { - return fmt.Errorf("failed to create tokenizer: %w", err) + return fmt.Errorf("failed to create hf tokenizer: %w", err) } if s.config.EnableKVCache {