Skip to content

Commit c73ae0b

Browse files
authored
Update to kernels 0.2.1 (#3084)
* Update to `kernels` 0.2.1

  The package was renamed from `hf-kernels` to `kernels`. The new version also updates the lockfile format.

* Download kernels in the `install-cuda` target
1 parent d4c6faa commit c73ae0b

File tree

11 files changed

+303
-7116
lines changed

11 files changed

+303
-7116
lines changed

.github/workflows/tests.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ jobs:
4646
- name: Download locked kernels
4747
run: |
4848
source ./.venv/bin/activate
49-
hf-kernels download server
49+
kernels download server
5050
- name: Run server tests
5151
run: |
5252
source ./.venv/bin/activate

Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -183,12 +183,12 @@ COPY server server
183183
COPY server/Makefile server/Makefile
184184
ENV HF_KERNELS_CACHE=/kernels
185185
RUN cd server && \
186-
uv sync --frozen --extra gen --extra attention --extra bnb --extra accelerate --extra compressed-tensors --extra marlin --extra moe --extra quantize --extra peft --extra outlines --no-install-project --active && \
186+
uv sync --frozen --extra gen --extra bnb --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --no-install-project --active && \
187187
make gen-server-raw && \
188-
hf-kernels download .
188+
kernels download .
189189

190190
RUN cd server && \
191-
uv sync --frozen --extra gen --extra attention --extra bnb --extra accelerate --extra compressed-tensors --extra marlin --extra moe --extra quantize --extra peft --extra outlines --active --python=${PYTHON_VERSION} && \
191+
uv sync --frozen --extra gen --extra bnb --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --active --python=${PYTHON_VERSION} && \
192192
uv pip install nvidia-nccl-cu12==2.25.1 && \
193193
pwd && \
194194
text-generation-server --help

flake.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

flake.nix

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
inputs.nixpkgs.follows = "tgi-nix/nixpkgs";
66
};
77
nix-filter.url = "github:numtide/nix-filter";
8-
tgi-nix.url = "github:huggingface/text-generation-inference-nix/hub-rotary";
8+
tgi-nix.url = "github:huggingface/text-generation-inference-nix/kernels-0.2.0";
99
nixpkgs.follows = "tgi-nix/nixpkgs";
1010
flake-utils.url = "github:numtide/flake-utils";
1111
rust-overlay = {

nix/server.nix

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
grpcio-reflection,
1717
grpcio-status,
1818
grpcio-tools,
19-
hf-kernels,
2019
hf-transfer,
20+
kernels,
2121
loguru,
2222
mamba-ssm,
2323
moe,
@@ -91,8 +91,8 @@ buildPythonPackage {
9191
grpcio-reflection
9292
grpcio-status
9393
grpcio-tools
94-
hf-kernels
9594
hf-transfer
95+
kernels
9696
loguru
9797
mamba-ssm
9898
moe

server/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ install: install-cuda
3939
install-cuda: install-server install-flash-attention-v2-cuda install-flash-attention
4040
uv pip install -e ".[attention,bnb,marlin,moe]"
4141
uv pip install nvidia-nccl-cu12==2.22.3
42+
kernels download .
4243

4344
install-rocm: install-server install-flash-attention-v2-rocm install-vllm-rocm
4445

0 commit comments

Comments
 (0)