Skip to content

Commit f01dc9e

Browse files
authored
Update neuron backend (#3098)
* feat(neuron): use AWS Neuron SDK 2.21.1
* feat(neuron): bump optimum-neuron version
* feat(neuron): tag latest image for local tests
* test(neuron): simplify sampling test
1 parent 5c5528e commit f01dc9e

File tree

3 files changed

+17
-22
lines changed

3 files changed

+17
-22
lines changed

Dockerfile.neuron

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ RUN mkdir -p /tgi
55
# Fetch the optimum-neuron sources directly to avoid relying on pypi deployments
66
FROM alpine AS optimum-neuron
77
RUN mkdir -p /optimum-neuron
8-
ADD https://github.com/huggingface/optimum-neuron/archive/refs/tags/v0.0.28.tar.gz /optimum-neuron/sources.tar.gz
8+
ADD https://github.com/huggingface/optimum-neuron/archive/refs/tags/v0.1.0.tar.gz /optimum-neuron/sources.tar.gz
99
RUN tar -C /optimum-neuron -xf /optimum-neuron/sources.tar.gz --strip-components=1
1010

1111
# Build cargo components (adapted from TGI original Dockerfile)
@@ -108,10 +108,10 @@ RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEU
108108
# Install neuronx packages
109109
RUN apt-get update -y \
110110
&& apt-get install -y --no-install-recommends \
111-
aws-neuronx-dkms=2.18.20.0 \
112-
aws-neuronx-collectives=2.22.33.0-d2128d1aa \
113-
aws-neuronx-runtime-lib=2.22.19.0-5856c0b42 \
114-
aws-neuronx-tools=2.19.0.0 \
111+
aws-neuronx-dkms=2.19.64.0 \
112+
aws-neuronx-collectives=2.23.135.0-3e70920f2 \
113+
aws-neuronx-runtime-lib=2.23.112.0-9b5179492 \
114+
aws-neuronx-tools=2.20.204.0 \
115115
libxml2 \
116116
&& rm -rf /var/lib/apt/lists/* \
117117
&& apt-get clean
@@ -120,16 +120,16 @@ ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}"
120120

121121
# Install manually torch CPU version to avoid pulling CUDA
122122
RUN pip3 install \
123-
torch==2.1.2 \
124-
torchvision==0.16.2 \
123+
torch==2.5.1 \
124+
torchvision==0.20.1 \
125125
--index-url https://download.pytorch.org/whl/cpu
126126

127127
RUN pip3 install \
128-
neuronx-cc==2.15.143.0 \
129-
torch-neuronx==2.1.2.2.3.2 \
130-
transformers-neuronx==0.12.313 \
131-
neuronx-distributed==0.9.0 \
132-
libneuronxla==2.0.5347.0 \
128+
neuronx-cc==2.16.372.0 \
129+
torch-neuronx==2.5.1.2.4.0 \
130+
transformers-neuronx==0.13.322 \
131+
neuronx-distributed==0.10.1 \
132+
libneuronxla==2.1.681.0 \
133133
--extra-index-url=https://pip.repos.neuron.amazonaws.com
134134

135135
# Install HuggingFace packages

backends/neuron/Makefile

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ image:
2525
--ulimit nofile=100000:100000 \
2626
--build-arg VERSION=$(VERSION) \
2727
-t text-generation-inference:$(VERSION)-neuron ${root_dir}
28+
docker tag text-generation-inference:$(VERSION)-neuron text-generation-inference:latest-neuron
2829

2930
install_server:
3031
make -C ${mkfile_dir}/server install VERSION:=${VERSION}

integration-tests/neuron/test_generate.py

Lines changed: 4 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -49,17 +49,11 @@ async def test_model_single_request(tgi_service):
4949
max_new_tokens=128,
5050
seed=42,
5151
)
52-
sample_expectations = {
53-
"gpt2": "Deep Learning",
54-
"llama": "Deep Learning",
55-
"mistral": "Deep learning",
56-
"qwen2": "Deep Learning",
57-
"granite": "Deep learning",
58-
}
59-
assert sample_expectations[service_name] in response
52+
# The response must be different
53+
assert not response.startswith(greedy_expectations[service_name])
6054

61-
# Sampling with stop sequence
62-
stop_sequence = sample_expectations[service_name][-5:]
55+
# Sampling with stop sequence (using one of the words returned from the previous test)
56+
stop_sequence = response.split(" ")[-5]
6357
response = await tgi_service.client.text_generation(
6458
"What is Deep Learning?",
6559
do_sample=True,

0 commit comments

Comments
 (0)