
Commit 87a0af4

Authored by mht-sharma, pcuenca, and Narsil
Update transformers to 4.51 (#3148)
* update transformers
* Upgrading the nix deps too.
* Forcing torchvision to be in there.
* Fixing bug in mllama.
* Those tests cannot be run in CI.
* Lint.

Co-authored-by: Pedro Cuenca <pedro@huggingface.co>
Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
1 parent 9c26b52 commit 87a0af4

8 files changed: +366 additions, -237 deletions


Dockerfile

Lines changed: 5 additions & 7 deletions
@@ -165,8 +165,9 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
     git \
     && rm -rf /var/lib/apt/lists/*

-RUN curl -LsSf https://astral.sh/uv/install.sh | sh
-ENV PATH="$PATH:/root/.local/bin"
+# RUN curl -LsSf https://astral.sh/uv/install.sh | sh
+# ENV PATH="$PATH:/root/.local/bin"
+COPY --from=ghcr.io/astral-sh/uv:0.5.31 /uv /uvx /bin/
 # Install flash-attention dependencies
 # RUN pip install einops --no-cache-dir

@@ -183,19 +184,16 @@ COPY server server
 COPY server/Makefile server/Makefile
 ENV HF_KERNELS_CACHE=/kernels
 RUN cd server && \
-    uv sync --frozen --extra gen --extra bnb --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --no-install-project --active && \
+    uv sync --frozen --extra gen --extra bnb --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --extra torch --no-install-project --active && \
     make gen-server-raw && \
     kernels download .

 RUN cd server && \
-    uv sync --frozen --extra gen --extra bnb --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --active --python=${PYTHON_VERSION} && \
+    uv sync --frozen --extra gen --extra bnb --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --extra torch --active --python=${PYTHON_VERSION} && \
     uv pip install nvidia-nccl-cu12==2.25.1 && \
     pwd && \
     text-generation-server --help

-# This shouldn't be necessary.
-# RUN uv pip install torchvision --no-deps
-
 # Copy build artifacts from flash attention builder
 COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-311 /usr/src/.venv/lib/python3.11/site-packages
 COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-311 /usr/src/.venv/lib/python3.11/site-packages

flake.lock

Lines changed: 3 additions & 3 deletions
(Generated file; diff not rendered by default.)
Llama 4 integration test file (file name not rendered)

Lines changed: 155 additions & 155 deletions

@@ -1,155 +1,155 @@
-import base64
-from io import BytesIO
-from PIL import Image
-
-import pytest
-
-
-@pytest.fixture(scope="module")
-def flash_llama4_handle(launcher):
-    with launcher("ll-re/Llama-4-Scout-17B-16E-Instruct", num_shard=8) as handle:
-        yield handle
-
-
-@pytest.fixture(scope="module")
-async def flash_llama4(flash_llama4_handle):
-    await flash_llama4_handle.health(300)
-    return flash_llama4_handle.client
-
-
-async def test_flash_llama4(flash_llama4, response_snapshot):
-    response = await flash_llama4.generate(
-        "Hello I am doing a project on the 1918 flu pandemic and I am trying to find out how many",
-        seed=42,
-        max_new_tokens=100,
-    )
-
-    assert (
-        response.generated_text
-        == " people died in the 1918 flu pandemic. Estimating the death toll of the 1918 flu pandemic is difficult because of incomplete records and because of the fact that many of the extra deaths were not attributed to the flu. Many experts believe that the 1918 flu pandemic killed between 50 and 100 million people. Iassistant\n\nThe 1918 flu pandemic, also known as the Spanish flu, is indeed one of the most devastating public health crises in human history. Estimating the exact"
-    )
-    assert response.details.generated_tokens == 100
-    assert response == response_snapshot
-
-
-async def test_flash_llama4_image_cow_dog(flash_llama4, response_snapshot):
-    image_url = "https://huggingface.co/datasets/hf-internal-testing/fixtures-captioning/resolve/main/cow_beach_1.png"
-    response = await flash_llama4.chat(
-        seed=42,
-        messages=[
-            {
-                "role": "user",
-                "content": [
-                    {"type": "image_url", "image_url": {"url": image_url}},
-                    {
-                        "type": "text",
-                        "text": "What is the breed of the dog in the image?",
-                    },
-                ],
-            },
-        ],
-        max_tokens=100,
-    )
-
-    assert (
-        response.choices[0].message.content
-        == "The image does not depict a dog; it shows a cow standing on a beach. Therefore, there is no breed of a dog to identify."
-    )
-    assert response.usage["completion_tokens"] == 30
-    assert response == response_snapshot
-
-
-async def test_flash_llama4_image_cow(flash_llama4, response_snapshot):
-    image_url = "https://huggingface.co/datasets/hf-internal-testing/fixtures-captioning/resolve/main/cow_beach_1.png"
-    response = await flash_llama4.chat(
-        seed=42,
-        messages=[
-            {
-                "role": "user",
-                "content": [
-                    {"type": "image_url", "image_url": {"url": image_url}},
-                    {"type": "text", "text": "What is shown in this image?"},
-                ],
-            },
-        ],
-        max_tokens=100,
-    )
-    assert (
-        response.choices[0].message.content
-        == "The image shows a brown cow standing on the beach with a white face and black and white marking on its ears. The cow has a white patch around its nose and mouth. The ocean and blue sky are in the background."
-    )
-    assert response.usage["completion_tokens"] == 46
-    assert response == response_snapshot
-
-
-# Helper function to convert a Pillow image to a base64 data URL
-def image_to_data_url(img: Image.Image, fmt: str) -> str:
-    buffer = BytesIO()
-    img.save(buffer, format=fmt)
-    img_data = buffer.getvalue()
-    b64_str = base64.b64encode(img_data).decode("utf-8")
-    mime_type = "image/png" if fmt.upper() == "PNG" else "image/jpeg"
-    return f"data:{mime_type};base64,{b64_str}"
-
-
-async def test_flash_llama4_image_base64_rgba(flash_llama4, response_snapshot):
-    # Create an empty 100x100 PNG image with alpha (transparent background)
-    img = Image.new("RGBA", (100, 100), (0, 0, 0, 0))
-    data_url = image_to_data_url(img, "PNG")
-    response = await flash_llama4.chat(
-        seed=42,
-        messages=[
-            {
-                "role": "user",
-                "content": [
-                    {"type": "image_url", "image_url": {"url": data_url}},
-                    {
-                        "type": "text",
-                        "text": "What do you see in this transparent image?",
-                    },
-                ],
-            },
-        ],
-        max_tokens=100,
-    )
-    assert response == response_snapshot
-
-
-async def test_flash_llama4_image_base64_rgb_png(flash_llama4, response_snapshot):
-    # Create an empty 100x100 PNG image without alpha (white background)
-    img = Image.new("RGB", (100, 100), (255, 255, 255))
-    data_url = image_to_data_url(img, "PNG")
-    response = await flash_llama4.chat(
-        seed=42,
-        messages=[
-            {
-                "role": "user",
-                "content": [
-                    {"type": "image_url", "image_url": {"url": data_url}},
-                    {"type": "text", "text": "What do you see in this plain image?"},
-                ],
-            },
-        ],
-        max_tokens=100,
-    )
-    assert response == response_snapshot
-
-
-async def test_flash_llama4_image_base64_rgb_jpg(flash_llama4, response_snapshot):
-    # Create an empty 100x100 JPEG image (white background)
-    img = Image.new("RGB", (100, 100), (255, 255, 255))
-    data_url = image_to_data_url(img, "JPEG")
-    response = await flash_llama4.chat(
-        seed=42,
-        messages=[
-            {
-                "role": "user",
-                "content": [
-                    {"type": "image_url", "image_url": {"url": data_url}},
-                    {"type": "text", "text": "What do you see in this JPEG image?"},
-                ],
-            },
-        ],
-        max_tokens=100,
-    )
-    assert response == response_snapshot
+# import base64
+# from io import BytesIO
+# from PIL import Image
+#
+# import pytest
+#
+#
+# @pytest.fixture(scope="module")
+# def flash_llama4_handle(launcher):
+#     with launcher("ll-re/Llama-4-Scout-17B-16E-Instruct", num_shard=8) as handle:
+#         yield handle
+#
+#
+# @pytest.fixture(scope="module")
+# async def flash_llama4(flash_llama4_handle):
+#     await flash_llama4_handle.health(300)
+#     return flash_llama4_handle.client
+#
+#
+# async def test_flash_llama4(flash_llama4, response_snapshot):
+#     response = await flash_llama4.generate(
+#         "Hello I am doing a project on the 1918 flu pandemic and I am trying to find out how many",
+#         seed=42,
+#         max_new_tokens=100,
+#     )
+#
+#     assert (
+#         response.generated_text
+#         == " people died in the 1918 flu pandemic. Estimating the death toll of the 1918 flu pandemic is difficult because of incomplete records and because of the fact that many of the extra deaths were not attributed to the flu. Many experts believe that the 1918 flu pandemic killed between 50 and 100 million people. Iassistant\n\nThe 1918 flu pandemic, also known as the Spanish flu, is indeed one of the most devastating public health crises in human history. Estimating the exact"
+#     )
+#     assert response.details.generated_tokens == 100
+#     assert response == response_snapshot
+#
+#
+# async def test_flash_llama4_image_cow_dog(flash_llama4, response_snapshot):
+#     image_url = "https://huggingface.co/datasets/hf-internal-testing/fixtures-captioning/resolve/main/cow_beach_1.png"
+#     response = await flash_llama4.chat(
+#         seed=42,
+#         messages=[
+#             {
+#                 "role": "user",
+#                 "content": [
+#                     {"type": "image_url", "image_url": {"url": image_url}},
+#                     {
+#                         "type": "text",
+#                         "text": "What is the breed of the dog in the image?",
+#                     },
+#                 ],
+#             },
+#         ],
+#         max_tokens=100,
+#     )
+#
+#     assert (
+#         response.choices[0].message.content
+#         == "The image does not depict a dog; it shows a cow standing on a beach. Therefore, there is no breed of a dog to identify."
+#     )
+#     assert response.usage["completion_tokens"] == 30
+#     assert response == response_snapshot
+#
+#
+# async def test_flash_llama4_image_cow(flash_llama4, response_snapshot):
+#     image_url = "https://huggingface.co/datasets/hf-internal-testing/fixtures-captioning/resolve/main/cow_beach_1.png"
+#     response = await flash_llama4.chat(
+#         seed=42,
+#         messages=[
+#             {
+#                 "role": "user",
+#                 "content": [
+#                     {"type": "image_url", "image_url": {"url": image_url}},
+#                     {"type": "text", "text": "What is shown in this image?"},
+#                 ],
+#             },
+#         ],
+#         max_tokens=100,
+#     )
+#     assert (
+#         response.choices[0].message.content
+#         == "The image shows a brown cow standing on the beach with a white face and black and white marking on its ears. The cow has a white patch around its nose and mouth. The ocean and blue sky are in the background."
+#     )
+#     assert response.usage["completion_tokens"] == 46
+#     assert response == response_snapshot
+#
+#
+# # Helper function to convert a Pillow image to a base64 data URL
+# def image_to_data_url(img: Image.Image, fmt: str) -> str:
+#     buffer = BytesIO()
+#     img.save(buffer, format=fmt)
+#     img_data = buffer.getvalue()
+#     b64_str = base64.b64encode(img_data).decode("utf-8")
+#     mime_type = "image/png" if fmt.upper() == "PNG" else "image/jpeg"
+#     return f"data:{mime_type};base64,{b64_str}"
+#
+#
+# async def test_flash_llama4_image_base64_rgba(flash_llama4, response_snapshot):
+#     # Create an empty 100x100 PNG image with alpha (transparent background)
+#     img = Image.new("RGBA", (100, 100), (0, 0, 0, 0))
+#     data_url = image_to_data_url(img, "PNG")
+#     response = await flash_llama4.chat(
+#         seed=42,
+#         messages=[
+#             {
+#                 "role": "user",
+#                 "content": [
+#                     {"type": "image_url", "image_url": {"url": data_url}},
+#                     {
+#                         "type": "text",
+#                         "text": "What do you see in this transparent image?",
+#                     },
+#                 ],
+#             },
+#         ],
+#         max_tokens=100,
+#     )
+#     assert response == response_snapshot
+#
+#
+# async def test_flash_llama4_image_base64_rgb_png(flash_llama4, response_snapshot):
+#     # Create an empty 100x100 PNG image without alpha (white background)
+#     img = Image.new("RGB", (100, 100), (255, 255, 255))
+#     data_url = image_to_data_url(img, "PNG")
+#     response = await flash_llama4.chat(
+#         seed=42,
+#         messages=[
+#             {
+#                 "role": "user",
+#                 "content": [
+#                     {"type": "image_url", "image_url": {"url": data_url}},
+#                     {"type": "text", "text": "What do you see in this plain image?"},
+#                 ],
+#             },
+#         ],
+#         max_tokens=100,
+#     )
+#     assert response == response_snapshot
+#
+#
+# async def test_flash_llama4_image_base64_rgb_jpg(flash_llama4, response_snapshot):
+#     # Create an empty 100x100 JPEG image (white background)
+#     img = Image.new("RGB", (100, 100), (255, 255, 255))
+#     data_url = image_to_data_url(img, "JPEG")
+#     response = await flash_llama4.chat(
+#         seed=42,
+#         messages=[
+#             {
+#                 "role": "user",
+#                 "content": [
+#                     {"type": "image_url", "image_url": {"url": data_url}},
+#                     {"type": "text", "text": "What do you see in this JPEG image?"},
+#                 ],
+#             },
+#         ],
+#         max_tokens=100,
+#     )
+#     assert response == response_snapshot

nix/overlay.nix

Lines changed: 12 additions & 2 deletions
@@ -18,8 +18,18 @@ final: prev: {
       src = final.fetchFromGitHub {
         owner = "huggingface";
         repo = "transformers";
-        rev = "v4.50.0";
-        hash = "sha256-/scrMPUY43n+XAMbwWCtmiJKXscXGLrklyDg9XZTaqw=";
+        rev = "v4.51.0";
+        hash = "sha256-dnVpc6fm1SYGcx7FegpwVVxUY6XRlsxLs5WOxYv11y8=";
+      };
+    }
+  );
+  huggingface-hub = python-super.huggingface-hub.overrideAttrs (
+    _: _: {
+      src = final.fetchFromGitHub {
+        owner = "huggingface";
+        repo = "huggingface_hub";
+        rev = "v0.30.0";
+        hash = "sha256-sz+n1uoWrSQPqJFiG/qCT6b4r08kD9MsoPZXbfWNB2o=";
       };
     }
   );

router/src/validation.rs

Lines changed: 2 additions & 2 deletions
@@ -566,7 +566,7 @@ fn fetch_image(input: &str) -> Result<(Vec<u8>, String, usize, usize), Validatio
         return Err(ValidationError::InvalidImageContent(content.to_string()));
     }

-    let data = STANDARD.decode(content["base64,".len()..].as_bytes())?;
+    let data = STANDARD.decode(&content["base64,".len()..])?;
     let img = if let Some(format) = format_from_mimetype(mimetype) {
         ImageReader::with_format(Cursor::new(&data), format).decode()?
     } else {

@@ -603,7 +603,7 @@ fn image_tokens(

     let mut image_string = String::with_capacity(2 * FAKE.len() + slots * IMAGE.len());
     image_string.push_str(FAKE);
-    image_string.extend(iter::repeat(IMAGE).take(slots));
+    image_string.extend(iter::repeat_n(IMAGE, slots));
     image_string.push_str(FAKE);

     if matches!(
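
For context on the router changes above: `base64`'s `Engine::decode` accepts any `impl AsRef<[u8]>`, so the `&str` slice in `fetch_image` can be passed without the explicit `.as_bytes()`, and the now-stable `std::iter::repeat_n` replaces the `repeat(x).take(n)` pattern in `image_tokens`. A minimal standalone sketch of the iterator change, not taken from the repository and using hypothetical `fake`/`image` marker values in place of the crate's `FAKE`/`IMAGE` constants:

use std::iter;

fn main() {
    // Hypothetical marker strings standing in for the FAKE/IMAGE tokens
    // used by image_tokens in router/src/validation.rs.
    let fake = "<fake>";
    let image = "<image>";
    let slots = 3;

    // Old form: an unbounded repeater capped with `.take(slots)`.
    let mut old_style = String::with_capacity(2 * fake.len() + slots * image.len());
    old_style.push_str(fake);
    old_style.extend(iter::repeat(image).take(slots));
    old_style.push_str(fake);

    // New form: `repeat_n` expresses the finite count directly.
    let mut new_style = String::with_capacity(2 * fake.len() + slots * image.len());
    new_style.push_str(fake);
    new_style.extend(iter::repeat_n(image, slots));
    new_style.push_str(fake);

    // Both produce "<fake><image><image><image><fake>".
    assert_eq!(old_style, new_style);
    println!("{new_style}");
}

Both forms build the same string; `repeat_n` simply states the finite count up front, which is what recent Clippy releases suggest for this pattern (presumably the "Lint." item in the commit message).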
