
Commit 87a0af4

Authored by mht-sharma, pcuenca, and Narsil
Update transformers to 4.51 (#3148)
* update transformers
* Upgrading the nix deps too.
* Forcing torchvision to be in there.
* Fixing bug in mllama.
* Those tests cannot be run in CI.
* Lint.

Co-authored-by: Pedro Cuenca <pedro@huggingface.co>
Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
1 parent 9c26b52 commit 87a0af4

8 files changed: +366 additions, -237 deletions


Dockerfile

Lines changed: 5 additions & 7 deletions
@@ -165,8 +165,9 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
     git \
     && rm -rf /var/lib/apt/lists/*

-RUN curl -LsSf https://astral.sh/uv/install.sh | sh
-ENV PATH="$PATH:/root/.local/bin"
+# RUN curl -LsSf https://astral.sh/uv/install.sh | sh
+# ENV PATH="$PATH:/root/.local/bin"
+COPY --from=ghcr.io/astral-sh/uv:0.5.31 /uv /uvx /bin/
 # Install flash-attention dependencies
 # RUN pip install einops --no-cache-dir

@@ -183,19 +184,16 @@ COPY server server
 COPY server/Makefile server/Makefile
 ENV HF_KERNELS_CACHE=/kernels
 RUN cd server && \
-    uv sync --frozen --extra gen --extra bnb --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --no-install-project --active && \
+    uv sync --frozen --extra gen --extra bnb --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --extra torch --no-install-project --active && \
     make gen-server-raw && \
     kernels download .

 RUN cd server && \
-    uv sync --frozen --extra gen --extra bnb --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --active --python=${PYTHON_VERSION} && \
+    uv sync --frozen --extra gen --extra bnb --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --extra torch --active --python=${PYTHON_VERSION} && \
     uv pip install nvidia-nccl-cu12==2.25.1 && \
     pwd && \
     text-generation-server --help

-# This shouldn't be necessary.
-# RUN uv pip install torchvision --no-deps
-
 # Copy build artifacts from flash attention builder
 COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-311 /usr/src/.venv/lib/python3.11/site-packages
 COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-311 /usr/src/.venv/lib/python3.11/site-packages

flake.lock

Lines changed: 3 additions & 3 deletions
(Generated file; diff not rendered by default.)
Llama 4 integration test file (file name not rendered)

Lines changed: 155 additions & 155 deletions

@@ -1,155 +1,155 @@
-import base64
-from io import BytesIO
-from PIL import Image
-
-import pytest
-
-
-@pytest.fixture(scope="module")
-def flash_llama4_handle(launcher):
-    with launcher("ll-re/Llama-4-Scout-17B-16E-Instruct", num_shard=8) as handle:
-        yield handle
-
-
-@pytest.fixture(scope="module")
-async def flash_llama4(flash_llama4_handle):
-    await flash_llama4_handle.health(300)
-    return flash_llama4_handle.client
-
-
-async def test_flash_llama4(flash_llama4, response_snapshot):
-    response = await flash_llama4.generate(
-        "Hello I am doing a project on the 1918 flu pandemic and I am trying to find out how many",
-        seed=42,
-        max_new_tokens=100,
-    )
-
-    assert (
-        response.generated_text
-        == " people died in the 1918 flu pandemic. Estimating the death toll of the 1918 flu pandemic is difficult because of incomplete records and because of the fact that many of the extra deaths were not attributed to the flu. Many experts believe that the 1918 flu pandemic killed between 50 and 100 million people. Iassistant\n\nThe 1918 flu pandemic, also known as the Spanish flu, is indeed one of the most devastating public health crises in human history. Estimating the exact"
-    )
-    assert response.details.generated_tokens == 100
-    assert response == response_snapshot
-
-
-async def test_flash_llama4_image_cow_dog(flash_llama4, response_snapshot):
-    image_url = "https://huggingface.co/datasets/hf-internal-testing/fixtures-captioning/resolve/main/cow_beach_1.png"
-    response = await flash_llama4.chat(
-        seed=42,
-        messages=[
-            {
-                "role": "user",
-                "content": [
-                    {"type": "image_url", "image_url": {"url": image_url}},
-                    {
-                        "type": "text",
-                        "text": "What is the breed of the dog in the image?",
-                    },
-                ],
-            },
-        ],
-        max_tokens=100,
-    )
-
-    assert (
-        response.choices[0].message.content
-        == "The image does not depict a dog; it shows a cow standing on a beach. Therefore, there is no breed of a dog to identify."
-    )
-    assert response.usage["completion_tokens"] == 30
-    assert response == response_snapshot
-
-
-async def test_flash_llama4_image_cow(flash_llama4, response_snapshot):
-    image_url = "https://huggingface.co/datasets/hf-internal-testing/fixtures-captioning/resolve/main/cow_beach_1.png"
-    response = await flash_llama4.chat(
-        seed=42,
-        messages=[
-            {
-                "role": "user",
-                "content": [
-                    {"type": "image_url", "image_url": {"url": image_url}},
-                    {"type": "text", "text": "What is shown in this image?"},
-                ],
-            },
-        ],
-        max_tokens=100,
-    )
-    assert (
-        response.choices[0].message.content
-        == "The image shows a brown cow standing on the beach with a white face and black and white marking on its ears. The cow has a white patch around its nose and mouth. The ocean and blue sky are in the background."
-    )
-    assert response.usage["completion_tokens"] == 46
-    assert response == response_snapshot
-
-
-# Helper function to convert a Pillow image to a base64 data URL
-def image_to_data_url(img: Image.Image, fmt: str) -> str:
-    buffer = BytesIO()
-    img.save(buffer, format=fmt)
-    img_data = buffer.getvalue()
-    b64_str = base64.b64encode(img_data).decode("utf-8")
-    mime_type = "image/png" if fmt.upper() == "PNG" else "image/jpeg"
-    return f"data:{mime_type};base64,{b64_str}"
-
-
-async def test_flash_llama4_image_base64_rgba(flash_llama4, response_snapshot):
-    # Create an empty 100x100 PNG image with alpha (transparent background)
-    img = Image.new("RGBA", (100, 100), (0, 0, 0, 0))
-    data_url = image_to_data_url(img, "PNG")
-    response = await flash_llama4.chat(
-        seed=42,
-        messages=[
-            {
-                "role": "user",
-                "content": [
-                    {"type": "image_url", "image_url": {"url": data_url}},
-                    {
-                        "type": "text",
-                        "text": "What do you see in this transparent image?",
-                    },
-                ],
-            },
-        ],
-        max_tokens=100,
-    )
-    assert response == response_snapshot
-
-
-async def test_flash_llama4_image_base64_rgb_png(flash_llama4, response_snapshot):
-    # Create an empty 100x100 PNG image without alpha (white background)
-    img = Image.new("RGB", (100, 100), (255, 255, 255))
-    data_url = image_to_data_url(img, "PNG")
-    response = await flash_llama4.chat(
-        seed=42,
-        messages=[
-            {
-                "role": "user",
-                "content": [
-                    {"type": "image_url", "image_url": {"url": data_url}},
-                    {"type": "text", "text": "What do you see in this plain image?"},
-                ],
-            },
-        ],
-        max_tokens=100,
-    )
-    assert response == response_snapshot
-
-
-async def test_flash_llama4_image_base64_rgb_jpg(flash_llama4, response_snapshot):
-    # Create an empty 100x100 JPEG image (white background)
-    img = Image.new("RGB", (100, 100), (255, 255, 255))
-    data_url = image_to_data_url(img, "JPEG")
-    response = await flash_llama4.chat(
-        seed=42,
-        messages=[
-            {
-                "role": "user",
-                "content": [
-                    {"type": "image_url", "image_url": {"url": data_url}},
-                    {"type": "text", "text": "What do you see in this JPEG image?"},
-                ],
-            },
-        ],
-        max_tokens=100,
-    )
-    assert response == response_snapshot
+# import base64
+# from io import BytesIO
+# from PIL import Image
+#
+# import pytest
+#
+#
+# @pytest.fixture(scope="module")
+# def flash_llama4_handle(launcher):
+#     with launcher("ll-re/Llama-4-Scout-17B-16E-Instruct", num_shard=8) as handle:
+#         yield handle
+#
+#
+# @pytest.fixture(scope="module")
+# async def flash_llama4(flash_llama4_handle):
+#     await flash_llama4_handle.health(300)
+#     return flash_llama4_handle.client
+#
+#
+# async def test_flash_llama4(flash_llama4, response_snapshot):
+#     response = await flash_llama4.generate(
+#         "Hello I am doing a project on the 1918 flu pandemic and I am trying to find out how many",
+#         seed=42,
+#         max_new_tokens=100,
+#     )
+#
+#     assert (
+#         response.generated_text
+#         == " people died in the 1918 flu pandemic. Estimating the death toll of the 1918 flu pandemic is difficult because of incomplete records and because of the fact that many of the extra deaths were not attributed to the flu. Many experts believe that the 1918 flu pandemic killed between 50 and 100 million people. Iassistant\n\nThe 1918 flu pandemic, also known as the Spanish flu, is indeed one of the most devastating public health crises in human history. Estimating the exact"
+#     )
+#     assert response.details.generated_tokens == 100
+#     assert response == response_snapshot
+#
+#
+# async def test_flash_llama4_image_cow_dog(flash_llama4, response_snapshot):
+#     image_url = "https://huggingface.co/datasets/hf-internal-testing/fixtures-captioning/resolve/main/cow_beach_1.png"
+#     response = await flash_llama4.chat(
+#         seed=42,
+#         messages=[
+#             {
+#                 "role": "user",
+#                 "content": [
+#                     {"type": "image_url", "image_url": {"url": image_url}},
+#                     {
+#                         "type": "text",
+#                         "text": "What is the breed of the dog in the image?",
+#                     },
+#                 ],
+#             },
+#         ],
+#         max_tokens=100,
+#     )
+#
+#     assert (
+#         response.choices[0].message.content
+#         == "The image does not depict a dog; it shows a cow standing on a beach. Therefore, there is no breed of a dog to identify."
+#     )
+#     assert response.usage["completion_tokens"] == 30
+#     assert response == response_snapshot
+#
+#
+# async def test_flash_llama4_image_cow(flash_llama4, response_snapshot):
+#     image_url = "https://huggingface.co/datasets/hf-internal-testing/fixtures-captioning/resolve/main/cow_beach_1.png"
+#     response = await flash_llama4.chat(
+#         seed=42,
+#         messages=[
+#             {
+#                 "role": "user",
+#                 "content": [
+#                     {"type": "image_url", "image_url": {"url": image_url}},
+#                     {"type": "text", "text": "What is shown in this image?"},
+#                 ],
+#             },
+#         ],
+#         max_tokens=100,
+#     )
+#     assert (
+#         response.choices[0].message.content
+#         == "The image shows a brown cow standing on the beach with a white face and black and white marking on its ears. The cow has a white patch around its nose and mouth. The ocean and blue sky are in the background."
+#     )
+#     assert response.usage["completion_tokens"] == 46
+#     assert response == response_snapshot
+#
+#
+# # Helper function to convert a Pillow image to a base64 data URL
+# def image_to_data_url(img: Image.Image, fmt: str) -> str:
+#     buffer = BytesIO()
+#     img.save(buffer, format=fmt)
+#     img_data = buffer.getvalue()
+#     b64_str = base64.b64encode(img_data).decode("utf-8")
+#     mime_type = "image/png" if fmt.upper() == "PNG" else "image/jpeg"
+#     return f"data:{mime_type};base64,{b64_str}"
+#
+#
+# async def test_flash_llama4_image_base64_rgba(flash_llama4, response_snapshot):
+#     # Create an empty 100x100 PNG image with alpha (transparent background)
+#     img = Image.new("RGBA", (100, 100), (0, 0, 0, 0))
+#     data_url = image_to_data_url(img, "PNG")
+#     response = await flash_llama4.chat(
+#         seed=42,
+#         messages=[
+#             {
+#                 "role": "user",
+#                 "content": [
+#                     {"type": "image_url", "image_url": {"url": data_url}},
+#                     {
+#                         "type": "text",
+#                         "text": "What do you see in this transparent image?",
+#                     },
+#                 ],
+#             },
+#         ],
+#         max_tokens=100,
+#     )
+#     assert response == response_snapshot
+#
+#
+# async def test_flash_llama4_image_base64_rgb_png(flash_llama4, response_snapshot):
+#     # Create an empty 100x100 PNG image without alpha (white background)
+#     img = Image.new("RGB", (100, 100), (255, 255, 255))
+#     data_url = image_to_data_url(img, "PNG")
+#     response = await flash_llama4.chat(
+#         seed=42,
+#         messages=[
+#             {
+#                 "role": "user",
+#                 "content": [
+#                     {"type": "image_url", "image_url": {"url": data_url}},
+#                     {"type": "text", "text": "What do you see in this plain image?"},
+#                 ],
+#             },
+#         ],
+#         max_tokens=100,
+#     )
+#     assert response == response_snapshot
+#
+#
+# async def test_flash_llama4_image_base64_rgb_jpg(flash_llama4, response_snapshot):
+#     # Create an empty 100x100 JPEG image (white background)
+#     img = Image.new("RGB", (100, 100), (255, 255, 255))
+#     data_url = image_to_data_url(img, "JPEG")
+#     response = await flash_llama4.chat(
+#         seed=42,
+#         messages=[
+#             {
+#                 "role": "user",
+#                 "content": [
+#                     {"type": "image_url", "image_url": {"url": data_url}},
+#                     {"type": "text", "text": "What do you see in this JPEG image?"},
+#                 ],
+#             },
+#         ],
+#         max_tokens=100,
+#     )
+#     assert response == response_snapshot

nix/overlay.nix

Lines changed: 12 additions & 2 deletions
@@ -18,8 +18,18 @@ final: prev: {
       src = final.fetchFromGitHub {
         owner = "huggingface";
         repo = "transformers";
-        rev = "v4.50.0";
-        hash = "sha256-/scrMPUY43n+XAMbwWCtmiJKXscXGLrklyDg9XZTaqw=";
+        rev = "v4.51.0";
+        hash = "sha256-dnVpc6fm1SYGcx7FegpwVVxUY6XRlsxLs5WOxYv11y8=";
+      };
+    }
+  );
+  huggingface-hub = python-super.huggingface-hub.overrideAttrs (
+    _: _: {
+      src = final.fetchFromGitHub {
+        owner = "huggingface";
+        repo = "huggingface_hub";
+        rev = "v0.30.0";
+        hash = "sha256-sz+n1uoWrSQPqJFiG/qCT6b4r08kD9MsoPZXbfWNB2o=";
       };
     }
   );

router/src/validation.rs

Lines changed: 2 additions & 2 deletions
@@ -566,7 +566,7 @@ fn fetch_image(input: &str) -> Result<(Vec<u8>, String, usize, usize), Validatio
         return Err(ValidationError::InvalidImageContent(content.to_string()));
     }

-    let data = STANDARD.decode(content["base64,".len()..].as_bytes())?;
+    let data = STANDARD.decode(&content["base64,".len()..])?;
     let img = if let Some(format) = format_from_mimetype(mimetype) {
         ImageReader::with_format(Cursor::new(&data), format).decode()?
     } else {

@@ -603,7 +603,7 @@ fn image_tokens(

     let mut image_string = String::with_capacity(2 * FAKE.len() + slots * IMAGE.len());
     image_string.push_str(FAKE);
-    image_string.extend(iter::repeat(IMAGE).take(slots));
+    image_string.extend(iter::repeat_n(IMAGE, slots));
     image_string.push_str(FAKE);

     if matches!(
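
For context on the router changes above: `base64`'s `Engine::decode` accepts any `impl AsRef<[u8]>`, so the `&str` slice in `fetch_image` can be passed without the explicit `.as_bytes()`, and the now-stable `std::iter::repeat_n` replaces the `repeat(x).take(n)` pattern in `image_tokens`. A minimal standalone sketch of the iterator change, not taken from the repository and using hypothetical `fake`/`image` marker values in place of the crate's `FAKE`/`IMAGE` constants:

use std::iter;

fn main() {
    // Hypothetical marker strings standing in for the FAKE/IMAGE tokens
    // used by image_tokens in router/src/validation.rs.
    let fake = "<fake>";
    let image = "<image>";
    let slots = 3;

    // Old form: an unbounded repeater capped with `.take(slots)`.
    let mut old_style = String::with_capacity(2 * fake.len() + slots * image.len());
    old_style.push_str(fake);
    old_style.extend(iter::repeat(image).take(slots));
    old_style.push_str(fake);

    // New form: `repeat_n` expresses the finite count directly.
    let mut new_style = String::with_capacity(2 * fake.len() + slots * image.len());
    new_style.push_str(fake);
    new_style.extend(iter::repeat_n(image, slots));
    new_style.push_str(fake);

    // Both produce "<fake><image><image><image><fake>".
    assert_eq!(old_style, new_style);
    println!("{new_style}");
}

Both forms build the same string; `repeat_n` simply states the finite count up front, which is what recent Clippy releases suggest for this pattern (presumably the "Lint." item in the commit message).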
