Skip to content

Commit e2f56c3

Browse files
authored
[CPU] Update torch 2.9.1 for CPU backend (#29664)
Signed-off-by: jiang1.li <jiang1.li@intel.com>
1 parent f8151b6 commit e2f56c3

File tree

6 files changed

+16
-24
lines changed

6 files changed

+16
-24
lines changed

.buildkite/scripts/hardware_ci/run-cpu-test.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,8 @@ trap remove_docker_container EXIT
2121
remove_docker_container
2222

2323
# Try building the docker image
24-
numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --tag cpu-test-"$NUMA_NODE" --target vllm-test -f docker/Dockerfile.cpu .
25-
numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" --tag cpu-test-"$NUMA_NODE"-avx2 --target vllm-test -f docker/Dockerfile.cpu .
24+
numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --progress plain --tag cpu-test-"$NUMA_NODE" --target vllm-test -f docker/Dockerfile.cpu .
25+
numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --progress plain --build-arg VLLM_CPU_DISABLE_AVX512="true" --tag cpu-test-"$NUMA_NODE"-avx2 --target vllm-test -f docker/Dockerfile.cpu .
2626

2727
# Run the image, setting --shm-size=4g for tensor parallel.
2828
docker run -itd --cpuset-cpus="$CORE_RANGE" --cpuset-mems="$NUMA_NODE" --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=16 --env VLLM_CPU_CI_ENV=1 -e E2E_OMP_THREADS="$OMP_CORE_RANGE" --shm-size=4g --name cpu-test-"$NUMA_NODE" cpu-test-"$NUMA_NODE"

csrc/cpu/utils.cpp

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -51,12 +51,13 @@ std::string init_cpu_threads_env(const std::string& cpu_ids) {
5151
if (node_id != -1) {
5252
node_ids.insert(node_id);
5353
}
54-
TORCH_WARN(node_id == mem_node_id, "CPU ", cpu_id, " is on NUMA node ",
55-
node_id, ", but CPU ", omp_cpu_ids.front(),
56-
" is on NUMA node ", mem_node_id,
57-
". All CPUs should be on the same NUMA node for optimal "
58-
"performance. Memory will be bound to NUMA node ",
59-
mem_node_id, ".");
54+
if (node_id != mem_node_id) {
55+
TORCH_WARN("CPU ", cpu_id, " is on NUMA node ", node_id, ", but CPU ",
56+
omp_cpu_ids.front(), " is on NUMA node ", mem_node_id,
57+
". All CPUs should be on the same NUMA node for optimal "
58+
"performance. Memory will be bound to NUMA node ",
59+
mem_node_id, ".");
60+
}
6061
}
6162
// Concatenate all node_ids into a single comma-separated string
6263
if (!node_ids.empty()) {

docker/Dockerfile.cpu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -132,7 +132,7 @@ RUN --mount=type=bind,src=requirements/test.in,target=requirements/test.in \
132132
esac; \
133133
}; \
134134
remove_packages_not_supported_on_aarch64 && \
135-
sed -i 's/^torch==.*/torch==2.8.0/g' requirements/cpu-test.in && \
135+
sed -i 's/^torch==.*/torch==2.9.1/g' requirements/cpu-test.in && \
136136
sed -i 's/torchaudio.*/torchaudio/g' requirements/cpu-test.in && \
137137
sed -i 's/torchvision.*/torchvision/g' requirements/cpu-test.in && \
138138
uv pip compile requirements/cpu-test.in -o requirements/cpu-test.txt --index-strategy unsafe-best-match --torch-backend cpu

requirements/cpu-build.txt

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,8 @@ packaging>=24.2
44
setuptools>=77.0.3,<81.0.0
55
setuptools-scm>=8
66
--extra-index-url https://download.pytorch.org/whl/cpu
7-
torch==2.8.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x"
8-
torch==2.9.0; platform_system == "Darwin"
9-
torch==2.8.0; platform_machine == "ppc64le" or platform_machine == "aarch64"
7+
torch==2.9.1+cpu; platform_machine == "x86_64" or platform_machine == "s390x"
8+
torch==2.9.1; platform_system == "Darwin" or platform_machine == "ppc64le" or platform_machine == "aarch64"
109
scons; platform_machine == "aarch64" # needed to build Arm Compute Library (ACL)
1110
wheel
1211
jinja2>=3.1.6

requirements/cpu.txt

Lines changed: 4 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -4,25 +4,18 @@
44
numba == 0.61.2; platform_machine != "s390x" # Required for N-gram speculative decoding
55

66
# Dependencies for CPUs
7-
packaging>=24.2
8-
setuptools>=77.0.3,<81.0.0
97
--extra-index-url https://download.pytorch.org/whl/cpu
10-
torch==2.8.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x"
11-
torch==2.9.0; platform_system == "Darwin"
12-
torch==2.8.0; platform_machine == "ppc64le" or platform_machine == "aarch64"
8+
torch==2.9.1+cpu; platform_machine == "x86_64" or platform_machine == "s390x"
9+
torch==2.9.1; platform_system == "Darwin" or platform_machine == "ppc64le" or platform_machine == "aarch64"
1310

1411
# required for the image processor of minicpm-o-2_6, this must be updated alongside torch
15-
torchaudio; platform_machine != "ppc64le" and platform_machine != "s390x"
16-
torchaudio==2.8.0; platform_machine == "ppc64le"
12+
torchaudio; platform_machine != "s390x"
1713

1814
# required for the image processor of phi3v, this must be updated alongside torch
19-
torchvision; platform_machine != "ppc64le" and platform_machine != "s390x"
20-
torchvision==0.23.0; platform_machine == "ppc64le"
21-
datasets # for benchmark scripts
15+
torchvision; platform_machine != "s390x"
2216

2317
# Intel Extension for PyTorch, only for x86_64 CPUs
2418
intel-openmp==2024.2.1; platform_machine == "x86_64"
25-
triton==3.2.0; platform_machine == "x86_64" # Triton is required for torch 2.6+cpu, as it is imported in torch.compile.
2619

2720
# Use this to gather CPU info and optimize based on ARM Neoverse cores
2821
py-cpuinfo; platform_machine == "aarch64"

vllm/platforms/cpu.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -384,7 +384,6 @@ def get_allowed_cpu_core_node_list(cls) -> tuple[list[int], list[LogicalCPUInfo]
384384

385385
@classmethod
386386
def is_pin_memory_available(cls) -> bool:
387-
logger.warning("Pin memory is not supported on CPU.")
388387
return False
389388

390389
@classmethod

0 commit comments

Comments
 (0)