Skip to content

Commit 587e92c

Browse files
authored
Add FP32 and INT4 test in Llama2 (microsoft#21187)
### Description <!-- Describe your changes. --> ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. -->
1 parent d1ab94c commit 587e92c

File tree

2 files changed

+108
-16
lines changed

2 files changed

+108
-16
lines changed

tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml

Lines changed: 51 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,6 @@ parameters:
3838
type: number
3939
default: 0
4040

41-
resources:
42-
repositories:
43-
- repository: LLaMa2Onnx
44-
type: Github
45-
endpoint: Microsoft
46-
name: Microsoft/Llama-2-Onnx
47-
ref: main
48-
4941
variables:
5042
- template: templates/common-variables.yml
5143
- name: docker_base_image
@@ -287,11 +279,12 @@ stages:
287279
workingDirectory: $(Build.SourcesDirectory)
288280
condition: ne(variables.hitAnother, 'True')
289281
290-
- stage: Llama2_ONNX_FP16
282+
- stage: Llama2_7B_ONNX
291283
dependsOn:
292284
- Build_Onnxruntime_Cuda
293285
jobs:
294-
- job: Llama2_ONNX_FP16
286+
- job: Llama2_7B_ONNX
287+
timeoutInMinutes: 120
295288
variables:
296289
skipComponentGovernanceDetection: true
297290
workspace:
@@ -319,15 +312,15 @@ stages:
319312

320313
- template: templates/get-docker-image-steps.yml
321314
parameters:
322-
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda_tensorrt10_0
315+
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda_tensorrt10_0_torch
323316
Context: tools/ci_build/github/linux/docker/
324317
ScriptName: tools/ci_build/get_docker_image.py
325318
DockerBuildArgs: "
326319
--build-arg BUILD_UID=$( id -u )
327320
--build-arg BASEIMAGE=${{ variables.docker_base_image }}
328321
--build-arg TRT_VERSION=${{ variables.linux_trt_version }}
329322
"
330-
Repository: onnxruntimeubi8packagestest
323+
Repository: onnxruntimeubi8packagestest_torch
331324
UpdateDepsTxt: false
332325

333326
- task: DownloadPackage@1
@@ -343,7 +336,7 @@ stages:
343336
docker run --rm --gpus all -v $(Build.SourcesDirectory):/workspace \
344337
-v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
345338
-v $(Agent.TempDirectory)/meta_llama2_7b_hf:/meta-llama2 \
346-
onnxruntimeubi8packagestest \
339+
onnxruntimeubi8packagestest_torch \
347340
bash -c "
348341
set -ex; \
349342
pushd /workspace/onnxruntime/python/tools/transformers/ ; \
@@ -352,14 +345,56 @@ stages:
352345
python3 -m pip install -r requirements.txt ; \
353346
popd ; \
354347
python3 -m pip install /ort-artifact/*.whl ; \
355-
python3 -m pip uninstall -y torch ; \
356-
python3 -m pip install torch --index-url https://download.pytorch.org/whl/cu118 ; \
357-
python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-fp16 --precision fp16 --execution_provider cuda --input /meta-llama2 --small_gpu ;\
348+
python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --input /meta-llama2 --output llama2-7b-fp16 --precision fp16 --execution_provider cuda --small_gpu;\
349+
ls -l llama2-7b-fp16; \
350+
du -sh llama2-7b-fp16; \
358351
popd ; \
359352
"
360353
displayName: 'Run Llama2 to Onnx FP16 and parity Test'
361354
workingDirectory: $(Build.SourcesDirectory)
362355
356+
- script: |
357+
docker run --rm --gpus all -v $(Build.SourcesDirectory):/workspace \
358+
-v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
359+
-v $(Agent.TempDirectory)/meta_llama2_7b_hf:/meta-llama2 \
360+
onnxruntimeubi8packagestest_torch \
361+
bash -c "
362+
set -ex; \
363+
pushd /workspace/onnxruntime/python/tools/transformers/ ; \
364+
python3 -m pip install --upgrade pip ; \
365+
pushd models/llama ; \
366+
python3 -m pip install -r requirements.txt ; \
367+
popd ; \
368+
python3 -m pip install /ort-artifact/*.whl ; \
369+
python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --input /meta-llama2 --output llama2-7b-fp32-gpu --precision fp32 --execution_provider cuda;\
370+
ls -l llama2-7b-fp32-gpu; \
371+
du -sh llama2-7b-fp32-gpu; \
372+
popd ; \
373+
"
374+
displayName: 'Run Llama2 to Onnx fp32 and parity Test'
375+
workingDirectory: $(Build.SourcesDirectory)
376+
377+
- script: |
378+
docker run --rm --gpus all -v $(Build.SourcesDirectory):/workspace \
379+
-v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
380+
-v $(Agent.TempDirectory)/meta_llama2_7b_hf:/meta-llama2 \
381+
onnxruntimeubi8packagestest_torch \
382+
bash -c "
383+
set -ex; \
384+
pushd /workspace/onnxruntime/python/tools/transformers/ ; \
385+
python3 -m pip install --upgrade pip ; \
386+
pushd models/llama ; \
387+
python3 -m pip install -r requirements.txt ; \
388+
popd ; \
389+
python3 -m pip install /ort-artifact/*.whl ; \
390+
python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --input /meta-llama2 --output llama2-7b-int4-gpu --precision int4 --execution_provider cuda --use_gqa;\
391+
ls -l llama2-7b-int4-gpu; \
392+
du -sh llama2-7b-int4-gpu; \
393+
popd ; \
394+
"
395+
displayName: 'Run Llama2 to Onnx INT4 and parity Test'
396+
workingDirectory: $(Build.SourcesDirectory)
397+
363398
- stage: Whisper_ONNX
364399
dependsOn:
365400
- Build_Onnxruntime_Cuda
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# --------------------------------------------------------------
2+
# Copyright (c) Microsoft Corporation. All rights reserved.
3+
# Licensed under the MIT License.
4+
# --------------------------------------------------------------
5+
# Dockerfile to Test ONNX Runtime on UBI8 with TensorRT 10.0 and CUDA 11.8 by default
6+
7+
# Build base image with required system packages
8+
ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubi8
9+
ARG TRT_VERSION=10.0.1.6-1.cuda11.8
10+
FROM $BASEIMAGE AS base
11+
ARG TRT_VERSION
12+
ENV PATH /opt/python/cp38-cp38/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/src/tensorrt/bin:${PATH}
13+
14+
RUN dnf install -y bash wget &&\
15+
dnf clean dbcache
16+
17+
RUN pip3 install --upgrade pip
18+
RUN pip3 install setuptools>=68.2.2
19+
20+
# Install TensorRT only if TRT_VERSION is not empty
21+
RUN if [ -n "$TRT_VERSION" ]; then \
22+
echo "TRT_VERSION is $TRT_VERSION" && \
23+
dnf -y install \
24+
libnvinfer10-${TRT_VERSION} \
25+
libnvinfer-headers-devel-${TRT_VERSION} \
26+
libnvinfer-devel-${TRT_VERSION} \
27+
libnvinfer-lean10-${TRT_VERSION} \
28+
libnvonnxparsers10-${TRT_VERSION} \
29+
libnvonnxparsers-devel-${TRT_VERSION} \
30+
libnvinfer-dispatch10-${TRT_VERSION} \
31+
libnvinfer-plugin10-${TRT_VERSION} \
32+
libnvinfer-vc-plugin10-${TRT_VERSION} \
33+
libnvinfer-bin-${TRT_VERSION} \
34+
libnvinfer-plugin10-${TRT_VERSION} \
35+
libnvinfer-plugin-devel-${TRT_VERSION} \
36+
libnvinfer-vc-plugin-devel-${TRT_VERSION} \
37+
libnvinfer-lean-devel-${TRT_VERSION} \
38+
libnvinfer-dispatch-devel-${TRT_VERSION} \
39+
libnvinfer-headers-plugin-devel-${TRT_VERSION} && \
40+
dnf clean dbcache ; \
41+
else \
42+
echo "TRT_VERSION is none, skipping TensorRT installation" ; \
43+
fi
44+
45+
ADD scripts /tmp/scripts
46+
RUN cd /tmp/scripts && /tmp/scripts/install_dotnet.sh && /tmp/scripts/install_java.sh && rm -rf /tmp/scripts
47+
48+
RUN python3 -m pip uninstall -y torch
49+
RUN python3 -m pip install torch --index-url https://download.pytorch.org/whl/cu118
50+
51+
# Build final image from base.
52+
FROM base as final
53+
ARG BUILD_USER=onnxruntimedev
54+
ARG BUILD_UID=1000
55+
RUN adduser --uid $BUILD_UID $BUILD_USER
56+
WORKDIR /home/$BUILD_USER
57+
USER $BUILD_USER

0 commit comments

Comments
 (0)