Skip to content

Commit ddc64b9

Browse files
committed
Merge branch 'master' into sync_msft_05082025
2 parents c533007 + 2ba3682 commit ddc64b9

File tree

124 files changed

+6542
-1294
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

124 files changed

+6542
-1294
lines changed

.github/actions/macos-ci-setup/action.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,8 @@ description: "Common setup steps for macOS CI pipelines"
33

44
inputs:
55
platform_machine:
6-
required: false
6+
required: true
77
type: string
8-
default: "arm64"
98
python_version:
109
required: false
1110
type: string

.github/workflows/mac.yml

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,32 +23,42 @@ jobs:
2323
uses: ./.github/workflows/macos-ci-build-and-test-workflow.yml
2424
with:
2525
# Only build arm64 for CPU
26-
matrix_exclude: >-
26+
matrix_include: >-
2727
[
28-
{"platform_machine": "x86_64"}
28+
{"machine": "arm64", "target": "arm64", "build_config": "Debug"},
29+
{"machine": "arm64", "target": "arm64", "build_config": "Release"}
2930
]
3031
3132
coreml:
3233
uses: ./.github/workflows/macos-ci-build-and-test-workflow.yml
3334
with:
3435
use_coreml: true
35-
36+
matrix_include: >-
37+
[
38+
{"machine": "x86_64", "target": "x86_64", "build_config": "Release"},
39+
{"machine": "arm64", "target": "arm64", "build_config": "Debug"},
40+
{"machine": "arm64", "target": "arm64", "build_config": "Release"}
41+
]
3642
xnnpack:
3743
uses: ./.github/workflows/macos-ci-build-and-test-workflow.yml
3844
with:
3945
use_xnnpack: true
4046
# only build arm64/Debug for XNNPack
41-
matrix_exclude: >-
47+
matrix_include: >-
4248
[
43-
{"platform_machine": "x86_64"},
44-
{"platform_machine": "arm64", "build_config": "Release"}
49+
{"machine": "arm64", "target": "arm64", "build_config": "Debug"}
4550
]
4651
4752
webgpu:
4853
uses: ./.github/workflows/macos-ci-build-and-test-workflow.yml
4954
with:
5055
use_webgpu: true
51-
56+
matrix_include: >-
57+
[
58+
{"machine": "arm64", "target": "x86_64", "build_config": "Release"},
59+
{"machine": "arm64", "target": "arm64", "build_config": "Debug"},
60+
{"machine": "arm64", "target": "arm64", "build_config": "Release"}
61+
]
5262
iphone_simulator:
5363
runs-on: macos-15
5464

.github/workflows/macos-ci-build-and-test-workflow.yml

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,26 +20,37 @@ on:
2020
required: false
2121
type: string
2222
default: "3.11"
23-
matrix_exclude:
23+
matrix_include:
2424
required: false
2525
type: string
26-
description: "JSON string specifying combinations to exclude from the matrix"
26+
description: "JSON string specifying combinations to include in the matrix"
2727
# we do not have enough resources to run all combinations
28-
# Exclude x86_64 + Debug combination by default
28+
# Include arm64 + Debug combination by default
2929
default: >-
3030
[
31-
{"platform_machine": "x86_64", "build_config": "Debug"}
31+
{"machine": "arm64", "target": "arm64", "build_config": "Debug"}
3232
]
3333
3434
jobs:
3535
build-and-test:
3636
strategy:
3737
matrix:
38-
platform_machine: ["x86_64", "arm64"]
39-
build_config: ["Debug", "Release"]
40-
exclude: ${{ fromJSON(inputs.matrix_exclude) }}
38+
# To avoid creating too many jobs, we explicitly specify the combinations to run.
39+
#
40+
# # Specify the machine architecture to run the build on.
41+
# machine: ["x86_64", "arm64"]
42+
#
43+
# # Specify the target architecture for the build.
44+
# target: ["x86_64", "arm64"]
45+
#
46+
# # Specify the build configuration for the build.
47+
# build_config: ["Debug", "Release"]
48+
#
49+
include: ${{ fromJSON(inputs.matrix_include) }}
4150

42-
runs-on: ${{ matrix.platform_machine == 'x86_64' && 'macos-13' || 'macos-15' }}
51+
# "macos-13" is a x86_64 image, and "macos-15" is an arm64 image.
52+
# see also: https://github.com/actions/runner-images/blob/main/README.md
53+
runs-on: ${{ matrix.machine == 'x86_64' && 'macos-13' || 'macos-15' }}
4354
env:
4455
build_flags: >
4556
--build_dir ./build
@@ -53,11 +64,13 @@ jobs:
5364
--build_wheel
5465
${{ inputs.use_webgpu && '--use_webgpu' || '' }}
5566
${{ inputs.use_xnnpack && '--use_xnnpack' || '' }}
56-
${{ inputs.use_coreml && '--use_coreml' || '' }}
5767
${{ inputs.use_coreml && '--use_coreml --skip_onnx_tests' || '' }}
5868
--use_vcpkg --use_vcpkg_ms_internal_asset_cache
5969
--config ${{ matrix.build_config }}
60-
xcode_version: ${{ matrix.platform_machine == 'x86_64' && '14.3.1' || '16' }}
70+
--osx_arch ${{ matrix.target }}
71+
72+
# Xcode version needs to match the "runs-on" configuration.
73+
xcode_version: ${{ matrix.machine == 'x86_64' && '14.3.1' || '16' }}
6174

6275
steps:
6376
- name: Checkout code
@@ -66,7 +79,7 @@ jobs:
6679
- name: macOS CI pipeline prepare steps
6780
uses: ./.github/actions/macos-ci-setup
6881
with:
69-
platform_machine: ${{ matrix.platform_machine }}
82+
platform_machine: ${{ matrix.machine }}
7083
python_version: ${{ inputs.python_version }}
7184
xcode_version: ${{ env.xcode_version }}
7285
use_cache: true
@@ -108,6 +121,8 @@ jobs:
108121
make install DESTDIR=${{ github.workspace }}/build/installed
109122
110123
- name: Running Tests (build.py --test)
124+
# Skip tests when cross-compiling
125+
if: ${{ matrix.machine == matrix.target }}
111126
shell: bash
112127
working-directory: ${{ github.workspace }}
113128
run: |

cmake/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -558,6 +558,7 @@ else()
558558
check_cxx_compiler_flag(-Wambiguous-reversed-operator HAS_AMBIGUOUS_REVERSED_OPERATOR)
559559
# -Winterference-size was added in GCC 13
560560
check_cxx_compiler_flag(-Winterference-size HAS_INTERFERENCE_SIZE)
561+
check_cxx_compiler_flag(-Warray-bounds HAS_ARRAY_BOUNDS)
561562
check_cxx_compiler_flag(-Wbitwise-instead-of-logical HAS_BITWISE_INSTEAD_OF_LOGICAL)
562563
check_cxx_compiler_flag(-Wcast-function-type HAS_CAST_FUNCTION_TYPE)
563564
check_cxx_compiler_flag(-Wcatch-value HAS_CATCH_VALUE)

cmake/external/onnxruntime_external_deps.cmake

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -747,7 +747,11 @@ if (onnxruntime_USE_WEBGPU)
747747
#
748748
# - (private) Fulfill the BinSkim requirements
749749
# Some build warnings are not allowed to be disabled in project level.
750-
${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn_binskim.patch)
750+
${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn_binskim.patch &&
751+
752+
# Android devices don't seem to allow fp16 in uniforms, so the WebGPU EP has to manually handle passing an fp32
753+
# in the uniform and converting to fp16 before using.
754+
${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/uniform_and_storage_buffer_16_bit_access.patch)
751755

752756
onnxruntime_fetchcontent_declare(
753757
dawn

cmake/onnxruntime_config.h.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#pragma once
55

6+
#cmakedefine HAS_ARRAY_BOUNDS
67
#cmakedefine HAS_BITWISE_INSTEAD_OF_LOGICAL
78
#cmakedefine HAS_CAST_FUNCTION_TYPE
89
#cmakedefine HAS_CATCH_VALUE

cmake/onnxruntime_webassembly.cmake

Lines changed: 16 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,9 @@ if (NOT onnxruntime_USE_VCPKG)
101101
target_compile_options(onnx PRIVATE -Wno-unused-parameter -Wno-unused-variable)
102102
endif()
103103

104+
# Include the Node.js helper for finding and validating Node.js and NPM
105+
include(node_helper.cmake)
106+
104107
if (onnxruntime_BUILD_WEBASSEMBLY_STATIC_LIB)
105108
bundle_static_library(onnxruntime_webassembly
106109
${PROTOBUF_LIB}
@@ -148,11 +151,6 @@ if (onnxruntime_BUILD_WEBASSEMBLY_STATIC_LIB)
148151
GTest::gtest
149152
)
150153

151-
find_program(NODE_EXECUTABLE node required)
152-
if (NOT NODE_EXECUTABLE)
153-
message(FATAL_ERROR "Node is required for a test")
154-
endif()
155-
156154
add_test(NAME onnxruntime_webassembly_test
157155
COMMAND ${NODE_EXECUTABLE} onnxruntime_webassembly_test.js
158156
WORKING_DIRECTORY $<TARGET_FILE_DIR:onnxruntime_webassembly_test>
@@ -343,6 +341,19 @@ else()
343341
)
344342
endif()
345343

344+
#
345+
# Apply post-processing script for the generated JavaScript file
346+
#
347+
list(APPEND onnxruntime_webassembly_script_deps "${ONNXRUNTIME_ROOT}/wasm/wasm_post_build.js")
348+
add_custom_command(
349+
TARGET onnxruntime_webassembly
350+
POST_BUILD
351+
# Backup file at $<TARGET_FILE_NAME:onnxruntime_webassembly>.bak
352+
COMMAND ${CMAKE_COMMAND} -E copy_if_different "$<TARGET_FILE_NAME:onnxruntime_webassembly>" "$<TARGET_FILE_NAME:onnxruntime_webassembly>.bak"
353+
COMMAND ${CMAKE_COMMAND} -E echo "Performing post-process for $<TARGET_FILE_NAME:onnxruntime_webassembly>"
354+
COMMAND ${NODE_EXECUTABLE} "${ONNXRUNTIME_ROOT}/wasm/wasm_post_build.js" "$<TARGET_FILE_NAME:onnxruntime_webassembly>"
355+
)
356+
346357
set_target_properties(onnxruntime_webassembly PROPERTIES LINK_DEPENDS "${onnxruntime_webassembly_script_deps}")
347358

348359
set(target_name_list ort)
@@ -373,61 +384,4 @@ else()
373384
endif()
374385

375386
set_target_properties(onnxruntime_webassembly PROPERTIES OUTPUT_NAME ${target_name} SUFFIX ".mjs")
376-
377-
if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
378-
#
379-
# The following POST_BUILD script is a workaround for enabling:
380-
# - using onnxruntime-web with Multi-threading enabled when import from CDN
381-
# - using onnxruntime-web when consumed in some frameworks like Vite
382-
#
383-
# In the use case mentioned above, the file name of the script may be changed. So we need to replace the line:
384-
# `new Worker(new URL("ort-wasm-*.mjs", import.meta.url),`
385-
# with
386-
# `new Worker(new URL(import.meta.url),`
387-
#
388-
# This behavior is introduced in https://github.com/emscripten-core/emscripten/pull/22165. Since it's unlikely to be
389-
# reverted, and there is no config to disable this behavior, we have to use a post-build script to workaround it.
390-
#
391-
392-
# Generate a script to do the post-build work
393-
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/wasm_post_build.js "
394-
const fs = require('fs');
395-
const path = require('path');
396-
397-
// node wasm_post_build.js <mjsFilePath>
398-
const mjsFilePath = process.argv[2];
399-
let contents = fs.readFileSync(mjsFilePath).toString();
400-
401-
const regex = 'new Worker\\\\(new URL\\\\(\".+?\", ?import\\\\.meta\\\\.url\\\\),';
402-
const matches = [...contents.matchAll(new RegExp(regex, 'g'))];
403-
if (matches.length !== 1) {
404-
throw new Error(
405-
`Unexpected number of matches for \"\${regex}\" in \"\${mjsFilePath}\": \${matches.length}.`,
406-
);
407-
}
408-
409-
// Replace the only occurrence.
410-
contents = contents.replace(
411-
new RegExp(regex),
412-
`new Worker(new URL(import.meta.url),`,
413-
);
414-
415-
fs.writeFileSync(mjsFilePath, contents);
416-
"
417-
)
418-
419-
find_program(NODE_EXECUTABLE node required)
420-
if (NOT NODE_EXECUTABLE)
421-
message(FATAL_ERROR "Node is required to run the post-build script")
422-
endif()
423-
424-
add_custom_command(
425-
TARGET onnxruntime_webassembly
426-
POST_BUILD
427-
# Backup file at $<TARGET_FILE_NAME:onnxruntime_webassembly>.bak
428-
COMMAND ${CMAKE_COMMAND} -E copy_if_different "$<TARGET_FILE_NAME:onnxruntime_webassembly>" "$<TARGET_FILE_NAME:onnxruntime_webassembly>.bak"
429-
COMMAND ${CMAKE_COMMAND} -E echo "Performing post-process for $<TARGET_FILE_NAME:onnxruntime_webassembly>"
430-
COMMAND ${NODE_EXECUTABLE} "${CMAKE_CURRENT_BINARY_DIR}/wasm_post_build.js" "$<TARGET_FILE_NAME:onnxruntime_webassembly>"
431-
)
432-
endif()
433387
endif()
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
diff --git a/src/dawn/native/vulkan/DeviceVk.cpp b/src/dawn/native/vulkan/DeviceVk.cpp
2+
index c01d64e40f..0f1f4beae4 100644
3+
--- a/src/dawn/native/vulkan/DeviceVk.cpp
4+
+++ b/src/dawn/native/vulkan/DeviceVk.cpp
5+
@@ -464,13 +464,15 @@ ResultOrError<VulkanDeviceKnobs> Device::CreateDevice(VkPhysicalDevice vkPhysica
6+
DAWN_ASSERT(usedKnobs.HasExt(DeviceExt::ShaderFloat16Int8) &&
7+
mDeviceInfo.shaderFloat16Int8Features.shaderFloat16 == VK_TRUE &&
8+
usedKnobs.HasExt(DeviceExt::_16BitStorage) &&
9+
- mDeviceInfo._16BitStorageFeatures.storageBuffer16BitAccess == VK_TRUE &&
10+
+ mDeviceInfo._16BitStorageFeatures.storageBuffer16BitAccess == VK_TRUE /*&&
11+
mDeviceInfo._16BitStorageFeatures.uniformAndStorageBuffer16BitAccess ==
12+
- VK_TRUE);
13+
+ VK_TRUE*/);
14+
15+
usedKnobs.shaderFloat16Int8Features.shaderFloat16 = VK_TRUE;
16+
usedKnobs._16BitStorageFeatures.storageBuffer16BitAccess = VK_TRUE;
17+
- usedKnobs._16BitStorageFeatures.uniformAndStorageBuffer16BitAccess = VK_TRUE;
18+
+ if (mDeviceInfo._16BitStorageFeatures.uniformAndStorageBuffer16BitAccess == VK_TRUE) {
19+
+ usedKnobs._16BitStorageFeatures.uniformAndStorageBuffer16BitAccess = VK_TRUE;
20+
+ }
21+
if (mDeviceInfo._16BitStorageFeatures.storageInputOutput16 == VK_TRUE) {
22+
usedKnobs._16BitStorageFeatures.storageInputOutput16 = VK_TRUE;
23+
}
24+
diff --git a/src/dawn/native/vulkan/PhysicalDeviceVk.cpp b/src/dawn/native/vulkan/PhysicalDeviceVk.cpp
25+
index a324c101ed..8d64da750f 100644
26+
--- a/src/dawn/native/vulkan/PhysicalDeviceVk.cpp
27+
+++ b/src/dawn/native/vulkan/PhysicalDeviceVk.cpp
28+
@@ -269,8 +269,9 @@ void PhysicalDevice::InitializeSupportedFeaturesImpl() {
29+
if (mDeviceInfo.HasExt(DeviceExt::ShaderFloat16Int8) &&
30+
mDeviceInfo.HasExt(DeviceExt::_16BitStorage) &&
31+
mDeviceInfo.shaderFloat16Int8Features.shaderFloat16 == VK_TRUE &&
32+
- mDeviceInfo._16BitStorageFeatures.storageBuffer16BitAccess == VK_TRUE &&
33+
- mDeviceInfo._16BitStorageFeatures.uniformAndStorageBuffer16BitAccess == VK_TRUE) {
34+
+ mDeviceInfo._16BitStorageFeatures.storageBuffer16BitAccess == VK_TRUE /*&&
35+
+ WebGPU EP needs to ensure we don't put fp16 values in uniforms when this patch is applied.
36+
+ mDeviceInfo._16BitStorageFeatures.uniformAndStorageBuffer16BitAccess == VK_TRUE*/) {
37+
// ONNX Runtime Patch: enable shaderF16 on all devices.
38+
EnableFeature(Feature::ShaderF16);
39+
shaderF16Enabled = true;

0 commit comments

Comments
 (0)