
Commit aa1730c

Merge pull request #730 from intel/sync_msft_4_7_25
Backmerging with msft commits
2 parents 1695972 + 3106e3b commit aa1730c

File tree: 133 files changed (+2274 / -1490 lines)


.github/workflows/linux-wasm-ci-build-and-test-workflow.yml

Lines changed: 4 additions & 14 deletions
@@ -107,13 +107,10 @@ jobs:
             cp ${{ github.workspace }}/build/wasm_inferencing_jsep/${{ inputs.build_config }}/ort-wasm-simd-threaded.jsep.wasm ${{ github.workspace }}/artifacts/wasm/
             cp ${{ github.workspace }}/build/wasm_inferencing_jsep/${{ inputs.build_config }}/ort-wasm-simd-threaded.jsep.mjs ${{ github.workspace }}/artifacts/wasm/
           fi
-
-      - name: Create WebGPU Artifacts
-        if: ${{ inputs.skip_publish != true && inputs.build_webgpu == true }}
-        run: |
-          mkdir -p ${{ github.workspace }}/artifacts/wasm_webgpu/
-          cp ${{ github.workspace }}/build/wasm_inferencing_webgpu/${{ inputs.build_config }}/ort-wasm-simd-threaded.asyncify.wasm ${{ github.workspace }}/artifacts/wasm_webgpu/
-          cp ${{ github.workspace }}/build/wasm_inferencing_webgpu/${{ inputs.build_config }}/ort-wasm-simd-threaded.asyncify.mjs ${{ github.workspace }}/artifacts/wasm_webgpu/
+          if [ -d ${{ github.workspace }}/build/wasm_inferencing_webgpu ]; then
+            cp ${{ github.workspace }}/build/wasm_inferencing_webgpu/${{ inputs.build_config }}/ort-wasm-simd-threaded.asyncify.wasm ${{ github.workspace }}/artifacts/wasm/
+            cp ${{ github.workspace }}/build/wasm_inferencing_webgpu/${{ inputs.build_config }}/ort-wasm-simd-threaded.asyncify.mjs ${{ github.workspace }}/artifacts/wasm/
+          fi
 
       - name: Upload WASM artifacts
         if: ${{ inputs.skip_publish != true }}
@@ -122,13 +119,6 @@ jobs:
           name: ${{ inputs.build_config }}_wasm
           path: ${{ github.workspace }}/artifacts/wasm
 
-      - name: Upload WebGPU artifacts
-        if: ${{ inputs.skip_publish != true && inputs.build_webgpu == true }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ inputs.build_config }}_wasm_webgpu
-          path: ${{ github.workspace }}/artifacts/wasm_webgpu
-
       - name: Test (Node.js) (simd + threads)
         # onnxruntime_test_all is currently only supported in Debug build because it requires exception, which is disabled in Release build.
         if: ${{ inputs.build_config == 'Debug' }}

.github/workflows/windows-web-ci-workflow.yml

Lines changed: 0 additions & 16 deletions
@@ -83,22 +83,6 @@ jobs:
         run: |
           copy ${{ github.workspace }}\artifacts_wasm\ort-*.mjs ${{ github.workspace }}\js\web\dist\
 
-      - name: Download WebAssembly WebGPU artifacts
-        uses: actions/download-artifact@v4
-        with:
-          name: ${{ inputs.build_config }}_wasm_webgpu
-          path: ${{ github.workspace }}/artifacts_wasm_webgpu
-
-      - name: Binplace dist files (.wasm) for WebGPU
-        shell: cmd
-        run: |
-          copy ${{ github.workspace }}\artifacts_wasm_webgpu\ort-*.wasm ${{ github.workspace }}\js\web\dist\
-
-      - name: Binplace dist files (.mjs) for WebGPU
-        shell: cmd
-        run: |
-          copy ${{ github.workspace }}\artifacts_wasm_webgpu\ort-*.mjs ${{ github.workspace }}\js\web\dist\
-
       - name: npm ci for /js/
         run: npm ci
         working-directory: ${{ github.workspace }}/js

cmake/onnxruntime_providers_nv.cmake

Lines changed: 21 additions & 23 deletions
@@ -17,7 +17,7 @@ endif ()
 add_definitions("-DONNX_ML=1")
 add_definitions("-DONNX_NAMESPACE=onnx")
 set(CUDA_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIRS})
-set(TENSORRT_ROOT ${onnxruntime_TENSORRT_HOME})
+set(TENSORRT_RTX_ROOT ${onnxruntime_TENSORRT_RTX_HOME})
 set(OLD_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
 set(PROTOBUF_LIBRARY ${PROTOBUF_LIB})
 if (WIN32)
@@ -34,12 +34,12 @@ endif ()
 endif()
 set(CXX_VERSION_DEFINED TRUE)
 
-find_path(TENSORRT_INCLUDE_DIR NvInfer.h
-  HINTS ${TENSORRT_ROOT}
+find_path(TENSORRT_RTX_INCLUDE_DIR NvInfer.h
+  HINTS ${TENSORRT_RTX_ROOT}
   PATH_SUFFIXES include)
 
 
-file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h NVINFER_VER_CONTENT)
+file(READ ${TENSORRT_RTX_INCLUDE_DIR}/NvInferVersion.h NVINFER_VER_CONTENT)
 string(REGEX MATCH "define TRT_MAJOR_RTX * +([0-9]+)" NV_TRT_MAJOR_RTX "${NVINFER_VER_CONTENT}")
 string(REGEX REPLACE "define TRT_MAJOR_RTX * +([0-9]+)" "\\1" NV_TRT_MAJOR_RTX "${NV_TRT_MAJOR_RTX}")
 string(REGEX MATCH "define TRT_MINOR_RTX * +([0-9]+)" NV_TRT_MINOR_RTX "${NVINFER_VER_CONTENT}")
@@ -54,37 +54,37 @@ endif ()
 endif()
 
 if (WIN32)
-  set(NVINFER_LIB "tensorrt_rtx_${NV_TRT_MAJOR_RTX}_${NV_TRT_MINOR_RTX}")
-  set(PARSER_LIB "tensorrt_onnxparser_rtx_${NV_TRT_MAJOR_RTX}_${NV_TRT_MINOR_RTX}")
+  set(TRT_RTX_LIB "tensorrt_rtx_${NV_TRT_MAJOR_RTX}_${NV_TRT_MINOR_RTX}")
+  set(RTX_PARSER_LIB "tensorrt_onnxparser_rtx_${NV_TRT_MAJOR_RTX}_${NV_TRT_MINOR_RTX}")
 endif()
 
-if (NOT NVINFER_LIB)
-  set(NVINFER_LIB "tensorrt_rtx")
+if (NOT TRT_RTX_LIB)
+  set(TRT_RTX_LIB "tensorrt_rtx")
 endif()
 
-if (NOT PARSER_LIB)
-  set(PARSER_LIB "tensorrt_onnxparser_rtx")
+if (NOT RTX_PARSER_LIB)
+  set(RTX_PARSER_LIB "tensorrt_onnxparser_rtx")
 endif()
 
-MESSAGE(STATUS "Looking for ${NVINFER_LIB}")
+MESSAGE(STATUS "Looking for ${TRT_RTX_LIB}")
 
-find_library(TENSORRT_LIBRARY_INFER ${NVINFER_LIB}
-  HINTS ${TENSORRT_ROOT}
+find_library(TENSORRT_LIBRARY_INFER ${TRT_RTX_LIB}
+  HINTS ${TENSORRT_RTX_ROOT}
   PATH_SUFFIXES lib lib64 lib/x64)
 
 if (NOT TENSORRT_LIBRARY_INFER)
-  MESSAGE(STATUS "Can't find ${NVINFER_LIB}")
+  MESSAGE(STATUS "Can't find ${TRT_RTX_LIB}")
 endif()
 
 if (onnxruntime_USE_TENSORRT_BUILTIN_PARSER)
-  MESSAGE(STATUS "Looking for ${PARSER_LIB}")
+  MESSAGE(STATUS "Looking for ${RTX_PARSER_LIB}")
 
-  find_library(TENSORRT_LIBRARY_NVONNXPARSER ${PARSER_LIB}
-    HINTS ${TENSORRT_ROOT}
+  find_library(TENSORRT_LIBRARY_NVONNXPARSER ${RTX_PARSER_LIB}
+    HINTS ${TENSORRT_RTX_ROOT}
     PATH_SUFFIXES lib lib64 lib/x64)
 
   if (NOT TENSORRT_LIBRARY_NVONNXPARSER)
-    MESSAGE(STATUS "Can't find ${PARSER_LIB}")
+    MESSAGE(STATUS "Can't find ${RTX_PARSER_LIB}")
   endif()
 
   set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER} ${TENSORRT_LIBRARY_NVONNXPARSER})
@@ -104,7 +104,6 @@ endif ()
   # The onnx_tensorrt repo contains a test program, getSupportedAPITest, which doesn't support Windows. It uses
   # unistd.h. So we must exclude it from our build. onnxruntime_fetchcontent_makeavailable is for the purpose.
   onnxruntime_fetchcontent_makeavailable(onnx_tensorrt)
-  include_directories(${onnx_tensorrt_SOURCE_DIR})
   set(CMAKE_CXX_FLAGS ${OLD_CMAKE_CXX_FLAGS})
   if ( CMAKE_COMPILER_IS_GNUCC )
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
@@ -114,17 +113,16 @@ endif ()
     unset(PROTOBUF_LIBRARY)
     unset(OLD_CMAKE_CXX_FLAGS)
     unset(OLD_CMAKE_CUDA_FLAGS)
-    set_target_properties(${PARSER_LIB} PROPERTIES LINK_FLAGS "/ignore:4199")
+    set_target_properties(${RTX_PARSER_LIB} PROPERTIES LINK_FLAGS "/ignore:4199")
     target_compile_options(nvonnxparser_static PRIVATE /FIio.h /wd4100)
-    target_compile_options(${PARSER_LIB} PRIVATE /FIio.h /wd4100)
+    target_compile_options(${RTX_PARSER_LIB} PRIVATE /FIio.h /wd4100)
   endif()
   # Static libraries are just nvonnxparser_static on all platforms
   set(onnxparser_link_libs nvonnxparser_static)
   set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER})
   MESSAGE(STATUS "Find TensorRT libs at ${TENSORRT_LIBRARY}")
 endif()
 
-include_directories(${TENSORRT_INCLUDE_DIR})
 # ${TENSORRT_LIBRARY} is empty if we link nvonnxparser_static.
 # nvonnxparser_static is linked against tensorrt libraries in onnx-tensorrt
 # See https://github.com/onnx/onnx-tensorrt/blob/8af13d1b106f58df1e98945a5e7c851ddb5f0791/CMakeLists.txt#L121
@@ -152,7 +150,7 @@ endif ()
 else()
   target_link_libraries(onnxruntime_providers_nv_tensorrt_rtx PRIVATE ${onnxparser_link_libs} ${trt_link_libs} ${ONNXRUNTIME_PROVIDERS_SHARED} ${PROTOBUF_LIB} flatbuffers::flatbuffers ${ABSEIL_LIBS} PUBLIC CUDA::cudart)
 endif()
-target_include_directories(onnxruntime_providers_nv_tensorrt_rtx PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR}
+target_include_directories(onnxruntime_providers_nv_tensorrt_rtx PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${TENSORRT_RTX_INCLUDE_DIR} ${onnx_tensorrt_SOURCE_DIR}
   PUBLIC ${CUDAToolkit_INCLUDE_DIRS})
 
 # ${CMAKE_CURRENT_BINARY_DIR} is so that #include "onnxruntime_config.h" inside tensor_shape.h is found

cmake/onnxruntime_providers_tensorrt.cmake

Lines changed: 4 additions & 4 deletions
@@ -138,7 +138,6 @@
   # The onnx_tensorrt repo contains a test program, getSupportedAPITest, which doesn't support Windows. It uses
   # unistd.h. So we must exclude it from our build. onnxruntime_fetchcontent_makeavailable is for the purpose.
   onnxruntime_fetchcontent_makeavailable(onnx_tensorrt)
-  include_directories(${onnx_tensorrt_SOURCE_DIR})
   set(CMAKE_CXX_FLAGS ${OLD_CMAKE_CXX_FLAGS})
   if ( CMAKE_COMPILER_IS_GNUCC )
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
@@ -158,7 +157,6 @@
   MESSAGE(STATUS "Find TensorRT libs at ${TENSORRT_LIBRARY}")
 endif()
 
-include_directories(${TENSORRT_INCLUDE_DIR})
 # ${TENSORRT_LIBRARY} is empty if we link nvonnxparser_static.
 # nvonnxparser_static is linked against tensorrt libraries in onnx-tensorrt
 # See https://github.com/onnx/onnx-tensorrt/blob/8af13d1b106f58df1e98945a5e7c851ddb5f0791/CMakeLists.txt#L121
@@ -197,9 +195,11 @@
 else()
   target_link_libraries(onnxruntime_providers_tensorrt PRIVATE ${onnxparser_link_libs} ${trt_link_libs} ${ONNXRUNTIME_PROVIDERS_SHARED} ${PROTOBUF_LIB} flatbuffers::flatbuffers ${ABSEIL_LIBS} PUBLIC CUDA::cudart)
 endif()
-target_include_directories(onnxruntime_providers_tensorrt PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR}
+target_include_directories(onnxruntime_providers_tensorrt PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${TENSORRT_INCLUDE_DIR}
   PUBLIC ${CUDAToolkit_INCLUDE_DIRS})
-
+if (NOT onnxruntime_USE_TENSORRT_BUILTIN_PARSER)
+  target_include_directories(onnxruntime_providers_tensorrt PRIVATE ${onnx_tensorrt_SOURCE_DIR})
+endif()
 # ${CMAKE_CURRENT_BINARY_DIR} is so that #include "onnxruntime_config.h" inside tensor_shape.h is found
 set_target_properties(onnxruntime_providers_tensorrt PROPERTIES LINKER_LANGUAGE CUDA)
 set_target_properties(onnxruntime_providers_tensorrt PROPERTIES FOLDER "ONNXRuntime")

cmake/onnxruntime_unittests.cmake

Lines changed: 10 additions & 5 deletions
@@ -1118,7 +1118,7 @@ if (NOT IOS)
 
   target_link_libraries(onnx_test_runner PRIVATE onnx_test_runner_common ${GETOPT_LIB_WIDE} ${onnx_test_libs} nlohmann_json::nlohmann_json)
   target_include_directories(onnx_test_runner PRIVATE ${ONNXRUNTIME_ROOT})
-
+
   if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
     target_link_libraries(onnx_test_runner PRIVATE Python::Python)
   endif()
@@ -1239,7 +1239,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
   target_include_directories(onnxruntime_perf_test PRIVATE ${onnx_test_runner_src_dir} ${ONNXRUNTIME_ROOT}
     ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir}
     ${CMAKE_CURRENT_BINARY_DIR})
-
+
   if (WIN32)
     target_compile_options(onnxruntime_perf_test PRIVATE ${disabled_warnings})
     if (NOT DEFINED SYS_PATH_LIB)
@@ -1345,7 +1345,7 @@ endif()
 if (onnxruntime_USE_CUDA)
   list(APPEND onnxruntime_shared_lib_test_LIBS)
 endif()
-
+
 if (onnxruntime_USE_TENSORRT)
   list(APPEND onnxruntime_shared_lib_test_LIBS ${TENSORRT_LIBRARY_INFER})
 endif()
@@ -1379,7 +1379,7 @@ endif()
 if (onnxruntime_USE_NV)
   target_include_directories(onnxruntime_shared_lib_test PRIVATE ${CUDAToolkit_INCLUDE_DIRS})
 endif()
-
+
 
 if (CMAKE_SYSTEM_NAME STREQUAL "Android")
   target_sources(onnxruntime_shared_lib_test PRIVATE
@@ -1436,7 +1436,7 @@ endif()
     DEPENDS ${all_dependencies}
   )
 
-
+
 
   target_compile_definitions(onnxruntime_test_debug_node_inputs_outputs
     PRIVATE DEBUG_NODE_INPUTS_OUTPUTS)
@@ -1990,6 +1990,11 @@ if (onnxruntime_BUILD_SHARED_LIB AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten"
     LIBS ${onnxruntime_ep_graph_test_LIBS}
     DEPENDS ${all_dependencies}
   )
+  if (UNIX AND (onnxruntime_USE_TENSORRT OR onnxruntime_USE_NV))
+    # The test_main.cc includes NvInfer.h where it has many deprecated declarations
+    # simply ignore them for TensorRT EP build
+    set_property(TARGET onnxruntime_ep_graph_test APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
+  endif()
 endif()
 
 include(onnxruntime_fuzz_test.cmake)

include/onnxruntime/core/framework/allocator.h

Lines changed: 1 addition & 1 deletion
@@ -86,7 +86,7 @@ class Stream;
 namespace synchronize {
 class Notification;
 }
-using WaitNotificationFn = std::function<void(Stream&, synchronize::Notification&)>;
+using WaitNotificationFn = std::function<void(Stream*, synchronize::Notification&)>;
 void* AllocateBufferWithOptions(IAllocator& allocator, size_t size, bool use_reserve, Stream* stream, WaitNotificationFn wait_fn);
 
 template <typename T>
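
Context for this hunk: WaitNotificationFn now receives the consuming Stream by pointer rather than by reference, which implies a caller may pass a null stream (for example, when the waiting side is plain host code without a stream object). The snippet below is a minimal, self-contained sketch of a callback written against the new signature; Stream, Notification, and ExampleWait are stand-ins invented for illustration, not ONNX Runtime code.

// Sketch: a wait callback that tolerates a null consumer stream (stand-in types).
#include <functional>
#include <iostream>

namespace synchronize { class Notification {}; }  // stand-in for onnxruntime::synchronize::Notification
class Stream {};                                   // stand-in for onnxruntime::Stream

// Same shape as the updated typedef above: the stream parameter is now a pointer.
using WaitNotificationFn = std::function<void(Stream*, synchronize::Notification&)>;

// Hypothetical callback: block on the host when no consumer stream is supplied,
// otherwise enqueue the wait on that stream.
void ExampleWait(Stream* consumer_stream, synchronize::Notification& /*notification*/) {
  if (consumer_stream == nullptr) {
    std::cout << "no consumer stream: block on the host\n";
  } else {
    std::cout << "enqueue an asynchronous wait on the consumer stream\n";
  }
}

int main() {
  WaitNotificationFn wait_fn = ExampleWait;
  synchronize::Notification notification;
  wait_fn(nullptr, notification);  // representable now that the parameter is a pointer
}

Callbacks written against the old Stream& form will at minimum need the parameter type updated; checking for null before dereferencing is the conservative choice.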

include/onnxruntime/core/framework/ortdevice.h

Lines changed: 15 additions & 2 deletions
@@ -13,7 +13,9 @@
 #undef INTEL
 #endif
 
-// Struct to represent a physical device.
+// Struct to represent a combination of physical device and memory type.
+// A memory allocation and allocator have a specific OrtDevice associated with them, and this information is used
+// to determine when data transfer is required.
 struct OrtDevice {
   using DeviceType = int8_t;
   using MemoryType = int8_t;
@@ -41,7 +43,13 @@ struct OrtDevice {
       QNN_HTP_SHARED = 4,
     };
 
-    static const MemoryType HOST_ACCESSIBLE = 5;  // Device memory that is accessible from host and device.
+    // HOST_ACCESSIBLE memory is treated as CPU memory.
+    // When creating an OrtDevice with MemType::HOST_ACCESSIBLE:
+    //   - For memory that is only accessible by a specific device and CPU, use the specific device type and id.
+    //   - When creating an OrtDevice for an EP allocator, you would typically use the same device type and id
+    //     that the EP is registered with (i.e. the OrtDevice passed to the base IExecutionProvider constructor).
+    //   - Otherwise use OrtDevice::CPU.
+    static const MemoryType HOST_ACCESSIBLE = 5;
   };
 
   // PCI vendor ids
@@ -101,6 +109,11 @@ struct OrtDevice {
     return alignment;
   }
 
+  // CPU or HOST_ACCESSIBLE memory.
+  bool UsesCpuMemory() const noexcept {
+    return device_type == CPU || memory_type == MemType::HOST_ACCESSIBLE;
+  }
+
   std::string ToString() const {
     std::ostringstream ostr;
     ostr << "Device:["
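
Context for this hunk: the expanded comments describe OrtDevice as a device/memory-type pair that drives data-transfer decisions, and the new UsesCpuMemory() helper reports whether an allocation is CPU-visible (plain CPU memory or HOST_ACCESSIBLE memory). Below is a minimal stand-in sketch of that idea; the Device struct, the GPU constant, and NeedsCopyToCpu are invented for illustration and are not the real OrtDevice API — only the UsesCpuMemory() predicate mirrors the diff.

// Sketch: using a CPU-visibility predicate to decide whether a host copy is needed.
#include <cstdint>
#include <iostream>

struct Device {  // stand-in for OrtDevice, reduced to the fields used by the diff
  using DeviceType = int8_t;
  using MemoryType = int8_t;
  static constexpr DeviceType CPU = 0;  // assumed value, for illustration only
  static constexpr DeviceType GPU = 1;  // assumed value, for illustration only
  struct MemType {
    static constexpr MemoryType DEFAULT = 0;          // assumed value
    static constexpr MemoryType HOST_ACCESSIBLE = 5;  // value taken from the diff
  };

  DeviceType device_type{CPU};
  MemoryType memory_type{MemType::DEFAULT};

  // Same predicate as the new OrtDevice::UsesCpuMemory().
  bool UsesCpuMemory() const noexcept {
    return device_type == CPU || memory_type == MemType::HOST_ACCESSIBLE;
  }
};

// Hypothetical helper: a transfer to host memory is only required when the source
// memory is not directly visible to the CPU.
bool NeedsCopyToCpu(const Device& src) { return !src.UsesCpuMemory(); }

int main() {
  Device pinned{Device::GPU, Device::MemType::HOST_ACCESSIBLE};  // host-accessible device memory
  Device discrete{Device::GPU, Device::MemType::DEFAULT};        // device-only memory
  std::cout << NeedsCopyToCpu(pinned) << " " << NeedsCopyToCpu(discrete) << "\n";  // prints: 0 1
}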

include/onnxruntime/core/framework/stream_handles.h

Lines changed: 11 additions & 5 deletions
@@ -26,7 +26,9 @@ class Notification;
 // i.e. different cuda stream on different GPU.
 class Stream {
  public:
-  Stream(StreamHandle h, const OrtDevice& d) : handle_(h), device_(d) {}
+  Stream(StreamHandle h, const OrtDevice& d)
+      : handle_(h), device_(d) {
+  }
 
   virtual ~Stream() = default;
   virtual std::unique_ptr<synchronize::Notification> CreateNotification(size_t /*num_consumers*/) {
@@ -168,14 +170,18 @@ class IStreamCommandHandleRegistry {
   virtual ~IStreamCommandHandleRegistry() = default;
   // Wait is a little special as we need to consider the source stream the notification generated, and the stream we are waiting.
   // i.e., for an cuda event what notify the memory copy, it could be wait on a CPU stream, or on another cuda stream.
-  [[nodiscard]] virtual WaitNotificationFn GetWaitHandle(OrtDevice::DeviceType notification_ower_device_type,
-                                                         OrtDevice::DeviceType executor_device_type) const = 0;
-  // Get the stream creation function registered on the given device type.
+  [[nodiscard]] virtual WaitNotificationFn GetWaitHandle(const OrtDevice& notification_owner_device,
+                                                         const OrtDevice& executor_device) const = 0;
+
+  // Get the stream creation function registered for the given device type.
   [[nodiscard]] virtual CreateStreamFn GetCreateStreamFn(OrtDevice::DeviceType execution_device_type) const = 0;
-  // register a wait methond which will be invoked when we wait a notification (created by 'notification_device_type' device) on a stream at 'device_type' device.
+
+  // register a wait method which will be invoked to await a notification that is
+  // created by 'notification_device_type' device on a stream at 'device_type' device.
   virtual void RegisterWaitFn(OrtDevice::DeviceType notification_device_type,
                               OrtDevice::DeviceType device_type,
                               WaitNotificationFn fn) = 0;
+
   // register a handle about how to create stream on given device type.
   virtual void RegisterCreateStreamFn(OrtDevice::DeviceType device_type, CreateStreamFn f) = 0;
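Context for this hunk: GetWaitHandle now takes the full OrtDevice of both the notification owner and the executor instead of bare device types, while RegisterWaitFn continues to register handlers per device-type pair. The sketch below is a toy, self-contained illustration of that shape; ToyWaitRegistry, Device, and the other types are stand-ins invented for the example, not the ONNX Runtime classes.

// Sketch: register by device type, look up with full device objects (stand-in types).
#include <cstdint>
#include <functional>
#include <map>
#include <utility>

struct Device {  // stand-in for OrtDevice
  using DeviceType = int8_t;
  DeviceType type{0};
  DeviceType Type() const { return type; }
};
class Stream {};                                  // stand-in for onnxruntime::Stream
namespace synchronize { class Notification {}; }  // stand-in Notification
using WaitNotificationFn = std::function<void(Stream*, synchronize::Notification&)>;

class ToyWaitRegistry {
 public:
  // Registration is still keyed by (notification device type, executor device type).
  void RegisterWaitFn(Device::DeviceType notification_device_type,
                      Device::DeviceType device_type, WaitNotificationFn fn) {
    handles_[{notification_device_type, device_type}] = std::move(fn);
  }

  // Lookup receives the full devices (mirroring the new GetWaitHandle signature),
  // so an implementation could also consult memory type, vendor id, etc.
  WaitNotificationFn GetWaitHandle(const Device& notification_owner_device,
                                   const Device& executor_device) const {
    auto it = handles_.find({notification_owner_device.Type(), executor_device.Type()});
    return it == handles_.end() ? WaitNotificationFn{} : it->second;
  }

 private:
  std::map<std::pair<Device::DeviceType, Device::DeviceType>, WaitNotificationFn> handles_;
};

int main() {
  ToyWaitRegistry registry;
  registry.RegisterWaitFn(/*notification*/ 1, /*executor*/ 0,
                          [](Stream*, synchronize::Notification&) { /* wait on host */ });
  Device gpu{1}, cpu{0};
  if (auto wait = registry.GetWaitHandle(gpu, cpu)) {
    synchronize::Notification n;
    wait(nullptr, n);  // null stream allowed under the pointer-based callback
  }
}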