
Commit bc3dc45

ankitm3k, daijh, qjia7, adrianlizarraga, and snnn authored
Sync ORT main 16 07 25 (#744)
* [webgpu] Update wgsl_templates README.md (microsoft#25336): Fix a broken URL and the numbering of the ordered list in README.md.
* [webgpu] Move the early return after copying for ScatterND (microsoft#25345): For ScatterND, if the indices are empty (nothing to update), the op becomes a copy operation, so the early return should happen after the copy.
* [EP ABI] Utility to serialize OrtGraph to GraphProto (microsoft#25292):
  - Provides utility functions that serialize an `OrtGraph` to a `GraphProto` or `ModelProto`.
  - Header-only file that can be copied to a project that builds with ORT and ONNX; available in [include/onnxruntime/core/providers/utils/ort_graph_to_proto.h](https://github.com/microsoft/onnxruntime/blob/adrianl/ep-abi-ort-graph-to-onnx-protobuf/include/onnxruntime/core/providers/utils/ort_graph_to_proto.h).
  - Updates the `Node_GetSubgraphs` API function to also return the attribute names associated with each subgraph, which is required to determine which subgraph corresponds to a given attribute.
  - Adds `Graph_GetNumOperatorSets` and `Graph_GetOperatorSets` API functions to get the opset version for each domain.

  Motivation: provide a utility that facilitates porting existing execution providers to the new EP ABI. These utilities convert an `OrtGraph` into an ONNX protobuf representation, which some existing EPs currently convert to their internal representation. Ideally there would be a more direct conversion from an `OrtGraph` to the EP's internal representation, but that is a large effort; these utilities enable an incremental transition.
* Update vcpkg.json: remove optional-lite (microsoft#25339): The library is not used; C++ itself already has `std::optional`.
* Move buffer release or cache from OnRefresh to ReleaseBuffer in BucketCacheManager (microsoft#25276): OnRefresh only runs after a batch of 16 EP runs, so within the batch a released buffer cannot actually be reused, which wastes GPU buffer resources. This PR makes the straightforward optimization of releasing or caching the buffer early, in ReleaseBuffer instead of OnRefresh, which improves buffer cache/release efficiency and therefore peak and average GPU memory usage. Experimental results show a reasonable memory improvement without perf regressions.

  **Phi3**

  | Optimization Strategy | Peak Memory (MB) | Avg Memory (MB) | Token Gen Latency (ms) | Tokens/sec |
  | -- | -- | -- | -- | -- |
  | Default Bucket | 3603.83 | 3127.05 | 7.17 | 139.50 |
  | Default Bucket with Early Release Optimization | 3534.77 (+1.92%) | 3073.97 (+1.70%) | 7.14 (+0.36%) | 140.01 (+0.36%) |

  **Deepseek-R1**

  | Optimization Strategy | Peak Memory (MB) | Avg Memory (MB) | Token Gen Latency (ms) | Tokens/sec |
  | -- | -- | -- | -- | -- |
  | Default Bucket | 2089.03 | 1716.15 | 6.07 | 164.67 |
  | Default Bucket with Early Release Optimization | 2034.00 (+2.63%) | 1674.49 (+2.43%) | 6.09 (-0.20%) | 164.34 (-0.20%) |

  **LLama3.2-1B**

  | Optimization Strategy | Peak Memory (MB) | Avg Memory (MB) | Token Gen Latency (ms) | Tokens/sec |
  | -- | -- | -- | -- | -- |
  | Default Bucket | 1736.03 | 1424.64 | 3.37 | 296.53 |
  | Default Bucket with Early Release Optimization | 1659.78 (+4.39%) | 1366.78 (+4.06%) | 3.41 (-1.09%) | 293.34 (-1.08%) |
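  The gist of the change, as a hedged Python sketch (the real code is the C++ BucketCacheManager in the WebGPU EP; the class and method names below are illustrative only, not the actual ORT interfaces):

  ```python
  class BucketCacheSketch:
      """Toy model of a bucketed GPU-buffer cache, to illustrate when a buffer becomes reusable."""

      def __init__(self, max_per_bucket=4):
          self.buckets = {}      # buffer size -> list of reusable buffers
          self.pending = []      # old behavior: released buffers parked here until OnRefresh
          self.max_per_bucket = max_per_bucket

      def release_buffer_before(self, buf):
          # Before: only queue the buffer; it becomes reusable in on_refresh(),
          # which runs after a batch of ~16 EP runs.
          self.pending.append(buf)

      def on_refresh(self):
          for buf in self.pending:
              self._cache_or_destroy(buf)
          self.pending.clear()

      def release_buffer_after(self, buf):
          # After this PR: cache (or destroy) immediately, so the next run in the
          # same batch can reuse the buffer, lowering peak/average GPU memory.
          self._cache_or_destroy(buf)

      def _cache_or_destroy(self, buf):
          bucket = self.buckets.setdefault(buf["size"], [])
          if len(bucket) < self.max_per_bucket:
              bucket.append(buf)   # keep it around for reuse
          else:
              buf.clear()          # stand-in for destroying the GPU buffer
  ```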
* [web] Fix "npm run pull:wasm" script (microsoft#25330): Follow-up for microsoft#25267.
* [MLAS] DequantizeLinear int8/uint8 (microsoft#24818): Adds multithreaded, vectorized implementations of DequantizeLinear for int8 and uint8 inputs (Intel SSE 2 and ARM NEON); all other architectures fall back to a multithreaded scalar reference implementation (the previous one was not multithreaded). Note: only enabled if ORT is built for client/on-device workloads (`ORT_CLIENT_PACKAGE_BUILD` is defined). Motivation: improves the latency of quantized QDQ models with large DQs that dominate the inference latency.

  INT8 DequantizeLinear latency on Intel Core i9-10920X with 4 intra-op threads (SSE 2 implementation):

  | Number of elements | Baseline latency (us) | Multithreaded+SIMD latency (us) | Speedup |
  | --- | --- | --- | --- |
  | 10 K | 1 | 1 | 1 |
  | 20 K | 2 | 2 | 1 |
  | 40 K | 5 | 5 | 1 |
  | 80 K | 11 | 4 | 2.75 |
  | 100 K | 14 | 5 | 2.80 |
  | 150 K | 21 | 7 | 3.00 |
  | 200 K | 28 | 8 | 3.50 |
  | 400 K | 68 | 15 | 4.53 |
  | 600 K | 107 | 21 | 5.10 |
  | 800 K | 142 | 28 | 5.07 |
  | 1 M | 187 | 42 | 4.45 |
  | 2 M | 376 | 102 | 3.69 |
  | 4 M | 880 | 236 | 3.73 |
  | 6 M | 1547 | 557 | 2.78 |
  | 8 M | 2438 | 1097 | 2.22 |
  | 10 M | 3192 | 1464 | 2.18 |
  | 100 M | 38718 | 17733 | 2.18 |

  INT8 DequantizeLinear latency on Snapdragon 8cx Gen 3 @ 3.4 GHz with 4 intra-op threads (NEON implementation):

  | Number of elements | Baseline latency (us) | Multithreaded+SIMD latency (us) | Speedup |
  | --- | --- | --- | --- |
  | 10 K | 1 | 1 | 1 |
  | 20 K | 1 | 1 | 1 |
  | 40 K | 3 | 3 | 1 |
  | 80 K | 7 | 4 | 1.75 |
  | 100 K | 9 | 3 | 3.00 |
  | 150 K | 14 | 5 | 2.80 |
  | 200 K | 18 | 6 | 3.00 |
  | 400 K | 38 | 10 | 3.80 |
  | 600 K | 61 | 15 | 4.07 |
  | 800 K | 76 | 19 | 4.00 |
  | 1 M | 98 | 24 | 4.08 |
  | 2 M | 204 | 48 | 4.25 |
  | 4 M | 424 | 112 | 3.79 |
  | 6 M | 677 | 384 | 1.76 |
  | 8 M | 919 | 621 | 1.48 |
  | 10 M | 1132 | 776 | 1.46 |
  | 100 M | 11842 | 10566 | 1.12 |
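  For reference, the operation itself is just `y = (x - zero_point) * scale` applied element-wise; the sketch below (NumPy, illustrative only and not the MLAS kernel) shows the math plus the kind of chunking a multithreaded implementation uses to split work across intra-op threads:

  ```python
  import numpy as np
  from concurrent.futures import ThreadPoolExecutor

  def dequantize_linear(x_q, scale, zero_point, num_threads=4):
      """DequantizeLinear for int8/uint8 input: y = (x - zero_point) * scale."""
      y = np.empty(x_q.shape, dtype=np.float32)
      flat_x, flat_y = x_q.reshape(-1), y.reshape(-1)
      chunks = np.array_split(np.arange(flat_x.size), num_threads)  # one slice of work per thread

      def work(idx):
          flat_y[idx] = (flat_x[idx].astype(np.int32) - zero_point) * scale

      with ThreadPoolExecutor(max_workers=num_threads) as pool:
          list(pool.map(work, chunks))
      return y

  x = np.random.randint(-128, 128, size=1_000_000, dtype=np.int8)
  y = dequantize_linear(x, scale=0.02, zero_point=3)
  ```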
* [CPU] GQA supports head_sink input for smooth softmax (microsoft#25269): An extension of the [Smooth Softmax](microsoft#21867) feature. The difference is that each head now has a learnable smooth factor that is added to the denominator of the softmax; the smooth factor acts like an extra element that joins the softmax:

  ```math
  softmax_{i} = \frac{exp(x_{i})}{exp(s) + \sum_{j} exp(x_{j})}
  ```

  `head_sink` is a float tensor whose length equals the number of attention heads. For the h-th head, `head_sink[h]` is used as the smooth factor s; when head_sink is not provided, the constant 0 is used. Changes:
  - [x] Update the operator spec to add an optional new input `head_sink`
  - [x] Implement the CPU (MLAS) kernel
  - [x] Update test_gqa_cpu.py to test it

  The CUDA kernel will be updated later in a separate PR.
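  A hedged NumPy sketch of the per-head smooth softmax described above (illustrative only; the real kernel is implemented in MLAS, and the function and variable names here are made up for the example):

  ```python
  import numpy as np

  def smooth_softmax(scores, head_sink=None):
      """scores: (num_heads, seq_len) attention logits; head_sink: optional per-head smooth factor s."""
      num_heads = scores.shape[0]
      s = head_sink if head_sink is not None else np.zeros(num_heads, dtype=scores.dtype)
      m = np.maximum(scores.max(axis=-1), s)[:, None]           # max over {x_j, s} for numerical stability
      exp_scores = np.exp(scores - m)
      denom = np.exp(s[:, None] - m) + exp_scores.sum(axis=-1, keepdims=True)
      return exp_scores / denom                                  # exp(x_i) / (exp(s) + sum_j exp(x_j))

  probs = smooth_softmax(np.random.randn(8, 128).astype(np.float32),
                         head_sink=np.full(8, 0.5, dtype=np.float32))
  assert np.all(probs.sum(axis=-1) < 1.0)  # the sink absorbs part of the probability mass
  ```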
* Add PackageVersion parameter to NuGet packaging stage (microsoft#25315): Fix: the `Microsoft.ML.OnnxRuntime.Managed.nupkg` artifact from the GPU pipeline does not have a package version. ![image](https://github.com/user-attachments/assets/4a6135ab-4774-4aa6-aeb1-d5b06948ba8f)
* [QNN EP] Fix pool with reshape name conflicts (microsoft#25332): Naming conflicts occurred when the expand-pool2d-squeeze (implemented as reshape) logic was invoked during ONNX to QNN op lowering; models with multiple pool 1D ops would hit this issue.
* Added creation of QDQ for TopK node (microsoft#25309): Adds TopK to registry.py so the ORT static quantization tool creates QDQ nodes for the op, ensures that the input and output quantization params are equal, and adds a unit test verifying the created QDQ nodes. There was already support for forming a node unit for TopK when QDQ nodes are present with equal input and output quantization params, but no support for creating those QDQ nodes in the ORT static quantization tool.
* [WebNN] Refactor webnn op input rank check and add validation for ops (microsoft#25185): Refactor the WebNN op input rank range check, add validation for various ops, and use the `gemm` op as an example of performing input rank checks for decomposed ops. @Honry @fdwr PTAL
* Make TRT plugins optional (microsoft#25261): The parser no longer links against the plugin library but loads it dynamically, so the library should also be optional in ORT. @chilo-ms
* [EP ABI] Add Graph_GetGraphView API to get an OrtGraph from a subset of nodes (microsoft#25191): Adds an API that creates a sub-graph from a set of nodes in an OrtGraph. This is needed in the GetCapability EP ABI porting when an EP wants to check whether a 'sub-graph' of the graph is supported by the hardware backend.
* [webgpu] a few optimizations to WGSL template (microsoft#25333): Follow-up to microsoft#25130:
  - consume duktape from vcpkg if `--use_vcpkg` is specified
  - ~~add a Windows CI pipeline for dynamic WGSL template~~ (will be done in a separate PR)
  - upgrade the wgsl-template package from 0.1.10 to 0.1.13
  - support adding the contrib ops folder as input
* add --client_package_build option (microsoft#25351): Adds a build option that enables defaults more appropriate for client/on-device workloads. The initial use case is the thread pool allow_spinning policy, which should default to 0/false for builds targeting client/on-device workloads. (Co-authored-by: github-actions[bot])
* [WebNN] Fix bug in Float16Array availability check (microsoft#25354): `from` is not an own property of `Float16Array` but an inherited function, so `Float16Array['from']` is used to check whether it is available.
* [EP ABI] Add Node_GetEpType API (microsoft#25350): Adds an API to get the EP that a node is assigned to run on. This is needed when porting the plugin TRT EP's GetCapability, where the EP must know whether the subgraph(s) of a control flow node are assigned to it before adding that control flow op to the support list.
* QNN-EP: DSPQueue Polling (microsoft#25361): Enable DSP queue polling when the performance profile is burst.
* [QNN_EP] Implement Efficient Mode API (microsoft#25146): Set the context priority to low when the workload type is Efficient, to the command-line configured value when Default, and error out otherwise (invalid argument).
* Add Compile API to set the location for the context binary file (microsoft#25356): Adds ModelCompilationOptions_SetEpContextBinaryInformation to set the folder path and model name so the EP knows the right place to dump the [model_name]_[ep].bin file.
* add build matrix for wgsl template (microsoft#25352): Windows WebGPU CI: add a build matrix for the WGSL template.
* [JSEP] Fix inputShape index OOB in slice.ts (microsoft#25364): Use `inputShape.length - 1` instead of `inputShape.length` to avoid out-of-bounds access.
* [webgpu] extend cast version to 23 (microsoft#25235)
* Fix a security warning (microsoft#18979): Reference: GHSA-5crp-9r3c-p9vr. Newtonsoft.Json prior to version 13.0.1 is vulnerable to insecure defaults due to improper handling of expressions with a high nesting level, which leads to a StackOverflow exception or high CPU and RAM usage; exploiting this results in denial of service (DoS). The mitigation is to update Newtonsoft.Json to 13.0.1 or to set the MaxDepth parameter in JsonSerializerSettings:

  ```
  JsonConvert.DefaultSettings = () => new JsonSerializerSettings { MaxDepth = 128 };
  ```

  This file is the only place using `JsonConvert`, so the fix is applied there in the hope that the warning disappears.
* Fix AutoEpSelection and OrtEpLibrary tests when using AuthenticAMD (microsoft#24754)
* Missing datatype in assertion (microsoft#23578)
* [EP ABI] Update to use Node_GetEpName (microsoft#25363): Rename the API to `Node_GetEpName` to avoid confusion. For plugin EPs, the factory can register any name with ORT, so the API name is aligned with `OrtEpFactory.GetName`.
* Bump clang-format from 20.1.7 to 20.1.8 (microsoft#25381)
* Fix number of layers in Whisper export (microsoft#25375): Always use the decoder's number of hidden layers during Whisper export. Most Whisper models have the same number of hidden layers in the encoder and decoder, but Whisper large v3 turbo has 32 encoder layers and only 4 decoder layers. This also fixes microsoft/onnxruntime-genai#1611.
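  A hedged illustration of why the decoder layer count is the one to use, reading the Hugging Face config (the checkpoint name is just an example; the actual export script lives under onnxruntime/python/tools/transformers):

  ```python
  from transformers import WhisperConfig

  config = WhisperConfig.from_pretrained("openai/whisper-large-v3-turbo")
  # Encoder and decoder depths can differ; the decoder KV-cache/past inputs
  # must be sized from decoder_layers, not encoder_layers.
  print(config.encoder_layers, config.decoder_layers)  # 32 vs. 4 for large v3 turbo
  num_hidden_layers = config.decoder_layers
  ```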
* Bump transformers from 4.48.0 to 4.52.1 in /onnxruntime/python/tools/transformers/models/llama (microsoft#25328): Dependabot update. Notable upstream changes between 4.48.0 and 4.52.1 include the Llama 4, Phi4-Multimodal, DeepSeek-v3, Qwen3, and GLM-4 model additions plus a series of patch fixes; see the transformers release notes for the full list.
* Bump ruff from 0.12.2 to 0.12.3 (microsoft#25382): Dependabot update. 0.12.3 adds preview autofixes for several flake8-use-pathlib rules, fixes a few false positives and fix-introduced syntax errors, and updates documentation; see the ruff changelog for details.
* [QNN EP] Upgrade QNN to 2.36.1 (microsoft#25388): Update the default QNN version to 2.36.1.250708.
* Add vendor id to OrtEpFactory and default ORT logger to CreateEpFactories (microsoft#25365): Adds a vendor id to OrtEpFactory, since it is easier to get the vendor id than the name on other platforms, and updates the selection policy to prefer a match on vendor id with fallback to vendor name. Adds the default ORT logger to CreateEpFactories: the OrtEpFactory currently has no way to log informational messages or issues (CreateEp already receives the session logger for use by the OrtEp instance). Miscellaneous cleanups: make the usage of ORT_API2_STATUS and ORT_API_T consistent in onnxruntime_ep_c_api.h, and fix ort_version_supported in some EP factories where it was missed. Motivation: the vendor id is easier to match against OrtHardwareDevice when doing auto EP selection, OrtEpFactory should have a logger, and this is the last chance to clean up the APIs before the 1.23 release.
* Bump lintrunner-adapters from 0.12.4 to 0.12.5 (microsoft#25380)
* [WebNN] Add rank range validation for rest ops (microsoft#25383): Add common rank range validation to base_op_builder.cc, handle op-specific rank range validation for the remaining ops, remove duplicated input_shape validation, and fix some typos along the way.
* Fix some test issues when WebGPU and DML are enabled in the same build (microsoft#25401): Fix test setups that did not expect both EPs to be present in the same build.
* Fix SigLIP causal mask bug (microsoft#25360): The SigLIP architecture inside the vision encoder should not use a causal mask on the attention; this fixes the Phi 4 MM accuracy issues that have been observed. (Co-authored-by: github-actions[bot])
* [CPU] GQA supports attention scores output (microsoft#25319):
  1. Add an optional output to the CPU implementation of the GQA op for storing attention scores (QK). The buffer has shape (B, N, S, T) and can be either fp16 or fp32, depending on the type of the other inputs.
  2. Add a `qk_output` attribute to GQA, which controls whether attention scores are saved before or after softmax is applied.
  3. Add unit tests to cover this use case.
  4. Add asserts on other EPs if this feature is used.
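  A hedged NumPy sketch of what the optional output holds (shapes follow the description above: B=batch, N=heads, S=query length, T=total sequence length; the string values for `qk_output` are illustrative, not the actual attribute encoding):

  ```python
  import numpy as np

  def attention_with_scores(q, k, v, qk_output="none"):
      """q: (B, N, S, d); k, v: (B, N, T, d). Returns (output, optional QK scores of shape (B, N, S, T))."""
      scale = 1.0 / np.sqrt(q.shape[-1])
      qk = np.einsum("bnsd,bntd->bnst", q, k) * scale           # raw attention scores, (B, N, S, T)
      probs = np.exp(qk - qk.max(axis=-1, keepdims=True))
      probs /= probs.sum(axis=-1, keepdims=True)                # softmax over T
      out = np.einsum("bnst,bntd->bnsd", probs, v)
      if qk_output == "before_softmax":
          return out, qk
      if qk_output == "after_softmax":
          return out, probs
      return out, None
  ```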
* [QNN-EP] Support GridSample of linear mode for ONNX opset 20+ (microsoft#25408)
* [QNN-EP] Update ScatterND op to reject only QNN-CPU (microsoft#25403): The current limitation is broader than necessary; reject only when targeting the QNN CPU backend.
* Fix 2 device discovery issues (microsoft#25397): Fix the vendor and device id conversion from SetupApi info, and detect and skip the Remote Display Adapter, which otherwise appears as a bogus device when connected to a machine over remote desktop.
* [webgpu] fix Slice implementation (microsoft#25415): Bugfix: crash when dim_value is 0. Thanks to @skottmckay who found the bug.

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: Jianhui Dai <jianhui.j.dai@intel.com>
Co-authored-by: Jiajia Qin <jiajiaqin@microsoft.com>
Co-authored-by: Adrian Lizarraga <adlizarraga@microsoft.com>
Co-authored-by: Changming Sun <chasun@microsoft.com>
Co-authored-by: Fei Chen <feich@microsoft.com>
Co-authored-by: Yulong Wang <7679871+fs-eire@users.noreply.github.com>
Co-authored-by: Tianlei Wu <tlwu@microsoft.com>
Co-authored-by: vraspar <vrajang@outlook.com>
Co-authored-by: qti-yuduo <yuduow@qti.qualcomm.com>
Co-authored-by: Akupadhye <aupadhye@qti.qualcomm.com>
Co-authored-by: Wang Ning <ning4.wang@intel.com>
Co-authored-by: Maximilian Müller <44298237+gedoensmax@users.noreply.github.com>
Co-authored-by: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Co-authored-by: George Wu <jywu@microsoft.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Wanming Lin <wanming.lin@intel.com>
Co-authored-by: quic-calvnguy <quic_calvnguy@quicinc.com>
Co-authored-by: Hector Li <hecli@microsoft.com>
Co-authored-by: Jie Chen <jie.a.chen@intel.com>
Co-authored-by: xhcao <xinghua.cao@intel.com>
Co-authored-by: Wei-Sheng Chin <wschin@outlook.com>
Co-authored-by: quic-hungjuiw <quic_hungjuiw@quicinc.com>
Co-authored-by: Ian Hunter <ianfhunter@gmail.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: kunal-vaishnavi <115581922+kunal-vaishnavi@users.noreply.github.com>
Co-authored-by: Jeff Kilpatrick <jkilpatrick@qti.qualcomm.com>
Co-authored-by: Jeff Kilpatrick <jkilpat@qti.qualcomm.com>
Co-authored-by: Scott McKay <skottmckay@gmail.com>
Co-authored-by: Nenad Banfic <46795300+nenad1002@users.noreply.github.com>
Co-authored-by: derdeljan-msft <derdeljan@microsoft.com>
1 parent 776bedf commit bc3dc45

File tree: 174 files changed, +3985 / -877 lines


.github/workflows/windows_webgpu.yml

Lines changed: 2 additions & 0 deletions
@@ -22,6 +22,7 @@ jobs:
   strategy:
     matrix:
       vcpkg_option: [novcpkg, vcpkg]
+      wgsl_template: [static, dynamic]
   env:
     OrtPackageId: Microsoft.ML.OnnxRuntime
     OnnxRuntimeBuildDirectory: ${{ github.workspace }}
@@ -123,6 +124,7 @@ jobs:
         --build_nodejs `
         --build_java `
         --use_webgpu `
+        --wgsl_template ${{ matrix.wgsl_template }} `
         ${{ matrix.vcpkg_option == 'vcpkg' && '--use_vcpkg' || '' }} `
         --cmake_extra_defines `
         onnxruntime_BUILD_UNIT_TESTS=ON `

cmake/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -151,6 +151,7 @@ option(onnxruntime_DISABLE_SPARSE_TENSORS "Disable sparse tensors data types" OFF)
 option(onnxruntime_DISABLE_OPTIONAL_TYPE "Disable optional type" OFF)
 option(onnxruntime_DISABLE_FLOAT8_TYPES "Disable float 8 types" OFF)
 option(onnxruntime_MINIMAL_BUILD "Exclude as much as possible from the build. Support ORT format models. No support for ONNX format models." OFF)
+option(onnxruntime_CLIENT_PACKAGE_BUILD "Enables default settings that are more appropriate for client/on-device workloads." OFF)
 cmake_dependent_option(onnxruntime_DISABLE_RTTI "Disable RTTI" ON "NOT onnxruntime_ENABLE_PYTHON;NOT onnxruntime_USE_CUDA" OFF)
 # For now onnxruntime_DISABLE_EXCEPTIONS will only work with onnxruntime_MINIMAL_BUILD, more changes (ONNX, non-CPU EP, ...) are required to run this standalone
 cmake_dependent_option(onnxruntime_DISABLE_EXCEPTIONS "Disable exception handling. Requires onnxruntime_MINIMAL_BUILD currently." ON "onnxruntime_MINIMAL_BUILD;NOT onnxruntime_ENABLE_PYTHON" OFF)

cmake/adjust_global_compile_flags.cmake

Lines changed: 5 additions & 0 deletions
@@ -95,6 +95,11 @@ if (onnxruntime_MINIMAL_BUILD)
   endif()
 endif()

+# ORT build with default settings more appropriate for client/on-device workloads.
+if (onnxruntime_CLIENT_PACKAGE_BUILD)
+  add_compile_definitions(ORT_CLIENT_PACKAGE_BUILD)
+endif()
+
 if (onnxruntime_ENABLE_LTO)
   include(CheckIPOSupported)
   check_ipo_supported(RESULT ipo_enabled OUTPUT ipo_output)

cmake/external/onnxruntime_external_deps.cmake

Lines changed: 18 additions & 7 deletions
@@ -774,13 +774,24 @@ if (onnxruntime_USE_WEBGPU)
   endif()

   if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND onnxruntime_WGSL_TEMPLATE STREQUAL "dynamic")
-    onnxruntime_fetchcontent_declare(
-      duktape
-      URL ${DEP_URL_duktape}
-      URL_HASH SHA1=${DEP_SHA1_duktape}
-      EXCLUDE_FROM_ALL
-    )
-    onnxruntime_fetchcontent_makeavailable(duktape)
+    if(onnxruntime_USE_VCPKG)
+      find_package(unofficial-duktape CONFIG REQUIRED)
+      add_library(duktape_static ALIAS unofficial::duktape::duktape)
+    else()
+      onnxruntime_fetchcontent_declare(
+        duktape
+        URL ${DEP_URL_duktape}
+        URL_HASH SHA1=${DEP_SHA1_duktape}
+        EXCLUDE_FROM_ALL
+      )
+      onnxruntime_fetchcontent_makeavailable(duktape)
+
+      if(NOT TARGET duktape_static)
+        add_library(duktape_static STATIC "${duktape_SOURCE_DIR}/src/duktape.c")
+        target_compile_features(duktape_static PRIVATE c_std_99)
+        target_include_directories(duktape_static INTERFACE $<BUILD_INTERFACE:${duktape_SOURCE_DIR}/src>)
+      endif()
+    endif()
   endif()
 endif()

cmake/onnxruntime_mlas.cmake

Lines changed: 1 addition & 0 deletions
@@ -31,6 +31,7 @@ onnxruntime_add_static_library(onnxruntime_mlas
   ${MLAS_SRC_DIR}/eltwise.cpp
   ${MLAS_SRC_DIR}/erf.cpp
   ${MLAS_SRC_DIR}/compute.cpp
+  ${MLAS_SRC_DIR}/dequantize.cpp
   ${MLAS_SRC_DIR}/quantize.cpp
   ${MLAS_SRC_DIR}/qgemm_kernel_default.cpp
   ${MLAS_SRC_DIR}/qladd.cpp

cmake/onnxruntime_providers_tensorrt.cmake

Lines changed: 5 additions & 18 deletions
@@ -72,26 +72,21 @@
 endif()

 # TensorRT 10 GA onwards, the TensorRT libraries will have major version appended to the end on Windows,
-# for example, nvinfer_10.dll, nvinfer_plugin_10.dll, nvonnxparser_10.dll ...
+# for example, nvinfer_10.dll, nvonnxparser_10.dll ...
 if (WIN32 AND TRT_GREATER_OR_EQUAL_TRT_10_GA)
   set(NVINFER_LIB "nvinfer_${NV_TENSORRT_MAJOR}")
-  set(NVINFER_PLUGIN_LIB "nvinfer_plugin_${NV_TENSORRT_MAJOR}")
   set(PARSER_LIB "nvonnxparser_${NV_TENSORRT_MAJOR}")
 endif()

 if (NOT NVINFER_LIB)
   set(NVINFER_LIB "nvinfer")
 endif()

-if (NOT NVINFER_PLUGIN_LIB)
-  set(NVINFER_PLUGIN_LIB "nvinfer_plugin")
-endif()
-
 if (NOT PARSER_LIB)
   set(PARSER_LIB "nvonnxparser")
 endif()

-MESSAGE(STATUS "Looking for ${NVINFER_LIB} and ${NVINFER_PLUGIN_LIB}")
+MESSAGE(STATUS "Looking for ${NVINFER_LIB}")

 find_library(TENSORRT_LIBRARY_INFER ${NVINFER_LIB}
   HINTS ${TENSORRT_ROOT}
@@ -101,14 +96,6 @@
   MESSAGE(STATUS "Can't find ${NVINFER_LIB}")
 endif()

-find_library(TENSORRT_LIBRARY_INFER_PLUGIN ${NVINFER_PLUGIN_LIB}
-  HINTS ${TENSORRT_ROOT}
-  PATH_SUFFIXES lib lib64 lib/x64)
-
-if (NOT TENSORRT_LIBRARY_INFER_PLUGIN)
-  MESSAGE(STATUS "Can't find ${NVINFER_PLUGIN_LIB}")
-endif()
-
 if (onnxruntime_USE_TENSORRT_BUILTIN_PARSER)
   MESSAGE(STATUS "Looking for ${PARSER_LIB}")

@@ -120,7 +107,7 @@
   MESSAGE(STATUS "Can't find ${PARSER_LIB}")
   endif()

-  set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER} ${TENSORRT_LIBRARY_INFER_PLUGIN} ${TENSORRT_LIBRARY_NVONNXPARSER})
+  set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER} ${TENSORRT_LIBRARY_NVONNXPARSER})
   MESSAGE(STATUS "Find TensorRT libs at ${TENSORRT_LIBRARY}")
 else()
   if (TRT_GREATER_OR_EQUAL_TRT_10_GA)
@@ -153,15 +140,15 @@
   endif()
   # Static libraries are just nvonnxparser_static on all platforms
   set(onnxparser_link_libs nvonnxparser_static)
-  set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER} ${TENSORRT_LIBRARY_INFER_PLUGIN})
+  set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER})
   MESSAGE(STATUS "Find TensorRT libs at ${TENSORRT_LIBRARY}")
 endif()

 # ${TENSORRT_LIBRARY} is empty if we link nvonnxparser_static.
 # nvonnxparser_static is linked against tensorrt libraries in onnx-tensorrt
 # See https://github.com/onnx/onnx-tensorrt/blob/8af13d1b106f58df1e98945a5e7c851ddb5f0791/CMakeLists.txt#L121
 # However, starting from TRT 10 GA, nvonnxparser_static doesn't link against tensorrt libraries.
-# Therefore, the above code finds ${TENSORRT_LIBRARY_INFER} and ${TENSORRT_LIBRARY_INFER_PLUGIN}.
+# Therefore, the above code finds ${TENSORRT_LIBRARY_INFER}.
 if(onnxruntime_CUDA_MINIMAL)
   set(trt_link_libs ${CMAKE_DL_LIBS} ${TENSORRT_LIBRARY})
 else()

cmake/onnxruntime_providers_webgpu.cmake

Lines changed: 6 additions & 5 deletions
@@ -172,10 +172,12 @@
   file(MAKE_DIRECTORY ${WGSL_GENERATED_DIR})

   # Find all WGSL template input files
-  file(GLOB_RECURSE WGSL_TEMPLATE_FILES "${ONNXRUNTIME_ROOT}/core/providers/webgpu/*.wgsl.template")
+  file(GLOB_RECURSE WGSL_TEMPLATE_FILES
+    "${ONNXRUNTIME_ROOT}/core/providers/webgpu/*.wgsl.template"
+    "${ONNXRUNTIME_ROOT}/contrib_ops/webgpu/*.wgsl.template")

   # Set wgsl-gen command line options as a list
-  set(WGSL_GEN_OPTIONS "-i" "../" "--output" "${WGSL_GENERATED_DIR}" "-I" "wgsl_template_gen/" "--preserve-code-ref" "--verbose")
+  set(WGSL_GEN_OPTIONS "-i" "${ONNXRUNTIME_ROOT}/core/providers/webgpu/" "-i" "${ONNXRUNTIME_ROOT}/contrib_ops/webgpu/" "--output" "${WGSL_GENERATED_DIR}" "-I" "wgsl_template_gen/" "--preserve-code-ref" "--verbose")
   if (onnxruntime_WGSL_TEMPLATE STREQUAL "static")
     if (CMAKE_BUILD_TYPE STREQUAL "Debug")
       list(APPEND WGSL_GEN_OPTIONS "--generator" "static-cpp-literal")
@@ -207,10 +209,9 @@
     # Add the generated directory to include paths
     target_include_directories(onnxruntime_providers_webgpu PRIVATE ${WGSL_GENERATED_ROOT})
   elseif(onnxruntime_WGSL_TEMPLATE STREQUAL "dynamic")
-    add_library(duktape_static STATIC "${duktape_SOURCE_DIR}/src/duktape.c")
-    target_compile_features(duktape_static PRIVATE c_std_99)
     target_link_libraries(onnxruntime_providers_webgpu duktape_static)
-    target_include_directories(onnxruntime_providers_webgpu PRIVATE ${duktape_SOURCE_DIR}/src)
+    onnxruntime_add_include_to_target(onnxruntime_providers_webgpu duktape_static)
+
     # Define the path to the generated templates.js file
     target_compile_definitions(onnxruntime_providers_webgpu PRIVATE
       "ORT_WGSL_TEMPLATES_JS_PATH=\"${WGSL_GENERATED_TEMPLATES_JS}\"")

cmake/vcpkg.json

Lines changed: 8 additions & 1 deletion
@@ -43,7 +43,6 @@
     "ms-gsl",
     "nlohmann-json",
     "onnx",
-    "optional-lite",
     {
       "name": "protobuf",
       "version>=": "3.21.12"
@@ -94,6 +93,10 @@
     "webgpu-ep": {
       "description": "Build with WebGPU EP",
       "dependencies": []
+    },
+    "webgpu-ep-wgsl-template-dynamic": {
+      "description": "Build with WebGPU EP with dynamic WGSL template code generator",
+      "dependencies": ["duktape"]
     }
   },
   "overrides": [
@@ -104,6 +107,10 @@
     {
       "name": "flatbuffers",
       "version": "23.5.26"
+    },
+    {
+      "name": "duktape",
+      "version": "2.7.0#2"
     }
   ]
 }

csharp/test/Microsoft.ML.OnnxRuntime.EndToEndTests.Mobile/EndToEndTests.Mobile.Automation/Tests.cs

Lines changed: 3 additions & 1 deletion
@@ -40,10 +40,12 @@ public void RunPlatformUnitTest()
             var serializedResultSummary = _app.Invoke(_getResultsBackdoorMethodName)?.ToString();
             Assert.IsNotEmpty(serializedResultSummary, "Test results were not returned");

+            // Fix security issue (overflow with too much nesting): GHSA-5crp-9r3c-p9vr
+            JsonConvert.DefaultSettings = () => new JsonSerializerSettings { MaxDepth = 128 };
             var testSummary = JsonConvert.DeserializeObject<TestResultSummary>(serializedResultSummary);
             Assert.AreEqual(testSummary.Failed, 0, $"{testSummary.Failed} tests failed");

             _app.Screenshot("Post-testing");
         }
     }
-}
+}

csharp/test/Microsoft.ML.OnnxRuntime.Tests.Devices/TestResultProcessor.cs

Lines changed: 2 additions & 1 deletion
@@ -45,8 +45,9 @@ public TestResultSummary GetResults()
         public string GetSerializedResults()
         {
             var resultSummary = GetResults();
+            JsonConvert.DefaultSettings = () => new JsonSerializerSettings { MaxDepth = 128 };
             var serializedResultSummary = JsonConvert.SerializeObject(resultSummary, Formatting.Indented);
             return serializedResultSummary;
         }
     }
-}
+}
