diff --git a/onnxruntime/contrib_ops/cpu/utils/debug_macros.h b/onnxruntime/contrib_ops/cpu/utils/debug_macros.h index 47d0fc5e4008c..47a73cb9528fd 100644 --- a/onnxruntime/contrib_ops/cpu/utils/debug_macros.h +++ b/onnxruntime/contrib_ops/cpu/utils/debug_macros.h @@ -6,7 +6,7 @@ #ifdef DEBUG_GENERATION #define DUMP_TENSOR_LEVEL 2 #else -#define DUMP_TENSOR_LEVEL 0 // change it to 1 or 2 if want to enable dumping for code not in generation. +#define DUMP_TENSOR_LEVEL 1 // change it to 1 or 2 if want to enable dumping for code not in generation. #endif #define DUMP_CPU_TENSOR_LEVEL DUMP_TENSOR_LEVEL diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h index 2cf3d3faa8b47..6801678e0532b 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.h +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h @@ -57,6 +57,7 @@ struct OnnxToOvNetworkBindings { "beam_idx", "past_key_values", "present", + "total_seq_len", }; OnnxToOvNetworkBindings(OVExeNetwork& exec_network, SubGraphContext& subgraph_context, SessionContext& session_context) { @@ -72,7 +73,7 @@ struct OnnxToOvNetworkBindings { // stateful representation has introduced these new tensors, creating a name mismatch (matched_names=false). // So, if there is a name mismatch, or the name matches our special io list, we simply continue processing // here to prevent runtime exceptions. - if (session_context.enable_causallm) { + //if (session_context.enable_causallm) { if (!matched_names || std::any_of(special_io_names_.begin(), special_io_names_.end(), [&onnx_name](const std::string& name) { return onnx_name.find(name) != std::string::npos; })) { @@ -80,7 +81,7 @@ struct OnnxToOvNetworkBindings { has_dynamic_io_ = true; continue; } - } + //} ORT_ENFORCE(matched_names, log_tag, "Input names mismatch between OpenVINO and ONNX. ", onnx_name, diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index f848b89ed10c8..d2ee8317e0249 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -96,6 +96,7 @@ std::vector supported_op_mode = { {"Atanh", V_2020_4, {"CPU"}}, {"Atanh", V_2022_1, {"GPU"}}, {"Attention", V_2023_0, {"CPU", "GPU"}}, + {"GroupQueryAttention", V_2023_0, {"CPU", "GPU"}}, {"AveragePool", V_2020_4, {"CPU", "GPU"}}, {"BatchNormalization", V_2020_4, {"CPU", "GPU"}}, {"BiasGelu", V_2023_0, {"CPU", "GPU"}}, diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index edceae55ddda4..c00dfc8959f50 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -386,6 +386,9 @@ def generate_build_tree( "-Donnxruntime_ENABLE_PIX_FOR_WEBGPU_EP=" + ("ON" if args.enable_pix_capture else "OFF"), ] + if 1: #args.dump_node_input_output: + cmake_args.append("-Donnxruntime_DEBUG_NODE_INPUTS_OUTPUTS=ON") + if args.caller_framework: cmake_args.append("-Donnxruntime_CALLER_FRAMEWORK=" + args.caller_framework) if args.winml_root_namespace_override: diff --git a/tools/ci_build/build_args.py b/tools/ci_build/build_args.py index de538604aac75..0704aa47c9f7f 100644 --- a/tools/ci_build/build_args.py +++ b/tools/ci_build/build_args.py @@ -842,6 +842,12 @@ def convert_arg_line_to_args(self, arg_line: str) -> list[str]: # Use list[str] fromfile_prefix_chars="@", # Allow args from file (@filename) ) + # dump_node_input_output + parser.add_argument( + "--dump_node_input_output", + type=str, + help="Dump node input/output data to files in the specified directory.", + ) # Add arguments by category add_core_build_args(parser) add_cmake_build_config_args(parser)