@@ -1110,6 +1110,9 @@ class ZeCollector {
11101110 std::vector<std::string> knames;
11111111 size_t max_name_size = 0 ;
11121112 global_device_time_stats_mutex_.lock ();
1113+
1114+ AggregateDeviceTimeStats ();
1115+
11131116 std::set<std::pair<ZeKernelCommandNameKey, ZeKernelCommandTime>, utils::Comparator> sorted_list (
11141117 global_device_time_stats_->begin (), global_device_time_stats_->end ());
11151118
@@ -1197,6 +1200,9 @@ class ZeCollector {
11971200 std::vector<std::string> knames;
11981201 size_t max_name_size = 0 ;
11991202 global_device_time_stats_mutex_.lock ();
1203+
1204+ AggregateDeviceTimeStats ();
1205+
12001206 std::set<std::pair<ZeKernelCommandNameKey, ZeKernelCommandTime>, utils::Comparator> sorted_list (
12011207 global_device_time_stats_->begin (), global_device_time_stats_->end ());
12021208
@@ -3980,6 +3986,49 @@ class ZeCollector {
39803986 local_device_submissions_.CollectHostFunctionTimeStats (id, time);
39813987 }
39823988
3989+ void AggregateDeviceTimeStats () const {
3990+ // do not acquire global_device_time_stats_mutex_. caller dos it.
3991+ for (auto it = global_device_time_stats_->begin (); it != global_device_time_stats_->end (); it++) {
3992+ std::string kname;
3993+ if (it->first .tile_ >= 0 ) {
3994+ kname = " Tile #" + std::to_string (it->first .tile_ ) + " : " + GetZeKernelCommandName (it->first .kernel_command_id_ , it->first .group_count_ , it->first .mem_size_ , options_.verbose );
3995+ }
3996+ else {
3997+ kname = GetZeKernelCommandName (it->first .kernel_command_id_ , it->first .group_count_ , it->first .mem_size_ , options_.verbose );
3998+ }
3999+
4000+ auto it2 = it;
4001+ it2++;
4002+
4003+ for (; it2 != global_device_time_stats_->end ();) {
4004+ std::string kname2;
4005+ if (it2->first .tile_ >= 0 ) {
4006+ kname2 = " Tile #" + std::to_string (it2->first .tile_ ) + " : " + GetZeKernelCommandName (it2->first .kernel_command_id_ , it2->first .group_count_ , it2->first .mem_size_ , options_.verbose );
4007+ }
4008+ else {
4009+ kname2 = GetZeKernelCommandName (it2->first .kernel_command_id_ , it2->first .group_count_ , it2->first .mem_size_ , options_.verbose );
4010+ }
4011+
4012+ if (kname2 == kname) {
4013+ it->second .append_time_ += it2->second .append_time_ ;
4014+ it->second .submit_time_ += it2->second .submit_time_ ;
4015+ it->second .execute_time_ += it2->second .execute_time_ ;
4016+ if (it->second .min_time_ > it2->second .min_time_ ) {
4017+ it->second .min_time_ = it2->second .min_time_ ;
4018+ }
4019+ if (it->second .max_time_ < it2->second .max_time_ ) {
4020+ it->second .max_time_ = it2->second .max_time_ ;
4021+ }
4022+ it->second .call_count_ += it2->second .call_count_ ;
4023+ it2 = global_device_time_stats_->erase (it2);
4024+ }
4025+ else {
4026+ it2++;
4027+ }
4028+ }
4029+ }
4030+ }
4031+
39834032 private: // Data
39844033 zel_tracer_handle_t tracer_ = nullptr ;
39854034 CollectorOptions options_;
0 commit comments