Skip to content

Commit 6a19f46

Browse files
Profiler Team and copybara-github
authored and committed
Feat: Add cost analysis of Pallas kernels using LLO tracing
This change introduces the capability to perform cost analysis of Pallas kernels within XProf. Pallas kernels are represented as "custom-call" HLOs, and this change enables XProf to estimate their performance characteristics (flops, IOPS, and DMA bandwidth) by analyzing Low-Level Optimized (LLO) instruction traces.

PiperOrigin-RevId: 831937474
1 parent c886d9e commit 6a19f46

File tree

1 file changed

+19
-0
lines changed

1 file changed

+19
-0
lines changed

xprof/utils/op_metrics_db_utils.cc

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -245,6 +245,25 @@ void SetOpMetricsFromHloEvent(const tsl::profiler::XEventVisitor& hlo_event,
245245
normalized_duration_ps);
246246
op_metrics->set_dma_stall_ps(op_metrics->dma_stall_ps() + dma_stall_ps);
247247
}
248+
// Fill The Custom Call Information
249+
if (op_metrics->category() == "custom-call") {
250+
hlo_event.ForEachStat([&](const XStatVisitor& stat) {
251+
if (!stat.Type()) return;
252+
switch (static_cast<StatType>(*stat.Type())) {
253+
case StatType::kBytesAccessed:
254+
op_metrics->set_bytes_accessed(stat.IntOrUintValue());
255+
break;
256+
case StatType::kModelFlops:
257+
op_metrics->set_model_flops(stat.IntOrUintValue());
258+
break;
259+
case StatType::kFlops:
260+
op_metrics->set_flops(stat.IntOrUintValue());
261+
break;
262+
default:
263+
break;
264+
}
265+
});
266+
}
248267
}
249268

250269
void MergeOpMetrics(const OpMetrics& src, OpMetrics& dst) {

0 commit comments

Comments
 (0)