Skip to content

Commit d1b6b70

Browse files
Profiler Team authored and copybara-github committed
Add SparseCoreBoundRule to smart suggestions.
PiperOrigin-RevId: 831462946
1 parent f72e32a commit d1b6b70

File tree

5 files changed

+315
-1
lines changed

5 files changed

+315
-1
lines changed

xprof/convert/smart_suggestion/BUILD

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,20 @@ cc_library(
164164
],
165165
)
166166

167+
# Header-only library (hdrs only, no srcs) exposing sparse_core_bound_rule.h.
cc_library(
168+
name = "sparse_core_bound_rule",
169+
hdrs = ["sparse_core_bound_rule.h"],
170+
deps = [
171+
":signal_provider",
172+
":smart_suggestion_rule",
173+
"@com_google_absl//absl/status:statusor",
174+
"@com_google_absl//absl/strings",
175+
"@com_google_absl//absl/strings:str_format",
176+
"@org_xprof//plugin/xprof/protobuf:smart_suggestion_proto_cc",
177+
"@xla//xla/tsl/platform:statusor",
178+
],
179+
)
180+
167181
cc_library(
168182
name = "smart_suggestion_rule_factory",
169183
hdrs = ["smart_suggestion_rule_factory.h"],
@@ -183,6 +197,7 @@ cc_library(
183197
":input_bound_rule",
184198
":memory_bound_rule",
185199
":smart_suggestion_rule_factory",
200+
":sparse_core_bound_rule",
186201
":tensor_core_idle_bound_rule",
187202
],
188203
)
@@ -290,3 +305,19 @@ cc_test(
290305
"@org_xprof//plugin/xprof/protobuf:tpu_input_pipeline_proto_cc",
291306
],
292307
)
308+
309+
# Unit test target for :sparse_core_bound_rule (sparse_core_bound_rule_test.cc).
cc_test(
310+
name = "sparse_core_bound_rule_test",
311+
srcs = ["sparse_core_bound_rule_test.cc"],
312+
deps = [
313+
":signal_provider",
314+
":sparse_core_bound_rule",
315+
":tool_data_provider",
316+
"@com_google_absl//absl/status:statusor",
317+
"@com_google_googletest//:gtest_main",
318+
"@org_xprof//plugin/xprof/protobuf:input_pipeline_proto_cc",
319+
"@org_xprof//plugin/xprof/protobuf:overview_page_proto_cc",
320+
"@org_xprof//plugin/xprof/protobuf:smart_suggestion_proto_cc",
321+
"@org_xprof//plugin/xprof/protobuf:tpu_input_pipeline_proto_cc",
322+
],
323+
)

xprof/convert/smart_suggestion/all_rules.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ limitations under the License.
2323
#include "xprof/convert/smart_suggestion/input_bound_rule.h"
2424
#include "xprof/convert/smart_suggestion/memory_bound_rule.h"
2525
#include "xprof/convert/smart_suggestion/smart_suggestion_rule_factory.h"
26+
#include "xprof/convert/smart_suggestion/sparse_core_bound_rule.h"
2627
#include "xprof/convert/smart_suggestion/tensor_core_idle_bound_rule.h"
2728

2829
namespace tensorflow {
@@ -37,6 +38,7 @@ inline void RegisterAllRules(SmartSuggestionRuleFactory* f) {
3738
f->Register<HostProcessingBoundRule>();
3839
f->Register<InputBoundRule>();
3940
f->Register<MemoryBoundRule>();
41+
f->Register<SparseCoreBoundRule>();
4042
f->Register<TensorCoreIdleBoundRule>();
4143
// go/keep-sorted end
4244
}

xprof/convert/smart_suggestion/signal_provider.h

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,28 @@ class SignalProvider {
145145
return (total_percent / event_time_of_interest.size()) * 100.0;
146146
}
147147

148+
// Returns the percentage of time that is spent on SparseCore.
149+
absl::StatusOr<double> GetSparseCoreTimePercent() const {
150+
TF_ASSIGN_OR_RETURN(const auto* input_pipeline_analysis,
151+
tool_data_provider_->GetInputPipelineAnalysisResult());
152+
TpuStepTimeBreakdown step_time_breakdown;
153+
double sparse_core_time_ms = 0.0;
154+
if (input_pipeline_analysis->step_time_breakdown().UnpackTo(
155+
&step_time_breakdown)) {
156+
sparse_core_time_ms = step_time_breakdown.sparse_core_step_summary()
157+
.sc_step_time_ms_summary()
158+
.average();
159+
} else {
160+
return absl::NotFoundError("Failed to unpack TpuStepTimeBreakdown.");
161+
}
162+
double step_time_ms =
163+
input_pipeline_analysis->step_time_summary().average();
164+
if (step_time_ms == 0) {
165+
return 0.0;
166+
}
167+
return (sparse_core_time_ms / step_time_ms) * 100.0;
168+
}
169+
148170
// Returns true if the profile is latency bound, i.e. MXU and HBM utilization
149171
// are both below 50%.
150172
bool IsLatencyBound() const {
@@ -155,7 +177,7 @@ class SignalProvider {
155177
}
156178

157179
return *mxu_utilization < kMxuUtilizationLowThreshold &&
158-
*hbm_utilization < kHbmUtilizationLowThreshold;
180+
*hbm_utilization < kHbmUtilizationLowThreshold;
159181
}
160182

161183
private:
xprof/convert/smart_suggestion/sparse_core_bound_rule.h

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.
14+
==============================================================================*/
15+
16+
#ifndef THIRD_PARTY_XPROF_CONVERT_SMART_SUGGESTION_SPARSE_CORE_BOUND_RULE_H_
17+
#define THIRD_PARTY_XPROF_CONVERT_SMART_SUGGESTION_SPARSE_CORE_BOUND_RULE_H_
18+
19+
#include <optional>
20+
#include <string>
21+
22+
#include "absl/status/statusor.h"
23+
#include "absl/strings/str_cat.h"
24+
#include "absl/strings/str_format.h"
25+
#include "xla/tsl/platform/statusor.h"
26+
#include "xprof/convert/smart_suggestion/signal_provider.h"
27+
#include "xprof/convert/smart_suggestion/smart_suggestion_rule.h"
28+
#include "plugin/xprof/protobuf/smart_suggestion.pb.h"
29+
30+
namespace tensorflow {
31+
namespace profiler {
32+
33+
// If the percentage of SparseCore time is higher than
34+
// kSparseCoreTimeThresholdInPercent, it is considered SparseCore time bound.
35+
constexpr double kSparseCoreTimeThresholdInPercent = 10;
36+
37+
// Rule to detect high SparseCore time bottleneck.
38+
class SparseCoreBoundRule : public SmartSuggestionRule {
39+
public:
40+
bool MeetsConditions(const SignalProvider& signal_provider) const override {
41+
if (!signal_provider.IsLatencyBound()) {
42+
return false;
43+
}
44+
45+
absl::StatusOr<double> sparse_core_time_percent =
46+
signal_provider.GetSparseCoreTimePercent();
47+
if (!sparse_core_time_percent.ok()) {
48+
return false;
49+
}
50+
51+
return *sparse_core_time_percent > kSparseCoreTimeThresholdInPercent;
52+
}
53+
54+
absl::StatusOr<std::optional<SmartSuggestion>> GenerateSuggestion(
55+
const SignalProvider& signal_provider) const override {
56+
SmartSuggestion suggestion;
57+
suggestion.set_rule_name("SparseCoreBoundRule");
58+
59+
TF_ASSIGN_OR_RETURN(double sparse_core_time_percent,
60+
signal_provider.GetSparseCoreTimePercent());
61+
std::string suggestion_text = absl::StrCat(
62+
"<p>Your program is likely bottlenecked by <b>SparseCore Operations"
63+
"</b> in the TPU: <b>",
64+
absl::StrFormat("%.1f", sparse_core_time_percent),
65+
"% of the total step time </b> is spent on SparseCore. Please consider "
66+
"the following optimizations: </p>",
67+
"<ul>"
68+
"<li><b>Refine Sparse Data Representation:</b> Ensure your sparse "
69+
"tensors are in the most performant format for your hardware (e.g., "
70+
"CSR/CSC if suitable). Pre-process data to improve memory access "
71+
"patterns on the SparseCore, like sorting indices or grouping related "
72+
"features.</li>"
73+
"<li><b>Streamline Embedding Tables:</b> For large embedding tables, "
74+
"consider quantization (reducing precision like int8) or pruning to "
75+
"significantly cut down their memory footprint and processing load on "
76+
"the SparseCore.</li>"
77+
"<li><b>Utilize Framework-Specific Sparse APIs::</b> Employ "
78+
"specialized APIs designed for sparse operations on your platform "
79+
"(e.g., tf.tpu.experimental.embedding.TPU Embedding for "
80+
"TensorFlow/TPU). These are highly optimized for direct SparseCore "
81+
"interaction.</li>"
82+
"</ul>");
83+
84+
suggestion.set_suggestion_text(suggestion_text);
85+
return suggestion;
86+
}
87+
};
88+
89+
} // namespace profiler
90+
} // namespace tensorflow
91+
92+
#endif // THIRD_PARTY_XPROF_CONVERT_SMART_SUGGESTION_SPARSE_CORE_BOUND_RULE_H_
xprof/convert/smart_suggestion/sparse_core_bound_rule_test.cc

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.
14+
==============================================================================*/
15+
16+
#include "xprof/convert/smart_suggestion/sparse_core_bound_rule.h"
17+
18+
#include <memory>
19+
#include <optional>
20+
#include <utility>
21+
#include <vector>
22+
#include <string>
23+
24+
#include "testing/base/public/gmock.h"
25+
#include "<gtest/gtest.h>"
26+
#include "absl/status/statusor.h"
27+
#include "xprof/convert/smart_suggestion/signal_provider.h"
28+
#include "xprof/convert/smart_suggestion/tool_data_provider.h"
29+
#include "plugin/xprof/protobuf/input_pipeline.pb.h"
30+
#include "plugin/xprof/protobuf/overview_page.pb.h"
31+
#include "plugin/xprof/protobuf/smart_suggestion.pb.h"
32+
#include "plugin/xprof/protobuf/tpu_input_pipeline.pb.h"
33+
34+
namespace tensorflow {
35+
namespace profiler {
36+
namespace {
37+
38+
using ::testing::Eq;
39+
using ::testing::Return;
40+
using ::testing::status::IsOkAndHolds;
41+
42+
// Mock ToolDataProvider
43+
class MockToolDataProvider : public ToolDataProvider {
44+
public:
45+
MOCK_METHOD(absl::StatusOr<const OverviewPage*>, GetOverviewPage, (),
46+
(override));
47+
MOCK_METHOD(absl::StatusOr<const InputPipelineAnalysisResult*>,
48+
GetInputPipelineAnalysisResult, (), (override));
49+
MOCK_METHOD(absl::StatusOr<std::vector<float>>,
50+
GetEventTimeFractionEachStep, (const std::string&),
51+
(override));
52+
};
53+
54+
// MXU and HBM utilization are both 49% and SparseCore takes 11 ms of a 100 ms
// average step: the rule is expected to fire and report "11.0%".
TEST(SparseCoreBoundRuleTest, MeetsConditions) {
  auto mock_tool_data_provider = std::make_unique<MockToolDataProvider>();
  InputPipelineAnalysisResult input_pipeline_analysis;
  input_pipeline_analysis.mutable_step_time_summary()->set_average(100.0);
  TpuStepTimeBreakdown step_time_breakdown;
  step_time_breakdown.mutable_sparse_core_step_summary()
      ->mutable_sc_step_time_ms_summary()
      ->set_average(11.0);
  input_pipeline_analysis.mutable_step_time_breakdown()->PackFrom(
      step_time_breakdown);

  EXPECT_CALL(*mock_tool_data_provider, GetInputPipelineAnalysisResult())
      .WillRepeatedly(Return(&input_pipeline_analysis));

  OverviewPage overview_page;
  overview_page.mutable_analysis()->set_mxu_utilization_percent(49.0);
  overview_page.mutable_analysis()
      ->set_memory_bw_utilization_relative_to_hw_limit_percent(49.0);
  EXPECT_CALL(*mock_tool_data_provider, GetOverviewPage())
      .WillRepeatedly(Return(&overview_page));

  SignalProvider signal_provider(std::move(mock_tool_data_provider));
  SparseCoreBoundRule rule;

  absl::StatusOr<std::optional<SmartSuggestion>> suggestion =
      rule.Apply(signal_provider);
  // ASSERT rather than EXPECT: the checks below dereference both the StatusOr
  // and the optional, which must not happen if either is empty.
  ASSERT_THAT(suggestion, IsOkAndHolds(testing::Not(Eq(std::nullopt))));
  EXPECT_EQ((*suggestion)->rule_name(), "SparseCoreBoundRule");
  EXPECT_THAT((*suggestion)->suggestion_text(),
              testing::HasSubstr(
                  "11.0% of the total step time </b> is spent on SparseCore"));
}
86+
87+
// SparseCore takes 9 ms of a 100 ms average step while MXU and HBM
// utilization are both 49%: the SparseCore share is too small for the rule to
// fire, so no suggestion is expected.
TEST(SparseCoreBoundRuleTest, IdleTimeTooLow) {
  OverviewPage overview;
  auto* overview_analysis = overview.mutable_analysis();
  overview_analysis->set_mxu_utilization_percent(49.0);
  overview_analysis
      ->set_memory_bw_utilization_relative_to_hw_limit_percent(49.0);

  TpuStepTimeBreakdown tpu_breakdown;
  tpu_breakdown.mutable_sparse_core_step_summary()
      ->mutable_sc_step_time_ms_summary()
      ->set_average(9.0);
  InputPipelineAnalysisResult pipeline_result;
  pipeline_result.mutable_step_time_summary()->set_average(100.0);
  pipeline_result.mutable_step_time_breakdown()->PackFrom(tpu_breakdown);

  auto provider = std::make_unique<MockToolDataProvider>();
  EXPECT_CALL(*provider, GetOverviewPage())
      .WillRepeatedly(Return(&overview));
  EXPECT_CALL(*provider, GetInputPipelineAnalysisResult())
      .WillRepeatedly(Return(&pipeline_result));

  SignalProvider signal_provider(std::move(provider));
  SparseCoreBoundRule rule;

  const absl::StatusOr<std::optional<SmartSuggestion>> result =
      rule.Apply(signal_provider);
  EXPECT_THAT(result, IsOkAndHolds(Eq(std::nullopt)));
}
115+
116+
// The step time breakdown holds a GenericStepTimeBreakdown instead of a
// TpuStepTimeBreakdown, so the SparseCore percentage cannot be computed and
// the rule must not produce a suggestion.
TEST(SparseCoreBoundRuleTest, NoTpuStepTimeBreakdownField) {
  auto mock_tool_data_provider = std::make_unique<MockToolDataProvider>();
  InputPipelineAnalysisResult input_pipeline_analysis;
  input_pipeline_analysis.mutable_step_time_summary()->set_average(100.0);
  GenericStepTimeBreakdown step_time_breakdown;
  step_time_breakdown.mutable_input_ms_summary()->set_average(9.0);
  input_pipeline_analysis.mutable_step_time_breakdown()->PackFrom(
      step_time_breakdown);

  EXPECT_CALL(*mock_tool_data_provider, GetInputPipelineAnalysisResult())
      .WillRepeatedly(Return(&input_pipeline_analysis));

  // Provide a latency-bound overview page (same values as the other tests in
  // this file) so the rule gets past its IsLatencyBound() check and actually
  // exercises the unpack-failure path. NOTE(review): without this stub the
  // unmocked GetOverviewPage() presumably yields a non-OK default and the
  // rule returns early for an unrelated reason - confirm against
  // SignalProvider::IsLatencyBound.
  OverviewPage overview_page;
  overview_page.mutable_analysis()->set_mxu_utilization_percent(49.0);
  overview_page.mutable_analysis()
      ->set_memory_bw_utilization_relative_to_hw_limit_percent(49.0);
  EXPECT_CALL(*mock_tool_data_provider, GetOverviewPage())
      .WillRepeatedly(Return(&overview_page));

  SignalProvider signal_provider(std::move(mock_tool_data_provider));
  SparseCoreBoundRule rule;

  absl::StatusOr<std::optional<SmartSuggestion>> suggestion =
      rule.Apply(signal_provider);
  EXPECT_THAT(suggestion, IsOkAndHolds(Eq(std::nullopt)));
}
135+
136+
// SparseCore takes 11 ms of a 100 ms average step, but MXU and HBM
// utilization are both 51%, so the profile is not latency bound and no
// suggestion is expected.
TEST(SparseCoreBoundRuleTest, HbmAndMxuUtilizationTooHigh) {
  OverviewPage overview;
  auto* overview_analysis = overview.mutable_analysis();
  overview_analysis->set_mxu_utilization_percent(51.0);
  overview_analysis
      ->set_memory_bw_utilization_relative_to_hw_limit_percent(51.0);

  TpuStepTimeBreakdown tpu_breakdown;
  tpu_breakdown.mutable_sparse_core_step_summary()
      ->mutable_sc_step_time_ms_summary()
      ->set_average(11.0);
  InputPipelineAnalysisResult pipeline_result;
  pipeline_result.mutable_step_time_summary()->set_average(100.0);
  pipeline_result.mutable_step_time_breakdown()->PackFrom(tpu_breakdown);

  auto provider = std::make_unique<MockToolDataProvider>();
  EXPECT_CALL(*provider, GetOverviewPage())
      .WillRepeatedly(Return(&overview));
  EXPECT_CALL(*provider, GetInputPipelineAnalysisResult())
      .WillRepeatedly(Return(&pipeline_result));

  SignalProvider signal_provider(std::move(provider));
  SparseCoreBoundRule rule;

  const absl::StatusOr<std::optional<SmartSuggestion>> result =
      rule.Apply(signal_provider);
  EXPECT_THAT(result, IsOkAndHolds(Eq(std::nullopt)));
}
164+
165+
} // namespace
166+
} // namespace profiler
167+
} // namespace tensorflow

0 commit comments

Comments
 (0)