Skip to content

Commit 4c3e032

Browse files
committed
[BOLT] Move call probe information to CallSiteInfo
Pseudo probe matching (#100446) needs callee information for call probes. Embed call probe information (probe id, inline tree node, indirect flag) into CallSiteInfo. As a consequence: - Call probes are removed from PseudoProbeInfo to avoid duplication, so that it contains only block probes. - Probe grouping across inline tree nodes becomes more potent and allows block id 1 (the common case) to be elided unambiguously. Block mask (blx) encoding becomes a low-ROI optimization and will be replaced by a more compact encoding leveraging the simplified PseudoProbeInfo in #166680. The size increase is ~3% for an XL profile (461->475MB). Compact block probe encoding shrinks it by ~6%. Test Plan: updated pseudoprobe-decoding-{inline,noinline}.test Reviewers: paschalis-mpeis, ayermolo, yota9, yozhu, rafaelauler, maksfb Reviewed By: rafaelauler Pull Request: #165490
1 parent c9ff2df commit 4c3e032

9 files changed

+108
-117
lines changed

bolt/include/bolt/Profile/ProfileYAMLMapping.h

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ struct CallSiteInfo {
2929
uint32_t EntryDiscriminator{0}; /// multiple entry discriminator
3030
uint64_t Count{0};
3131
uint64_t Mispreds{0};
32+
// Pseudo probe information, optional
33+
uint32_t Probe{0};
34+
bool Indirect = false;
35+
uint32_t InlineTreeNode{0};
3236

3337
bool operator==(const CallSiteInfo &Other) const {
3438
return Offset == Other.Offset && DestId == Other.DestId &&
@@ -63,6 +67,9 @@ template <> struct MappingTraits<bolt::CallSiteInfo> {
6367
YamlIO.mapOptional("disc", CSI.EntryDiscriminator, (uint32_t)0);
6468
YamlIO.mapRequired("cnt", CSI.Count);
6569
YamlIO.mapOptional("mis", CSI.Mispreds, (uint64_t)0);
70+
YamlIO.mapOptional("pp", CSI.Probe, 0);
71+
YamlIO.mapOptional("ppn", CSI.InlineTreeNode, 0);
72+
YamlIO.mapOptional("ind", CSI.Indirect, false);
6673
}
6774

6875
static const bool flow = true;
@@ -95,29 +102,20 @@ template <> struct MappingTraits<bolt::SuccessorInfo> {
95102

96103
namespace bolt {
97104
struct PseudoProbeInfo {
98-
uint32_t InlineTreeIndex = 0;
99-
uint64_t BlockMask = 0; // bitset with probe indices from 1 to 64
100-
std::vector<uint64_t> BlockProbes; // block probes with indices above 64
101-
std::vector<uint64_t> CallProbes;
102-
std::vector<uint64_t> IndCallProbes;
105+
std::vector<uint64_t> BlockProbes;
103106
std::vector<uint32_t> InlineTreeNodes;
104107

105108
bool operator==(const PseudoProbeInfo &Other) const {
106-
return InlineTreeIndex == Other.InlineTreeIndex &&
107-
BlockProbes == Other.BlockProbes && CallProbes == Other.CallProbes &&
108-
IndCallProbes == Other.IndCallProbes;
109+
return InlineTreeNodes == Other.InlineTreeNodes &&
110+
BlockProbes == Other.BlockProbes;
109111
}
110112
};
111113
} // end namespace bolt
112114

113115
template <> struct MappingTraits<bolt::PseudoProbeInfo> {
114116
static void mapping(IO &YamlIO, bolt::PseudoProbeInfo &PI) {
115-
YamlIO.mapOptional("blx", PI.BlockMask, 0);
116-
YamlIO.mapOptional("blk", PI.BlockProbes, std::vector<uint64_t>());
117-
YamlIO.mapOptional("call", PI.CallProbes, std::vector<uint64_t>());
118-
YamlIO.mapOptional("icall", PI.IndCallProbes, std::vector<uint64_t>());
119-
YamlIO.mapOptional("id", PI.InlineTreeIndex, 0);
120-
YamlIO.mapOptional("ids", PI.InlineTreeNodes, std::vector<uint32_t>());
117+
YamlIO.mapOptional("blk", PI.BlockProbes, std::vector<uint64_t>(1, 1));
118+
YamlIO.mapOptional("ids", PI.InlineTreeNodes, std::vector<uint32_t>(1, 0));
121119
}
122120

123121
static const bool flow = true;

bolt/include/bolt/Profile/YAMLProfileWriter.h

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -74,25 +74,24 @@ class YAMLProfileWriter {
7474
collectInlineTree(const MCPseudoProbeDecoder &Decoder,
7575
const MCDecodedPseudoProbeInlineTree &Root);
7676

77-
// 0 - block probe, 1 - indirect call, 2 - direct call
78-
using ProbeList = std::array<SmallVector<uint64_t, 0>, 3>;
79-
using NodeIdToProbes = DenseMap<uint32_t, ProbeList>;
80-
static std::vector<yaml::bolt::PseudoProbeInfo>
81-
convertNodeProbes(NodeIdToProbes &NodeProbes);
82-
8377
public:
84-
template <typename T>
85-
static std::vector<yaml::bolt::PseudoProbeInfo>
86-
writeBlockProbes(T Probes, const InlineTreeMapTy &InlineTreeNodeId) {
87-
NodeIdToProbes NodeProbes;
88-
for (const MCDecodedPseudoProbe &Probe : Probes) {
89-
auto It = InlineTreeNodeId.find(Probe.getInlineTreeNode());
90-
if (It == InlineTreeNodeId.end())
91-
continue;
92-
NodeProbes[It->second][Probe.getType()].emplace_back(Probe.getIndex());
93-
}
94-
return convertNodeProbes(NodeProbes);
95-
}
78+
class BlockProbeCtx {
79+
struct Call {
80+
uint64_t Id;
81+
uint32_t Node;
82+
bool Indirect;
83+
bool Used;
84+
};
85+
// Group block probes by node id.
86+
DenseMap<uint32_t, std::vector<uint64_t>> NodeToProbes;
87+
// Offset -> call probe
88+
DenseMap<uint32_t, Call> CallProbes;
89+
90+
public:
91+
void addBlockProbe(const InlineTreeMapTy &Map,
92+
const MCDecodedPseudoProbe &Probe, uint32_t ProbeOffset);
93+
void finalize(yaml::bolt::BinaryBasicBlockProfile &YamlBB);
94+
};
9695
};
9796
} // namespace bolt
9897
} // namespace llvm

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2397,26 +2397,22 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
23972397
PseudoProbeDecoder->getAddress2ProbesMap();
23982398
BinaryFunction::FragmentsSetTy Fragments(BF->Fragments);
23992399
Fragments.insert(BF);
2400-
DenseMap<
2401-
uint32_t,
2402-
std::vector<std::reference_wrapper<const MCDecodedPseudoProbe>>>
2403-
BlockProbes;
2400+
DenseMap<uint32_t, YAMLProfileWriter::BlockProbeCtx> BlockCtx;
24042401
for (const BinaryFunction *F : Fragments) {
24052402
const uint64_t FuncAddr = F->getAddress();
24062403
for (const MCDecodedPseudoProbe &Probe :
24072404
ProbeMap.find(FuncAddr, FuncAddr + F->getSize())) {
24082405
const uint32_t OutputAddress = Probe.getAddress();
24092406
const uint32_t InputOffset = BAT->translate(
24102407
FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
2411-
const unsigned BlockIndex = getBlock(InputOffset).second;
2412-
BlockProbes[BlockIndex].emplace_back(Probe);
2408+
const auto &[BlockOffset, BlockIndex] = getBlock(InputOffset);
2409+
BlockCtx[BlockIndex].addBlockProbe(InlineTreeNodeId, Probe,
2410+
InputOffset - BlockOffset);
24132411
}
24142412
}
24152413

2416-
for (auto &[Block, Probes] : BlockProbes) {
2417-
YamlBF.Blocks[Block].PseudoProbes =
2418-
YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId);
2419-
}
2414+
for (auto &[Block, Ctx] : BlockCtx)
2415+
Ctx.finalize(YamlBF.Blocks[Block]);
24202416
}
24212417
// Skip printing if there's no profile data
24222418
llvm::erase_if(

bolt/lib/Profile/StaleProfileMatching.cpp

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -348,26 +348,10 @@ class StaleMatcher {
348348
return It->second;
349349
};
350350

351-
auto matchPseudoProbeInfo = [&](const yaml::bolt::PseudoProbeInfo
352-
&ProfileProbe,
353-
uint32_t NodeId) {
354-
for (uint64_t Index = 0; Index < 64; ++Index)
355-
if (ProfileProbe.BlockMask & 1ull << Index)
356-
++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, Index + 1)];
357-
for (const auto &ProfileProbes :
358-
{ProfileProbe.BlockProbes, ProfileProbe.IndCallProbes,
359-
ProfileProbe.CallProbes})
360-
for (uint64_t ProfileProbe : ProfileProbes)
361-
++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, ProfileProbe)];
362-
};
363-
364-
for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes) {
365-
if (!ProfileProbe.InlineTreeNodes.empty())
366-
for (uint32_t ProfileInlineTreeNode : ProfileProbe.InlineTreeNodes)
367-
matchPseudoProbeInfo(ProfileProbe, ProfileInlineTreeNode);
368-
else
369-
matchPseudoProbeInfo(ProfileProbe, ProfileProbe.InlineTreeIndex);
370-
}
351+
for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes)
352+
for (uint32_t Node : ProfileProbe.InlineTreeNodes)
353+
for (uint64_t Probe : ProfileProbe.BlockProbes)
354+
++FlowBlockMatchCount[matchProfileProbeToBlock(Node, Probe)];
371355
uint32_t BestMatchCount = 0;
372356
uint32_t TotalMatchCount = 0;
373357
const FlowBlock *BestMatchBlock = nullptr;

bolt/lib/Profile/YAMLProfileWriter.cpp

Lines changed: 58 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -129,50 +129,62 @@ YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) {
129129
return {Desc, InlineTree};
130130
}
131131

132-
std::vector<yaml::bolt::PseudoProbeInfo>
133-
YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) {
134-
struct BlockProbeInfoHasher {
135-
size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const {
136-
return llvm::hash_combine(llvm::hash_combine_range(BPI.BlockProbes),
137-
llvm::hash_combine_range(BPI.CallProbes),
138-
llvm::hash_combine_range(BPI.IndCallProbes));
132+
void YAMLProfileWriter::BlockProbeCtx::addBlockProbe(
133+
const InlineTreeMapTy &Map, const MCDecodedPseudoProbe &Probe,
134+
uint32_t ProbeOffset) {
135+
auto It = Map.find(Probe.getInlineTreeNode());
136+
if (It == Map.end())
137+
return;
138+
auto NodeId = It->second;
139+
uint32_t Index = Probe.getIndex();
140+
if (Probe.isCall())
141+
CallProbes[ProbeOffset] =
142+
Call{Index, NodeId, Probe.isIndirectCall(), false};
143+
else
144+
NodeToProbes[NodeId].emplace_back(Index);
145+
}
146+
147+
void YAMLProfileWriter::BlockProbeCtx::finalize(
148+
yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
149+
// Hash block probes by vector
150+
struct ProbeHasher {
151+
size_t operator()(const ArrayRef<uint64_t> Probes) const {
152+
return llvm::hash_combine_range(Probes);
139153
}
140154
};
141155

142-
// Check identical BlockProbeInfo structs and merge them
143-
std::unordered_map<yaml::bolt::PseudoProbeInfo, std::vector<uint32_t>,
144-
BlockProbeInfoHasher>
145-
BPIToNodes;
146-
for (auto &[NodeId, Probes] : NodeProbes) {
147-
yaml::bolt::PseudoProbeInfo BPI;
148-
BPI.BlockProbes = std::vector(Probes[0].begin(), Probes[0].end());
149-
BPI.IndCallProbes = std::vector(Probes[1].begin(), Probes[1].end());
150-
BPI.CallProbes = std::vector(Probes[2].begin(), Probes[2].end());
151-
BPIToNodes[BPI].push_back(NodeId);
156+
// Check identical block probes and merge them
157+
std::unordered_map<std::vector<uint64_t>, std::vector<uint32_t>, ProbeHasher>
158+
ProbesToNodes;
159+
for (auto &[NodeId, Probes] : NodeToProbes) {
160+
llvm::sort(Probes);
161+
ProbesToNodes[Probes].emplace_back(NodeId);
152162
}
153-
154-
auto handleMask = [](const auto &Ids, auto &Vec, auto &Mask) {
155-
for (auto Id : Ids)
156-
if (Id > 64)
157-
Vec.emplace_back(Id);
158-
else
159-
Mask |= 1ull << (Id - 1);
160-
};
161-
162-
// Add to YAML with merged nodes/block mask optimizations
163-
std::vector<yaml::bolt::PseudoProbeInfo> YamlProbes;
164-
YamlProbes.reserve(BPIToNodes.size());
165-
for (const auto &[BPI, Nodes] : BPIToNodes) {
166-
auto &YamlBPI = YamlProbes.emplace_back(yaml::bolt::PseudoProbeInfo());
167-
YamlBPI.CallProbes = BPI.CallProbes;
168-
YamlBPI.IndCallProbes = BPI.IndCallProbes;
169-
if (Nodes.size() == 1)
170-
YamlBPI.InlineTreeIndex = Nodes.front();
171-
else
172-
YamlBPI.InlineTreeNodes = Nodes;
173-
handleMask(BPI.BlockProbes, YamlBPI.BlockProbes, YamlBPI.BlockMask);
163+
for (auto &[Probes, Nodes] : ProbesToNodes) {
164+
llvm::sort(Nodes);
165+
YamlBB.PseudoProbes.emplace_back(
166+
yaml::bolt::PseudoProbeInfo{Probes, Nodes});
167+
}
168+
for (yaml::bolt::CallSiteInfo &CSI : YamlBB.CallSites) {
169+
auto It = CallProbes.find(CSI.Offset);
170+
if (It == CallProbes.end())
171+
continue;
172+
Call &Probe = It->second;
173+
CSI.Probe = Probe.Id;
174+
CSI.InlineTreeNode = Probe.Node;
175+
CSI.Indirect = Probe.Indirect;
176+
Probe.Used = true;
177+
}
178+
for (const auto &[Offset, Probe] : CallProbes) {
179+
if (Probe.Used)
180+
continue;
181+
yaml::bolt::CallSiteInfo CSI;
182+
CSI.Offset = Offset;
183+
CSI.Probe = Probe.Id;
184+
CSI.InlineTreeNode = Probe.Node;
185+
CSI.Indirect = Probe.Indirect;
186+
YamlBB.CallSites.emplace_back(CSI);
174187
}
175-
return YamlProbes;
176188
}
177189

178190
std::tuple<std::vector<yaml::bolt::InlineTreeNode>,
@@ -343,12 +355,13 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
343355
const AddressProbesMap &ProbeMap =
344356
PseudoProbeDecoder->getAddress2ProbesMap();
345357
const uint64_t FuncAddr = BF.getAddress();
346-
const std::pair<uint64_t, uint64_t> &BlockRange =
347-
BB->getInputAddressRange();
348-
const std::pair<uint64_t, uint64_t> BlockAddrRange = {
349-
FuncAddr + BlockRange.first, FuncAddr + BlockRange.second};
350-
auto Probes = ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second);
351-
YamlBB.PseudoProbes = writeBlockProbes(Probes, InlineTreeNodeId);
358+
auto [Start, End] = BB->getInputAddressRange();
359+
Start += FuncAddr;
360+
End += FuncAddr;
361+
BlockProbeCtx Ctx;
362+
for (const MCDecodedPseudoProbe &Probe : ProbeMap.find(Start, End))
363+
Ctx.addBlockProbe(InlineTreeNodeId, Probe, Probe.getAddress() - Start);
364+
Ctx.finalize(YamlBB);
352365
}
353366

354367
YamlBF.Blocks.emplace_back(YamlBB);

bolt/test/X86/match-blocks-with-pseudo-probes-inline.test

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ functions:
3030
insns: 11
3131
hash: 0x1
3232
exec: 1
33-
probes: [ { blx: 9 } ]
33+
probes: [ { blk: [ 1, 4 ] } ]
3434
inline_tree: [ { } ]
3535
- name: foo
3636
fid: 10
@@ -43,7 +43,7 @@ functions:
4343
hash: 0x2
4444
exec: 1
4545
succ: [ { bid: 3, cnt: 0 } ]
46-
probes: [ { blx: 3 } ]
46+
probes: [ { blk: [ 1, 2 ] } ]
4747
inline_tree: [ { g: 1 }, { g: 0, cs: 8 } ]
4848
- name: main
4949
fid: 11
@@ -56,7 +56,7 @@ functions:
5656
hash: 0x3
5757
exec: 1
5858
succ: [ { bid: 3, cnt: 0 } ]
59-
probes: [ { blx: 3, id: 1 }, { blx: 1 } ]
59+
probes: [ { blk: [ 1, 2 ], ids: [ 1 ] }, { blk: [ 1 ] } ]
6060
inline_tree: [ { g: 2 }, { g: 1, cs: 2 }, { g: 0, p: 1, cs: 8 } ]
6161
pseudo_probe_desc:
6262
gs: [ 0xE413754A191DB537, 0x5CF8C24CDB18BDAC, 0xDB956436E78DD5FA ]

bolt/test/X86/match-blocks-with-pseudo-probes.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ functions:
5555
hash: 0xFFFFFFFFFFFFFFF1
5656
insns: 1
5757
succ: [ { bid: 3, cnt: 1} ]
58-
probes: [ { blx: 1 } ]
58+
probes: [ { blk: [ 1 ] } ]
5959
inline_tree: [ { g: 0 } ]
6060
pseudo_probe_desc:
6161
gs: [ 0xDB956436E78DD5FA ]

bolt/test/X86/pseudoprobe-decoding-inline.test

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,17 @@
1414
# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
1515
# CHECK-YAML: name: bar
1616
# CHECK-YAML: - bid: 0
17-
# CHECK-YAML: probes: [ { blx: 9 } ]
17+
# CHECK-YAML: probes: [ { blk: [ 1, 4 ] } ]
1818
# CHECK-YAML: inline_tree: [ { } ]
1919
#
2020
# CHECK-YAML: name: foo
2121
# CHECK-YAML: - bid: 0
22-
# CHECK-YAML: probes: [ { blx: 3 } ]
22+
# CHECK-YAML: probes: [ { blk: [ 1, 2 ] } ]
2323
# CHECK-YAML: inline_tree: [ { g: 1 }, { g: 0, cs: 8 } ]
2424
#
2525
# CHECK-YAML: name: main
2626
# CHECK-YAML: - bid: 0
27-
# CHECK-YAML: probes: [ { blx: 3, id: 1 }, { blx: 1 } ]
27+
# CHECK-YAML: probes: [ { blk: [ 1, 2 ], ids: [ 1 ] }, { } ]
2828
# CHECK-YAML: inline_tree: [ { g: 2 }, { g: 1, cs: 2 }, { g: 0, p: 1, cs: 8 } ]
2929
#
3030
# CHECK-YAML: pseudo_probe_desc:

bolt/test/X86/pseudoprobe-decoding-noinline.test

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,18 @@
1515
# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
1616
# CHECK-YAML: name: bar
1717
# CHECK-YAML: - bid: 0
18-
# CHECK-YAML: probes: [ { blx: 9 } ]
18+
# CHECK-YAML: probes: [ { blk: [ 1, 4 ] } ]
1919
# CHECK-YAML: inline_tree: [ { } ]
2020
#
2121
# CHECK-YAML: name: foo
2222
# CHECK-YAML: - bid: 0
23-
# CHECK-YAML: probes: [ { blx: 3 } ]
23+
# CHECK-YAML: probes: [ { blk: [ 1, 2 ] } ]
2424
# CHECK-YAML: inline_tree: [ { g: 2 } ]
2525
#
2626
# CHECK-YAML: name: main
2727
# CHECK-YAML: - bid: 0
28-
# CHECK-YAML: probes: [ { blx: 1, call: [ 2 ] } ]
28+
# CHECK-YAML: calls: [ { off: 0x4, fid: 0, cnt: 0, pp: 2 } ]
29+
# CHECK-YAML: probes: [ { } ]
2930
# CHECK-YAML: inline_tree: [ { g: 1 } ]
3031
#
3132
# CHECK-YAML: pseudo_probe_desc:

0 commit comments

Comments
 (0)