xref: /llvm-project/bolt/lib/Profile/YAMLProfileWriter.cpp (revision 06e08696248ac01754c87c22cc8a4b797ef46430)
1 //===- bolt/Profile/YAMLProfileWriter.cpp - YAML profile serializer -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Profile/YAMLProfileWriter.h"
10 #include "bolt/Core/BinaryBasicBlock.h"
11 #include "bolt/Core/BinaryFunction.h"
12 #include "bolt/Profile/BoltAddressTranslation.h"
13 #include "bolt/Profile/DataAggregator.h"
14 #include "bolt/Profile/ProfileReaderBase.h"
15 #include "bolt/Rewrite/RewriteInstance.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "llvm/MC/MCPseudoProbe.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/FileSystem.h"
20 #include "llvm/Support/raw_ostream.h"
21 
22 #undef  DEBUG_TYPE
23 #define DEBUG_TYPE "bolt-prof"
24 
namespace opts {
using namespace llvm;
// Declared here, defined in another translation unit. Selects DFS block
// ordering (BF.dfs()) over layout ordering when serializing the profile.
extern cl::opt<bool> ProfileUseDFS;
// When set, pseudo-probe information (inline trees, probe descriptors) is
// attached to the emitted YAML profile.
cl::opt<bool> ProfileWritePseudoProbes(
    "profile-write-pseudo-probes",
    cl::desc("Use pseudo probes in profile generation"), cl::Hidden,
    cl::cat(BoltOptCategory));
} // namespace opts
33 
34 namespace llvm {
35 namespace bolt {
36 
37 const BinaryFunction *YAMLProfileWriter::setCSIDestination(
38     const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI,
39     const MCSymbol *Symbol, const BoltAddressTranslation *BAT,
40     uint32_t Offset) {
41   CSI.DestId = 0; // designated for unknown functions
42   CSI.EntryDiscriminator = 0;
43 
44   if (Symbol) {
45     uint64_t EntryID = 0;
46     if (const BinaryFunction *Callee =
47             BC.getFunctionForSymbol(Symbol, &EntryID)) {
48       if (BAT && BAT->isBATFunction(Callee->getAddress()))
49         std::tie(Callee, EntryID) = BAT->translateSymbol(BC, *Symbol, Offset);
50       else if (const BinaryBasicBlock *BB =
51                    Callee->getBasicBlockContainingOffset(Offset))
52         BC.getFunctionForSymbol(Callee->getSecondaryEntryPointSymbol(*BB),
53                                 &EntryID);
54       CSI.DestId = Callee->getFunctionNumber();
55       CSI.EntryDiscriminator = EntryID;
56       return Callee;
57     }
58   }
59   return nullptr;
60 }
61 
62 std::vector<YAMLProfileWriter::InlineTreeNode>
63 YAMLProfileWriter::collectInlineTree(
64     const MCPseudoProbeDecoder &Decoder,
65     const MCDecodedPseudoProbeInlineTree &Root) {
66   auto getHash = [&](const MCDecodedPseudoProbeInlineTree &Node) {
67     return Decoder.getFuncDescForGUID(Node.Guid)->FuncHash;
68   };
69   std::vector<InlineTreeNode> InlineTree(
70       {InlineTreeNode{&Root, Root.Guid, getHash(Root), 0, 0}});
71   uint32_t ParentId = 0;
72   while (ParentId != InlineTree.size()) {
73     const MCDecodedPseudoProbeInlineTree *Cur = InlineTree[ParentId].InlineTree;
74     for (const MCDecodedPseudoProbeInlineTree &Child : Cur->getChildren())
75       InlineTree.emplace_back(
76           InlineTreeNode{&Child, Child.Guid, getHash(Child), ParentId,
77                          std::get<1>(Child.getInlineSite())});
78     ++ParentId;
79   }
80 
81   return InlineTree;
82 }
83 
/// Build the profile-wide pseudo-probe descriptor: GUID and function-hash
/// tables ordered by use frequency so frequently-referenced entries get
/// small indices, plus the InlineTreeDesc used to encode per-function trees.
std::tuple<yaml::bolt::ProfilePseudoProbeDesc,
           YAMLProfileWriter::InlineTreeDesc>
YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) {
  yaml::bolt::ProfilePseudoProbeDesc Desc;
  InlineTreeDesc InlineTree;

  // Map each top-level (non-inlined) function GUID to its inline tree root.
  for (const MCDecodedPseudoProbeInlineTree &TopLev :
       Decoder.getDummyInlineRoot().getChildren())
    InlineTree.TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev;

  // First pass: temporarily use the index maps as occurrence counters.
  for (const auto &FuncDesc : Decoder.getGUID2FuncDescMap())
    ++InlineTree.HashIdxMap[FuncDesc.FuncHash];

  InlineTree.GUIDIdxMap.reserve(Decoder.getGUID2FuncDescMap().size());
  for (const auto &Node : Decoder.getInlineTreeVec())
    ++InlineTree.GUIDIdxMap[Node.Guid];

  // Sort (count, GUID) pairs ascending by count; reversed below so the most
  // frequent GUID ends up first.
  std::vector<std::pair<uint32_t, uint64_t>> GUIDFreqVec;
  GUIDFreqVec.reserve(InlineTree.GUIDIdxMap.size());
  for (const auto [GUID, Cnt] : InlineTree.GUIDIdxMap)
    GUIDFreqVec.emplace_back(Cnt, GUID);
  llvm::sort(GUIDFreqVec);

  // Same frequency ordering for function hashes.
  std::vector<std::pair<uint32_t, uint64_t>> HashFreqVec;
  HashFreqVec.reserve(InlineTree.HashIdxMap.size());
  for (const auto [Hash, Cnt] : InlineTree.HashIdxMap)
    HashFreqVec.emplace_back(Cnt, Hash);
  llvm::sort(HashFreqVec);

  // Second pass: walk in descending frequency and repurpose the counter maps
  // as value->index maps (index 0 = most frequent entry).
  uint32_t Index = 0;
  Desc.Hash.reserve(HashFreqVec.size());
  for (uint64_t Hash : llvm::make_second_range(llvm::reverse(HashFreqVec))) {
    Desc.Hash.emplace_back(Hash);
    InlineTree.HashIdxMap[Hash] = Index++;
  }

  Index = 0;
  Desc.GUID.reserve(GUIDFreqVec.size());
  for (uint64_t GUID : llvm::make_second_range(llvm::reverse(GUIDFreqVec))) {
    Desc.GUID.emplace_back(GUID);
    InlineTree.GUIDIdxMap[GUID] = Index++;
    // For each GUID, also record the index of its function hash.
    uint64_t Hash = Decoder.getFuncDescForGUID(GUID)->FuncHash;
    Desc.GUIDHashIdx.emplace_back(InlineTree.HashIdxMap[Hash]);
  }

  return {Desc, InlineTree};
}
131 
/// Convert per-inline-tree-node probe sets into compact YAML form, merging
/// nodes that carry identical probe information into a single entry.
std::vector<yaml::bolt::PseudoProbeInfo>
YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) {
  struct BlockProbeInfoHasher {
    size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const {
      auto HashCombine = [](auto &Range) {
        return llvm::hash_combine_range(Range.begin(), Range.end());
      };
      // Hash only the probe id vectors; equality is declared elsewhere and
      // must agree with this.
      return llvm::hash_combine(HashCombine(BPI.BlockProbes),
                                HashCombine(BPI.CallProbes),
                                HashCombine(BPI.IndCallProbes));
    }
  };

  // Check identical BlockProbeInfo structs and merge them
  std::unordered_map<yaml::bolt::PseudoProbeInfo, std::vector<uint32_t>,
                     BlockProbeInfoHasher>
      BPIToNodes;
  for (auto &[NodeId, Probes] : NodeProbes) {
    // Probes[0] = block probes, Probes[1] = indirect-call probes,
    // Probes[2] = direct-call probes.
    yaml::bolt::PseudoProbeInfo BPI;
    BPI.BlockProbes = std::vector(Probes[0].begin(), Probes[0].end());
    BPI.IndCallProbes = std::vector(Probes[1].begin(), Probes[1].end());
    BPI.CallProbes = std::vector(Probes[2].begin(), Probes[2].end());
    BPIToNodes[BPI].push_back(NodeId);
  }

  // Ids 1..64 are packed as bit (Id - 1) of Mask; larger ids are stored
  // explicitly. NOTE(review): assumes Id >= 1 (Id == 0 would shift by -1) —
  // confirm probe ids are 1-based.
  auto handleMask = [](const auto &Ids, auto &Vec, auto &Mask) {
    for (auto Id : Ids)
      if (Id > 64)
        Vec.emplace_back(Id);
      else
        Mask |= 1ull << (Id - 1);
  };

  // Add to YAML with merged nodes/block mask optimizations
  std::vector<yaml::bolt::PseudoProbeInfo> YamlProbes;
  YamlProbes.reserve(BPIToNodes.size());
  for (const auto &[BPI, Nodes] : BPIToNodes) {
    auto &YamlBPI = YamlProbes.emplace_back(yaml::bolt::PseudoProbeInfo());
    YamlBPI.CallProbes = BPI.CallProbes;
    YamlBPI.IndCallProbes = BPI.IndCallProbes;
    // A single owning node is stored inline; multiple nodes share the entry.
    if (Nodes.size() == 1)
      YamlBPI.InlineTreeIndex = Nodes.front();
    else
      YamlBPI.InlineTreeNodes = Nodes;
    handleMask(BPI.BlockProbes, YamlBPI.BlockProbes, YamlBPI.BlockMask);
  }
  return YamlProbes;
}
180 
/// Serialize the inline tree of the function identified by \p GUID.
/// Returns the YAML node list and a map from decoded inline-tree nodes to
/// their index in that list (used later to attach probes to nodes). Both are
/// empty when the GUID has no recorded top-level inline tree.
std::tuple<std::vector<yaml::bolt::InlineTreeNode>,
           YAMLProfileWriter::InlineTreeMapTy>
YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
                                       const InlineTreeDesc &InlineTree,
                                       uint64_t GUID) {
  DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
  std::vector<yaml::bolt::InlineTreeNode> YamlInlineTree;
  auto It = InlineTree.TopLevelGUIDToInlineTree.find(GUID);
  if (It == InlineTree.TopLevelGUIDToInlineTree.end())
    return {YamlInlineTree, InlineTreeNodeId};
  const MCDecodedPseudoProbeInlineTree *Root = It->second;
  assert(Root && "Malformed TopLevelGUIDToInlineTree");
  uint32_t Index = 0;
  uint32_t PrevParent = 0;
  uint32_t PrevGUIDIdx = 0;
  for (const auto &Node : collectInlineTree(Decoder, *Root)) {
    InlineTreeNodeId[Node.InlineTree] = Index++;
    auto GUIDIdxIt = InlineTree.GUIDIdxMap.find(Node.GUID);
    assert(GUIDIdxIt != InlineTree.GUIDIdxMap.end() && "Malformed GUIDIdxMap");
    uint32_t GUIDIdx = GUIDIdxIt->second;
    // Delta-compress the stream: a repeat of the previous GUID index is
    // emitted as the UINT32_MAX sentinel, and parent ids are stored as
    // deltas from the previous node's parent. The reader must mirror this
    // encoding (PrevGUIDIdx/PrevParent both start at 0 on both sides).
    if (GUIDIdx == PrevGUIDIdx)
      GUIDIdx = UINT32_MAX;
    else
      PrevGUIDIdx = GUIDIdx;
    YamlInlineTree.emplace_back(yaml::bolt::InlineTreeNode{
        Node.ParentId - PrevParent, Node.InlineSite, GUIDIdx, 0, 0});
    PrevParent = Node.ParentId;
  }
  return {YamlInlineTree, InlineTreeNodeId};
}
211 
212 yaml::bolt::BinaryFunctionProfile
213 YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
214                            const InlineTreeDesc &InlineTree,
215                            const BoltAddressTranslation *BAT) {
216   yaml::bolt::BinaryFunctionProfile YamlBF;
217   const BinaryContext &BC = BF.getBinaryContext();
218   const MCPseudoProbeDecoder *PseudoProbeDecoder =
219       opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
220 
221   const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_LBR;
222 
223   // Prepare function and block hashes
224   BF.computeHash(UseDFS);
225   BF.computeBlockHashes();
226 
227   YamlBF.Name = DataAggregator::getLocationName(BF, BAT);
228   YamlBF.Id = BF.getFunctionNumber();
229   YamlBF.Hash = BF.getHash();
230   YamlBF.NumBasicBlocks = BF.size();
231   YamlBF.ExecCount = BF.getKnownExecutionCount();
232   DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
233   if (PseudoProbeDecoder && BF.getGUID()) {
234     std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
235         convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF.getGUID());
236   }
237 
238   BinaryFunction::BasicBlockOrderType Order;
239   llvm::copy(UseDFS ? BF.dfs() : BF.getLayout().blocks(),
240              std::back_inserter(Order));
241 
242   const FunctionLayout Layout = BF.getLayout();
243   Layout.updateLayoutIndices(Order);
244 
245   for (const BinaryBasicBlock *BB : Order) {
246     yaml::bolt::BinaryBasicBlockProfile YamlBB;
247     YamlBB.Index = BB->getLayoutIndex();
248     YamlBB.NumInstructions = BB->getNumNonPseudos();
249     YamlBB.Hash = BB->getHash();
250 
251     if (!LBRProfile) {
252       YamlBB.EventCount = BB->getKnownExecutionCount();
253       if (YamlBB.EventCount)
254         YamlBF.Blocks.emplace_back(YamlBB);
255       continue;
256     }
257 
258     YamlBB.ExecCount = BB->getKnownExecutionCount();
259 
260     for (const MCInst &Instr : *BB) {
261       if (!BC.MIB->isCall(Instr) && !BC.MIB->isIndirectBranch(Instr))
262         continue;
263 
264       SmallVector<std::pair<StringRef, yaml::bolt::CallSiteInfo>> CSTargets;
265       yaml::bolt::CallSiteInfo CSI;
266       std::optional<uint32_t> Offset = BC.MIB->getOffset(Instr);
267       if (!Offset || *Offset < BB->getInputOffset())
268         continue;
269       CSI.Offset = *Offset - BB->getInputOffset();
270 
271       if (BC.MIB->isIndirectCall(Instr) || BC.MIB->isIndirectBranch(Instr)) {
272         const auto ICSP = BC.MIB->tryGetAnnotationAs<IndirectCallSiteProfile>(
273             Instr, "CallProfile");
274         if (!ICSP)
275           continue;
276         for (const IndirectCallProfile &CSP : ICSP.get()) {
277           StringRef TargetName = "";
278           const BinaryFunction *Callee =
279               setCSIDestination(BC, CSI, CSP.Symbol, BAT);
280           if (Callee)
281             TargetName = Callee->getOneName();
282           CSI.Count = CSP.Count;
283           CSI.Mispreds = CSP.Mispreds;
284           CSTargets.emplace_back(TargetName, CSI);
285         }
286       } else { // direct call or a tail call
287         StringRef TargetName = "";
288         const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(Instr);
289         const BinaryFunction *const Callee =
290             setCSIDestination(BC, CSI, CalleeSymbol, BAT);
291         if (Callee)
292           TargetName = Callee->getOneName();
293 
294         auto getAnnotationWithDefault = [&](const MCInst &Inst, StringRef Ann) {
295           return BC.MIB->getAnnotationWithDefault(Instr, Ann, 0ull);
296         };
297         if (BC.MIB->getConditionalTailCall(Instr)) {
298           CSI.Count = getAnnotationWithDefault(Instr, "CTCTakenCount");
299           CSI.Mispreds = getAnnotationWithDefault(Instr, "CTCMispredCount");
300         } else {
301           CSI.Count = getAnnotationWithDefault(Instr, "Count");
302         }
303 
304         if (CSI.Count)
305           CSTargets.emplace_back(TargetName, CSI);
306       }
307       // Sort targets in a similar way to getBranchData, see Location::operator<
308       llvm::sort(CSTargets, [](const auto &RHS, const auto &LHS) {
309         if (RHS.first != LHS.first)
310           return RHS.first < LHS.first;
311         return RHS.second.Offset < LHS.second.Offset;
312       });
313       for (auto &KV : CSTargets)
314         YamlBB.CallSites.push_back(KV.second);
315     }
316 
317     // Skip printing if there's no profile data for non-entry basic block.
318     // Include landing pads with non-zero execution count.
319     if (YamlBB.CallSites.empty() && !BB->isEntryPoint() &&
320         !(BB->isLandingPad() && BB->getKnownExecutionCount() != 0)) {
321       // Include blocks having successors or predecessors with positive counts.
322       uint64_t SuccessorExecCount = 0;
323       for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
324            BB->branch_info())
325         SuccessorExecCount += BranchInfo.Count;
326       uint64_t PredecessorExecCount = 0;
327       for (auto Pred : BB->predecessors())
328         PredecessorExecCount += Pred->getBranchInfo(*BB).Count;
329       if (!SuccessorExecCount && !PredecessorExecCount)
330         continue;
331     }
332 
333     auto BranchInfo = BB->branch_info_begin();
334     for (const BinaryBasicBlock *Successor : BB->successors()) {
335       yaml::bolt::SuccessorInfo YamlSI;
336       YamlSI.Index = Successor->getLayoutIndex();
337       YamlSI.Count = BranchInfo->Count;
338       YamlSI.Mispreds = BranchInfo->MispredictedCount;
339 
340       YamlBB.Successors.emplace_back(YamlSI);
341 
342       ++BranchInfo;
343     }
344 
345     if (PseudoProbeDecoder) {
346       const AddressProbesMap &ProbeMap =
347           PseudoProbeDecoder->getAddress2ProbesMap();
348       const uint64_t FuncAddr = BF.getAddress();
349       const std::pair<uint64_t, uint64_t> &BlockRange =
350           BB->getInputAddressRange();
351       const std::pair<uint64_t, uint64_t> BlockAddrRange = {
352           FuncAddr + BlockRange.first, FuncAddr + BlockRange.second};
353       auto Probes = ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second);
354       YamlBB.PseudoProbes = writeBlockProbes(Probes, InlineTreeNodeId);
355     }
356 
357     YamlBF.Blocks.emplace_back(YamlBB);
358   }
359   return YamlBF;
360 }
361 
362 std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) {
363   const BinaryContext &BC = RI.getBinaryContext();
364   const auto &Functions = BC.getBinaryFunctions();
365 
366   std::error_code EC;
367   OS = std::make_unique<raw_fd_ostream>(Filename, EC, sys::fs::OF_None);
368   if (EC) {
369     errs() << "BOLT-WARNING: " << EC.message() << " : unable to open "
370            << Filename << " for output.\n";
371     return EC;
372   }
373 
374   yaml::bolt::BinaryProfile BP;
375 
376   // Fill out the header info.
377   BP.Header.Version = 1;
378   BP.Header.FileName = std::string(BC.getFilename());
379   std::optional<StringRef> BuildID = BC.getFileBuildID();
380   BP.Header.Id = BuildID ? std::string(*BuildID) : "<unknown>";
381   BP.Header.Origin = std::string(RI.getProfileReader()->getReaderName());
382   BP.Header.IsDFSOrder = opts::ProfileUseDFS;
383   BP.Header.HashFunction = HashFunction::Default;
384 
385   StringSet<> EventNames = RI.getProfileReader()->getEventNames();
386   if (!EventNames.empty()) {
387     std::string Sep;
388     for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames) {
389       BP.Header.EventNames += Sep + EventEntry.first().str();
390       Sep = ",";
391     }
392   }
393 
394   // Make sure the profile is consistent across all functions.
395   uint16_t ProfileFlags = BinaryFunction::PF_NONE;
396   for (const auto &BFI : Functions) {
397     const BinaryFunction &BF = BFI.second;
398     if (BF.hasProfile() && !BF.empty()) {
399       assert(BF.getProfileFlags() != BinaryFunction::PF_NONE);
400       if (ProfileFlags == BinaryFunction::PF_NONE)
401         ProfileFlags = BF.getProfileFlags();
402 
403       assert(BF.getProfileFlags() == ProfileFlags &&
404              "expected consistent profile flags across all functions");
405     }
406   }
407   BP.Header.Flags = ProfileFlags;
408 
409   // Add probe inline tree nodes.
410   InlineTreeDesc InlineTree;
411   if (const MCPseudoProbeDecoder *Decoder =
412           opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr)
413     std::tie(BP.PseudoProbeDesc, InlineTree) = convertPseudoProbeDesc(*Decoder);
414 
415   // Add all function objects.
416   for (const auto &BFI : Functions) {
417     const BinaryFunction &BF = BFI.second;
418     if (BF.hasProfile()) {
419       if (!BF.hasValidProfile() && !RI.getProfileReader()->isTrustedSource())
420         continue;
421 
422       BP.Functions.emplace_back(convert(BF, opts::ProfileUseDFS, InlineTree));
423     }
424   }
425 
426   // Write the profile.
427   yaml::Output Out(*OS, nullptr, 0);
428   Out << BP;
429 
430   return std::error_code();
431 }
432 
433 } // namespace bolt
434 } // namespace llvm
435