xref: /llvm-project/bolt/lib/Profile/YAMLProfileReader.cpp (revision 06e08696248ac01754c87c22cc8a4b797ef46430)
12f09f445SMaksim Panchenko //===- bolt/Profile/YAMLProfileReader.cpp - YAML profile de-serializer ----===//
2a34c753fSRafael Auler //
3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information.
5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a34c753fSRafael Auler //
7a34c753fSRafael Auler //===----------------------------------------------------------------------===//
8a34c753fSRafael Auler 
9a34c753fSRafael Auler #include "bolt/Profile/YAMLProfileReader.h"
10a34c753fSRafael Auler #include "bolt/Core/BinaryBasicBlock.h"
11a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h"
12a34c753fSRafael Auler #include "bolt/Passes/MCF.h"
13a34c753fSRafael Auler #include "bolt/Profile/ProfileYAMLMapping.h"
1497dc5088SShaw Young #include "bolt/Utils/NameResolver.h"
15a34c753fSRafael Auler #include "bolt/Utils/Utils.h"
167b750943SAmir Ayupov #include "llvm/ADT/STLExtras.h"
1797dc5088SShaw Young #include "llvm/ADT/edit_distance.h"
1897dc5088SShaw Young #include "llvm/Demangle/Demangle.h"
199a9af0a2SShaw Young #include "llvm/MC/MCPseudoProbe.h"
20a34c753fSRafael Auler #include "llvm/Support/CommandLine.h"
21a34c753fSRafael Auler 
22a34c753fSRafael Auler using namespace llvm;
23a34c753fSRafael Auler 
24a34c753fSRafael Auler namespace opts {
25a34c753fSRafael Auler 
26a34c753fSRafael Auler extern cl::opt<unsigned> Verbosity;
27a34c753fSRafael Auler extern cl::OptionCategory BoltOptCategory;
2844268271Sspupyrev extern cl::opt<bool> InferStaleProfile;
2949fdbbcfSShaw Young extern cl::opt<bool> Lite;
30a34c753fSRafael Auler 
3197dc5088SShaw Young cl::opt<unsigned> NameSimilarityFunctionMatchingThreshold(
3297dc5088SShaw Young     "name-similarity-function-matching-threshold",
3397dc5088SShaw Young     cl::desc("Match functions using namespace and edit distance"), cl::init(0),
3497dc5088SShaw Young     cl::Hidden, cl::cat(BoltOptCategory));
3597dc5088SShaw Young 
36a34c753fSRafael Auler static llvm::cl::opt<bool>
37a34c753fSRafael Auler     IgnoreHash("profile-ignore-hash",
38a34c753fSRafael Auler                cl::desc("ignore hash while reading function profile"),
39b92436efSFangrui Song                cl::Hidden, cl::cat(BoltOptCategory));
4069b7e257SAmir Ayupov 
4149fdbbcfSShaw Young llvm::cl::opt<bool>
4249fdbbcfSShaw Young     MatchProfileWithFunctionHash("match-profile-with-function-hash",
4349fdbbcfSShaw Young                                  cl::desc("Match profile with function hash"),
4449fdbbcfSShaw Young                                  cl::Hidden, cl::cat(BoltOptCategory));
45296a9563SShaw Young llvm::cl::opt<bool>
46296a9563SShaw Young     MatchWithCallGraph("match-with-call-graph",
47296a9563SShaw Young                        cl::desc("Match functions with call graph"), cl::Hidden,
48296a9563SShaw Young                        cl::cat(BoltOptCategory));
4949fdbbcfSShaw Young 
5069b7e257SAmir Ayupov llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
5169b7e257SAmir Ayupov                                   cl::desc("use DFS order for YAML profile"),
5269b7e257SAmir Ayupov                                   cl::Hidden, cl::cat(BoltOptCategory));
539a9af0a2SShaw Young 
549a9af0a2SShaw Young extern llvm::cl::opt<bool> StaleMatchingWithPseudoProbes;
553af586f7SHo Cheung } // namespace opts
56a34c753fSRafael Auler 
57a34c753fSRafael Auler namespace llvm {
58a34c753fSRafael Auler namespace bolt {
59a34c753fSRafael Auler 
60296a9563SShaw Young YAMLProfileReader::CallGraphMatcher::CallGraphMatcher(
61296a9563SShaw Young     BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP,
62296a9563SShaw Young     ProfileLookupMap &IdToYAMLBF) {
63296a9563SShaw Young   constructBFCG(BC, YamlBP);
64296a9563SShaw Young   constructYAMLFCG(YamlBP, IdToYAMLBF);
65296a9563SShaw Young   computeBFNeighborHashes(BC);
66296a9563SShaw Young }
67296a9563SShaw Young 
68296a9563SShaw Young void YAMLProfileReader::CallGraphMatcher::constructBFCG(
69296a9563SShaw Young     BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP) {
70296a9563SShaw Young   for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
71296a9563SShaw Young     for (const BinaryBasicBlock &BB : BF->blocks()) {
72296a9563SShaw Young       for (const MCInst &Instr : BB) {
73296a9563SShaw Young         if (!BC.MIB->isCall(Instr))
74296a9563SShaw Young           continue;
75296a9563SShaw Young         const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr);
76296a9563SShaw Young         if (!CallSymbol)
77296a9563SShaw Young           continue;
78296a9563SShaw Young         BinaryData *BD = BC.getBinaryDataByName(CallSymbol->getName());
79296a9563SShaw Young         if (!BD)
80296a9563SShaw Young           continue;
81296a9563SShaw Young         BinaryFunction *CalleeBF = BC.getFunctionForSymbol(BD->getSymbol());
82296a9563SShaw Young         if (!CalleeBF)
83296a9563SShaw Young           continue;
84296a9563SShaw Young 
85296a9563SShaw Young         BFAdjacencyMap[CalleeBF].insert(BF);
86296a9563SShaw Young         BFAdjacencyMap[BF].insert(CalleeBF);
87296a9563SShaw Young       }
88296a9563SShaw Young     }
89296a9563SShaw Young   }
90296a9563SShaw Young }
91296a9563SShaw Young 
92296a9563SShaw Young void YAMLProfileReader::CallGraphMatcher::computeBFNeighborHashes(
93296a9563SShaw Young     BinaryContext &BC) {
94296a9563SShaw Young   for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
95296a9563SShaw Young     auto It = BFAdjacencyMap.find(BF);
96296a9563SShaw Young     if (It == BFAdjacencyMap.end())
97296a9563SShaw Young       continue;
98296a9563SShaw Young     auto &AdjacentBFs = It->second;
99296a9563SShaw Young     std::string HashStr;
100296a9563SShaw Young     for (BinaryFunction *BF : AdjacentBFs)
101296a9563SShaw Young       HashStr += BF->getOneName();
102296a9563SShaw Young     uint64_t Hash = std::hash<std::string>{}(HashStr);
103296a9563SShaw Young     NeighborHashToBFs[Hash].push_back(BF);
104296a9563SShaw Young   }
105296a9563SShaw Young }
106296a9563SShaw Young 
107296a9563SShaw Young void YAMLProfileReader::CallGraphMatcher::constructYAMLFCG(
108296a9563SShaw Young     yaml::bolt::BinaryProfile &YamlBP, ProfileLookupMap &IdToYAMLBF) {
109296a9563SShaw Young 
110296a9563SShaw Young   for (auto &CallerYamlBF : YamlBP.Functions) {
111296a9563SShaw Young     for (auto &YamlBB : CallerYamlBF.Blocks) {
112296a9563SShaw Young       for (auto &CallSite : YamlBB.CallSites) {
113296a9563SShaw Young         auto IdToYAMLBFIt = IdToYAMLBF.find(CallSite.DestId);
114296a9563SShaw Young         if (IdToYAMLBFIt == IdToYAMLBF.end())
115296a9563SShaw Young           continue;
116296a9563SShaw Young         YamlBFAdjacencyMap[&CallerYamlBF].insert(IdToYAMLBFIt->second);
117296a9563SShaw Young         YamlBFAdjacencyMap[IdToYAMLBFIt->second].insert(&CallerYamlBF);
118296a9563SShaw Young       }
119296a9563SShaw Young     }
120296a9563SShaw Young   }
121296a9563SShaw Young }
122296a9563SShaw Young 
123a34c753fSRafael Auler bool YAMLProfileReader::isYAML(const StringRef Filename) {
124e8a75c3fSAmir Ayupov   if (auto MB = MemoryBuffer::getFileOrSTDIN(Filename)) {
125e8a75c3fSAmir Ayupov     StringRef Buffer = (*MB)->getBuffer();
126ad8fd5b1SKazu Hirata     return Buffer.starts_with("---\n");
127e8a75c3fSAmir Ayupov   } else {
128e8a75c3fSAmir Ayupov     report_error(Filename, MB.getError());
129e8a75c3fSAmir Ayupov   }
130a34c753fSRafael Auler   return false;
131a34c753fSRafael Auler }
132a34c753fSRafael Auler 
1337b750943SAmir Ayupov void YAMLProfileReader::buildNameMaps(BinaryContext &BC) {
1347b750943SAmir Ayupov   auto lookupFunction = [&](StringRef Name) -> BinaryFunction * {
1357b750943SAmir Ayupov     if (BinaryData *BD = BC.getBinaryDataByName(Name))
1367b750943SAmir Ayupov       return BC.getFunctionForSymbol(BD->getSymbol());
1377b750943SAmir Ayupov     return nullptr;
1387b750943SAmir Ayupov   };
1397b750943SAmir Ayupov 
1407b750943SAmir Ayupov   ProfileBFs.reserve(YamlBP.Functions.size());
1417b750943SAmir Ayupov 
142a34c753fSRafael Auler   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
143a34c753fSRafael Auler     StringRef Name = YamlBF.Name;
144a34c753fSRafael Auler     const size_t Pos = Name.find("(*");
145a34c753fSRafael Auler     if (Pos != StringRef::npos)
146a34c753fSRafael Auler       Name = Name.substr(0, Pos);
1477b750943SAmir Ayupov     ProfileFunctionNames.insert(Name);
1487b750943SAmir Ayupov     ProfileBFs.push_back(lookupFunction(Name));
14915d1e517SAmir Ayupov     if (const std::optional<StringRef> CommonName = getLTOCommonName(Name))
150a34c753fSRafael Auler       LTOCommonNameMap[*CommonName].push_back(&YamlBF);
151a34c753fSRafael Auler   }
1527b750943SAmir Ayupov   for (auto &[Symbol, BF] : BC.SymbolToFunctionMap) {
1537b750943SAmir Ayupov     StringRef Name = Symbol->getName();
15415d1e517SAmir Ayupov     if (const std::optional<StringRef> CommonName = getLTOCommonName(Name))
1557b750943SAmir Ayupov       LTOCommonNameFunctionMap[*CommonName].insert(BF);
156a34c753fSRafael Auler   }
157a34c753fSRafael Auler }
158a34c753fSRafael Auler 
159a34c753fSRafael Auler bool YAMLProfileReader::hasLocalsWithFileName() const {
1607b750943SAmir Ayupov   return llvm::any_of(ProfileFunctionNames.keys(), [](StringRef FuncName) {
161e8a75c3fSAmir Ayupov     return FuncName.count('/') == 2 && FuncName[0] != '/';
162e8a75c3fSAmir Ayupov   });
163a34c753fSRafael Auler }
164a34c753fSRafael Auler 
165a34c753fSRafael Auler bool YAMLProfileReader::parseFunctionProfile(
16640c2e0faSMaksim Panchenko     BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF) {
167a34c753fSRafael Auler   BinaryContext &BC = BF.getBinaryContext();
168a34c753fSRafael Auler 
1691e0d08e8SAmir Ayupov   const bool IsDFSOrder = YamlBP.Header.IsDFSOrder;
170b039ccc6SAmir Ayupov   const HashFunction HashFunction = YamlBP.Header.HashFunction;
171a34c753fSRafael Auler   bool ProfileMatched = true;
172a34c753fSRafael Auler   uint64_t MismatchedBlocks = 0;
173a34c753fSRafael Auler   uint64_t MismatchedCalls = 0;
174a34c753fSRafael Auler   uint64_t MismatchedEdges = 0;
175a34c753fSRafael Auler 
176a34c753fSRafael Auler   uint64_t FunctionExecutionCount = 0;
177a34c753fSRafael Auler 
178a34c753fSRafael Auler   BF.setExecutionCount(YamlBF.ExecCount);
179a34c753fSRafael Auler 
18092758a99Sspupyrev   uint64_t FuncRawBranchCount = 0;
18192758a99Sspupyrev   for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks)
18292758a99Sspupyrev     for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors)
18392758a99Sspupyrev       FuncRawBranchCount += YamlSI.Count;
18492758a99Sspupyrev   BF.setRawBranchCount(FuncRawBranchCount);
18592758a99Sspupyrev 
18691423d71SAmir Ayupov   if (BF.empty())
18791423d71SAmir Ayupov     return true;
18891423d71SAmir Ayupov 
189720cade2SAmir Ayupov   if (!opts::IgnoreHash) {
190720cade2SAmir Ayupov     if (!BF.getHash())
191720cade2SAmir Ayupov       BF.computeHash(IsDFSOrder, HashFunction);
192720cade2SAmir Ayupov     if (YamlBF.Hash != BF.getHash()) {
193a34c753fSRafael Auler       if (opts::Verbosity >= 1)
194a34c753fSRafael Auler         errs() << "BOLT-WARNING: function hash mismatch\n";
195a34c753fSRafael Auler       ProfileMatched = false;
196a34c753fSRafael Auler     }
197720cade2SAmir Ayupov   }
198a34c753fSRafael Auler 
199a34c753fSRafael Auler   if (YamlBF.NumBasicBlocks != BF.size()) {
200a34c753fSRafael Auler     if (opts::Verbosity >= 1)
201a34c753fSRafael Auler       errs() << "BOLT-WARNING: number of basic blocks mismatch\n";
202a34c753fSRafael Auler     ProfileMatched = false;
203a34c753fSRafael Auler   }
204a34c753fSRafael Auler 
20569b7e257SAmir Ayupov   BinaryFunction::BasicBlockOrderType Order;
2061e0d08e8SAmir Ayupov   if (IsDFSOrder)
20769b7e257SAmir Ayupov     llvm::copy(BF.dfs(), std::back_inserter(Order));
20869b7e257SAmir Ayupov   else
20969b7e257SAmir Ayupov     llvm::copy(BF.getLayout().blocks(), std::back_inserter(Order));
210a34c753fSRafael Auler 
211a34c753fSRafael Auler   for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
21269b7e257SAmir Ayupov     if (YamlBB.Index >= Order.size()) {
213a34c753fSRafael Auler       if (opts::Verbosity >= 2)
214a34c753fSRafael Auler         errs() << "BOLT-WARNING: index " << YamlBB.Index
215a34c753fSRafael Auler                << " is out of bounds\n";
216a34c753fSRafael Auler       ++MismatchedBlocks;
217a34c753fSRafael Auler       continue;
218a34c753fSRafael Auler     }
219a34c753fSRafael Auler 
22069b7e257SAmir Ayupov     BinaryBasicBlock &BB = *Order[YamlBB.Index];
221a34c753fSRafael Auler 
222a34c753fSRafael Auler     // Basic samples profile (without LBR) does not have branches information
223a34c753fSRafael Auler     // and needs a special processing.
224a34c753fSRafael Auler     if (YamlBP.Header.Flags & BinaryFunction::PF_SAMPLE) {
225a34c753fSRafael Auler       if (!YamlBB.EventCount) {
226a34c753fSRafael Auler         BB.setExecutionCount(0);
227a34c753fSRafael Auler         continue;
228a34c753fSRafael Auler       }
229a34c753fSRafael Auler       uint64_t NumSamples = YamlBB.EventCount * 1000;
230def464aaSAmir Ayupov       if (NormalizeByInsnCount && BB.getNumNonPseudos())
231a34c753fSRafael Auler         NumSamples /= BB.getNumNonPseudos();
232def464aaSAmir Ayupov       else if (NormalizeByCalls)
233a34c753fSRafael Auler         NumSamples /= BB.getNumCalls() + 1;
234def464aaSAmir Ayupov 
235a34c753fSRafael Auler       BB.setExecutionCount(NumSamples);
236a34c753fSRafael Auler       if (BB.isEntryPoint())
237a34c753fSRafael Auler         FunctionExecutionCount += NumSamples;
238a34c753fSRafael Auler       continue;
239a34c753fSRafael Auler     }
240a34c753fSRafael Auler 
241a34c753fSRafael Auler     BB.setExecutionCount(YamlBB.ExecCount);
242a34c753fSRafael Auler 
243a34c753fSRafael Auler     for (const yaml::bolt::CallSiteInfo &YamlCSI : YamlBB.CallSites) {
244d936924fSAmir Ayupov       BinaryFunction *Callee = YamlProfileToFunction.lookup(YamlCSI.DestId);
245a34c753fSRafael Auler       bool IsFunction = Callee ? true : false;
246a34c753fSRafael Auler       MCSymbol *CalleeSymbol = nullptr;
247def464aaSAmir Ayupov       if (IsFunction)
248a34c753fSRafael Auler         CalleeSymbol = Callee->getSymbolForEntryID(YamlCSI.EntryDiscriminator);
249def464aaSAmir Ayupov 
25040c2e0faSMaksim Panchenko       BF.getAllCallSites().emplace_back(CalleeSymbol, YamlCSI.Count,
25140c2e0faSMaksim Panchenko                                         YamlCSI.Mispreds, YamlCSI.Offset);
252a34c753fSRafael Auler 
253a34c753fSRafael Auler       if (YamlCSI.Offset >= BB.getOriginalSize()) {
254a34c753fSRafael Auler         if (opts::Verbosity >= 2)
255a34c753fSRafael Auler           errs() << "BOLT-WARNING: offset " << YamlCSI.Offset
256a34c753fSRafael Auler                  << " out of bounds in block " << BB.getName() << '\n';
257a34c753fSRafael Auler         ++MismatchedCalls;
258a34c753fSRafael Auler         continue;
259a34c753fSRafael Auler       }
260a34c753fSRafael Auler 
261a34c753fSRafael Auler       MCInst *Instr =
262a34c753fSRafael Auler           BF.getInstructionAtOffset(BB.getInputOffset() + YamlCSI.Offset);
263a34c753fSRafael Auler       if (!Instr) {
264a34c753fSRafael Auler         if (opts::Verbosity >= 2)
265a34c753fSRafael Auler           errs() << "BOLT-WARNING: no instruction at offset " << YamlCSI.Offset
266a34c753fSRafael Auler                  << " in block " << BB.getName() << '\n';
267a34c753fSRafael Auler         ++MismatchedCalls;
268a34c753fSRafael Auler         continue;
269a34c753fSRafael Auler       }
270a34c753fSRafael Auler       if (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr)) {
271a34c753fSRafael Auler         if (opts::Verbosity >= 2)
272a34c753fSRafael Auler           errs() << "BOLT-WARNING: expected call at offset " << YamlCSI.Offset
273a34c753fSRafael Auler                  << " in block " << BB.getName() << '\n';
274a34c753fSRafael Auler         ++MismatchedCalls;
275a34c753fSRafael Auler         continue;
276a34c753fSRafael Auler       }
277a34c753fSRafael Auler 
278a34c753fSRafael Auler       auto setAnnotation = [&](StringRef Name, uint64_t Count) {
279a34c753fSRafael Auler         if (BC.MIB->hasAnnotation(*Instr, Name)) {
280a34c753fSRafael Auler           if (opts::Verbosity >= 1)
281a34c753fSRafael Auler             errs() << "BOLT-WARNING: ignoring duplicate " << Name
282a34c753fSRafael Auler                    << " info for offset 0x" << Twine::utohexstr(YamlCSI.Offset)
283a34c753fSRafael Auler                    << " in function " << BF << '\n';
284a34c753fSRafael Auler           return;
285a34c753fSRafael Auler         }
286a34c753fSRafael Auler         BC.MIB->addAnnotation(*Instr, Name, Count);
287a34c753fSRafael Auler       };
288a34c753fSRafael Auler 
289a34c753fSRafael Auler       if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) {
290a34c753fSRafael Auler         auto &CSP = BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
291a34c753fSRafael Auler             *Instr, "CallProfile");
292a34c753fSRafael Auler         CSP.emplace_back(CalleeSymbol, YamlCSI.Count, YamlCSI.Mispreds);
293a34c753fSRafael Auler       } else if (BC.MIB->getConditionalTailCall(*Instr)) {
294a34c753fSRafael Auler         setAnnotation("CTCTakenCount", YamlCSI.Count);
295a34c753fSRafael Auler         setAnnotation("CTCMispredCount", YamlCSI.Mispreds);
296a34c753fSRafael Auler       } else {
297a34c753fSRafael Auler         setAnnotation("Count", YamlCSI.Count);
298a34c753fSRafael Auler       }
299a34c753fSRafael Auler     }
300a34c753fSRafael Auler 
301a34c753fSRafael Auler     for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) {
30269b7e257SAmir Ayupov       if (YamlSI.Index >= Order.size()) {
303a34c753fSRafael Auler         if (opts::Verbosity >= 1)
304a34c753fSRafael Auler           errs() << "BOLT-WARNING: index out of bounds for profiled block\n";
305a34c753fSRafael Auler         ++MismatchedEdges;
306a34c753fSRafael Auler         continue;
307a34c753fSRafael Auler       }
308a34c753fSRafael Auler 
309b06f97b0SAmir Ayupov       BinaryBasicBlock *ToBB = Order[YamlSI.Index];
310b06f97b0SAmir Ayupov       if (!BB.getSuccessor(ToBB->getLabel())) {
311b06f97b0SAmir Ayupov         // Allow passthrough blocks.
312b06f97b0SAmir Ayupov         BinaryBasicBlock *FTSuccessor = BB.getConditionalSuccessor(false);
313b06f97b0SAmir Ayupov         if (FTSuccessor && FTSuccessor->succ_size() == 1 &&
314b06f97b0SAmir Ayupov             FTSuccessor->getSuccessor(ToBB->getLabel())) {
315b06f97b0SAmir Ayupov           BinaryBasicBlock::BinaryBranchInfo &FTBI =
316b06f97b0SAmir Ayupov               FTSuccessor->getBranchInfo(*ToBB);
317b06f97b0SAmir Ayupov           FTBI.Count += YamlSI.Count;
318b06f97b0SAmir Ayupov           FTBI.MispredictedCount += YamlSI.Mispreds;
319b06f97b0SAmir Ayupov           ToBB = FTSuccessor;
320b06f97b0SAmir Ayupov         } else {
321a34c753fSRafael Auler           if (opts::Verbosity >= 1)
322a34c753fSRafael Auler             errs() << "BOLT-WARNING: no successor for block " << BB.getName()
323a34c753fSRafael Auler                    << " that matches index " << YamlSI.Index << " or block "
324b06f97b0SAmir Ayupov                    << ToBB->getName() << '\n';
325a34c753fSRafael Auler           ++MismatchedEdges;
326a34c753fSRafael Auler           continue;
327a34c753fSRafael Auler         }
328b06f97b0SAmir Ayupov       }
329a34c753fSRafael Auler 
330b06f97b0SAmir Ayupov       BinaryBasicBlock::BinaryBranchInfo &BI = BB.getBranchInfo(*ToBB);
331a34c753fSRafael Auler       BI.Count += YamlSI.Count;
332a34c753fSRafael Auler       BI.MispredictedCount += YamlSI.Mispreds;
333a34c753fSRafael Auler     }
334a34c753fSRafael Auler   }
335a34c753fSRafael Auler 
336a34c753fSRafael Auler   // If basic block profile wasn't read it should be 0.
337def464aaSAmir Ayupov   for (BinaryBasicBlock &BB : BF)
338a34c753fSRafael Auler     if (BB.getExecutionCount() == BinaryBasicBlock::COUNT_NO_PROFILE)
339a34c753fSRafael Auler       BB.setExecutionCount(0);
340a34c753fSRafael Auler 
341f3dc732bSAmir Ayupov   if (YamlBP.Header.Flags & BinaryFunction::PF_SAMPLE)
342a34c753fSRafael Auler     BF.setExecutionCount(FunctionExecutionCount);
343a34c753fSRafael Auler 
344a34c753fSRafael Auler   ProfileMatched &= !MismatchedBlocks && !MismatchedCalls && !MismatchedEdges;
345a34c753fSRafael Auler 
3463c64b24eSAmir Ayupov   if (!ProfileMatched) {
3473c64b24eSAmir Ayupov     if (opts::Verbosity >= 1)
348a34c753fSRafael Auler       errs() << "BOLT-WARNING: " << MismatchedBlocks << " blocks, "
349a34c753fSRafael Auler              << MismatchedCalls << " calls, and " << MismatchedEdges
350a34c753fSRafael Auler              << " edges in profile did not match function " << BF << '\n';
351a34c753fSRafael Auler 
3523c64b24eSAmir Ayupov     if (YamlBF.NumBasicBlocks != BF.size())
3533c64b24eSAmir Ayupov       ++BC.Stats.NumStaleFuncsWithEqualBlockCount;
3543c64b24eSAmir Ayupov 
3559a9af0a2SShaw Young     if (!opts::InferStaleProfile)
3569a9af0a2SShaw Young       return false;
3579a9af0a2SShaw Young     ArrayRef<ProbeMatchSpec> ProbeMatchSpecs;
3589a9af0a2SShaw Young     auto BFIt = BFToProbeMatchSpecs.find(&BF);
3599a9af0a2SShaw Young     if (BFIt != BFToProbeMatchSpecs.end())
3609a9af0a2SShaw Young       ProbeMatchSpecs = BFIt->second;
3619a9af0a2SShaw Young     ProfileMatched = inferStaleProfile(BF, YamlBF, ProbeMatchSpecs);
3623c64b24eSAmir Ayupov   }
3633c64b24eSAmir Ayupov   if (ProfileMatched)
36444268271Sspupyrev     BF.markProfiled(YamlBP.Header.Flags);
36544268271Sspupyrev 
366a34c753fSRafael Auler   return ProfileMatched;
367a34c753fSRafael Auler }
368a34c753fSRafael Auler 
369a34c753fSRafael Auler Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) {
370a34c753fSRafael Auler   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
371a34c753fSRafael Auler       MemoryBuffer::getFileOrSTDIN(Filename);
372a34c753fSRafael Auler   if (std::error_code EC = MB.getError()) {
373a34c753fSRafael Auler     errs() << "ERROR: cannot open " << Filename << ": " << EC.message() << "\n";
374a34c753fSRafael Auler     return errorCodeToError(EC);
375a34c753fSRafael Auler   }
376a34c753fSRafael Auler   yaml::Input YamlInput(MB.get()->getBuffer());
37715fa3ba5SAmir Ayupov   YamlInput.setAllowUnknownKeys(true);
378a34c753fSRafael Auler 
379a34c753fSRafael Auler   // Consume YAML file.
380a34c753fSRafael Auler   YamlInput >> YamlBP;
381a34c753fSRafael Auler   if (YamlInput.error()) {
382a34c753fSRafael Auler     errs() << "BOLT-ERROR: syntax error parsing profile in " << Filename
383a34c753fSRafael Auler            << " : " << YamlInput.error().message() << '\n';
384a34c753fSRafael Auler     return errorCodeToError(YamlInput.error());
385a34c753fSRafael Auler   }
386a34c753fSRafael Auler 
387a34c753fSRafael Auler   // Sanity check.
388def464aaSAmir Ayupov   if (YamlBP.Header.Version != 1)
389a34c753fSRafael Auler     return make_error<StringError>(
390a34c753fSRafael Auler         Twine("cannot read profile : unsupported version"),
391a34c753fSRafael Auler         inconvertibleErrorCode());
392def464aaSAmir Ayupov 
393def464aaSAmir Ayupov   if (YamlBP.Header.EventNames.find(',') != StringRef::npos)
394a34c753fSRafael Auler     return make_error<StringError>(
395a34c753fSRafael Auler         Twine("multiple events in profile are not supported"),
396a34c753fSRafael Auler         inconvertibleErrorCode());
397a34c753fSRafael Auler 
398a34c753fSRafael Auler   // Match profile to function based on a function name.
3997b750943SAmir Ayupov   buildNameMaps(BC);
400a34c753fSRafael Auler 
401a34c753fSRafael Auler   // Preliminary assign function execution count.
4026a1cf545SAmir Ayupov   for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
4036a1cf545SAmir Ayupov     if (!BF)
4046a1cf545SAmir Ayupov       continue;
4056a1cf545SAmir Ayupov     if (!BF->hasProfile()) {
4067b750943SAmir Ayupov       BF->setExecutionCount(YamlBF.ExecCount);
4076a1cf545SAmir Ayupov     } else {
4086a1cf545SAmir Ayupov       if (opts::Verbosity >= 1) {
4096a1cf545SAmir Ayupov         errs() << "BOLT-WARNING: dropping duplicate profile for " << YamlBF.Name
4106a1cf545SAmir Ayupov                << '\n';
4116a1cf545SAmir Ayupov       }
4126a1cf545SAmir Ayupov       BF = nullptr;
4136a1cf545SAmir Ayupov     }
4146a1cf545SAmir Ayupov   }
415a34c753fSRafael Auler 
416a34c753fSRafael Auler   return Error::success();
417a34c753fSRafael Auler }
418a34c753fSRafael Auler 
41937bee254SShaw Young bool YAMLProfileReader::profileMatches(
42037bee254SShaw Young     const yaml::bolt::BinaryFunctionProfile &Profile, const BinaryFunction &BF) {
42137bee254SShaw Young   if (opts::IgnoreHash)
42237bee254SShaw Young     return Profile.NumBasicBlocks == BF.size();
42337bee254SShaw Young   return Profile.Hash == static_cast<uint64_t>(BF.getHash());
42437bee254SShaw Young }
42537bee254SShaw Young 
426a34c753fSRafael Auler bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
427296a9563SShaw Young   if (opts::MatchProfileWithFunctionHash || opts::MatchWithCallGraph)
42849fdbbcfSShaw Young     return true;
4297b750943SAmir Ayupov   for (StringRef Name : BF.getNames())
4307b750943SAmir Ayupov     if (ProfileFunctionNames.contains(Name))
431a34c753fSRafael Auler       return true;
4327b750943SAmir Ayupov   for (StringRef Name : BF.getNames()) {
43315d1e517SAmir Ayupov     if (const std::optional<StringRef> CommonName = getLTOCommonName(Name)) {
4344e585e51SKazu Hirata       if (LTOCommonNameMap.contains(*CommonName))
435a34c753fSRafael Auler         return true;
436a34c753fSRafael Auler     }
437a34c753fSRafael Auler   }
438a34c753fSRafael Auler 
439a34c753fSRafael Auler   return false;
440a34c753fSRafael Auler }
441a34c753fSRafael Auler 
44237bee254SShaw Young size_t YAMLProfileReader::matchWithExactName() {
44337bee254SShaw Young   size_t MatchedWithExactName = 0;
44437bee254SShaw Young   // This first pass assigns profiles that match 100% by name and by hash.
44537bee254SShaw Young   for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
44637bee254SShaw Young     if (!BF)
44737bee254SShaw Young       continue;
44837bee254SShaw Young     BinaryFunction &Function = *BF;
44937bee254SShaw Young     // Clear function call count that may have been set while pre-processing
45037bee254SShaw Young     // the profile.
45137bee254SShaw Young     Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE);
45237bee254SShaw Young 
45337bee254SShaw Young     if (profileMatches(YamlBF, Function)) {
45437bee254SShaw Young       matchProfileToFunction(YamlBF, Function);
45537bee254SShaw Young       ++MatchedWithExactName;
45637bee254SShaw Young     }
45737bee254SShaw Young   }
45837bee254SShaw Young   return MatchedWithExactName;
45937bee254SShaw Young }
46037bee254SShaw Young 
46137bee254SShaw Young size_t YAMLProfileReader::matchWithHash(BinaryContext &BC) {
46237bee254SShaw Young   // Iterates through profiled functions to match the first binary function with
46337bee254SShaw Young   // the same exact hash. Serves to match identical, renamed functions.
46437bee254SShaw Young   // Collisions are possible where multiple functions share the same exact hash.
46537bee254SShaw Young   size_t MatchedWithHash = 0;
46637bee254SShaw Young   if (opts::MatchProfileWithFunctionHash) {
46737bee254SShaw Young     DenseMap<size_t, BinaryFunction *> StrictHashToBF;
46837bee254SShaw Young     StrictHashToBF.reserve(BC.getBinaryFunctions().size());
46937bee254SShaw Young 
47037bee254SShaw Young     for (auto &[_, BF] : BC.getBinaryFunctions())
47137bee254SShaw Young       StrictHashToBF[BF.getHash()] = &BF;
47237bee254SShaw Young 
47337bee254SShaw Young     for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
47437bee254SShaw Young       if (YamlBF.Used)
47537bee254SShaw Young         continue;
47637bee254SShaw Young       auto It = StrictHashToBF.find(YamlBF.Hash);
47737bee254SShaw Young       if (It != StrictHashToBF.end() && !ProfiledFunctions.count(It->second)) {
47837bee254SShaw Young         BinaryFunction *BF = It->second;
47937bee254SShaw Young         matchProfileToFunction(YamlBF, *BF);
48037bee254SShaw Young         ++MatchedWithHash;
48137bee254SShaw Young       }
48237bee254SShaw Young     }
48337bee254SShaw Young   }
48437bee254SShaw Young   return MatchedWithHash;
48537bee254SShaw Young }
48637bee254SShaw Young 
48737bee254SShaw Young size_t YAMLProfileReader::matchWithLTOCommonName() {
48837bee254SShaw Young   // This second pass allows name ambiguity for LTO private functions.
48937bee254SShaw Young   size_t MatchedWithLTOCommonName = 0;
49037bee254SShaw Young   for (const auto &[CommonName, LTOProfiles] : LTOCommonNameMap) {
49137bee254SShaw Young     if (!LTOCommonNameFunctionMap.contains(CommonName))
49237bee254SShaw Young       continue;
49337bee254SShaw Young     std::unordered_set<BinaryFunction *> &Functions =
49437bee254SShaw Young         LTOCommonNameFunctionMap[CommonName];
49537bee254SShaw Young     // Return true if a given profile is matched to one of BinaryFunctions with
49637bee254SShaw Young     // matching LTO common name.
49737bee254SShaw Young     auto matchProfile = [&](yaml::bolt::BinaryFunctionProfile *YamlBF) {
49837bee254SShaw Young       if (YamlBF->Used)
49937bee254SShaw Young         return false;
50037bee254SShaw Young       for (BinaryFunction *BF : Functions) {
50137bee254SShaw Young         if (!ProfiledFunctions.count(BF) && profileMatches(*YamlBF, *BF)) {
50237bee254SShaw Young           matchProfileToFunction(*YamlBF, *BF);
50337bee254SShaw Young           ++MatchedWithLTOCommonName;
50437bee254SShaw Young           return true;
50537bee254SShaw Young         }
50637bee254SShaw Young       }
50737bee254SShaw Young       return false;
50837bee254SShaw Young     };
50937bee254SShaw Young     bool ProfileMatched = llvm::any_of(LTOProfiles, matchProfile);
51037bee254SShaw Young 
51137bee254SShaw Young     // If there's only one function with a given name, try to match it
51237bee254SShaw Young     // partially.
51337bee254SShaw Young     if (!ProfileMatched && LTOProfiles.size() == 1 && Functions.size() == 1 &&
51437bee254SShaw Young         !LTOProfiles.front()->Used &&
51537bee254SShaw Young         !ProfiledFunctions.count(*Functions.begin())) {
51637bee254SShaw Young       matchProfileToFunction(*LTOProfiles.front(), **Functions.begin());
51737bee254SShaw Young       ++MatchedWithLTOCommonName;
51837bee254SShaw Young     }
51937bee254SShaw Young   }
52037bee254SShaw Young   return MatchedWithLTOCommonName;
52137bee254SShaw Young }
52237bee254SShaw Young 
523296a9563SShaw Young size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
524296a9563SShaw Young   if (!opts::MatchWithCallGraph)
525296a9563SShaw Young     return 0;
526296a9563SShaw Young 
527296a9563SShaw Young   size_t MatchedWithCallGraph = 0;
528296a9563SShaw Young   CallGraphMatcher CGMatcher(BC, YamlBP, IdToYamLBF);
529296a9563SShaw Young 
530296a9563SShaw Young   ItaniumPartialDemangler Demangler;
531296a9563SShaw Young   auto GetBaseName = [&](std::string &FunctionName) {
532296a9563SShaw Young     if (Demangler.partialDemangle(FunctionName.c_str()))
533296a9563SShaw Young       return std::string("");
534296a9563SShaw Young     size_t BufferSize = 1;
535296a9563SShaw Young     char *Buffer = static_cast<char *>(std::malloc(BufferSize));
536296a9563SShaw Young     char *BaseName = Demangler.getFunctionBaseName(Buffer, &BufferSize);
537296a9563SShaw Young     if (!BaseName) {
538296a9563SShaw Young       std::free(Buffer);
539296a9563SShaw Young       return std::string("");
540296a9563SShaw Young     }
541296a9563SShaw Young     if (Buffer != BaseName)
542296a9563SShaw Young       Buffer = BaseName;
543296a9563SShaw Young     std::string BaseNameStr(Buffer, BufferSize);
544296a9563SShaw Young     std::free(Buffer);
545296a9563SShaw Young     return BaseNameStr;
546296a9563SShaw Young   };
547296a9563SShaw Young 
548296a9563SShaw Young   // Matches YAMLBF to BFs with neighbor hashes.
549296a9563SShaw Young   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
550296a9563SShaw Young     if (YamlBF.Used)
551296a9563SShaw Young       continue;
552296a9563SShaw Young     auto AdjacentYamlBFsOpt = CGMatcher.getAdjacentYamlBFs(YamlBF);
553296a9563SShaw Young     if (!AdjacentYamlBFsOpt)
554296a9563SShaw Young       continue;
555296a9563SShaw Young     std::set<yaml::bolt::BinaryFunctionProfile *> AdjacentYamlBFs =
556296a9563SShaw Young         AdjacentYamlBFsOpt.value();
557296a9563SShaw Young     std::string AdjacentYamlBFsHashStr;
558296a9563SShaw Young     for (auto *AdjacentYamlBF : AdjacentYamlBFs)
559296a9563SShaw Young       AdjacentYamlBFsHashStr += AdjacentYamlBF->Name;
560296a9563SShaw Young     uint64_t Hash = std::hash<std::string>{}(AdjacentYamlBFsHashStr);
561296a9563SShaw Young     auto BFsWithSameHashOpt = CGMatcher.getBFsWithNeighborHash(Hash);
562296a9563SShaw Young     if (!BFsWithSameHashOpt)
563296a9563SShaw Young       continue;
564296a9563SShaw Young     std::vector<BinaryFunction *> BFsWithSameHash = BFsWithSameHashOpt.value();
565296a9563SShaw Young     // Finds the binary function with the longest common prefix to the profiled
566296a9563SShaw Young     // function and matches.
567296a9563SShaw Young     BinaryFunction *ClosestBF = nullptr;
568296a9563SShaw Young     size_t LCP = 0;
569296a9563SShaw Young     std::string YamlBFBaseName = GetBaseName(YamlBF.Name);
570296a9563SShaw Young     for (BinaryFunction *BF : BFsWithSameHash) {
571296a9563SShaw Young       if (ProfiledFunctions.count(BF))
572296a9563SShaw Young         continue;
573296a9563SShaw Young       std::string BFName = std::string(BF->getOneName());
574296a9563SShaw Young       std::string BFBaseName = GetBaseName(BFName);
575296a9563SShaw Young       size_t PrefixLength = 0;
576296a9563SShaw Young       size_t N = std::min(YamlBFBaseName.size(), BFBaseName.size());
577296a9563SShaw Young       for (size_t I = 0; I < N; ++I) {
578296a9563SShaw Young         if (YamlBFBaseName[I] != BFBaseName[I])
579296a9563SShaw Young           break;
580296a9563SShaw Young         ++PrefixLength;
581296a9563SShaw Young       }
582296a9563SShaw Young       if (PrefixLength >= LCP) {
583296a9563SShaw Young         LCP = PrefixLength;
584296a9563SShaw Young         ClosestBF = BF;
585296a9563SShaw Young       }
586296a9563SShaw Young     }
587296a9563SShaw Young     if (ClosestBF) {
588296a9563SShaw Young       matchProfileToFunction(YamlBF, *ClosestBF);
589296a9563SShaw Young       ++MatchedWithCallGraph;
590296a9563SShaw Young     }
591296a9563SShaw Young   }
592296a9563SShaw Young 
593296a9563SShaw Young   return MatchedWithCallGraph;
594296a9563SShaw Young }
595296a9563SShaw Young 
5969a9af0a2SShaw Young size_t YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees(
5979a9af0a2SShaw Young     const MCPseudoProbeDecoder &Decoder,
5989a9af0a2SShaw Young     const std::vector<yaml::bolt::InlineTreeNode> &DecodedInlineTree,
5999a9af0a2SShaw Young     const MCDecodedPseudoProbeInlineTree *Root) {
6009a9af0a2SShaw Young   // Match inline tree nodes by GUID, checksum, parent, and call site.
6019a9af0a2SShaw Young   for (const auto &[InlineTreeNodeId, InlineTreeNode] :
6029a9af0a2SShaw Young        llvm::enumerate(DecodedInlineTree)) {
6039a9af0a2SShaw Young     uint64_t GUID = InlineTreeNode.GUID;
6049a9af0a2SShaw Young     uint64_t Hash = InlineTreeNode.Hash;
6059a9af0a2SShaw Young     uint32_t ParentId = InlineTreeNode.ParentIndexDelta;
6069a9af0a2SShaw Young     uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe;
6079a9af0a2SShaw Young     const MCDecodedPseudoProbeInlineTree *Cur = nullptr;
6089a9af0a2SShaw Young     if (!InlineTreeNodeId) {
6099a9af0a2SShaw Young       Cur = Root;
6109a9af0a2SShaw Young     } else if (const MCDecodedPseudoProbeInlineTree *Parent =
6119a9af0a2SShaw Young                    getInlineTreeNode(ParentId)) {
6129a9af0a2SShaw Young       for (const MCDecodedPseudoProbeInlineTree &Child :
6139a9af0a2SShaw Young            Parent->getChildren()) {
6149a9af0a2SShaw Young         if (Child.Guid == GUID) {
6159a9af0a2SShaw Young           if (std::get<1>(Child.getInlineSite()) == CallSiteProbe)
6169a9af0a2SShaw Young             Cur = &Child;
6179a9af0a2SShaw Young           break;
6189a9af0a2SShaw Young         }
6199a9af0a2SShaw Young       }
6209a9af0a2SShaw Young     }
6219a9af0a2SShaw Young     // Don't match nodes if the profile is stale (mismatching binary FuncHash
6229a9af0a2SShaw Young     // and YAML Hash)
6239a9af0a2SShaw Young     if (Cur && Decoder.getFuncDescForGUID(Cur->Guid)->FuncHash == Hash)
6249a9af0a2SShaw Young       mapInlineTreeNode(InlineTreeNodeId, Cur);
6259a9af0a2SShaw Young   }
6269a9af0a2SShaw Young   return Map.size();
6279a9af0a2SShaw Young }
6289a9af0a2SShaw Young 
6299a9af0a2SShaw Young // Decode index deltas and indirection through \p YamlPD. Return modified copy
6309a9af0a2SShaw Young // of \p YamlInlineTree with populated decoded fields (GUID, Hash, ParentIndex).
6319a9af0a2SShaw Young static std::vector<yaml::bolt::InlineTreeNode>
6329a9af0a2SShaw Young decodeYamlInlineTree(const yaml::bolt::ProfilePseudoProbeDesc &YamlPD,
6339a9af0a2SShaw Young                      std::vector<yaml::bolt::InlineTreeNode> YamlInlineTree) {
6349a9af0a2SShaw Young   uint32_t ParentId = 0;
6359a9af0a2SShaw Young   uint32_t PrevGUIDIdx = 0;
6369a9af0a2SShaw Young   for (yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) {
6379a9af0a2SShaw Young     uint32_t GUIDIdx = InlineTreeNode.GUIDIndex;
6389a9af0a2SShaw Young     if (GUIDIdx != UINT32_MAX)
6399a9af0a2SShaw Young       PrevGUIDIdx = GUIDIdx;
6409a9af0a2SShaw Young     else
6419a9af0a2SShaw Young       GUIDIdx = PrevGUIDIdx;
6429a9af0a2SShaw Young     uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx];
6439a9af0a2SShaw Young     ParentId += InlineTreeNode.ParentIndexDelta;
6449a9af0a2SShaw Young     InlineTreeNode.GUID = YamlPD.GUID[GUIDIdx];
6459a9af0a2SShaw Young     InlineTreeNode.Hash = YamlPD.Hash[HashIdx];
6469a9af0a2SShaw Young     InlineTreeNode.ParentIndexDelta = ParentId;
6479a9af0a2SShaw Young   }
6489a9af0a2SShaw Young   return YamlInlineTree;
6499a9af0a2SShaw Young }
6509a9af0a2SShaw Young 
6519a9af0a2SShaw Young size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) {
6529a9af0a2SShaw Young   if (!opts::StaleMatchingWithPseudoProbes)
6539a9af0a2SShaw Young     return 0;
6549a9af0a2SShaw Young 
6559a9af0a2SShaw Young   const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
6569a9af0a2SShaw Young   const yaml::bolt::ProfilePseudoProbeDesc &YamlPD = YamlBP.PseudoProbeDesc;
6579a9af0a2SShaw Young 
6589a9af0a2SShaw Young   // Set existing BF->YamlBF match into ProbeMatchSpecs for (local) probe
6599a9af0a2SShaw Young   // matching.
6609a9af0a2SShaw Young   assert(Decoder &&
6619a9af0a2SShaw Young          "If pseudo probes are in use, pseudo probe decoder should exist");
6629a9af0a2SShaw Young   for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
6639a9af0a2SShaw Young     // BF is preliminary name-matched function to YamlBF
6649a9af0a2SShaw Young     // MatchedBF is final matched function
6659a9af0a2SShaw Young     BinaryFunction *MatchedBF = YamlProfileToFunction.lookup(YamlBF.Id);
6669a9af0a2SShaw Young     if (!BF)
6679a9af0a2SShaw Young       BF = MatchedBF;
6689a9af0a2SShaw Young     if (!BF)
6699a9af0a2SShaw Young       continue;
6709a9af0a2SShaw Young     uint64_t GUID = BF->getGUID();
6719a9af0a2SShaw Young     if (!GUID)
6729a9af0a2SShaw Young       continue;
6739a9af0a2SShaw Young     auto It = TopLevelGUIDToInlineTree.find(GUID);
6749a9af0a2SShaw Young     if (It == TopLevelGUIDToInlineTree.end())
6759a9af0a2SShaw Young       continue;
6769a9af0a2SShaw Young     const MCDecodedPseudoProbeInlineTree *Node = It->second;
6779a9af0a2SShaw Young     assert(Node && "Malformed TopLevelGUIDToInlineTree");
6789a9af0a2SShaw Young     auto &MatchSpecs = BFToProbeMatchSpecs[BF];
6799a9af0a2SShaw Young     auto &InlineTreeMap =
6809a9af0a2SShaw Young         MatchSpecs.emplace_back(InlineTreeNodeMapTy(), YamlBF).first;
6819a9af0a2SShaw Young     std::vector<yaml::bolt::InlineTreeNode> ProfileInlineTree =
6829a9af0a2SShaw Young         decodeYamlInlineTree(YamlPD, YamlBF.InlineTree);
6839a9af0a2SShaw Young     // Erase unsuccessful match
6849a9af0a2SShaw Young     if (!InlineTreeMap.matchInlineTrees(*Decoder, ProfileInlineTree, Node))
6859a9af0a2SShaw Young       MatchSpecs.pop_back();
6869a9af0a2SShaw Young   }
6879a9af0a2SShaw Young 
6889a9af0a2SShaw Young   return 0;
6899a9af0a2SShaw Young }
6909a9af0a2SShaw Young 
69137bee254SShaw Young size_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) {
69237bee254SShaw Young   if (opts::NameSimilarityFunctionMatchingThreshold == 0)
69337bee254SShaw Young     return 0;
69437bee254SShaw Young 
69537bee254SShaw Young   size_t MatchedWithNameSimilarity = 0;
69697dc5088SShaw Young   ItaniumPartialDemangler Demangler;
69797dc5088SShaw Young 
69897dc5088SShaw Young   // Demangle and derive namespace from function name.
69997dc5088SShaw Young   auto DemangleName = [&](std::string &FunctionName) {
70097dc5088SShaw Young     StringRef RestoredName = NameResolver::restore(FunctionName);
70197dc5088SShaw Young     return demangle(RestoredName);
70297dc5088SShaw Young   };
70397dc5088SShaw Young   auto DeriveNameSpace = [&](std::string &DemangledName) {
70497dc5088SShaw Young     if (Demangler.partialDemangle(DemangledName.c_str()))
70597dc5088SShaw Young       return std::string("");
70697dc5088SShaw Young     std::vector<char> Buffer(DemangledName.begin(), DemangledName.end());
70797dc5088SShaw Young     size_t BufferSize;
70897dc5088SShaw Young     char *NameSpace =
70997dc5088SShaw Young         Demangler.getFunctionDeclContextName(&Buffer[0], &BufferSize);
71097dc5088SShaw Young     return std::string(NameSpace, BufferSize);
71197dc5088SShaw Young   };
71297dc5088SShaw Young 
71397dc5088SShaw Young   // Maps namespaces to associated function block counts and gets profile
71497dc5088SShaw Young   // function names and namespaces to minimize the number of BFs to process and
71597dc5088SShaw Young   // avoid repeated name demangling/namespace derivation.
71697dc5088SShaw Young   StringMap<std::set<uint32_t>> NamespaceToProfiledBFSizes;
71797dc5088SShaw Young   std::vector<std::string> ProfileBFDemangledNames;
71897dc5088SShaw Young   ProfileBFDemangledNames.reserve(YamlBP.Functions.size());
71997dc5088SShaw Young   std::vector<std::string> ProfiledBFNamespaces;
72097dc5088SShaw Young   ProfiledBFNamespaces.reserve(YamlBP.Functions.size());
72197dc5088SShaw Young 
72297dc5088SShaw Young   for (auto &YamlBF : YamlBP.Functions) {
72397dc5088SShaw Young     std::string YamlBFDemangledName = DemangleName(YamlBF.Name);
72497dc5088SShaw Young     ProfileBFDemangledNames.push_back(YamlBFDemangledName);
72597dc5088SShaw Young     std::string YamlBFNamespace = DeriveNameSpace(YamlBFDemangledName);
72697dc5088SShaw Young     ProfiledBFNamespaces.push_back(YamlBFNamespace);
72797dc5088SShaw Young     NamespaceToProfiledBFSizes[YamlBFNamespace].insert(YamlBF.NumBasicBlocks);
72897dc5088SShaw Young   }
72997dc5088SShaw Young 
73097dc5088SShaw Young   StringMap<std::vector<BinaryFunction *>> NamespaceToBFs;
73197dc5088SShaw Young 
73297dc5088SShaw Young   // Maps namespaces to BFs excluding binary functions with no equal sized
73397dc5088SShaw Young   // profiled functions belonging to the same namespace.
73497dc5088SShaw Young   for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
73597dc5088SShaw Young     std::string DemangledName = BF->getDemangledName();
73697dc5088SShaw Young     std::string Namespace = DeriveNameSpace(DemangledName);
73797dc5088SShaw Young 
73897dc5088SShaw Young     auto NamespaceToProfiledBFSizesIt =
73997dc5088SShaw Young         NamespaceToProfiledBFSizes.find(Namespace);
74097dc5088SShaw Young     // Skip if there are no ProfileBFs with a given \p Namespace.
74197dc5088SShaw Young     if (NamespaceToProfiledBFSizesIt == NamespaceToProfiledBFSizes.end())
74297dc5088SShaw Young       continue;
74397dc5088SShaw Young     // Skip if there are no ProfileBFs in a given \p Namespace with
74497dc5088SShaw Young     // equal number of blocks.
74597dc5088SShaw Young     if (NamespaceToProfiledBFSizesIt->second.count(BF->size()) == 0)
74697dc5088SShaw Young       continue;
7471be849c5SKazu Hirata     NamespaceToBFs[Namespace].push_back(BF);
74897dc5088SShaw Young   }
74997dc5088SShaw Young 
75097dc5088SShaw Young   // Iterates through all profiled functions and binary functions belonging to
75197dc5088SShaw Young   // the same namespace and matches based on edit distance threshold.
75297dc5088SShaw Young   assert(YamlBP.Functions.size() == ProfiledBFNamespaces.size() &&
75397dc5088SShaw Young          ProfiledBFNamespaces.size() == ProfileBFDemangledNames.size());
75497dc5088SShaw Young   for (size_t I = 0; I < YamlBP.Functions.size(); ++I) {
75597dc5088SShaw Young     yaml::bolt::BinaryFunctionProfile &YamlBF = YamlBP.Functions[I];
75697dc5088SShaw Young     std::string &YamlBFNamespace = ProfiledBFNamespaces[I];
75797dc5088SShaw Young     if (YamlBF.Used)
75897dc5088SShaw Young       continue;
75997dc5088SShaw Young     // Skip if there are no BFs in a given \p Namespace.
76097dc5088SShaw Young     auto It = NamespaceToBFs.find(YamlBFNamespace);
76197dc5088SShaw Young     if (It == NamespaceToBFs.end())
76297dc5088SShaw Young       continue;
76397dc5088SShaw Young 
76497dc5088SShaw Young     std::string &YamlBFDemangledName = ProfileBFDemangledNames[I];
76597dc5088SShaw Young     std::vector<BinaryFunction *> BFs = It->second;
76697dc5088SShaw Young     unsigned MinEditDistance = UINT_MAX;
76797dc5088SShaw Young     BinaryFunction *ClosestNameBF = nullptr;
76897dc5088SShaw Young 
76997dc5088SShaw Young     // Determines BF the closest to the profiled function, in the
77097dc5088SShaw Young     // same namespace.
77197dc5088SShaw Young     for (BinaryFunction *BF : BFs) {
77297dc5088SShaw Young       if (ProfiledFunctions.count(BF))
77397dc5088SShaw Young         continue;
77497dc5088SShaw Young       if (BF->size() != YamlBF.NumBasicBlocks)
77597dc5088SShaw Young         continue;
77697dc5088SShaw Young       std::string BFDemangledName = BF->getDemangledName();
77797dc5088SShaw Young       unsigned BFEditDistance =
77897dc5088SShaw Young           StringRef(BFDemangledName).edit_distance(YamlBFDemangledName);
77997dc5088SShaw Young       if (BFEditDistance < MinEditDistance) {
78097dc5088SShaw Young         MinEditDistance = BFEditDistance;
78197dc5088SShaw Young         ClosestNameBF = BF;
78297dc5088SShaw Young       }
78397dc5088SShaw Young     }
78497dc5088SShaw Young 
78597dc5088SShaw Young     if (ClosestNameBF &&
78697dc5088SShaw Young         MinEditDistance <= opts::NameSimilarityFunctionMatchingThreshold) {
78797dc5088SShaw Young       matchProfileToFunction(YamlBF, *ClosestNameBF);
78897dc5088SShaw Young       ++MatchedWithNameSimilarity;
78997dc5088SShaw Young     }
79097dc5088SShaw Young   }
79197dc5088SShaw Young 
79297dc5088SShaw Young   return MatchedWithNameSimilarity;
79397dc5088SShaw Young }
79497dc5088SShaw Young 
795a34c753fSRafael Auler Error YAMLProfileReader::readProfile(BinaryContext &BC) {
796b039ccc6SAmir Ayupov   if (opts::Verbosity >= 1) {
797b039ccc6SAmir Ayupov     outs() << "BOLT-INFO: YAML profile with hash: ";
798b039ccc6SAmir Ayupov     switch (YamlBP.Header.HashFunction) {
799b039ccc6SAmir Ayupov     case HashFunction::StdHash:
800b039ccc6SAmir Ayupov       outs() << "std::hash\n";
801b039ccc6SAmir Ayupov       break;
802b039ccc6SAmir Ayupov     case HashFunction::XXH3:
803b039ccc6SAmir Ayupov       outs() << "xxh3\n";
804b039ccc6SAmir Ayupov       break;
805b039ccc6SAmir Ayupov     }
806b039ccc6SAmir Ayupov   }
807d936924fSAmir Ayupov   YamlProfileToFunction.reserve(YamlBP.Functions.size());
808a34c753fSRafael Auler 
80949fdbbcfSShaw Young   // Computes hash for binary functions.
81049fdbbcfSShaw Young   if (opts::MatchProfileWithFunctionHash) {
81149fdbbcfSShaw Young     for (auto &[_, BF] : BC.getBinaryFunctions()) {
81249fdbbcfSShaw Young       BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
81349fdbbcfSShaw Young     }
81449fdbbcfSShaw Young   } else if (!opts::IgnoreHash) {
81549fdbbcfSShaw Young     for (BinaryFunction *BF : ProfileBFs) {
81649fdbbcfSShaw Young       if (!BF)
81749fdbbcfSShaw Young         continue;
81849fdbbcfSShaw Young       BF->computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
81949fdbbcfSShaw Young     }
82049fdbbcfSShaw Young   }
82149fdbbcfSShaw Young 
8229a9af0a2SShaw Young   if (opts::StaleMatchingWithPseudoProbes) {
8239a9af0a2SShaw Young     const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
8249a9af0a2SShaw Young     assert(Decoder &&
8259a9af0a2SShaw Young            "If pseudo probes are in use, pseudo probe decoder should exist");
8269a9af0a2SShaw Young     for (const MCDecodedPseudoProbeInlineTree &TopLev :
8279a9af0a2SShaw Young          Decoder->getDummyInlineRoot().getChildren())
8289a9af0a2SShaw Young       TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev;
8299a9af0a2SShaw Young   }
8309a9af0a2SShaw Young 
831296a9563SShaw Young   // Map profiled function ids to names.
832296a9563SShaw Young   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
833296a9563SShaw Young     IdToYamLBF[YamlBF.Id] = &YamlBF;
834296a9563SShaw Young 
83537bee254SShaw Young   const size_t MatchedWithExactName = matchWithExactName();
83637bee254SShaw Young   const size_t MatchedWithHash = matchWithHash(BC);
83737bee254SShaw Young   const size_t MatchedWithLTOCommonName = matchWithLTOCommonName();
838296a9563SShaw Young   const size_t MatchedWithCallGraph = matchWithCallGraph(BC);
83937bee254SShaw Young   const size_t MatchedWithNameSimilarity = matchWithNameSimilarity(BC);
840*06e08696SKazu Hirata   [[maybe_unused]] const size_t MatchedWithPseudoProbes =
841*06e08696SKazu Hirata       matchWithPseudoProbes(BC);
842a34c753fSRafael Auler 
8437b750943SAmir Ayupov   for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs))
8447b750943SAmir Ayupov     if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF))
8457b750943SAmir Ayupov       matchProfileToFunction(YamlBF, *BF);
846a34c753fSRafael Auler 
84797dc5088SShaw Young 
848def464aaSAmir Ayupov   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
849def464aaSAmir Ayupov     if (!YamlBF.Used && opts::Verbosity >= 1)
850a34c753fSRafael Auler       errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
851a34c753fSRafael Auler              << '\n';
852a34c753fSRafael Auler 
85349fdbbcfSShaw Young   if (opts::Verbosity >= 1) {
85449fdbbcfSShaw Young     outs() << "BOLT-INFO: matched " << MatchedWithExactName
85549fdbbcfSShaw Young            << " functions with identical names\n";
85649fdbbcfSShaw Young     outs() << "BOLT-INFO: matched " << MatchedWithHash
85749fdbbcfSShaw Young            << " functions with hash\n";
85849fdbbcfSShaw Young     outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName
85949fdbbcfSShaw Young            << " functions with matching LTO common names\n";
860296a9563SShaw Young     outs() << "BOLT-INFO: matched " << MatchedWithCallGraph
861296a9563SShaw Young            << " functions with call graph\n";
86297dc5088SShaw Young     outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity
86397dc5088SShaw Young            << " functions with similar names\n";
86449fdbbcfSShaw Young   }
86549fdbbcfSShaw Young 
866a34c753fSRafael Auler   // Set for parseFunctionProfile().
867a34c753fSRafael Auler   NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
868a34c753fSRafael Auler   NormalizeByCalls = usesEvent("branches");
869a34c753fSRafael Auler   uint64_t NumUnused = 0;
870a34c753fSRafael Auler   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
871d936924fSAmir Ayupov     if (BinaryFunction *BF = YamlProfileToFunction.lookup(YamlBF.Id))
872a34c753fSRafael Auler       parseFunctionProfile(*BF, YamlBF);
873def464aaSAmir Ayupov     else
874a34c753fSRafael Auler       ++NumUnused;
875a34c753fSRafael Auler   }
876a34c753fSRafael Auler 
877a34c753fSRafael Auler   BC.setNumUnusedProfiledObjects(NumUnused);
878a34c753fSRafael Auler 
879296a9563SShaw Young   if (opts::Lite &&
880296a9563SShaw Young       (opts::MatchProfileWithFunctionHash || opts::MatchWithCallGraph)) {
88149fdbbcfSShaw Young     for (BinaryFunction *BF : BC.getAllBinaryFunctions())
88249fdbbcfSShaw Young       if (!BF->hasProfile())
88349fdbbcfSShaw Young         BF->setIgnored();
88449fdbbcfSShaw Young   }
88549fdbbcfSShaw Young 
886a34c753fSRafael Auler   return Error::success();
887a34c753fSRafael Auler }
888a34c753fSRafael Auler 
889a34c753fSRafael Auler bool YAMLProfileReader::usesEvent(StringRef Name) const {
890a34c753fSRafael Auler   return YamlBP.Header.EventNames.find(std::string(Name)) != StringRef::npos;
891a34c753fSRafael Auler }
892a34c753fSRafael Auler 
893a34c753fSRafael Auler } // end namespace bolt
894a34c753fSRafael Auler } // end namespace llvm
895