xref: /llvm-project/bolt/lib/Profile/YAMLProfileReader.cpp (revision 06e08696248ac01754c87c22cc8a4b797ef46430)
1 //===- bolt/Profile/YAMLProfileReader.cpp - YAML profile de-serializer ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Profile/YAMLProfileReader.h"
10 #include "bolt/Core/BinaryBasicBlock.h"
11 #include "bolt/Core/BinaryFunction.h"
12 #include "bolt/Passes/MCF.h"
13 #include "bolt/Profile/ProfileYAMLMapping.h"
14 #include "bolt/Utils/NameResolver.h"
15 #include "bolt/Utils/Utils.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/edit_distance.h"
18 #include "llvm/Demangle/Demangle.h"
19 #include "llvm/MC/MCPseudoProbe.h"
20 #include "llvm/Support/CommandLine.h"
21 
22 using namespace llvm;
23 
24 namespace opts {
25 
26 extern cl::opt<unsigned> Verbosity;
27 extern cl::OptionCategory BoltOptCategory;
28 extern cl::opt<bool> InferStaleProfile;
29 extern cl::opt<bool> Lite;
30 
31 cl::opt<unsigned> NameSimilarityFunctionMatchingThreshold(
32     "name-similarity-function-matching-threshold",
33     cl::desc("Match functions using namespace and edit distance"), cl::init(0),
34     cl::Hidden, cl::cat(BoltOptCategory));
35 
36 static llvm::cl::opt<bool>
37     IgnoreHash("profile-ignore-hash",
38                cl::desc("ignore hash while reading function profile"),
39                cl::Hidden, cl::cat(BoltOptCategory));
40 
41 llvm::cl::opt<bool>
42     MatchProfileWithFunctionHash("match-profile-with-function-hash",
43                                  cl::desc("Match profile with function hash"),
44                                  cl::Hidden, cl::cat(BoltOptCategory));
45 llvm::cl::opt<bool>
46     MatchWithCallGraph("match-with-call-graph",
47                        cl::desc("Match functions with call graph"), cl::Hidden,
48                        cl::cat(BoltOptCategory));
49 
50 llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
51                                   cl::desc("use DFS order for YAML profile"),
52                                   cl::Hidden, cl::cat(BoltOptCategory));
53 
54 extern llvm::cl::opt<bool> StaleMatchingWithPseudoProbes;
55 } // namespace opts
56 
57 namespace llvm {
58 namespace bolt {
59 
60 YAMLProfileReader::CallGraphMatcher::CallGraphMatcher(
61     BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP,
62     ProfileLookupMap &IdToYAMLBF) {
63   constructBFCG(BC, YamlBP);
64   constructYAMLFCG(YamlBP, IdToYAMLBF);
65   computeBFNeighborHashes(BC);
66 }
67 
68 void YAMLProfileReader::CallGraphMatcher::constructBFCG(
69     BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP) {
70   for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
71     for (const BinaryBasicBlock &BB : BF->blocks()) {
72       for (const MCInst &Instr : BB) {
73         if (!BC.MIB->isCall(Instr))
74           continue;
75         const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr);
76         if (!CallSymbol)
77           continue;
78         BinaryData *BD = BC.getBinaryDataByName(CallSymbol->getName());
79         if (!BD)
80           continue;
81         BinaryFunction *CalleeBF = BC.getFunctionForSymbol(BD->getSymbol());
82         if (!CalleeBF)
83           continue;
84 
85         BFAdjacencyMap[CalleeBF].insert(BF);
86         BFAdjacencyMap[BF].insert(CalleeBF);
87       }
88     }
89   }
90 }
91 
92 void YAMLProfileReader::CallGraphMatcher::computeBFNeighborHashes(
93     BinaryContext &BC) {
94   for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
95     auto It = BFAdjacencyMap.find(BF);
96     if (It == BFAdjacencyMap.end())
97       continue;
98     auto &AdjacentBFs = It->second;
99     std::string HashStr;
100     for (BinaryFunction *BF : AdjacentBFs)
101       HashStr += BF->getOneName();
102     uint64_t Hash = std::hash<std::string>{}(HashStr);
103     NeighborHashToBFs[Hash].push_back(BF);
104   }
105 }
106 
107 void YAMLProfileReader::CallGraphMatcher::constructYAMLFCG(
108     yaml::bolt::BinaryProfile &YamlBP, ProfileLookupMap &IdToYAMLBF) {
109 
110   for (auto &CallerYamlBF : YamlBP.Functions) {
111     for (auto &YamlBB : CallerYamlBF.Blocks) {
112       for (auto &CallSite : YamlBB.CallSites) {
113         auto IdToYAMLBFIt = IdToYAMLBF.find(CallSite.DestId);
114         if (IdToYAMLBFIt == IdToYAMLBF.end())
115           continue;
116         YamlBFAdjacencyMap[&CallerYamlBF].insert(IdToYAMLBFIt->second);
117         YamlBFAdjacencyMap[IdToYAMLBFIt->second].insert(&CallerYamlBF);
118       }
119     }
120   }
121 }
122 
123 bool YAMLProfileReader::isYAML(const StringRef Filename) {
124   if (auto MB = MemoryBuffer::getFileOrSTDIN(Filename)) {
125     StringRef Buffer = (*MB)->getBuffer();
126     return Buffer.starts_with("---\n");
127   } else {
128     report_error(Filename, MB.getError());
129   }
130   return false;
131 }
132 
133 void YAMLProfileReader::buildNameMaps(BinaryContext &BC) {
134   auto lookupFunction = [&](StringRef Name) -> BinaryFunction * {
135     if (BinaryData *BD = BC.getBinaryDataByName(Name))
136       return BC.getFunctionForSymbol(BD->getSymbol());
137     return nullptr;
138   };
139 
140   ProfileBFs.reserve(YamlBP.Functions.size());
141 
142   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
143     StringRef Name = YamlBF.Name;
144     const size_t Pos = Name.find("(*");
145     if (Pos != StringRef::npos)
146       Name = Name.substr(0, Pos);
147     ProfileFunctionNames.insert(Name);
148     ProfileBFs.push_back(lookupFunction(Name));
149     if (const std::optional<StringRef> CommonName = getLTOCommonName(Name))
150       LTOCommonNameMap[*CommonName].push_back(&YamlBF);
151   }
152   for (auto &[Symbol, BF] : BC.SymbolToFunctionMap) {
153     StringRef Name = Symbol->getName();
154     if (const std::optional<StringRef> CommonName = getLTOCommonName(Name))
155       LTOCommonNameFunctionMap[*CommonName].insert(BF);
156   }
157 }
158 
159 bool YAMLProfileReader::hasLocalsWithFileName() const {
160   return llvm::any_of(ProfileFunctionNames.keys(), [](StringRef FuncName) {
161     return FuncName.count('/') == 2 && FuncName[0] != '/';
162   });
163 }
164 
165 bool YAMLProfileReader::parseFunctionProfile(
166     BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF) {
167   BinaryContext &BC = BF.getBinaryContext();
168 
169   const bool IsDFSOrder = YamlBP.Header.IsDFSOrder;
170   const HashFunction HashFunction = YamlBP.Header.HashFunction;
171   bool ProfileMatched = true;
172   uint64_t MismatchedBlocks = 0;
173   uint64_t MismatchedCalls = 0;
174   uint64_t MismatchedEdges = 0;
175 
176   uint64_t FunctionExecutionCount = 0;
177 
178   BF.setExecutionCount(YamlBF.ExecCount);
179 
180   uint64_t FuncRawBranchCount = 0;
181   for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks)
182     for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors)
183       FuncRawBranchCount += YamlSI.Count;
184   BF.setRawBranchCount(FuncRawBranchCount);
185 
186   if (BF.empty())
187     return true;
188 
189   if (!opts::IgnoreHash) {
190     if (!BF.getHash())
191       BF.computeHash(IsDFSOrder, HashFunction);
192     if (YamlBF.Hash != BF.getHash()) {
193       if (opts::Verbosity >= 1)
194         errs() << "BOLT-WARNING: function hash mismatch\n";
195       ProfileMatched = false;
196     }
197   }
198 
199   if (YamlBF.NumBasicBlocks != BF.size()) {
200     if (opts::Verbosity >= 1)
201       errs() << "BOLT-WARNING: number of basic blocks mismatch\n";
202     ProfileMatched = false;
203   }
204 
205   BinaryFunction::BasicBlockOrderType Order;
206   if (IsDFSOrder)
207     llvm::copy(BF.dfs(), std::back_inserter(Order));
208   else
209     llvm::copy(BF.getLayout().blocks(), std::back_inserter(Order));
210 
211   for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
212     if (YamlBB.Index >= Order.size()) {
213       if (opts::Verbosity >= 2)
214         errs() << "BOLT-WARNING: index " << YamlBB.Index
215                << " is out of bounds\n";
216       ++MismatchedBlocks;
217       continue;
218     }
219 
220     BinaryBasicBlock &BB = *Order[YamlBB.Index];
221 
222     // Basic samples profile (without LBR) does not have branches information
223     // and needs a special processing.
224     if (YamlBP.Header.Flags & BinaryFunction::PF_SAMPLE) {
225       if (!YamlBB.EventCount) {
226         BB.setExecutionCount(0);
227         continue;
228       }
229       uint64_t NumSamples = YamlBB.EventCount * 1000;
230       if (NormalizeByInsnCount && BB.getNumNonPseudos())
231         NumSamples /= BB.getNumNonPseudos();
232       else if (NormalizeByCalls)
233         NumSamples /= BB.getNumCalls() + 1;
234 
235       BB.setExecutionCount(NumSamples);
236       if (BB.isEntryPoint())
237         FunctionExecutionCount += NumSamples;
238       continue;
239     }
240 
241     BB.setExecutionCount(YamlBB.ExecCount);
242 
243     for (const yaml::bolt::CallSiteInfo &YamlCSI : YamlBB.CallSites) {
244       BinaryFunction *Callee = YamlProfileToFunction.lookup(YamlCSI.DestId);
245       bool IsFunction = Callee ? true : false;
246       MCSymbol *CalleeSymbol = nullptr;
247       if (IsFunction)
248         CalleeSymbol = Callee->getSymbolForEntryID(YamlCSI.EntryDiscriminator);
249 
250       BF.getAllCallSites().emplace_back(CalleeSymbol, YamlCSI.Count,
251                                         YamlCSI.Mispreds, YamlCSI.Offset);
252 
253       if (YamlCSI.Offset >= BB.getOriginalSize()) {
254         if (opts::Verbosity >= 2)
255           errs() << "BOLT-WARNING: offset " << YamlCSI.Offset
256                  << " out of bounds in block " << BB.getName() << '\n';
257         ++MismatchedCalls;
258         continue;
259       }
260 
261       MCInst *Instr =
262           BF.getInstructionAtOffset(BB.getInputOffset() + YamlCSI.Offset);
263       if (!Instr) {
264         if (opts::Verbosity >= 2)
265           errs() << "BOLT-WARNING: no instruction at offset " << YamlCSI.Offset
266                  << " in block " << BB.getName() << '\n';
267         ++MismatchedCalls;
268         continue;
269       }
270       if (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr)) {
271         if (opts::Verbosity >= 2)
272           errs() << "BOLT-WARNING: expected call at offset " << YamlCSI.Offset
273                  << " in block " << BB.getName() << '\n';
274         ++MismatchedCalls;
275         continue;
276       }
277 
278       auto setAnnotation = [&](StringRef Name, uint64_t Count) {
279         if (BC.MIB->hasAnnotation(*Instr, Name)) {
280           if (opts::Verbosity >= 1)
281             errs() << "BOLT-WARNING: ignoring duplicate " << Name
282                    << " info for offset 0x" << Twine::utohexstr(YamlCSI.Offset)
283                    << " in function " << BF << '\n';
284           return;
285         }
286         BC.MIB->addAnnotation(*Instr, Name, Count);
287       };
288 
289       if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) {
290         auto &CSP = BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
291             *Instr, "CallProfile");
292         CSP.emplace_back(CalleeSymbol, YamlCSI.Count, YamlCSI.Mispreds);
293       } else if (BC.MIB->getConditionalTailCall(*Instr)) {
294         setAnnotation("CTCTakenCount", YamlCSI.Count);
295         setAnnotation("CTCMispredCount", YamlCSI.Mispreds);
296       } else {
297         setAnnotation("Count", YamlCSI.Count);
298       }
299     }
300 
301     for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) {
302       if (YamlSI.Index >= Order.size()) {
303         if (opts::Verbosity >= 1)
304           errs() << "BOLT-WARNING: index out of bounds for profiled block\n";
305         ++MismatchedEdges;
306         continue;
307       }
308 
309       BinaryBasicBlock *ToBB = Order[YamlSI.Index];
310       if (!BB.getSuccessor(ToBB->getLabel())) {
311         // Allow passthrough blocks.
312         BinaryBasicBlock *FTSuccessor = BB.getConditionalSuccessor(false);
313         if (FTSuccessor && FTSuccessor->succ_size() == 1 &&
314             FTSuccessor->getSuccessor(ToBB->getLabel())) {
315           BinaryBasicBlock::BinaryBranchInfo &FTBI =
316               FTSuccessor->getBranchInfo(*ToBB);
317           FTBI.Count += YamlSI.Count;
318           FTBI.MispredictedCount += YamlSI.Mispreds;
319           ToBB = FTSuccessor;
320         } else {
321           if (opts::Verbosity >= 1)
322             errs() << "BOLT-WARNING: no successor for block " << BB.getName()
323                    << " that matches index " << YamlSI.Index << " or block "
324                    << ToBB->getName() << '\n';
325           ++MismatchedEdges;
326           continue;
327         }
328       }
329 
330       BinaryBasicBlock::BinaryBranchInfo &BI = BB.getBranchInfo(*ToBB);
331       BI.Count += YamlSI.Count;
332       BI.MispredictedCount += YamlSI.Mispreds;
333     }
334   }
335 
336   // If basic block profile wasn't read it should be 0.
337   for (BinaryBasicBlock &BB : BF)
338     if (BB.getExecutionCount() == BinaryBasicBlock::COUNT_NO_PROFILE)
339       BB.setExecutionCount(0);
340 
341   if (YamlBP.Header.Flags & BinaryFunction::PF_SAMPLE)
342     BF.setExecutionCount(FunctionExecutionCount);
343 
344   ProfileMatched &= !MismatchedBlocks && !MismatchedCalls && !MismatchedEdges;
345 
346   if (!ProfileMatched) {
347     if (opts::Verbosity >= 1)
348       errs() << "BOLT-WARNING: " << MismatchedBlocks << " blocks, "
349              << MismatchedCalls << " calls, and " << MismatchedEdges
350              << " edges in profile did not match function " << BF << '\n';
351 
352     if (YamlBF.NumBasicBlocks != BF.size())
353       ++BC.Stats.NumStaleFuncsWithEqualBlockCount;
354 
355     if (!opts::InferStaleProfile)
356       return false;
357     ArrayRef<ProbeMatchSpec> ProbeMatchSpecs;
358     auto BFIt = BFToProbeMatchSpecs.find(&BF);
359     if (BFIt != BFToProbeMatchSpecs.end())
360       ProbeMatchSpecs = BFIt->second;
361     ProfileMatched = inferStaleProfile(BF, YamlBF, ProbeMatchSpecs);
362   }
363   if (ProfileMatched)
364     BF.markProfiled(YamlBP.Header.Flags);
365 
366   return ProfileMatched;
367 }
368 
369 Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) {
370   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
371       MemoryBuffer::getFileOrSTDIN(Filename);
372   if (std::error_code EC = MB.getError()) {
373     errs() << "ERROR: cannot open " << Filename << ": " << EC.message() << "\n";
374     return errorCodeToError(EC);
375   }
376   yaml::Input YamlInput(MB.get()->getBuffer());
377   YamlInput.setAllowUnknownKeys(true);
378 
379   // Consume YAML file.
380   YamlInput >> YamlBP;
381   if (YamlInput.error()) {
382     errs() << "BOLT-ERROR: syntax error parsing profile in " << Filename
383            << " : " << YamlInput.error().message() << '\n';
384     return errorCodeToError(YamlInput.error());
385   }
386 
387   // Sanity check.
388   if (YamlBP.Header.Version != 1)
389     return make_error<StringError>(
390         Twine("cannot read profile : unsupported version"),
391         inconvertibleErrorCode());
392 
393   if (YamlBP.Header.EventNames.find(',') != StringRef::npos)
394     return make_error<StringError>(
395         Twine("multiple events in profile are not supported"),
396         inconvertibleErrorCode());
397 
398   // Match profile to function based on a function name.
399   buildNameMaps(BC);
400 
401   // Preliminary assign function execution count.
402   for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
403     if (!BF)
404       continue;
405     if (!BF->hasProfile()) {
406       BF->setExecutionCount(YamlBF.ExecCount);
407     } else {
408       if (opts::Verbosity >= 1) {
409         errs() << "BOLT-WARNING: dropping duplicate profile for " << YamlBF.Name
410                << '\n';
411       }
412       BF = nullptr;
413     }
414   }
415 
416   return Error::success();
417 }
418 
419 bool YAMLProfileReader::profileMatches(
420     const yaml::bolt::BinaryFunctionProfile &Profile, const BinaryFunction &BF) {
421   if (opts::IgnoreHash)
422     return Profile.NumBasicBlocks == BF.size();
423   return Profile.Hash == static_cast<uint64_t>(BF.getHash());
424 }
425 
426 bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
427   if (opts::MatchProfileWithFunctionHash || opts::MatchWithCallGraph)
428     return true;
429   for (StringRef Name : BF.getNames())
430     if (ProfileFunctionNames.contains(Name))
431       return true;
432   for (StringRef Name : BF.getNames()) {
433     if (const std::optional<StringRef> CommonName = getLTOCommonName(Name)) {
434       if (LTOCommonNameMap.contains(*CommonName))
435         return true;
436     }
437   }
438 
439   return false;
440 }
441 
442 size_t YAMLProfileReader::matchWithExactName() {
443   size_t MatchedWithExactName = 0;
444   // This first pass assigns profiles that match 100% by name and by hash.
445   for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
446     if (!BF)
447       continue;
448     BinaryFunction &Function = *BF;
449     // Clear function call count that may have been set while pre-processing
450     // the profile.
451     Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE);
452 
453     if (profileMatches(YamlBF, Function)) {
454       matchProfileToFunction(YamlBF, Function);
455       ++MatchedWithExactName;
456     }
457   }
458   return MatchedWithExactName;
459 }
460 
461 size_t YAMLProfileReader::matchWithHash(BinaryContext &BC) {
462   // Iterates through profiled functions to match the first binary function with
463   // the same exact hash. Serves to match identical, renamed functions.
464   // Collisions are possible where multiple functions share the same exact hash.
465   size_t MatchedWithHash = 0;
466   if (opts::MatchProfileWithFunctionHash) {
467     DenseMap<size_t, BinaryFunction *> StrictHashToBF;
468     StrictHashToBF.reserve(BC.getBinaryFunctions().size());
469 
470     for (auto &[_, BF] : BC.getBinaryFunctions())
471       StrictHashToBF[BF.getHash()] = &BF;
472 
473     for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
474       if (YamlBF.Used)
475         continue;
476       auto It = StrictHashToBF.find(YamlBF.Hash);
477       if (It != StrictHashToBF.end() && !ProfiledFunctions.count(It->second)) {
478         BinaryFunction *BF = It->second;
479         matchProfileToFunction(YamlBF, *BF);
480         ++MatchedWithHash;
481       }
482     }
483   }
484   return MatchedWithHash;
485 }
486 
487 size_t YAMLProfileReader::matchWithLTOCommonName() {
488   // This second pass allows name ambiguity for LTO private functions.
489   size_t MatchedWithLTOCommonName = 0;
490   for (const auto &[CommonName, LTOProfiles] : LTOCommonNameMap) {
491     if (!LTOCommonNameFunctionMap.contains(CommonName))
492       continue;
493     std::unordered_set<BinaryFunction *> &Functions =
494         LTOCommonNameFunctionMap[CommonName];
495     // Return true if a given profile is matched to one of BinaryFunctions with
496     // matching LTO common name.
497     auto matchProfile = [&](yaml::bolt::BinaryFunctionProfile *YamlBF) {
498       if (YamlBF->Used)
499         return false;
500       for (BinaryFunction *BF : Functions) {
501         if (!ProfiledFunctions.count(BF) && profileMatches(*YamlBF, *BF)) {
502           matchProfileToFunction(*YamlBF, *BF);
503           ++MatchedWithLTOCommonName;
504           return true;
505         }
506       }
507       return false;
508     };
509     bool ProfileMatched = llvm::any_of(LTOProfiles, matchProfile);
510 
511     // If there's only one function with a given name, try to match it
512     // partially.
513     if (!ProfileMatched && LTOProfiles.size() == 1 && Functions.size() == 1 &&
514         !LTOProfiles.front()->Used &&
515         !ProfiledFunctions.count(*Functions.begin())) {
516       matchProfileToFunction(*LTOProfiles.front(), **Functions.begin());
517       ++MatchedWithLTOCommonName;
518     }
519   }
520   return MatchedWithLTOCommonName;
521 }
522 
523 size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
524   if (!opts::MatchWithCallGraph)
525     return 0;
526 
527   size_t MatchedWithCallGraph = 0;
528   CallGraphMatcher CGMatcher(BC, YamlBP, IdToYamLBF);
529 
530   ItaniumPartialDemangler Demangler;
531   auto GetBaseName = [&](std::string &FunctionName) {
532     if (Demangler.partialDemangle(FunctionName.c_str()))
533       return std::string("");
534     size_t BufferSize = 1;
535     char *Buffer = static_cast<char *>(std::malloc(BufferSize));
536     char *BaseName = Demangler.getFunctionBaseName(Buffer, &BufferSize);
537     if (!BaseName) {
538       std::free(Buffer);
539       return std::string("");
540     }
541     if (Buffer != BaseName)
542       Buffer = BaseName;
543     std::string BaseNameStr(Buffer, BufferSize);
544     std::free(Buffer);
545     return BaseNameStr;
546   };
547 
548   // Matches YAMLBF to BFs with neighbor hashes.
549   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
550     if (YamlBF.Used)
551       continue;
552     auto AdjacentYamlBFsOpt = CGMatcher.getAdjacentYamlBFs(YamlBF);
553     if (!AdjacentYamlBFsOpt)
554       continue;
555     std::set<yaml::bolt::BinaryFunctionProfile *> AdjacentYamlBFs =
556         AdjacentYamlBFsOpt.value();
557     std::string AdjacentYamlBFsHashStr;
558     for (auto *AdjacentYamlBF : AdjacentYamlBFs)
559       AdjacentYamlBFsHashStr += AdjacentYamlBF->Name;
560     uint64_t Hash = std::hash<std::string>{}(AdjacentYamlBFsHashStr);
561     auto BFsWithSameHashOpt = CGMatcher.getBFsWithNeighborHash(Hash);
562     if (!BFsWithSameHashOpt)
563       continue;
564     std::vector<BinaryFunction *> BFsWithSameHash = BFsWithSameHashOpt.value();
565     // Finds the binary function with the longest common prefix to the profiled
566     // function and matches.
567     BinaryFunction *ClosestBF = nullptr;
568     size_t LCP = 0;
569     std::string YamlBFBaseName = GetBaseName(YamlBF.Name);
570     for (BinaryFunction *BF : BFsWithSameHash) {
571       if (ProfiledFunctions.count(BF))
572         continue;
573       std::string BFName = std::string(BF->getOneName());
574       std::string BFBaseName = GetBaseName(BFName);
575       size_t PrefixLength = 0;
576       size_t N = std::min(YamlBFBaseName.size(), BFBaseName.size());
577       for (size_t I = 0; I < N; ++I) {
578         if (YamlBFBaseName[I] != BFBaseName[I])
579           break;
580         ++PrefixLength;
581       }
582       if (PrefixLength >= LCP) {
583         LCP = PrefixLength;
584         ClosestBF = BF;
585       }
586     }
587     if (ClosestBF) {
588       matchProfileToFunction(YamlBF, *ClosestBF);
589       ++MatchedWithCallGraph;
590     }
591   }
592 
593   return MatchedWithCallGraph;
594 }
595 
596 size_t YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees(
597     const MCPseudoProbeDecoder &Decoder,
598     const std::vector<yaml::bolt::InlineTreeNode> &DecodedInlineTree,
599     const MCDecodedPseudoProbeInlineTree *Root) {
600   // Match inline tree nodes by GUID, checksum, parent, and call site.
601   for (const auto &[InlineTreeNodeId, InlineTreeNode] :
602        llvm::enumerate(DecodedInlineTree)) {
603     uint64_t GUID = InlineTreeNode.GUID;
604     uint64_t Hash = InlineTreeNode.Hash;
605     uint32_t ParentId = InlineTreeNode.ParentIndexDelta;
606     uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe;
607     const MCDecodedPseudoProbeInlineTree *Cur = nullptr;
608     if (!InlineTreeNodeId) {
609       Cur = Root;
610     } else if (const MCDecodedPseudoProbeInlineTree *Parent =
611                    getInlineTreeNode(ParentId)) {
612       for (const MCDecodedPseudoProbeInlineTree &Child :
613            Parent->getChildren()) {
614         if (Child.Guid == GUID) {
615           if (std::get<1>(Child.getInlineSite()) == CallSiteProbe)
616             Cur = &Child;
617           break;
618         }
619       }
620     }
621     // Don't match nodes if the profile is stale (mismatching binary FuncHash
622     // and YAML Hash)
623     if (Cur && Decoder.getFuncDescForGUID(Cur->Guid)->FuncHash == Hash)
624       mapInlineTreeNode(InlineTreeNodeId, Cur);
625   }
626   return Map.size();
627 }
628 
629 // Decode index deltas and indirection through \p YamlPD. Return modified copy
630 // of \p YamlInlineTree with populated decoded fields (GUID, Hash, ParentIndex).
631 static std::vector<yaml::bolt::InlineTreeNode>
632 decodeYamlInlineTree(const yaml::bolt::ProfilePseudoProbeDesc &YamlPD,
633                      std::vector<yaml::bolt::InlineTreeNode> YamlInlineTree) {
634   uint32_t ParentId = 0;
635   uint32_t PrevGUIDIdx = 0;
636   for (yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) {
637     uint32_t GUIDIdx = InlineTreeNode.GUIDIndex;
638     if (GUIDIdx != UINT32_MAX)
639       PrevGUIDIdx = GUIDIdx;
640     else
641       GUIDIdx = PrevGUIDIdx;
642     uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx];
643     ParentId += InlineTreeNode.ParentIndexDelta;
644     InlineTreeNode.GUID = YamlPD.GUID[GUIDIdx];
645     InlineTreeNode.Hash = YamlPD.Hash[HashIdx];
646     InlineTreeNode.ParentIndexDelta = ParentId;
647   }
648   return YamlInlineTree;
649 }
650 
651 size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) {
652   if (!opts::StaleMatchingWithPseudoProbes)
653     return 0;
654 
655   const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
656   const yaml::bolt::ProfilePseudoProbeDesc &YamlPD = YamlBP.PseudoProbeDesc;
657 
658   // Set existing BF->YamlBF match into ProbeMatchSpecs for (local) probe
659   // matching.
660   assert(Decoder &&
661          "If pseudo probes are in use, pseudo probe decoder should exist");
662   for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
663     // BF is preliminary name-matched function to YamlBF
664     // MatchedBF is final matched function
665     BinaryFunction *MatchedBF = YamlProfileToFunction.lookup(YamlBF.Id);
666     if (!BF)
667       BF = MatchedBF;
668     if (!BF)
669       continue;
670     uint64_t GUID = BF->getGUID();
671     if (!GUID)
672       continue;
673     auto It = TopLevelGUIDToInlineTree.find(GUID);
674     if (It == TopLevelGUIDToInlineTree.end())
675       continue;
676     const MCDecodedPseudoProbeInlineTree *Node = It->second;
677     assert(Node && "Malformed TopLevelGUIDToInlineTree");
678     auto &MatchSpecs = BFToProbeMatchSpecs[BF];
679     auto &InlineTreeMap =
680         MatchSpecs.emplace_back(InlineTreeNodeMapTy(), YamlBF).first;
681     std::vector<yaml::bolt::InlineTreeNode> ProfileInlineTree =
682         decodeYamlInlineTree(YamlPD, YamlBF.InlineTree);
683     // Erase unsuccessful match
684     if (!InlineTreeMap.matchInlineTrees(*Decoder, ProfileInlineTree, Node))
685       MatchSpecs.pop_back();
686   }
687 
688   return 0;
689 }
690 
691 size_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) {
692   if (opts::NameSimilarityFunctionMatchingThreshold == 0)
693     return 0;
694 
695   size_t MatchedWithNameSimilarity = 0;
696   ItaniumPartialDemangler Demangler;
697 
698   // Demangle and derive namespace from function name.
699   auto DemangleName = [&](std::string &FunctionName) {
700     StringRef RestoredName = NameResolver::restore(FunctionName);
701     return demangle(RestoredName);
702   };
703   auto DeriveNameSpace = [&](std::string &DemangledName) {
704     if (Demangler.partialDemangle(DemangledName.c_str()))
705       return std::string("");
706     std::vector<char> Buffer(DemangledName.begin(), DemangledName.end());
707     size_t BufferSize;
708     char *NameSpace =
709         Demangler.getFunctionDeclContextName(&Buffer[0], &BufferSize);
710     return std::string(NameSpace, BufferSize);
711   };
712 
713   // Maps namespaces to associated function block counts and gets profile
714   // function names and namespaces to minimize the number of BFs to process and
715   // avoid repeated name demangling/namespace derivation.
716   StringMap<std::set<uint32_t>> NamespaceToProfiledBFSizes;
717   std::vector<std::string> ProfileBFDemangledNames;
718   ProfileBFDemangledNames.reserve(YamlBP.Functions.size());
719   std::vector<std::string> ProfiledBFNamespaces;
720   ProfiledBFNamespaces.reserve(YamlBP.Functions.size());
721 
722   for (auto &YamlBF : YamlBP.Functions) {
723     std::string YamlBFDemangledName = DemangleName(YamlBF.Name);
724     ProfileBFDemangledNames.push_back(YamlBFDemangledName);
725     std::string YamlBFNamespace = DeriveNameSpace(YamlBFDemangledName);
726     ProfiledBFNamespaces.push_back(YamlBFNamespace);
727     NamespaceToProfiledBFSizes[YamlBFNamespace].insert(YamlBF.NumBasicBlocks);
728   }
729 
730   StringMap<std::vector<BinaryFunction *>> NamespaceToBFs;
731 
732   // Maps namespaces to BFs excluding binary functions with no equal sized
733   // profiled functions belonging to the same namespace.
734   for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
735     std::string DemangledName = BF->getDemangledName();
736     std::string Namespace = DeriveNameSpace(DemangledName);
737 
738     auto NamespaceToProfiledBFSizesIt =
739         NamespaceToProfiledBFSizes.find(Namespace);
740     // Skip if there are no ProfileBFs with a given \p Namespace.
741     if (NamespaceToProfiledBFSizesIt == NamespaceToProfiledBFSizes.end())
742       continue;
743     // Skip if there are no ProfileBFs in a given \p Namespace with
744     // equal number of blocks.
745     if (NamespaceToProfiledBFSizesIt->second.count(BF->size()) == 0)
746       continue;
747     NamespaceToBFs[Namespace].push_back(BF);
748   }
749 
750   // Iterates through all profiled functions and binary functions belonging to
751   // the same namespace and matches based on edit distance threshold.
752   assert(YamlBP.Functions.size() == ProfiledBFNamespaces.size() &&
753          ProfiledBFNamespaces.size() == ProfileBFDemangledNames.size());
754   for (size_t I = 0; I < YamlBP.Functions.size(); ++I) {
755     yaml::bolt::BinaryFunctionProfile &YamlBF = YamlBP.Functions[I];
756     std::string &YamlBFNamespace = ProfiledBFNamespaces[I];
757     if (YamlBF.Used)
758       continue;
759     // Skip if there are no BFs in a given \p Namespace.
760     auto It = NamespaceToBFs.find(YamlBFNamespace);
761     if (It == NamespaceToBFs.end())
762       continue;
763 
764     std::string &YamlBFDemangledName = ProfileBFDemangledNames[I];
765     std::vector<BinaryFunction *> BFs = It->second;
766     unsigned MinEditDistance = UINT_MAX;
767     BinaryFunction *ClosestNameBF = nullptr;
768 
769     // Determines BF the closest to the profiled function, in the
770     // same namespace.
771     for (BinaryFunction *BF : BFs) {
772       if (ProfiledFunctions.count(BF))
773         continue;
774       if (BF->size() != YamlBF.NumBasicBlocks)
775         continue;
776       std::string BFDemangledName = BF->getDemangledName();
777       unsigned BFEditDistance =
778           StringRef(BFDemangledName).edit_distance(YamlBFDemangledName);
779       if (BFEditDistance < MinEditDistance) {
780         MinEditDistance = BFEditDistance;
781         ClosestNameBF = BF;
782       }
783     }
784 
785     if (ClosestNameBF &&
786         MinEditDistance <= opts::NameSimilarityFunctionMatchingThreshold) {
787       matchProfileToFunction(YamlBF, *ClosestNameBF);
788       ++MatchedWithNameSimilarity;
789     }
790   }
791 
792   return MatchedWithNameSimilarity;
793 }
794 
795 Error YAMLProfileReader::readProfile(BinaryContext &BC) {
796   if (opts::Verbosity >= 1) {
797     outs() << "BOLT-INFO: YAML profile with hash: ";
798     switch (YamlBP.Header.HashFunction) {
799     case HashFunction::StdHash:
800       outs() << "std::hash\n";
801       break;
802     case HashFunction::XXH3:
803       outs() << "xxh3\n";
804       break;
805     }
806   }
807   YamlProfileToFunction.reserve(YamlBP.Functions.size());
808 
809   // Computes hash for binary functions.
810   if (opts::MatchProfileWithFunctionHash) {
811     for (auto &[_, BF] : BC.getBinaryFunctions()) {
812       BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
813     }
814   } else if (!opts::IgnoreHash) {
815     for (BinaryFunction *BF : ProfileBFs) {
816       if (!BF)
817         continue;
818       BF->computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
819     }
820   }
821 
822   if (opts::StaleMatchingWithPseudoProbes) {
823     const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
824     assert(Decoder &&
825            "If pseudo probes are in use, pseudo probe decoder should exist");
826     for (const MCDecodedPseudoProbeInlineTree &TopLev :
827          Decoder->getDummyInlineRoot().getChildren())
828       TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev;
829   }
830 
831   // Map profiled function ids to names.
832   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
833     IdToYamLBF[YamlBF.Id] = &YamlBF;
834 
835   const size_t MatchedWithExactName = matchWithExactName();
836   const size_t MatchedWithHash = matchWithHash(BC);
837   const size_t MatchedWithLTOCommonName = matchWithLTOCommonName();
838   const size_t MatchedWithCallGraph = matchWithCallGraph(BC);
839   const size_t MatchedWithNameSimilarity = matchWithNameSimilarity(BC);
840   [[maybe_unused]] const size_t MatchedWithPseudoProbes =
841       matchWithPseudoProbes(BC);
842 
843   for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs))
844     if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF))
845       matchProfileToFunction(YamlBF, *BF);
846 
847 
848   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
849     if (!YamlBF.Used && opts::Verbosity >= 1)
850       errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
851              << '\n';
852 
853   if (opts::Verbosity >= 1) {
854     outs() << "BOLT-INFO: matched " << MatchedWithExactName
855            << " functions with identical names\n";
856     outs() << "BOLT-INFO: matched " << MatchedWithHash
857            << " functions with hash\n";
858     outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName
859            << " functions with matching LTO common names\n";
860     outs() << "BOLT-INFO: matched " << MatchedWithCallGraph
861            << " functions with call graph\n";
862     outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity
863            << " functions with similar names\n";
864   }
865 
866   // Set for parseFunctionProfile().
867   NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
868   NormalizeByCalls = usesEvent("branches");
869   uint64_t NumUnused = 0;
870   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
871     if (BinaryFunction *BF = YamlProfileToFunction.lookup(YamlBF.Id))
872       parseFunctionProfile(*BF, YamlBF);
873     else
874       ++NumUnused;
875   }
876 
877   BC.setNumUnusedProfiledObjects(NumUnused);
878 
879   if (opts::Lite &&
880       (opts::MatchProfileWithFunctionHash || opts::MatchWithCallGraph)) {
881     for (BinaryFunction *BF : BC.getAllBinaryFunctions())
882       if (!BF->hasProfile())
883         BF->setIgnored();
884   }
885 
886   return Error::success();
887 }
888 
889 bool YAMLProfileReader::usesEvent(StringRef Name) const {
890   return YamlBP.Header.EventNames.find(std::string(Name)) != StringRef::npos;
891 }
892 
893 } // end namespace bolt
894 } // end namespace llvm
895