xref: /llvm-project/bolt/lib/Profile/DataReader.cpp (revision 2db9b6a93f9fc9b943f52efed51264ef8760cec2)
1 //===- bolt/Profile/DataReader.cpp - Perf data reader ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This family of functions reads profile data written by the perf2bolt
10 // utility and stores it in memory for llvm-bolt consumption.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Profile/DataReader.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Passes/MCF.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/Errc.h"
21 #include <map>
22 
23 #undef  DEBUG_TYPE
24 #define DEBUG_TYPE "bolt-prof"
25 
26 using namespace llvm;
27 
28 namespace opts {
29 
30 extern cl::OptionCategory BoltCategory;
31 extern llvm::cl::opt<unsigned> Verbosity;
32 
33 static cl::opt<bool>
34 DumpData("dump-data",
35   cl::desc("dump parsed bolt data for debugging"),
36   cl::Hidden,
37   cl::cat(BoltCategory));
38 
39 } // namespace opts
40 
41 namespace llvm {
42 namespace bolt {
43 
44 namespace {
45 
46 /// Return true if the function name can change across compilations.
47 bool hasVolatileName(const BinaryFunction &BF) {
48   for (const StringRef &Name : BF.getNames())
49     if (getLTOCommonName(Name))
50       return true;
51 
52   return false;
53 }
54 
55 /// Return standard escaped name of the function possibly renamed by BOLT.
56 std::string normalizeName(StringRef NameRef) {
57   // Strip "PG." prefix used for globalized locals.
58   NameRef = NameRef.startswith("PG.") ? NameRef.substr(2) : NameRef;
59   return getEscapedName(NameRef);
60 }
61 
62 } // anonymous namespace
63 
64 raw_ostream &operator<<(raw_ostream &OS, const Location &Loc) {
65   if (Loc.IsSymbol) {
66     OS << Loc.Name;
67     if (Loc.Offset)
68       OS << "+" << Twine::utohexstr(Loc.Offset);
69   } else {
70     OS << Twine::utohexstr(Loc.Offset);
71   }
72   return OS;
73 }
74 
75 void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) {
76   Data.insert(Data.end(), FBD.Data.begin(), FBD.Data.end());
77   for (auto I = Data.begin(), E = Data.end(); I != E; ++I) {
78     if (I->From.Name == FBD.Name) {
79       I->From.Name = this->Name;
80       I->From.Offset += Offset;
81     }
82     if (I->To.Name == FBD.Name) {
83       I->To.Name = this->Name;
84       I->To.Offset += Offset;
85     }
86   }
87   llvm::stable_sort(Data);
88   ExecutionCount += FBD.ExecutionCount;
89   for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) {
90     assert(I->To.Name == FBD.Name);
91     auto NewElmt = EntryData.insert(EntryData.end(), *I);
92     NewElmt->To.Name = this->Name;
93     NewElmt->To.Offset += Offset;
94   }
95 }
96 
97 uint64_t FuncBranchData::getNumExecutedBranches() const {
98   uint64_t ExecutedBranches = 0;
99   for (const BranchInfo &BI : Data) {
100     int64_t BranchCount = BI.Branches;
101     assert(BranchCount >= 0 && "branch execution count should not be negative");
102     ExecutedBranches += BranchCount;
103   }
104   return ExecutedBranches;
105 }
106 
107 void SampleInfo::mergeWith(const SampleInfo &SI) { Hits += SI.Hits; }
108 
109 void SampleInfo::print(raw_ostream &OS) const {
110   OS << Loc.IsSymbol << " " << Loc.Name << " " << Twine::utohexstr(Loc.Offset)
111      << " " << Hits << "\n";
112 }
113 
114 uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
115   assert(llvm::is_sorted(Data));
116   struct Compare {
117     bool operator()(const SampleInfo &SI, const uint64_t Val) const {
118       return SI.Loc.Offset < Val;
119     }
120     bool operator()(const uint64_t Val, const SampleInfo &SI) const {
121       return Val < SI.Loc.Offset;
122     }
123   };
124   uint64_t Result = 0;
125   for (auto I = llvm::lower_bound(Data, Start, Compare()),
126             E = llvm::lower_bound(Data, End, Compare());
127        I != E; ++I)
128     Result += I->Hits;
129   return Result;
130 }
131 
132 void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
133   auto Iter = Index.find(Offset);
134   if (Iter == Index.end()) {
135     Data.emplace_back(Location(true, Name, Offset), Count);
136     Index[Offset] = Data.size() - 1;
137     return;
138   }
139   SampleInfo &SI = Data[Iter->second];
140   SI.Hits += Count;
141 }
142 
143 void FuncBranchData::bumpBranchCount(uint64_t OffsetFrom, uint64_t OffsetTo,
144                                      uint64_t Count, uint64_t Mispreds) {
145   auto Iter = IntraIndex[OffsetFrom].find(OffsetTo);
146   if (Iter == IntraIndex[OffsetFrom].end()) {
147     Data.emplace_back(Location(true, Name, OffsetFrom),
148                       Location(true, Name, OffsetTo), Mispreds, Count);
149     IntraIndex[OffsetFrom][OffsetTo] = Data.size() - 1;
150     return;
151   }
152   BranchInfo &BI = Data[Iter->second];
153   BI.Branches += Count;
154   BI.Mispreds += Mispreds;
155 }
156 
157 void FuncBranchData::bumpCallCount(uint64_t OffsetFrom, const Location &To,
158                                    uint64_t Count, uint64_t Mispreds) {
159   auto Iter = InterIndex[OffsetFrom].find(To);
160   if (Iter == InterIndex[OffsetFrom].end()) {
161     Data.emplace_back(Location(true, Name, OffsetFrom), To, Mispreds, Count);
162     InterIndex[OffsetFrom][To] = Data.size() - 1;
163     return;
164   }
165   BranchInfo &BI = Data[Iter->second];
166   BI.Branches += Count;
167   BI.Mispreds += Mispreds;
168 }
169 
170 void FuncBranchData::bumpEntryCount(const Location &From, uint64_t OffsetTo,
171                                     uint64_t Count, uint64_t Mispreds) {
172   auto Iter = EntryIndex[OffsetTo].find(From);
173   if (Iter == EntryIndex[OffsetTo].end()) {
174     EntryData.emplace_back(From, Location(true, Name, OffsetTo), Mispreds,
175                            Count);
176     EntryIndex[OffsetTo][From] = EntryData.size() - 1;
177     return;
178   }
179   BranchInfo &BI = EntryData[Iter->second];
180   BI.Branches += Count;
181   BI.Mispreds += Mispreds;
182 }
183 
184 void BranchInfo::mergeWith(const BranchInfo &BI) {
185   Branches += BI.Branches;
186   Mispreds += BI.Mispreds;
187 }
188 
189 void BranchInfo::print(raw_ostream &OS) const {
190   OS << From.IsSymbol << " " << From.Name << " "
191      << Twine::utohexstr(From.Offset) << " " << To.IsSymbol << " " << To.Name
192      << " " << Twine::utohexstr(To.Offset) << " " << Mispreds << " " << Branches
193      << '\n';
194 }
195 
196 ErrorOr<const BranchInfo &> FuncBranchData::getBranch(uint64_t From,
197                                                       uint64_t To) const {
198   for (const BranchInfo &I : Data)
199     if (I.From.Offset == From && I.To.Offset == To && I.From.Name == I.To.Name)
200       return I;
201 
202   return make_error_code(llvm::errc::invalid_argument);
203 }
204 
205 ErrorOr<const BranchInfo &>
206 FuncBranchData::getDirectCallBranch(uint64_t From) const {
207   // Commented out because it can be expensive.
208   // assert(std::is_sorted(Data.begin(), Data.end()));
209   struct Compare {
210     bool operator()(const BranchInfo &BI, const uint64_t Val) const {
211       return BI.From.Offset < Val;
212     }
213     bool operator()(const uint64_t Val, const BranchInfo &BI) const {
214       return Val < BI.From.Offset;
215     }
216   };
217   auto Range = std::equal_range(Data.begin(), Data.end(), From, Compare());
218   for (const auto &RI : llvm::make_range(Range))
219     if (RI.From.Name != RI.To.Name)
220       return RI;
221 
222   return make_error_code(llvm::errc::invalid_argument);
223 }
224 
225 void MemInfo::print(raw_ostream &OS) const {
226   OS << (Offset.IsSymbol + 3) << " " << Offset.Name << " "
227      << Twine::utohexstr(Offset.Offset) << " " << (Addr.IsSymbol + 3) << " "
228      << Addr.Name << " " << Twine::utohexstr(Addr.Offset) << " " << Count
229      << "\n";
230 }
231 
232 void MemInfo::prettyPrint(raw_ostream &OS) const {
233   OS << "(PC: " << Offset << ", M: " << Addr << ", C: " << Count << ")";
234 }
235 
236 void FuncMemData::update(const Location &Offset, const Location &Addr) {
237   auto Iter = EventIndex[Offset.Offset].find(Addr);
238   if (Iter == EventIndex[Offset.Offset].end()) {
239     Data.emplace_back(MemInfo(Offset, Addr, 1));
240     EventIndex[Offset.Offset][Addr] = Data.size() - 1;
241     return;
242   }
243   ++Data[Iter->second].Count;
244 }
245 
246 Error DataReader::preprocessProfile(BinaryContext &BC) {
247   if (std::error_code EC = parseInput())
248     return errorCodeToError(EC);
249 
250   if (opts::DumpData)
251     dump();
252 
253   if (collectedInBoltedBinary())
254     outs() << "BOLT-INFO: profile collection done on a binary already "
255               "processed by BOLT\n";
256 
257   for (auto &BFI : BC.getBinaryFunctions()) {
258     BinaryFunction &Function = BFI.second;
259     if (FuncMemData *MemData = getMemDataForNames(Function.getNames())) {
260       setMemData(Function, MemData);
261       MemData->Used = true;
262     }
263     if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) {
264       setBranchData(Function, FuncData);
265       Function.ExecutionCount = FuncData->ExecutionCount;
266       FuncData->Used = true;
267     }
268   }
269 
270   for (auto &BFI : BC.getBinaryFunctions()) {
271     BinaryFunction &Function = BFI.second;
272     matchProfileMemData(Function);
273   }
274 
275   return Error::success();
276 }
277 
278 Error DataReader::readProfilePreCFG(BinaryContext &BC) {
279   for (auto &BFI : BC.getBinaryFunctions()) {
280     BinaryFunction &Function = BFI.second;
281     FuncMemData *MemoryData = getMemData(Function);
282     if (!MemoryData)
283       continue;
284 
285     for (MemInfo &MI : MemoryData->Data) {
286       const uint64_t Offset = MI.Offset.Offset;
287       auto II = Function.Instructions.find(Offset);
288       if (II == Function.Instructions.end()) {
289         // Ignore bad instruction address.
290         continue;
291       }
292 
293       auto &MemAccessProfile =
294           BC.MIB->getOrCreateAnnotationAs<MemoryAccessProfile>(
295               II->second, "MemoryAccessProfile");
296       BinaryData *BD = nullptr;
297       if (MI.Addr.IsSymbol)
298         BD = BC.getBinaryDataByName(MI.Addr.Name);
299       MemAccessProfile.AddressAccessInfo.push_back(
300           {BD, MI.Addr.Offset, MI.Count});
301       auto NextII = std::next(II);
302       if (NextII == Function.Instructions.end())
303         MemAccessProfile.NextInstrOffset = Function.getSize();
304       else
305         MemAccessProfile.NextInstrOffset = II->first;
306     }
307     Function.HasMemoryProfile = true;
308   }
309 
310   return Error::success();
311 }
312 
313 Error DataReader::readProfile(BinaryContext &BC) {
314   for (auto &BFI : BC.getBinaryFunctions()) {
315     BinaryFunction &Function = BFI.second;
316     readProfile(Function);
317   }
318 
319   uint64_t NumUnused = 0;
320   for (const auto &KV : NamesToBranches) {
321     const FuncBranchData &FBD = KV.second;
322     if (!FBD.Used)
323       ++NumUnused;
324   }
325   BC.setNumUnusedProfiledObjects(NumUnused);
326 
327   return Error::success();
328 }
329 
330 std::error_code DataReader::parseInput() {
331   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
332       MemoryBuffer::getFileOrSTDIN(Filename);
333   if (std::error_code EC = MB.getError()) {
334     Diag << "cannot open " << Filename << ": " << EC.message() << "\n";
335     return EC;
336   }
337   FileBuf = std::move(MB.get());
338   ParsingBuf = FileBuf->getBuffer();
339   if (std::error_code EC = parse())
340     return EC;
341   if (!ParsingBuf.empty())
342     Diag << "WARNING: invalid profile data detected at line " << Line
343          << ". Possibly corrupted profile.\n";
344 
345   buildLTONameMaps();
346 
347   return std::error_code();
348 }
349 
350 void DataReader::readProfile(BinaryFunction &BF) {
351   if (BF.empty())
352     return;
353 
354   if (!hasLBR()) {
355     BF.ProfileFlags = BinaryFunction::PF_SAMPLE;
356     readSampleData(BF);
357     return;
358   }
359 
360   BF.ProfileFlags = BinaryFunction::PF_LBR;
361 
362   // Possibly assign/re-assign branch profile data.
363   matchProfileData(BF);
364 
365   FuncBranchData *FBD = getBranchData(BF);
366   if (!FBD)
367     return;
368 
369   // Assign basic block counts to function entry points. These only include
370   // counts for outside entries.
371   //
372   // There is a slight skew introduced here as branches originated from RETs
373   // may be accounted for in the execution count of an entry block if the last
374   // instruction in a predecessor fall-through block is a call. This situation
375   // should rarely happen because there are few multiple-entry functions.
376   for (const BranchInfo &BI : FBD->EntryData) {
377     BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(BI.To.Offset);
378     if (BB && (BB->isEntryPoint() || BB->isLandingPad())) {
379       uint64_t Count = BB->getExecutionCount();
380       if (Count == BinaryBasicBlock::COUNT_NO_PROFILE)
381         Count = 0;
382       BB->setExecutionCount(Count + BI.Branches);
383     }
384   }
385 
386   for (const BranchInfo &BI : FBD->Data) {
387     if (BI.From.Name != BI.To.Name)
388       continue;
389 
390     if (!recordBranch(BF, BI.From.Offset, BI.To.Offset, BI.Branches,
391                       BI.Mispreds)) {
392       LLVM_DEBUG(dbgs() << "bad branch : " << BI.From.Offset << " -> "
393                         << BI.To.Offset << '\n');
394     }
395   }
396 
397   // Convert branch data into annotations.
398   convertBranchData(BF);
399 }
400 
401 void DataReader::matchProfileData(BinaryFunction &BF) {
402   // This functionality is available for LBR-mode only
403   // TODO: Implement evaluateProfileData() for samples, checking whether
404   // sample addresses match instruction addresses in the function
405   if (!hasLBR())
406     return;
407 
408   FuncBranchData *FBD = getBranchData(BF);
409   if (FBD) {
410     BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
411     BF.RawBranchCount = FBD->getNumExecutedBranches();
412     if (BF.ProfileMatchRatio == 1.0f) {
413       if (fetchProfileForOtherEntryPoints(BF)) {
414         BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
415         BF.ExecutionCount = FBD->ExecutionCount;
416         BF.RawBranchCount = FBD->getNumExecutedBranches();
417       }
418       return;
419     }
420   }
421 
422   // Check if the function name can fluctuate between several compilations
423   // possibly triggered by minor unrelated code changes in the source code
424   // of the input binary.
425   if (!hasVolatileName(BF))
426     return;
427 
428   // Check for a profile that matches with 100% confidence.
429   const std::vector<FuncBranchData *> AllBranchData =
430       getBranchDataForNamesRegex(BF.getNames());
431   for (FuncBranchData *NewBranchData : AllBranchData) {
432     // Prevent functions from sharing the same profile.
433     if (NewBranchData->Used)
434       continue;
435 
436     if (evaluateProfileData(BF, *NewBranchData) != 1.0f)
437       continue;
438 
439     if (FBD)
440       FBD->Used = false;
441 
442     // Update function profile data with the new set.
443     setBranchData(BF, NewBranchData);
444     NewBranchData->Used = true;
445     BF.ExecutionCount = NewBranchData->ExecutionCount;
446     BF.ProfileMatchRatio = 1.0f;
447     break;
448   }
449 }
450 
451 void DataReader::matchProfileMemData(BinaryFunction &BF) {
452   const std::vector<FuncMemData *> AllMemData =
453       getMemDataForNamesRegex(BF.getNames());
454   for (FuncMemData *NewMemData : AllMemData) {
455     // Prevent functions from sharing the same profile.
456     if (NewMemData->Used)
457       continue;
458 
459     if (FuncMemData *MD = getMemData(BF))
460       MD->Used = false;
461 
462     // Update function profile data with the new set.
463     setMemData(BF, NewMemData);
464     NewMemData->Used = true;
465     break;
466   }
467 }
468 
469 bool DataReader::fetchProfileForOtherEntryPoints(BinaryFunction &BF) {
470   BinaryContext &BC = BF.getBinaryContext();
471 
472   FuncBranchData *FBD = getBranchData(BF);
473   if (!FBD)
474     return false;
475 
476   // Check if we are missing profiling data for secondary entry points
477   bool First = true;
478   bool Updated = false;
479   for (BinaryBasicBlock *BB : BF.BasicBlocks) {
480     if (First) {
481       First = false;
482       continue;
483     }
484     if (BB->isEntryPoint()) {
485       uint64_t EntryAddress = BB->getOffset() + BF.getAddress();
486       // Look for branch data associated with this entry point
487       if (BinaryData *BD = BC.getBinaryDataAtAddress(EntryAddress)) {
488         if (FuncBranchData *Data = getBranchDataForSymbols(BD->getSymbols())) {
489           FBD->appendFrom(*Data, BB->getOffset());
490           Data->Used = true;
491           Updated = true;
492         }
493       }
494     }
495   }
496 
497   return Updated;
498 }
499 
500 float DataReader::evaluateProfileData(BinaryFunction &BF,
501                                       const FuncBranchData &BranchData) const {
502   BinaryContext &BC = BF.getBinaryContext();
503 
504   // Until we define a minimal profile, we consider an empty branch data to be
505   // a valid profile. It could happen to a function without branches when we
506   // still have an EntryData for the execution count.
507   if (BranchData.Data.empty())
508     return 1.0f;
509 
510   uint64_t NumMatchedBranches = 0;
511   for (const BranchInfo &BI : BranchData.Data) {
512     bool IsValid = false;
513     if (BI.From.Name == BI.To.Name) {
514       // Try to record information with 0 count.
515       IsValid = recordBranch(BF, BI.From.Offset, BI.To.Offset, 0);
516     } else if (collectedInBoltedBinary()) {
517       // We can't check branch source for collections in bolted binaries because
518       // the source of the branch may be mapped to the first instruction in a BB
519       // instead of the original branch (which may not exist in the source bin).
520       IsValid = true;
521     } else {
522       // The branch has to originate from this function.
523       // Check for calls, tail calls, rets and indirect branches.
524       // When matching profiling info, we did not reach the stage
525       // when we identify tail calls, so they are still represented
526       // by regular branch instructions and we need isBranch() here.
527       MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
528       // If it's a prefix - skip it.
529       if (Instr && BC.MIB->isPrefix(*Instr))
530         Instr = BF.getInstructionAtOffset(BI.From.Offset + 1);
531       if (Instr && (BC.MIB->isCall(*Instr) || BC.MIB->isBranch(*Instr) ||
532                     BC.MIB->isReturn(*Instr)))
533         IsValid = true;
534     }
535 
536     if (IsValid) {
537       ++NumMatchedBranches;
538       continue;
539     }
540 
541     LLVM_DEBUG(dbgs() << "\tinvalid branch in " << BF << " : 0x"
542                       << Twine::utohexstr(BI.From.Offset) << " -> ";
543                if (BI.From.Name == BI.To.Name) dbgs()
544                << "0x" << Twine::utohexstr(BI.To.Offset) << '\n';
545                else dbgs() << "<outbounds>\n";);
546   }
547 
548   const float MatchRatio = (float)NumMatchedBranches / BranchData.Data.size();
549   if (opts::Verbosity >= 2 && NumMatchedBranches < BranchData.Data.size())
550     errs() << "BOLT-WARNING: profile branches match only "
551            << format("%.1f%%", MatchRatio * 100.0f) << " ("
552            << NumMatchedBranches << '/' << BranchData.Data.size()
553            << ") for function " << BF << '\n';
554 
555   return MatchRatio;
556 }
557 
558 void DataReader::readSampleData(BinaryFunction &BF) {
559   FuncSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames());
560   if (!SampleDataOrErr)
561     return;
562 
563   // Basic samples mode territory (without LBR info)
564   // First step is to assign BB execution count based on samples from perf
565   BF.ProfileMatchRatio = 1.0f;
566   BF.removeTagsFromProfile();
567   bool NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
568   bool NormalizeByCalls = usesEvent("branches");
569   static bool NagUser = true;
570   if (NagUser) {
571     outs()
572         << "BOLT-INFO: operating with basic samples profiling data (no LBR).\n";
573     if (NormalizeByInsnCount)
574       outs() << "BOLT-INFO: normalizing samples by instruction count.\n";
575     else if (NormalizeByCalls)
576       outs() << "BOLT-INFO: normalizing samples by branches.\n";
577 
578     NagUser = false;
579   }
580   uint64_t LastOffset = BF.getSize();
581   uint64_t TotalEntryCount = 0;
582   for (BinaryFunction::BasicBlockOffset &BBOffset :
583        llvm::reverse(BF.BasicBlockOffsets)) {
584     uint64_t CurOffset = BBOffset.first;
585     // Always work with samples multiplied by 1000 to avoid losing them if we
586     // later need to normalize numbers
587     uint64_t NumSamples =
588         SampleDataOrErr->getSamples(CurOffset, LastOffset) * 1000;
589     if (NormalizeByInsnCount && BBOffset.second->getNumNonPseudos()) {
590       NumSamples /= BBOffset.second->getNumNonPseudos();
591     } else if (NormalizeByCalls) {
592       uint32_t NumCalls = BBOffset.second->getNumCalls();
593       NumSamples /= NumCalls + 1;
594     }
595     BBOffset.second->setExecutionCount(NumSamples);
596     if (BBOffset.second->isEntryPoint())
597       TotalEntryCount += NumSamples;
598     LastOffset = CurOffset;
599   }
600 
601   BF.ExecutionCount = TotalEntryCount;
602 
603   estimateEdgeCounts(BF);
604 }
605 
606 void DataReader::convertBranchData(BinaryFunction &BF) const {
607   BinaryContext &BC = BF.getBinaryContext();
608 
609   if (BF.empty())
610     return;
611 
612   FuncBranchData *FBD = getBranchData(BF);
613   if (!FBD)
614     return;
615 
616   // Profile information for calls.
617   //
618   // There are 3 cases that we annotate differently:
619   //   1) Conditional tail calls that could be mispredicted.
620   //   2) Indirect calls to multiple destinations with mispredictions.
621   //      Before we validate CFG we have to handle indirect branches here too.
622   //   3) Regular direct calls. The count could be different from containing
623   //      basic block count. Keep this data in case we find it useful.
624   //
625   for (BranchInfo &BI : FBD->Data) {
626     // Ignore internal branches.
627     if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0)
628       continue;
629 
630     MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
631     if (!Instr ||
632         (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr)))
633       continue;
634 
635     auto setOrUpdateAnnotation = [&](StringRef Name, uint64_t Count) {
636       if (opts::Verbosity >= 1 && BC.MIB->hasAnnotation(*Instr, Name))
637         errs() << "BOLT-WARNING: duplicate " << Name << " info for offset 0x"
638                << Twine::utohexstr(BI.From.Offset) << " in function " << BF
639                << '\n';
640       auto &Value = BC.MIB->getOrCreateAnnotationAs<uint64_t>(*Instr, Name);
641       Value += Count;
642     };
643 
644     if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) {
645       IndirectCallSiteProfile &CSP =
646           BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
647               *Instr, "CallProfile");
648       MCSymbol *CalleeSymbol = nullptr;
649       if (BI.To.IsSymbol) {
650         if (BinaryData *BD = BC.getBinaryDataByName(BI.To.Name))
651           CalleeSymbol = BD->getSymbol();
652       }
653       CSP.emplace_back(CalleeSymbol, BI.Branches, BI.Mispreds);
654     } else if (BC.MIB->getConditionalTailCall(*Instr)) {
655       setOrUpdateAnnotation("CTCTakenCount", BI.Branches);
656       setOrUpdateAnnotation("CTCMispredCount", BI.Mispreds);
657     } else {
658       setOrUpdateAnnotation("Count", BI.Branches);
659     }
660   }
661 }
662 
663 bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
664                               uint64_t Count, uint64_t Mispreds) const {
665   BinaryContext &BC = BF.getBinaryContext();
666 
667   BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
668   const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
669 
670   if (!FromBB || !ToBB) {
671     LLVM_DEBUG(dbgs() << "failed to get block for recorded branch\n");
672     return false;
673   }
674 
675   // Could be bad LBR data; ignore the branch. In the case of data collected
676   // in binaries optimized by BOLT, a source BB may be mapped to two output
677   // BBs as a result of optimizations. In that case, a branch between these
678   // two will be recorded as a branch from A going to A in the source address
679   // space. Keep processing.
680   if (From == To)
681     return true;
682 
683   // Return from a tail call.
684   if (FromBB->succ_size() == 0)
685     return true;
686 
687   // Very rarely we will see ignored branches. Do a linear check.
688   for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches)
689     if (Branch ==
690         std::make_pair(static_cast<uint32_t>(From), static_cast<uint32_t>(To)))
691       return true;
692 
693   bool OffsetMatches = !!(To == ToBB->getOffset());
694   if (!OffsetMatches) {
695     // Skip the nops to support old .fdata
696     uint64_t Offset = ToBB->getOffset();
697     for (const MCInst &Instr : *ToBB) {
698       if (!BC.MIB->isNoop(Instr))
699         break;
700 
701       if (std::optional<uint32_t> Size = BC.MIB->getSize(Instr))
702         Offset += *Size;
703     }
704 
705     if (To == Offset)
706       OffsetMatches = true;
707   }
708 
709   if (!OffsetMatches) {
710     // "To" could be referring to nop instructions in between 2 basic blocks.
711     // While building the CFG we make sure these nops are attributed to the
712     // previous basic block, thus we check if the destination belongs to the
713     // gap past the last instruction.
714     const MCInst *LastInstr = ToBB->getLastNonPseudoInstr();
715     if (LastInstr) {
716       const uint32_t LastInstrOffset =
717           BC.MIB->getOffsetWithDefault(*LastInstr, 0);
718 
719       // With old .fdata we are getting FT branches for "jcc,jmp" sequences.
720       if (To == LastInstrOffset && BC.MIB->isUnconditionalBranch(*LastInstr))
721         return true;
722 
723       if (To <= LastInstrOffset) {
724         LLVM_DEBUG(dbgs() << "branch recorded into the middle of the block"
725                           << " in " << BF << " : " << From << " -> " << To
726                           << '\n');
727         return false;
728       }
729     }
730 
731     // The real destination is the layout successor of the detected ToBB.
732     if (ToBB == BF.getLayout().block_back())
733       return false;
734     const BinaryBasicBlock *NextBB =
735         BF.getLayout().getBlock(ToBB->getIndex() + 1);
736     assert((NextBB && NextBB->getOffset() > ToBB->getOffset()) && "bad layout");
737     ToBB = NextBB;
738   }
739 
740   // If there's no corresponding instruction for 'From', we have probably
741   // discarded it as a FT from __builtin_unreachable.
742   MCInst *FromInstruction = BF.getInstructionAtOffset(From);
743   if (!FromInstruction) {
744     // If the data was collected in a bolted binary, the From addresses may be
745     // translated to the first instruction of the source BB if BOLT inserted
746     // a new branch that did not exist in the source (we can't map it to the
747     // source instruction, so we map it to the first instr of source BB).
748     // We do not keep offsets for random instructions. So the check above will
749     // evaluate to true if the first instr is not a branch (call/jmp/ret/etc)
750     if (collectedInBoltedBinary()) {
751       if (FromBB->getInputOffset() != From) {
752         LLVM_DEBUG(dbgs() << "offset " << From << " does not match a BB in "
753                           << BF << '\n');
754         return false;
755       }
756       FromInstruction = nullptr;
757     } else {
758       LLVM_DEBUG(dbgs() << "no instruction for offset " << From << " in " << BF
759                         << '\n');
760       return false;
761     }
762   }
763 
764   if (!FromBB->getSuccessor(ToBB->getLabel())) {
765     // Check if this is a recursive call or a return from a recursive call.
766     if (FromInstruction && ToBB->isEntryPoint() &&
767         (BC.MIB->isCall(*FromInstruction) ||
768          BC.MIB->isIndirectBranch(*FromInstruction))) {
769       // Execution count is already accounted for.
770       return true;
771     }
772     // For data collected in a bolted binary, we may have created two output BBs
773     // that map to one original block. Branches between these two blocks will
774     // appear here as one BB jumping to itself, even though it has no loop
775     // edges. Ignore these.
776     if (collectedInBoltedBinary() && FromBB == ToBB)
777       return true;
778 
779     BinaryBasicBlock *FTSuccessor = FromBB->getConditionalSuccessor(false);
780     if (FTSuccessor && FTSuccessor->succ_size() == 1 &&
781         FTSuccessor->getSuccessor(ToBB->getLabel())) {
782       BinaryBasicBlock::BinaryBranchInfo &FTBI =
783           FTSuccessor->getBranchInfo(*ToBB);
784       FTBI.Count += Count;
785       if (Count)
786         FTBI.MispredictedCount += Mispreds;
787       ToBB = FTSuccessor;
788     } else {
789       LLVM_DEBUG(dbgs() << "invalid branch in " << BF
790                         << formatv(": {0:x} -> {1:x}\n", From, To));
791       return false;
792     }
793   }
794 
795   BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);
796   BI.Count += Count;
797   // Only update mispredicted count if it the count was real.
798   if (Count) {
799     BI.MispredictedCount += Mispreds;
800   }
801 
802   return true;
803 }
804 
805 void DataReader::reportError(StringRef ErrorMsg) {
806   Diag << "Error reading BOLT data input file: line " << Line << ", column "
807        << Col << ": " << ErrorMsg << '\n';
808 }
809 
810 bool DataReader::expectAndConsumeFS() {
811   if (ParsingBuf[0] != FieldSeparator) {
812     reportError("expected field separator");
813     return false;
814   }
815   ParsingBuf = ParsingBuf.drop_front(1);
816   Col += 1;
817   return true;
818 }
819 
820 void DataReader::consumeAllRemainingFS() {
821   while (ParsingBuf[0] == FieldSeparator) {
822     ParsingBuf = ParsingBuf.drop_front(1);
823     Col += 1;
824   }
825 }
826 
827 bool DataReader::checkAndConsumeNewLine() {
828   if (ParsingBuf[0] != '\n')
829     return false;
830 
831   ParsingBuf = ParsingBuf.drop_front(1);
832   Col = 0;
833   Line += 1;
834   return true;
835 }
836 
837 ErrorOr<StringRef> DataReader::parseString(char EndChar, bool EndNl) {
838   if (EndChar == '\\') {
839     reportError("EndChar could not be backslash");
840     return make_error_code(llvm::errc::io_error);
841   }
842 
843   std::string EndChars(1, EndChar);
844   EndChars.push_back('\\');
845   if (EndNl)
846     EndChars.push_back('\n');
847 
848   size_t StringEnd = 0;
849   do {
850     StringEnd = ParsingBuf.find_first_of(EndChars, StringEnd);
851     if (StringEnd == StringRef::npos ||
852         (StringEnd == 0 && ParsingBuf[StringEnd] != '\\')) {
853       reportError("malformed field");
854       return make_error_code(llvm::errc::io_error);
855     }
856 
857     if (ParsingBuf[StringEnd] != '\\')
858       break;
859 
860     StringEnd += 2;
861   } while (true);
862 
863   StringRef Str = ParsingBuf.substr(0, StringEnd);
864 
865   // If EndNl was set and nl was found instead of EndChar, do not consume the
866   // new line.
867   bool EndNlInsteadOfEndChar = ParsingBuf[StringEnd] == '\n' && EndChar != '\n';
868   unsigned End = EndNlInsteadOfEndChar ? StringEnd : StringEnd + 1;
869 
870   ParsingBuf = ParsingBuf.drop_front(End);
871   if (EndChar == '\n') {
872     Col = 0;
873     Line += 1;
874   } else {
875     Col += End;
876   }
877   return Str;
878 }
879 
880 ErrorOr<int64_t> DataReader::parseNumberField(char EndChar, bool EndNl) {
881   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
882   if (std::error_code EC = NumStrRes.getError())
883     return EC;
884   StringRef NumStr = NumStrRes.get();
885   int64_t Num;
886   if (NumStr.getAsInteger(10, Num)) {
887     reportError("expected decimal number");
888     Diag << "Found: " << NumStr << "\n";
889     return make_error_code(llvm::errc::io_error);
890   }
891   return Num;
892 }
893 
894 ErrorOr<uint64_t> DataReader::parseHexField(char EndChar, bool EndNl) {
895   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
896   if (std::error_code EC = NumStrRes.getError())
897     return EC;
898   StringRef NumStr = NumStrRes.get();
899   uint64_t Num;
900   if (NumStr.getAsInteger(16, Num)) {
901     reportError("expected hexidecimal number");
902     Diag << "Found: " << NumStr << "\n";
903     return make_error_code(llvm::errc::io_error);
904   }
905   return Num;
906 }
907 
908 ErrorOr<Location> DataReader::parseLocation(char EndChar, bool EndNl,
909                                             bool ExpectMemLoc) {
910   // Read whether the location of the branch should be DSO or a symbol
911   // 0 means it is a DSO. 1 means it is a global symbol. 2 means it is a local
912   // symbol.
913   // The symbol flag is also used to tag memory load events by adding 3 to the
914   // base values, i.e. 3 not a symbol, 4 global symbol and 5 local symbol.
915   if (!ExpectMemLoc && ParsingBuf[0] != '0' && ParsingBuf[0] != '1' &&
916       ParsingBuf[0] != '2') {
917     reportError("expected 0, 1 or 2");
918     return make_error_code(llvm::errc::io_error);
919   }
920 
921   if (ExpectMemLoc && ParsingBuf[0] != '3' && ParsingBuf[0] != '4' &&
922       ParsingBuf[0] != '5') {
923     reportError("expected 3, 4 or 5");
924     return make_error_code(llvm::errc::io_error);
925   }
926 
927   bool IsSymbol =
928       (!ExpectMemLoc && (ParsingBuf[0] == '1' || ParsingBuf[0] == '2')) ||
929       (ExpectMemLoc && (ParsingBuf[0] == '4' || ParsingBuf[0] == '5'));
930   ParsingBuf = ParsingBuf.drop_front(1);
931   Col += 1;
932 
933   if (!expectAndConsumeFS())
934     return make_error_code(llvm::errc::io_error);
935   consumeAllRemainingFS();
936 
937   // Read the string containing the symbol or the DSO name
938   ErrorOr<StringRef> NameRes = parseString(FieldSeparator);
939   if (std::error_code EC = NameRes.getError())
940     return EC;
941   StringRef Name = NameRes.get();
942   consumeAllRemainingFS();
943 
944   // Read the offset
945   ErrorOr<uint64_t> Offset = parseHexField(EndChar, EndNl);
946   if (std::error_code EC = Offset.getError())
947     return EC;
948 
949   return Location(IsSymbol, Name, Offset.get());
950 }
951 
952 ErrorOr<BranchInfo> DataReader::parseBranchInfo() {
953   ErrorOr<Location> Res = parseLocation(FieldSeparator);
954   if (std::error_code EC = Res.getError())
955     return EC;
956   Location From = Res.get();
957 
958   consumeAllRemainingFS();
959   Res = parseLocation(FieldSeparator);
960   if (std::error_code EC = Res.getError())
961     return EC;
962   Location To = Res.get();
963 
964   consumeAllRemainingFS();
965   ErrorOr<int64_t> MRes = parseNumberField(FieldSeparator);
966   if (std::error_code EC = MRes.getError())
967     return EC;
968   int64_t NumMispreds = MRes.get();
969 
970   consumeAllRemainingFS();
971   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
972   if (std::error_code EC = BRes.getError())
973     return EC;
974   int64_t NumBranches = BRes.get();
975 
976   consumeAllRemainingFS();
977   if (!checkAndConsumeNewLine()) {
978     reportError("expected end of line");
979     return make_error_code(llvm::errc::io_error);
980   }
981 
982   return BranchInfo(std::move(From), std::move(To), NumMispreds, NumBranches);
983 }
984 
985 ErrorOr<MemInfo> DataReader::parseMemInfo() {
986   ErrorOr<Location> Res = parseMemLocation(FieldSeparator);
987   if (std::error_code EC = Res.getError())
988     return EC;
989   Location Offset = Res.get();
990 
991   consumeAllRemainingFS();
992   Res = parseMemLocation(FieldSeparator);
993   if (std::error_code EC = Res.getError())
994     return EC;
995   Location Addr = Res.get();
996 
997   consumeAllRemainingFS();
998   ErrorOr<int64_t> CountRes = parseNumberField(FieldSeparator, true);
999   if (std::error_code EC = CountRes.getError())
1000     return EC;
1001 
1002   consumeAllRemainingFS();
1003   if (!checkAndConsumeNewLine()) {
1004     reportError("expected end of line");
1005     return make_error_code(llvm::errc::io_error);
1006   }
1007 
1008   return MemInfo(Offset, Addr, CountRes.get());
1009 }
1010 
1011 ErrorOr<SampleInfo> DataReader::parseSampleInfo() {
1012   ErrorOr<Location> Res = parseLocation(FieldSeparator);
1013   if (std::error_code EC = Res.getError())
1014     return EC;
1015   Location Address = Res.get();
1016 
1017   consumeAllRemainingFS();
1018   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
1019   if (std::error_code EC = BRes.getError())
1020     return EC;
1021   int64_t Occurrences = BRes.get();
1022 
1023   consumeAllRemainingFS();
1024   if (!checkAndConsumeNewLine()) {
1025     reportError("expected end of line");
1026     return make_error_code(llvm::errc::io_error);
1027   }
1028 
1029   return SampleInfo(std::move(Address), Occurrences);
1030 }
1031 
1032 ErrorOr<bool> DataReader::maybeParseNoLBRFlag() {
1033   if (ParsingBuf.size() < 6 || ParsingBuf.substr(0, 6) != "no_lbr")
1034     return false;
1035   ParsingBuf = ParsingBuf.drop_front(6);
1036   Col += 6;
1037 
1038   if (ParsingBuf.size() > 0 && ParsingBuf[0] == ' ')
1039     ParsingBuf = ParsingBuf.drop_front(1);
1040 
1041   while (ParsingBuf.size() > 0 && ParsingBuf[0] != '\n') {
1042     ErrorOr<StringRef> EventName = parseString(' ', true);
1043     if (!EventName)
1044       return make_error_code(llvm::errc::io_error);
1045     EventNames.insert(EventName.get());
1046   }
1047 
1048   if (!checkAndConsumeNewLine()) {
1049     reportError("malformed no_lbr line");
1050     return make_error_code(llvm::errc::io_error);
1051   }
1052   return true;
1053 }
1054 
1055 ErrorOr<bool> DataReader::maybeParseBATFlag() {
1056   if (ParsingBuf.size() < 16 || ParsingBuf.substr(0, 16) != "boltedcollection")
1057     return false;
1058   ParsingBuf = ParsingBuf.drop_front(16);
1059   Col += 16;
1060 
1061   if (!checkAndConsumeNewLine()) {
1062     reportError("malformed boltedcollection line");
1063     return make_error_code(llvm::errc::io_error);
1064   }
1065   return true;
1066 }
1067 
1068 bool DataReader::hasBranchData() {
1069   if (ParsingBuf.size() == 0)
1070     return false;
1071 
1072   if (ParsingBuf[0] == '0' || ParsingBuf[0] == '1' || ParsingBuf[0] == '2')
1073     return true;
1074   return false;
1075 }
1076 
1077 bool DataReader::hasMemData() {
1078   if (ParsingBuf.size() == 0)
1079     return false;
1080 
1081   if (ParsingBuf[0] == '3' || ParsingBuf[0] == '4' || ParsingBuf[0] == '5')
1082     return true;
1083   return false;
1084 }
1085 
1086 std::error_code DataReader::parseInNoLBRMode() {
1087   auto GetOrCreateFuncEntry = [&](StringRef Name) {
1088     auto I = NamesToSamples.find(Name);
1089     if (I == NamesToSamples.end()) {
1090       bool Success;
1091       std::tie(I, Success) = NamesToSamples.insert(std::make_pair(
1092           Name, FuncSampleData(Name, FuncSampleData::ContainerTy())));
1093 
1094       assert(Success && "unexpected result of insert");
1095     }
1096     return I;
1097   };
1098 
1099   auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1100     auto I = NamesToMemEvents.find(Name);
1101     if (I == NamesToMemEvents.end()) {
1102       bool Success;
1103       std::tie(I, Success) = NamesToMemEvents.insert(
1104           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
1105       assert(Success && "unexpected result of insert");
1106     }
1107     return I;
1108   };
1109 
1110   while (hasBranchData()) {
1111     ErrorOr<SampleInfo> Res = parseSampleInfo();
1112     if (std::error_code EC = Res.getError())
1113       return EC;
1114 
1115     SampleInfo SI = Res.get();
1116 
1117     // Ignore samples not involving known locations
1118     if (!SI.Loc.IsSymbol)
1119       continue;
1120 
1121     auto I = GetOrCreateFuncEntry(SI.Loc.Name);
1122     I->second.Data.emplace_back(std::move(SI));
1123   }
1124 
1125   while (hasMemData()) {
1126     ErrorOr<MemInfo> Res = parseMemInfo();
1127     if (std::error_code EC = Res.getError())
1128       return EC;
1129 
1130     MemInfo MI = Res.get();
1131 
1132     // Ignore memory events not involving known pc.
1133     if (!MI.Offset.IsSymbol)
1134       continue;
1135 
1136     auto I = GetOrCreateFuncMemEntry(MI.Offset.Name);
1137     I->second.Data.emplace_back(std::move(MI));
1138   }
1139 
1140   for (auto &FuncSamples : NamesToSamples)
1141     llvm::stable_sort(FuncSamples.second.Data);
1142 
1143   for (auto &MemEvents : NamesToMemEvents)
1144     llvm::stable_sort(MemEvents.second.Data);
1145 
1146   return std::error_code();
1147 }
1148 
1149 std::error_code DataReader::parse() {
1150   auto GetOrCreateFuncEntry = [&](StringRef Name) {
1151     auto I = NamesToBranches.find(Name);
1152     if (I == NamesToBranches.end()) {
1153       bool Success;
1154       std::tie(I, Success) = NamesToBranches.insert(std::make_pair(
1155           Name, FuncBranchData(Name, FuncBranchData::ContainerTy(),
1156                                FuncBranchData::ContainerTy())));
1157       assert(Success && "unexpected result of insert");
1158     }
1159     return I;
1160   };
1161 
1162   auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1163     auto I = NamesToMemEvents.find(Name);
1164     if (I == NamesToMemEvents.end()) {
1165       bool Success;
1166       std::tie(I, Success) = NamesToMemEvents.insert(
1167           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
1168       assert(Success && "unexpected result of insert");
1169     }
1170     return I;
1171   };
1172 
1173   Col = 0;
1174   Line = 1;
1175   ErrorOr<bool> FlagOrErr = maybeParseNoLBRFlag();
1176   if (!FlagOrErr)
1177     return FlagOrErr.getError();
1178   NoLBRMode = *FlagOrErr;
1179 
1180   ErrorOr<bool> BATFlagOrErr = maybeParseBATFlag();
1181   if (!BATFlagOrErr)
1182     return BATFlagOrErr.getError();
1183   BATMode = *BATFlagOrErr;
1184 
1185   if (!hasBranchData() && !hasMemData()) {
1186     Diag << "ERROR: no valid profile data found\n";
1187     return make_error_code(llvm::errc::io_error);
1188   }
1189 
1190   if (NoLBRMode)
1191     return parseInNoLBRMode();
1192 
1193   while (hasBranchData()) {
1194     ErrorOr<BranchInfo> Res = parseBranchInfo();
1195     if (std::error_code EC = Res.getError())
1196       return EC;
1197 
1198     BranchInfo BI = Res.get();
1199 
1200     // Ignore branches not involving known location.
1201     if (!BI.From.IsSymbol && !BI.To.IsSymbol)
1202       continue;
1203 
1204     auto I = GetOrCreateFuncEntry(BI.From.Name);
1205     I->second.Data.emplace_back(std::move(BI));
1206 
1207     // Add entry data for branches to another function or branches
1208     // to entry points (including recursive calls)
1209     if (BI.To.IsSymbol &&
1210         (!BI.From.Name.equals(BI.To.Name) || BI.To.Offset == 0)) {
1211       I = GetOrCreateFuncEntry(BI.To.Name);
1212       I->second.EntryData.emplace_back(std::move(BI));
1213     }
1214 
1215     // If destination is the function start - update execution count.
1216     // NB: the data is skewed since we cannot tell tail recursion from
1217     //     branches to the function start.
1218     if (BI.To.IsSymbol && BI.To.Offset == 0) {
1219       I = GetOrCreateFuncEntry(BI.To.Name);
1220       I->second.ExecutionCount += BI.Branches;
1221     }
1222   }
1223 
1224   while (hasMemData()) {
1225     ErrorOr<MemInfo> Res = parseMemInfo();
1226     if (std::error_code EC = Res.getError())
1227       return EC;
1228 
1229     MemInfo MI = Res.get();
1230 
1231     // Ignore memory events not involving known pc.
1232     if (!MI.Offset.IsSymbol)
1233       continue;
1234 
1235     auto I = GetOrCreateFuncMemEntry(MI.Offset.Name);
1236     I->second.Data.emplace_back(std::move(MI));
1237   }
1238 
1239   for (auto &FuncBranches : NamesToBranches)
1240     llvm::stable_sort(FuncBranches.second.Data);
1241 
1242   for (auto &MemEvents : NamesToMemEvents)
1243     llvm::stable_sort(MemEvents.second.Data);
1244 
1245   return std::error_code();
1246 }
1247 
1248 void DataReader::buildLTONameMaps() {
1249   for (auto &FuncData : NamesToBranches) {
1250     const StringRef FuncName = FuncData.first;
1251     const std::optional<StringRef> CommonName = getLTOCommonName(FuncName);
1252     if (CommonName)
1253       LTOCommonNameMap[*CommonName].push_back(&FuncData.second);
1254   }
1255 
1256   for (auto &FuncData : NamesToMemEvents) {
1257     const StringRef FuncName = FuncData.first;
1258     const std::optional<StringRef> CommonName = getLTOCommonName(FuncName);
1259     if (CommonName)
1260       LTOCommonNameMemMap[*CommonName].push_back(&FuncData.second);
1261   }
1262 }
1263 
1264 template <typename MapTy>
1265 static typename MapTy::mapped_type *
1266 fetchMapEntry(MapTy &Map, const std::vector<MCSymbol *> &Symbols) {
1267   // Do a reverse order iteration since the name in profile has a higher chance
1268   // of matching a name at the end of the list.
1269   for (const MCSymbol *Symbol : llvm::reverse(Symbols)) {
1270     auto I = Map.find(normalizeName(Symbol->getName()));
1271     if (I != Map.end())
1272       return &I->second;
1273   }
1274   return nullptr;
1275 }
1276 
1277 template <typename MapTy>
1278 static typename MapTy::mapped_type *
1279 fetchMapEntry(MapTy &Map, const std::vector<StringRef> &FuncNames) {
1280   // Do a reverse order iteration since the name in profile has a higher chance
1281   // of matching a name at the end of the list.
1282   for (StringRef Name : llvm::reverse(FuncNames)) {
1283     auto I = Map.find(normalizeName(Name));
1284     if (I != Map.end())
1285       return &I->second;
1286   }
1287   return nullptr;
1288 }
1289 
1290 template <typename MapTy>
1291 static std::vector<typename MapTy::mapped_type *>
1292 fetchMapEntriesRegex(MapTy &Map,
1293                      const StringMap<std::vector<typename MapTy::mapped_type *>>
1294                          &LTOCommonNameMap,
1295                      const std::vector<StringRef> &FuncNames) {
1296   std::vector<typename MapTy::mapped_type *> AllData;
1297   // Do a reverse order iteration since the name in profile has a higher chance
1298   // of matching a name at the end of the list.
1299   for (StringRef FuncName : llvm::reverse(FuncNames)) {
1300     std::string Name = normalizeName(FuncName);
1301     const std::optional<StringRef> LTOCommonName = getLTOCommonName(Name);
1302     if (LTOCommonName) {
1303       auto I = LTOCommonNameMap.find(*LTOCommonName);
1304       if (I != LTOCommonNameMap.end()) {
1305         const std::vector<typename MapTy::mapped_type *> &CommonData =
1306             I->second;
1307         AllData.insert(AllData.end(), CommonData.begin(), CommonData.end());
1308       }
1309     } else {
1310       auto I = Map.find(Name);
1311       if (I != Map.end())
1312         return {&I->second};
1313     }
1314   }
1315   return AllData;
1316 }
1317 
1318 bool DataReader::mayHaveProfileData(const BinaryFunction &Function) {
1319   if (getBranchData(Function) || getMemData(Function))
1320     return true;
1321 
1322   if (getFuncSampleData(Function.getNames()) ||
1323       getBranchDataForNames(Function.getNames()) ||
1324       getMemDataForNames(Function.getNames()))
1325     return true;
1326 
1327   if (!hasVolatileName(Function))
1328     return false;
1329 
1330   const std::vector<FuncBranchData *> AllBranchData =
1331       getBranchDataForNamesRegex(Function.getNames());
1332   if (!AllBranchData.empty())
1333     return true;
1334 
1335   const std::vector<FuncMemData *> AllMemData =
1336       getMemDataForNamesRegex(Function.getNames());
1337   if (!AllMemData.empty())
1338     return true;
1339 
1340   return false;
1341 }
1342 
1343 FuncBranchData *
1344 DataReader::getBranchDataForNames(const std::vector<StringRef> &FuncNames) {
1345   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, FuncNames);
1346 }
1347 
1348 FuncBranchData *
1349 DataReader::getBranchDataForSymbols(const std::vector<MCSymbol *> &Symbols) {
1350   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, Symbols);
1351 }
1352 
1353 FuncMemData *
1354 DataReader::getMemDataForNames(const std::vector<StringRef> &FuncNames) {
1355   return fetchMapEntry<NamesToMemEventsMapTy>(NamesToMemEvents, FuncNames);
1356 }
1357 
1358 FuncSampleData *
1359 DataReader::getFuncSampleData(const std::vector<StringRef> &FuncNames) {
1360   return fetchMapEntry<NamesToSamplesMapTy>(NamesToSamples, FuncNames);
1361 }
1362 
1363 std::vector<FuncBranchData *> DataReader::getBranchDataForNamesRegex(
1364     const std::vector<StringRef> &FuncNames) {
1365   return fetchMapEntriesRegex(NamesToBranches, LTOCommonNameMap, FuncNames);
1366 }
1367 
1368 std::vector<FuncMemData *>
1369 DataReader::getMemDataForNamesRegex(const std::vector<StringRef> &FuncNames) {
1370   return fetchMapEntriesRegex(NamesToMemEvents, LTOCommonNameMemMap, FuncNames);
1371 }
1372 
1373 bool DataReader::hasLocalsWithFileName() const {
1374   for (const auto &Func : NamesToBranches) {
1375     const StringRef &FuncName = Func.first;
1376     if (FuncName.count('/') == 2 && FuncName[0] != '/')
1377       return true;
1378   }
1379   return false;
1380 }
1381 
1382 void DataReader::dump() const {
1383   for (const auto &KV : NamesToBranches) {
1384     const StringRef Name = KV.first;
1385     const FuncBranchData &FBD = KV.second;
1386     Diag << Name << " branches:\n";
1387     for (const BranchInfo &BI : FBD.Data)
1388       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1389            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1390     Diag << Name << " entry points:\n";
1391     for (const BranchInfo &BI : FBD.EntryData)
1392       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1393            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1394   }
1395 
1396   for (auto I = EventNames.begin(), E = EventNames.end(); I != E; ++I) {
1397     StringRef Event = I->getKey();
1398     Diag << "Data was collected with event: " << Event << "\n";
1399   }
1400   for (const auto &KV : NamesToSamples) {
1401     const StringRef Name = KV.first;
1402     const FuncSampleData &FSD = KV.second;
1403     Diag << Name << " samples:\n";
1404     for (const SampleInfo &SI : FSD.Data)
1405       Diag << SI.Loc.Name << " " << SI.Loc.Offset << " " << SI.Hits << "\n";
1406   }
1407 
1408   for (const auto &KV : NamesToMemEvents) {
1409     const StringRef Name = KV.first;
1410     const FuncMemData &FMD = KV.second;
1411     Diag << "Memory events for " << Name;
1412     Location LastOffset(0);
1413     for (const MemInfo &MI : FMD.Data) {
1414       if (MI.Offset == LastOffset)
1415         Diag << ", " << MI.Addr << "/" << MI.Count;
1416       else
1417         Diag << "\n" << MI.Offset << ": " << MI.Addr << "/" << MI.Count;
1418       LastOffset = MI.Offset;
1419     }
1420     Diag << "\n";
1421   }
1422 }
1423 
1424 } // namespace bolt
1425 } // namespace llvm
1426