xref: /llvm-project/bolt/lib/Profile/DataReader.cpp (revision f3dc732b3623c86164f9d95f7563f982cecc5558)
1 //===- bolt/Profile/DataReader.cpp - Perf data reader ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This family of functions reads profile data written by the perf2bolt
10 // utility and stores it in memory for llvm-bolt consumption.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Profile/DataReader.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Passes/MCF.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/Errc.h"
21 
22 #undef  DEBUG_TYPE
23 #define DEBUG_TYPE "bolt-prof"
24 
25 using namespace llvm;
26 
27 namespace opts {
28 
29 extern cl::OptionCategory BoltCategory;
30 extern llvm::cl::opt<unsigned> Verbosity;
31 
32 static cl::opt<bool>
33 DumpData("dump-data",
34   cl::desc("dump parsed bolt data for debugging"),
35   cl::Hidden,
36   cl::cat(BoltCategory));
37 
38 } // namespace opts
39 
40 namespace llvm {
41 namespace bolt {
42 
43 namespace {
44 
45 /// Return true if the function name can change across compilations.
hasVolatileName(const BinaryFunction & BF)46 bool hasVolatileName(const BinaryFunction &BF) {
47   for (const StringRef &Name : BF.getNames())
48     if (getLTOCommonName(Name))
49       return true;
50 
51   return false;
52 }
53 
54 /// Return standard escaped name of the function possibly renamed by BOLT.
normalizeName(StringRef NameRef)55 std::string normalizeName(StringRef NameRef) {
56   // Strip "PG." prefix used for globalized locals.
57   NameRef = NameRef.starts_with("PG.") ? NameRef.substr(2) : NameRef;
58   return getEscapedName(NameRef);
59 }
60 
61 } // anonymous namespace
62 
operator <<(raw_ostream & OS,const Location & Loc)63 raw_ostream &operator<<(raw_ostream &OS, const Location &Loc) {
64   if (Loc.IsSymbol) {
65     OS << Loc.Name;
66     if (Loc.Offset)
67       OS << "+" << Twine::utohexstr(Loc.Offset);
68   } else {
69     OS << Twine::utohexstr(Loc.Offset);
70   }
71   return OS;
72 }
73 
appendFrom(const FuncBranchData & FBD,uint64_t Offset)74 void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) {
75   Data.insert(Data.end(), FBD.Data.begin(), FBD.Data.end());
76   for (auto I = Data.begin(), E = Data.end(); I != E; ++I) {
77     if (I->From.Name == FBD.Name) {
78       I->From.Name = this->Name;
79       I->From.Offset += Offset;
80     }
81     if (I->To.Name == FBD.Name) {
82       I->To.Name = this->Name;
83       I->To.Offset += Offset;
84     }
85   }
86   llvm::stable_sort(Data);
87   ExecutionCount += FBD.ExecutionCount;
88   for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) {
89     assert(I->To.Name == FBD.Name);
90     auto NewElmt = EntryData.insert(EntryData.end(), *I);
91     NewElmt->To.Name = this->Name;
92     NewElmt->To.Offset += Offset;
93   }
94 }
95 
getNumExecutedBranches() const96 uint64_t FuncBranchData::getNumExecutedBranches() const {
97   uint64_t ExecutedBranches = 0;
98   for (const BranchInfo &BI : Data) {
99     int64_t BranchCount = BI.Branches;
100     assert(BranchCount >= 0 && "branch execution count should not be negative");
101     ExecutedBranches += BranchCount;
102   }
103   return ExecutedBranches;
104 }
105 
mergeWith(const SampleInfo & SI)106 void SampleInfo::mergeWith(const SampleInfo &SI) { Hits += SI.Hits; }
107 
print(raw_ostream & OS) const108 void SampleInfo::print(raw_ostream &OS) const {
109   OS << Loc.IsSymbol << " " << Loc.Name << " " << Twine::utohexstr(Loc.Offset)
110      << " " << Hits << "\n";
111 }
112 
getSamples(uint64_t Start,uint64_t End) const113 uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
114   assert(llvm::is_sorted(Data));
115   struct Compare {
116     bool operator()(const SampleInfo &SI, const uint64_t Val) const {
117       return SI.Loc.Offset < Val;
118     }
119     bool operator()(const uint64_t Val, const SampleInfo &SI) const {
120       return Val < SI.Loc.Offset;
121     }
122   };
123   uint64_t Result = 0;
124   for (auto I = llvm::lower_bound(Data, Start, Compare()),
125             E = llvm::lower_bound(Data, End, Compare());
126        I != E; ++I)
127     Result += I->Hits;
128   return Result;
129 }
130 
bumpCount(uint64_t Offset,uint64_t Count)131 void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
132   auto Iter = Index.find(Offset);
133   if (Iter == Index.end()) {
134     Data.emplace_back(Location(true, Name, Offset), Count);
135     Index[Offset] = Data.size() - 1;
136     return;
137   }
138   SampleInfo &SI = Data[Iter->second];
139   SI.Hits += Count;
140 }
141 
bumpBranchCount(uint64_t OffsetFrom,uint64_t OffsetTo,uint64_t Count,uint64_t Mispreds)142 void FuncBranchData::bumpBranchCount(uint64_t OffsetFrom, uint64_t OffsetTo,
143                                      uint64_t Count, uint64_t Mispreds) {
144   auto Iter = IntraIndex[OffsetFrom].find(OffsetTo);
145   if (Iter == IntraIndex[OffsetFrom].end()) {
146     Data.emplace_back(Location(true, Name, OffsetFrom),
147                       Location(true, Name, OffsetTo), Mispreds, Count);
148     IntraIndex[OffsetFrom][OffsetTo] = Data.size() - 1;
149     return;
150   }
151   BranchInfo &BI = Data[Iter->second];
152   BI.Branches += Count;
153   BI.Mispreds += Mispreds;
154 }
155 
bumpCallCount(uint64_t OffsetFrom,const Location & To,uint64_t Count,uint64_t Mispreds)156 void FuncBranchData::bumpCallCount(uint64_t OffsetFrom, const Location &To,
157                                    uint64_t Count, uint64_t Mispreds) {
158   auto Iter = InterIndex[OffsetFrom].find(To);
159   if (Iter == InterIndex[OffsetFrom].end()) {
160     Data.emplace_back(Location(true, Name, OffsetFrom), To, Mispreds, Count);
161     InterIndex[OffsetFrom][To] = Data.size() - 1;
162     return;
163   }
164   BranchInfo &BI = Data[Iter->second];
165   BI.Branches += Count;
166   BI.Mispreds += Mispreds;
167 }
168 
bumpEntryCount(const Location & From,uint64_t OffsetTo,uint64_t Count,uint64_t Mispreds)169 void FuncBranchData::bumpEntryCount(const Location &From, uint64_t OffsetTo,
170                                     uint64_t Count, uint64_t Mispreds) {
171   auto Iter = EntryIndex[OffsetTo].find(From);
172   if (Iter == EntryIndex[OffsetTo].end()) {
173     EntryData.emplace_back(From, Location(true, Name, OffsetTo), Mispreds,
174                            Count);
175     EntryIndex[OffsetTo][From] = EntryData.size() - 1;
176     return;
177   }
178   BranchInfo &BI = EntryData[Iter->second];
179   BI.Branches += Count;
180   BI.Mispreds += Mispreds;
181 }
182 
mergeWith(const BranchInfo & BI)183 void BranchInfo::mergeWith(const BranchInfo &BI) {
184   Branches += BI.Branches;
185   Mispreds += BI.Mispreds;
186 }
187 
print(raw_ostream & OS) const188 void BranchInfo::print(raw_ostream &OS) const {
189   OS << From.IsSymbol << " " << From.Name << " "
190      << Twine::utohexstr(From.Offset) << " " << To.IsSymbol << " " << To.Name
191      << " " << Twine::utohexstr(To.Offset) << " " << Mispreds << " " << Branches
192      << '\n';
193 }
194 
getBranch(uint64_t From,uint64_t To) const195 ErrorOr<const BranchInfo &> FuncBranchData::getBranch(uint64_t From,
196                                                       uint64_t To) const {
197   for (const BranchInfo &I : Data)
198     if (I.From.Offset == From && I.To.Offset == To && I.From.Name == I.To.Name)
199       return I;
200 
201   return make_error_code(llvm::errc::invalid_argument);
202 }
203 
204 ErrorOr<const BranchInfo &>
getDirectCallBranch(uint64_t From) const205 FuncBranchData::getDirectCallBranch(uint64_t From) const {
206   // Commented out because it can be expensive.
207   // assert(std::is_sorted(Data.begin(), Data.end()));
208   struct Compare {
209     bool operator()(const BranchInfo &BI, const uint64_t Val) const {
210       return BI.From.Offset < Val;
211     }
212     bool operator()(const uint64_t Val, const BranchInfo &BI) const {
213       return Val < BI.From.Offset;
214     }
215   };
216   auto Range = std::equal_range(Data.begin(), Data.end(), From, Compare());
217   for (const auto &RI : llvm::make_range(Range))
218     if (RI.From.Name != RI.To.Name)
219       return RI;
220 
221   return make_error_code(llvm::errc::invalid_argument);
222 }
223 
print(raw_ostream & OS) const224 void MemInfo::print(raw_ostream &OS) const {
225   OS << (Offset.IsSymbol + 3) << " " << Offset.Name << " "
226      << Twine::utohexstr(Offset.Offset) << " " << (Addr.IsSymbol + 3) << " "
227      << Addr.Name << " " << Twine::utohexstr(Addr.Offset) << " " << Count
228      << "\n";
229 }
230 
prettyPrint(raw_ostream & OS) const231 void MemInfo::prettyPrint(raw_ostream &OS) const {
232   OS << "(PC: " << Offset << ", M: " << Addr << ", C: " << Count << ")";
233 }
234 
update(const Location & Offset,const Location & Addr)235 void FuncMemData::update(const Location &Offset, const Location &Addr) {
236   auto Iter = EventIndex[Offset.Offset].find(Addr);
237   if (Iter == EventIndex[Offset.Offset].end()) {
238     Data.emplace_back(MemInfo(Offset, Addr, 1));
239     EventIndex[Offset.Offset][Addr] = Data.size() - 1;
240     return;
241   }
242   ++Data[Iter->second].Count;
243 }
244 
preprocessProfile(BinaryContext & BC)245 Error DataReader::preprocessProfile(BinaryContext &BC) {
246   if (std::error_code EC = parseInput())
247     return errorCodeToError(EC);
248 
249   if (opts::DumpData)
250     dump();
251 
252   if (collectedInBoltedBinary())
253     outs() << "BOLT-INFO: profile collection done on a binary already "
254               "processed by BOLT\n";
255 
256   for (auto &BFI : BC.getBinaryFunctions()) {
257     BinaryFunction &Function = BFI.second;
258     if (FuncMemData *MemData = getMemDataForNames(Function.getNames())) {
259       setMemData(Function, MemData);
260       MemData->Used = true;
261     }
262     if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) {
263       setBranchData(Function, FuncData);
264       Function.ExecutionCount = FuncData->ExecutionCount;
265       FuncData->Used = true;
266     }
267   }
268 
269   for (auto &BFI : BC.getBinaryFunctions()) {
270     BinaryFunction &Function = BFI.second;
271     matchProfileMemData(Function);
272   }
273 
274   return Error::success();
275 }
276 
readProfilePreCFG(BinaryContext & BC)277 Error DataReader::readProfilePreCFG(BinaryContext &BC) {
278   for (auto &BFI : BC.getBinaryFunctions()) {
279     BinaryFunction &Function = BFI.second;
280     FuncMemData *MemoryData = getMemData(Function);
281     if (!MemoryData)
282       continue;
283 
284     for (MemInfo &MI : MemoryData->Data) {
285       const uint64_t Offset = MI.Offset.Offset;
286       auto II = Function.Instructions.find(Offset);
287       if (II == Function.Instructions.end()) {
288         // Ignore bad instruction address.
289         continue;
290       }
291 
292       auto &MemAccessProfile =
293           BC.MIB->getOrCreateAnnotationAs<MemoryAccessProfile>(
294               II->second, "MemoryAccessProfile");
295       BinaryData *BD = nullptr;
296       if (MI.Addr.IsSymbol)
297         BD = BC.getBinaryDataByName(MI.Addr.Name);
298       MemAccessProfile.AddressAccessInfo.push_back(
299           {BD, MI.Addr.Offset, MI.Count});
300       auto NextII = std::next(II);
301       if (NextII == Function.Instructions.end())
302         MemAccessProfile.NextInstrOffset = Function.getSize();
303       else
304         MemAccessProfile.NextInstrOffset = II->first;
305     }
306     Function.HasMemoryProfile = true;
307   }
308 
309   return Error::success();
310 }
311 
readProfile(BinaryContext & BC)312 Error DataReader::readProfile(BinaryContext &BC) {
313   for (auto &BFI : BC.getBinaryFunctions()) {
314     BinaryFunction &Function = BFI.second;
315     readProfile(Function);
316   }
317 
318   uint64_t NumUnused = 0;
319   for (const auto &KV : NamesToBranches) {
320     const FuncBranchData &FBD = KV.second;
321     if (!FBD.Used)
322       ++NumUnused;
323   }
324   BC.setNumUnusedProfiledObjects(NumUnused);
325 
326   return Error::success();
327 }
328 
parseInput()329 std::error_code DataReader::parseInput() {
330   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
331       MemoryBuffer::getFileOrSTDIN(Filename);
332   if (std::error_code EC = MB.getError()) {
333     Diag << "cannot open " << Filename << ": " << EC.message() << "\n";
334     return EC;
335   }
336   FileBuf = std::move(MB.get());
337   ParsingBuf = FileBuf->getBuffer();
338   if (std::error_code EC = parse())
339     return EC;
340   if (!ParsingBuf.empty())
341     Diag << "WARNING: invalid profile data detected at line " << Line
342          << ". Possibly corrupted profile.\n";
343 
344   buildLTONameMaps();
345 
346   return std::error_code();
347 }
348 
readProfile(BinaryFunction & BF)349 void DataReader::readProfile(BinaryFunction &BF) {
350   if (BF.empty())
351     return;
352 
353   if (!hasLBR()) {
354     BF.ProfileFlags = BinaryFunction::PF_SAMPLE;
355     readSampleData(BF);
356     return;
357   }
358 
359   BF.ProfileFlags = BinaryFunction::PF_LBR;
360 
361   // Possibly assign/re-assign branch profile data.
362   matchProfileData(BF);
363 
364   FuncBranchData *FBD = getBranchData(BF);
365   if (!FBD)
366     return;
367 
368   // Assign basic block counts to function entry points. These only include
369   // counts for outside entries.
370   //
371   // There is a slight skew introduced here as branches originated from RETs
372   // may be accounted for in the execution count of an entry block if the last
373   // instruction in a predecessor fall-through block is a call. This situation
374   // should rarely happen because there are few multiple-entry functions.
375   for (const BranchInfo &BI : FBD->EntryData) {
376     BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(BI.To.Offset);
377     if (BB && (BB->isEntryPoint() || BB->isLandingPad())) {
378       uint64_t Count = BB->getExecutionCount();
379       if (Count == BinaryBasicBlock::COUNT_NO_PROFILE)
380         Count = 0;
381       BB->setExecutionCount(Count + BI.Branches);
382     }
383   }
384 
385   for (const BranchInfo &BI : FBD->Data) {
386     if (BI.From.Name != BI.To.Name)
387       continue;
388 
389     if (!recordBranch(BF, BI.From.Offset, BI.To.Offset, BI.Branches,
390                       BI.Mispreds)) {
391       LLVM_DEBUG(dbgs() << "bad branch : " << BI.From.Offset << " -> "
392                         << BI.To.Offset << '\n');
393     }
394   }
395 
396   // Convert branch data into annotations.
397   convertBranchData(BF);
398 }
399 
matchProfileData(BinaryFunction & BF)400 void DataReader::matchProfileData(BinaryFunction &BF) {
401   // This functionality is available for LBR-mode only
402   // TODO: Implement evaluateProfileData() for samples, checking whether
403   // sample addresses match instruction addresses in the function
404   if (!hasLBR())
405     return;
406 
407   FuncBranchData *FBD = getBranchData(BF);
408   if (FBD) {
409     BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
410     BF.RawBranchCount = FBD->getNumExecutedBranches();
411     if (BF.ProfileMatchRatio == 1.0f) {
412       if (fetchProfileForOtherEntryPoints(BF)) {
413         BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
414         BF.ExecutionCount = FBD->ExecutionCount;
415         BF.RawBranchCount = FBD->getNumExecutedBranches();
416       }
417       return;
418     }
419   }
420 
421   // Check if the function name can fluctuate between several compilations
422   // possibly triggered by minor unrelated code changes in the source code
423   // of the input binary.
424   if (!hasVolatileName(BF))
425     return;
426 
427   // Check for a profile that matches with 100% confidence.
428   const std::vector<FuncBranchData *> AllBranchData =
429       getBranchDataForNamesRegex(BF.getNames());
430   for (FuncBranchData *NewBranchData : AllBranchData) {
431     // Prevent functions from sharing the same profile.
432     if (NewBranchData->Used)
433       continue;
434 
435     if (evaluateProfileData(BF, *NewBranchData) != 1.0f)
436       continue;
437 
438     if (FBD)
439       FBD->Used = false;
440 
441     // Update function profile data with the new set.
442     setBranchData(BF, NewBranchData);
443     NewBranchData->Used = true;
444     BF.ExecutionCount = NewBranchData->ExecutionCount;
445     BF.ProfileMatchRatio = 1.0f;
446     break;
447   }
448 }
449 
matchProfileMemData(BinaryFunction & BF)450 void DataReader::matchProfileMemData(BinaryFunction &BF) {
451   const std::vector<FuncMemData *> AllMemData =
452       getMemDataForNamesRegex(BF.getNames());
453   for (FuncMemData *NewMemData : AllMemData) {
454     // Prevent functions from sharing the same profile.
455     if (NewMemData->Used)
456       continue;
457 
458     if (FuncMemData *MD = getMemData(BF))
459       MD->Used = false;
460 
461     // Update function profile data with the new set.
462     setMemData(BF, NewMemData);
463     NewMemData->Used = true;
464     break;
465   }
466 }
467 
fetchProfileForOtherEntryPoints(BinaryFunction & BF)468 bool DataReader::fetchProfileForOtherEntryPoints(BinaryFunction &BF) {
469   BinaryContext &BC = BF.getBinaryContext();
470 
471   FuncBranchData *FBD = getBranchData(BF);
472   if (!FBD)
473     return false;
474 
475   // Check if we are missing profiling data for secondary entry points
476   bool First = true;
477   bool Updated = false;
478   for (BinaryBasicBlock *BB : BF.BasicBlocks) {
479     if (First) {
480       First = false;
481       continue;
482     }
483     if (BB->isEntryPoint()) {
484       uint64_t EntryAddress = BB->getOffset() + BF.getAddress();
485       // Look for branch data associated with this entry point
486       if (BinaryData *BD = BC.getBinaryDataAtAddress(EntryAddress)) {
487         if (FuncBranchData *Data = getBranchDataForSymbols(BD->getSymbols())) {
488           FBD->appendFrom(*Data, BB->getOffset());
489           Data->Used = true;
490           Updated = true;
491         }
492       }
493     }
494   }
495 
496   return Updated;
497 }
498 
evaluateProfileData(BinaryFunction & BF,const FuncBranchData & BranchData) const499 float DataReader::evaluateProfileData(BinaryFunction &BF,
500                                       const FuncBranchData &BranchData) const {
501   BinaryContext &BC = BF.getBinaryContext();
502 
503   // Until we define a minimal profile, we consider an empty branch data to be
504   // a valid profile. It could happen to a function without branches when we
505   // still have an EntryData for the execution count.
506   if (BranchData.Data.empty())
507     return 1.0f;
508 
509   uint64_t NumMatchedBranches = 0;
510   for (const BranchInfo &BI : BranchData.Data) {
511     bool IsValid = false;
512     if (BI.From.Name == BI.To.Name) {
513       // Try to record information with 0 count.
514       IsValid = recordBranch(BF, BI.From.Offset, BI.To.Offset, 0);
515     } else if (collectedInBoltedBinary()) {
516       // We can't check branch source for collections in bolted binaries because
517       // the source of the branch may be mapped to the first instruction in a BB
518       // instead of the original branch (which may not exist in the source bin).
519       IsValid = true;
520     } else {
521       // The branch has to originate from this function.
522       // Check for calls, tail calls, rets and indirect branches.
523       // When matching profiling info, we did not reach the stage
524       // when we identify tail calls, so they are still represented
525       // by regular branch instructions and we need isBranch() here.
526       MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
527       // If it's a prefix - skip it.
528       if (Instr && BC.MIB->isPrefix(*Instr))
529         Instr = BF.getInstructionAtOffset(BI.From.Offset + 1);
530       if (Instr && (BC.MIB->isCall(*Instr) || BC.MIB->isBranch(*Instr) ||
531                     BC.MIB->isReturn(*Instr)))
532         IsValid = true;
533     }
534 
535     if (IsValid) {
536       ++NumMatchedBranches;
537       continue;
538     }
539 
540     LLVM_DEBUG(dbgs() << "\tinvalid branch in " << BF << " : 0x"
541                       << Twine::utohexstr(BI.From.Offset) << " -> ";
542                if (BI.From.Name == BI.To.Name) dbgs()
543                << "0x" << Twine::utohexstr(BI.To.Offset) << '\n';
544                else dbgs() << "<outbounds>\n";);
545   }
546 
547   const float MatchRatio = (float)NumMatchedBranches / BranchData.Data.size();
548   if (opts::Verbosity >= 2 && NumMatchedBranches < BranchData.Data.size())
549     errs() << "BOLT-WARNING: profile branches match only "
550            << format("%.1f%%", MatchRatio * 100.0f) << " ("
551            << NumMatchedBranches << '/' << BranchData.Data.size()
552            << ") for function " << BF << '\n';
553 
554   return MatchRatio;
555 }
556 
readSampleData(BinaryFunction & BF)557 void DataReader::readSampleData(BinaryFunction &BF) {
558   FuncSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames());
559   if (!SampleDataOrErr)
560     return;
561 
562   // Basic samples mode territory (without LBR info)
563   // First step is to assign BB execution count based on samples from perf
564   BF.ProfileMatchRatio = 1.0f;
565   BF.removeTagsFromProfile();
566   bool NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
567   bool NormalizeByCalls = usesEvent("branches");
568   static bool NagUser = true;
569   if (NagUser) {
570     outs()
571         << "BOLT-INFO: operating with basic samples profiling data (no LBR).\n";
572     if (NormalizeByInsnCount)
573       outs() << "BOLT-INFO: normalizing samples by instruction count.\n";
574     else if (NormalizeByCalls)
575       outs() << "BOLT-INFO: normalizing samples by branches.\n";
576 
577     NagUser = false;
578   }
579   uint64_t LastOffset = BF.getSize();
580   uint64_t TotalEntryCount = 0;
581   for (BinaryFunction::BasicBlockOffset &BBOffset :
582        llvm::reverse(BF.BasicBlockOffsets)) {
583     uint64_t CurOffset = BBOffset.first;
584     // Always work with samples multiplied by 1000 to avoid losing them if we
585     // later need to normalize numbers
586     uint64_t NumSamples =
587         SampleDataOrErr->getSamples(CurOffset, LastOffset) * 1000;
588     if (NormalizeByInsnCount && BBOffset.second->getNumNonPseudos()) {
589       NumSamples /= BBOffset.second->getNumNonPseudos();
590     } else if (NormalizeByCalls) {
591       uint32_t NumCalls = BBOffset.second->getNumCalls();
592       NumSamples /= NumCalls + 1;
593     }
594     BBOffset.second->setExecutionCount(NumSamples);
595     if (BBOffset.second->isEntryPoint())
596       TotalEntryCount += NumSamples;
597     LastOffset = CurOffset;
598   }
599 
600   BF.ExecutionCount = TotalEntryCount;
601 }
602 
convertBranchData(BinaryFunction & BF) const603 void DataReader::convertBranchData(BinaryFunction &BF) const {
604   BinaryContext &BC = BF.getBinaryContext();
605 
606   if (BF.empty())
607     return;
608 
609   FuncBranchData *FBD = getBranchData(BF);
610   if (!FBD)
611     return;
612 
613   // Profile information for calls.
614   //
615   // There are 3 cases that we annotate differently:
616   //   1) Conditional tail calls that could be mispredicted.
617   //   2) Indirect calls to multiple destinations with mispredictions.
618   //      Before we validate CFG we have to handle indirect branches here too.
619   //   3) Regular direct calls. The count could be different from containing
620   //      basic block count. Keep this data in case we find it useful.
621   //
622   for (BranchInfo &BI : FBD->Data) {
623     // Ignore internal branches.
624     if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0)
625       continue;
626 
627     MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
628     if (!Instr ||
629         (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr)))
630       continue;
631 
632     auto setOrUpdateAnnotation = [&](StringRef Name, uint64_t Count) {
633       if (opts::Verbosity >= 1 && BC.MIB->hasAnnotation(*Instr, Name))
634         errs() << "BOLT-WARNING: duplicate " << Name << " info for offset 0x"
635                << Twine::utohexstr(BI.From.Offset) << " in function " << BF
636                << '\n';
637       auto &Value = BC.MIB->getOrCreateAnnotationAs<uint64_t>(*Instr, Name);
638       Value += Count;
639     };
640 
641     if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) {
642       IndirectCallSiteProfile &CSP =
643           BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
644               *Instr, "CallProfile");
645       MCSymbol *CalleeSymbol = nullptr;
646       if (BI.To.IsSymbol) {
647         if (BinaryData *BD = BC.getBinaryDataByName(BI.To.Name))
648           CalleeSymbol = BD->getSymbol();
649       }
650       CSP.emplace_back(CalleeSymbol, BI.Branches, BI.Mispreds);
651     } else if (BC.MIB->getConditionalTailCall(*Instr)) {
652       setOrUpdateAnnotation("CTCTakenCount", BI.Branches);
653       setOrUpdateAnnotation("CTCMispredCount", BI.Mispreds);
654     } else {
655       setOrUpdateAnnotation("Count", BI.Branches);
656     }
657   }
658 }
659 
recordBranch(BinaryFunction & BF,uint64_t From,uint64_t To,uint64_t Count,uint64_t Mispreds) const660 bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
661                               uint64_t Count, uint64_t Mispreds) const {
662   BinaryContext &BC = BF.getBinaryContext();
663 
664   BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
665   const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
666 
667   if (!FromBB || !ToBB) {
668     LLVM_DEBUG(dbgs() << "failed to get block for recorded branch\n");
669     return false;
670   }
671 
672   // Could be bad LBR data; ignore the branch. In the case of data collected
673   // in binaries optimized by BOLT, a source BB may be mapped to two output
674   // BBs as a result of optimizations. In that case, a branch between these
675   // two will be recorded as a branch from A going to A in the source address
676   // space. Keep processing.
677   if (From == To)
678     return true;
679 
680   // Return from a tail call.
681   if (FromBB->succ_size() == 0)
682     return true;
683 
684   // Very rarely we will see ignored branches. Do a linear check.
685   for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches)
686     if (Branch ==
687         std::make_pair(static_cast<uint32_t>(From), static_cast<uint32_t>(To)))
688       return true;
689 
690   bool OffsetMatches = !!(To == ToBB->getOffset());
691   if (!OffsetMatches) {
692     // Skip the nops to support old .fdata
693     uint64_t Offset = ToBB->getOffset();
694     for (const MCInst &Instr : *ToBB) {
695       if (!BC.MIB->isNoop(Instr))
696         break;
697 
698       if (std::optional<uint32_t> Size = BC.MIB->getSize(Instr))
699         Offset += *Size;
700     }
701 
702     if (To == Offset)
703       OffsetMatches = true;
704   }
705 
706   if (!OffsetMatches) {
707     // "To" could be referring to nop instructions in between 2 basic blocks.
708     // While building the CFG we make sure these nops are attributed to the
709     // previous basic block, thus we check if the destination belongs to the
710     // gap past the last instruction.
711     const MCInst *LastInstr = ToBB->getLastNonPseudoInstr();
712     if (LastInstr) {
713       const uint32_t LastInstrOffset =
714           BC.MIB->getOffsetWithDefault(*LastInstr, 0);
715 
716       // With old .fdata we are getting FT branches for "jcc,jmp" sequences.
717       if (To == LastInstrOffset && BC.MIB->isUnconditionalBranch(*LastInstr))
718         return true;
719 
720       if (To <= LastInstrOffset) {
721         LLVM_DEBUG(dbgs() << "branch recorded into the middle of the block"
722                           << " in " << BF << " : " << From << " -> " << To
723                           << '\n');
724         return false;
725       }
726     }
727 
728     // The real destination is the layout successor of the detected ToBB.
729     if (ToBB == BF.getLayout().block_back())
730       return false;
731     const BinaryBasicBlock *NextBB =
732         BF.getLayout().getBlock(ToBB->getIndex() + 1);
733     assert((NextBB && NextBB->getOffset() > ToBB->getOffset()) && "bad layout");
734     ToBB = NextBB;
735   }
736 
737   // If there's no corresponding instruction for 'From', we have probably
738   // discarded it as a FT from __builtin_unreachable.
739   MCInst *FromInstruction = BF.getInstructionAtOffset(From);
740   if (!FromInstruction) {
741     // If the data was collected in a bolted binary, the From addresses may be
742     // translated to the first instruction of the source BB if BOLT inserted
743     // a new branch that did not exist in the source (we can't map it to the
744     // source instruction, so we map it to the first instr of source BB).
745     // We do not keep offsets for random instructions. So the check above will
746     // evaluate to true if the first instr is not a branch (call/jmp/ret/etc)
747     if (collectedInBoltedBinary()) {
748       if (FromBB->getInputOffset() != From) {
749         LLVM_DEBUG(dbgs() << "offset " << From << " does not match a BB in "
750                           << BF << '\n');
751         return false;
752       }
753       FromInstruction = nullptr;
754     } else {
755       LLVM_DEBUG(dbgs() << "no instruction for offset " << From << " in " << BF
756                         << '\n');
757       return false;
758     }
759   }
760 
761   if (!FromBB->getSuccessor(ToBB->getLabel())) {
762     // Check if this is a recursive call or a return from a recursive call.
763     if (FromInstruction && ToBB->isEntryPoint() &&
764         (BC.MIB->isCall(*FromInstruction) ||
765          BC.MIB->isIndirectBranch(*FromInstruction))) {
766       // Execution count is already accounted for.
767       return true;
768     }
769     // For data collected in a bolted binary, we may have created two output BBs
770     // that map to one original block. Branches between these two blocks will
771     // appear here as one BB jumping to itself, even though it has no loop
772     // edges. Ignore these.
773     if (collectedInBoltedBinary() && FromBB == ToBB)
774       return true;
775 
776     // Allow passthrough blocks.
777     BinaryBasicBlock *FTSuccessor = FromBB->getConditionalSuccessor(false);
778     if (FTSuccessor && FTSuccessor->succ_size() == 1 &&
779         FTSuccessor->getSuccessor(ToBB->getLabel())) {
780       BinaryBasicBlock::BinaryBranchInfo &FTBI =
781           FTSuccessor->getBranchInfo(*ToBB);
782       FTBI.Count += Count;
783       if (Count)
784         FTBI.MispredictedCount += Mispreds;
785       ToBB = FTSuccessor;
786     } else {
787       LLVM_DEBUG(dbgs() << "invalid branch in " << BF
788                         << formatv(": {0:x} -> {1:x}\n", From, To));
789       return false;
790     }
791   }
792 
793   BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);
794   BI.Count += Count;
795   // Only update mispredicted count if it the count was real.
796   if (Count) {
797     BI.MispredictedCount += Mispreds;
798   }
799 
800   return true;
801 }
802 
reportError(StringRef ErrorMsg)803 void DataReader::reportError(StringRef ErrorMsg) {
804   Diag << "Error reading BOLT data input file: line " << Line << ", column "
805        << Col << ": " << ErrorMsg << '\n';
806 }
807 
expectAndConsumeFS()808 bool DataReader::expectAndConsumeFS() {
809   if (ParsingBuf[0] != FieldSeparator) {
810     reportError("expected field separator");
811     return false;
812   }
813   ParsingBuf = ParsingBuf.drop_front(1);
814   Col += 1;
815   return true;
816 }
817 
consumeAllRemainingFS()818 void DataReader::consumeAllRemainingFS() {
819   while (ParsingBuf[0] == FieldSeparator) {
820     ParsingBuf = ParsingBuf.drop_front(1);
821     Col += 1;
822   }
823 }
824 
checkAndConsumeNewLine()825 bool DataReader::checkAndConsumeNewLine() {
826   if (ParsingBuf[0] != '\n')
827     return false;
828 
829   ParsingBuf = ParsingBuf.drop_front(1);
830   Col = 0;
831   Line += 1;
832   return true;
833 }
834 
parseString(char EndChar,bool EndNl)835 ErrorOr<StringRef> DataReader::parseString(char EndChar, bool EndNl) {
836   if (EndChar == '\\') {
837     reportError("EndChar could not be backslash");
838     return make_error_code(llvm::errc::io_error);
839   }
840 
841   std::string EndChars(1, EndChar);
842   EndChars.push_back('\\');
843   if (EndNl)
844     EndChars.push_back('\n');
845 
846   size_t StringEnd = 0;
847   do {
848     StringEnd = ParsingBuf.find_first_of(EndChars, StringEnd);
849     if (StringEnd == StringRef::npos ||
850         (StringEnd == 0 && ParsingBuf[StringEnd] != '\\')) {
851       reportError("malformed field");
852       return make_error_code(llvm::errc::io_error);
853     }
854 
855     if (ParsingBuf[StringEnd] != '\\')
856       break;
857 
858     StringEnd += 2;
859   } while (true);
860 
861   StringRef Str = ParsingBuf.substr(0, StringEnd);
862 
863   // If EndNl was set and nl was found instead of EndChar, do not consume the
864   // new line.
865   bool EndNlInsteadOfEndChar = ParsingBuf[StringEnd] == '\n' && EndChar != '\n';
866   unsigned End = EndNlInsteadOfEndChar ? StringEnd : StringEnd + 1;
867 
868   ParsingBuf = ParsingBuf.drop_front(End);
869   if (EndChar == '\n') {
870     Col = 0;
871     Line += 1;
872   } else {
873     Col += End;
874   }
875   return Str;
876 }
877 
parseNumberField(char EndChar,bool EndNl)878 ErrorOr<int64_t> DataReader::parseNumberField(char EndChar, bool EndNl) {
879   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
880   if (std::error_code EC = NumStrRes.getError())
881     return EC;
882   StringRef NumStr = NumStrRes.get();
883   int64_t Num;
884   if (NumStr.getAsInteger(10, Num)) {
885     reportError("expected decimal number");
886     Diag << "Found: " << NumStr << "\n";
887     return make_error_code(llvm::errc::io_error);
888   }
889   return Num;
890 }
891 
parseHexField(char EndChar,bool EndNl)892 ErrorOr<uint64_t> DataReader::parseHexField(char EndChar, bool EndNl) {
893   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
894   if (std::error_code EC = NumStrRes.getError())
895     return EC;
896   StringRef NumStr = NumStrRes.get();
897   uint64_t Num;
898   if (NumStr.getAsInteger(16, Num)) {
899     reportError("expected hexidecimal number");
900     Diag << "Found: " << NumStr << "\n";
901     return make_error_code(llvm::errc::io_error);
902   }
903   return Num;
904 }
905 
parseLocation(char EndChar,bool EndNl,bool ExpectMemLoc)906 ErrorOr<Location> DataReader::parseLocation(char EndChar, bool EndNl,
907                                             bool ExpectMemLoc) {
908   // Read whether the location of the branch should be DSO or a symbol
909   // 0 means it is a DSO. 1 means it is a global symbol. 2 means it is a local
910   // symbol.
911   // The symbol flag is also used to tag memory load events by adding 3 to the
912   // base values, i.e. 3 not a symbol, 4 global symbol and 5 local symbol.
913   if (!ExpectMemLoc && ParsingBuf[0] != '0' && ParsingBuf[0] != '1' &&
914       ParsingBuf[0] != '2') {
915     reportError("expected 0, 1 or 2");
916     return make_error_code(llvm::errc::io_error);
917   }
918 
919   if (ExpectMemLoc && ParsingBuf[0] != '3' && ParsingBuf[0] != '4' &&
920       ParsingBuf[0] != '5') {
921     reportError("expected 3, 4 or 5");
922     return make_error_code(llvm::errc::io_error);
923   }
924 
925   bool IsSymbol =
926       (!ExpectMemLoc && (ParsingBuf[0] == '1' || ParsingBuf[0] == '2')) ||
927       (ExpectMemLoc && (ParsingBuf[0] == '4' || ParsingBuf[0] == '5'));
928   ParsingBuf = ParsingBuf.drop_front(1);
929   Col += 1;
930 
931   if (!expectAndConsumeFS())
932     return make_error_code(llvm::errc::io_error);
933   consumeAllRemainingFS();
934 
935   // Read the string containing the symbol or the DSO name
936   ErrorOr<StringRef> NameRes = parseString(FieldSeparator);
937   if (std::error_code EC = NameRes.getError())
938     return EC;
939   StringRef Name = NameRes.get();
940   consumeAllRemainingFS();
941 
942   // Read the offset
943   ErrorOr<uint64_t> Offset = parseHexField(EndChar, EndNl);
944   if (std::error_code EC = Offset.getError())
945     return EC;
946 
947   return Location(IsSymbol, Name, Offset.get());
948 }
949 
parseBranchInfo()950 ErrorOr<BranchInfo> DataReader::parseBranchInfo() {
951   ErrorOr<Location> Res = parseLocation(FieldSeparator);
952   if (std::error_code EC = Res.getError())
953     return EC;
954   Location From = Res.get();
955 
956   consumeAllRemainingFS();
957   Res = parseLocation(FieldSeparator);
958   if (std::error_code EC = Res.getError())
959     return EC;
960   Location To = Res.get();
961 
962   consumeAllRemainingFS();
963   ErrorOr<int64_t> MRes = parseNumberField(FieldSeparator);
964   if (std::error_code EC = MRes.getError())
965     return EC;
966   int64_t NumMispreds = MRes.get();
967 
968   consumeAllRemainingFS();
969   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
970   if (std::error_code EC = BRes.getError())
971     return EC;
972   int64_t NumBranches = BRes.get();
973 
974   consumeAllRemainingFS();
975   if (!checkAndConsumeNewLine()) {
976     reportError("expected end of line");
977     return make_error_code(llvm::errc::io_error);
978   }
979 
980   return BranchInfo(std::move(From), std::move(To), NumMispreds, NumBranches);
981 }
982 
parseMemInfo()983 ErrorOr<MemInfo> DataReader::parseMemInfo() {
984   ErrorOr<Location> Res = parseMemLocation(FieldSeparator);
985   if (std::error_code EC = Res.getError())
986     return EC;
987   Location Offset = Res.get();
988 
989   consumeAllRemainingFS();
990   Res = parseMemLocation(FieldSeparator);
991   if (std::error_code EC = Res.getError())
992     return EC;
993   Location Addr = Res.get();
994 
995   consumeAllRemainingFS();
996   ErrorOr<int64_t> CountRes = parseNumberField(FieldSeparator, true);
997   if (std::error_code EC = CountRes.getError())
998     return EC;
999 
1000   consumeAllRemainingFS();
1001   if (!checkAndConsumeNewLine()) {
1002     reportError("expected end of line");
1003     return make_error_code(llvm::errc::io_error);
1004   }
1005 
1006   return MemInfo(Offset, Addr, CountRes.get());
1007 }
1008 
parseSampleInfo()1009 ErrorOr<SampleInfo> DataReader::parseSampleInfo() {
1010   ErrorOr<Location> Res = parseLocation(FieldSeparator);
1011   if (std::error_code EC = Res.getError())
1012     return EC;
1013   Location Address = Res.get();
1014 
1015   consumeAllRemainingFS();
1016   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
1017   if (std::error_code EC = BRes.getError())
1018     return EC;
1019   int64_t Occurrences = BRes.get();
1020 
1021   consumeAllRemainingFS();
1022   if (!checkAndConsumeNewLine()) {
1023     reportError("expected end of line");
1024     return make_error_code(llvm::errc::io_error);
1025   }
1026 
1027   return SampleInfo(std::move(Address), Occurrences);
1028 }
1029 
maybeParseNoLBRFlag()1030 ErrorOr<bool> DataReader::maybeParseNoLBRFlag() {
1031   if (ParsingBuf.size() < 6 || ParsingBuf.substr(0, 6) != "no_lbr")
1032     return false;
1033   ParsingBuf = ParsingBuf.drop_front(6);
1034   Col += 6;
1035 
1036   if (ParsingBuf.size() > 0 && ParsingBuf[0] == ' ')
1037     ParsingBuf = ParsingBuf.drop_front(1);
1038 
1039   while (ParsingBuf.size() > 0 && ParsingBuf[0] != '\n') {
1040     ErrorOr<StringRef> EventName = parseString(' ', true);
1041     if (!EventName)
1042       return make_error_code(llvm::errc::io_error);
1043     EventNames.insert(EventName.get());
1044   }
1045 
1046   if (!checkAndConsumeNewLine()) {
1047     reportError("malformed no_lbr line");
1048     return make_error_code(llvm::errc::io_error);
1049   }
1050   return true;
1051 }
1052 
maybeParseBATFlag()1053 ErrorOr<bool> DataReader::maybeParseBATFlag() {
1054   if (ParsingBuf.size() < 16 || ParsingBuf.substr(0, 16) != "boltedcollection")
1055     return false;
1056   ParsingBuf = ParsingBuf.drop_front(16);
1057   Col += 16;
1058 
1059   if (!checkAndConsumeNewLine()) {
1060     reportError("malformed boltedcollection line");
1061     return make_error_code(llvm::errc::io_error);
1062   }
1063   return true;
1064 }
1065 
hasBranchData()1066 bool DataReader::hasBranchData() {
1067   if (ParsingBuf.size() == 0)
1068     return false;
1069 
1070   if (ParsingBuf[0] == '0' || ParsingBuf[0] == '1' || ParsingBuf[0] == '2')
1071     return true;
1072   return false;
1073 }
1074 
hasMemData()1075 bool DataReader::hasMemData() {
1076   if (ParsingBuf.size() == 0)
1077     return false;
1078 
1079   if (ParsingBuf[0] == '3' || ParsingBuf[0] == '4' || ParsingBuf[0] == '5')
1080     return true;
1081   return false;
1082 }
1083 
parseInNoLBRMode()1084 std::error_code DataReader::parseInNoLBRMode() {
1085   auto GetOrCreateFuncEntry = [&](StringRef Name) {
1086     auto I = NamesToSamples.find(Name);
1087     if (I == NamesToSamples.end()) {
1088       bool Success;
1089       std::tie(I, Success) = NamesToSamples.insert(std::make_pair(
1090           Name, FuncSampleData(Name, FuncSampleData::ContainerTy())));
1091 
1092       assert(Success && "unexpected result of insert");
1093     }
1094     return I;
1095   };
1096 
1097   auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1098     auto I = NamesToMemEvents.find(Name);
1099     if (I == NamesToMemEvents.end()) {
1100       bool Success;
1101       std::tie(I, Success) = NamesToMemEvents.insert(
1102           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
1103       assert(Success && "unexpected result of insert");
1104     }
1105     return I;
1106   };
1107 
1108   while (hasBranchData()) {
1109     ErrorOr<SampleInfo> Res = parseSampleInfo();
1110     if (std::error_code EC = Res.getError())
1111       return EC;
1112 
1113     SampleInfo SI = Res.get();
1114 
1115     // Ignore samples not involving known locations
1116     if (!SI.Loc.IsSymbol)
1117       continue;
1118 
1119     auto I = GetOrCreateFuncEntry(SI.Loc.Name);
1120     I->second.Data.emplace_back(std::move(SI));
1121   }
1122 
1123   while (hasMemData()) {
1124     ErrorOr<MemInfo> Res = parseMemInfo();
1125     if (std::error_code EC = Res.getError())
1126       return EC;
1127 
1128     MemInfo MI = Res.get();
1129 
1130     // Ignore memory events not involving known pc.
1131     if (!MI.Offset.IsSymbol)
1132       continue;
1133 
1134     auto I = GetOrCreateFuncMemEntry(MI.Offset.Name);
1135     I->second.Data.emplace_back(std::move(MI));
1136   }
1137 
1138   for (auto &FuncSamples : NamesToSamples)
1139     llvm::stable_sort(FuncSamples.second.Data);
1140 
1141   for (auto &MemEvents : NamesToMemEvents)
1142     llvm::stable_sort(MemEvents.second.Data);
1143 
1144   return std::error_code();
1145 }
1146 
parse()1147 std::error_code DataReader::parse() {
1148   auto GetOrCreateFuncEntry = [&](StringRef Name) {
1149     auto I = NamesToBranches.find(Name);
1150     if (I == NamesToBranches.end()) {
1151       bool Success;
1152       std::tie(I, Success) = NamesToBranches.insert(std::make_pair(
1153           Name, FuncBranchData(Name, FuncBranchData::ContainerTy(),
1154                                FuncBranchData::ContainerTy())));
1155       assert(Success && "unexpected result of insert");
1156     }
1157     return I;
1158   };
1159 
1160   auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1161     auto I = NamesToMemEvents.find(Name);
1162     if (I == NamesToMemEvents.end()) {
1163       bool Success;
1164       std::tie(I, Success) = NamesToMemEvents.insert(
1165           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
1166       assert(Success && "unexpected result of insert");
1167     }
1168     return I;
1169   };
1170 
1171   Col = 0;
1172   Line = 1;
1173   ErrorOr<bool> FlagOrErr = maybeParseNoLBRFlag();
1174   if (!FlagOrErr)
1175     return FlagOrErr.getError();
1176   NoLBRMode = *FlagOrErr;
1177 
1178   ErrorOr<bool> BATFlagOrErr = maybeParseBATFlag();
1179   if (!BATFlagOrErr)
1180     return BATFlagOrErr.getError();
1181   BATMode = *BATFlagOrErr;
1182 
1183   if (!hasBranchData() && !hasMemData()) {
1184     Diag << "ERROR: no valid profile data found\n";
1185     return make_error_code(llvm::errc::io_error);
1186   }
1187 
1188   if (NoLBRMode)
1189     return parseInNoLBRMode();
1190 
1191   while (hasBranchData()) {
1192     ErrorOr<BranchInfo> Res = parseBranchInfo();
1193     if (std::error_code EC = Res.getError())
1194       return EC;
1195 
1196     BranchInfo BI = Res.get();
1197 
1198     // Ignore branches not involving known location.
1199     if (!BI.From.IsSymbol && !BI.To.IsSymbol)
1200       continue;
1201 
1202     auto I = GetOrCreateFuncEntry(BI.From.Name);
1203     I->second.Data.emplace_back(std::move(BI));
1204 
1205     // Add entry data for branches to another function or branches
1206     // to entry points (including recursive calls)
1207     if (BI.To.IsSymbol && (BI.From.Name != BI.To.Name || BI.To.Offset == 0)) {
1208       I = GetOrCreateFuncEntry(BI.To.Name);
1209       I->second.EntryData.emplace_back(std::move(BI));
1210     }
1211 
1212     // If destination is the function start - update execution count.
1213     // NB: the data is skewed since we cannot tell tail recursion from
1214     //     branches to the function start.
1215     if (BI.To.IsSymbol && BI.To.Offset == 0) {
1216       I = GetOrCreateFuncEntry(BI.To.Name);
1217       I->second.ExecutionCount += BI.Branches;
1218     }
1219   }
1220 
1221   while (hasMemData()) {
1222     ErrorOr<MemInfo> Res = parseMemInfo();
1223     if (std::error_code EC = Res.getError())
1224       return EC;
1225 
1226     MemInfo MI = Res.get();
1227 
1228     // Ignore memory events not involving known pc.
1229     if (!MI.Offset.IsSymbol)
1230       continue;
1231 
1232     auto I = GetOrCreateFuncMemEntry(MI.Offset.Name);
1233     I->second.Data.emplace_back(std::move(MI));
1234   }
1235 
1236   for (auto &FuncBranches : NamesToBranches)
1237     llvm::stable_sort(FuncBranches.second.Data);
1238 
1239   for (auto &MemEvents : NamesToMemEvents)
1240     llvm::stable_sort(MemEvents.second.Data);
1241 
1242   return std::error_code();
1243 }
1244 
buildLTONameMaps()1245 void DataReader::buildLTONameMaps() {
1246   for (auto &FuncData : NamesToBranches) {
1247     const StringRef FuncName = FuncData.first;
1248     const std::optional<StringRef> CommonName = getLTOCommonName(FuncName);
1249     if (CommonName)
1250       LTOCommonNameMap[*CommonName].push_back(&FuncData.second);
1251   }
1252 
1253   for (auto &FuncData : NamesToMemEvents) {
1254     const StringRef FuncName = FuncData.first;
1255     const std::optional<StringRef> CommonName = getLTOCommonName(FuncName);
1256     if (CommonName)
1257       LTOCommonNameMemMap[*CommonName].push_back(&FuncData.second);
1258   }
1259 }
1260 
1261 template <typename MapTy>
1262 static typename MapTy::mapped_type *
fetchMapEntry(MapTy & Map,const std::vector<MCSymbol * > & Symbols)1263 fetchMapEntry(MapTy &Map, const std::vector<MCSymbol *> &Symbols) {
1264   // Do a reverse order iteration since the name in profile has a higher chance
1265   // of matching a name at the end of the list.
1266   for (const MCSymbol *Symbol : llvm::reverse(Symbols)) {
1267     auto I = Map.find(normalizeName(Symbol->getName()));
1268     if (I != Map.end())
1269       return &I->second;
1270   }
1271   return nullptr;
1272 }
1273 
1274 template <typename MapTy>
1275 static typename MapTy::mapped_type *
fetchMapEntry(MapTy & Map,const std::vector<StringRef> & FuncNames)1276 fetchMapEntry(MapTy &Map, const std::vector<StringRef> &FuncNames) {
1277   // Do a reverse order iteration since the name in profile has a higher chance
1278   // of matching a name at the end of the list.
1279   for (StringRef Name : llvm::reverse(FuncNames)) {
1280     auto I = Map.find(normalizeName(Name));
1281     if (I != Map.end())
1282       return &I->second;
1283   }
1284   return nullptr;
1285 }
1286 
1287 template <typename MapTy>
1288 static std::vector<typename MapTy::mapped_type *>
fetchMapEntriesRegex(MapTy & Map,const StringMap<std::vector<typename MapTy::mapped_type * >> & LTOCommonNameMap,const std::vector<StringRef> & FuncNames)1289 fetchMapEntriesRegex(MapTy &Map,
1290                      const StringMap<std::vector<typename MapTy::mapped_type *>>
1291                          &LTOCommonNameMap,
1292                      const std::vector<StringRef> &FuncNames) {
1293   std::vector<typename MapTy::mapped_type *> AllData;
1294   // Do a reverse order iteration since the name in profile has a higher chance
1295   // of matching a name at the end of the list.
1296   for (StringRef FuncName : llvm::reverse(FuncNames)) {
1297     std::string Name = normalizeName(FuncName);
1298     const std::optional<StringRef> LTOCommonName = getLTOCommonName(Name);
1299     if (LTOCommonName) {
1300       auto I = LTOCommonNameMap.find(*LTOCommonName);
1301       if (I != LTOCommonNameMap.end()) {
1302         const std::vector<typename MapTy::mapped_type *> &CommonData =
1303             I->second;
1304         AllData.insert(AllData.end(), CommonData.begin(), CommonData.end());
1305       }
1306     } else {
1307       auto I = Map.find(Name);
1308       if (I != Map.end())
1309         return {&I->second};
1310     }
1311   }
1312   return AllData;
1313 }
1314 
mayHaveProfileData(const BinaryFunction & Function)1315 bool DataReader::mayHaveProfileData(const BinaryFunction &Function) {
1316   if (getBranchData(Function) || getMemData(Function))
1317     return true;
1318 
1319   if (getFuncSampleData(Function.getNames()) ||
1320       getBranchDataForNames(Function.getNames()) ||
1321       getMemDataForNames(Function.getNames()))
1322     return true;
1323 
1324   if (!hasVolatileName(Function))
1325     return false;
1326 
1327   const std::vector<FuncBranchData *> AllBranchData =
1328       getBranchDataForNamesRegex(Function.getNames());
1329   if (!AllBranchData.empty())
1330     return true;
1331 
1332   const std::vector<FuncMemData *> AllMemData =
1333       getMemDataForNamesRegex(Function.getNames());
1334   if (!AllMemData.empty())
1335     return true;
1336 
1337   return false;
1338 }
1339 
1340 FuncBranchData *
getBranchDataForNames(const std::vector<StringRef> & FuncNames)1341 DataReader::getBranchDataForNames(const std::vector<StringRef> &FuncNames) {
1342   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, FuncNames);
1343 }
1344 
1345 FuncBranchData *
getBranchDataForSymbols(const std::vector<MCSymbol * > & Symbols)1346 DataReader::getBranchDataForSymbols(const std::vector<MCSymbol *> &Symbols) {
1347   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, Symbols);
1348 }
1349 
1350 FuncMemData *
getMemDataForNames(const std::vector<StringRef> & FuncNames)1351 DataReader::getMemDataForNames(const std::vector<StringRef> &FuncNames) {
1352   return fetchMapEntry<NamesToMemEventsMapTy>(NamesToMemEvents, FuncNames);
1353 }
1354 
1355 FuncSampleData *
getFuncSampleData(const std::vector<StringRef> & FuncNames)1356 DataReader::getFuncSampleData(const std::vector<StringRef> &FuncNames) {
1357   return fetchMapEntry<NamesToSamplesMapTy>(NamesToSamples, FuncNames);
1358 }
1359 
getBranchDataForNamesRegex(const std::vector<StringRef> & FuncNames)1360 std::vector<FuncBranchData *> DataReader::getBranchDataForNamesRegex(
1361     const std::vector<StringRef> &FuncNames) {
1362   return fetchMapEntriesRegex(NamesToBranches, LTOCommonNameMap, FuncNames);
1363 }
1364 
1365 std::vector<FuncMemData *>
getMemDataForNamesRegex(const std::vector<StringRef> & FuncNames)1366 DataReader::getMemDataForNamesRegex(const std::vector<StringRef> &FuncNames) {
1367   return fetchMapEntriesRegex(NamesToMemEvents, LTOCommonNameMemMap, FuncNames);
1368 }
1369 
hasLocalsWithFileName() const1370 bool DataReader::hasLocalsWithFileName() const {
1371   for (const auto &Func : NamesToBranches) {
1372     const StringRef &FuncName = Func.first;
1373     if (FuncName.count('/') == 2 && FuncName[0] != '/')
1374       return true;
1375   }
1376   return false;
1377 }
1378 
dump() const1379 void DataReader::dump() const {
1380   for (const auto &KV : NamesToBranches) {
1381     const StringRef Name = KV.first;
1382     const FuncBranchData &FBD = KV.second;
1383     Diag << Name << " branches:\n";
1384     for (const BranchInfo &BI : FBD.Data)
1385       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1386            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1387     Diag << Name << " entry points:\n";
1388     for (const BranchInfo &BI : FBD.EntryData)
1389       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1390            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1391   }
1392 
1393   for (auto I = EventNames.begin(), E = EventNames.end(); I != E; ++I) {
1394     StringRef Event = I->getKey();
1395     Diag << "Data was collected with event: " << Event << "\n";
1396   }
1397   for (const auto &KV : NamesToSamples) {
1398     const StringRef Name = KV.first;
1399     const FuncSampleData &FSD = KV.second;
1400     Diag << Name << " samples:\n";
1401     for (const SampleInfo &SI : FSD.Data)
1402       Diag << SI.Loc.Name << " " << SI.Loc.Offset << " " << SI.Hits << "\n";
1403   }
1404 
1405   for (const auto &KV : NamesToMemEvents) {
1406     const StringRef Name = KV.first;
1407     const FuncMemData &FMD = KV.second;
1408     Diag << "Memory events for " << Name;
1409     Location LastOffset(0);
1410     for (const MemInfo &MI : FMD.Data) {
1411       if (MI.Offset == LastOffset)
1412         Diag << ", " << MI.Addr << "/" << MI.Count;
1413       else
1414         Diag << "\n" << MI.Offset << ": " << MI.Addr << "/" << MI.Count;
1415       LastOffset = MI.Offset;
1416     }
1417     Diag << "\n";
1418   }
1419 }
1420 
1421 } // namespace bolt
1422 } // namespace llvm
1423