xref: /llvm-project/bolt/lib/Profile/DataReader.cpp (revision 76981fbcf6222271f4f1c166789cf1a0d6077e7e)
1 //===- bolt/Profile/DataReader.cpp - Perf data reader ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This family of functions reads profile data written by the perf2bolt
10 // utility and stores it in memory for llvm-bolt consumption.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Profile/DataReader.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Passes/MCF.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/Errc.h"
21 #include <map>
22 
23 #undef  DEBUG_TYPE
24 #define DEBUG_TYPE "bolt-prof"
25 
26 using namespace llvm;
27 
28 namespace opts {
29 
30 extern cl::OptionCategory BoltCategory;
31 extern llvm::cl::opt<unsigned> Verbosity;
32 
33 static cl::opt<bool>
34 DumpData("dump-data",
35   cl::desc("dump parsed bolt data for debugging"),
36   cl::Hidden,
37   cl::cat(BoltCategory));
38 
39 } // namespace opts
40 
41 namespace llvm {
42 namespace bolt {
43 
44 Optional<StringRef> getLTOCommonName(const StringRef Name) {
45   size_t LTOSuffixPos = Name.find(".lto_priv.");
46   if (LTOSuffixPos != StringRef::npos)
47     return Name.substr(0, LTOSuffixPos + 10);
48   if ((LTOSuffixPos = Name.find(".constprop.")) != StringRef::npos)
49     return Name.substr(0, LTOSuffixPos + 11);
50   if ((LTOSuffixPos = Name.find(".llvm.")) != StringRef::npos)
51     return Name.substr(0, LTOSuffixPos + 6);
52   return NoneType();
53 }
54 
55 namespace {
56 
57 /// Return true if the function name can change across compilations.
58 bool hasVolatileName(const BinaryFunction &BF) {
59   for (const StringRef Name : BF.getNames())
60     if (getLTOCommonName(Name))
61       return true;
62 
63   return false;
64 }
65 
66 /// Return standard escaped name of the function possibly renamed by BOLT.
67 std::string normalizeName(StringRef NameRef) {
68   // Strip "PG." prefix used for globalized locals.
69   NameRef = NameRef.startswith("PG.") ? NameRef.substr(2) : NameRef;
70   return getEscapedName(NameRef);
71 }
72 
73 } // anonymous namespace
74 
75 raw_ostream &operator<<(raw_ostream &OS, const Location &Loc) {
76   if (Loc.IsSymbol) {
77     OS << Loc.Name;
78     if (Loc.Offset)
79       OS << "+" << Twine::utohexstr(Loc.Offset);
80   } else {
81     OS << Twine::utohexstr(Loc.Offset);
82   }
83   return OS;
84 }
85 
86 void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) {
87   Data.insert(Data.end(), FBD.Data.begin(), FBD.Data.end());
88   for (auto I = Data.begin(), E = Data.end(); I != E; ++I) {
89     if (I->From.Name == FBD.Name) {
90       I->From.Name = this->Name;
91       I->From.Offset += Offset;
92     }
93     if (I->To.Name == FBD.Name) {
94       I->To.Name = this->Name;
95       I->To.Offset += Offset;
96     }
97   }
98   std::stable_sort(Data.begin(), Data.end());
99   ExecutionCount += FBD.ExecutionCount;
100   for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) {
101     assert(I->To.Name == FBD.Name);
102     auto NewElmt = EntryData.insert(EntryData.end(), *I);
103     NewElmt->To.Name = this->Name;
104     NewElmt->To.Offset += Offset;
105   }
106 }
107 
108 uint64_t FuncBranchData::getNumExecutedBranches() const {
109   uint64_t ExecutedBranches = 0;
110   for (const BranchInfo &BI : Data) {
111     int64_t BranchCount = BI.Branches;
112     assert(BranchCount >= 0 && "branch execution count should not be negative");
113     ExecutedBranches += BranchCount;
114   }
115   return ExecutedBranches;
116 }
117 
118 void SampleInfo::mergeWith(const SampleInfo &SI) { Hits += SI.Hits; }
119 
120 void SampleInfo::print(raw_ostream &OS) const {
121   OS << Loc.IsSymbol << " " << Loc.Name << " " << Twine::utohexstr(Loc.Offset)
122      << " " << Hits << "\n";
123 }
124 
125 uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
126   assert(std::is_sorted(Data.begin(), Data.end()));
127   struct Compare {
128     bool operator()(const SampleInfo &SI, const uint64_t Val) const {
129       return SI.Loc.Offset < Val;
130     }
131     bool operator()(const uint64_t Val, const SampleInfo &SI) const {
132       return Val < SI.Loc.Offset;
133     }
134   };
135   uint64_t Result = 0;
136   for (auto I = std::lower_bound(Data.begin(), Data.end(), Start, Compare()),
137             E = std::lower_bound(Data.begin(), Data.end(), End, Compare());
138        I != E; ++I)
139     Result += I->Hits;
140   return Result;
141 }
142 
143 void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
144   auto Iter = Index.find(Offset);
145   if (Iter == Index.end()) {
146     Data.emplace_back(Location(true, Name, Offset), Count);
147     Index[Offset] = Data.size() - 1;
148     return;
149   }
150   SampleInfo &SI = Data[Iter->second];
151   SI.Hits += Count;
152 }
153 
154 void FuncBranchData::bumpBranchCount(uint64_t OffsetFrom, uint64_t OffsetTo,
155                                      uint64_t Count, uint64_t Mispreds) {
156   auto Iter = IntraIndex[OffsetFrom].find(OffsetTo);
157   if (Iter == IntraIndex[OffsetFrom].end()) {
158     Data.emplace_back(Location(true, Name, OffsetFrom),
159                       Location(true, Name, OffsetTo), Mispreds, Count);
160     IntraIndex[OffsetFrom][OffsetTo] = Data.size() - 1;
161     return;
162   }
163   BranchInfo &BI = Data[Iter->second];
164   BI.Branches += Count;
165   BI.Mispreds += Mispreds;
166 }
167 
168 void FuncBranchData::bumpCallCount(uint64_t OffsetFrom, const Location &To,
169                                    uint64_t Count, uint64_t Mispreds) {
170   auto Iter = InterIndex[OffsetFrom].find(To);
171   if (Iter == InterIndex[OffsetFrom].end()) {
172     Data.emplace_back(Location(true, Name, OffsetFrom), To, Mispreds, Count);
173     InterIndex[OffsetFrom][To] = Data.size() - 1;
174     return;
175   }
176   BranchInfo &BI = Data[Iter->second];
177   BI.Branches += Count;
178   BI.Mispreds += Mispreds;
179 }
180 
181 void FuncBranchData::bumpEntryCount(const Location &From, uint64_t OffsetTo,
182                                     uint64_t Count, uint64_t Mispreds) {
183   auto Iter = EntryIndex[OffsetTo].find(From);
184   if (Iter == EntryIndex[OffsetTo].end()) {
185     EntryData.emplace_back(From, Location(true, Name, OffsetTo), Mispreds,
186                            Count);
187     EntryIndex[OffsetTo][From] = EntryData.size() - 1;
188     return;
189   }
190   BranchInfo &BI = EntryData[Iter->second];
191   BI.Branches += Count;
192   BI.Mispreds += Mispreds;
193 }
194 
195 void BranchInfo::mergeWith(const BranchInfo &BI) {
196   Branches += BI.Branches;
197   Mispreds += BI.Mispreds;
198 }
199 
200 void BranchInfo::print(raw_ostream &OS) const {
201   OS << From.IsSymbol << " " << From.Name << " "
202      << Twine::utohexstr(From.Offset) << " " << To.IsSymbol << " " << To.Name
203      << " " << Twine::utohexstr(To.Offset) << " " << Mispreds << " " << Branches
204      << '\n';
205 }
206 
207 ErrorOr<const BranchInfo &> FuncBranchData::getBranch(uint64_t From,
208                                                       uint64_t To) const {
209   for (const BranchInfo &I : Data)
210     if (I.From.Offset == From && I.To.Offset == To && I.From.Name == I.To.Name)
211       return I;
212 
213   return make_error_code(llvm::errc::invalid_argument);
214 }
215 
216 ErrorOr<const BranchInfo &>
217 FuncBranchData::getDirectCallBranch(uint64_t From) const {
218   // Commented out because it can be expensive.
219   // assert(std::is_sorted(Data.begin(), Data.end()));
220   struct Compare {
221     bool operator()(const BranchInfo &BI, const uint64_t Val) const {
222       return BI.From.Offset < Val;
223     }
224     bool operator()(const uint64_t Val, const BranchInfo &BI) const {
225       return Val < BI.From.Offset;
226     }
227   };
228   auto Range = std::equal_range(Data.begin(), Data.end(), From, Compare());
229   for (auto I = Range.first; I != Range.second; ++I)
230     if (I->From.Name != I->To.Name)
231       return *I;
232 
233   return make_error_code(llvm::errc::invalid_argument);
234 }
235 
236 void MemInfo::print(raw_ostream &OS) const {
237   OS << (Offset.IsSymbol + 3) << " " << Offset.Name << " "
238      << Twine::utohexstr(Offset.Offset) << " " << (Addr.IsSymbol + 3) << " "
239      << Addr.Name << " " << Twine::utohexstr(Addr.Offset) << " " << Count
240      << "\n";
241 }
242 
243 void MemInfo::prettyPrint(raw_ostream &OS) const {
244   OS << "(PC: " << Offset << ", M: " << Addr << ", C: " << Count << ")";
245 }
246 
247 void FuncMemData::update(const Location &Offset, const Location &Addr) {
248   auto Iter = EventIndex[Offset.Offset].find(Addr);
249   if (Iter == EventIndex[Offset.Offset].end()) {
250     Data.emplace_back(MemInfo(Offset, Addr, 1));
251     EventIndex[Offset.Offset][Addr] = Data.size() - 1;
252     return;
253   }
254   ++Data[Iter->second].Count;
255 }
256 
257 Error DataReader::preprocessProfile(BinaryContext &BC) {
258   if (std::error_code EC = parseInput())
259     return errorCodeToError(EC);
260 
261   if (opts::DumpData)
262     dump();
263 
264   if (collectedInBoltedBinary())
265     outs() << "BOLT-INFO: profile collection done on a binary already "
266               "processed by BOLT\n";
267 
268   for (auto &BFI : BC.getBinaryFunctions()) {
269     BinaryFunction &Function = BFI.second;
270     if (FuncMemData *MemData = getMemDataForNames(Function.getNames())) {
271       setMemData(Function, MemData);
272       MemData->Used = true;
273     }
274     if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) {
275       setBranchData(Function, FuncData);
276       Function.ExecutionCount = FuncData->ExecutionCount;
277       FuncData->Used = true;
278     }
279   }
280 
281   for (auto &BFI : BC.getBinaryFunctions()) {
282     BinaryFunction &Function = BFI.second;
283     matchProfileMemData(Function);
284   }
285 
286   return Error::success();
287 }
288 
289 Error DataReader::readProfilePreCFG(BinaryContext &BC) {
290   for (auto &BFI : BC.getBinaryFunctions()) {
291     BinaryFunction &Function = BFI.second;
292     FuncMemData *MemoryData = getMemData(Function);
293     if (!MemoryData)
294       continue;
295 
296     for (MemInfo &MI : MemoryData->Data) {
297       const uint64_t Offset = MI.Offset.Offset;
298       auto II = Function.Instructions.find(Offset);
299       if (II == Function.Instructions.end()) {
300         // Ignore bad instruction address.
301         continue;
302       }
303 
304       auto &MemAccessProfile =
305           BC.MIB->getOrCreateAnnotationAs<MemoryAccessProfile>(
306               II->second, "MemoryAccessProfile");
307       BinaryData *BD = nullptr;
308       if (MI.Addr.IsSymbol)
309         BD = BC.getBinaryDataByName(MI.Addr.Name);
310       MemAccessProfile.AddressAccessInfo.push_back(
311           {BD, MI.Addr.Offset, MI.Count});
312       auto NextII = std::next(II);
313       if (NextII == Function.Instructions.end())
314         MemAccessProfile.NextInstrOffset = Function.getSize();
315       else
316         MemAccessProfile.NextInstrOffset = II->first;
317     }
318     Function.HasMemoryProfile = true;
319   }
320 
321   return Error::success();
322 }
323 
324 Error DataReader::readProfile(BinaryContext &BC) {
325   for (auto &BFI : BC.getBinaryFunctions()) {
326     BinaryFunction &Function = BFI.second;
327     readProfile(Function);
328   }
329 
330   uint64_t NumUnused = 0;
331   for (const StringMapEntry<FuncBranchData> &FuncData : NamesToBranches)
332     if (!FuncData.getValue().Used)
333       ++NumUnused;
334   BC.setNumUnusedProfiledObjects(NumUnused);
335 
336   return Error::success();
337 }
338 
339 std::error_code DataReader::parseInput() {
340   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
341       MemoryBuffer::getFileOrSTDIN(Filename);
342   if (std::error_code EC = MB.getError()) {
343     Diag << "cannot open " << Filename << ": " << EC.message() << "\n";
344     return EC;
345   }
346   FileBuf = std::move(MB.get());
347   ParsingBuf = FileBuf->getBuffer();
348   if (std::error_code EC = parse())
349     return EC;
350   if (!ParsingBuf.empty())
351     Diag << "WARNING: invalid profile data detected at line " << Line
352          << ". Possibly corrupted profile.\n";
353 
354   buildLTONameMaps();
355 
356   return std::error_code();
357 }
358 
359 void DataReader::readProfile(BinaryFunction &BF) {
360   if (BF.empty())
361     return;
362 
363   if (!hasLBR()) {
364     BF.ProfileFlags = BinaryFunction::PF_SAMPLE;
365     readSampleData(BF);
366     return;
367   }
368 
369   BF.ProfileFlags = BinaryFunction::PF_LBR;
370 
371   // Possibly assign/re-assign branch profile data.
372   matchProfileData(BF);
373 
374   FuncBranchData *FBD = getBranchData(BF);
375   if (!FBD)
376     return;
377 
378   // Assign basic block counts to function entry points. These only include
379   // counts for outside entries.
380   //
381   // There is a slight skew introduced here as branches originated from RETs
382   // may be accounted for in the execution count of an entry block if the last
383   // instruction in a predecessor fall-through block is a call. This situation
384   // should rarely happen because there are few multiple-entry functions.
385   for (const BranchInfo &BI : FBD->EntryData) {
386     BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(BI.To.Offset);
387     if (BB && (BB->isEntryPoint() || BB->isLandingPad())) {
388       uint64_t Count = BB->getExecutionCount();
389       if (Count == BinaryBasicBlock::COUNT_NO_PROFILE)
390         Count = 0;
391       BB->setExecutionCount(Count + BI.Branches);
392     }
393   }
394 
395   uint64_t MismatchedBranches = 0;
396   for (const BranchInfo &BI : FBD->Data) {
397     if (BI.From.Name != BI.To.Name)
398       continue;
399 
400     if (!recordBranch(BF, BI.From.Offset, BI.To.Offset, BI.Branches,
401                       BI.Mispreds)) {
402       LLVM_DEBUG(dbgs() << "bad branch : " << BI.From.Offset << " -> "
403                         << BI.To.Offset << '\n');
404       ++MismatchedBranches;
405     }
406   }
407 
408   // Convert branch data into annotations.
409   convertBranchData(BF);
410 }
411 
412 void DataReader::matchProfileData(BinaryFunction &BF) {
413   // This functionality is available for LBR-mode only
414   // TODO: Implement evaluateProfileData() for samples, checking whether
415   // sample addresses match instruction addresses in the function
416   if (!hasLBR())
417     return;
418 
419   FuncBranchData *FBD = getBranchData(BF);
420   if (FBD) {
421     BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
422     BF.RawBranchCount = FBD->getNumExecutedBranches();
423     if (BF.ProfileMatchRatio == 1.0f) {
424       if (fetchProfileForOtherEntryPoints(BF)) {
425         BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
426         BF.ExecutionCount = FBD->ExecutionCount;
427         BF.RawBranchCount = FBD->getNumExecutedBranches();
428       }
429       return;
430     }
431   }
432 
433   // Check if the function name can fluctuate between several compilations
434   // possibly triggered by minor unrelated code changes in the source code
435   // of the input binary.
436   if (!hasVolatileName(BF))
437     return;
438 
439   // Check for a profile that matches with 100% confidence.
440   const std::vector<FuncBranchData *> AllBranchData =
441       getBranchDataForNamesRegex(BF.getNames());
442   for (FuncBranchData *NewBranchData : AllBranchData) {
443     // Prevent functions from sharing the same profile.
444     if (NewBranchData->Used)
445       continue;
446 
447     if (evaluateProfileData(BF, *NewBranchData) != 1.0f)
448       continue;
449 
450     if (FBD)
451       FBD->Used = false;
452 
453     // Update function profile data with the new set.
454     setBranchData(BF, NewBranchData);
455     NewBranchData->Used = true;
456     BF.ExecutionCount = NewBranchData->ExecutionCount;
457     BF.ProfileMatchRatio = 1.0f;
458     break;
459   }
460 }
461 
462 void DataReader::matchProfileMemData(BinaryFunction &BF) {
463   const std::vector<FuncMemData *> AllMemData =
464       getMemDataForNamesRegex(BF.getNames());
465   for (FuncMemData *NewMemData : AllMemData) {
466     // Prevent functions from sharing the same profile.
467     if (NewMemData->Used)
468       continue;
469 
470     if (FuncMemData *MD = getMemData(BF))
471       MD->Used = false;
472 
473     // Update function profile data with the new set.
474     setMemData(BF, NewMemData);
475     NewMemData->Used = true;
476     break;
477   }
478 }
479 
480 bool DataReader::fetchProfileForOtherEntryPoints(BinaryFunction &BF) {
481   BinaryContext &BC = BF.getBinaryContext();
482 
483   FuncBranchData *FBD = getBranchData(BF);
484   if (!FBD)
485     return false;
486 
487   // Check if we are missing profiling data for secondary entry points
488   bool First = true;
489   bool Updated = false;
490   for (BinaryBasicBlock *BB : BF.BasicBlocks) {
491     if (First) {
492       First = false;
493       continue;
494     }
495     if (BB->isEntryPoint()) {
496       uint64_t EntryAddress = BB->getOffset() + BF.getAddress();
497       // Look for branch data associated with this entry point
498       if (BinaryData *BD = BC.getBinaryDataAtAddress(EntryAddress)) {
499         if (FuncBranchData *Data = getBranchDataForSymbols(BD->getSymbols())) {
500           FBD->appendFrom(*Data, BB->getOffset());
501           Data->Used = true;
502           Updated = true;
503         }
504       }
505     }
506   }
507 
508   return Updated;
509 }
510 
511 float DataReader::evaluateProfileData(BinaryFunction &BF,
512                                       const FuncBranchData &BranchData) const {
513   BinaryContext &BC = BF.getBinaryContext();
514 
515   // Until we define a minimal profile, we consider an empty branch data to be
516   // a valid profile. It could happen to a function without branches when we
517   // still have an EntryData for the execution count.
518   if (BranchData.Data.empty())
519     return 1.0f;
520 
521   uint64_t NumMatchedBranches = 0;
522   for (const BranchInfo &BI : BranchData.Data) {
523     bool IsValid = false;
524     if (BI.From.Name == BI.To.Name) {
525       // Try to record information with 0 count.
526       IsValid = recordBranch(BF, BI.From.Offset, BI.To.Offset, 0);
527     } else if (collectedInBoltedBinary()) {
528       // We can't check branch source for collections in bolted binaries because
529       // the source of the branch may be mapped to the first instruction in a BB
530       // instead of the original branch (which may not exist in the source bin).
531       IsValid = true;
532     } else {
533       // The branch has to originate from this function.
534       // Check for calls, tail calls, rets and indirect branches.
535       // When matching profiling info, we did not reach the stage
536       // when we identify tail calls, so they are still represented
537       // by regular branch instructions and we need isBranch() here.
538       MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
539       // If it's a prefix - skip it.
540       if (Instr && BC.MIB->isPrefix(*Instr))
541         Instr = BF.getInstructionAtOffset(BI.From.Offset + 1);
542       if (Instr && (BC.MIB->isCall(*Instr) || BC.MIB->isBranch(*Instr) ||
543                     BC.MIB->isReturn(*Instr)))
544         IsValid = true;
545     }
546 
547     if (IsValid) {
548       ++NumMatchedBranches;
549       continue;
550     }
551 
552     LLVM_DEBUG(dbgs() << "\tinvalid branch in " << BF << " : 0x"
553                       << Twine::utohexstr(BI.From.Offset) << " -> ";
554                if (BI.From.Name == BI.To.Name) dbgs()
555                << "0x" << Twine::utohexstr(BI.To.Offset) << '\n';
556                else dbgs() << "<outbounds>\n";);
557   }
558 
559   const float MatchRatio = (float)NumMatchedBranches / BranchData.Data.size();
560   if (opts::Verbosity >= 2 && NumMatchedBranches < BranchData.Data.size())
561     errs() << "BOLT-WARNING: profile branches match only "
562            << format("%.1f%%", MatchRatio * 100.0f) << " ("
563            << NumMatchedBranches << '/' << BranchData.Data.size()
564            << ") for function " << BF << '\n';
565 
566   return MatchRatio;
567 }
568 
569 void DataReader::readSampleData(BinaryFunction &BF) {
570   FuncSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames());
571   if (!SampleDataOrErr)
572     return;
573 
574   // Basic samples mode territory (without LBR info)
575   // First step is to assign BB execution count based on samples from perf
576   BF.ProfileMatchRatio = 1.0f;
577   BF.removeTagsFromProfile();
578   bool NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
579   bool NormalizeByCalls = usesEvent("branches");
580   static bool NagUser = true;
581   if (NagUser) {
582     outs()
583         << "BOLT-INFO: operating with basic samples profiling data (no LBR).\n";
584     if (NormalizeByInsnCount)
585       outs() << "BOLT-INFO: normalizing samples by instruction count.\n";
586     else if (NormalizeByCalls)
587       outs() << "BOLT-INFO: normalizing samples by branches.\n";
588 
589     NagUser = false;
590   }
591   uint64_t LastOffset = BF.getSize();
592   uint64_t TotalEntryCount = 0;
593   for (auto I = BF.BasicBlockOffsets.rbegin(), E = BF.BasicBlockOffsets.rend();
594        I != E; ++I) {
595     uint64_t CurOffset = I->first;
596     // Always work with samples multiplied by 1000 to avoid losing them if we
597     // later need to normalize numbers
598     uint64_t NumSamples =
599         SampleDataOrErr->getSamples(CurOffset, LastOffset) * 1000;
600     if (NormalizeByInsnCount && I->second->getNumNonPseudos()) {
601       NumSamples /= I->second->getNumNonPseudos();
602     } else if (NormalizeByCalls) {
603       uint32_t NumCalls = I->second->getNumCalls();
604       NumSamples /= NumCalls + 1;
605     }
606     I->second->setExecutionCount(NumSamples);
607     if (I->second->isEntryPoint())
608       TotalEntryCount += NumSamples;
609     LastOffset = CurOffset;
610   }
611 
612   BF.ExecutionCount = TotalEntryCount;
613 
614   estimateEdgeCounts(BF);
615 }
616 
617 void DataReader::convertBranchData(BinaryFunction &BF) const {
618   BinaryContext &BC = BF.getBinaryContext();
619 
620   if (BF.empty())
621     return;
622 
623   FuncBranchData *FBD = getBranchData(BF);
624   if (!FBD)
625     return;
626 
627   // Profile information for calls.
628   //
629   // There are 3 cases that we annotate differently:
630   //   1) Conditional tail calls that could be mispredicted.
631   //   2) Indirect calls to multiple destinations with mispredictions.
632   //      Before we validate CFG we have to handle indirect branches here too.
633   //   3) Regular direct calls. The count could be different from containing
634   //      basic block count. Keep this data in case we find it useful.
635   //
636   for (BranchInfo &BI : FBD->Data) {
637     // Ignore internal branches.
638     if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0)
639       continue;
640 
641     MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
642     if (!Instr ||
643         (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr)))
644       continue;
645 
646     auto setOrUpdateAnnotation = [&](StringRef Name, uint64_t Count) {
647       if (opts::Verbosity >= 1 && BC.MIB->hasAnnotation(*Instr, Name))
648         errs() << "BOLT-WARNING: duplicate " << Name << " info for offset 0x"
649                << Twine::utohexstr(BI.From.Offset) << " in function " << BF
650                << '\n';
651       auto &Value = BC.MIB->getOrCreateAnnotationAs<uint64_t>(*Instr, Name);
652       Value += Count;
653     };
654 
655     if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) {
656       IndirectCallSiteProfile &CSP =
657           BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
658               *Instr, "CallProfile");
659       MCSymbol *CalleeSymbol = nullptr;
660       if (BI.To.IsSymbol) {
661         if (BinaryData *BD = BC.getBinaryDataByName(BI.To.Name))
662           CalleeSymbol = BD->getSymbol();
663       }
664       CSP.emplace_back(CalleeSymbol, BI.Branches, BI.Mispreds);
665     } else if (BC.MIB->getConditionalTailCall(*Instr)) {
666       setOrUpdateAnnotation("CTCTakenCount", BI.Branches);
667       setOrUpdateAnnotation("CTCMispredCount", BI.Mispreds);
668     } else {
669       setOrUpdateAnnotation("Count", BI.Branches);
670     }
671   }
672 }
673 
674 bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
675                               uint64_t Count, uint64_t Mispreds) const {
676   BinaryContext &BC = BF.getBinaryContext();
677 
678   BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
679   BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
680 
681   if (!FromBB || !ToBB) {
682     LLVM_DEBUG(dbgs() << "failed to get block for recorded branch\n");
683     return false;
684   }
685 
686   // Could be bad LBR data; ignore the branch. In the case of data collected
687   // in binaries optimized by BOLT, a source BB may be mapped to two output
688   // BBs as a result of optimizations. In that case, a branch between these
689   // two will be recorded as a branch from A going to A in the source address
690   // space. Keep processing.
691   if (From == To)
692     return true;
693 
694   // Return from a tail call.
695   if (FromBB->succ_size() == 0)
696     return true;
697 
698   // Very rarely we will see ignored branches. Do a linear check.
699   for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches)
700     if (Branch ==
701         std::make_pair(static_cast<uint32_t>(From), static_cast<uint32_t>(To)))
702       return true;
703 
704   bool OffsetMatches = !!(To == ToBB->getOffset());
705   if (!OffsetMatches) {
706     // Skip the nops to support old .fdata
707     uint64_t Offset = ToBB->getOffset();
708     for (MCInst &Instr : *ToBB) {
709       if (!BC.MIB->isNoop(Instr))
710         break;
711 
712       Offset += BC.MIB->getAnnotationWithDefault<uint32_t>(Instr, "Size");
713     }
714 
715     if (To == Offset)
716       OffsetMatches = true;
717   }
718 
719   if (!OffsetMatches) {
720     // "To" could be referring to nop instructions in between 2 basic blocks.
721     // While building the CFG we make sure these nops are attributed to the
722     // previous basic block, thus we check if the destination belongs to the
723     // gap past the last instruction.
724     const MCInst *LastInstr = ToBB->getLastNonPseudoInstr();
725     if (LastInstr) {
726       const uint32_t LastInstrOffset =
727           BC.MIB->getOffsetWithDefault(*LastInstr, 0);
728 
729       // With old .fdata we are getting FT branches for "jcc,jmp" sequences.
730       if (To == LastInstrOffset && BC.MIB->isUnconditionalBranch(*LastInstr))
731         return true;
732 
733       if (To <= LastInstrOffset) {
734         LLVM_DEBUG(dbgs() << "branch recorded into the middle of the block"
735                           << " in " << BF << " : " << From << " -> " << To
736                           << '\n');
737         return false;
738       }
739     }
740 
741     // The real destination is the layout successor of the detected ToBB.
742     if (ToBB == BF.BasicBlocksLayout.back())
743       return false;
744     BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[ToBB->getIndex() + 1];
745     assert((NextBB && NextBB->getOffset() > ToBB->getOffset()) && "bad layout");
746     ToBB = NextBB;
747   }
748 
749   // If there's no corresponding instruction for 'From', we have probably
750   // discarded it as a FT from __builtin_unreachable.
751   MCInst *FromInstruction = BF.getInstructionAtOffset(From);
752   if (!FromInstruction) {
753     // If the data was collected in a bolted binary, the From addresses may be
754     // translated to the first instruction of the source BB if BOLT inserted
755     // a new branch that did not exist in the source (we can't map it to the
756     // source instruction, so we map it to the first instr of source BB).
757     // We do not keep offsets for random instructions. So the check above will
758     // evaluate to true if the first instr is not a branch (call/jmp/ret/etc)
759     if (collectedInBoltedBinary()) {
760       if (FromBB->getInputOffset() != From) {
761         LLVM_DEBUG(dbgs() << "offset " << From << " does not match a BB in "
762                           << BF << '\n');
763         return false;
764       }
765       FromInstruction = nullptr;
766     } else {
767       LLVM_DEBUG(dbgs() << "no instruction for offset " << From << " in " << BF
768                         << '\n');
769       return false;
770     }
771   }
772 
773   if (!FromBB->getSuccessor(ToBB->getLabel())) {
774     // Check if this is a recursive call or a return from a recursive call.
775     if (FromInstruction && ToBB->isEntryPoint() &&
776         (BC.MIB->isCall(*FromInstruction) ||
777          BC.MIB->isIndirectBranch(*FromInstruction))) {
778       // Execution count is already accounted for.
779       return true;
780     }
781     // For data collected in a bolted binary, we may have created two output BBs
782     // that map to one original block. Branches between these two blocks will
783     // appear here as one BB jumping to itself, even though it has no loop
784     // edges. Ignore these.
785     if (collectedInBoltedBinary() && FromBB == ToBB)
786       return true;
787 
788     BinaryBasicBlock *FTSuccessor = FromBB->getConditionalSuccessor(false);
789     if (FTSuccessor && FTSuccessor->succ_size() == 1 &&
790         FTSuccessor->getSuccessor(ToBB->getLabel())) {
791       BinaryBasicBlock::BinaryBranchInfo &FTBI =
792           FTSuccessor->getBranchInfo(*ToBB);
793       FTBI.Count += Count;
794       if (Count)
795         FTBI.MispredictedCount += Mispreds;
796       ToBB = FTSuccessor;
797     } else {
798       LLVM_DEBUG(dbgs() << "invalid branch in " << BF << '\n'
799                         << Twine::utohexstr(From) << " -> "
800                         << Twine::utohexstr(To) << '\n');
801       return false;
802     }
803   }
804 
805   BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);
806   BI.Count += Count;
807   // Only update mispredicted count if it the count was real.
808   if (Count) {
809     BI.MispredictedCount += Mispreds;
810   }
811 
812   return true;
813 }
814 
815 void DataReader::reportError(StringRef ErrorMsg) {
816   Diag << "Error reading BOLT data input file: line " << Line << ", column "
817        << Col << ": " << ErrorMsg << '\n';
818 }
819 
820 bool DataReader::expectAndConsumeFS() {
821   if (ParsingBuf[0] != FieldSeparator) {
822     reportError("expected field separator");
823     return false;
824   }
825   ParsingBuf = ParsingBuf.drop_front(1);
826   Col += 1;
827   return true;
828 }
829 
830 void DataReader::consumeAllRemainingFS() {
831   while (ParsingBuf[0] == FieldSeparator) {
832     ParsingBuf = ParsingBuf.drop_front(1);
833     Col += 1;
834   }
835 }
836 
837 bool DataReader::checkAndConsumeNewLine() {
838   if (ParsingBuf[0] != '\n')
839     return false;
840 
841   ParsingBuf = ParsingBuf.drop_front(1);
842   Col = 0;
843   Line += 1;
844   return true;
845 }
846 
847 ErrorOr<StringRef> DataReader::parseString(char EndChar, bool EndNl) {
848   if (EndChar == '\\') {
849     reportError("EndChar could not be backslash");
850     return make_error_code(llvm::errc::io_error);
851   }
852 
853   std::string EndChars(1, EndChar);
854   EndChars.push_back('\\');
855   if (EndNl)
856     EndChars.push_back('\n');
857 
858   size_t StringEnd = 0;
859   do {
860     StringEnd = ParsingBuf.find_first_of(EndChars, StringEnd);
861     if (StringEnd == StringRef::npos ||
862         (StringEnd == 0 && ParsingBuf[StringEnd] != '\\')) {
863       reportError("malformed field");
864       return make_error_code(llvm::errc::io_error);
865     }
866 
867     if (ParsingBuf[StringEnd] != '\\')
868       break;
869 
870     StringEnd += 2;
871   } while (1);
872 
873   StringRef Str = ParsingBuf.substr(0, StringEnd);
874 
875   // If EndNl was set and nl was found instead of EndChar, do not consume the
876   // new line.
877   bool EndNlInsteadOfEndChar = ParsingBuf[StringEnd] == '\n' && EndChar != '\n';
878   unsigned End = EndNlInsteadOfEndChar ? StringEnd : StringEnd + 1;
879 
880   ParsingBuf = ParsingBuf.drop_front(End);
881   if (EndChar == '\n') {
882     Col = 0;
883     Line += 1;
884   } else {
885     Col += End;
886   }
887   return Str;
888 }
889 
890 ErrorOr<int64_t> DataReader::parseNumberField(char EndChar, bool EndNl) {
891   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
892   if (std::error_code EC = NumStrRes.getError())
893     return EC;
894   StringRef NumStr = NumStrRes.get();
895   int64_t Num;
896   if (NumStr.getAsInteger(10, Num)) {
897     reportError("expected decimal number");
898     Diag << "Found: " << NumStr << "\n";
899     return make_error_code(llvm::errc::io_error);
900   }
901   return Num;
902 }
903 
904 ErrorOr<uint64_t> DataReader::parseHexField(char EndChar, bool EndNl) {
905   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
906   if (std::error_code EC = NumStrRes.getError())
907     return EC;
908   StringRef NumStr = NumStrRes.get();
909   uint64_t Num;
910   if (NumStr.getAsInteger(16, Num)) {
911     reportError("expected hexidecimal number");
912     Diag << "Found: " << NumStr << "\n";
913     return make_error_code(llvm::errc::io_error);
914   }
915   return Num;
916 }
917 
918 ErrorOr<Location> DataReader::parseLocation(char EndChar, bool EndNl,
919                                             bool ExpectMemLoc) {
920   // Read whether the location of the branch should be DSO or a symbol
921   // 0 means it is a DSO. 1 means it is a global symbol. 2 means it is a local
922   // symbol.
923   // The symbol flag is also used to tag memory load events by adding 3 to the
924   // base values, i.e. 3 not a symbol, 4 global symbol and 5 local symbol.
925   if (!ExpectMemLoc && ParsingBuf[0] != '0' && ParsingBuf[0] != '1' &&
926       ParsingBuf[0] != '2') {
927     reportError("expected 0, 1 or 2");
928     return make_error_code(llvm::errc::io_error);
929   }
930 
931   if (ExpectMemLoc && ParsingBuf[0] != '3' && ParsingBuf[0] != '4' &&
932       ParsingBuf[0] != '5') {
933     reportError("expected 3, 4 or 5");
934     return make_error_code(llvm::errc::io_error);
935   }
936 
937   bool IsSymbol =
938       (!ExpectMemLoc && (ParsingBuf[0] == '1' || ParsingBuf[0] == '2')) ||
939       (ExpectMemLoc && (ParsingBuf[0] == '4' || ParsingBuf[0] == '5'));
940   ParsingBuf = ParsingBuf.drop_front(1);
941   Col += 1;
942 
943   if (!expectAndConsumeFS())
944     return make_error_code(llvm::errc::io_error);
945   consumeAllRemainingFS();
946 
947   // Read the string containing the symbol or the DSO name
948   ErrorOr<StringRef> NameRes = parseString(FieldSeparator);
949   if (std::error_code EC = NameRes.getError())
950     return EC;
951   StringRef Name = NameRes.get();
952   consumeAllRemainingFS();
953 
954   // Read the offset
955   ErrorOr<uint64_t> Offset = parseHexField(EndChar, EndNl);
956   if (std::error_code EC = Offset.getError())
957     return EC;
958 
959   return Location(IsSymbol, Name, Offset.get());
960 }
961 
962 ErrorOr<BranchInfo> DataReader::parseBranchInfo() {
963   ErrorOr<Location> Res = parseLocation(FieldSeparator);
964   if (std::error_code EC = Res.getError())
965     return EC;
966   Location From = Res.get();
967 
968   consumeAllRemainingFS();
969   Res = parseLocation(FieldSeparator);
970   if (std::error_code EC = Res.getError())
971     return EC;
972   Location To = Res.get();
973 
974   consumeAllRemainingFS();
975   ErrorOr<int64_t> MRes = parseNumberField(FieldSeparator);
976   if (std::error_code EC = MRes.getError())
977     return EC;
978   int64_t NumMispreds = MRes.get();
979 
980   consumeAllRemainingFS();
981   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
982   if (std::error_code EC = BRes.getError())
983     return EC;
984   int64_t NumBranches = BRes.get();
985 
986   consumeAllRemainingFS();
987   if (!checkAndConsumeNewLine()) {
988     reportError("expected end of line");
989     return make_error_code(llvm::errc::io_error);
990   }
991 
992   return BranchInfo(std::move(From), std::move(To), NumMispreds, NumBranches);
993 }
994 
995 ErrorOr<MemInfo> DataReader::parseMemInfo() {
996   ErrorOr<Location> Res = parseMemLocation(FieldSeparator);
997   if (std::error_code EC = Res.getError())
998     return EC;
999   Location Offset = Res.get();
1000 
1001   consumeAllRemainingFS();
1002   Res = parseMemLocation(FieldSeparator);
1003   if (std::error_code EC = Res.getError())
1004     return EC;
1005   Location Addr = Res.get();
1006 
1007   consumeAllRemainingFS();
1008   ErrorOr<int64_t> CountRes = parseNumberField(FieldSeparator, true);
1009   if (std::error_code EC = CountRes.getError())
1010     return EC;
1011 
1012   consumeAllRemainingFS();
1013   if (!checkAndConsumeNewLine()) {
1014     reportError("expected end of line");
1015     return make_error_code(llvm::errc::io_error);
1016   }
1017 
1018   return MemInfo(Offset, Addr, CountRes.get());
1019 }
1020 
1021 ErrorOr<SampleInfo> DataReader::parseSampleInfo() {
1022   ErrorOr<Location> Res = parseLocation(FieldSeparator);
1023   if (std::error_code EC = Res.getError())
1024     return EC;
1025   Location Address = Res.get();
1026 
1027   consumeAllRemainingFS();
1028   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
1029   if (std::error_code EC = BRes.getError())
1030     return EC;
1031   int64_t Occurrences = BRes.get();
1032 
1033   consumeAllRemainingFS();
1034   if (!checkAndConsumeNewLine()) {
1035     reportError("expected end of line");
1036     return make_error_code(llvm::errc::io_error);
1037   }
1038 
1039   return SampleInfo(std::move(Address), Occurrences);
1040 }
1041 
1042 ErrorOr<bool> DataReader::maybeParseNoLBRFlag() {
1043   if (ParsingBuf.size() < 6 || ParsingBuf.substr(0, 6) != "no_lbr")
1044     return false;
1045   ParsingBuf = ParsingBuf.drop_front(6);
1046   Col += 6;
1047 
1048   if (ParsingBuf.size() > 0 && ParsingBuf[0] == ' ')
1049     ParsingBuf = ParsingBuf.drop_front(1);
1050 
1051   while (ParsingBuf.size() > 0 && ParsingBuf[0] != '\n') {
1052     ErrorOr<StringRef> EventName = parseString(' ', true);
1053     if (!EventName)
1054       return make_error_code(llvm::errc::io_error);
1055     EventNames.insert(EventName.get());
1056   }
1057 
1058   if (!checkAndConsumeNewLine()) {
1059     reportError("malformed no_lbr line");
1060     return make_error_code(llvm::errc::io_error);
1061   }
1062   return true;
1063 }
1064 
1065 ErrorOr<bool> DataReader::maybeParseBATFlag() {
1066   if (ParsingBuf.size() < 16 || ParsingBuf.substr(0, 16) != "boltedcollection")
1067     return false;
1068   ParsingBuf = ParsingBuf.drop_front(16);
1069   Col += 16;
1070 
1071   if (!checkAndConsumeNewLine()) {
1072     reportError("malformed boltedcollection line");
1073     return make_error_code(llvm::errc::io_error);
1074   }
1075   return true;
1076 }
1077 
1078 bool DataReader::hasBranchData() {
1079   if (ParsingBuf.size() == 0)
1080     return false;
1081 
1082   if (ParsingBuf[0] == '0' || ParsingBuf[0] == '1' || ParsingBuf[0] == '2')
1083     return true;
1084   return false;
1085 }
1086 
1087 bool DataReader::hasMemData() {
1088   if (ParsingBuf.size() == 0)
1089     return false;
1090 
1091   if (ParsingBuf[0] == '3' || ParsingBuf[0] == '4' || ParsingBuf[0] == '5')
1092     return true;
1093   return false;
1094 }
1095 
1096 std::error_code DataReader::parseInNoLBRMode() {
1097   auto GetOrCreateFuncEntry = [&](StringRef Name) {
1098     auto I = NamesToSamples.find(Name);
1099     if (I == NamesToSamples.end()) {
1100       bool Success;
1101       std::tie(I, Success) = NamesToSamples.insert(std::make_pair(
1102           Name, FuncSampleData(Name, FuncSampleData::ContainerTy())));
1103 
1104       assert(Success && "unexpected result of insert");
1105     }
1106     return I;
1107   };
1108 
1109   auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1110     auto I = NamesToMemEvents.find(Name);
1111     if (I == NamesToMemEvents.end()) {
1112       bool Success;
1113       std::tie(I, Success) = NamesToMemEvents.insert(
1114           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
1115       assert(Success && "unexpected result of insert");
1116     }
1117     return I;
1118   };
1119 
1120   while (hasBranchData()) {
1121     ErrorOr<SampleInfo> Res = parseSampleInfo();
1122     if (std::error_code EC = Res.getError())
1123       return EC;
1124 
1125     SampleInfo SI = Res.get();
1126 
1127     // Ignore samples not involving known locations
1128     if (!SI.Loc.IsSymbol)
1129       continue;
1130 
1131     StringMapIterator<FuncSampleData> I = GetOrCreateFuncEntry(SI.Loc.Name);
1132     I->getValue().Data.emplace_back(std::move(SI));
1133   }
1134 
1135   while (hasMemData()) {
1136     ErrorOr<MemInfo> Res = parseMemInfo();
1137     if (std::error_code EC = Res.getError())
1138       return EC;
1139 
1140     MemInfo MI = Res.get();
1141 
1142     // Ignore memory events not involving known pc.
1143     if (!MI.Offset.IsSymbol)
1144       continue;
1145 
1146     StringMapIterator<FuncMemData> I = GetOrCreateFuncMemEntry(MI.Offset.Name);
1147     I->getValue().Data.emplace_back(std::move(MI));
1148   }
1149 
1150   for (StringMapEntry<FuncSampleData> &FuncSamples : NamesToSamples)
1151     std::stable_sort(FuncSamples.second.Data.begin(),
1152                      FuncSamples.second.Data.end());
1153 
1154   for (StringMapEntry<FuncMemData> &MemEvents : NamesToMemEvents)
1155     std::stable_sort(MemEvents.second.Data.begin(),
1156                      MemEvents.second.Data.end());
1157 
1158   return std::error_code();
1159 }
1160 
1161 std::error_code DataReader::parse() {
1162   auto GetOrCreateFuncEntry = [&](StringRef Name) {
1163     auto I = NamesToBranches.find(Name);
1164     if (I == NamesToBranches.end()) {
1165       bool Success;
1166       std::tie(I, Success) = NamesToBranches.insert(std::make_pair(
1167           Name, FuncBranchData(Name, FuncBranchData::ContainerTy(),
1168                                FuncBranchData::ContainerTy())));
1169       assert(Success && "unexpected result of insert");
1170     }
1171     return I;
1172   };
1173 
1174   auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1175     auto I = NamesToMemEvents.find(Name);
1176     if (I == NamesToMemEvents.end()) {
1177       bool Success;
1178       std::tie(I, Success) = NamesToMemEvents.insert(
1179           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
1180       assert(Success && "unexpected result of insert");
1181     }
1182     return I;
1183   };
1184 
1185   Col = 0;
1186   Line = 1;
1187   ErrorOr<bool> FlagOrErr = maybeParseNoLBRFlag();
1188   if (!FlagOrErr)
1189     return FlagOrErr.getError();
1190   NoLBRMode = *FlagOrErr;
1191 
1192   ErrorOr<bool> BATFlagOrErr = maybeParseBATFlag();
1193   if (!BATFlagOrErr)
1194     return BATFlagOrErr.getError();
1195   BATMode = *BATFlagOrErr;
1196 
1197   if (!hasBranchData() && !hasMemData()) {
1198     Diag << "ERROR: no valid profile data found\n";
1199     return make_error_code(llvm::errc::io_error);
1200   }
1201 
1202   if (NoLBRMode)
1203     return parseInNoLBRMode();
1204 
1205   while (hasBranchData()) {
1206     ErrorOr<BranchInfo> Res = parseBranchInfo();
1207     if (std::error_code EC = Res.getError())
1208       return EC;
1209 
1210     BranchInfo BI = Res.get();
1211 
1212     // Ignore branches not involving known location.
1213     if (!BI.From.IsSymbol && !BI.To.IsSymbol)
1214       continue;
1215 
1216     StringMapIterator<FuncBranchData> I = GetOrCreateFuncEntry(BI.From.Name);
1217     I->getValue().Data.emplace_back(std::move(BI));
1218 
1219     // Add entry data for branches to another function or branches
1220     // to entry points (including recursive calls)
1221     if (BI.To.IsSymbol &&
1222         (!BI.From.Name.equals(BI.To.Name) || BI.To.Offset == 0)) {
1223       I = GetOrCreateFuncEntry(BI.To.Name);
1224       I->getValue().EntryData.emplace_back(std::move(BI));
1225     }
1226 
1227     // If destination is the function start - update execution count.
1228     // NB: the data is skewed since we cannot tell tail recursion from
1229     //     branches to the function start.
1230     if (BI.To.IsSymbol && BI.To.Offset == 0) {
1231       I = GetOrCreateFuncEntry(BI.To.Name);
1232       I->getValue().ExecutionCount += BI.Branches;
1233     }
1234   }
1235 
1236   while (hasMemData()) {
1237     ErrorOr<MemInfo> Res = parseMemInfo();
1238     if (std::error_code EC = Res.getError())
1239       return EC;
1240 
1241     MemInfo MI = Res.get();
1242 
1243     // Ignore memory events not involving known pc.
1244     if (!MI.Offset.IsSymbol)
1245       continue;
1246 
1247     StringMapIterator<FuncMemData> I = GetOrCreateFuncMemEntry(MI.Offset.Name);
1248     I->getValue().Data.emplace_back(std::move(MI));
1249   }
1250 
1251   for (StringMapEntry<FuncBranchData> &FuncBranches : NamesToBranches)
1252     std::stable_sort(FuncBranches.second.Data.begin(),
1253                      FuncBranches.second.Data.end());
1254 
1255   for (StringMapEntry<FuncMemData> &MemEvents : NamesToMemEvents)
1256     std::stable_sort(MemEvents.second.Data.begin(),
1257                      MemEvents.second.Data.end());
1258 
1259   return std::error_code();
1260 }
1261 
1262 void DataReader::buildLTONameMaps() {
1263   for (StringMapEntry<FuncBranchData> &FuncData : NamesToBranches) {
1264     const StringRef FuncName = FuncData.getKey();
1265     const Optional<StringRef> CommonName = getLTOCommonName(FuncName);
1266     if (CommonName)
1267       LTOCommonNameMap[*CommonName].push_back(&FuncData.getValue());
1268   }
1269 
1270   for (StringMapEntry<FuncMemData> &FuncData : NamesToMemEvents) {
1271     const StringRef FuncName = FuncData.getKey();
1272     const Optional<StringRef> CommonName = getLTOCommonName(FuncName);
1273     if (CommonName)
1274       LTOCommonNameMemMap[*CommonName].push_back(&FuncData.getValue());
1275   }
1276 }
1277 
1278 namespace {
1279 template <typename MapTy>
1280 decltype(MapTy::MapEntryTy::second) *
1281 fetchMapEntry(MapTy &Map, const std::vector<MCSymbol *> &Symbols) {
1282   // Do a reverse order iteration since the name in profile has a higher chance
1283   // of matching a name at the end of the list.
1284   for (auto SI = Symbols.rbegin(), SE = Symbols.rend(); SI != SE; ++SI) {
1285     auto I = Map.find(normalizeName((*SI)->getName()));
1286     if (I != Map.end())
1287       return &I->getValue();
1288   }
1289   return nullptr;
1290 }
1291 
1292 template <typename MapTy>
1293 decltype(MapTy::MapEntryTy::second) *
1294 fetchMapEntry(MapTy &Map, const std::vector<StringRef> &FuncNames) {
1295   // Do a reverse order iteration since the name in profile has a higher chance
1296   // of matching a name at the end of the list.
1297   for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
1298     auto I = Map.find(normalizeName(*FI));
1299     if (I != Map.end())
1300       return &I->getValue();
1301   }
1302   return nullptr;
1303 }
1304 
1305 template <typename MapTy>
1306 std::vector<decltype(MapTy::MapEntryTy::second) *> fetchMapEntriesRegex(
1307     MapTy &Map,
1308     const StringMap<std::vector<decltype(MapTy::MapEntryTy::second) *>>
1309         &LTOCommonNameMap,
1310     const std::vector<StringRef> &FuncNames) {
1311   std::vector<decltype(MapTy::MapEntryTy::second) *> AllData;
1312   // Do a reverse order iteration since the name in profile has a higher chance
1313   // of matching a name at the end of the list.
1314   for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
1315     std::string Name = normalizeName(*FI);
1316     const Optional<StringRef> LTOCommonName = getLTOCommonName(Name);
1317     if (LTOCommonName) {
1318       auto I = LTOCommonNameMap.find(*LTOCommonName);
1319       if (I != LTOCommonNameMap.end()) {
1320         const std::vector<decltype(MapTy::MapEntryTy::second) *> &CommonData =
1321             I->getValue();
1322         AllData.insert(AllData.end(), CommonData.begin(), CommonData.end());
1323       }
1324     } else {
1325       auto I = Map.find(Name);
1326       if (I != Map.end())
1327         return {&I->getValue()};
1328     }
1329   }
1330   return AllData;
1331 }
1332 
1333 }
1334 
1335 bool DataReader::mayHaveProfileData(const BinaryFunction &Function) {
1336   if (getBranchData(Function) || getMemData(Function))
1337     return true;
1338 
1339   if (getBranchDataForNames(Function.getNames()) ||
1340       getMemDataForNames(Function.getNames()))
1341     return true;
1342 
1343   if (!hasVolatileName(Function))
1344     return false;
1345 
1346   const std::vector<FuncBranchData *> AllBranchData =
1347       getBranchDataForNamesRegex(Function.getNames());
1348   if (!AllBranchData.empty())
1349     return true;
1350 
1351   const std::vector<FuncMemData *> AllMemData =
1352       getMemDataForNamesRegex(Function.getNames());
1353   if (!AllMemData.empty())
1354     return true;
1355 
1356   return false;
1357 }
1358 
1359 FuncBranchData *
1360 DataReader::getBranchDataForNames(const std::vector<StringRef> &FuncNames) {
1361   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, FuncNames);
1362 }
1363 
1364 FuncBranchData *
1365 DataReader::getBranchDataForSymbols(const std::vector<MCSymbol *> &Symbols) {
1366   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, Symbols);
1367 }
1368 
1369 FuncMemData *
1370 DataReader::getMemDataForNames(const std::vector<StringRef> &FuncNames) {
1371   return fetchMapEntry<NamesToMemEventsMapTy>(NamesToMemEvents, FuncNames);
1372 }
1373 
1374 FuncSampleData *
1375 DataReader::getFuncSampleData(const std::vector<StringRef> &FuncNames) {
1376   return fetchMapEntry<NamesToSamplesMapTy>(NamesToSamples, FuncNames);
1377 }
1378 
1379 std::vector<FuncBranchData *> DataReader::getBranchDataForNamesRegex(
1380     const std::vector<StringRef> &FuncNames) {
1381   return fetchMapEntriesRegex(NamesToBranches, LTOCommonNameMap, FuncNames);
1382 }
1383 
1384 std::vector<FuncMemData *>
1385 DataReader::getMemDataForNamesRegex(const std::vector<StringRef> &FuncNames) {
1386   return fetchMapEntriesRegex(NamesToMemEvents, LTOCommonNameMemMap, FuncNames);
1387 }
1388 
1389 bool DataReader::hasLocalsWithFileName() const {
1390   for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
1391     const StringRef &FuncName = Func.getKey();
1392     if (FuncName.count('/') == 2 && FuncName[0] != '/')
1393       return true;
1394   }
1395   return false;
1396 }
1397 
1398 void DataReader::dump() const {
1399   for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
1400     Diag << Func.getKey() << " branches:\n";
1401     for (const BranchInfo &BI : Func.getValue().Data)
1402       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1403            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1404     Diag << Func.getKey() << " entry points:\n";
1405     for (const BranchInfo &BI : Func.getValue().EntryData)
1406       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1407            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1408   }
1409 
1410   for (auto I = EventNames.begin(), E = EventNames.end(); I != E; ++I) {
1411     StringRef Event = I->getKey();
1412     Diag << "Data was collected with event: " << Event << "\n";
1413   }
1414   for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) {
1415     Diag << Func.getKey() << " samples:\n";
1416     for (const SampleInfo &SI : Func.getValue().Data)
1417       Diag << SI.Loc.Name << " " << SI.Loc.Offset << " " << SI.Hits << "\n";
1418   }
1419 
1420   for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) {
1421     Diag << "Memory events for " << Func.getValue().Name;
1422     Location LastOffset(0);
1423     for (const MemInfo &MI : Func.getValue().Data) {
1424       if (MI.Offset == LastOffset)
1425         Diag << ", " << MI.Addr << "/" << MI.Count;
1426       else
1427         Diag << "\n" << MI.Offset << ": " << MI.Addr << "/" << MI.Count;
1428       LastOffset = MI.Offset;
1429     }
1430     Diag << "\n";
1431   }
1432 }
1433 
1434 } // namespace bolt
1435 } // namespace llvm
1436