12f09f445SMaksim Panchenko //===- bolt/Profile/DataReader.cpp - Perf data reader ---------------------===// 2a34c753fSRafael Auler // 3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information. 5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6a34c753fSRafael Auler // 7a34c753fSRafael Auler //===----------------------------------------------------------------------===// 8a34c753fSRafael Auler // 9a34c753fSRafael Auler // This family of functions reads profile data written by the perf2bolt 10a34c753fSRafael Auler // utility and stores it in memory for llvm-bolt consumption. 11a34c753fSRafael Auler // 12a34c753fSRafael Auler //===----------------------------------------------------------------------===// 13a34c753fSRafael Auler 14a34c753fSRafael Auler #include "bolt/Profile/DataReader.h" 15a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h" 16a34c753fSRafael Auler #include "bolt/Passes/MCF.h" 17a34c753fSRafael Auler #include "bolt/Utils/Utils.h" 18a34c753fSRafael Auler #include "llvm/Support/CommandLine.h" 19a34c753fSRafael Auler #include "llvm/Support/Debug.h" 20290e4823Sserge-sans-paille #include "llvm/Support/Errc.h" 21a34c753fSRafael Auler #include <map> 22a34c753fSRafael Auler 23a34c753fSRafael Auler #undef DEBUG_TYPE 24a34c753fSRafael Auler #define DEBUG_TYPE "bolt-prof" 25a34c753fSRafael Auler 26a34c753fSRafael Auler using namespace llvm; 27a34c753fSRafael Auler 28a34c753fSRafael Auler namespace opts { 29a34c753fSRafael Auler 30a34c753fSRafael Auler extern cl::OptionCategory BoltCategory; 31a34c753fSRafael Auler extern llvm::cl::opt<unsigned> Verbosity; 32a34c753fSRafael Auler 33a34c753fSRafael Auler static cl::opt<bool> 34a34c753fSRafael Auler DumpData("dump-data", 35a34c753fSRafael Auler cl::desc("dump parsed bolt data for debugging"), 36a34c753fSRafael Auler cl::Hidden, 37a34c753fSRafael Auler cl::cat(BoltCategory)); 38a34c753fSRafael Auler 39a34c753fSRafael Auler } // namespace opts 40a34c753fSRafael Auler 41a34c753fSRafael Auler namespace llvm { 42a34c753fSRafael Auler namespace bolt { 43a34c753fSRafael Auler 44a34c753fSRafael Auler namespace { 45a34c753fSRafael Auler 46a34c753fSRafael Auler /// Return true if the function name can change across compilations. 47a34c753fSRafael Auler bool hasVolatileName(const BinaryFunction &BF) { 48253b8f0aSAmir Ayupov for (const StringRef &Name : BF.getNames()) 49a34c753fSRafael Auler if (getLTOCommonName(Name)) 50a34c753fSRafael Auler return true; 51def464aaSAmir Ayupov 52a34c753fSRafael Auler return false; 53a34c753fSRafael Auler } 54a34c753fSRafael Auler 55a34c753fSRafael Auler /// Return standard escaped name of the function possibly renamed by BOLT. 56a34c753fSRafael Auler std::string normalizeName(StringRef NameRef) { 57a34c753fSRafael Auler // Strip "PG." prefix used for globalized locals. 58a34c753fSRafael Auler NameRef = NameRef.startswith("PG.") ? NameRef.substr(2) : NameRef; 59a34c753fSRafael Auler return getEscapedName(NameRef); 60a34c753fSRafael Auler } 61a34c753fSRafael Auler 62a34c753fSRafael Auler } // anonymous namespace 63a34c753fSRafael Auler 64a34c753fSRafael Auler raw_ostream &operator<<(raw_ostream &OS, const Location &Loc) { 65a34c753fSRafael Auler if (Loc.IsSymbol) { 66a34c753fSRafael Auler OS << Loc.Name; 67a34c753fSRafael Auler if (Loc.Offset) 68a34c753fSRafael Auler OS << "+" << Twine::utohexstr(Loc.Offset); 69a34c753fSRafael Auler } else { 70a34c753fSRafael Auler OS << Twine::utohexstr(Loc.Offset); 71a34c753fSRafael Auler } 72a34c753fSRafael Auler return OS; 73a34c753fSRafael Auler } 74a34c753fSRafael Auler 75a34c753fSRafael Auler void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) { 76a34c753fSRafael Auler Data.insert(Data.end(), FBD.Data.begin(), FBD.Data.end()); 77a34c753fSRafael Auler for (auto I = Data.begin(), E = Data.end(); I != E; ++I) { 78a34c753fSRafael Auler if (I->From.Name == FBD.Name) { 79a34c753fSRafael Auler I->From.Name = this->Name; 80a34c753fSRafael Auler I->From.Offset += Offset; 81a34c753fSRafael Auler } 82a34c753fSRafael Auler if (I->To.Name == FBD.Name) { 83a34c753fSRafael Auler I->To.Name = this->Name; 84a34c753fSRafael Auler I->To.Offset += Offset; 85a34c753fSRafael Auler } 86a34c753fSRafael Auler } 87d2c87699SAmir Ayupov llvm::stable_sort(Data); 88a34c753fSRafael Auler ExecutionCount += FBD.ExecutionCount; 89a34c753fSRafael Auler for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) { 90a34c753fSRafael Auler assert(I->To.Name == FBD.Name); 91a34c753fSRafael Auler auto NewElmt = EntryData.insert(EntryData.end(), *I); 92a34c753fSRafael Auler NewElmt->To.Name = this->Name; 93a34c753fSRafael Auler NewElmt->To.Offset += Offset; 94a34c753fSRafael Auler } 95a34c753fSRafael Auler } 96a34c753fSRafael Auler 97a34c753fSRafael Auler uint64_t FuncBranchData::getNumExecutedBranches() const { 98a34c753fSRafael Auler uint64_t ExecutedBranches = 0; 99a34c753fSRafael Auler for (const BranchInfo &BI : Data) { 100a34c753fSRafael Auler int64_t BranchCount = BI.Branches; 101a34c753fSRafael Auler assert(BranchCount >= 0 && "branch execution count should not be negative"); 102a34c753fSRafael Auler ExecutedBranches += BranchCount; 103a34c753fSRafael Auler } 104a34c753fSRafael Auler return ExecutedBranches; 105a34c753fSRafael Auler } 106a34c753fSRafael Auler 10740c2e0faSMaksim Panchenko void SampleInfo::mergeWith(const SampleInfo &SI) { Hits += SI.Hits; } 108a34c753fSRafael Auler 109a34c753fSRafael Auler void SampleInfo::print(raw_ostream &OS) const { 110a34c753fSRafael Auler OS << Loc.IsSymbol << " " << Loc.Name << " " << Twine::utohexstr(Loc.Offset) 111a34c753fSRafael Auler << " " << Hits << "\n"; 112a34c753fSRafael Auler } 113a34c753fSRafael Auler 11440c2e0faSMaksim Panchenko uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const { 115d2c87699SAmir Ayupov assert(llvm::is_sorted(Data)); 116a34c753fSRafael Auler struct Compare { 117a34c753fSRafael Auler bool operator()(const SampleInfo &SI, const uint64_t Val) const { 118a34c753fSRafael Auler return SI.Loc.Offset < Val; 119a34c753fSRafael Auler } 120a34c753fSRafael Auler bool operator()(const uint64_t Val, const SampleInfo &SI) const { 121a34c753fSRafael Auler return Val < SI.Loc.Offset; 122a34c753fSRafael Auler } 123a34c753fSRafael Auler }; 124a34c753fSRafael Auler uint64_t Result = 0; 125d2c87699SAmir Ayupov for (auto I = llvm::lower_bound(Data, Start, Compare()), 126d2c87699SAmir Ayupov E = llvm::lower_bound(Data, End, Compare()); 127def464aaSAmir Ayupov I != E; ++I) 128a34c753fSRafael Auler Result += I->Hits; 129a34c753fSRafael Auler return Result; 130a34c753fSRafael Auler } 131a34c753fSRafael Auler 132a34c753fSRafael Auler void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) { 133a34c753fSRafael Auler auto Iter = Index.find(Offset); 134a34c753fSRafael Auler if (Iter == Index.end()) { 135a34c753fSRafael Auler Data.emplace_back(Location(true, Name, Offset), Count); 136a34c753fSRafael Auler Index[Offset] = Data.size() - 1; 137a34c753fSRafael Auler return; 138a34c753fSRafael Auler } 139a34c753fSRafael Auler SampleInfo &SI = Data[Iter->second]; 140a34c753fSRafael Auler SI.Hits += Count; 141a34c753fSRafael Auler } 142a34c753fSRafael Auler 143a34c753fSRafael Auler void FuncBranchData::bumpBranchCount(uint64_t OffsetFrom, uint64_t OffsetTo, 144a34c753fSRafael Auler uint64_t Count, uint64_t Mispreds) { 145a34c753fSRafael Auler auto Iter = IntraIndex[OffsetFrom].find(OffsetTo); 146a34c753fSRafael Auler if (Iter == IntraIndex[OffsetFrom].end()) { 147a34c753fSRafael Auler Data.emplace_back(Location(true, Name, OffsetFrom), 148a34c753fSRafael Auler Location(true, Name, OffsetTo), Mispreds, Count); 149a34c753fSRafael Auler IntraIndex[OffsetFrom][OffsetTo] = Data.size() - 1; 150a34c753fSRafael Auler return; 151a34c753fSRafael Auler } 152a34c753fSRafael Auler BranchInfo &BI = Data[Iter->second]; 153a34c753fSRafael Auler BI.Branches += Count; 154a34c753fSRafael Auler BI.Mispreds += Mispreds; 155a34c753fSRafael Auler } 156a34c753fSRafael Auler 157a34c753fSRafael Auler void FuncBranchData::bumpCallCount(uint64_t OffsetFrom, const Location &To, 158a34c753fSRafael Auler uint64_t Count, uint64_t Mispreds) { 159a34c753fSRafael Auler auto Iter = InterIndex[OffsetFrom].find(To); 160a34c753fSRafael Auler if (Iter == InterIndex[OffsetFrom].end()) { 161a34c753fSRafael Auler Data.emplace_back(Location(true, Name, OffsetFrom), To, Mispreds, Count); 162a34c753fSRafael Auler InterIndex[OffsetFrom][To] = Data.size() - 1; 163a34c753fSRafael Auler return; 164a34c753fSRafael Auler } 165a34c753fSRafael Auler BranchInfo &BI = Data[Iter->second]; 166a34c753fSRafael Auler BI.Branches += Count; 167a34c753fSRafael Auler BI.Mispreds += Mispreds; 168a34c753fSRafael Auler } 169a34c753fSRafael Auler 170a34c753fSRafael Auler void FuncBranchData::bumpEntryCount(const Location &From, uint64_t OffsetTo, 171a34c753fSRafael Auler uint64_t Count, uint64_t Mispreds) { 172a34c753fSRafael Auler auto Iter = EntryIndex[OffsetTo].find(From); 173a34c753fSRafael Auler if (Iter == EntryIndex[OffsetTo].end()) { 174a34c753fSRafael Auler EntryData.emplace_back(From, Location(true, Name, OffsetTo), Mispreds, 175a34c753fSRafael Auler Count); 176a34c753fSRafael Auler EntryIndex[OffsetTo][From] = EntryData.size() - 1; 177a34c753fSRafael Auler return; 178a34c753fSRafael Auler } 179a34c753fSRafael Auler BranchInfo &BI = EntryData[Iter->second]; 180a34c753fSRafael Auler BI.Branches += Count; 181a34c753fSRafael Auler BI.Mispreds += Mispreds; 182a34c753fSRafael Auler } 183a34c753fSRafael Auler 184a34c753fSRafael Auler void BranchInfo::mergeWith(const BranchInfo &BI) { 185a34c753fSRafael Auler Branches += BI.Branches; 186a34c753fSRafael Auler Mispreds += BI.Mispreds; 187a34c753fSRafael Auler } 188a34c753fSRafael Auler 189a34c753fSRafael Auler void BranchInfo::print(raw_ostream &OS) const { 190a34c753fSRafael Auler OS << From.IsSymbol << " " << From.Name << " " 19140c2e0faSMaksim Panchenko << Twine::utohexstr(From.Offset) << " " << To.IsSymbol << " " << To.Name 19240c2e0faSMaksim Panchenko << " " << Twine::utohexstr(To.Offset) << " " << Mispreds << " " << Branches 19340c2e0faSMaksim Panchenko << '\n'; 194a34c753fSRafael Auler } 195a34c753fSRafael Auler 196a34c753fSRafael Auler ErrorOr<const BranchInfo &> FuncBranchData::getBranch(uint64_t From, 197a34c753fSRafael Auler uint64_t To) const { 198def464aaSAmir Ayupov for (const BranchInfo &I : Data) 19940c2e0faSMaksim Panchenko if (I.From.Offset == From && I.To.Offset == To && I.From.Name == I.To.Name) 200a34c753fSRafael Auler return I; 201def464aaSAmir Ayupov 202a34c753fSRafael Auler return make_error_code(llvm::errc::invalid_argument); 203a34c753fSRafael Auler } 204a34c753fSRafael Auler 205a34c753fSRafael Auler ErrorOr<const BranchInfo &> 206a34c753fSRafael Auler FuncBranchData::getDirectCallBranch(uint64_t From) const { 207a34c753fSRafael Auler // Commented out because it can be expensive. 208a34c753fSRafael Auler // assert(std::is_sorted(Data.begin(), Data.end())); 209a34c753fSRafael Auler struct Compare { 210a34c753fSRafael Auler bool operator()(const BranchInfo &BI, const uint64_t Val) const { 211a34c753fSRafael Auler return BI.From.Offset < Val; 212a34c753fSRafael Auler } 213a34c753fSRafael Auler bool operator()(const uint64_t Val, const BranchInfo &BI) const { 214a34c753fSRafael Auler return Val < BI.From.Offset; 215a34c753fSRafael Auler } 216a34c753fSRafael Auler }; 217a34c753fSRafael Auler auto Range = std::equal_range(Data.begin(), Data.end(), From, Compare()); 21817f3cbe3SAmir Ayupov for (const auto &RI : llvm::make_range(Range)) 21917f3cbe3SAmir Ayupov if (RI.From.Name != RI.To.Name) 22017f3cbe3SAmir Ayupov return RI; 221def464aaSAmir Ayupov 222a34c753fSRafael Auler return make_error_code(llvm::errc::invalid_argument); 223a34c753fSRafael Auler } 224a34c753fSRafael Auler 225a34c753fSRafael Auler void MemInfo::print(raw_ostream &OS) const { 226a34c753fSRafael Auler OS << (Offset.IsSymbol + 3) << " " << Offset.Name << " " 22740c2e0faSMaksim Panchenko << Twine::utohexstr(Offset.Offset) << " " << (Addr.IsSymbol + 3) << " " 22840c2e0faSMaksim Panchenko << Addr.Name << " " << Twine::utohexstr(Addr.Offset) << " " << Count 22940c2e0faSMaksim Panchenko << "\n"; 230a34c753fSRafael Auler } 231a34c753fSRafael Auler 232a34c753fSRafael Auler void MemInfo::prettyPrint(raw_ostream &OS) const { 233a34c753fSRafael Auler OS << "(PC: " << Offset << ", M: " << Addr << ", C: " << Count << ")"; 234a34c753fSRafael Auler } 235a34c753fSRafael Auler 236a34c753fSRafael Auler void FuncMemData::update(const Location &Offset, const Location &Addr) { 237a34c753fSRafael Auler auto Iter = EventIndex[Offset.Offset].find(Addr); 238a34c753fSRafael Auler if (Iter == EventIndex[Offset.Offset].end()) { 239a34c753fSRafael Auler Data.emplace_back(MemInfo(Offset, Addr, 1)); 240a34c753fSRafael Auler EventIndex[Offset.Offset][Addr] = Data.size() - 1; 241a34c753fSRafael Auler return; 242a34c753fSRafael Auler } 243a34c753fSRafael Auler ++Data[Iter->second].Count; 244a34c753fSRafael Auler } 245a34c753fSRafael Auler 246a34c753fSRafael Auler Error DataReader::preprocessProfile(BinaryContext &BC) { 247def464aaSAmir Ayupov if (std::error_code EC = parseInput()) 248a34c753fSRafael Auler return errorCodeToError(EC); 249a34c753fSRafael Auler 250def464aaSAmir Ayupov if (opts::DumpData) 251a34c753fSRafael Auler dump(); 252a34c753fSRafael Auler 253def464aaSAmir Ayupov if (collectedInBoltedBinary()) 254a34c753fSRafael Auler outs() << "BOLT-INFO: profile collection done on a binary already " 255a34c753fSRafael Auler "processed by BOLT\n"; 256a34c753fSRafael Auler 257a34c753fSRafael Auler for (auto &BFI : BC.getBinaryFunctions()) { 258a34c753fSRafael Auler BinaryFunction &Function = BFI.second; 259a34c753fSRafael Auler if (FuncMemData *MemData = getMemDataForNames(Function.getNames())) { 260a34c753fSRafael Auler setMemData(Function, MemData); 261a34c753fSRafael Auler MemData->Used = true; 262a34c753fSRafael Auler } 263a34c753fSRafael Auler if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) { 264a34c753fSRafael Auler setBranchData(Function, FuncData); 265a34c753fSRafael Auler Function.ExecutionCount = FuncData->ExecutionCount; 266a34c753fSRafael Auler FuncData->Used = true; 267a34c753fSRafael Auler } 268a34c753fSRafael Auler } 269a34c753fSRafael Auler 270a34c753fSRafael Auler for (auto &BFI : BC.getBinaryFunctions()) { 271a34c753fSRafael Auler BinaryFunction &Function = BFI.second; 272a34c753fSRafael Auler matchProfileMemData(Function); 273a34c753fSRafael Auler } 274a34c753fSRafael Auler 275a34c753fSRafael Auler return Error::success(); 276a34c753fSRafael Auler } 277a34c753fSRafael Auler 278a34c753fSRafael Auler Error DataReader::readProfilePreCFG(BinaryContext &BC) { 279a34c753fSRafael Auler for (auto &BFI : BC.getBinaryFunctions()) { 280a34c753fSRafael Auler BinaryFunction &Function = BFI.second; 281a34c753fSRafael Auler FuncMemData *MemoryData = getMemData(Function); 282a34c753fSRafael Auler if (!MemoryData) 283a34c753fSRafael Auler continue; 284a34c753fSRafael Auler 285a34c753fSRafael Auler for (MemInfo &MI : MemoryData->Data) { 286a34c753fSRafael Auler const uint64_t Offset = MI.Offset.Offset; 287a34c753fSRafael Auler auto II = Function.Instructions.find(Offset); 288a34c753fSRafael Auler if (II == Function.Instructions.end()) { 289a34c753fSRafael Auler // Ignore bad instruction address. 290a34c753fSRafael Auler continue; 291a34c753fSRafael Auler } 292a34c753fSRafael Auler 293a34c753fSRafael Auler auto &MemAccessProfile = 294a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationAs<MemoryAccessProfile>( 295a34c753fSRafael Auler II->second, "MemoryAccessProfile"); 296a34c753fSRafael Auler BinaryData *BD = nullptr; 297def464aaSAmir Ayupov if (MI.Addr.IsSymbol) 298a34c753fSRafael Auler BD = BC.getBinaryDataByName(MI.Addr.Name); 29940c2e0faSMaksim Panchenko MemAccessProfile.AddressAccessInfo.push_back( 30040c2e0faSMaksim Panchenko {BD, MI.Addr.Offset, MI.Count}); 301a34c753fSRafael Auler auto NextII = std::next(II); 302def464aaSAmir Ayupov if (NextII == Function.Instructions.end()) 303a34c753fSRafael Auler MemAccessProfile.NextInstrOffset = Function.getSize(); 304def464aaSAmir Ayupov else 305a34c753fSRafael Auler MemAccessProfile.NextInstrOffset = II->first; 306a34c753fSRafael Auler } 307a34c753fSRafael Auler Function.HasMemoryProfile = true; 308a34c753fSRafael Auler } 309a34c753fSRafael Auler 310a34c753fSRafael Auler return Error::success(); 311a34c753fSRafael Auler } 312a34c753fSRafael Auler 313a34c753fSRafael Auler Error DataReader::readProfile(BinaryContext &BC) { 314a34c753fSRafael Auler for (auto &BFI : BC.getBinaryFunctions()) { 315a34c753fSRafael Auler BinaryFunction &Function = BFI.second; 316a34c753fSRafael Auler readProfile(Function); 317a34c753fSRafael Auler } 318a34c753fSRafael Auler 319a34c753fSRafael Auler uint64_t NumUnused = 0; 32073b89e3fSMaksim Panchenko for (const auto &KV : NamesToBranches) { 32173b89e3fSMaksim Panchenko const FuncBranchData &FBD = KV.second; 32273b89e3fSMaksim Panchenko if (!FBD.Used) 323a34c753fSRafael Auler ++NumUnused; 32473b89e3fSMaksim Panchenko } 325a34c753fSRafael Auler BC.setNumUnusedProfiledObjects(NumUnused); 326a34c753fSRafael Auler 327a34c753fSRafael Auler return Error::success(); 328a34c753fSRafael Auler } 329a34c753fSRafael Auler 330a34c753fSRafael Auler std::error_code DataReader::parseInput() { 331a34c753fSRafael Auler ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 332a34c753fSRafael Auler MemoryBuffer::getFileOrSTDIN(Filename); 333a34c753fSRafael Auler if (std::error_code EC = MB.getError()) { 334a34c753fSRafael Auler Diag << "cannot open " << Filename << ": " << EC.message() << "\n"; 335a34c753fSRafael Auler return EC; 336a34c753fSRafael Auler } 337a34c753fSRafael Auler FileBuf = std::move(MB.get()); 338a34c753fSRafael Auler ParsingBuf = FileBuf->getBuffer(); 339def464aaSAmir Ayupov if (std::error_code EC = parse()) 340a34c753fSRafael Auler return EC; 341def464aaSAmir Ayupov if (!ParsingBuf.empty()) 342a34c753fSRafael Auler Diag << "WARNING: invalid profile data detected at line " << Line 343a34c753fSRafael Auler << ". Possibly corrupted profile.\n"; 344a34c753fSRafael Auler 345a34c753fSRafael Auler buildLTONameMaps(); 346a34c753fSRafael Auler 347a34c753fSRafael Auler return std::error_code(); 348a34c753fSRafael Auler } 349a34c753fSRafael Auler 350a34c753fSRafael Auler void DataReader::readProfile(BinaryFunction &BF) { 351a34c753fSRafael Auler if (BF.empty()) 352a34c753fSRafael Auler return; 353a34c753fSRafael Auler 354a34c753fSRafael Auler if (!hasLBR()) { 355a34c753fSRafael Auler BF.ProfileFlags = BinaryFunction::PF_SAMPLE; 356a34c753fSRafael Auler readSampleData(BF); 357a34c753fSRafael Auler return; 358a34c753fSRafael Auler } 359a34c753fSRafael Auler 360a34c753fSRafael Auler BF.ProfileFlags = BinaryFunction::PF_LBR; 361a34c753fSRafael Auler 362a34c753fSRafael Auler // Possibly assign/re-assign branch profile data. 363a34c753fSRafael Auler matchProfileData(BF); 364a34c753fSRafael Auler 365a34c753fSRafael Auler FuncBranchData *FBD = getBranchData(BF); 366a34c753fSRafael Auler if (!FBD) 367a34c753fSRafael Auler return; 368a34c753fSRafael Auler 369a34c753fSRafael Auler // Assign basic block counts to function entry points. These only include 370a34c753fSRafael Auler // counts for outside entries. 371a34c753fSRafael Auler // 372a34c753fSRafael Auler // There is a slight skew introduced here as branches originated from RETs 373a34c753fSRafael Auler // may be accounted for in the execution count of an entry block if the last 374a34c753fSRafael Auler // instruction in a predecessor fall-through block is a call. This situation 375a34c753fSRafael Auler // should rarely happen because there are few multiple-entry functions. 376a34c753fSRafael Auler for (const BranchInfo &BI : FBD->EntryData) { 377a34c753fSRafael Auler BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(BI.To.Offset); 378a34c753fSRafael Auler if (BB && (BB->isEntryPoint() || BB->isLandingPad())) { 379a34c753fSRafael Auler uint64_t Count = BB->getExecutionCount(); 380a34c753fSRafael Auler if (Count == BinaryBasicBlock::COUNT_NO_PROFILE) 381a34c753fSRafael Auler Count = 0; 382a34c753fSRafael Auler BB->setExecutionCount(Count + BI.Branches); 383a34c753fSRafael Auler } 384a34c753fSRafael Auler } 385a34c753fSRafael Auler 386a34c753fSRafael Auler for (const BranchInfo &BI : FBD->Data) { 387def464aaSAmir Ayupov if (BI.From.Name != BI.To.Name) 388a34c753fSRafael Auler continue; 389a34c753fSRafael Auler 39040c2e0faSMaksim Panchenko if (!recordBranch(BF, BI.From.Offset, BI.To.Offset, BI.Branches, 39140c2e0faSMaksim Panchenko BI.Mispreds)) { 392a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "bad branch : " << BI.From.Offset << " -> " 393a34c753fSRafael Auler << BI.To.Offset << '\n'); 394a34c753fSRafael Auler } 395a34c753fSRafael Auler } 396a34c753fSRafael Auler 397a34c753fSRafael Auler // Convert branch data into annotations. 398a34c753fSRafael Auler convertBranchData(BF); 399a34c753fSRafael Auler } 400a34c753fSRafael Auler 401a34c753fSRafael Auler void DataReader::matchProfileData(BinaryFunction &BF) { 402a34c753fSRafael Auler // This functionality is available for LBR-mode only 403a34c753fSRafael Auler // TODO: Implement evaluateProfileData() for samples, checking whether 404a34c753fSRafael Auler // sample addresses match instruction addresses in the function 405a34c753fSRafael Auler if (!hasLBR()) 406a34c753fSRafael Auler return; 407a34c753fSRafael Auler 408a34c753fSRafael Auler FuncBranchData *FBD = getBranchData(BF); 409a34c753fSRafael Auler if (FBD) { 410a34c753fSRafael Auler BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD); 411a34c753fSRafael Auler BF.RawBranchCount = FBD->getNumExecutedBranches(); 412a34c753fSRafael Auler if (BF.ProfileMatchRatio == 1.0f) { 413a34c753fSRafael Auler if (fetchProfileForOtherEntryPoints(BF)) { 414a34c753fSRafael Auler BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD); 415a34c753fSRafael Auler BF.ExecutionCount = FBD->ExecutionCount; 416a34c753fSRafael Auler BF.RawBranchCount = FBD->getNumExecutedBranches(); 417a34c753fSRafael Auler } 418a34c753fSRafael Auler return; 419a34c753fSRafael Auler } 420a34c753fSRafael Auler } 421a34c753fSRafael Auler 422a34c753fSRafael Auler // Check if the function name can fluctuate between several compilations 423a34c753fSRafael Auler // possibly triggered by minor unrelated code changes in the source code 424a34c753fSRafael Auler // of the input binary. 425a34c753fSRafael Auler if (!hasVolatileName(BF)) 426a34c753fSRafael Auler return; 427a34c753fSRafael Auler 428a34c753fSRafael Auler // Check for a profile that matches with 100% confidence. 429a34c753fSRafael Auler const std::vector<FuncBranchData *> AllBranchData = 430a34c753fSRafael Auler getBranchDataForNamesRegex(BF.getNames()); 431a34c753fSRafael Auler for (FuncBranchData *NewBranchData : AllBranchData) { 432a34c753fSRafael Auler // Prevent functions from sharing the same profile. 433a34c753fSRafael Auler if (NewBranchData->Used) 434a34c753fSRafael Auler continue; 435a34c753fSRafael Auler 436a34c753fSRafael Auler if (evaluateProfileData(BF, *NewBranchData) != 1.0f) 437a34c753fSRafael Auler continue; 438a34c753fSRafael Auler 439a34c753fSRafael Auler if (FBD) 440a34c753fSRafael Auler FBD->Used = false; 441a34c753fSRafael Auler 442a34c753fSRafael Auler // Update function profile data with the new set. 443a34c753fSRafael Auler setBranchData(BF, NewBranchData); 444a34c753fSRafael Auler NewBranchData->Used = true; 445a34c753fSRafael Auler BF.ExecutionCount = NewBranchData->ExecutionCount; 446a34c753fSRafael Auler BF.ProfileMatchRatio = 1.0f; 447a34c753fSRafael Auler break; 448a34c753fSRafael Auler } 449a34c753fSRafael Auler } 450a34c753fSRafael Auler 451a34c753fSRafael Auler void DataReader::matchProfileMemData(BinaryFunction &BF) { 452a34c753fSRafael Auler const std::vector<FuncMemData *> AllMemData = 453a34c753fSRafael Auler getMemDataForNamesRegex(BF.getNames()); 454a34c753fSRafael Auler for (FuncMemData *NewMemData : AllMemData) { 455a34c753fSRafael Auler // Prevent functions from sharing the same profile. 456a34c753fSRafael Auler if (NewMemData->Used) 457a34c753fSRafael Auler continue; 458a34c753fSRafael Auler 459a34c753fSRafael Auler if (FuncMemData *MD = getMemData(BF)) 460a34c753fSRafael Auler MD->Used = false; 461a34c753fSRafael Auler 462a34c753fSRafael Auler // Update function profile data with the new set. 463a34c753fSRafael Auler setMemData(BF, NewMemData); 464a34c753fSRafael Auler NewMemData->Used = true; 465a34c753fSRafael Auler break; 466a34c753fSRafael Auler } 467a34c753fSRafael Auler } 468a34c753fSRafael Auler 469a34c753fSRafael Auler bool DataReader::fetchProfileForOtherEntryPoints(BinaryFunction &BF) { 470a34c753fSRafael Auler BinaryContext &BC = BF.getBinaryContext(); 471a34c753fSRafael Auler 472a34c753fSRafael Auler FuncBranchData *FBD = getBranchData(BF); 473a34c753fSRafael Auler if (!FBD) 474a34c753fSRafael Auler return false; 475a34c753fSRafael Auler 476a34c753fSRafael Auler // Check if we are missing profiling data for secondary entry points 477a34c753fSRafael Auler bool First = true; 478a34c753fSRafael Auler bool Updated = false; 479a34c753fSRafael Auler for (BinaryBasicBlock *BB : BF.BasicBlocks) { 480a34c753fSRafael Auler if (First) { 481a34c753fSRafael Auler First = false; 482a34c753fSRafael Auler continue; 483a34c753fSRafael Auler } 484a34c753fSRafael Auler if (BB->isEntryPoint()) { 485a34c753fSRafael Auler uint64_t EntryAddress = BB->getOffset() + BF.getAddress(); 486a34c753fSRafael Auler // Look for branch data associated with this entry point 487a34c753fSRafael Auler if (BinaryData *BD = BC.getBinaryDataAtAddress(EntryAddress)) { 488a34c753fSRafael Auler if (FuncBranchData *Data = getBranchDataForSymbols(BD->getSymbols())) { 489a34c753fSRafael Auler FBD->appendFrom(*Data, BB->getOffset()); 490a34c753fSRafael Auler Data->Used = true; 491a34c753fSRafael Auler Updated = true; 492a34c753fSRafael Auler } 493a34c753fSRafael Auler } 494a34c753fSRafael Auler } 495a34c753fSRafael Auler } 496a34c753fSRafael Auler 497a34c753fSRafael Auler return Updated; 498a34c753fSRafael Auler } 499a34c753fSRafael Auler 500a34c753fSRafael Auler float DataReader::evaluateProfileData(BinaryFunction &BF, 501a34c753fSRafael Auler const FuncBranchData &BranchData) const { 502a34c753fSRafael Auler BinaryContext &BC = BF.getBinaryContext(); 503a34c753fSRafael Auler 504a34c753fSRafael Auler // Until we define a minimal profile, we consider an empty branch data to be 505a34c753fSRafael Auler // a valid profile. It could happen to a function without branches when we 506a34c753fSRafael Auler // still have an EntryData for the execution count. 507def464aaSAmir Ayupov if (BranchData.Data.empty()) 508a34c753fSRafael Auler return 1.0f; 509a34c753fSRafael Auler 510a34c753fSRafael Auler uint64_t NumMatchedBranches = 0; 511a34c753fSRafael Auler for (const BranchInfo &BI : BranchData.Data) { 512a34c753fSRafael Auler bool IsValid = false; 513a34c753fSRafael Auler if (BI.From.Name == BI.To.Name) { 514a34c753fSRafael Auler // Try to record information with 0 count. 515a34c753fSRafael Auler IsValid = recordBranch(BF, BI.From.Offset, BI.To.Offset, 0); 516a34c753fSRafael Auler } else if (collectedInBoltedBinary()) { 517a34c753fSRafael Auler // We can't check branch source for collections in bolted binaries because 518a34c753fSRafael Auler // the source of the branch may be mapped to the first instruction in a BB 519a34c753fSRafael Auler // instead of the original branch (which may not exist in the source bin). 520a34c753fSRafael Auler IsValid = true; 521a34c753fSRafael Auler } else { 522a34c753fSRafael Auler // The branch has to originate from this function. 523a34c753fSRafael Auler // Check for calls, tail calls, rets and indirect branches. 524a34c753fSRafael Auler // When matching profiling info, we did not reach the stage 525a34c753fSRafael Auler // when we identify tail calls, so they are still represented 526a34c753fSRafael Auler // by regular branch instructions and we need isBranch() here. 527a34c753fSRafael Auler MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset); 528a34c753fSRafael Auler // If it's a prefix - skip it. 529a34c753fSRafael Auler if (Instr && BC.MIB->isPrefix(*Instr)) 530a34c753fSRafael Auler Instr = BF.getInstructionAtOffset(BI.From.Offset + 1); 53140c2e0faSMaksim Panchenko if (Instr && (BC.MIB->isCall(*Instr) || BC.MIB->isBranch(*Instr) || 532def464aaSAmir Ayupov BC.MIB->isReturn(*Instr))) 533a34c753fSRafael Auler IsValid = true; 534a34c753fSRafael Auler } 535a34c753fSRafael Auler 536a34c753fSRafael Auler if (IsValid) { 537a34c753fSRafael Auler ++NumMatchedBranches; 538a34c753fSRafael Auler continue; 539a34c753fSRafael Auler } 540a34c753fSRafael Auler 541a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "\tinvalid branch in " << BF << " : 0x" 542a34c753fSRafael Auler << Twine::utohexstr(BI.From.Offset) << " -> "; 543a34c753fSRafael Auler if (BI.From.Name == BI.To.Name) dbgs() 544a34c753fSRafael Auler << "0x" << Twine::utohexstr(BI.To.Offset) << '\n'; 545a34c753fSRafael Auler else dbgs() << "<outbounds>\n";); 546a34c753fSRafael Auler } 547a34c753fSRafael Auler 548a34c753fSRafael Auler const float MatchRatio = (float)NumMatchedBranches / BranchData.Data.size(); 549def464aaSAmir Ayupov if (opts::Verbosity >= 2 && NumMatchedBranches < BranchData.Data.size()) 550a34c753fSRafael Auler errs() << "BOLT-WARNING: profile branches match only " 551a34c753fSRafael Auler << format("%.1f%%", MatchRatio * 100.0f) << " (" 552a34c753fSRafael Auler << NumMatchedBranches << '/' << BranchData.Data.size() 553a34c753fSRafael Auler << ") for function " << BF << '\n'; 554a34c753fSRafael Auler 555a34c753fSRafael Auler return MatchRatio; 556a34c753fSRafael Auler } 557a34c753fSRafael Auler 558a34c753fSRafael Auler void DataReader::readSampleData(BinaryFunction &BF) { 559a34c753fSRafael Auler FuncSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames()); 560a34c753fSRafael Auler if (!SampleDataOrErr) 561a34c753fSRafael Auler return; 562a34c753fSRafael Auler 563a34c753fSRafael Auler // Basic samples mode territory (without LBR info) 564a34c753fSRafael Auler // First step is to assign BB execution count based on samples from perf 565a34c753fSRafael Auler BF.ProfileMatchRatio = 1.0f; 566a34c753fSRafael Auler BF.removeTagsFromProfile(); 567a34c753fSRafael Auler bool NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions"); 568a34c753fSRafael Auler bool NormalizeByCalls = usesEvent("branches"); 569a34c753fSRafael Auler static bool NagUser = true; 570a34c753fSRafael Auler if (NagUser) { 571a34c753fSRafael Auler outs() 572a34c753fSRafael Auler << "BOLT-INFO: operating with basic samples profiling data (no LBR).\n"; 573def464aaSAmir Ayupov if (NormalizeByInsnCount) 574a34c753fSRafael Auler outs() << "BOLT-INFO: normalizing samples by instruction count.\n"; 575def464aaSAmir Ayupov else if (NormalizeByCalls) 576a34c753fSRafael Auler outs() << "BOLT-INFO: normalizing samples by branches.\n"; 577def464aaSAmir Ayupov 578a34c753fSRafael Auler NagUser = false; 579a34c753fSRafael Auler } 580a34c753fSRafael Auler uint64_t LastOffset = BF.getSize(); 581a34c753fSRafael Auler uint64_t TotalEntryCount = 0; 582f40d25ddSAmir Ayupov for (BinaryFunction::BasicBlockOffset &BBOffset : 583f40d25ddSAmir Ayupov llvm::reverse(BF.BasicBlockOffsets)) { 584f40d25ddSAmir Ayupov uint64_t CurOffset = BBOffset.first; 585a34c753fSRafael Auler // Always work with samples multiplied by 1000 to avoid losing them if we 586a34c753fSRafael Auler // later need to normalize numbers 587a34c753fSRafael Auler uint64_t NumSamples = 588a34c753fSRafael Auler SampleDataOrErr->getSamples(CurOffset, LastOffset) * 1000; 589f40d25ddSAmir Ayupov if (NormalizeByInsnCount && BBOffset.second->getNumNonPseudos()) { 590f40d25ddSAmir Ayupov NumSamples /= BBOffset.second->getNumNonPseudos(); 591def464aaSAmir Ayupov } else if (NormalizeByCalls) { 592f40d25ddSAmir Ayupov uint32_t NumCalls = BBOffset.second->getNumCalls(); 593a34c753fSRafael Auler NumSamples /= NumCalls + 1; 594a34c753fSRafael Auler } 595f40d25ddSAmir Ayupov BBOffset.second->setExecutionCount(NumSamples); 596f40d25ddSAmir Ayupov if (BBOffset.second->isEntryPoint()) 597a34c753fSRafael Auler TotalEntryCount += NumSamples; 598a34c753fSRafael Auler LastOffset = CurOffset; 599a34c753fSRafael Auler } 600a34c753fSRafael Auler 601a34c753fSRafael Auler BF.ExecutionCount = TotalEntryCount; 602a34c753fSRafael Auler 603a34c753fSRafael Auler estimateEdgeCounts(BF); 604a34c753fSRafael Auler } 605a34c753fSRafael Auler 606a34c753fSRafael Auler void DataReader::convertBranchData(BinaryFunction &BF) const { 607a34c753fSRafael Auler BinaryContext &BC = BF.getBinaryContext(); 608a34c753fSRafael Auler 609a34c753fSRafael Auler if (BF.empty()) 610a34c753fSRafael Auler return; 611a34c753fSRafael Auler 612a34c753fSRafael Auler FuncBranchData *FBD = getBranchData(BF); 613a34c753fSRafael Auler if (!FBD) 614a34c753fSRafael Auler return; 615a34c753fSRafael Auler 616a34c753fSRafael Auler // Profile information for calls. 617a34c753fSRafael Auler // 618a34c753fSRafael Auler // There are 3 cases that we annotate differently: 619a34c753fSRafael Auler // 1) Conditional tail calls that could be mispredicted. 620a34c753fSRafael Auler // 2) Indirect calls to multiple destinations with mispredictions. 621a34c753fSRafael Auler // Before we validate CFG we have to handle indirect branches here too. 622a34c753fSRafael Auler // 3) Regular direct calls. The count could be different from containing 623a34c753fSRafael Auler // basic block count. Keep this data in case we find it useful. 624a34c753fSRafael Auler // 625a34c753fSRafael Auler for (BranchInfo &BI : FBD->Data) { 626a34c753fSRafael Auler // Ignore internal branches. 627a34c753fSRafael Auler if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0) 628a34c753fSRafael Auler continue; 629a34c753fSRafael Auler 630a34c753fSRafael Auler MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset); 631a34c753fSRafael Auler if (!Instr || 632a34c753fSRafael Auler (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr))) 633a34c753fSRafael Auler continue; 634a34c753fSRafael Auler 635a34c753fSRafael Auler auto setOrUpdateAnnotation = [&](StringRef Name, uint64_t Count) { 636def464aaSAmir Ayupov if (opts::Verbosity >= 1 && BC.MIB->hasAnnotation(*Instr, Name)) 637a34c753fSRafael Auler errs() << "BOLT-WARNING: duplicate " << Name << " info for offset 0x" 63840c2e0faSMaksim Panchenko << Twine::utohexstr(BI.From.Offset) << " in function " << BF 63940c2e0faSMaksim Panchenko << '\n'; 640a34c753fSRafael Auler auto &Value = BC.MIB->getOrCreateAnnotationAs<uint64_t>(*Instr, Name); 641a34c753fSRafael Auler Value += Count; 642a34c753fSRafael Auler }; 643a34c753fSRafael Auler 644a34c753fSRafael Auler if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) { 645a34c753fSRafael Auler IndirectCallSiteProfile &CSP = 646a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>( 647a34c753fSRafael Auler *Instr, "CallProfile"); 648a34c753fSRafael Auler MCSymbol *CalleeSymbol = nullptr; 649a34c753fSRafael Auler if (BI.To.IsSymbol) { 650def464aaSAmir Ayupov if (BinaryData *BD = BC.getBinaryDataByName(BI.To.Name)) 651a34c753fSRafael Auler CalleeSymbol = BD->getSymbol(); 652a34c753fSRafael Auler } 653a34c753fSRafael Auler CSP.emplace_back(CalleeSymbol, BI.Branches, BI.Mispreds); 654a34c753fSRafael Auler } else if (BC.MIB->getConditionalTailCall(*Instr)) { 655a34c753fSRafael Auler setOrUpdateAnnotation("CTCTakenCount", BI.Branches); 656a34c753fSRafael Auler setOrUpdateAnnotation("CTCMispredCount", BI.Mispreds); 657a34c753fSRafael Auler } else { 658a34c753fSRafael Auler setOrUpdateAnnotation("Count", BI.Branches); 659a34c753fSRafael Auler } 660a34c753fSRafael Auler } 661a34c753fSRafael Auler } 662a34c753fSRafael Auler 66340c2e0faSMaksim Panchenko bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To, 664a34c753fSRafael Auler uint64_t Count, uint64_t Mispreds) const { 665a34c753fSRafael Auler BinaryContext &BC = BF.getBinaryContext(); 666a34c753fSRafael Auler 667a34c753fSRafael Auler BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From); 668d5c03defSFabian Parzefall const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To); 669a34c753fSRafael Auler 670a34c753fSRafael Auler if (!FromBB || !ToBB) { 671a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "failed to get block for recorded branch\n"); 672a34c753fSRafael Auler return false; 673a34c753fSRafael Auler } 674a34c753fSRafael Auler 675a34c753fSRafael Auler // Could be bad LBR data; ignore the branch. In the case of data collected 676a34c753fSRafael Auler // in binaries optimized by BOLT, a source BB may be mapped to two output 677a34c753fSRafael Auler // BBs as a result of optimizations. In that case, a branch between these 678a34c753fSRafael Auler // two will be recorded as a branch from A going to A in the source address 679a34c753fSRafael Auler // space. Keep processing. 680def464aaSAmir Ayupov if (From == To) 681a34c753fSRafael Auler return true; 682a34c753fSRafael Auler 683a34c753fSRafael Auler // Return from a tail call. 684ccb99dd1SMaksim Panchenko if (FromBB->succ_size() == 0) 685a34c753fSRafael Auler return true; 686a34c753fSRafael Auler 687a34c753fSRafael Auler // Very rarely we will see ignored branches. Do a linear check. 688def464aaSAmir Ayupov for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches) 68940c2e0faSMaksim Panchenko if (Branch == 69040c2e0faSMaksim Panchenko std::make_pair(static_cast<uint32_t>(From), static_cast<uint32_t>(To))) 691a34c753fSRafael Auler return true; 692a34c753fSRafael Auler 69308f56926SVladislav Khmelevsky bool OffsetMatches = !!(To == ToBB->getOffset()); 69408f56926SVladislav Khmelevsky if (!OffsetMatches) { 69508f56926SVladislav Khmelevsky // Skip the nops to support old .fdata 69608f56926SVladislav Khmelevsky uint64_t Offset = ToBB->getOffset(); 697d5c03defSFabian Parzefall for (const MCInst &Instr : *ToBB) { 69808f56926SVladislav Khmelevsky if (!BC.MIB->isNoop(Instr)) 69908f56926SVladislav Khmelevsky break; 70008f56926SVladislav Khmelevsky 70108f56926SVladislav Khmelevsky Offset += BC.MIB->getAnnotationWithDefault<uint32_t>(Instr, "Size"); 70208f56926SVladislav Khmelevsky } 70308f56926SVladislav Khmelevsky 70408f56926SVladislav Khmelevsky if (To == Offset) 70508f56926SVladislav Khmelevsky OffsetMatches = true; 70608f56926SVladislav Khmelevsky } 70708f56926SVladislav Khmelevsky 70808f56926SVladislav Khmelevsky if (!OffsetMatches) { 709a34c753fSRafael Auler // "To" could be referring to nop instructions in between 2 basic blocks. 710a34c753fSRafael Auler // While building the CFG we make sure these nops are attributed to the 711a34c753fSRafael Auler // previous basic block, thus we check if the destination belongs to the 712a34c753fSRafael Auler // gap past the last instruction. 713a34c753fSRafael Auler const MCInst *LastInstr = ToBB->getLastNonPseudoInstr(); 714a34c753fSRafael Auler if (LastInstr) { 715a34c753fSRafael Auler const uint32_t LastInstrOffset = 716a9cd49d5SAmir Ayupov BC.MIB->getOffsetWithDefault(*LastInstr, 0); 717a34c753fSRafael Auler 718a34c753fSRafael Auler // With old .fdata we are getting FT branches for "jcc,jmp" sequences. 719def464aaSAmir Ayupov if (To == LastInstrOffset && BC.MIB->isUnconditionalBranch(*LastInstr)) 720a34c753fSRafael Auler return true; 721a34c753fSRafael Auler 722a34c753fSRafael Auler if (To <= LastInstrOffset) { 723a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "branch recorded into the middle of the block" 724a34c753fSRafael Auler << " in " << BF << " : " << From << " -> " << To 725a34c753fSRafael Auler << '\n'); 726a34c753fSRafael Auler return false; 727a34c753fSRafael Auler } 728a34c753fSRafael Auler } 729a34c753fSRafael Auler 730a34c753fSRafael Auler // The real destination is the layout successor of the detected ToBB. 7318477bc67SFabian Parzefall if (ToBB == BF.getLayout().block_back()) 732a34c753fSRafael Auler return false; 733d5c03defSFabian Parzefall const BinaryBasicBlock *NextBB = 734d5c03defSFabian Parzefall BF.getLayout().getBlock(ToBB->getIndex() + 1); 735a34c753fSRafael Auler assert((NextBB && NextBB->getOffset() > ToBB->getOffset()) && "bad layout"); 736a34c753fSRafael Auler ToBB = NextBB; 737a34c753fSRafael Auler } 738a34c753fSRafael Auler 739a34c753fSRafael Auler // If there's no corresponding instruction for 'From', we have probably 740a34c753fSRafael Auler // discarded it as a FT from __builtin_unreachable. 741a34c753fSRafael Auler MCInst *FromInstruction = BF.getInstructionAtOffset(From); 742a34c753fSRafael Auler if (!FromInstruction) { 743a34c753fSRafael Auler // If the data was collected in a bolted binary, the From addresses may be 744a34c753fSRafael Auler // translated to the first instruction of the source BB if BOLT inserted 745a34c753fSRafael Auler // a new branch that did not exist in the source (we can't map it to the 746a34c753fSRafael Auler // source instruction, so we map it to the first instr of source BB). 747a34c753fSRafael Auler // We do not keep offsets for random instructions. So the check above will 748a34c753fSRafael Auler // evaluate to true if the first instr is not a branch (call/jmp/ret/etc) 749a34c753fSRafael Auler if (collectedInBoltedBinary()) { 750a34c753fSRafael Auler if (FromBB->getInputOffset() != From) { 751a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "offset " << From << " does not match a BB in " 752a34c753fSRafael Auler << BF << '\n'); 753a34c753fSRafael Auler return false; 754a34c753fSRafael Auler } 755a34c753fSRafael Auler FromInstruction = nullptr; 756a34c753fSRafael Auler } else { 757a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "no instruction for offset " << From << " in " << BF 758a34c753fSRafael Auler << '\n'); 759a34c753fSRafael Auler return false; 760a34c753fSRafael Auler } 761a34c753fSRafael Auler } 762a34c753fSRafael Auler 763a34c753fSRafael Auler if (!FromBB->getSuccessor(ToBB->getLabel())) { 764a34c753fSRafael Auler // Check if this is a recursive call or a return from a recursive call. 765a34c753fSRafael Auler if (FromInstruction && ToBB->isEntryPoint() && 766a34c753fSRafael Auler (BC.MIB->isCall(*FromInstruction) || 767a34c753fSRafael Auler BC.MIB->isIndirectBranch(*FromInstruction))) { 768a34c753fSRafael Auler // Execution count is already accounted for. 769a34c753fSRafael Auler return true; 770a34c753fSRafael Auler } 771a34c753fSRafael Auler // For data collected in a bolted binary, we may have created two output BBs 772a34c753fSRafael Auler // that map to one original block. Branches between these two blocks will 77340c2e0faSMaksim Panchenko // appear here as one BB jumping to itself, even though it has no loop 77440c2e0faSMaksim Panchenko // edges. Ignore these. 775a34c753fSRafael Auler if (collectedInBoltedBinary() && FromBB == ToBB) 776a34c753fSRafael Auler return true; 777a34c753fSRafael Auler 778ccb99dd1SMaksim Panchenko BinaryBasicBlock *FTSuccessor = FromBB->getConditionalSuccessor(false); 779ccb99dd1SMaksim Panchenko if (FTSuccessor && FTSuccessor->succ_size() == 1 && 780ccb99dd1SMaksim Panchenko FTSuccessor->getSuccessor(ToBB->getLabel())) { 781ccb99dd1SMaksim Panchenko BinaryBasicBlock::BinaryBranchInfo &FTBI = 782ccb99dd1SMaksim Panchenko FTSuccessor->getBranchInfo(*ToBB); 783ccb99dd1SMaksim Panchenko FTBI.Count += Count; 784ccb99dd1SMaksim Panchenko if (Count) 785ccb99dd1SMaksim Panchenko FTBI.MispredictedCount += Mispreds; 786ccb99dd1SMaksim Panchenko ToBB = FTSuccessor; 787ccb99dd1SMaksim Panchenko } else { 788*ffef4fe0SAmir Ayupov LLVM_DEBUG(dbgs() << "invalid branch in " << BF 789*ffef4fe0SAmir Ayupov << formatv(": {0:x} -> {1:x}\n", From, To)); 790a34c753fSRafael Auler return false; 791a34c753fSRafael Auler } 792ccb99dd1SMaksim Panchenko } 793a34c753fSRafael Auler 794a34c753fSRafael Auler BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB); 795a34c753fSRafael Auler BI.Count += Count; 796a34c753fSRafael Auler // Only update mispredicted count if it the count was real. 797a34c753fSRafael Auler if (Count) { 798a34c753fSRafael Auler BI.MispredictedCount += Mispreds; 799a34c753fSRafael Auler } 800a34c753fSRafael Auler 801a34c753fSRafael Auler return true; 802a34c753fSRafael Auler } 803a34c753fSRafael Auler 804a34c753fSRafael Auler void DataReader::reportError(StringRef ErrorMsg) { 805a34c753fSRafael Auler Diag << "Error reading BOLT data input file: line " << Line << ", column " 806a34c753fSRafael Auler << Col << ": " << ErrorMsg << '\n'; 807a34c753fSRafael Auler } 808a34c753fSRafael Auler 809a34c753fSRafael Auler bool DataReader::expectAndConsumeFS() { 810a34c753fSRafael Auler if (ParsingBuf[0] != FieldSeparator) { 811a34c753fSRafael Auler reportError("expected field separator"); 812a34c753fSRafael Auler return false; 813a34c753fSRafael Auler } 814a34c753fSRafael Auler ParsingBuf = ParsingBuf.drop_front(1); 815a34c753fSRafael Auler Col += 1; 816a34c753fSRafael Auler return true; 817a34c753fSRafael Auler } 818a34c753fSRafael Auler 819a34c753fSRafael Auler void DataReader::consumeAllRemainingFS() { 820a34c753fSRafael Auler while (ParsingBuf[0] == FieldSeparator) { 821a34c753fSRafael Auler ParsingBuf = ParsingBuf.drop_front(1); 822a34c753fSRafael Auler Col += 1; 823a34c753fSRafael Auler } 824a34c753fSRafael Auler } 825a34c753fSRafael Auler 826a34c753fSRafael Auler bool DataReader::checkAndConsumeNewLine() { 827a34c753fSRafael Auler if (ParsingBuf[0] != '\n') 828a34c753fSRafael Auler return false; 829a34c753fSRafael Auler 830a34c753fSRafael Auler ParsingBuf = ParsingBuf.drop_front(1); 831a34c753fSRafael Auler Col = 0; 832a34c753fSRafael Auler Line += 1; 833a34c753fSRafael Auler return true; 834a34c753fSRafael Auler } 835a34c753fSRafael Auler 836a34c753fSRafael Auler ErrorOr<StringRef> DataReader::parseString(char EndChar, bool EndNl) { 837a34c753fSRafael Auler if (EndChar == '\\') { 838a34c753fSRafael Auler reportError("EndChar could not be backslash"); 839a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 840a34c753fSRafael Auler } 841a34c753fSRafael Auler 842a34c753fSRafael Auler std::string EndChars(1, EndChar); 843a34c753fSRafael Auler EndChars.push_back('\\'); 844a34c753fSRafael Auler if (EndNl) 845a34c753fSRafael Auler EndChars.push_back('\n'); 846a34c753fSRafael Auler 847a34c753fSRafael Auler size_t StringEnd = 0; 848a34c753fSRafael Auler do { 849a34c753fSRafael Auler StringEnd = ParsingBuf.find_first_of(EndChars, StringEnd); 850a34c753fSRafael Auler if (StringEnd == StringRef::npos || 851a34c753fSRafael Auler (StringEnd == 0 && ParsingBuf[StringEnd] != '\\')) { 852a34c753fSRafael Auler reportError("malformed field"); 853a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 854a34c753fSRafael Auler } 855a34c753fSRafael Auler 856a34c753fSRafael Auler if (ParsingBuf[StringEnd] != '\\') 857a34c753fSRafael Auler break; 858a34c753fSRafael Auler 859a34c753fSRafael Auler StringEnd += 2; 8601bf531a5SKazu Hirata } while (true); 861a34c753fSRafael Auler 862a34c753fSRafael Auler StringRef Str = ParsingBuf.substr(0, StringEnd); 863a34c753fSRafael Auler 864a34c753fSRafael Auler // If EndNl was set and nl was found instead of EndChar, do not consume the 865a34c753fSRafael Auler // new line. 86640c2e0faSMaksim Panchenko bool EndNlInsteadOfEndChar = ParsingBuf[StringEnd] == '\n' && EndChar != '\n'; 867a34c753fSRafael Auler unsigned End = EndNlInsteadOfEndChar ? StringEnd : StringEnd + 1; 868a34c753fSRafael Auler 869a34c753fSRafael Auler ParsingBuf = ParsingBuf.drop_front(End); 870a34c753fSRafael Auler if (EndChar == '\n') { 871a34c753fSRafael Auler Col = 0; 872a34c753fSRafael Auler Line += 1; 873a34c753fSRafael Auler } else { 874a34c753fSRafael Auler Col += End; 875a34c753fSRafael Auler } 876a34c753fSRafael Auler return Str; 877a34c753fSRafael Auler } 878a34c753fSRafael Auler 879a34c753fSRafael Auler ErrorOr<int64_t> DataReader::parseNumberField(char EndChar, bool EndNl) { 880a34c753fSRafael Auler ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl); 881a34c753fSRafael Auler if (std::error_code EC = NumStrRes.getError()) 882a34c753fSRafael Auler return EC; 883a34c753fSRafael Auler StringRef NumStr = NumStrRes.get(); 884a34c753fSRafael Auler int64_t Num; 885a34c753fSRafael Auler if (NumStr.getAsInteger(10, Num)) { 886a34c753fSRafael Auler reportError("expected decimal number"); 887a34c753fSRafael Auler Diag << "Found: " << NumStr << "\n"; 888a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 889a34c753fSRafael Auler } 890a34c753fSRafael Auler return Num; 891a34c753fSRafael Auler } 892a34c753fSRafael Auler 893a34c753fSRafael Auler ErrorOr<uint64_t> DataReader::parseHexField(char EndChar, bool EndNl) { 894a34c753fSRafael Auler ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl); 895a34c753fSRafael Auler if (std::error_code EC = NumStrRes.getError()) 896a34c753fSRafael Auler return EC; 897a34c753fSRafael Auler StringRef NumStr = NumStrRes.get(); 898a34c753fSRafael Auler uint64_t Num; 899a34c753fSRafael Auler if (NumStr.getAsInteger(16, Num)) { 900a34c753fSRafael Auler reportError("expected hexidecimal number"); 901a34c753fSRafael Auler Diag << "Found: " << NumStr << "\n"; 902a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 903a34c753fSRafael Auler } 904a34c753fSRafael Auler return Num; 905a34c753fSRafael Auler } 906a34c753fSRafael Auler 90740c2e0faSMaksim Panchenko ErrorOr<Location> DataReader::parseLocation(char EndChar, bool EndNl, 908a34c753fSRafael Auler bool ExpectMemLoc) { 909a34c753fSRafael Auler // Read whether the location of the branch should be DSO or a symbol 910a34c753fSRafael Auler // 0 means it is a DSO. 1 means it is a global symbol. 2 means it is a local 911a34c753fSRafael Auler // symbol. 912a34c753fSRafael Auler // The symbol flag is also used to tag memory load events by adding 3 to the 913a34c753fSRafael Auler // base values, i.e. 3 not a symbol, 4 global symbol and 5 local symbol. 91440c2e0faSMaksim Panchenko if (!ExpectMemLoc && ParsingBuf[0] != '0' && ParsingBuf[0] != '1' && 91540c2e0faSMaksim Panchenko ParsingBuf[0] != '2') { 916a34c753fSRafael Auler reportError("expected 0, 1 or 2"); 917a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 918a34c753fSRafael Auler } 919a34c753fSRafael Auler 92040c2e0faSMaksim Panchenko if (ExpectMemLoc && ParsingBuf[0] != '3' && ParsingBuf[0] != '4' && 92140c2e0faSMaksim Panchenko ParsingBuf[0] != '5') { 922a34c753fSRafael Auler reportError("expected 3, 4 or 5"); 923a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 924a34c753fSRafael Auler } 925a34c753fSRafael Auler 926a34c753fSRafael Auler bool IsSymbol = 927a34c753fSRafael Auler (!ExpectMemLoc && (ParsingBuf[0] == '1' || ParsingBuf[0] == '2')) || 928a34c753fSRafael Auler (ExpectMemLoc && (ParsingBuf[0] == '4' || ParsingBuf[0] == '5')); 929a34c753fSRafael Auler ParsingBuf = ParsingBuf.drop_front(1); 930a34c753fSRafael Auler Col += 1; 931a34c753fSRafael Auler 932a34c753fSRafael Auler if (!expectAndConsumeFS()) 933a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 934a34c753fSRafael Auler consumeAllRemainingFS(); 935a34c753fSRafael Auler 936a34c753fSRafael Auler // Read the string containing the symbol or the DSO name 937a34c753fSRafael Auler ErrorOr<StringRef> NameRes = parseString(FieldSeparator); 938a34c753fSRafael Auler if (std::error_code EC = NameRes.getError()) 939a34c753fSRafael Auler return EC; 940a34c753fSRafael Auler StringRef Name = NameRes.get(); 941a34c753fSRafael Auler consumeAllRemainingFS(); 942a34c753fSRafael Auler 943a34c753fSRafael Auler // Read the offset 944a34c753fSRafael Auler ErrorOr<uint64_t> Offset = parseHexField(EndChar, EndNl); 945a34c753fSRafael Auler if (std::error_code EC = Offset.getError()) 946a34c753fSRafael Auler return EC; 947a34c753fSRafael Auler 948a34c753fSRafael Auler return Location(IsSymbol, Name, Offset.get()); 949a34c753fSRafael Auler } 950a34c753fSRafael Auler 951a34c753fSRafael Auler ErrorOr<BranchInfo> DataReader::parseBranchInfo() { 952a34c753fSRafael Auler ErrorOr<Location> Res = parseLocation(FieldSeparator); 953a34c753fSRafael Auler if (std::error_code EC = Res.getError()) 954a34c753fSRafael Auler return EC; 955a34c753fSRafael Auler Location From = Res.get(); 956a34c753fSRafael Auler 957a34c753fSRafael Auler consumeAllRemainingFS(); 958a34c753fSRafael Auler Res = parseLocation(FieldSeparator); 959a34c753fSRafael Auler if (std::error_code EC = Res.getError()) 960a34c753fSRafael Auler return EC; 961a34c753fSRafael Auler Location To = Res.get(); 962a34c753fSRafael Auler 963a34c753fSRafael Auler consumeAllRemainingFS(); 964a34c753fSRafael Auler ErrorOr<int64_t> MRes = parseNumberField(FieldSeparator); 965a34c753fSRafael Auler if (std::error_code EC = MRes.getError()) 966a34c753fSRafael Auler return EC; 967a34c753fSRafael Auler int64_t NumMispreds = MRes.get(); 968a34c753fSRafael Auler 969a34c753fSRafael Auler consumeAllRemainingFS(); 970a34c753fSRafael Auler ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true); 971a34c753fSRafael Auler if (std::error_code EC = BRes.getError()) 972a34c753fSRafael Auler return EC; 973a34c753fSRafael Auler int64_t NumBranches = BRes.get(); 974a34c753fSRafael Auler 975a34c753fSRafael Auler consumeAllRemainingFS(); 976a34c753fSRafael Auler if (!checkAndConsumeNewLine()) { 977a34c753fSRafael Auler reportError("expected end of line"); 978a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 979a34c753fSRafael Auler } 980a34c753fSRafael Auler 981a34c753fSRafael Auler return BranchInfo(std::move(From), std::move(To), NumMispreds, NumBranches); 982a34c753fSRafael Auler } 983a34c753fSRafael Auler 984a34c753fSRafael Auler ErrorOr<MemInfo> DataReader::parseMemInfo() { 985a34c753fSRafael Auler ErrorOr<Location> Res = parseMemLocation(FieldSeparator); 986a34c753fSRafael Auler if (std::error_code EC = Res.getError()) 987a34c753fSRafael Auler return EC; 988a34c753fSRafael Auler Location Offset = Res.get(); 989a34c753fSRafael Auler 990a34c753fSRafael Auler consumeAllRemainingFS(); 991a34c753fSRafael Auler Res = parseMemLocation(FieldSeparator); 992a34c753fSRafael Auler if (std::error_code EC = Res.getError()) 993a34c753fSRafael Auler return EC; 994a34c753fSRafael Auler Location Addr = Res.get(); 995a34c753fSRafael Auler 996a34c753fSRafael Auler consumeAllRemainingFS(); 997a34c753fSRafael Auler ErrorOr<int64_t> CountRes = parseNumberField(FieldSeparator, true); 998a34c753fSRafael Auler if (std::error_code EC = CountRes.getError()) 999a34c753fSRafael Auler return EC; 1000a34c753fSRafael Auler 1001a34c753fSRafael Auler consumeAllRemainingFS(); 1002a34c753fSRafael Auler if (!checkAndConsumeNewLine()) { 1003a34c753fSRafael Auler reportError("expected end of line"); 1004a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 1005a34c753fSRafael Auler } 1006a34c753fSRafael Auler 1007a34c753fSRafael Auler return MemInfo(Offset, Addr, CountRes.get()); 1008a34c753fSRafael Auler } 1009a34c753fSRafael Auler 1010a34c753fSRafael Auler ErrorOr<SampleInfo> DataReader::parseSampleInfo() { 1011a34c753fSRafael Auler ErrorOr<Location> Res = parseLocation(FieldSeparator); 1012a34c753fSRafael Auler if (std::error_code EC = Res.getError()) 1013a34c753fSRafael Auler return EC; 1014a34c753fSRafael Auler Location Address = Res.get(); 1015a34c753fSRafael Auler 1016a34c753fSRafael Auler consumeAllRemainingFS(); 1017a34c753fSRafael Auler ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true); 1018a34c753fSRafael Auler if (std::error_code EC = BRes.getError()) 1019a34c753fSRafael Auler return EC; 1020a34c753fSRafael Auler int64_t Occurrences = BRes.get(); 1021a34c753fSRafael Auler 1022a34c753fSRafael Auler consumeAllRemainingFS(); 1023a34c753fSRafael Auler if (!checkAndConsumeNewLine()) { 1024a34c753fSRafael Auler reportError("expected end of line"); 1025a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 1026a34c753fSRafael Auler } 1027a34c753fSRafael Auler 1028a34c753fSRafael Auler return SampleInfo(std::move(Address), Occurrences); 1029a34c753fSRafael Auler } 1030a34c753fSRafael Auler 1031a34c753fSRafael Auler ErrorOr<bool> DataReader::maybeParseNoLBRFlag() { 1032a34c753fSRafael Auler if (ParsingBuf.size() < 6 || ParsingBuf.substr(0, 6) != "no_lbr") 1033a34c753fSRafael Auler return false; 1034a34c753fSRafael Auler ParsingBuf = ParsingBuf.drop_front(6); 1035a34c753fSRafael Auler Col += 6; 1036a34c753fSRafael Auler 1037a34c753fSRafael Auler if (ParsingBuf.size() > 0 && ParsingBuf[0] == ' ') 1038a34c753fSRafael Auler ParsingBuf = ParsingBuf.drop_front(1); 1039a34c753fSRafael Auler 1040a34c753fSRafael Auler while (ParsingBuf.size() > 0 && ParsingBuf[0] != '\n') { 1041a34c753fSRafael Auler ErrorOr<StringRef> EventName = parseString(' ', true); 1042a34c753fSRafael Auler if (!EventName) 1043a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 1044a34c753fSRafael Auler EventNames.insert(EventName.get()); 1045a34c753fSRafael Auler } 1046a34c753fSRafael Auler 1047a34c753fSRafael Auler if (!checkAndConsumeNewLine()) { 1048a34c753fSRafael Auler reportError("malformed no_lbr line"); 1049a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 1050a34c753fSRafael Auler } 1051a34c753fSRafael Auler return true; 1052a34c753fSRafael Auler } 1053a34c753fSRafael Auler 1054a34c753fSRafael Auler ErrorOr<bool> DataReader::maybeParseBATFlag() { 1055a34c753fSRafael Auler if (ParsingBuf.size() < 16 || ParsingBuf.substr(0, 16) != "boltedcollection") 1056a34c753fSRafael Auler return false; 1057a34c753fSRafael Auler ParsingBuf = ParsingBuf.drop_front(16); 1058a34c753fSRafael Auler Col += 16; 1059a34c753fSRafael Auler 1060a34c753fSRafael Auler if (!checkAndConsumeNewLine()) { 1061a34c753fSRafael Auler reportError("malformed boltedcollection line"); 1062a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 1063a34c753fSRafael Auler } 1064a34c753fSRafael Auler return true; 1065a34c753fSRafael Auler } 1066a34c753fSRafael Auler 1067a34c753fSRafael Auler bool DataReader::hasBranchData() { 1068a34c753fSRafael Auler if (ParsingBuf.size() == 0) 1069a34c753fSRafael Auler return false; 1070a34c753fSRafael Auler 1071a34c753fSRafael Auler if (ParsingBuf[0] == '0' || ParsingBuf[0] == '1' || ParsingBuf[0] == '2') 1072a34c753fSRafael Auler return true; 1073a34c753fSRafael Auler return false; 1074a34c753fSRafael Auler } 1075a34c753fSRafael Auler 1076a34c753fSRafael Auler bool DataReader::hasMemData() { 1077a34c753fSRafael Auler if (ParsingBuf.size() == 0) 1078a34c753fSRafael Auler return false; 1079a34c753fSRafael Auler 1080a34c753fSRafael Auler if (ParsingBuf[0] == '3' || ParsingBuf[0] == '4' || ParsingBuf[0] == '5') 1081a34c753fSRafael Auler return true; 1082a34c753fSRafael Auler return false; 1083a34c753fSRafael Auler } 1084a34c753fSRafael Auler 1085a34c753fSRafael Auler std::error_code DataReader::parseInNoLBRMode() { 1086a34c753fSRafael Auler auto GetOrCreateFuncEntry = [&](StringRef Name) { 1087a34c753fSRafael Auler auto I = NamesToSamples.find(Name); 1088a34c753fSRafael Auler if (I == NamesToSamples.end()) { 1089a34c753fSRafael Auler bool Success; 1090a34c753fSRafael Auler std::tie(I, Success) = NamesToSamples.insert(std::make_pair( 1091a34c753fSRafael Auler Name, FuncSampleData(Name, FuncSampleData::ContainerTy()))); 1092a34c753fSRafael Auler 1093a34c753fSRafael Auler assert(Success && "unexpected result of insert"); 1094a34c753fSRafael Auler } 1095a34c753fSRafael Auler return I; 1096a34c753fSRafael Auler }; 1097a34c753fSRafael Auler 1098a34c753fSRafael Auler auto GetOrCreateFuncMemEntry = [&](StringRef Name) { 1099a34c753fSRafael Auler auto I = NamesToMemEvents.find(Name); 1100a34c753fSRafael Auler if (I == NamesToMemEvents.end()) { 1101a34c753fSRafael Auler bool Success; 1102a34c753fSRafael Auler std::tie(I, Success) = NamesToMemEvents.insert( 1103a34c753fSRafael Auler std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy()))); 1104a34c753fSRafael Auler assert(Success && "unexpected result of insert"); 1105a34c753fSRafael Auler } 1106a34c753fSRafael Auler return I; 1107a34c753fSRafael Auler }; 1108a34c753fSRafael Auler 1109a34c753fSRafael Auler while (hasBranchData()) { 1110a34c753fSRafael Auler ErrorOr<SampleInfo> Res = parseSampleInfo(); 1111a34c753fSRafael Auler if (std::error_code EC = Res.getError()) 1112a34c753fSRafael Auler return EC; 1113a34c753fSRafael Auler 1114a34c753fSRafael Auler SampleInfo SI = Res.get(); 1115a34c753fSRafael Auler 1116a34c753fSRafael Auler // Ignore samples not involving known locations 1117a34c753fSRafael Auler if (!SI.Loc.IsSymbol) 1118a34c753fSRafael Auler continue; 1119a34c753fSRafael Auler 112073b89e3fSMaksim Panchenko auto I = GetOrCreateFuncEntry(SI.Loc.Name); 112173b89e3fSMaksim Panchenko I->second.Data.emplace_back(std::move(SI)); 1122a34c753fSRafael Auler } 1123a34c753fSRafael Auler 1124a34c753fSRafael Auler while (hasMemData()) { 1125a34c753fSRafael Auler ErrorOr<MemInfo> Res = parseMemInfo(); 1126a34c753fSRafael Auler if (std::error_code EC = Res.getError()) 1127a34c753fSRafael Auler return EC; 1128a34c753fSRafael Auler 1129a34c753fSRafael Auler MemInfo MI = Res.get(); 1130a34c753fSRafael Auler 1131a34c753fSRafael Auler // Ignore memory events not involving known pc. 1132a34c753fSRafael Auler if (!MI.Offset.IsSymbol) 1133a34c753fSRafael Auler continue; 1134a34c753fSRafael Auler 113573b89e3fSMaksim Panchenko auto I = GetOrCreateFuncMemEntry(MI.Offset.Name); 113673b89e3fSMaksim Panchenko I->second.Data.emplace_back(std::move(MI)); 1137a34c753fSRafael Auler } 1138a34c753fSRafael Auler 113973b89e3fSMaksim Panchenko for (auto &FuncSamples : NamesToSamples) 1140d2c87699SAmir Ayupov llvm::stable_sort(FuncSamples.second.Data); 1141a34c753fSRafael Auler 114273b89e3fSMaksim Panchenko for (auto &MemEvents : NamesToMemEvents) 1143d2c87699SAmir Ayupov llvm::stable_sort(MemEvents.second.Data); 1144a34c753fSRafael Auler 1145a34c753fSRafael Auler return std::error_code(); 1146a34c753fSRafael Auler } 1147a34c753fSRafael Auler 1148a34c753fSRafael Auler std::error_code DataReader::parse() { 1149a34c753fSRafael Auler auto GetOrCreateFuncEntry = [&](StringRef Name) { 1150a34c753fSRafael Auler auto I = NamesToBranches.find(Name); 1151a34c753fSRafael Auler if (I == NamesToBranches.end()) { 1152a34c753fSRafael Auler bool Success; 1153a34c753fSRafael Auler std::tie(I, Success) = NamesToBranches.insert(std::make_pair( 1154a34c753fSRafael Auler Name, FuncBranchData(Name, FuncBranchData::ContainerTy(), 1155a34c753fSRafael Auler FuncBranchData::ContainerTy()))); 1156a34c753fSRafael Auler assert(Success && "unexpected result of insert"); 1157a34c753fSRafael Auler } 1158a34c753fSRafael Auler return I; 1159a34c753fSRafael Auler }; 1160a34c753fSRafael Auler 1161a34c753fSRafael Auler auto GetOrCreateFuncMemEntry = [&](StringRef Name) { 1162a34c753fSRafael Auler auto I = NamesToMemEvents.find(Name); 1163a34c753fSRafael Auler if (I == NamesToMemEvents.end()) { 1164a34c753fSRafael Auler bool Success; 1165a34c753fSRafael Auler std::tie(I, Success) = NamesToMemEvents.insert( 1166a34c753fSRafael Auler std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy()))); 1167a34c753fSRafael Auler assert(Success && "unexpected result of insert"); 1168a34c753fSRafael Auler } 1169a34c753fSRafael Auler return I; 1170a34c753fSRafael Auler }; 1171a34c753fSRafael Auler 1172a34c753fSRafael Auler Col = 0; 1173a34c753fSRafael Auler Line = 1; 1174a34c753fSRafael Auler ErrorOr<bool> FlagOrErr = maybeParseNoLBRFlag(); 1175a34c753fSRafael Auler if (!FlagOrErr) 1176a34c753fSRafael Auler return FlagOrErr.getError(); 1177a34c753fSRafael Auler NoLBRMode = *FlagOrErr; 1178a34c753fSRafael Auler 1179a34c753fSRafael Auler ErrorOr<bool> BATFlagOrErr = maybeParseBATFlag(); 1180a34c753fSRafael Auler if (!BATFlagOrErr) 1181a34c753fSRafael Auler return BATFlagOrErr.getError(); 1182a34c753fSRafael Auler BATMode = *BATFlagOrErr; 1183a34c753fSRafael Auler 1184a34c753fSRafael Auler if (!hasBranchData() && !hasMemData()) { 1185a34c753fSRafael Auler Diag << "ERROR: no valid profile data found\n"; 1186a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 1187a34c753fSRafael Auler } 1188a34c753fSRafael Auler 1189a34c753fSRafael Auler if (NoLBRMode) 1190a34c753fSRafael Auler return parseInNoLBRMode(); 1191a34c753fSRafael Auler 1192a34c753fSRafael Auler while (hasBranchData()) { 1193a34c753fSRafael Auler ErrorOr<BranchInfo> Res = parseBranchInfo(); 1194a34c753fSRafael Auler if (std::error_code EC = Res.getError()) 1195a34c753fSRafael Auler return EC; 1196a34c753fSRafael Auler 1197a34c753fSRafael Auler BranchInfo BI = Res.get(); 1198a34c753fSRafael Auler 1199a34c753fSRafael Auler // Ignore branches not involving known location. 1200a34c753fSRafael Auler if (!BI.From.IsSymbol && !BI.To.IsSymbol) 1201a34c753fSRafael Auler continue; 1202a34c753fSRafael Auler 120373b89e3fSMaksim Panchenko auto I = GetOrCreateFuncEntry(BI.From.Name); 120473b89e3fSMaksim Panchenko I->second.Data.emplace_back(std::move(BI)); 1205a34c753fSRafael Auler 1206a34c753fSRafael Auler // Add entry data for branches to another function or branches 1207a34c753fSRafael Auler // to entry points (including recursive calls) 1208a34c753fSRafael Auler if (BI.To.IsSymbol && 1209a34c753fSRafael Auler (!BI.From.Name.equals(BI.To.Name) || BI.To.Offset == 0)) { 1210a34c753fSRafael Auler I = GetOrCreateFuncEntry(BI.To.Name); 121173b89e3fSMaksim Panchenko I->second.EntryData.emplace_back(std::move(BI)); 1212a34c753fSRafael Auler } 1213a34c753fSRafael Auler 1214a34c753fSRafael Auler // If destination is the function start - update execution count. 1215a34c753fSRafael Auler // NB: the data is skewed since we cannot tell tail recursion from 1216a34c753fSRafael Auler // branches to the function start. 1217a34c753fSRafael Auler if (BI.To.IsSymbol && BI.To.Offset == 0) { 1218a34c753fSRafael Auler I = GetOrCreateFuncEntry(BI.To.Name); 121973b89e3fSMaksim Panchenko I->second.ExecutionCount += BI.Branches; 1220a34c753fSRafael Auler } 1221a34c753fSRafael Auler } 1222a34c753fSRafael Auler 1223a34c753fSRafael Auler while (hasMemData()) { 1224a34c753fSRafael Auler ErrorOr<MemInfo> Res = parseMemInfo(); 1225a34c753fSRafael Auler if (std::error_code EC = Res.getError()) 1226a34c753fSRafael Auler return EC; 1227a34c753fSRafael Auler 1228a34c753fSRafael Auler MemInfo MI = Res.get(); 1229a34c753fSRafael Auler 1230a34c753fSRafael Auler // Ignore memory events not involving known pc. 1231a34c753fSRafael Auler if (!MI.Offset.IsSymbol) 1232a34c753fSRafael Auler continue; 1233a34c753fSRafael Auler 123473b89e3fSMaksim Panchenko auto I = GetOrCreateFuncMemEntry(MI.Offset.Name); 123573b89e3fSMaksim Panchenko I->second.Data.emplace_back(std::move(MI)); 1236a34c753fSRafael Auler } 1237a34c753fSRafael Auler 123873b89e3fSMaksim Panchenko for (auto &FuncBranches : NamesToBranches) 1239d2c87699SAmir Ayupov llvm::stable_sort(FuncBranches.second.Data); 1240a34c753fSRafael Auler 124173b89e3fSMaksim Panchenko for (auto &MemEvents : NamesToMemEvents) 1242d2c87699SAmir Ayupov llvm::stable_sort(MemEvents.second.Data); 1243a34c753fSRafael Auler 1244a34c753fSRafael Auler return std::error_code(); 1245a34c753fSRafael Auler } 1246a34c753fSRafael Auler 1247a34c753fSRafael Auler void DataReader::buildLTONameMaps() { 124873b89e3fSMaksim Panchenko for (auto &FuncData : NamesToBranches) { 124973b89e3fSMaksim Panchenko const StringRef FuncName = FuncData.first; 125015d1e517SAmir Ayupov const std::optional<StringRef> CommonName = getLTOCommonName(FuncName); 1251a34c753fSRafael Auler if (CommonName) 125273b89e3fSMaksim Panchenko LTOCommonNameMap[*CommonName].push_back(&FuncData.second); 1253a34c753fSRafael Auler } 1254a34c753fSRafael Auler 125573b89e3fSMaksim Panchenko for (auto &FuncData : NamesToMemEvents) { 125673b89e3fSMaksim Panchenko const StringRef FuncName = FuncData.first; 125715d1e517SAmir Ayupov const std::optional<StringRef> CommonName = getLTOCommonName(FuncName); 1258a34c753fSRafael Auler if (CommonName) 125973b89e3fSMaksim Panchenko LTOCommonNameMemMap[*CommonName].push_back(&FuncData.second); 1260a34c753fSRafael Auler } 1261a34c753fSRafael Auler } 1262a34c753fSRafael Auler 1263a34c753fSRafael Auler template <typename MapTy> 126473b89e3fSMaksim Panchenko static typename MapTy::mapped_type * 1265a34c753fSRafael Auler fetchMapEntry(MapTy &Map, const std::vector<MCSymbol *> &Symbols) { 1266a34c753fSRafael Auler // Do a reverse order iteration since the name in profile has a higher chance 1267a34c753fSRafael Auler // of matching a name at the end of the list. 1268f40d25ddSAmir Ayupov for (const MCSymbol *Symbol : llvm::reverse(Symbols)) { 1269f40d25ddSAmir Ayupov auto I = Map.find(normalizeName(Symbol->getName())); 1270a34c753fSRafael Auler if (I != Map.end()) 127173b89e3fSMaksim Panchenko return &I->second; 1272a34c753fSRafael Auler } 1273a34c753fSRafael Auler return nullptr; 1274a34c753fSRafael Auler } 1275a34c753fSRafael Auler 1276a34c753fSRafael Auler template <typename MapTy> 127773b89e3fSMaksim Panchenko static typename MapTy::mapped_type * 1278a34c753fSRafael Auler fetchMapEntry(MapTy &Map, const std::vector<StringRef> &FuncNames) { 1279a34c753fSRafael Auler // Do a reverse order iteration since the name in profile has a higher chance 1280a34c753fSRafael Auler // of matching a name at the end of the list. 1281f40d25ddSAmir Ayupov for (StringRef Name : llvm::reverse(FuncNames)) { 1282f40d25ddSAmir Ayupov auto I = Map.find(normalizeName(Name)); 1283a34c753fSRafael Auler if (I != Map.end()) 128473b89e3fSMaksim Panchenko return &I->second; 1285a34c753fSRafael Auler } 1286a34c753fSRafael Auler return nullptr; 1287a34c753fSRafael Auler } 1288a34c753fSRafael Auler 1289a34c753fSRafael Auler template <typename MapTy> 129073b89e3fSMaksim Panchenko static std::vector<typename MapTy::mapped_type *> 129173b89e3fSMaksim Panchenko fetchMapEntriesRegex(MapTy &Map, 129273b89e3fSMaksim Panchenko const StringMap<std::vector<typename MapTy::mapped_type *>> 129340c2e0faSMaksim Panchenko <OCommonNameMap, 1294a34c753fSRafael Auler const std::vector<StringRef> &FuncNames) { 129573b89e3fSMaksim Panchenko std::vector<typename MapTy::mapped_type *> AllData; 1296a34c753fSRafael Auler // Do a reverse order iteration since the name in profile has a higher chance 1297a34c753fSRafael Auler // of matching a name at the end of the list. 1298f40d25ddSAmir Ayupov for (StringRef FuncName : llvm::reverse(FuncNames)) { 1299f40d25ddSAmir Ayupov std::string Name = normalizeName(FuncName); 130015d1e517SAmir Ayupov const std::optional<StringRef> LTOCommonName = getLTOCommonName(Name); 1301a34c753fSRafael Auler if (LTOCommonName) { 1302a34c753fSRafael Auler auto I = LTOCommonNameMap.find(*LTOCommonName); 1303a34c753fSRafael Auler if (I != LTOCommonNameMap.end()) { 130473b89e3fSMaksim Panchenko const std::vector<typename MapTy::mapped_type *> &CommonData = 130573b89e3fSMaksim Panchenko I->second; 1306a34c753fSRafael Auler AllData.insert(AllData.end(), CommonData.begin(), CommonData.end()); 1307a34c753fSRafael Auler } 1308a34c753fSRafael Auler } else { 1309a34c753fSRafael Auler auto I = Map.find(Name); 1310def464aaSAmir Ayupov if (I != Map.end()) 131173b89e3fSMaksim Panchenko return {&I->second}; 1312a34c753fSRafael Auler } 1313a34c753fSRafael Auler } 1314a34c753fSRafael Auler return AllData; 1315a34c753fSRafael Auler } 1316a34c753fSRafael Auler 1317a34c753fSRafael Auler bool DataReader::mayHaveProfileData(const BinaryFunction &Function) { 1318a34c753fSRafael Auler if (getBranchData(Function) || getMemData(Function)) 1319a34c753fSRafael Auler return true; 1320a34c753fSRafael Auler 13216b05a62aSAmir Ayupov if (getFuncSampleData(Function.getNames()) || 13226b05a62aSAmir Ayupov getBranchDataForNames(Function.getNames()) || 1323a34c753fSRafael Auler getMemDataForNames(Function.getNames())) 1324a34c753fSRafael Auler return true; 1325a34c753fSRafael Auler 1326a34c753fSRafael Auler if (!hasVolatileName(Function)) 1327a34c753fSRafael Auler return false; 1328a34c753fSRafael Auler 1329a34c753fSRafael Auler const std::vector<FuncBranchData *> AllBranchData = 1330a34c753fSRafael Auler getBranchDataForNamesRegex(Function.getNames()); 1331a34c753fSRafael Auler if (!AllBranchData.empty()) 1332a34c753fSRafael Auler return true; 1333a34c753fSRafael Auler 1334a34c753fSRafael Auler const std::vector<FuncMemData *> AllMemData = 1335a34c753fSRafael Auler getMemDataForNamesRegex(Function.getNames()); 1336a34c753fSRafael Auler if (!AllMemData.empty()) 1337a34c753fSRafael Auler return true; 1338a34c753fSRafael Auler 1339a34c753fSRafael Auler return false; 1340a34c753fSRafael Auler } 1341a34c753fSRafael Auler 1342a34c753fSRafael Auler FuncBranchData * 1343a34c753fSRafael Auler DataReader::getBranchDataForNames(const std::vector<StringRef> &FuncNames) { 1344a34c753fSRafael Auler return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, FuncNames); 1345a34c753fSRafael Auler } 1346a34c753fSRafael Auler 1347a34c753fSRafael Auler FuncBranchData * 1348a34c753fSRafael Auler DataReader::getBranchDataForSymbols(const std::vector<MCSymbol *> &Symbols) { 1349a34c753fSRafael Auler return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, Symbols); 1350a34c753fSRafael Auler } 1351a34c753fSRafael Auler 1352a34c753fSRafael Auler FuncMemData * 1353a34c753fSRafael Auler DataReader::getMemDataForNames(const std::vector<StringRef> &FuncNames) { 1354a34c753fSRafael Auler return fetchMapEntry<NamesToMemEventsMapTy>(NamesToMemEvents, FuncNames); 1355a34c753fSRafael Auler } 1356a34c753fSRafael Auler 1357a34c753fSRafael Auler FuncSampleData * 1358a34c753fSRafael Auler DataReader::getFuncSampleData(const std::vector<StringRef> &FuncNames) { 1359a34c753fSRafael Auler return fetchMapEntry<NamesToSamplesMapTy>(NamesToSamples, FuncNames); 1360a34c753fSRafael Auler } 1361a34c753fSRafael Auler 136240c2e0faSMaksim Panchenko std::vector<FuncBranchData *> DataReader::getBranchDataForNamesRegex( 1363a34c753fSRafael Auler const std::vector<StringRef> &FuncNames) { 1364a34c753fSRafael Auler return fetchMapEntriesRegex(NamesToBranches, LTOCommonNameMap, FuncNames); 1365a34c753fSRafael Auler } 1366a34c753fSRafael Auler 1367a34c753fSRafael Auler std::vector<FuncMemData *> 1368a34c753fSRafael Auler DataReader::getMemDataForNamesRegex(const std::vector<StringRef> &FuncNames) { 1369a34c753fSRafael Auler return fetchMapEntriesRegex(NamesToMemEvents, LTOCommonNameMemMap, FuncNames); 1370a34c753fSRafael Auler } 1371a34c753fSRafael Auler 1372a34c753fSRafael Auler bool DataReader::hasLocalsWithFileName() const { 137373b89e3fSMaksim Panchenko for (const auto &Func : NamesToBranches) { 137473b89e3fSMaksim Panchenko const StringRef &FuncName = Func.first; 1375a34c753fSRafael Auler if (FuncName.count('/') == 2 && FuncName[0] != '/') 1376a34c753fSRafael Auler return true; 1377a34c753fSRafael Auler } 1378a34c753fSRafael Auler return false; 1379a34c753fSRafael Auler } 1380a34c753fSRafael Auler 1381a34c753fSRafael Auler void DataReader::dump() const { 138273b89e3fSMaksim Panchenko for (const auto &KV : NamesToBranches) { 138373b89e3fSMaksim Panchenko const StringRef Name = KV.first; 138473b89e3fSMaksim Panchenko const FuncBranchData &FBD = KV.second; 138573b89e3fSMaksim Panchenko Diag << Name << " branches:\n"; 138673b89e3fSMaksim Panchenko for (const BranchInfo &BI : FBD.Data) 1387a34c753fSRafael Auler Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " " 1388a34c753fSRafael Auler << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n"; 138973b89e3fSMaksim Panchenko Diag << Name << " entry points:\n"; 139073b89e3fSMaksim Panchenko for (const BranchInfo &BI : FBD.EntryData) 1391a34c753fSRafael Auler Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " " 1392a34c753fSRafael Auler << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n"; 1393a34c753fSRafael Auler } 1394a34c753fSRafael Auler 1395a34c753fSRafael Auler for (auto I = EventNames.begin(), E = EventNames.end(); I != E; ++I) { 1396a34c753fSRafael Auler StringRef Event = I->getKey(); 1397a34c753fSRafael Auler Diag << "Data was collected with event: " << Event << "\n"; 1398a34c753fSRafael Auler } 139973b89e3fSMaksim Panchenko for (const auto &KV : NamesToSamples) { 140073b89e3fSMaksim Panchenko const StringRef Name = KV.first; 140173b89e3fSMaksim Panchenko const FuncSampleData &FSD = KV.second; 140273b89e3fSMaksim Panchenko Diag << Name << " samples:\n"; 140373b89e3fSMaksim Panchenko for (const SampleInfo &SI : FSD.Data) 140440c2e0faSMaksim Panchenko Diag << SI.Loc.Name << " " << SI.Loc.Offset << " " << SI.Hits << "\n"; 1405a34c753fSRafael Auler } 1406a34c753fSRafael Auler 140773b89e3fSMaksim Panchenko for (const auto &KV : NamesToMemEvents) { 140873b89e3fSMaksim Panchenko const StringRef Name = KV.first; 140973b89e3fSMaksim Panchenko const FuncMemData &FMD = KV.second; 141073b89e3fSMaksim Panchenko Diag << "Memory events for " << Name; 1411a34c753fSRafael Auler Location LastOffset(0); 141273b89e3fSMaksim Panchenko for (const MemInfo &MI : FMD.Data) { 1413def464aaSAmir Ayupov if (MI.Offset == LastOffset) 1414a34c753fSRafael Auler Diag << ", " << MI.Addr << "/" << MI.Count; 1415def464aaSAmir Ayupov else 1416a34c753fSRafael Auler Diag << "\n" << MI.Offset << ": " << MI.Addr << "/" << MI.Count; 1417a34c753fSRafael Auler LastOffset = MI.Offset; 1418a34c753fSRafael Auler } 1419a34c753fSRafael Auler Diag << "\n"; 1420a34c753fSRafael Auler } 1421a34c753fSRafael Auler } 1422a34c753fSRafael Auler 1423a34c753fSRafael Auler } // namespace bolt 1424a34c753fSRafael Auler } // namespace llvm 1425