1a34c753fSRafael Auler //===-- DataReader.cpp - Perf data reader -----------------------*- C++ -*-===// 2a34c753fSRafael Auler // 3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information. 5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6a34c753fSRafael Auler // 7a34c753fSRafael Auler //===----------------------------------------------------------------------===// 8a34c753fSRafael Auler // 9a34c753fSRafael Auler // This family of functions reads profile data written by the perf2bolt 10a34c753fSRafael Auler // utility and stores it in memory for llvm-bolt consumption. 11a34c753fSRafael Auler // 12a34c753fSRafael Auler //===----------------------------------------------------------------------===// 13a34c753fSRafael Auler 14a34c753fSRafael Auler #include "bolt/Profile/DataReader.h" 15a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h" 16a34c753fSRafael Auler #include "bolt/Passes/MCF.h" 17a34c753fSRafael Auler #include "bolt/Utils/Utils.h" 18a34c753fSRafael Auler #include "llvm/Support/CommandLine.h" 19a34c753fSRafael Auler #include "llvm/Support/Debug.h" 20a34c753fSRafael Auler #include <map> 21a34c753fSRafael Auler 22a34c753fSRafael Auler #undef DEBUG_TYPE 23a34c753fSRafael Auler #define DEBUG_TYPE "bolt-prof" 24a34c753fSRafael Auler 25a34c753fSRafael Auler using namespace llvm; 26a34c753fSRafael Auler 27a34c753fSRafael Auler namespace opts { 28a34c753fSRafael Auler 29a34c753fSRafael Auler extern cl::OptionCategory BoltCategory; 30a34c753fSRafael Auler extern llvm::cl::opt<unsigned> Verbosity; 31a34c753fSRafael Auler 32a34c753fSRafael Auler static cl::opt<bool> 33a34c753fSRafael Auler DumpData("dump-data", 34a34c753fSRafael Auler cl::desc("dump parsed bolt data for debugging"), 35a34c753fSRafael Auler cl::Hidden, 36a34c753fSRafael Auler cl::cat(BoltCategory)); 37a34c753fSRafael Auler 38a34c753fSRafael Auler } // namespace opts 39a34c753fSRafael Auler 40a34c753fSRafael Auler namespace llvm { 41a34c753fSRafael Auler namespace bolt { 42a34c753fSRafael Auler 43a34c753fSRafael Auler Optional<StringRef> getLTOCommonName(const StringRef Name) { 44a34c753fSRafael Auler size_t LTOSuffixPos = Name.find(".lto_priv."); 45a34c753fSRafael Auler if (LTOSuffixPos != StringRef::npos) { 46a34c753fSRafael Auler return Name.substr(0, LTOSuffixPos + 10); 47a34c753fSRafael Auler } else if ((LTOSuffixPos = Name.find(".constprop.")) != StringRef::npos) { 48a34c753fSRafael Auler return Name.substr(0, LTOSuffixPos + 11); 49a34c753fSRafael Auler } else { 50a34c753fSRafael Auler return NoneType(); 51a34c753fSRafael Auler } 52a34c753fSRafael Auler } 53a34c753fSRafael Auler 54a34c753fSRafael Auler namespace { 55a34c753fSRafael Auler 56a34c753fSRafael Auler /// Return true if the function name can change across compilations. 57a34c753fSRafael Auler bool hasVolatileName(const BinaryFunction &BF) { 58a34c753fSRafael Auler for (const StringRef Name : BF.getNames()) { 59a34c753fSRafael Auler if (getLTOCommonName(Name)) 60a34c753fSRafael Auler return true; 61a34c753fSRafael Auler } 62a34c753fSRafael Auler return false; 63a34c753fSRafael Auler } 64a34c753fSRafael Auler 65a34c753fSRafael Auler /// Return standard escaped name of the function possibly renamed by BOLT. 66a34c753fSRafael Auler std::string normalizeName(StringRef NameRef) { 67a34c753fSRafael Auler // Strip "PG." prefix used for globalized locals. 68a34c753fSRafael Auler NameRef = NameRef.startswith("PG.") ? NameRef.substr(2) : NameRef; 69a34c753fSRafael Auler return getEscapedName(NameRef); 70a34c753fSRafael Auler } 71a34c753fSRafael Auler 72a34c753fSRafael Auler } // anonymous namespace 73a34c753fSRafael Auler 74a34c753fSRafael Auler raw_ostream &operator<<(raw_ostream &OS, const Location &Loc) { 75a34c753fSRafael Auler if (Loc.IsSymbol) { 76a34c753fSRafael Auler OS << Loc.Name; 77a34c753fSRafael Auler if (Loc.Offset) 78a34c753fSRafael Auler OS << "+" << Twine::utohexstr(Loc.Offset); 79a34c753fSRafael Auler } else { 80a34c753fSRafael Auler OS << Twine::utohexstr(Loc.Offset); 81a34c753fSRafael Auler } 82a34c753fSRafael Auler return OS; 83a34c753fSRafael Auler } 84a34c753fSRafael Auler 85a34c753fSRafael Auler iterator_range<FuncBranchData::ContainerTy::const_iterator> 86a34c753fSRafael Auler FuncBranchData::getBranchRange(uint64_t From) const { 87a34c753fSRafael Auler assert(std::is_sorted(Data.begin(), Data.end())); 88a34c753fSRafael Auler struct Compare { 89a34c753fSRafael Auler bool operator()(const BranchInfo &BI, const uint64_t Val) const { 90a34c753fSRafael Auler return BI.From.Offset < Val; 91a34c753fSRafael Auler } 92a34c753fSRafael Auler bool operator()(const uint64_t Val, const BranchInfo &BI) const { 93a34c753fSRafael Auler return Val < BI.From.Offset; 94a34c753fSRafael Auler } 95a34c753fSRafael Auler }; 96a34c753fSRafael Auler auto Range = std::equal_range(Data.begin(), Data.end(), From, Compare()); 97a34c753fSRafael Auler return iterator_range<ContainerTy::const_iterator>(Range.first, Range.second); 98a34c753fSRafael Auler } 99a34c753fSRafael Auler 100a34c753fSRafael Auler void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) { 101a34c753fSRafael Auler Data.insert(Data.end(), FBD.Data.begin(), FBD.Data.end()); 102a34c753fSRafael Auler for (auto I = Data.begin(), E = Data.end(); I != E; ++I) { 103a34c753fSRafael Auler if (I->From.Name == FBD.Name) { 104a34c753fSRafael Auler I->From.Name = this->Name; 105a34c753fSRafael Auler I->From.Offset += Offset; 106a34c753fSRafael Auler } 107a34c753fSRafael Auler if (I->To.Name == FBD.Name) { 108a34c753fSRafael Auler I->To.Name = this->Name; 109a34c753fSRafael Auler I->To.Offset += Offset; 110a34c753fSRafael Auler } 111a34c753fSRafael Auler } 112a34c753fSRafael Auler std::stable_sort(Data.begin(), Data.end()); 113a34c753fSRafael Auler ExecutionCount += FBD.ExecutionCount; 114a34c753fSRafael Auler for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) { 115a34c753fSRafael Auler assert(I->To.Name == FBD.Name); 116a34c753fSRafael Auler auto NewElmt = EntryData.insert(EntryData.end(), *I); 117a34c753fSRafael Auler NewElmt->To.Name = this->Name; 118a34c753fSRafael Auler NewElmt->To.Offset += Offset; 119a34c753fSRafael Auler } 120a34c753fSRafael Auler } 121a34c753fSRafael Auler 122a34c753fSRafael Auler uint64_t FuncBranchData::getNumExecutedBranches() const { 123a34c753fSRafael Auler uint64_t ExecutedBranches = 0; 124a34c753fSRafael Auler for (const BranchInfo &BI : Data) { 125a34c753fSRafael Auler int64_t BranchCount = BI.Branches; 126a34c753fSRafael Auler assert(BranchCount >= 0 && "branch execution count should not be negative"); 127a34c753fSRafael Auler ExecutedBranches += BranchCount; 128a34c753fSRafael Auler } 129a34c753fSRafael Auler return ExecutedBranches; 130a34c753fSRafael Auler } 131a34c753fSRafael Auler 13240c2e0faSMaksim Panchenko void SampleInfo::mergeWith(const SampleInfo &SI) { Hits += SI.Hits; } 133a34c753fSRafael Auler 134a34c753fSRafael Auler void SampleInfo::print(raw_ostream &OS) const { 135a34c753fSRafael Auler OS << Loc.IsSymbol << " " << Loc.Name << " " << Twine::utohexstr(Loc.Offset) 136a34c753fSRafael Auler << " " << Hits << "\n"; 137a34c753fSRafael Auler } 138a34c753fSRafael Auler 13940c2e0faSMaksim Panchenko uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const { 140a34c753fSRafael Auler assert(std::is_sorted(Data.begin(), Data.end())); 141a34c753fSRafael Auler struct Compare { 142a34c753fSRafael Auler bool operator()(const SampleInfo &SI, const uint64_t Val) const { 143a34c753fSRafael Auler return SI.Loc.Offset < Val; 144a34c753fSRafael Auler } 145a34c753fSRafael Auler bool operator()(const uint64_t Val, const SampleInfo &SI) const { 146a34c753fSRafael Auler return Val < SI.Loc.Offset; 147a34c753fSRafael Auler } 148a34c753fSRafael Auler }; 149a34c753fSRafael Auler uint64_t Result = 0; 150a34c753fSRafael Auler for (auto I = std::lower_bound(Data.begin(), Data.end(), Start, Compare()), 151a34c753fSRafael Auler E = std::lower_bound(Data.begin(), Data.end(), End, Compare()); 152a34c753fSRafael Auler I != E; ++I) { 153a34c753fSRafael Auler Result += I->Hits; 154a34c753fSRafael Auler } 155a34c753fSRafael Auler return Result; 156a34c753fSRafael Auler } 157a34c753fSRafael Auler 158a34c753fSRafael Auler void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) { 159a34c753fSRafael Auler auto Iter = Index.find(Offset); 160a34c753fSRafael Auler if (Iter == Index.end()) { 161a34c753fSRafael Auler Data.emplace_back(Location(true, Name, Offset), Count); 162a34c753fSRafael Auler Index[Offset] = Data.size() - 1; 163a34c753fSRafael Auler return; 164a34c753fSRafael Auler } 165a34c753fSRafael Auler SampleInfo &SI = Data[Iter->second]; 166a34c753fSRafael Auler SI.Hits += Count; 167a34c753fSRafael Auler } 168a34c753fSRafael Auler 169a34c753fSRafael Auler void FuncBranchData::bumpBranchCount(uint64_t OffsetFrom, uint64_t OffsetTo, 170a34c753fSRafael Auler uint64_t Count, uint64_t Mispreds) { 171a34c753fSRafael Auler auto Iter = IntraIndex[OffsetFrom].find(OffsetTo); 172a34c753fSRafael Auler if (Iter == IntraIndex[OffsetFrom].end()) { 173a34c753fSRafael Auler Data.emplace_back(Location(true, Name, OffsetFrom), 174a34c753fSRafael Auler Location(true, Name, OffsetTo), Mispreds, Count); 175a34c753fSRafael Auler IntraIndex[OffsetFrom][OffsetTo] = Data.size() - 1; 176a34c753fSRafael Auler return; 177a34c753fSRafael Auler } 178a34c753fSRafael Auler BranchInfo &BI = Data[Iter->second]; 179a34c753fSRafael Auler BI.Branches += Count; 180a34c753fSRafael Auler BI.Mispreds += Mispreds; 181a34c753fSRafael Auler } 182a34c753fSRafael Auler 183a34c753fSRafael Auler void FuncBranchData::bumpCallCount(uint64_t OffsetFrom, const Location &To, 184a34c753fSRafael Auler uint64_t Count, uint64_t Mispreds) { 185a34c753fSRafael Auler auto Iter = InterIndex[OffsetFrom].find(To); 186a34c753fSRafael Auler if (Iter == InterIndex[OffsetFrom].end()) { 187a34c753fSRafael Auler Data.emplace_back(Location(true, Name, OffsetFrom), To, Mispreds, Count); 188a34c753fSRafael Auler InterIndex[OffsetFrom][To] = Data.size() - 1; 189a34c753fSRafael Auler return; 190a34c753fSRafael Auler } 191a34c753fSRafael Auler BranchInfo &BI = Data[Iter->second]; 192a34c753fSRafael Auler BI.Branches += Count; 193a34c753fSRafael Auler BI.Mispreds += Mispreds; 194a34c753fSRafael Auler } 195a34c753fSRafael Auler 196a34c753fSRafael Auler void FuncBranchData::bumpEntryCount(const Location &From, uint64_t OffsetTo, 197a34c753fSRafael Auler uint64_t Count, uint64_t Mispreds) { 198a34c753fSRafael Auler auto Iter = EntryIndex[OffsetTo].find(From); 199a34c753fSRafael Auler if (Iter == EntryIndex[OffsetTo].end()) { 200a34c753fSRafael Auler EntryData.emplace_back(From, Location(true, Name, OffsetTo), Mispreds, 201a34c753fSRafael Auler Count); 202a34c753fSRafael Auler EntryIndex[OffsetTo][From] = EntryData.size() - 1; 203a34c753fSRafael Auler return; 204a34c753fSRafael Auler } 205a34c753fSRafael Auler BranchInfo &BI = EntryData[Iter->second]; 206a34c753fSRafael Auler BI.Branches += Count; 207a34c753fSRafael Auler BI.Mispreds += Mispreds; 208a34c753fSRafael Auler } 209a34c753fSRafael Auler 210a34c753fSRafael Auler void BranchInfo::mergeWith(const BranchInfo &BI) { 211a34c753fSRafael Auler Branches += BI.Branches; 212a34c753fSRafael Auler Mispreds += BI.Mispreds; 213a34c753fSRafael Auler } 214a34c753fSRafael Auler 215a34c753fSRafael Auler void BranchInfo::print(raw_ostream &OS) const { 216a34c753fSRafael Auler OS << From.IsSymbol << " " << From.Name << " " 21740c2e0faSMaksim Panchenko << Twine::utohexstr(From.Offset) << " " << To.IsSymbol << " " << To.Name 21840c2e0faSMaksim Panchenko << " " << Twine::utohexstr(To.Offset) << " " << Mispreds << " " << Branches 21940c2e0faSMaksim Panchenko << '\n'; 220a34c753fSRafael Auler } 221a34c753fSRafael Auler 222a34c753fSRafael Auler ErrorOr<const BranchInfo &> FuncBranchData::getBranch(uint64_t From, 223a34c753fSRafael Auler uint64_t To) const { 224a34c753fSRafael Auler for (const BranchInfo &I : Data) { 22540c2e0faSMaksim Panchenko if (I.From.Offset == From && I.To.Offset == To && I.From.Name == I.To.Name) 226a34c753fSRafael Auler return I; 227a34c753fSRafael Auler } 228a34c753fSRafael Auler return make_error_code(llvm::errc::invalid_argument); 229a34c753fSRafael Auler } 230a34c753fSRafael Auler 231a34c753fSRafael Auler ErrorOr<const BranchInfo &> 232a34c753fSRafael Auler FuncBranchData::getDirectCallBranch(uint64_t From) const { 233a34c753fSRafael Auler // Commented out because it can be expensive. 234a34c753fSRafael Auler // assert(std::is_sorted(Data.begin(), Data.end())); 235a34c753fSRafael Auler struct Compare { 236a34c753fSRafael Auler bool operator()(const BranchInfo &BI, const uint64_t Val) const { 237a34c753fSRafael Auler return BI.From.Offset < Val; 238a34c753fSRafael Auler } 239a34c753fSRafael Auler bool operator()(const uint64_t Val, const BranchInfo &BI) const { 240a34c753fSRafael Auler return Val < BI.From.Offset; 241a34c753fSRafael Auler } 242a34c753fSRafael Auler }; 243a34c753fSRafael Auler auto Range = std::equal_range(Data.begin(), Data.end(), From, Compare()); 244a34c753fSRafael Auler for (auto I = Range.first; I != Range.second; ++I) { 245a34c753fSRafael Auler if (I->From.Name != I->To.Name) 246a34c753fSRafael Auler return *I; 247a34c753fSRafael Auler } 248a34c753fSRafael Auler return make_error_code(llvm::errc::invalid_argument); 249a34c753fSRafael Auler } 250a34c753fSRafael Auler 251a34c753fSRafael Auler void MemInfo::print(raw_ostream &OS) const { 252a34c753fSRafael Auler OS << (Offset.IsSymbol + 3) << " " << Offset.Name << " " 25340c2e0faSMaksim Panchenko << Twine::utohexstr(Offset.Offset) << " " << (Addr.IsSymbol + 3) << " " 25440c2e0faSMaksim Panchenko << Addr.Name << " " << Twine::utohexstr(Addr.Offset) << " " << Count 25540c2e0faSMaksim Panchenko << "\n"; 256a34c753fSRafael Auler } 257a34c753fSRafael Auler 258a34c753fSRafael Auler void MemInfo::prettyPrint(raw_ostream &OS) const { 259a34c753fSRafael Auler OS << "(PC: " << Offset << ", M: " << Addr << ", C: " << Count << ")"; 260a34c753fSRafael Auler } 261a34c753fSRafael Auler 262a34c753fSRafael Auler void FuncMemData::update(const Location &Offset, const Location &Addr) { 263a34c753fSRafael Auler auto Iter = EventIndex[Offset.Offset].find(Addr); 264a34c753fSRafael Auler if (Iter == EventIndex[Offset.Offset].end()) { 265a34c753fSRafael Auler Data.emplace_back(MemInfo(Offset, Addr, 1)); 266a34c753fSRafael Auler EventIndex[Offset.Offset][Addr] = Data.size() - 1; 267a34c753fSRafael Auler return; 268a34c753fSRafael Auler } 269a34c753fSRafael Auler ++Data[Iter->second].Count; 270a34c753fSRafael Auler } 271a34c753fSRafael Auler 272a34c753fSRafael Auler Error DataReader::preprocessProfile(BinaryContext &BC) { 273a34c753fSRafael Auler if (std::error_code EC = parseInput()) { 274a34c753fSRafael Auler return errorCodeToError(EC); 275a34c753fSRafael Auler } 276a34c753fSRafael Auler 277a34c753fSRafael Auler if (opts::DumpData) { 278a34c753fSRafael Auler dump(); 279a34c753fSRafael Auler } 280a34c753fSRafael Auler 281a34c753fSRafael Auler if (collectedInBoltedBinary()) { 282a34c753fSRafael Auler outs() << "BOLT-INFO: profile collection done on a binary already " 283a34c753fSRafael Auler "processed by BOLT\n"; 284a34c753fSRafael Auler } 285a34c753fSRafael Auler 286a34c753fSRafael Auler for (auto &BFI : BC.getBinaryFunctions()) { 287a34c753fSRafael Auler BinaryFunction &Function = BFI.second; 288a34c753fSRafael Auler if (FuncMemData *MemData = getMemDataForNames(Function.getNames())) { 289a34c753fSRafael Auler setMemData(Function, MemData); 290a34c753fSRafael Auler MemData->Used = true; 291a34c753fSRafael Auler } 292a34c753fSRafael Auler if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) { 293a34c753fSRafael Auler setBranchData(Function, FuncData); 294a34c753fSRafael Auler Function.ExecutionCount = FuncData->ExecutionCount; 295a34c753fSRafael Auler FuncData->Used = true; 296a34c753fSRafael Auler } 297a34c753fSRafael Auler } 298a34c753fSRafael Auler 299a34c753fSRafael Auler for (auto &BFI : BC.getBinaryFunctions()) { 300a34c753fSRafael Auler BinaryFunction &Function = BFI.second; 301a34c753fSRafael Auler matchProfileMemData(Function); 302a34c753fSRafael Auler } 303a34c753fSRafael Auler 304a34c753fSRafael Auler return Error::success(); 305a34c753fSRafael Auler } 306a34c753fSRafael Auler 307a34c753fSRafael Auler Error DataReader::readProfilePreCFG(BinaryContext &BC) { 308a34c753fSRafael Auler for (auto &BFI : BC.getBinaryFunctions()) { 309a34c753fSRafael Auler BinaryFunction &Function = BFI.second; 310a34c753fSRafael Auler FuncMemData *MemoryData = getMemData(Function); 311a34c753fSRafael Auler if (!MemoryData) 312a34c753fSRafael Auler continue; 313a34c753fSRafael Auler 314a34c753fSRafael Auler for (MemInfo &MI : MemoryData->Data) { 315a34c753fSRafael Auler const uint64_t Offset = MI.Offset.Offset; 316a34c753fSRafael Auler auto II = Function.Instructions.find(Offset); 317a34c753fSRafael Auler if (II == Function.Instructions.end()) { 318a34c753fSRafael Auler // Ignore bad instruction address. 319a34c753fSRafael Auler continue; 320a34c753fSRafael Auler } 321a34c753fSRafael Auler 322a34c753fSRafael Auler auto &MemAccessProfile = 323a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationAs<MemoryAccessProfile>( 324a34c753fSRafael Auler II->second, "MemoryAccessProfile"); 325a34c753fSRafael Auler BinaryData *BD = nullptr; 326a34c753fSRafael Auler if (MI.Addr.IsSymbol) { 327a34c753fSRafael Auler BD = BC.getBinaryDataByName(MI.Addr.Name); 328a34c753fSRafael Auler } 32940c2e0faSMaksim Panchenko MemAccessProfile.AddressAccessInfo.push_back( 33040c2e0faSMaksim Panchenko {BD, MI.Addr.Offset, MI.Count}); 331a34c753fSRafael Auler auto NextII = std::next(II); 332a34c753fSRafael Auler if (NextII == Function.Instructions.end()) { 333a34c753fSRafael Auler MemAccessProfile.NextInstrOffset = Function.getSize(); 334a34c753fSRafael Auler } else { 335a34c753fSRafael Auler MemAccessProfile.NextInstrOffset = II->first; 336a34c753fSRafael Auler } 337a34c753fSRafael Auler } 338a34c753fSRafael Auler Function.HasMemoryProfile = true; 339a34c753fSRafael Auler } 340a34c753fSRafael Auler 341a34c753fSRafael Auler return Error::success(); 342a34c753fSRafael Auler } 343a34c753fSRafael Auler 344a34c753fSRafael Auler Error DataReader::readProfile(BinaryContext &BC) { 345a34c753fSRafael Auler for (auto &BFI : BC.getBinaryFunctions()) { 346a34c753fSRafael Auler BinaryFunction &Function = BFI.second; 347a34c753fSRafael Auler readProfile(Function); 348a34c753fSRafael Auler } 349a34c753fSRafael Auler 350a34c753fSRafael Auler uint64_t NumUnused = 0; 351a34c753fSRafael Auler for (const StringMapEntry<FuncBranchData> &FuncData : NamesToBranches) 352a34c753fSRafael Auler if (!FuncData.getValue().Used) 353a34c753fSRafael Auler ++NumUnused; 354a34c753fSRafael Auler BC.setNumUnusedProfiledObjects(NumUnused); 355a34c753fSRafael Auler 356a34c753fSRafael Auler return Error::success(); 357a34c753fSRafael Auler } 358a34c753fSRafael Auler 359a34c753fSRafael Auler std::error_code DataReader::parseInput() { 360a34c753fSRafael Auler ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 361a34c753fSRafael Auler MemoryBuffer::getFileOrSTDIN(Filename); 362a34c753fSRafael Auler if (std::error_code EC = MB.getError()) { 363a34c753fSRafael Auler Diag << "cannot open " << Filename << ": " << EC.message() << "\n"; 364a34c753fSRafael Auler return EC; 365a34c753fSRafael Auler } 366a34c753fSRafael Auler FileBuf = std::move(MB.get()); 367a34c753fSRafael Auler ParsingBuf = FileBuf->getBuffer(); 368a34c753fSRafael Auler if (std::error_code EC = parse()) { 369a34c753fSRafael Auler return EC; 370a34c753fSRafael Auler } 371a34c753fSRafael Auler if (!ParsingBuf.empty()) { 372a34c753fSRafael Auler Diag << "WARNING: invalid profile data detected at line " << Line 373a34c753fSRafael Auler << ". Possibly corrupted profile.\n"; 374a34c753fSRafael Auler } 375a34c753fSRafael Auler 376a34c753fSRafael Auler buildLTONameMaps(); 377a34c753fSRafael Auler 378a34c753fSRafael Auler return std::error_code(); 379a34c753fSRafael Auler } 380a34c753fSRafael Auler 381a34c753fSRafael Auler void DataReader::readProfile(BinaryFunction &BF) { 382a34c753fSRafael Auler if (BF.empty()) 383a34c753fSRafael Auler return; 384a34c753fSRafael Auler 385a34c753fSRafael Auler if (!hasLBR()) { 386a34c753fSRafael Auler BF.ProfileFlags = BinaryFunction::PF_SAMPLE; 387a34c753fSRafael Auler readSampleData(BF); 388a34c753fSRafael Auler return; 389a34c753fSRafael Auler } 390a34c753fSRafael Auler 391a34c753fSRafael Auler BF.ProfileFlags = BinaryFunction::PF_LBR; 392a34c753fSRafael Auler 393a34c753fSRafael Auler // Possibly assign/re-assign branch profile data. 394a34c753fSRafael Auler matchProfileData(BF); 395a34c753fSRafael Auler 396a34c753fSRafael Auler FuncBranchData *FBD = getBranchData(BF); 397a34c753fSRafael Auler if (!FBD) 398a34c753fSRafael Auler return; 399a34c753fSRafael Auler 400a34c753fSRafael Auler // Assign basic block counts to function entry points. These only include 401a34c753fSRafael Auler // counts for outside entries. 402a34c753fSRafael Auler // 403a34c753fSRafael Auler // There is a slight skew introduced here as branches originated from RETs 404a34c753fSRafael Auler // may be accounted for in the execution count of an entry block if the last 405a34c753fSRafael Auler // instruction in a predecessor fall-through block is a call. This situation 406a34c753fSRafael Auler // should rarely happen because there are few multiple-entry functions. 407a34c753fSRafael Auler for (const BranchInfo &BI : FBD->EntryData) { 408a34c753fSRafael Auler BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(BI.To.Offset); 409a34c753fSRafael Auler if (BB && (BB->isEntryPoint() || BB->isLandingPad())) { 410a34c753fSRafael Auler uint64_t Count = BB->getExecutionCount(); 411a34c753fSRafael Auler if (Count == BinaryBasicBlock::COUNT_NO_PROFILE) 412a34c753fSRafael Auler Count = 0; 413a34c753fSRafael Auler BB->setExecutionCount(Count + BI.Branches); 414a34c753fSRafael Auler } 415a34c753fSRafael Auler } 416a34c753fSRafael Auler 417a34c753fSRafael Auler uint64_t MismatchedBranches = 0; 418a34c753fSRafael Auler for (const BranchInfo &BI : FBD->Data) { 419a34c753fSRafael Auler if (BI.From.Name != BI.To.Name) { 420a34c753fSRafael Auler continue; 421a34c753fSRafael Auler } 422a34c753fSRafael Auler 42340c2e0faSMaksim Panchenko if (!recordBranch(BF, BI.From.Offset, BI.To.Offset, BI.Branches, 42440c2e0faSMaksim Panchenko BI.Mispreds)) { 425a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "bad branch : " << BI.From.Offset << " -> " 426a34c753fSRafael Auler << BI.To.Offset << '\n'); 427a34c753fSRafael Auler ++MismatchedBranches; 428a34c753fSRafael Auler } 429a34c753fSRafael Auler } 430a34c753fSRafael Auler 431a34c753fSRafael Auler // Convert branch data into annotations. 432a34c753fSRafael Auler convertBranchData(BF); 433a34c753fSRafael Auler } 434a34c753fSRafael Auler 435a34c753fSRafael Auler void DataReader::matchProfileData(BinaryFunction &BF) { 436a34c753fSRafael Auler // This functionality is available for LBR-mode only 437a34c753fSRafael Auler // TODO: Implement evaluateProfileData() for samples, checking whether 438a34c753fSRafael Auler // sample addresses match instruction addresses in the function 439a34c753fSRafael Auler if (!hasLBR()) 440a34c753fSRafael Auler return; 441a34c753fSRafael Auler 442a34c753fSRafael Auler FuncBranchData *FBD = getBranchData(BF); 443a34c753fSRafael Auler if (FBD) { 444a34c753fSRafael Auler BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD); 445a34c753fSRafael Auler BF.RawBranchCount = FBD->getNumExecutedBranches(); 446a34c753fSRafael Auler if (BF.ProfileMatchRatio == 1.0f) { 447a34c753fSRafael Auler if (fetchProfileForOtherEntryPoints(BF)) { 448a34c753fSRafael Auler BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD); 449a34c753fSRafael Auler BF.ExecutionCount = FBD->ExecutionCount; 450a34c753fSRafael Auler BF.RawBranchCount = FBD->getNumExecutedBranches(); 451a34c753fSRafael Auler } 452a34c753fSRafael Auler return; 453a34c753fSRafael Auler } 454a34c753fSRafael Auler } 455a34c753fSRafael Auler 456a34c753fSRafael Auler // Check if the function name can fluctuate between several compilations 457a34c753fSRafael Auler // possibly triggered by minor unrelated code changes in the source code 458a34c753fSRafael Auler // of the input binary. 459a34c753fSRafael Auler if (!hasVolatileName(BF)) 460a34c753fSRafael Auler return; 461a34c753fSRafael Auler 462a34c753fSRafael Auler // Check for a profile that matches with 100% confidence. 463a34c753fSRafael Auler const std::vector<FuncBranchData *> AllBranchData = 464a34c753fSRafael Auler getBranchDataForNamesRegex(BF.getNames()); 465a34c753fSRafael Auler for (FuncBranchData *NewBranchData : AllBranchData) { 466a34c753fSRafael Auler // Prevent functions from sharing the same profile. 467a34c753fSRafael Auler if (NewBranchData->Used) 468a34c753fSRafael Auler continue; 469a34c753fSRafael Auler 470a34c753fSRafael Auler if (evaluateProfileData(BF, *NewBranchData) != 1.0f) 471a34c753fSRafael Auler continue; 472a34c753fSRafael Auler 473a34c753fSRafael Auler if (FBD) 474a34c753fSRafael Auler FBD->Used = false; 475a34c753fSRafael Auler 476a34c753fSRafael Auler // Update function profile data with the new set. 477a34c753fSRafael Auler setBranchData(BF, NewBranchData); 478a34c753fSRafael Auler NewBranchData->Used = true; 479a34c753fSRafael Auler BF.ExecutionCount = NewBranchData->ExecutionCount; 480a34c753fSRafael Auler BF.ProfileMatchRatio = 1.0f; 481a34c753fSRafael Auler break; 482a34c753fSRafael Auler } 483a34c753fSRafael Auler } 484a34c753fSRafael Auler 485a34c753fSRafael Auler void DataReader::matchProfileMemData(BinaryFunction &BF) { 486a34c753fSRafael Auler const std::vector<FuncMemData *> AllMemData = 487a34c753fSRafael Auler getMemDataForNamesRegex(BF.getNames()); 488a34c753fSRafael Auler for (FuncMemData *NewMemData : AllMemData) { 489a34c753fSRafael Auler // Prevent functions from sharing the same profile. 490a34c753fSRafael Auler if (NewMemData->Used) 491a34c753fSRafael Auler continue; 492a34c753fSRafael Auler 493a34c753fSRafael Auler if (FuncMemData *MD = getMemData(BF)) 494a34c753fSRafael Auler MD->Used = false; 495a34c753fSRafael Auler 496a34c753fSRafael Auler // Update function profile data with the new set. 497a34c753fSRafael Auler setMemData(BF, NewMemData); 498a34c753fSRafael Auler NewMemData->Used = true; 499a34c753fSRafael Auler break; 500a34c753fSRafael Auler } 501a34c753fSRafael Auler } 502a34c753fSRafael Auler 503a34c753fSRafael Auler bool DataReader::fetchProfileForOtherEntryPoints(BinaryFunction &BF) { 504a34c753fSRafael Auler BinaryContext &BC = BF.getBinaryContext(); 505a34c753fSRafael Auler 506a34c753fSRafael Auler FuncBranchData *FBD = getBranchData(BF); 507a34c753fSRafael Auler if (!FBD) 508a34c753fSRafael Auler return false; 509a34c753fSRafael Auler 510a34c753fSRafael Auler // Check if we are missing profiling data for secondary entry points 511a34c753fSRafael Auler bool First = true; 512a34c753fSRafael Auler bool Updated = false; 513a34c753fSRafael Auler for (BinaryBasicBlock *BB : BF.BasicBlocks) { 514a34c753fSRafael Auler if (First) { 515a34c753fSRafael Auler First = false; 516a34c753fSRafael Auler continue; 517a34c753fSRafael Auler } 518a34c753fSRafael Auler if (BB->isEntryPoint()) { 519a34c753fSRafael Auler uint64_t EntryAddress = BB->getOffset() + BF.getAddress(); 520a34c753fSRafael Auler // Look for branch data associated with this entry point 521a34c753fSRafael Auler if (BinaryData *BD = BC.getBinaryDataAtAddress(EntryAddress)) { 522a34c753fSRafael Auler if (FuncBranchData *Data = getBranchDataForSymbols(BD->getSymbols())) { 523a34c753fSRafael Auler FBD->appendFrom(*Data, BB->getOffset()); 524a34c753fSRafael Auler Data->Used = true; 525a34c753fSRafael Auler Updated = true; 526a34c753fSRafael Auler } 527a34c753fSRafael Auler } 528a34c753fSRafael Auler } 529a34c753fSRafael Auler } 530a34c753fSRafael Auler 531a34c753fSRafael Auler return Updated; 532a34c753fSRafael Auler } 533a34c753fSRafael Auler 534a34c753fSRafael Auler float DataReader::evaluateProfileData(BinaryFunction &BF, 535a34c753fSRafael Auler const FuncBranchData &BranchData) const { 536a34c753fSRafael Auler BinaryContext &BC = BF.getBinaryContext(); 537a34c753fSRafael Auler 538a34c753fSRafael Auler // Until we define a minimal profile, we consider an empty branch data to be 539a34c753fSRafael Auler // a valid profile. It could happen to a function without branches when we 540a34c753fSRafael Auler // still have an EntryData for the execution count. 541a34c753fSRafael Auler if (BranchData.Data.empty()) { 542a34c753fSRafael Auler return 1.0f; 543a34c753fSRafael Auler } 544a34c753fSRafael Auler 545a34c753fSRafael Auler uint64_t NumMatchedBranches = 0; 546a34c753fSRafael Auler for (const BranchInfo &BI : BranchData.Data) { 547a34c753fSRafael Auler bool IsValid = false; 548a34c753fSRafael Auler if (BI.From.Name == BI.To.Name) { 549a34c753fSRafael Auler // Try to record information with 0 count. 550a34c753fSRafael Auler IsValid = recordBranch(BF, BI.From.Offset, BI.To.Offset, 0); 551a34c753fSRafael Auler } else if (collectedInBoltedBinary()) { 552a34c753fSRafael Auler // We can't check branch source for collections in bolted binaries because 553a34c753fSRafael Auler // the source of the branch may be mapped to the first instruction in a BB 554a34c753fSRafael Auler // instead of the original branch (which may not exist in the source bin). 555a34c753fSRafael Auler IsValid = true; 556a34c753fSRafael Auler } else { 557a34c753fSRafael Auler // The branch has to originate from this function. 558a34c753fSRafael Auler // Check for calls, tail calls, rets and indirect branches. 559a34c753fSRafael Auler // When matching profiling info, we did not reach the stage 560a34c753fSRafael Auler // when we identify tail calls, so they are still represented 561a34c753fSRafael Auler // by regular branch instructions and we need isBranch() here. 562a34c753fSRafael Auler MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset); 563a34c753fSRafael Auler // If it's a prefix - skip it. 564a34c753fSRafael Auler if (Instr && BC.MIB->isPrefix(*Instr)) 565a34c753fSRafael Auler Instr = BF.getInstructionAtOffset(BI.From.Offset + 1); 56640c2e0faSMaksim Panchenko if (Instr && (BC.MIB->isCall(*Instr) || BC.MIB->isBranch(*Instr) || 567a34c753fSRafael Auler BC.MIB->isReturn(*Instr))) { 568a34c753fSRafael Auler IsValid = true; 569a34c753fSRafael Auler } 570a34c753fSRafael Auler } 571a34c753fSRafael Auler 572a34c753fSRafael Auler if (IsValid) { 573a34c753fSRafael Auler ++NumMatchedBranches; 574a34c753fSRafael Auler continue; 575a34c753fSRafael Auler } 576a34c753fSRafael Auler 577a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "\tinvalid branch in " << BF << " : 0x" 578a34c753fSRafael Auler << Twine::utohexstr(BI.From.Offset) << " -> "; 579a34c753fSRafael Auler if (BI.From.Name == BI.To.Name) dbgs() 580a34c753fSRafael Auler << "0x" << Twine::utohexstr(BI.To.Offset) << '\n'; 581a34c753fSRafael Auler else dbgs() << "<outbounds>\n";); 582a34c753fSRafael Auler } 583a34c753fSRafael Auler 584a34c753fSRafael Auler const float MatchRatio = (float)NumMatchedBranches / BranchData.Data.size(); 585a34c753fSRafael Auler if (opts::Verbosity >= 2 && NumMatchedBranches < BranchData.Data.size()) { 586a34c753fSRafael Auler errs() << "BOLT-WARNING: profile branches match only " 587a34c753fSRafael Auler << format("%.1f%%", MatchRatio * 100.0f) << " (" 588a34c753fSRafael Auler << NumMatchedBranches << '/' << BranchData.Data.size() 589a34c753fSRafael Auler << ") for function " << BF << '\n'; 590a34c753fSRafael Auler } 591a34c753fSRafael Auler 592a34c753fSRafael Auler return MatchRatio; 593a34c753fSRafael Auler } 594a34c753fSRafael Auler 595a34c753fSRafael Auler void DataReader::readSampleData(BinaryFunction &BF) { 596a34c753fSRafael Auler FuncSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames()); 597a34c753fSRafael Auler if (!SampleDataOrErr) 598a34c753fSRafael Auler return; 599a34c753fSRafael Auler 600a34c753fSRafael Auler // Basic samples mode territory (without LBR info) 601a34c753fSRafael Auler // First step is to assign BB execution count based on samples from perf 602a34c753fSRafael Auler BF.ProfileMatchRatio = 1.0f; 603a34c753fSRafael Auler BF.removeTagsFromProfile(); 604a34c753fSRafael Auler bool NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions"); 605a34c753fSRafael Auler bool NormalizeByCalls = usesEvent("branches"); 606a34c753fSRafael Auler static bool NagUser = true; 607a34c753fSRafael Auler if (NagUser) { 608a34c753fSRafael Auler outs() 609a34c753fSRafael Auler << "BOLT-INFO: operating with basic samples profiling data (no LBR).\n"; 610a34c753fSRafael Auler if (NormalizeByInsnCount) { 611a34c753fSRafael Auler outs() << "BOLT-INFO: normalizing samples by instruction count.\n"; 612a34c753fSRafael Auler } else if (NormalizeByCalls) { 613a34c753fSRafael Auler outs() << "BOLT-INFO: normalizing samples by branches.\n"; 614a34c753fSRafael Auler } 615a34c753fSRafael Auler NagUser = false; 616a34c753fSRafael Auler } 617a34c753fSRafael Auler uint64_t LastOffset = BF.getSize(); 618a34c753fSRafael Auler uint64_t TotalEntryCount = 0; 619a34c753fSRafael Auler for (auto I = BF.BasicBlockOffsets.rbegin(), E = BF.BasicBlockOffsets.rend(); 620a34c753fSRafael Auler I != E; ++I) { 621a34c753fSRafael Auler uint64_t CurOffset = I->first; 622a34c753fSRafael Auler // Always work with samples multiplied by 1000 to avoid losing them if we 623a34c753fSRafael Auler // later need to normalize numbers 624a34c753fSRafael Auler uint64_t NumSamples = 625a34c753fSRafael Auler SampleDataOrErr->getSamples(CurOffset, LastOffset) * 1000; 626a34c753fSRafael Auler if (NormalizeByInsnCount && I->second->getNumNonPseudos()) 627a34c753fSRafael Auler NumSamples /= I->second->getNumNonPseudos(); 628a34c753fSRafael Auler else if (NormalizeByCalls) { 629a34c753fSRafael Auler uint32_t NumCalls = I->second->getNumCalls(); 630a34c753fSRafael Auler NumSamples /= NumCalls + 1; 631a34c753fSRafael Auler } 632a34c753fSRafael Auler I->second->setExecutionCount(NumSamples); 633a34c753fSRafael Auler if (I->second->isEntryPoint()) 634a34c753fSRafael Auler TotalEntryCount += NumSamples; 635a34c753fSRafael Auler LastOffset = CurOffset; 636a34c753fSRafael Auler } 637a34c753fSRafael Auler 638a34c753fSRafael Auler BF.ExecutionCount = TotalEntryCount; 639a34c753fSRafael Auler 640a34c753fSRafael Auler estimateEdgeCounts(BF); 641a34c753fSRafael Auler } 642a34c753fSRafael Auler 643a34c753fSRafael Auler void DataReader::convertBranchData(BinaryFunction &BF) const { 644a34c753fSRafael Auler BinaryContext &BC = BF.getBinaryContext(); 645a34c753fSRafael Auler 646a34c753fSRafael Auler if (BF.empty()) 647a34c753fSRafael Auler return; 648a34c753fSRafael Auler 649a34c753fSRafael Auler FuncBranchData *FBD = getBranchData(BF); 650a34c753fSRafael Auler if (!FBD) 651a34c753fSRafael Auler return; 652a34c753fSRafael Auler 653a34c753fSRafael Auler // Profile information for calls. 654a34c753fSRafael Auler // 655a34c753fSRafael Auler // There are 3 cases that we annotate differently: 656a34c753fSRafael Auler // 1) Conditional tail calls that could be mispredicted. 657a34c753fSRafael Auler // 2) Indirect calls to multiple destinations with mispredictions. 658a34c753fSRafael Auler // Before we validate CFG we have to handle indirect branches here too. 659a34c753fSRafael Auler // 3) Regular direct calls. The count could be different from containing 660a34c753fSRafael Auler // basic block count. Keep this data in case we find it useful. 661a34c753fSRafael Auler // 662a34c753fSRafael Auler for (BranchInfo &BI : FBD->Data) { 663a34c753fSRafael Auler // Ignore internal branches. 664a34c753fSRafael Auler if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0) 665a34c753fSRafael Auler continue; 666a34c753fSRafael Auler 667a34c753fSRafael Auler MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset); 668a34c753fSRafael Auler if (!Instr || 669a34c753fSRafael Auler (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr))) 670a34c753fSRafael Auler continue; 671a34c753fSRafael Auler 672a34c753fSRafael Auler auto setOrUpdateAnnotation = [&](StringRef Name, uint64_t Count) { 673a34c753fSRafael Auler if (opts::Verbosity >= 1 && BC.MIB->hasAnnotation(*Instr, Name)) { 674a34c753fSRafael Auler errs() << "BOLT-WARNING: duplicate " << Name << " info for offset 0x" 67540c2e0faSMaksim Panchenko << Twine::utohexstr(BI.From.Offset) << " in function " << BF 67640c2e0faSMaksim Panchenko << '\n'; 677a34c753fSRafael Auler } 678a34c753fSRafael Auler auto &Value = BC.MIB->getOrCreateAnnotationAs<uint64_t>(*Instr, Name); 679a34c753fSRafael Auler Value += Count; 680a34c753fSRafael Auler }; 681a34c753fSRafael Auler 682a34c753fSRafael Auler if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) { 683a34c753fSRafael Auler IndirectCallSiteProfile &CSP = 684a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>( 685a34c753fSRafael Auler *Instr, "CallProfile"); 686a34c753fSRafael Auler MCSymbol *CalleeSymbol = nullptr; 687a34c753fSRafael Auler if (BI.To.IsSymbol) { 688a34c753fSRafael Auler if (BinaryData *BD = BC.getBinaryDataByName(BI.To.Name)) { 689a34c753fSRafael Auler CalleeSymbol = BD->getSymbol(); 690a34c753fSRafael Auler } 691a34c753fSRafael Auler } 692a34c753fSRafael Auler CSP.emplace_back(CalleeSymbol, BI.Branches, BI.Mispreds); 693a34c753fSRafael Auler } else if (BC.MIB->getConditionalTailCall(*Instr)) { 694a34c753fSRafael Auler setOrUpdateAnnotation("CTCTakenCount", BI.Branches); 695a34c753fSRafael Auler setOrUpdateAnnotation("CTCMispredCount", BI.Mispreds); 696a34c753fSRafael Auler } else { 697a34c753fSRafael Auler setOrUpdateAnnotation("Count", BI.Branches); 698a34c753fSRafael Auler } 699a34c753fSRafael Auler } 700a34c753fSRafael Auler } 701a34c753fSRafael Auler 70240c2e0faSMaksim Panchenko bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To, 703a34c753fSRafael Auler uint64_t Count, uint64_t Mispreds) const { 704a34c753fSRafael Auler BinaryContext &BC = BF.getBinaryContext(); 705a34c753fSRafael Auler 706a34c753fSRafael Auler BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From); 707a34c753fSRafael Auler BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To); 708a34c753fSRafael Auler 709a34c753fSRafael Auler if (!FromBB || !ToBB) { 710a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "failed to get block for recorded branch\n"); 711a34c753fSRafael Auler return false; 712a34c753fSRafael Auler } 713a34c753fSRafael Auler 714a34c753fSRafael Auler // Could be bad LBR data; ignore the branch. In the case of data collected 715a34c753fSRafael Auler // in binaries optimized by BOLT, a source BB may be mapped to two output 716a34c753fSRafael Auler // BBs as a result of optimizations. In that case, a branch between these 717a34c753fSRafael Auler // two will be recorded as a branch from A going to A in the source address 718a34c753fSRafael Auler // space. Keep processing. 719a34c753fSRafael Auler if (From == To) { 720a34c753fSRafael Auler return true; 721a34c753fSRafael Auler } 722a34c753fSRafael Auler 723a34c753fSRafael Auler if (FromBB->succ_size() == 0) { 724a34c753fSRafael Auler // Return from a tail call. 725a34c753fSRafael Auler return true; 726a34c753fSRafael Auler } 727a34c753fSRafael Auler 728a34c753fSRafael Auler // Very rarely we will see ignored branches. Do a linear check. 729a34c753fSRafael Auler for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches) { 73040c2e0faSMaksim Panchenko if (Branch == 73140c2e0faSMaksim Panchenko std::make_pair(static_cast<uint32_t>(From), static_cast<uint32_t>(To))) 732a34c753fSRafael Auler return true; 733a34c753fSRafael Auler } 734a34c753fSRafael Auler 735*08f56926SVladislav Khmelevsky bool OffsetMatches = !!(To == ToBB->getOffset()); 736*08f56926SVladislav Khmelevsky if (!OffsetMatches) { 737*08f56926SVladislav Khmelevsky // Skip the nops to support old .fdata 738*08f56926SVladislav Khmelevsky uint64_t Offset = ToBB->getOffset(); 739*08f56926SVladislav Khmelevsky for (MCInst &Instr : *ToBB) { 740*08f56926SVladislav Khmelevsky if (!BC.MIB->isNoop(Instr)) 741*08f56926SVladislav Khmelevsky break; 742*08f56926SVladislav Khmelevsky 743*08f56926SVladislav Khmelevsky Offset += BC.MIB->getAnnotationWithDefault<uint32_t>(Instr, "Size"); 744*08f56926SVladislav Khmelevsky } 745*08f56926SVladislav Khmelevsky 746*08f56926SVladislav Khmelevsky if (To == Offset) 747*08f56926SVladislav Khmelevsky OffsetMatches = true; 748*08f56926SVladislav Khmelevsky } 749*08f56926SVladislav Khmelevsky 750*08f56926SVladislav Khmelevsky if (!OffsetMatches) { 751a34c753fSRafael Auler // "To" could be referring to nop instructions in between 2 basic blocks. 752a34c753fSRafael Auler // While building the CFG we make sure these nops are attributed to the 753a34c753fSRafael Auler // previous basic block, thus we check if the destination belongs to the 754a34c753fSRafael Auler // gap past the last instruction. 755a34c753fSRafael Auler const MCInst *LastInstr = ToBB->getLastNonPseudoInstr(); 756a34c753fSRafael Auler if (LastInstr) { 757a34c753fSRafael Auler const uint32_t LastInstrOffset = 758a34c753fSRafael Auler BC.MIB->getAnnotationWithDefault<uint32_t>(*LastInstr, "Offset"); 759a34c753fSRafael Auler 760a34c753fSRafael Auler // With old .fdata we are getting FT branches for "jcc,jmp" sequences. 761a34c753fSRafael Auler if (To == LastInstrOffset && BC.MIB->isUnconditionalBranch(*LastInstr)) { 762a34c753fSRafael Auler return true; 763a34c753fSRafael Auler } 764a34c753fSRafael Auler 765a34c753fSRafael Auler if (To <= LastInstrOffset) { 766a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "branch recorded into the middle of the block" 767a34c753fSRafael Auler << " in " << BF << " : " << From << " -> " << To 768a34c753fSRafael Auler << '\n'); 769a34c753fSRafael Auler return false; 770a34c753fSRafael Auler } 771a34c753fSRafael Auler } 772a34c753fSRafael Auler 773a34c753fSRafael Auler // The real destination is the layout successor of the detected ToBB. 774a34c753fSRafael Auler if (ToBB == BF.BasicBlocksLayout.back()) 775a34c753fSRafael Auler return false; 776a34c753fSRafael Auler BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[ToBB->getIndex() + 1]; 777a34c753fSRafael Auler assert((NextBB && NextBB->getOffset() > ToBB->getOffset()) && "bad layout"); 778a34c753fSRafael Auler ToBB = NextBB; 779a34c753fSRafael Auler } 780a34c753fSRafael Auler 781a34c753fSRafael Auler // If there's no corresponding instruction for 'From', we have probably 782a34c753fSRafael Auler // discarded it as a FT from __builtin_unreachable. 783a34c753fSRafael Auler MCInst *FromInstruction = BF.getInstructionAtOffset(From); 784a34c753fSRafael Auler if (!FromInstruction) { 785a34c753fSRafael Auler // If the data was collected in a bolted binary, the From addresses may be 786a34c753fSRafael Auler // translated to the first instruction of the source BB if BOLT inserted 787a34c753fSRafael Auler // a new branch that did not exist in the source (we can't map it to the 788a34c753fSRafael Auler // source instruction, so we map it to the first instr of source BB). 789a34c753fSRafael Auler // We do not keep offsets for random instructions. So the check above will 790a34c753fSRafael Auler // evaluate to true if the first instr is not a branch (call/jmp/ret/etc) 791a34c753fSRafael Auler if (collectedInBoltedBinary()) { 792a34c753fSRafael Auler if (FromBB->getInputOffset() != From) { 793a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "offset " << From << " does not match a BB in " 794a34c753fSRafael Auler << BF << '\n'); 795a34c753fSRafael Auler return false; 796a34c753fSRafael Auler } 797a34c753fSRafael Auler FromInstruction = nullptr; 798a34c753fSRafael Auler } else { 799a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "no instruction for offset " << From << " in " << BF 800a34c753fSRafael Auler << '\n'); 801a34c753fSRafael Auler return false; 802a34c753fSRafael Auler } 803a34c753fSRafael Auler } 804a34c753fSRafael Auler 805a34c753fSRafael Auler if (!FromBB->getSuccessor(ToBB->getLabel())) { 806a34c753fSRafael Auler // Check if this is a recursive call or a return from a recursive call. 807a34c753fSRafael Auler if (FromInstruction && ToBB->isEntryPoint() && 808a34c753fSRafael Auler (BC.MIB->isCall(*FromInstruction) || 809a34c753fSRafael Auler BC.MIB->isIndirectBranch(*FromInstruction))) { 810a34c753fSRafael Auler // Execution count is already accounted for. 811a34c753fSRafael Auler return true; 812a34c753fSRafael Auler } 813a34c753fSRafael Auler // For data collected in a bolted binary, we may have created two output BBs 814a34c753fSRafael Auler // that map to one original block. Branches between these two blocks will 81540c2e0faSMaksim Panchenko // appear here as one BB jumping to itself, even though it has no loop 81640c2e0faSMaksim Panchenko // edges. Ignore these. 817a34c753fSRafael Auler if (collectedInBoltedBinary() && FromBB == ToBB) 818a34c753fSRafael Auler return true; 819a34c753fSRafael Auler 820a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "invalid branch in " << BF << '\n' 821a34c753fSRafael Auler << Twine::utohexstr(From) << " -> " 822a34c753fSRafael Auler << Twine::utohexstr(To) << '\n'); 823a34c753fSRafael Auler return false; 824a34c753fSRafael Auler } 825a34c753fSRafael Auler 826a34c753fSRafael Auler BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB); 827a34c753fSRafael Auler BI.Count += Count; 828a34c753fSRafael Auler // Only update mispredicted count if it the count was real. 829a34c753fSRafael Auler if (Count) { 830a34c753fSRafael Auler BI.MispredictedCount += Mispreds; 831a34c753fSRafael Auler } 832a34c753fSRafael Auler 833a34c753fSRafael Auler return true; 834a34c753fSRafael Auler } 835a34c753fSRafael Auler 836a34c753fSRafael Auler void DataReader::reportError(StringRef ErrorMsg) { 837a34c753fSRafael Auler Diag << "Error reading BOLT data input file: line " << Line << ", column " 838a34c753fSRafael Auler << Col << ": " << ErrorMsg << '\n'; 839a34c753fSRafael Auler } 840a34c753fSRafael Auler 841a34c753fSRafael Auler bool DataReader::expectAndConsumeFS() { 842a34c753fSRafael Auler if (ParsingBuf[0] != FieldSeparator) { 843a34c753fSRafael Auler reportError("expected field separator"); 844a34c753fSRafael Auler return false; 845a34c753fSRafael Auler } 846a34c753fSRafael Auler ParsingBuf = ParsingBuf.drop_front(1); 847a34c753fSRafael Auler Col += 1; 848a34c753fSRafael Auler return true; 849a34c753fSRafael Auler } 850a34c753fSRafael Auler 851a34c753fSRafael Auler void DataReader::consumeAllRemainingFS() { 852a34c753fSRafael Auler while (ParsingBuf[0] == FieldSeparator) { 853a34c753fSRafael Auler ParsingBuf = ParsingBuf.drop_front(1); 854a34c753fSRafael Auler Col += 1; 855a34c753fSRafael Auler } 856a34c753fSRafael Auler } 857a34c753fSRafael Auler 858a34c753fSRafael Auler bool DataReader::checkAndConsumeNewLine() { 859a34c753fSRafael Auler if (ParsingBuf[0] != '\n') 860a34c753fSRafael Auler return false; 861a34c753fSRafael Auler 862a34c753fSRafael Auler ParsingBuf = ParsingBuf.drop_front(1); 863a34c753fSRafael Auler Col = 0; 864a34c753fSRafael Auler Line += 1; 865a34c753fSRafael Auler return true; 866a34c753fSRafael Auler } 867a34c753fSRafael Auler 868a34c753fSRafael Auler ErrorOr<StringRef> DataReader::parseString(char EndChar, bool EndNl) { 869a34c753fSRafael Auler if (EndChar == '\\') { 870a34c753fSRafael Auler reportError("EndChar could not be backslash"); 871a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 872a34c753fSRafael Auler } 873a34c753fSRafael Auler 874a34c753fSRafael Auler std::string EndChars(1, EndChar); 875a34c753fSRafael Auler EndChars.push_back('\\'); 876a34c753fSRafael Auler if (EndNl) 877a34c753fSRafael Auler EndChars.push_back('\n'); 878a34c753fSRafael Auler 879a34c753fSRafael Auler size_t StringEnd = 0; 880a34c753fSRafael Auler do { 881a34c753fSRafael Auler StringEnd = ParsingBuf.find_first_of(EndChars, StringEnd); 882a34c753fSRafael Auler if (StringEnd == StringRef::npos || 883a34c753fSRafael Auler (StringEnd == 0 && ParsingBuf[StringEnd] != '\\')) { 884a34c753fSRafael Auler reportError("malformed field"); 885a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 886a34c753fSRafael Auler } 887a34c753fSRafael Auler 888a34c753fSRafael Auler if (ParsingBuf[StringEnd] != '\\') 889a34c753fSRafael Auler break; 890a34c753fSRafael Auler 891a34c753fSRafael Auler StringEnd += 2; 892a34c753fSRafael Auler } while (1); 893a34c753fSRafael Auler 894a34c753fSRafael Auler StringRef Str = ParsingBuf.substr(0, StringEnd); 895a34c753fSRafael Auler 896a34c753fSRafael Auler // If EndNl was set and nl was found instead of EndChar, do not consume the 897a34c753fSRafael Auler // new line. 89840c2e0faSMaksim Panchenko bool EndNlInsteadOfEndChar = ParsingBuf[StringEnd] == '\n' && EndChar != '\n'; 899a34c753fSRafael Auler unsigned End = EndNlInsteadOfEndChar ? StringEnd : StringEnd + 1; 900a34c753fSRafael Auler 901a34c753fSRafael Auler ParsingBuf = ParsingBuf.drop_front(End); 902a34c753fSRafael Auler if (EndChar == '\n') { 903a34c753fSRafael Auler Col = 0; 904a34c753fSRafael Auler Line += 1; 905a34c753fSRafael Auler } else { 906a34c753fSRafael Auler Col += End; 907a34c753fSRafael Auler } 908a34c753fSRafael Auler return Str; 909a34c753fSRafael Auler } 910a34c753fSRafael Auler 911a34c753fSRafael Auler ErrorOr<int64_t> DataReader::parseNumberField(char EndChar, bool EndNl) { 912a34c753fSRafael Auler ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl); 913a34c753fSRafael Auler if (std::error_code EC = NumStrRes.getError()) 914a34c753fSRafael Auler return EC; 915a34c753fSRafael Auler StringRef NumStr = NumStrRes.get(); 916a34c753fSRafael Auler int64_t Num; 917a34c753fSRafael Auler if (NumStr.getAsInteger(10, Num)) { 918a34c753fSRafael Auler reportError("expected decimal number"); 919a34c753fSRafael Auler Diag << "Found: " << NumStr << "\n"; 920a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 921a34c753fSRafael Auler } 922a34c753fSRafael Auler return Num; 923a34c753fSRafael Auler } 924a34c753fSRafael Auler 925a34c753fSRafael Auler ErrorOr<uint64_t> DataReader::parseHexField(char EndChar, bool EndNl) { 926a34c753fSRafael Auler ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl); 927a34c753fSRafael Auler if (std::error_code EC = NumStrRes.getError()) 928a34c753fSRafael Auler return EC; 929a34c753fSRafael Auler StringRef NumStr = NumStrRes.get(); 930a34c753fSRafael Auler uint64_t Num; 931a34c753fSRafael Auler if (NumStr.getAsInteger(16, Num)) { 932a34c753fSRafael Auler reportError("expected hexidecimal number"); 933a34c753fSRafael Auler Diag << "Found: " << NumStr << "\n"; 934a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 935a34c753fSRafael Auler } 936a34c753fSRafael Auler return Num; 937a34c753fSRafael Auler } 938a34c753fSRafael Auler 93940c2e0faSMaksim Panchenko ErrorOr<Location> DataReader::parseLocation(char EndChar, bool EndNl, 940a34c753fSRafael Auler bool ExpectMemLoc) { 941a34c753fSRafael Auler // Read whether the location of the branch should be DSO or a symbol 942a34c753fSRafael Auler // 0 means it is a DSO. 1 means it is a global symbol. 2 means it is a local 943a34c753fSRafael Auler // symbol. 944a34c753fSRafael Auler // The symbol flag is also used to tag memory load events by adding 3 to the 945a34c753fSRafael Auler // base values, i.e. 3 not a symbol, 4 global symbol and 5 local symbol. 94640c2e0faSMaksim Panchenko if (!ExpectMemLoc && ParsingBuf[0] != '0' && ParsingBuf[0] != '1' && 94740c2e0faSMaksim Panchenko ParsingBuf[0] != '2') { 948a34c753fSRafael Auler reportError("expected 0, 1 or 2"); 949a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 950a34c753fSRafael Auler } 951a34c753fSRafael Auler 95240c2e0faSMaksim Panchenko if (ExpectMemLoc && ParsingBuf[0] != '3' && ParsingBuf[0] != '4' && 95340c2e0faSMaksim Panchenko ParsingBuf[0] != '5') { 954a34c753fSRafael Auler reportError("expected 3, 4 or 5"); 955a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 956a34c753fSRafael Auler } 957a34c753fSRafael Auler 958a34c753fSRafael Auler bool IsSymbol = 959a34c753fSRafael Auler (!ExpectMemLoc && (ParsingBuf[0] == '1' || ParsingBuf[0] == '2')) || 960a34c753fSRafael Auler (ExpectMemLoc && (ParsingBuf[0] == '4' || ParsingBuf[0] == '5')); 961a34c753fSRafael Auler ParsingBuf = ParsingBuf.drop_front(1); 962a34c753fSRafael Auler Col += 1; 963a34c753fSRafael Auler 964a34c753fSRafael Auler if (!expectAndConsumeFS()) 965a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 966a34c753fSRafael Auler consumeAllRemainingFS(); 967a34c753fSRafael Auler 968a34c753fSRafael Auler // Read the string containing the symbol or the DSO name 969a34c753fSRafael Auler ErrorOr<StringRef> NameRes = parseString(FieldSeparator); 970a34c753fSRafael Auler if (std::error_code EC = NameRes.getError()) 971a34c753fSRafael Auler return EC; 972a34c753fSRafael Auler StringRef Name = NameRes.get(); 973a34c753fSRafael Auler consumeAllRemainingFS(); 974a34c753fSRafael Auler 975a34c753fSRafael Auler // Read the offset 976a34c753fSRafael Auler ErrorOr<uint64_t> Offset = parseHexField(EndChar, EndNl); 977a34c753fSRafael Auler if (std::error_code EC = Offset.getError()) 978a34c753fSRafael Auler return EC; 979a34c753fSRafael Auler 980a34c753fSRafael Auler return Location(IsSymbol, Name, Offset.get()); 981a34c753fSRafael Auler } 982a34c753fSRafael Auler 983a34c753fSRafael Auler ErrorOr<BranchInfo> DataReader::parseBranchInfo() { 984a34c753fSRafael Auler ErrorOr<Location> Res = parseLocation(FieldSeparator); 985a34c753fSRafael Auler if (std::error_code EC = Res.getError()) 986a34c753fSRafael Auler return EC; 987a34c753fSRafael Auler Location From = Res.get(); 988a34c753fSRafael Auler 989a34c753fSRafael Auler consumeAllRemainingFS(); 990a34c753fSRafael Auler Res = parseLocation(FieldSeparator); 991a34c753fSRafael Auler if (std::error_code EC = Res.getError()) 992a34c753fSRafael Auler return EC; 993a34c753fSRafael Auler Location To = Res.get(); 994a34c753fSRafael Auler 995a34c753fSRafael Auler consumeAllRemainingFS(); 996a34c753fSRafael Auler ErrorOr<int64_t> MRes = parseNumberField(FieldSeparator); 997a34c753fSRafael Auler if (std::error_code EC = MRes.getError()) 998a34c753fSRafael Auler return EC; 999a34c753fSRafael Auler int64_t NumMispreds = MRes.get(); 1000a34c753fSRafael Auler 1001a34c753fSRafael Auler consumeAllRemainingFS(); 1002a34c753fSRafael Auler ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true); 1003a34c753fSRafael Auler if (std::error_code EC = BRes.getError()) 1004a34c753fSRafael Auler return EC; 1005a34c753fSRafael Auler int64_t NumBranches = BRes.get(); 1006a34c753fSRafael Auler 1007a34c753fSRafael Auler consumeAllRemainingFS(); 1008a34c753fSRafael Auler if (!checkAndConsumeNewLine()) { 1009a34c753fSRafael Auler reportError("expected end of line"); 1010a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 1011a34c753fSRafael Auler } 1012a34c753fSRafael Auler 1013a34c753fSRafael Auler return BranchInfo(std::move(From), std::move(To), NumMispreds, NumBranches); 1014a34c753fSRafael Auler } 1015a34c753fSRafael Auler 1016a34c753fSRafael Auler ErrorOr<MemInfo> DataReader::parseMemInfo() { 1017a34c753fSRafael Auler ErrorOr<Location> Res = parseMemLocation(FieldSeparator); 1018a34c753fSRafael Auler if (std::error_code EC = Res.getError()) 1019a34c753fSRafael Auler return EC; 1020a34c753fSRafael Auler Location Offset = Res.get(); 1021a34c753fSRafael Auler 1022a34c753fSRafael Auler consumeAllRemainingFS(); 1023a34c753fSRafael Auler Res = parseMemLocation(FieldSeparator); 1024a34c753fSRafael Auler if (std::error_code EC = Res.getError()) 1025a34c753fSRafael Auler return EC; 1026a34c753fSRafael Auler Location Addr = Res.get(); 1027a34c753fSRafael Auler 1028a34c753fSRafael Auler consumeAllRemainingFS(); 1029a34c753fSRafael Auler ErrorOr<int64_t> CountRes = parseNumberField(FieldSeparator, true); 1030a34c753fSRafael Auler if (std::error_code EC = CountRes.getError()) 1031a34c753fSRafael Auler return EC; 1032a34c753fSRafael Auler 1033a34c753fSRafael Auler consumeAllRemainingFS(); 1034a34c753fSRafael Auler if (!checkAndConsumeNewLine()) { 1035a34c753fSRafael Auler reportError("expected end of line"); 1036a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 1037a34c753fSRafael Auler } 1038a34c753fSRafael Auler 1039a34c753fSRafael Auler return MemInfo(Offset, Addr, CountRes.get()); 1040a34c753fSRafael Auler } 1041a34c753fSRafael Auler 1042a34c753fSRafael Auler ErrorOr<SampleInfo> DataReader::parseSampleInfo() { 1043a34c753fSRafael Auler ErrorOr<Location> Res = parseLocation(FieldSeparator); 1044a34c753fSRafael Auler if (std::error_code EC = Res.getError()) 1045a34c753fSRafael Auler return EC; 1046a34c753fSRafael Auler Location Address = Res.get(); 1047a34c753fSRafael Auler 1048a34c753fSRafael Auler consumeAllRemainingFS(); 1049a34c753fSRafael Auler ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true); 1050a34c753fSRafael Auler if (std::error_code EC = BRes.getError()) 1051a34c753fSRafael Auler return EC; 1052a34c753fSRafael Auler int64_t Occurrences = BRes.get(); 1053a34c753fSRafael Auler 1054a34c753fSRafael Auler consumeAllRemainingFS(); 1055a34c753fSRafael Auler if (!checkAndConsumeNewLine()) { 1056a34c753fSRafael Auler reportError("expected end of line"); 1057a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 1058a34c753fSRafael Auler } 1059a34c753fSRafael Auler 1060a34c753fSRafael Auler return SampleInfo(std::move(Address), Occurrences); 1061a34c753fSRafael Auler } 1062a34c753fSRafael Auler 1063a34c753fSRafael Auler ErrorOr<bool> DataReader::maybeParseNoLBRFlag() { 1064a34c753fSRafael Auler if (ParsingBuf.size() < 6 || ParsingBuf.substr(0, 6) != "no_lbr") 1065a34c753fSRafael Auler return false; 1066a34c753fSRafael Auler ParsingBuf = ParsingBuf.drop_front(6); 1067a34c753fSRafael Auler Col += 6; 1068a34c753fSRafael Auler 1069a34c753fSRafael Auler if (ParsingBuf.size() > 0 && ParsingBuf[0] == ' ') 1070a34c753fSRafael Auler ParsingBuf = ParsingBuf.drop_front(1); 1071a34c753fSRafael Auler 1072a34c753fSRafael Auler while (ParsingBuf.size() > 0 && ParsingBuf[0] != '\n') { 1073a34c753fSRafael Auler ErrorOr<StringRef> EventName = parseString(' ', true); 1074a34c753fSRafael Auler if (!EventName) 1075a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 1076a34c753fSRafael Auler EventNames.insert(EventName.get()); 1077a34c753fSRafael Auler } 1078a34c753fSRafael Auler 1079a34c753fSRafael Auler if (!checkAndConsumeNewLine()) { 1080a34c753fSRafael Auler reportError("malformed no_lbr line"); 1081a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 1082a34c753fSRafael Auler } 1083a34c753fSRafael Auler return true; 1084a34c753fSRafael Auler } 1085a34c753fSRafael Auler 1086a34c753fSRafael Auler ErrorOr<bool> DataReader::maybeParseBATFlag() { 1087a34c753fSRafael Auler if (ParsingBuf.size() < 16 || ParsingBuf.substr(0, 16) != "boltedcollection") 1088a34c753fSRafael Auler return false; 1089a34c753fSRafael Auler ParsingBuf = ParsingBuf.drop_front(16); 1090a34c753fSRafael Auler Col += 16; 1091a34c753fSRafael Auler 1092a34c753fSRafael Auler if (!checkAndConsumeNewLine()) { 1093a34c753fSRafael Auler reportError("malformed boltedcollection line"); 1094a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 1095a34c753fSRafael Auler } 1096a34c753fSRafael Auler return true; 1097a34c753fSRafael Auler } 1098a34c753fSRafael Auler 1099a34c753fSRafael Auler bool DataReader::hasBranchData() { 1100a34c753fSRafael Auler if (ParsingBuf.size() == 0) 1101a34c753fSRafael Auler return false; 1102a34c753fSRafael Auler 1103a34c753fSRafael Auler if (ParsingBuf[0] == '0' || ParsingBuf[0] == '1' || ParsingBuf[0] == '2') 1104a34c753fSRafael Auler return true; 1105a34c753fSRafael Auler return false; 1106a34c753fSRafael Auler } 1107a34c753fSRafael Auler 1108a34c753fSRafael Auler bool DataReader::hasMemData() { 1109a34c753fSRafael Auler if (ParsingBuf.size() == 0) 1110a34c753fSRafael Auler return false; 1111a34c753fSRafael Auler 1112a34c753fSRafael Auler if (ParsingBuf[0] == '3' || ParsingBuf[0] == '4' || ParsingBuf[0] == '5') 1113a34c753fSRafael Auler return true; 1114a34c753fSRafael Auler return false; 1115a34c753fSRafael Auler } 1116a34c753fSRafael Auler 1117a34c753fSRafael Auler std::error_code DataReader::parseInNoLBRMode() { 1118a34c753fSRafael Auler auto GetOrCreateFuncEntry = [&](StringRef Name) { 1119a34c753fSRafael Auler auto I = NamesToSamples.find(Name); 1120a34c753fSRafael Auler if (I == NamesToSamples.end()) { 1121a34c753fSRafael Auler bool Success; 1122a34c753fSRafael Auler std::tie(I, Success) = NamesToSamples.insert(std::make_pair( 1123a34c753fSRafael Auler Name, FuncSampleData(Name, FuncSampleData::ContainerTy()))); 1124a34c753fSRafael Auler 1125a34c753fSRafael Auler assert(Success && "unexpected result of insert"); 1126a34c753fSRafael Auler } 1127a34c753fSRafael Auler return I; 1128a34c753fSRafael Auler }; 1129a34c753fSRafael Auler 1130a34c753fSRafael Auler auto GetOrCreateFuncMemEntry = [&](StringRef Name) { 1131a34c753fSRafael Auler auto I = NamesToMemEvents.find(Name); 1132a34c753fSRafael Auler if (I == NamesToMemEvents.end()) { 1133a34c753fSRafael Auler bool Success; 1134a34c753fSRafael Auler std::tie(I, Success) = NamesToMemEvents.insert( 1135a34c753fSRafael Auler std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy()))); 1136a34c753fSRafael Auler assert(Success && "unexpected result of insert"); 1137a34c753fSRafael Auler } 1138a34c753fSRafael Auler return I; 1139a34c753fSRafael Auler }; 1140a34c753fSRafael Auler 1141a34c753fSRafael Auler while (hasBranchData()) { 1142a34c753fSRafael Auler ErrorOr<SampleInfo> Res = parseSampleInfo(); 1143a34c753fSRafael Auler if (std::error_code EC = Res.getError()) 1144a34c753fSRafael Auler return EC; 1145a34c753fSRafael Auler 1146a34c753fSRafael Auler SampleInfo SI = Res.get(); 1147a34c753fSRafael Auler 1148a34c753fSRafael Auler // Ignore samples not involving known locations 1149a34c753fSRafael Auler if (!SI.Loc.IsSymbol) 1150a34c753fSRafael Auler continue; 1151a34c753fSRafael Auler 1152a34c753fSRafael Auler StringMapIterator<FuncSampleData> I = GetOrCreateFuncEntry(SI.Loc.Name); 1153a34c753fSRafael Auler I->getValue().Data.emplace_back(std::move(SI)); 1154a34c753fSRafael Auler } 1155a34c753fSRafael Auler 1156a34c753fSRafael Auler while (hasMemData()) { 1157a34c753fSRafael Auler ErrorOr<MemInfo> Res = parseMemInfo(); 1158a34c753fSRafael Auler if (std::error_code EC = Res.getError()) 1159a34c753fSRafael Auler return EC; 1160a34c753fSRafael Auler 1161a34c753fSRafael Auler MemInfo MI = Res.get(); 1162a34c753fSRafael Auler 1163a34c753fSRafael Auler // Ignore memory events not involving known pc. 1164a34c753fSRafael Auler if (!MI.Offset.IsSymbol) 1165a34c753fSRafael Auler continue; 1166a34c753fSRafael Auler 1167a34c753fSRafael Auler StringMapIterator<FuncMemData> I = GetOrCreateFuncMemEntry(MI.Offset.Name); 1168a34c753fSRafael Auler I->getValue().Data.emplace_back(std::move(MI)); 1169a34c753fSRafael Auler } 1170a34c753fSRafael Auler 1171a34c753fSRafael Auler for (StringMapEntry<FuncSampleData> &FuncSamples : NamesToSamples) { 1172a34c753fSRafael Auler std::stable_sort(FuncSamples.second.Data.begin(), 1173a34c753fSRafael Auler FuncSamples.second.Data.end()); 1174a34c753fSRafael Auler } 1175a34c753fSRafael Auler 1176a34c753fSRafael Auler for (StringMapEntry<FuncMemData> &MemEvents : NamesToMemEvents) { 1177a34c753fSRafael Auler std::stable_sort(MemEvents.second.Data.begin(), 1178a34c753fSRafael Auler MemEvents.second.Data.end()); 1179a34c753fSRafael Auler } 1180a34c753fSRafael Auler 1181a34c753fSRafael Auler return std::error_code(); 1182a34c753fSRafael Auler } 1183a34c753fSRafael Auler 1184a34c753fSRafael Auler std::error_code DataReader::parse() { 1185a34c753fSRafael Auler auto GetOrCreateFuncEntry = [&](StringRef Name) { 1186a34c753fSRafael Auler auto I = NamesToBranches.find(Name); 1187a34c753fSRafael Auler if (I == NamesToBranches.end()) { 1188a34c753fSRafael Auler bool Success; 1189a34c753fSRafael Auler std::tie(I, Success) = NamesToBranches.insert(std::make_pair( 1190a34c753fSRafael Auler Name, FuncBranchData(Name, FuncBranchData::ContainerTy(), 1191a34c753fSRafael Auler FuncBranchData::ContainerTy()))); 1192a34c753fSRafael Auler assert(Success && "unexpected result of insert"); 1193a34c753fSRafael Auler } 1194a34c753fSRafael Auler return I; 1195a34c753fSRafael Auler }; 1196a34c753fSRafael Auler 1197a34c753fSRafael Auler auto GetOrCreateFuncMemEntry = [&](StringRef Name) { 1198a34c753fSRafael Auler auto I = NamesToMemEvents.find(Name); 1199a34c753fSRafael Auler if (I == NamesToMemEvents.end()) { 1200a34c753fSRafael Auler bool Success; 1201a34c753fSRafael Auler std::tie(I, Success) = NamesToMemEvents.insert( 1202a34c753fSRafael Auler std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy()))); 1203a34c753fSRafael Auler assert(Success && "unexpected result of insert"); 1204a34c753fSRafael Auler } 1205a34c753fSRafael Auler return I; 1206a34c753fSRafael Auler }; 1207a34c753fSRafael Auler 1208a34c753fSRafael Auler Col = 0; 1209a34c753fSRafael Auler Line = 1; 1210a34c753fSRafael Auler ErrorOr<bool> FlagOrErr = maybeParseNoLBRFlag(); 1211a34c753fSRafael Auler if (!FlagOrErr) 1212a34c753fSRafael Auler return FlagOrErr.getError(); 1213a34c753fSRafael Auler NoLBRMode = *FlagOrErr; 1214a34c753fSRafael Auler 1215a34c753fSRafael Auler ErrorOr<bool> BATFlagOrErr = maybeParseBATFlag(); 1216a34c753fSRafael Auler if (!BATFlagOrErr) 1217a34c753fSRafael Auler return BATFlagOrErr.getError(); 1218a34c753fSRafael Auler BATMode = *BATFlagOrErr; 1219a34c753fSRafael Auler 1220a34c753fSRafael Auler if (!hasBranchData() && !hasMemData()) { 1221a34c753fSRafael Auler Diag << "ERROR: no valid profile data found\n"; 1222a34c753fSRafael Auler return make_error_code(llvm::errc::io_error); 1223a34c753fSRafael Auler } 1224a34c753fSRafael Auler 1225a34c753fSRafael Auler if (NoLBRMode) 1226a34c753fSRafael Auler return parseInNoLBRMode(); 1227a34c753fSRafael Auler 1228a34c753fSRafael Auler while (hasBranchData()) { 1229a34c753fSRafael Auler ErrorOr<BranchInfo> Res = parseBranchInfo(); 1230a34c753fSRafael Auler if (std::error_code EC = Res.getError()) 1231a34c753fSRafael Auler return EC; 1232a34c753fSRafael Auler 1233a34c753fSRafael Auler BranchInfo BI = Res.get(); 1234a34c753fSRafael Auler 1235a34c753fSRafael Auler // Ignore branches not involving known location. 1236a34c753fSRafael Auler if (!BI.From.IsSymbol && !BI.To.IsSymbol) 1237a34c753fSRafael Auler continue; 1238a34c753fSRafael Auler 1239a34c753fSRafael Auler StringMapIterator<FuncBranchData> I = GetOrCreateFuncEntry(BI.From.Name); 1240a34c753fSRafael Auler I->getValue().Data.emplace_back(std::move(BI)); 1241a34c753fSRafael Auler 1242a34c753fSRafael Auler // Add entry data for branches to another function or branches 1243a34c753fSRafael Auler // to entry points (including recursive calls) 1244a34c753fSRafael Auler if (BI.To.IsSymbol && 1245a34c753fSRafael Auler (!BI.From.Name.equals(BI.To.Name) || BI.To.Offset == 0)) { 1246a34c753fSRafael Auler I = GetOrCreateFuncEntry(BI.To.Name); 1247a34c753fSRafael Auler I->getValue().EntryData.emplace_back(std::move(BI)); 1248a34c753fSRafael Auler } 1249a34c753fSRafael Auler 1250a34c753fSRafael Auler // If destination is the function start - update execution count. 1251a34c753fSRafael Auler // NB: the data is skewed since we cannot tell tail recursion from 1252a34c753fSRafael Auler // branches to the function start. 1253a34c753fSRafael Auler if (BI.To.IsSymbol && BI.To.Offset == 0) { 1254a34c753fSRafael Auler I = GetOrCreateFuncEntry(BI.To.Name); 1255a34c753fSRafael Auler I->getValue().ExecutionCount += BI.Branches; 1256a34c753fSRafael Auler } 1257a34c753fSRafael Auler } 1258a34c753fSRafael Auler 1259a34c753fSRafael Auler while (hasMemData()) { 1260a34c753fSRafael Auler ErrorOr<MemInfo> Res = parseMemInfo(); 1261a34c753fSRafael Auler if (std::error_code EC = Res.getError()) 1262a34c753fSRafael Auler return EC; 1263a34c753fSRafael Auler 1264a34c753fSRafael Auler MemInfo MI = Res.get(); 1265a34c753fSRafael Auler 1266a34c753fSRafael Auler // Ignore memory events not involving known pc. 1267a34c753fSRafael Auler if (!MI.Offset.IsSymbol) 1268a34c753fSRafael Auler continue; 1269a34c753fSRafael Auler 1270a34c753fSRafael Auler StringMapIterator<FuncMemData> I = GetOrCreateFuncMemEntry(MI.Offset.Name); 1271a34c753fSRafael Auler I->getValue().Data.emplace_back(std::move(MI)); 1272a34c753fSRafael Auler } 1273a34c753fSRafael Auler 1274a34c753fSRafael Auler for (StringMapEntry<FuncBranchData> &FuncBranches : NamesToBranches) { 1275a34c753fSRafael Auler std::stable_sort(FuncBranches.second.Data.begin(), 1276a34c753fSRafael Auler FuncBranches.second.Data.end()); 1277a34c753fSRafael Auler } 1278a34c753fSRafael Auler 1279a34c753fSRafael Auler for (StringMapEntry<FuncMemData> &MemEvents : NamesToMemEvents) { 1280a34c753fSRafael Auler std::stable_sort(MemEvents.second.Data.begin(), 1281a34c753fSRafael Auler MemEvents.second.Data.end()); 1282a34c753fSRafael Auler } 1283a34c753fSRafael Auler 1284a34c753fSRafael Auler return std::error_code(); 1285a34c753fSRafael Auler } 1286a34c753fSRafael Auler 1287a34c753fSRafael Auler void DataReader::buildLTONameMaps() { 1288a34c753fSRafael Auler for (StringMapEntry<FuncBranchData> &FuncData : NamesToBranches) { 1289a34c753fSRafael Auler const StringRef FuncName = FuncData.getKey(); 1290a34c753fSRafael Auler const Optional<StringRef> CommonName = getLTOCommonName(FuncName); 1291a34c753fSRafael Auler if (CommonName) 1292a34c753fSRafael Auler LTOCommonNameMap[*CommonName].push_back(&FuncData.getValue()); 1293a34c753fSRafael Auler } 1294a34c753fSRafael Auler 1295a34c753fSRafael Auler for (StringMapEntry<FuncMemData> &FuncData : NamesToMemEvents) { 1296a34c753fSRafael Auler const StringRef FuncName = FuncData.getKey(); 1297a34c753fSRafael Auler const Optional<StringRef> CommonName = getLTOCommonName(FuncName); 1298a34c753fSRafael Auler if (CommonName) 1299a34c753fSRafael Auler LTOCommonNameMemMap[*CommonName].push_back(&FuncData.getValue()); 1300a34c753fSRafael Auler } 1301a34c753fSRafael Auler } 1302a34c753fSRafael Auler 1303a34c753fSRafael Auler namespace { 1304a34c753fSRafael Auler template <typename MapTy> 1305a34c753fSRafael Auler decltype(MapTy::MapEntryTy::second) * 1306a34c753fSRafael Auler fetchMapEntry(MapTy &Map, const std::vector<MCSymbol *> &Symbols) { 1307a34c753fSRafael Auler // Do a reverse order iteration since the name in profile has a higher chance 1308a34c753fSRafael Auler // of matching a name at the end of the list. 1309a34c753fSRafael Auler for (auto SI = Symbols.rbegin(), SE = Symbols.rend(); SI != SE; ++SI) { 1310a34c753fSRafael Auler auto I = Map.find(normalizeName((*SI)->getName())); 1311a34c753fSRafael Auler if (I != Map.end()) 1312a34c753fSRafael Auler return &I->getValue(); 1313a34c753fSRafael Auler } 1314a34c753fSRafael Auler return nullptr; 1315a34c753fSRafael Auler } 1316a34c753fSRafael Auler 1317a34c753fSRafael Auler template <typename MapTy> 1318a34c753fSRafael Auler decltype(MapTy::MapEntryTy::second) * 1319a34c753fSRafael Auler fetchMapEntry(MapTy &Map, const std::vector<StringRef> &FuncNames) { 1320a34c753fSRafael Auler // Do a reverse order iteration since the name in profile has a higher chance 1321a34c753fSRafael Auler // of matching a name at the end of the list. 1322a34c753fSRafael Auler for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) { 1323a34c753fSRafael Auler auto I = Map.find(normalizeName(*FI)); 1324a34c753fSRafael Auler if (I != Map.end()) 1325a34c753fSRafael Auler return &I->getValue(); 1326a34c753fSRafael Auler } 1327a34c753fSRafael Auler return nullptr; 1328a34c753fSRafael Auler } 1329a34c753fSRafael Auler 1330a34c753fSRafael Auler template <typename MapTy> 133140c2e0faSMaksim Panchenko std::vector<decltype(MapTy::MapEntryTy::second) *> fetchMapEntriesRegex( 1332a34c753fSRafael Auler MapTy &Map, 133340c2e0faSMaksim Panchenko const StringMap<std::vector<decltype(MapTy::MapEntryTy::second) *>> 133440c2e0faSMaksim Panchenko <OCommonNameMap, 1335a34c753fSRafael Auler const std::vector<StringRef> &FuncNames) { 1336a34c753fSRafael Auler std::vector<decltype(MapTy::MapEntryTy::second) *> AllData; 1337a34c753fSRafael Auler // Do a reverse order iteration since the name in profile has a higher chance 1338a34c753fSRafael Auler // of matching a name at the end of the list. 1339a34c753fSRafael Auler for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) { 1340a34c753fSRafael Auler std::string Name = normalizeName(*FI); 1341a34c753fSRafael Auler const Optional<StringRef> LTOCommonName = getLTOCommonName(Name); 1342a34c753fSRafael Auler if (LTOCommonName) { 1343a34c753fSRafael Auler auto I = LTOCommonNameMap.find(*LTOCommonName); 1344a34c753fSRafael Auler if (I != LTOCommonNameMap.end()) { 1345a34c753fSRafael Auler const std::vector<decltype(MapTy::MapEntryTy::second) *> &CommonData = 1346a34c753fSRafael Auler I->getValue(); 1347a34c753fSRafael Auler AllData.insert(AllData.end(), CommonData.begin(), CommonData.end()); 1348a34c753fSRafael Auler } 1349a34c753fSRafael Auler } else { 1350a34c753fSRafael Auler auto I = Map.find(Name); 1351a34c753fSRafael Auler if (I != Map.end()) { 1352a34c753fSRafael Auler return {&I->getValue()}; 1353a34c753fSRafael Auler } 1354a34c753fSRafael Auler } 1355a34c753fSRafael Auler } 1356a34c753fSRafael Auler return AllData; 1357a34c753fSRafael Auler } 1358a34c753fSRafael Auler 1359a34c753fSRafael Auler } 1360a34c753fSRafael Auler 1361a34c753fSRafael Auler bool DataReader::mayHaveProfileData(const BinaryFunction &Function) { 1362a34c753fSRafael Auler if (getBranchData(Function) || getMemData(Function)) 1363a34c753fSRafael Auler return true; 1364a34c753fSRafael Auler 1365a34c753fSRafael Auler if (getBranchDataForNames(Function.getNames()) || 1366a34c753fSRafael Auler getMemDataForNames(Function.getNames())) 1367a34c753fSRafael Auler return true; 1368a34c753fSRafael Auler 1369a34c753fSRafael Auler if (!hasVolatileName(Function)) 1370a34c753fSRafael Auler return false; 1371a34c753fSRafael Auler 1372a34c753fSRafael Auler const std::vector<FuncBranchData *> AllBranchData = 1373a34c753fSRafael Auler getBranchDataForNamesRegex(Function.getNames()); 1374a34c753fSRafael Auler if (!AllBranchData.empty()) 1375a34c753fSRafael Auler return true; 1376a34c753fSRafael Auler 1377a34c753fSRafael Auler const std::vector<FuncMemData *> AllMemData = 1378a34c753fSRafael Auler getMemDataForNamesRegex(Function.getNames()); 1379a34c753fSRafael Auler if (!AllMemData.empty()) 1380a34c753fSRafael Auler return true; 1381a34c753fSRafael Auler 1382a34c753fSRafael Auler return false; 1383a34c753fSRafael Auler } 1384a34c753fSRafael Auler 1385a34c753fSRafael Auler FuncBranchData * 1386a34c753fSRafael Auler DataReader::getBranchDataForNames(const std::vector<StringRef> &FuncNames) { 1387a34c753fSRafael Auler return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, FuncNames); 1388a34c753fSRafael Auler } 1389a34c753fSRafael Auler 1390a34c753fSRafael Auler FuncBranchData * 1391a34c753fSRafael Auler DataReader::getBranchDataForSymbols(const std::vector<MCSymbol *> &Symbols) { 1392a34c753fSRafael Auler return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, Symbols); 1393a34c753fSRafael Auler } 1394a34c753fSRafael Auler 1395a34c753fSRafael Auler FuncMemData * 1396a34c753fSRafael Auler DataReader::getMemDataForNames(const std::vector<StringRef> &FuncNames) { 1397a34c753fSRafael Auler return fetchMapEntry<NamesToMemEventsMapTy>(NamesToMemEvents, FuncNames); 1398a34c753fSRafael Auler } 1399a34c753fSRafael Auler 1400a34c753fSRafael Auler FuncSampleData * 1401a34c753fSRafael Auler DataReader::getFuncSampleData(const std::vector<StringRef> &FuncNames) { 1402a34c753fSRafael Auler return fetchMapEntry<NamesToSamplesMapTy>(NamesToSamples, FuncNames); 1403a34c753fSRafael Auler } 1404a34c753fSRafael Auler 140540c2e0faSMaksim Panchenko std::vector<FuncBranchData *> DataReader::getBranchDataForNamesRegex( 1406a34c753fSRafael Auler const std::vector<StringRef> &FuncNames) { 1407a34c753fSRafael Auler return fetchMapEntriesRegex(NamesToBranches, LTOCommonNameMap, FuncNames); 1408a34c753fSRafael Auler } 1409a34c753fSRafael Auler 1410a34c753fSRafael Auler std::vector<FuncMemData *> 1411a34c753fSRafael Auler DataReader::getMemDataForNamesRegex(const std::vector<StringRef> &FuncNames) { 1412a34c753fSRafael Auler return fetchMapEntriesRegex(NamesToMemEvents, LTOCommonNameMemMap, FuncNames); 1413a34c753fSRafael Auler } 1414a34c753fSRafael Auler 1415a34c753fSRafael Auler bool DataReader::hasLocalsWithFileName() const { 1416a34c753fSRafael Auler for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) { 1417a34c753fSRafael Auler const StringRef &FuncName = Func.getKey(); 1418a34c753fSRafael Auler if (FuncName.count('/') == 2 && FuncName[0] != '/') 1419a34c753fSRafael Auler return true; 1420a34c753fSRafael Auler } 1421a34c753fSRafael Auler return false; 1422a34c753fSRafael Auler } 1423a34c753fSRafael Auler 1424a34c753fSRafael Auler void DataReader::dump() const { 1425a34c753fSRafael Auler for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) { 1426a34c753fSRafael Auler Diag << Func.getKey() << " branches:\n"; 1427a34c753fSRafael Auler for (const BranchInfo &BI : Func.getValue().Data) { 1428a34c753fSRafael Auler Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " " 1429a34c753fSRafael Auler << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n"; 1430a34c753fSRafael Auler } 1431a34c753fSRafael Auler Diag << Func.getKey() << " entry points:\n"; 1432a34c753fSRafael Auler for (const BranchInfo &BI : Func.getValue().EntryData) { 1433a34c753fSRafael Auler Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " " 1434a34c753fSRafael Auler << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n"; 1435a34c753fSRafael Auler } 1436a34c753fSRafael Auler } 1437a34c753fSRafael Auler 1438a34c753fSRafael Auler for (auto I = EventNames.begin(), E = EventNames.end(); I != E; ++I) { 1439a34c753fSRafael Auler StringRef Event = I->getKey(); 1440a34c753fSRafael Auler Diag << "Data was collected with event: " << Event << "\n"; 1441a34c753fSRafael Auler } 1442a34c753fSRafael Auler for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) { 1443a34c753fSRafael Auler Diag << Func.getKey() << " samples:\n"; 1444a34c753fSRafael Auler for (const SampleInfo &SI : Func.getValue().Data) { 144540c2e0faSMaksim Panchenko Diag << SI.Loc.Name << " " << SI.Loc.Offset << " " << SI.Hits << "\n"; 1446a34c753fSRafael Auler } 1447a34c753fSRafael Auler } 1448a34c753fSRafael Auler 1449a34c753fSRafael Auler for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) { 1450a34c753fSRafael Auler Diag << "Memory events for " << Func.getValue().Name; 1451a34c753fSRafael Auler Location LastOffset(0); 1452a34c753fSRafael Auler for (const MemInfo &MI : Func.getValue().Data) { 1453a34c753fSRafael Auler if (MI.Offset == LastOffset) { 1454a34c753fSRafael Auler Diag << ", " << MI.Addr << "/" << MI.Count; 1455a34c753fSRafael Auler } else { 1456a34c753fSRafael Auler Diag << "\n" << MI.Offset << ": " << MI.Addr << "/" << MI.Count; 1457a34c753fSRafael Auler } 1458a34c753fSRafael Auler LastOffset = MI.Offset; 1459a34c753fSRafael Auler } 1460a34c753fSRafael Auler Diag << "\n"; 1461a34c753fSRafael Auler } 1462a34c753fSRafael Auler } 1463a34c753fSRafael Auler 1464a34c753fSRafael Auler } // namespace bolt 1465a34c753fSRafael Auler } // namespace llvm 1466