//===- bolt/Profile/YAMLProfileWriter.cpp - YAML profile serializer -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "bolt/Profile/YAMLProfileWriter.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Profile/BoltAddressTranslation.h"
#include "bolt/Profile/DataAggregator.h"
#include "bolt/Profile/ProfileReaderBase.h"
#include "bolt/Rewrite/RewriteInstance.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"

#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt-prof"

namespace opts {
using namespace llvm;
extern cl::opt<bool> ProfileUseDFS;
cl::opt<bool> ProfileWritePseudoProbes(
    "profile-write-pseudo-probes",
    cl::desc("Use pseudo probes in profile generation"), cl::Hidden,
    cl::cat(BoltOptCategory));
} // namespace opts

namespace llvm {
namespace bolt {

const BinaryFunction *YAMLProfileWriter::setCSIDestination(
    const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI,
    const MCSymbol *Symbol, const BoltAddressTranslation *BAT,
    uint32_t Offset) {
  CSI.DestId = 0; // designated for unknown functions
  CSI.EntryDiscriminator = 0;

  if (Symbol) {
    uint64_t EntryID = 0;
    if (const BinaryFunction *Callee =
            BC.getFunctionForSymbol(Symbol, &EntryID)) {
      if (BAT && BAT->isBATFunction(Callee->getAddress()))
        std::tie(Callee, EntryID) = BAT->translateSymbol(BC, *Symbol, Offset);
      else if (const BinaryBasicBlock *BB =
                   Callee->getBasicBlockContainingOffset(Offset))
        BC.getFunctionForSymbol(Callee->getSecondaryEntryPointSymbol(*BB),
                                &EntryID);
      CSI.DestId = Callee->getFunctionNumber();
      CSI.EntryDiscriminator = EntryID;
      return Callee;
    }
  }
  return nullptr;
}

std::vector<YAMLProfileWriter::InlineTreeNode>
YAMLProfileWriter::collectInlineTree(
    const MCPseudoProbeDecoder &Decoder,
    const MCDecodedPseudoProbeInlineTree &Root) {
  auto getHash = [&](const MCDecodedPseudoProbeInlineTree &Node) {
    return Decoder.getFuncDescForGUID(Node.Guid)->FuncHash;
  };
  std::vector<InlineTreeNode> InlineTree(
      {InlineTreeNode{&Root, Root.Guid, getHash(Root), 0, 0}});
  uint32_t ParentId = 0;
  while (ParentId != InlineTree.size()) {
    const MCDecodedPseudoProbeInlineTree *Cur = InlineTree[ParentId].InlineTree;
    for (const MCDecodedPseudoProbeInlineTree &Child : Cur->getChildren())
      InlineTree.emplace_back(
          InlineTreeNode{&Child, Child.Guid, getHash(Child), ParentId,
                         std::get<1>(Child.getInlineSite())});
    ++ParentId;
  }

  return InlineTree;
}

std::tuple<yaml::bolt::ProfilePseudoProbeDesc,
           YAMLProfileWriter::InlineTreeDesc>
YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) {
  yaml::bolt::ProfilePseudoProbeDesc Desc;
  InlineTreeDesc InlineTree;

  for (const MCDecodedPseudoProbeInlineTree &TopLev :
       Decoder.getDummyInlineRoot().getChildren())
    InlineTree.TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev;

  for (const auto &FuncDesc : Decoder.getGUID2FuncDescMap())
    ++InlineTree.HashIdxMap[FuncDesc.FuncHash];

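  // Count how many inline tree nodes reference each GUID; GUIDs and hashes
  // are then indexed in decreasing frequency order below, so the most
  // frequently referenced ones receive the smallest indices.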
  InlineTree.GUIDIdxMap.reserve(Decoder.getGUID2FuncDescMap().size());
  for (const auto &Node : Decoder.getInlineTreeVec())
    ++InlineTree.GUIDIdxMap[Node.Guid];

  std::vector<std::pair<uint32_t, uint64_t>> GUIDFreqVec;
  GUIDFreqVec.reserve(InlineTree.GUIDIdxMap.size());
  for (const auto [GUID, Cnt] : InlineTree.GUIDIdxMap)
    GUIDFreqVec.emplace_back(Cnt, GUID);
  llvm::sort(GUIDFreqVec);

  std::vector<std::pair<uint32_t, uint64_t>> HashFreqVec;
  HashFreqVec.reserve(InlineTree.HashIdxMap.size());
  for (const auto [Hash, Cnt] : InlineTree.HashIdxMap)
    HashFreqVec.emplace_back(Cnt, Hash);
  llvm::sort(HashFreqVec);

  uint32_t Index = 0;
  Desc.Hash.reserve(HashFreqVec.size());
  for (uint64_t Hash : llvm::make_second_range(llvm::reverse(HashFreqVec))) {
    Desc.Hash.emplace_back(Hash);
    InlineTree.HashIdxMap[Hash] = Index++;
  }

  Index = 0;
  Desc.GUID.reserve(GUIDFreqVec.size());
  for (uint64_t GUID : llvm::make_second_range(llvm::reverse(GUIDFreqVec))) {
    Desc.GUID.emplace_back(GUID);
    InlineTree.GUIDIdxMap[GUID] = Index++;
    uint64_t Hash = Decoder.getFuncDescForGUID(GUID)->FuncHash;
    Desc.GUIDHashIdx.emplace_back(InlineTree.HashIdxMap[Hash]);
  }

  return {Desc, InlineTree};
}

std::vector<yaml::bolt::PseudoProbeInfo>
YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) {
  struct BlockProbeInfoHasher {
    size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const {
      auto HashCombine = [](auto &Range) {
        return llvm::hash_combine_range(Range.begin(), Range.end());
      };
      return llvm::hash_combine(HashCombine(BPI.BlockProbes),
                                HashCombine(BPI.CallProbes),
                                HashCombine(BPI.IndCallProbes));
    }
  };

  // Check for identical PseudoProbeInfo structs and merge their node IDs.
  std::unordered_map<yaml::bolt::PseudoProbeInfo, std::vector<uint32_t>,
                     BlockProbeInfoHasher>
      BPIToNodes;
  for (auto &[NodeId, Probes] : NodeProbes) {
    yaml::bolt::PseudoProbeInfo BPI;
    BPI.BlockProbes = std::vector(Probes[0].begin(), Probes[0].end());
    BPI.IndCallProbes = std::vector(Probes[1].begin(), Probes[1].end());
    BPI.CallProbes = std::vector(Probes[2].begin(), Probes[2].end());
    BPIToNodes[BPI].push_back(NodeId);
  }

  // Pack probe IDs 1..64 into a 64-bit mask; larger IDs stay as explicit
  // entries. E.g. block probe IDs {1, 2, 65} become Mask = 0b11, Vec = [65].
  auto handleMask = [](const auto &Ids, auto &Vec, auto &Mask) {
    for (auto Id : Ids)
      if (Id > 64)
        Vec.emplace_back(Id);
      else
        Mask |= 1ull << (Id - 1);
  };

  // Add to YAML with merged nodes/block mask optimizations
  std::vector<yaml::bolt::PseudoProbeInfo> YamlProbes;
  YamlProbes.reserve(BPIToNodes.size());
  for (const auto &[BPI, Nodes] : BPIToNodes) {
    auto &YamlBPI = YamlProbes.emplace_back(yaml::bolt::PseudoProbeInfo());
    YamlBPI.CallProbes = BPI.CallProbes;
    YamlBPI.IndCallProbes = BPI.IndCallProbes;
    if (Nodes.size() == 1)
      YamlBPI.InlineTreeIndex = Nodes.front();
    else
      YamlBPI.InlineTreeNodes = Nodes;
    handleMask(BPI.BlockProbes, YamlBPI.BlockProbes, YamlBPI.BlockMask);
  }
  return YamlProbes;
}

std::tuple<std::vector<yaml::bolt::InlineTreeNode>,
           YAMLProfileWriter::InlineTreeMapTy>
YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
                                       const InlineTreeDesc &InlineTree,
                                       uint64_t GUID) {
  DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
  std::vector<yaml::bolt::InlineTreeNode> YamlInlineTree;
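  // Look up the decoded inline tree rooted at this function's top-level GUID.
  // If there is none, the function has no probe information and an empty
  // tree is returned.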
  auto It = InlineTree.TopLevelGUIDToInlineTree.find(GUID);
  if (It == InlineTree.TopLevelGUIDToInlineTree.end())
    return {YamlInlineTree, InlineTreeNodeId};
  const MCDecodedPseudoProbeInlineTree *Root = It->second;
  assert(Root && "Malformed TopLevelGUIDToInlineTree");
  uint32_t Index = 0;
  uint32_t PrevParent = 0;
  uint32_t PrevGUIDIdx = 0;
  for (const auto &Node : collectInlineTree(Decoder, *Root)) {
    InlineTreeNodeId[Node.InlineTree] = Index++;
    auto GUIDIdxIt = InlineTree.GUIDIdxMap.find(Node.GUID);
    assert(GUIDIdxIt != InlineTree.GUIDIdxMap.end() && "Malformed GUIDIdxMap");
    uint32_t GUIDIdx = GUIDIdxIt->second;
    if (GUIDIdx == PrevGUIDIdx)
      GUIDIdx = UINT32_MAX;
    else
      PrevGUIDIdx = GUIDIdx;
    YamlInlineTree.emplace_back(yaml::bolt::InlineTreeNode{
        Node.ParentId - PrevParent, Node.InlineSite, GUIDIdx, 0, 0});
    PrevParent = Node.ParentId;
  }
  return {YamlInlineTree, InlineTreeNodeId};
}

yaml::bolt::BinaryFunctionProfile
YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
                           const InlineTreeDesc &InlineTree,
                           const BoltAddressTranslation *BAT) {
  yaml::bolt::BinaryFunctionProfile YamlBF;
  const BinaryContext &BC = BF.getBinaryContext();
  const MCPseudoProbeDecoder *PseudoProbeDecoder =
      opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;

  const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_LBR;

  // Prepare function and block hashes
  BF.computeHash(UseDFS);
  BF.computeBlockHashes();

  YamlBF.Name = DataAggregator::getLocationName(BF, BAT);
  YamlBF.Id = BF.getFunctionNumber();
  YamlBF.Hash = BF.getHash();
  YamlBF.NumBasicBlocks = BF.size();
  YamlBF.ExecCount = BF.getKnownExecutionCount();
  DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
  if (PseudoProbeDecoder && BF.getGUID()) {
    std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
        convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF.getGUID());
  }

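  // Emit blocks in DFS or binary layout order. Layout indices are refreshed
  // to match this order, since the profile references blocks by index.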
  BinaryFunction::BasicBlockOrderType Order;
  llvm::copy(UseDFS ? BF.dfs() : BF.getLayout().blocks(),
             std::back_inserter(Order));

  const FunctionLayout Layout = BF.getLayout();
  Layout.updateLayoutIndices(Order);

  for (const BinaryBasicBlock *BB : Order) {
    yaml::bolt::BinaryBasicBlockProfile YamlBB;
    YamlBB.Index = BB->getLayoutIndex();
    YamlBB.NumInstructions = BB->getNumNonPseudos();
    YamlBB.Hash = BB->getHash();

    if (!LBRProfile) {
      YamlBB.EventCount = BB->getKnownExecutionCount();
      if (YamlBB.EventCount)
        YamlBF.Blocks.emplace_back(YamlBB);
      continue;
    }

    YamlBB.ExecCount = BB->getKnownExecutionCount();

    for (const MCInst &Instr : *BB) {
      if (!BC.MIB->isCall(Instr) && !BC.MIB->isIndirectBranch(Instr))
        continue;

      SmallVector<std::pair<StringRef, yaml::bolt::CallSiteInfo>> CSTargets;
      yaml::bolt::CallSiteInfo CSI;
      std::optional<uint32_t> Offset = BC.MIB->getOffset(Instr);
      if (!Offset || *Offset < BB->getInputOffset())
        continue;
      CSI.Offset = *Offset - BB->getInputOffset();

      if (BC.MIB->isIndirectCall(Instr) || BC.MIB->isIndirectBranch(Instr)) {
        const auto ICSP = BC.MIB->tryGetAnnotationAs<IndirectCallSiteProfile>(
            Instr, "CallProfile");
        if (!ICSP)
          continue;
        for (const IndirectCallProfile &CSP : ICSP.get()) {
          StringRef TargetName = "";
          const BinaryFunction *Callee =
              setCSIDestination(BC, CSI, CSP.Symbol, BAT);
          if (Callee)
            TargetName = Callee->getOneName();
          CSI.Count = CSP.Count;
          CSI.Mispreds = CSP.Mispreds;
          CSTargets.emplace_back(TargetName, CSI);
        }
      } else { // direct call or a tail call
        StringRef TargetName = "";
        const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(Instr);
        const BinaryFunction *const Callee =
            setCSIDestination(BC, CSI, CalleeSymbol, BAT);
        if (Callee)
          TargetName = Callee->getOneName();

        auto getAnnotationWithDefault = [&](const MCInst &Inst, StringRef Ann) {
          return BC.MIB->getAnnotationWithDefault(Inst, Ann, 0ull);
        };
        if (BC.MIB->getConditionalTailCall(Instr)) {
          CSI.Count = getAnnotationWithDefault(Instr, "CTCTakenCount");
          CSI.Mispreds = getAnnotationWithDefault(Instr, "CTCMispredCount");
        } else {
          CSI.Count = getAnnotationWithDefault(Instr, "Count");
        }

        if (CSI.Count)
          CSTargets.emplace_back(TargetName, CSI);
      }
      // Sort targets in a similar way to getBranchData, see Location::operator<
      llvm::sort(CSTargets, [](const auto &RHS, const auto &LHS) {
        if (RHS.first != LHS.first)
          return RHS.first < LHS.first;
        return RHS.second.Offset < LHS.second.Offset;
      });
      for (auto &KV : CSTargets)
        YamlBB.CallSites.push_back(KV.second);
    }

    // Skip printing if there's no profile data for non-entry basic block.
    // Include landing pads with non-zero execution count.
    if (YamlBB.CallSites.empty() && !BB->isEntryPoint() &&
        !(BB->isLandingPad() && BB->getKnownExecutionCount() != 0)) {
      // Include blocks having successors or predecessors with positive counts.
      uint64_t SuccessorExecCount = 0;
      for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
           BB->branch_info())
        SuccessorExecCount += BranchInfo.Count;
      uint64_t PredecessorExecCount = 0;
      for (auto Pred : BB->predecessors())
        PredecessorExecCount += Pred->getBranchInfo(*BB).Count;
      if (!SuccessorExecCount && !PredecessorExecCount)
        continue;
    }

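    // Emit the taken count and misprediction count for each successor edge;
    // the branch info iterator advances in lock step with the successor list.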
    auto BranchInfo = BB->branch_info_begin();
    for (const BinaryBasicBlock *Successor : BB->successors()) {
      yaml::bolt::SuccessorInfo YamlSI;
      YamlSI.Index = Successor->getLayoutIndex();
      YamlSI.Count = BranchInfo->Count;
      YamlSI.Mispreds = BranchInfo->MispredictedCount;

      YamlBB.Successors.emplace_back(YamlSI);

      ++BranchInfo;
    }

    if (PseudoProbeDecoder) {
      const AddressProbesMap &ProbeMap =
          PseudoProbeDecoder->getAddress2ProbesMap();
      const uint64_t FuncAddr = BF.getAddress();
      const std::pair<uint64_t, uint64_t> &BlockRange =
          BB->getInputAddressRange();
      const std::pair<uint64_t, uint64_t> BlockAddrRange = {
          FuncAddr + BlockRange.first, FuncAddr + BlockRange.second};
      auto Probes = ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second);
      YamlBB.PseudoProbes = writeBlockProbes(Probes, InlineTreeNodeId);
    }

    YamlBF.Blocks.emplace_back(YamlBB);
  }
  return YamlBF;
}

std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) {
  const BinaryContext &BC = RI.getBinaryContext();
  const auto &Functions = BC.getBinaryFunctions();

  std::error_code EC;
  OS = std::make_unique<raw_fd_ostream>(Filename, EC, sys::fs::OF_None);
  if (EC) {
    errs() << "BOLT-WARNING: " << EC.message() << " : unable to open "
           << Filename << " for output.\n";
    return EC;
  }

  yaml::bolt::BinaryProfile BP;

  // Fill out the header info.
  BP.Header.Version = 1;
  BP.Header.FileName = std::string(BC.getFilename());
  std::optional<StringRef> BuildID = BC.getFileBuildID();
  BP.Header.Id = BuildID ? std::string(*BuildID) : "<unknown>";
  BP.Header.Origin = std::string(RI.getProfileReader()->getReaderName());
  BP.Header.IsDFSOrder = opts::ProfileUseDFS;
  BP.Header.HashFunction = HashFunction::Default;

  StringSet<> EventNames = RI.getProfileReader()->getEventNames();
  if (!EventNames.empty()) {
    std::string Sep;
    for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames) {
      BP.Header.EventNames += Sep + EventEntry.first().str();
      Sep = ",";
    }
  }

  // Make sure the profile is consistent across all functions.
  uint16_t ProfileFlags = BinaryFunction::PF_NONE;
  for (const auto &BFI : Functions) {
    const BinaryFunction &BF = BFI.second;
    if (BF.hasProfile() && !BF.empty()) {
      assert(BF.getProfileFlags() != BinaryFunction::PF_NONE);
      if (ProfileFlags == BinaryFunction::PF_NONE)
        ProfileFlags = BF.getProfileFlags();

      assert(BF.getProfileFlags() == ProfileFlags &&
             "expected consistent profile flags across all functions");
    }
  }
  BP.Header.Flags = ProfileFlags;

  // Add probe inline tree nodes.
  InlineTreeDesc InlineTree;
  if (const MCPseudoProbeDecoder *Decoder =
          opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr)
    std::tie(BP.PseudoProbeDesc, InlineTree) = convertPseudoProbeDesc(*Decoder);

  // Add all function objects.
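  // Functions with invalid profiles are emitted only when the profile reader
  // is a trusted source; otherwise they are skipped below.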
  for (const auto &BFI : Functions) {
    const BinaryFunction &BF = BFI.second;
    if (BF.hasProfile()) {
      if (!BF.hasValidProfile() && !RI.getProfileReader()->isTrustedSource())
        continue;

      BP.Functions.emplace_back(convert(BF, opts::ProfileUseDFS, InlineTree));
    }
  }

  // Write the profile.
  yaml::Output Out(*OS, nullptr, 0);
  Out << BP;

  return std::error_code();
}

} // namespace bolt
} // namespace llvm