1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "CFGMST.h" 51 #include "llvm/ADT/APInt.h" 52 #include "llvm/ADT/ArrayRef.h" 53 #include "llvm/ADT/STLExtras.h" 54 #include "llvm/ADT/SmallVector.h" 55 #include "llvm/ADT/Statistic.h" 56 #include "llvm/ADT/StringRef.h" 57 #include "llvm/ADT/Triple.h" 58 #include "llvm/ADT/Twine.h" 59 #include "llvm/ADT/iterator.h" 60 #include "llvm/ADT/iterator_range.h" 61 #include "llvm/Analysis/BlockFrequencyInfo.h" 62 #include "llvm/Analysis/BranchProbabilityInfo.h" 63 #include "llvm/Analysis/CFG.h" 64 #include "llvm/Analysis/IndirectCallVisitor.h" 65 #include "llvm/Analysis/LoopInfo.h" 66 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 67 #include "llvm/Analysis/ProfileSummaryInfo.h" 68 #include "llvm/IR/Attributes.h" 69 #include "llvm/IR/BasicBlock.h" 70 #include "llvm/IR/CFG.h" 71 #include "llvm/IR/CallSite.h" 72 #include "llvm/IR/Comdat.h" 73 #include "llvm/IR/Constant.h" 74 #include "llvm/IR/Constants.h" 75 #include "llvm/IR/DiagnosticInfo.h" 76 #include "llvm/IR/Dominators.h" 77 #include "llvm/IR/Function.h" 78 #include "llvm/IR/GlobalAlias.h" 79 #include "llvm/IR/GlobalValue.h" 80 #include "llvm/IR/GlobalVariable.h" 81 #include "llvm/IR/IRBuilder.h" 82 #include "llvm/IR/InstVisitor.h" 83 #include "llvm/IR/InstrTypes.h" 84 #include "llvm/IR/Instruction.h" 85 #include "llvm/IR/Instructions.h" 86 #include "llvm/IR/IntrinsicInst.h" 87 #include "llvm/IR/Intrinsics.h" 88 #include "llvm/IR/LLVMContext.h" 89 #include "llvm/IR/MDBuilder.h" 90 #include "llvm/IR/Module.h" 91 #include "llvm/IR/PassManager.h" 92 #include "llvm/IR/ProfileSummary.h" 93 #include "llvm/IR/Type.h" 94 #include "llvm/IR/Value.h" 95 #include "llvm/Pass.h" 96 #include "llvm/ProfileData/InstrProf.h" 97 #include "llvm/ProfileData/InstrProfReader.h" 98 #include "llvm/Support/BranchProbability.h" 99 #include "llvm/Support/Casting.h" 100 #include "llvm/Support/CommandLine.h" 101 #include "llvm/Support/DOTGraphTraits.h" 102 #include "llvm/Support/Debug.h" 103 #include "llvm/Support/Error.h" 104 #include "llvm/Support/ErrorHandling.h" 105 #include "llvm/Support/GraphWriter.h" 106 #include "llvm/Support/JamCRC.h" 107 #include "llvm/Support/raw_ostream.h" 108 #include "llvm/Transforms/Instrumentation.h" 109 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 110 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 111 #include <algorithm> 112 #include <cassert> 113 #include <cstdint> 114 #include <memory> 115 #include <numeric> 116 #include <string> 117 #include <unordered_map> 118 #include <utility> 119 #include <vector> 120 121 using namespace llvm; 122 using ProfileCount = Function::ProfileCount; 123 124 #define DEBUG_TYPE "pgo-instrumentation" 125 126 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 127 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 128 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 129 STATISTIC(NumOfPGOEdge, "Number of edges."); 130 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 131 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 132 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 133 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 134 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 135 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 136 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 137 STATISTIC(NumOfCSPGOSelectInsts, 138 "Number of select instruction instrumented in CSPGO."); 139 STATISTIC(NumOfCSPGOMemIntrinsics, 140 "Number of mem intrinsics instrumented in CSPGO."); 141 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 142 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 143 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 144 STATISTIC(NumOfCSPGOFunc, 145 "Number of functions having valid profile counts in CSPGO."); 146 STATISTIC(NumOfCSPGOMismatch, 147 "Number of functions having mismatch profile in CSPGO."); 148 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 149 150 // Command line option to specify the file to read profile from. This is 151 // mainly used for testing. 152 static cl::opt<std::string> 153 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 154 cl::value_desc("filename"), 155 cl::desc("Specify the path of profile data file. This is" 156 "mainly for test purpose.")); 157 static cl::opt<std::string> PGOTestProfileRemappingFile( 158 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 159 cl::value_desc("filename"), 160 cl::desc("Specify the path of profile remapping file. This is mainly for " 161 "test purpose.")); 162 163 // Command line option to disable value profiling. The default is false: 164 // i.e. value profiling is enabled by default. This is for debug purpose. 165 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 166 cl::Hidden, 167 cl::desc("Disable Value Profiling")); 168 169 // Command line option to set the maximum number of VP annotations to write to 170 // the metadata for a single indirect call callsite. 171 static cl::opt<unsigned> MaxNumAnnotations( 172 "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, 173 cl::desc("Max number of annotations for a single indirect " 174 "call callsite")); 175 176 // Command line option to set the maximum number of value annotations 177 // to write to the metadata for a single memop intrinsic. 178 static cl::opt<unsigned> MaxNumMemOPAnnotations( 179 "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, 180 cl::desc("Max number of preicise value annotations for a single memop" 181 "intrinsic")); 182 183 // Command line option to control appending FunctionHash to the name of a COMDAT 184 // function. This is to avoid the hash mismatch caused by the preinliner. 185 static cl::opt<bool> DoComdatRenaming( 186 "do-comdat-renaming", cl::init(false), cl::Hidden, 187 cl::desc("Append function hash to the name of COMDAT function to avoid " 188 "function hash mismatch due to the preinliner")); 189 190 // Command line option to enable/disable the warning about missing profile 191 // information. 192 static cl::opt<bool> 193 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 194 cl::desc("Use this option to turn on/off " 195 "warnings about missing profile data for " 196 "functions.")); 197 198 // Command line option to enable/disable the warning about a hash mismatch in 199 // the profile data. 200 static cl::opt<bool> 201 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 202 cl::desc("Use this option to turn off/on " 203 "warnings about profile cfg mismatch.")); 204 205 // Command line option to enable/disable the warning about a hash mismatch in 206 // the profile data for Comdat functions, which often turns out to be false 207 // positive due to the pre-instrumentation inline. 208 static cl::opt<bool> 209 NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), 210 cl::Hidden, 211 cl::desc("The option is used to turn on/off " 212 "warnings about hash mismatch for comdat " 213 "functions.")); 214 215 // Command line option to enable/disable select instruction instrumentation. 216 static cl::opt<bool> 217 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 218 cl::desc("Use this option to turn on/off SELECT " 219 "instruction instrumentation. ")); 220 221 // Command line option to turn on CFG dot or text dump of raw profile counts 222 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 223 "pgo-view-raw-counts", cl::Hidden, 224 cl::desc("A boolean option to show CFG dag or text " 225 "with raw profile counts from " 226 "profile data. See also option " 227 "-pgo-view-counts. To limit graph " 228 "display to only one function, use " 229 "filtering option -view-bfi-func-name."), 230 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 231 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 232 clEnumValN(PGOVCT_Text, "text", "show in text."))); 233 234 // Command line option to enable/disable memop intrinsic call.size profiling. 235 static cl::opt<bool> 236 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 237 cl::desc("Use this option to turn on/off " 238 "memory intrinsic size profiling.")); 239 240 // Emit branch probability as optimization remarks. 241 static cl::opt<bool> 242 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 243 cl::desc("When this option is on, the annotated " 244 "branch probability will be emitted as " 245 "optimization remarks: -{Rpass|" 246 "pass-remarks}=pgo-instrumentation")); 247 248 // Command line option to turn on CFG dot dump after profile annotation. 249 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 250 extern cl::opt<PGOViewCountsType> PGOViewCounts; 251 252 // Command line option to specify the name of the function for CFG dump 253 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 254 extern cl::opt<std::string> ViewBlockFreqFuncName; 255 256 // Return a string describing the branch condition that can be 257 // used in static branch probability heuristics: 258 static std::string getBranchCondString(Instruction *TI) { 259 BranchInst *BI = dyn_cast<BranchInst>(TI); 260 if (!BI || !BI->isConditional()) 261 return std::string(); 262 263 Value *Cond = BI->getCondition(); 264 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 265 if (!CI) 266 return std::string(); 267 268 std::string result; 269 raw_string_ostream OS(result); 270 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 271 CI->getOperand(0)->getType()->print(OS, true); 272 273 Value *RHS = CI->getOperand(1); 274 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 275 if (CV) { 276 if (CV->isZero()) 277 OS << "_Zero"; 278 else if (CV->isOne()) 279 OS << "_One"; 280 else if (CV->isMinusOne()) 281 OS << "_MinusOne"; 282 else 283 OS << "_Const"; 284 } 285 OS.flush(); 286 return result; 287 } 288 289 namespace { 290 291 /// The select instruction visitor plays three roles specified 292 /// by the mode. In \c VM_counting mode, it simply counts the number of 293 /// select instructions. In \c VM_instrument mode, it inserts code to count 294 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 295 /// it reads the profile data and annotate the select instruction with metadata. 296 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 297 class PGOUseFunc; 298 299 /// Instruction Visitor class to visit select instructions. 300 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 301 Function &F; 302 unsigned NSIs = 0; // Number of select instructions instrumented. 303 VisitMode Mode = VM_counting; // Visiting mode. 304 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 305 unsigned TotalNumCtrs = 0; // Total number of counters 306 GlobalVariable *FuncNameVar = nullptr; 307 uint64_t FuncHash = 0; 308 PGOUseFunc *UseFunc = nullptr; 309 310 SelectInstVisitor(Function &Func) : F(Func) {} 311 312 void countSelects(Function &Func) { 313 NSIs = 0; 314 Mode = VM_counting; 315 visit(Func); 316 } 317 318 // Visit the IR stream and instrument all select instructions. \p 319 // Ind is a pointer to the counter index variable; \p TotalNC 320 // is the total number of counters; \p FNV is the pointer to the 321 // PGO function name var; \p FHash is the function hash. 322 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 323 GlobalVariable *FNV, uint64_t FHash) { 324 Mode = VM_instrument; 325 CurCtrIdx = Ind; 326 TotalNumCtrs = TotalNC; 327 FuncHash = FHash; 328 FuncNameVar = FNV; 329 visit(Func); 330 } 331 332 // Visit the IR stream and annotate all select instructions. 333 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 334 Mode = VM_annotate; 335 UseFunc = UF; 336 CurCtrIdx = Ind; 337 visit(Func); 338 } 339 340 void instrumentOneSelectInst(SelectInst &SI); 341 void annotateOneSelectInst(SelectInst &SI); 342 343 // Visit \p SI instruction and perform tasks according to visit mode. 344 void visitSelectInst(SelectInst &SI); 345 346 // Return the number of select instructions. This needs be called after 347 // countSelects(). 348 unsigned getNumOfSelectInsts() const { return NSIs; } 349 }; 350 351 /// Instruction Visitor class to visit memory intrinsic calls. 352 struct MemIntrinsicVisitor : public InstVisitor<MemIntrinsicVisitor> { 353 Function &F; 354 unsigned NMemIs = 0; // Number of memIntrinsics instrumented. 355 VisitMode Mode = VM_counting; // Visiting mode. 356 unsigned CurCtrId = 0; // Current counter index. 357 unsigned TotalNumCtrs = 0; // Total number of counters 358 GlobalVariable *FuncNameVar = nullptr; 359 uint64_t FuncHash = 0; 360 PGOUseFunc *UseFunc = nullptr; 361 std::vector<Instruction *> Candidates; 362 363 MemIntrinsicVisitor(Function &Func) : F(Func) {} 364 365 void countMemIntrinsics(Function &Func) { 366 NMemIs = 0; 367 Mode = VM_counting; 368 visit(Func); 369 } 370 371 void instrumentMemIntrinsics(Function &Func, unsigned TotalNC, 372 GlobalVariable *FNV, uint64_t FHash) { 373 Mode = VM_instrument; 374 TotalNumCtrs = TotalNC; 375 FuncHash = FHash; 376 FuncNameVar = FNV; 377 visit(Func); 378 } 379 380 std::vector<Instruction *> findMemIntrinsics(Function &Func) { 381 Candidates.clear(); 382 Mode = VM_annotate; 383 visit(Func); 384 return Candidates; 385 } 386 387 // Visit the IR stream and annotate all mem intrinsic call instructions. 388 void instrumentOneMemIntrinsic(MemIntrinsic &MI); 389 390 // Visit \p MI instruction and perform tasks according to visit mode. 391 void visitMemIntrinsic(MemIntrinsic &SI); 392 393 unsigned getNumOfMemIntrinsics() const { return NMemIs; } 394 }; 395 396 class PGOInstrumentationGenLegacyPass : public ModulePass { 397 public: 398 static char ID; 399 400 PGOInstrumentationGenLegacyPass(bool IsCS = false) 401 : ModulePass(ID), IsCS(IsCS) { 402 initializePGOInstrumentationGenLegacyPassPass( 403 *PassRegistry::getPassRegistry()); 404 } 405 406 StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } 407 408 private: 409 // Is this is context-sensitive instrumentation. 410 bool IsCS; 411 bool runOnModule(Module &M) override; 412 413 void getAnalysisUsage(AnalysisUsage &AU) const override { 414 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 415 } 416 }; 417 418 class PGOInstrumentationUseLegacyPass : public ModulePass { 419 public: 420 static char ID; 421 422 // Provide the profile filename as the parameter. 423 PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) 424 : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { 425 if (!PGOTestProfileFile.empty()) 426 ProfileFileName = PGOTestProfileFile; 427 initializePGOInstrumentationUseLegacyPassPass( 428 *PassRegistry::getPassRegistry()); 429 } 430 431 StringRef getPassName() const override { return "PGOInstrumentationUsePass"; } 432 433 private: 434 std::string ProfileFileName; 435 // Is this is context-sensitive instrumentation use. 436 bool IsCS; 437 438 bool runOnModule(Module &M) override; 439 440 void getAnalysisUsage(AnalysisUsage &AU) const override { 441 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 442 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 443 } 444 }; 445 446 class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass { 447 public: 448 static char ID; 449 StringRef getPassName() const override { 450 return "PGOInstrumentationGenCreateVarPass"; 451 } 452 PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") 453 : ModulePass(ID), InstrProfileOutput(CSInstrName) { 454 initializePGOInstrumentationGenCreateVarLegacyPassPass( 455 *PassRegistry::getPassRegistry()); 456 } 457 458 private: 459 bool runOnModule(Module &M) override { 460 createProfileFileNameVar(M, InstrProfileOutput); 461 createIRLevelProfileFlagVar(M, true); 462 return false; 463 } 464 std::string InstrProfileOutput; 465 }; 466 467 } // end anonymous namespace 468 469 char PGOInstrumentationGenLegacyPass::ID = 0; 470 471 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 472 "PGO instrumentation.", false, false) 473 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 474 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 475 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 476 "PGO instrumentation.", false, false) 477 478 ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { 479 return new PGOInstrumentationGenLegacyPass(IsCS); 480 } 481 482 char PGOInstrumentationUseLegacyPass::ID = 0; 483 484 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 485 "Read PGO instrumentation profile.", false, false) 486 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 487 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 488 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 489 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 490 "Read PGO instrumentation profile.", false, false) 491 492 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, 493 bool IsCS) { 494 return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); 495 } 496 497 char PGOInstrumentationGenCreateVarLegacyPass::ID = 0; 498 499 INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, 500 "pgo-instr-gen-create-var", 501 "Create PGO instrumentation version variable for CSPGO.", false, 502 false) 503 504 ModulePass * 505 llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { 506 return new PGOInstrumentationGenCreateVarLegacyPass(CSInstrName); 507 } 508 509 namespace { 510 511 /// An MST based instrumentation for PGO 512 /// 513 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 514 /// in the function level. 515 struct PGOEdge { 516 // This class implements the CFG edges. Note the CFG can be a multi-graph. 517 // So there might be multiple edges with same SrcBB and DestBB. 518 const BasicBlock *SrcBB; 519 const BasicBlock *DestBB; 520 uint64_t Weight; 521 bool InMST = false; 522 bool Removed = false; 523 bool IsCritical = false; 524 525 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 526 : SrcBB(Src), DestBB(Dest), Weight(W) {} 527 528 // Return the information string of an edge. 529 const std::string infoString() const { 530 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 531 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 532 } 533 }; 534 535 // This class stores the auxiliary information for each BB. 536 struct BBInfo { 537 BBInfo *Group; 538 uint32_t Index; 539 uint32_t Rank = 0; 540 541 BBInfo(unsigned IX) : Group(this), Index(IX) {} 542 543 // Return the information string of this object. 544 const std::string infoString() const { 545 return (Twine("Index=") + Twine(Index)).str(); 546 } 547 548 // Empty function -- only applicable to UseBBInfo. 549 void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 550 551 // Empty function -- only applicable to UseBBInfo. 552 void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 553 }; 554 555 // This class implements the CFG edges. Note the CFG can be a multi-graph. 556 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 557 private: 558 Function &F; 559 560 // Is this is context-sensitive instrumentation. 561 bool IsCS; 562 563 // A map that stores the Comdat group in function F. 564 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 565 566 void computeCFGHash(); 567 void renameComdatFunction(); 568 569 public: 570 std::vector<std::vector<Instruction *>> ValueSites; 571 SelectInstVisitor SIVisitor; 572 MemIntrinsicVisitor MIVisitor; 573 std::string FuncName; 574 GlobalVariable *FuncNameVar; 575 576 // CFG hash value for this function. 577 uint64_t FunctionHash = 0; 578 579 // The Minimum Spanning Tree of function CFG. 580 CFGMST<Edge, BBInfo> MST; 581 582 // Collect all the BBs that will be instrumented, and store them in 583 // InstrumentBBs. 584 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 585 586 // Give an edge, find the BB that will be instrumented. 587 // Return nullptr if there is no BB to be instrumented. 588 BasicBlock *getInstrBB(Edge *E); 589 590 // Return the auxiliary BB information. 591 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 592 593 // Return the auxiliary BB information if available. 594 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 595 596 // Dump edges and BB information. 597 void dumpInfo(std::string Str = "") const { 598 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 599 Twine(FunctionHash) + "\t" + Str); 600 } 601 602 FuncPGOInstrumentation( 603 Function &Func, 604 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 605 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 606 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false) 607 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), 608 ValueSites(IPVK_Last + 1), SIVisitor(Func), MIVisitor(Func), 609 MST(F, BPI, BFI) { 610 // This should be done before CFG hash computation. 611 SIVisitor.countSelects(Func); 612 MIVisitor.countMemIntrinsics(Func); 613 if (!IsCS) { 614 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 615 NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); 616 NumOfPGOBB += MST.BBInfos.size(); 617 ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func); 618 } else { 619 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 620 NumOfCSPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); 621 NumOfCSPGOBB += MST.BBInfos.size(); 622 } 623 ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func); 624 625 FuncName = getPGOFuncName(F); 626 computeCFGHash(); 627 if (!ComdatMembers.empty()) 628 renameComdatFunction(); 629 LLVM_DEBUG(dumpInfo("after CFGMST")); 630 631 for (auto &E : MST.AllEdges) { 632 if (E->Removed) 633 continue; 634 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 635 if (!E->InMST) 636 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 637 } 638 639 if (CreateGlobalVar) 640 FuncNameVar = createPGOFuncNameVar(F, FuncName); 641 } 642 }; 643 644 } // end anonymous namespace 645 646 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 647 // value of each BB in the CFG. The higher 32 bits record the number of edges. 648 template <class Edge, class BBInfo> 649 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 650 std::vector<char> Indexes; 651 JamCRC JC; 652 for (auto &BB : F) { 653 const Instruction *TI = BB.getTerminator(); 654 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 655 BasicBlock *Succ = TI->getSuccessor(I); 656 auto BI = findBBInfo(Succ); 657 if (BI == nullptr) 658 continue; 659 uint32_t Index = BI->Index; 660 for (int J = 0; J < 4; J++) 661 Indexes.push_back((char)(Index >> (J * 8))); 662 } 663 } 664 JC.update(Indexes); 665 666 // Hash format for context sensitive profile. Reserve 4 bits for other 667 // information. 668 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 669 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 670 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | 671 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 672 // Reserve bit 60-63 for other information purpose. 673 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 674 if (IsCS) 675 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 676 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 677 << " CRC = " << JC.getCRC() 678 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 679 << ", Edges = " << MST.AllEdges.size() << ", ICSites = " 680 << ValueSites[IPVK_IndirectCallTarget].size() 681 << ", Hash = " << FunctionHash << "\n";); 682 } 683 684 // Check if we can safely rename this Comdat function. 685 static bool canRenameComdat( 686 Function &F, 687 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 688 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 689 return false; 690 691 // FIXME: Current only handle those Comdat groups that only containing one 692 // function and function aliases. 693 // (1) For a Comdat group containing multiple functions, we need to have a 694 // unique postfix based on the hashes for each function. There is a 695 // non-trivial code refactoring to do this efficiently. 696 // (2) Variables can not be renamed, so we can not rename Comdat function in a 697 // group including global vars. 698 Comdat *C = F.getComdat(); 699 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 700 if (dyn_cast<GlobalAlias>(CM.second)) 701 continue; 702 Function *FM = dyn_cast<Function>(CM.second); 703 if (FM != &F) 704 return false; 705 } 706 return true; 707 } 708 709 // Append the CFGHash to the Comdat function name. 710 template <class Edge, class BBInfo> 711 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 712 if (!canRenameComdat(F, ComdatMembers)) 713 return; 714 std::string OrigName = F.getName().str(); 715 std::string NewFuncName = 716 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 717 F.setName(Twine(NewFuncName)); 718 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 719 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 720 Comdat *NewComdat; 721 Module *M = F.getParent(); 722 // For AvailableExternallyLinkage functions, change the linkage to 723 // LinkOnceODR and put them into comdat. This is because after renaming, there 724 // is no backup external copy available for the function. 725 if (!F.hasComdat()) { 726 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 727 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 728 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 729 F.setComdat(NewComdat); 730 return; 731 } 732 733 // This function belongs to a single function Comdat group. 734 Comdat *OrigComdat = F.getComdat(); 735 std::string NewComdatName = 736 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 737 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 738 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 739 740 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 741 if (GlobalAlias *GA = dyn_cast<GlobalAlias>(CM.second)) { 742 // For aliases, change the name directly. 743 assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F); 744 std::string OrigGAName = GA->getName().str(); 745 GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash))); 746 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA); 747 continue; 748 } 749 // Must be a function. 750 Function *CF = dyn_cast<Function>(CM.second); 751 assert(CF); 752 CF->setComdat(NewComdat); 753 } 754 } 755 756 // Collect all the BBs that will be instruments and return them in 757 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. 758 template <class Edge, class BBInfo> 759 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 760 std::vector<BasicBlock *> &InstrumentBBs) { 761 // Use a worklist as we will update the vector during the iteration. 762 std::vector<Edge *> EdgeList; 763 EdgeList.reserve(MST.AllEdges.size()); 764 for (auto &E : MST.AllEdges) 765 EdgeList.push_back(E.get()); 766 767 for (auto &E : EdgeList) { 768 BasicBlock *InstrBB = getInstrBB(E); 769 if (InstrBB) 770 InstrumentBBs.push_back(InstrBB); 771 } 772 773 // Set up InEdges/OutEdges for all BBs. 774 for (auto &E : MST.AllEdges) { 775 if (E->Removed) 776 continue; 777 const BasicBlock *SrcBB = E->SrcBB; 778 const BasicBlock *DestBB = E->DestBB; 779 BBInfo &SrcInfo = getBBInfo(SrcBB); 780 BBInfo &DestInfo = getBBInfo(DestBB); 781 SrcInfo.addOutEdge(E.get()); 782 DestInfo.addInEdge(E.get()); 783 } 784 } 785 786 // Given a CFG E to be instrumented, find which BB to place the instrumented 787 // code. The function will split the critical edge if necessary. 788 template <class Edge, class BBInfo> 789 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 790 if (E->InMST || E->Removed) 791 return nullptr; 792 793 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 794 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 795 // For a fake edge, instrument the real BB. 796 if (SrcBB == nullptr) 797 return DestBB; 798 if (DestBB == nullptr) 799 return SrcBB; 800 801 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 802 // There are basic blocks (such as catchswitch) cannot be instrumented. 803 // If the returned first insertion point is the end of BB, skip this BB. 804 if (BB->getFirstInsertionPt() == BB->end()) 805 return nullptr; 806 return BB; 807 }; 808 809 // Instrument the SrcBB if it has a single successor, 810 // otherwise, the DestBB if this is not a critical edge. 811 Instruction *TI = SrcBB->getTerminator(); 812 if (TI->getNumSuccessors() <= 1) 813 return canInstrument(SrcBB); 814 if (!E->IsCritical) 815 return canInstrument(DestBB); 816 817 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 818 BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum); 819 if (!InstrBB) { 820 LLVM_DEBUG( 821 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 822 return nullptr; 823 } 824 // For a critical edge, we have to split. Instrument the newly 825 // created BB. 826 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 827 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 828 << " --> " << getBBInfo(DestBB).Index << "\n"); 829 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 830 MST.addEdge(SrcBB, InstrBB, 0); 831 // Second one: Add new edge of InstrBB->DestBB. 832 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 833 NewEdge1.InMST = true; 834 E->Removed = true; 835 836 return canInstrument(InstrBB); 837 } 838 839 // Visit all edge and instrument the edges not in MST, and do value profiling. 840 // Critical edges will be split. 841 static void instrumentOneFunc( 842 Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, 843 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 844 bool IsCS) { 845 // Split indirectbr critical edges here before computing the MST rather than 846 // later in getInstrBB() to avoid invalidating it. 847 SplitIndirectBrCriticalEdges(F, BPI, BFI); 848 849 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI, 850 BFI, IsCS); 851 std::vector<BasicBlock *> InstrumentBBs; 852 FuncInfo.getInstrumentBBs(InstrumentBBs); 853 unsigned NumCounters = 854 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 855 856 uint32_t I = 0; 857 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 858 for (auto *InstrBB : InstrumentBBs) { 859 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 860 assert(Builder.GetInsertPoint() != InstrBB->end() && 861 "Cannot get the Instrumentation point"); 862 Builder.CreateCall( 863 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 864 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 865 Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), 866 Builder.getInt32(I++)}); 867 } 868 869 // Now instrument select instructions: 870 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 871 FuncInfo.FunctionHash); 872 assert(I == NumCounters); 873 874 if (DisableValueProfiling) 875 return; 876 877 unsigned NumIndirectCalls = 0; 878 for (auto &I : FuncInfo.ValueSites[IPVK_IndirectCallTarget]) { 879 CallSite CS(I); 880 Value *Callee = CS.getCalledValue(); 881 LLVM_DEBUG(dbgs() << "Instrument one indirect call: CallSite Index = " 882 << NumIndirectCalls << "\n"); 883 IRBuilder<> Builder(I); 884 assert(Builder.GetInsertPoint() != I->getParent()->end() && 885 "Cannot get the Instrumentation point"); 886 Builder.CreateCall( 887 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 888 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 889 Builder.getInt64(FuncInfo.FunctionHash), 890 Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()), 891 Builder.getInt32(IPVK_IndirectCallTarget), 892 Builder.getInt32(NumIndirectCalls++)}); 893 } 894 NumOfPGOICall += NumIndirectCalls; 895 896 // Now instrument memop intrinsic calls. 897 FuncInfo.MIVisitor.instrumentMemIntrinsics( 898 F, NumCounters, FuncInfo.FuncNameVar, FuncInfo.FunctionHash); 899 } 900 901 namespace { 902 903 // This class represents a CFG edge in profile use compilation. 904 struct PGOUseEdge : public PGOEdge { 905 bool CountValid = false; 906 uint64_t CountValue = 0; 907 908 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 909 : PGOEdge(Src, Dest, W) {} 910 911 // Set edge count value 912 void setEdgeCount(uint64_t Value) { 913 CountValue = Value; 914 CountValid = true; 915 } 916 917 // Return the information string for this object. 918 const std::string infoString() const { 919 if (!CountValid) 920 return PGOEdge::infoString(); 921 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 922 .str(); 923 } 924 }; 925 926 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 927 928 // This class stores the auxiliary information for each BB. 929 struct UseBBInfo : public BBInfo { 930 uint64_t CountValue = 0; 931 bool CountValid; 932 int32_t UnknownCountInEdge = 0; 933 int32_t UnknownCountOutEdge = 0; 934 DirectEdges InEdges; 935 DirectEdges OutEdges; 936 937 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} 938 939 UseBBInfo(unsigned IX, uint64_t C) 940 : BBInfo(IX), CountValue(C), CountValid(true) {} 941 942 // Set the profile count value for this BB. 943 void setBBInfoCount(uint64_t Value) { 944 CountValue = Value; 945 CountValid = true; 946 } 947 948 // Return the information string of this object. 949 const std::string infoString() const { 950 if (!CountValid) 951 return BBInfo::infoString(); 952 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 953 } 954 955 // Add an OutEdge and update the edge count. 956 void addOutEdge(PGOUseEdge *E) { 957 OutEdges.push_back(E); 958 UnknownCountOutEdge++; 959 } 960 961 // Add an InEdge and update the edge count. 962 void addInEdge(PGOUseEdge *E) { 963 InEdges.push_back(E); 964 UnknownCountInEdge++; 965 } 966 }; 967 968 } // end anonymous namespace 969 970 // Sum up the count values for all the edges. 971 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 972 uint64_t Total = 0; 973 for (auto &E : Edges) { 974 if (E->Removed) 975 continue; 976 Total += E->CountValue; 977 } 978 return Total; 979 } 980 981 namespace { 982 983 class PGOUseFunc { 984 public: 985 PGOUseFunc(Function &Func, Module *Modu, 986 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 987 BranchProbabilityInfo *BPI = nullptr, 988 BlockFrequencyInfo *BFIin = nullptr, bool IsCS = false) 989 : F(Func), M(Modu), BFI(BFIin), 990 FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS), 991 FreqAttr(FFA_Normal), IsCS(IsCS) {} 992 993 // Read counts for the instrumented BB from profile. 994 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros); 995 996 // Populate the counts for all BBs. 997 void populateCounters(); 998 999 // Set the branch weights based on the count values. 1000 void setBranchWeights(); 1001 1002 // Annotate the value profile call sites for all value kind. 1003 void annotateValueSites(); 1004 1005 // Annotate the value profile call sites for one value kind. 1006 void annotateValueSites(uint32_t Kind); 1007 1008 // Annotate the irreducible loop header weights. 1009 void annotateIrrLoopHeaderWeights(); 1010 1011 // The hotness of the function from the profile count. 1012 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1013 1014 // Return the function hotness from the profile. 1015 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1016 1017 // Return the function hash. 1018 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1019 1020 // Return the profile record for this function; 1021 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1022 1023 // Return the auxiliary BB information. 1024 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 1025 return FuncInfo.getBBInfo(BB); 1026 } 1027 1028 // Return the auxiliary BB information if available. 1029 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 1030 return FuncInfo.findBBInfo(BB); 1031 } 1032 1033 Function &getFunc() const { return F; } 1034 1035 void dumpInfo(std::string Str = "") const { 1036 FuncInfo.dumpInfo(Str); 1037 } 1038 1039 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1040 private: 1041 Function &F; 1042 Module *M; 1043 BlockFrequencyInfo *BFI; 1044 1045 // This member stores the shared information with class PGOGenFunc. 1046 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 1047 1048 // The maximum count value in the profile. This is only used in PGO use 1049 // compilation. 1050 uint64_t ProgramMaxCount; 1051 1052 // Position of counter that remains to be read. 1053 uint32_t CountPosition = 0; 1054 1055 // Total size of the profile count for this function. 1056 uint32_t ProfileCountSize = 0; 1057 1058 // ProfileRecord for this function. 1059 InstrProfRecord ProfileRecord; 1060 1061 // Function hotness info derived from profile. 1062 FuncFreqAttr FreqAttr; 1063 1064 // Is to use the context sensitive profile. 1065 bool IsCS; 1066 1067 // Find the Instrumented BB and set the value. Return false on error. 1068 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1069 1070 // Set the edge counter value for the unknown edge -- there should be only 1071 // one unknown edge. 1072 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1073 1074 // Return FuncName string; 1075 const std::string getFuncName() const { return FuncInfo.FuncName; } 1076 1077 // Set the hot/cold inline hints based on the count values. 1078 // FIXME: This function should be removed once the functionality in 1079 // the inliner is implemented. 1080 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1081 if (ProgramMaxCount == 0) 1082 return; 1083 // Threshold of the hot functions. 1084 const BranchProbability HotFunctionThreshold(1, 100); 1085 // Threshold of the cold functions. 1086 const BranchProbability ColdFunctionThreshold(2, 10000); 1087 if (EntryCount >= HotFunctionThreshold.scale(ProgramMaxCount)) 1088 FreqAttr = FFA_Hot; 1089 else if (MaxCount <= ColdFunctionThreshold.scale(ProgramMaxCount)) 1090 FreqAttr = FFA_Cold; 1091 } 1092 }; 1093 1094 } // end anonymous namespace 1095 1096 // Visit all the edges and assign the count value for the instrumented 1097 // edges and the BB. Return false on error. 1098 bool PGOUseFunc::setInstrumentedCounts( 1099 const std::vector<uint64_t> &CountFromProfile) { 1100 1101 std::vector<BasicBlock *> InstrumentBBs; 1102 FuncInfo.getInstrumentBBs(InstrumentBBs); 1103 unsigned NumCounters = 1104 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1105 // The number of counters here should match the number of counters 1106 // in profile. Return if they mismatch. 1107 if (NumCounters != CountFromProfile.size()) { 1108 return false; 1109 } 1110 // Set the profile count to the Instrumented BBs. 1111 uint32_t I = 0; 1112 for (BasicBlock *InstrBB : InstrumentBBs) { 1113 uint64_t CountValue = CountFromProfile[I++]; 1114 UseBBInfo &Info = getBBInfo(InstrBB); 1115 Info.setBBInfoCount(CountValue); 1116 } 1117 ProfileCountSize = CountFromProfile.size(); 1118 CountPosition = I; 1119 1120 // Set the edge count and update the count of unknown edges for BBs. 1121 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1122 E->setEdgeCount(Value); 1123 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1124 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1125 }; 1126 1127 // Set the profile count the Instrumented edges. There are BBs that not in 1128 // MST but not instrumented. Need to set the edge count value so that we can 1129 // populate the profile counts later. 1130 for (auto &E : FuncInfo.MST.AllEdges) { 1131 if (E->Removed || E->InMST) 1132 continue; 1133 const BasicBlock *SrcBB = E->SrcBB; 1134 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 1135 1136 // If only one out-edge, the edge profile count should be the same as BB 1137 // profile count. 1138 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1139 setEdgeCount(E.get(), SrcInfo.CountValue); 1140 else { 1141 const BasicBlock *DestBB = E->DestBB; 1142 UseBBInfo &DestInfo = getBBInfo(DestBB); 1143 // If only one in-edge, the edge profile count should be the same as BB 1144 // profile count. 1145 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1146 setEdgeCount(E.get(), DestInfo.CountValue); 1147 } 1148 if (E->CountValid) 1149 continue; 1150 // E's count should have been set from profile. If not, this meenas E skips 1151 // the instrumentation. We set the count to 0. 1152 setEdgeCount(E.get(), 0); 1153 } 1154 return true; 1155 } 1156 1157 // Set the count value for the unknown edge. There should be one and only one 1158 // unknown edge in Edges vector. 1159 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1160 for (auto &E : Edges) { 1161 if (E->CountValid) 1162 continue; 1163 E->setEdgeCount(Value); 1164 1165 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1166 getBBInfo(E->DestBB).UnknownCountInEdge--; 1167 return; 1168 } 1169 llvm_unreachable("Cannot find the unknown count edge"); 1170 } 1171 1172 // Read the profile from ProfileFileName and assign the value to the 1173 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1174 // Return true if the profile are successfully read, and false on errors. 1175 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) { 1176 auto &Ctx = M->getContext(); 1177 Expected<InstrProfRecord> Result = 1178 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 1179 if (Error E = Result.takeError()) { 1180 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 1181 auto Err = IPE.get(); 1182 bool SkipWarning = false; 1183 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1184 << FuncInfo.FuncName << ": "); 1185 if (Err == instrprof_error::unknown_function) { 1186 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1187 SkipWarning = !PGOWarnMissing; 1188 LLVM_DEBUG(dbgs() << "unknown function"); 1189 } else if (Err == instrprof_error::hash_mismatch || 1190 Err == instrprof_error::malformed) { 1191 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1192 SkipWarning = 1193 NoPGOWarnMismatch || 1194 (NoPGOWarnMismatchComdat && 1195 (F.hasComdat() || 1196 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1197 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 1198 } 1199 1200 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1201 if (SkipWarning) 1202 return; 1203 1204 std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + 1205 std::string(" Hash = ") + 1206 std::to_string(FuncInfo.FunctionHash); 1207 1208 Ctx.diagnose( 1209 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1210 }); 1211 return false; 1212 } 1213 ProfileRecord = std::move(Result.get()); 1214 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1215 1216 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1217 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1218 uint64_t ValueSum = 0; 1219 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1220 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1221 ValueSum += CountFromProfile[I]; 1222 } 1223 AllZeros = (ValueSum == 0); 1224 1225 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1226 1227 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1228 getBBInfo(nullptr).UnknownCountInEdge = 2; 1229 1230 if (!setInstrumentedCounts(CountFromProfile)) { 1231 LLVM_DEBUG( 1232 dbgs() << "Inconsistent number of counts, skipping this function"); 1233 Ctx.diagnose(DiagnosticInfoPGOProfile( 1234 M->getName().data(), 1235 Twine("Inconsistent number of counts in ") + F.getName().str() 1236 + Twine(": the profile may be stale or there is a function name collision."), 1237 DS_Warning)); 1238 return false; 1239 } 1240 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1241 return true; 1242 } 1243 1244 // Populate the counters from instrumented BBs to all BBs. 1245 // In the end of this operation, all BBs should have a valid count value. 1246 void PGOUseFunc::populateCounters() { 1247 bool Changes = true; 1248 unsigned NumPasses = 0; 1249 while (Changes) { 1250 NumPasses++; 1251 Changes = false; 1252 1253 // For efficient traversal, it's better to start from the end as most 1254 // of the instrumented edges are at the end. 1255 for (auto &BB : reverse(F)) { 1256 UseBBInfo *Count = findBBInfo(&BB); 1257 if (Count == nullptr) 1258 continue; 1259 if (!Count->CountValid) { 1260 if (Count->UnknownCountOutEdge == 0) { 1261 Count->CountValue = sumEdgeCount(Count->OutEdges); 1262 Count->CountValid = true; 1263 Changes = true; 1264 } else if (Count->UnknownCountInEdge == 0) { 1265 Count->CountValue = sumEdgeCount(Count->InEdges); 1266 Count->CountValid = true; 1267 Changes = true; 1268 } 1269 } 1270 if (Count->CountValid) { 1271 if (Count->UnknownCountOutEdge == 1) { 1272 uint64_t Total = 0; 1273 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1274 // If the one of the successor block can early terminate (no-return), 1275 // we can end up with situation where out edge sum count is larger as 1276 // the source BB's count is collected by a post-dominated block. 1277 if (Count->CountValue > OutSum) 1278 Total = Count->CountValue - OutSum; 1279 setEdgeCount(Count->OutEdges, Total); 1280 Changes = true; 1281 } 1282 if (Count->UnknownCountInEdge == 1) { 1283 uint64_t Total = 0; 1284 uint64_t InSum = sumEdgeCount(Count->InEdges); 1285 if (Count->CountValue > InSum) 1286 Total = Count->CountValue - InSum; 1287 setEdgeCount(Count->InEdges, Total); 1288 Changes = true; 1289 } 1290 } 1291 } 1292 } 1293 1294 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1295 #ifndef NDEBUG 1296 // Assert every BB has a valid counter. 1297 for (auto &BB : F) { 1298 auto BI = findBBInfo(&BB); 1299 if (BI == nullptr) 1300 continue; 1301 assert(BI->CountValid && "BB count is not valid"); 1302 } 1303 #endif 1304 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1305 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1306 uint64_t FuncMaxCount = FuncEntryCount; 1307 for (auto &BB : F) { 1308 auto BI = findBBInfo(&BB); 1309 if (BI == nullptr) 1310 continue; 1311 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1312 } 1313 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1314 1315 // Now annotate select instructions 1316 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 1317 assert(CountPosition == ProfileCountSize); 1318 1319 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1320 } 1321 1322 // Assign the scaled count values to the BB with multiple out edges. 1323 void PGOUseFunc::setBranchWeights() { 1324 // Generate MD_prof metadata for every branch instruction. 1325 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1326 << " IsCS=" << IsCS << "\n"); 1327 for (auto &BB : F) { 1328 Instruction *TI = BB.getTerminator(); 1329 if (TI->getNumSuccessors() < 2) 1330 continue; 1331 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1332 isa<IndirectBrInst>(TI))) 1333 continue; 1334 1335 if (getBBInfo(&BB).CountValue == 0) 1336 continue; 1337 1338 // We have a non-zero Branch BB. 1339 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1340 unsigned Size = BBCountInfo.OutEdges.size(); 1341 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1342 uint64_t MaxCount = 0; 1343 for (unsigned s = 0; s < Size; s++) { 1344 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1345 const BasicBlock *SrcBB = E->SrcBB; 1346 const BasicBlock *DestBB = E->DestBB; 1347 if (DestBB == nullptr) 1348 continue; 1349 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1350 uint64_t EdgeCount = E->CountValue; 1351 if (EdgeCount > MaxCount) 1352 MaxCount = EdgeCount; 1353 EdgeCounts[SuccNum] = EdgeCount; 1354 } 1355 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1356 } 1357 } 1358 1359 static bool isIndirectBrTarget(BasicBlock *BB) { 1360 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 1361 if (isa<IndirectBrInst>((*PI)->getTerminator())) 1362 return true; 1363 } 1364 return false; 1365 } 1366 1367 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1368 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1369 // Find irr loop headers 1370 for (auto &BB : F) { 1371 // As a heuristic also annotate indrectbr targets as they have a high chance 1372 // to become an irreducible loop header after the indirectbr tail 1373 // duplication. 1374 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1375 Instruction *TI = BB.getTerminator(); 1376 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1377 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1378 } 1379 } 1380 } 1381 1382 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1383 Module *M = F.getParent(); 1384 IRBuilder<> Builder(&SI); 1385 Type *Int64Ty = Builder.getInt64Ty(); 1386 Type *I8PtrTy = Builder.getInt8PtrTy(); 1387 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1388 Builder.CreateCall( 1389 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1390 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1391 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1392 Builder.getInt32(*CurCtrIdx), Step}); 1393 ++(*CurCtrIdx); 1394 } 1395 1396 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1397 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1398 assert(*CurCtrIdx < CountFromProfile.size() && 1399 "Out of bound access of counters"); 1400 uint64_t SCounts[2]; 1401 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1402 ++(*CurCtrIdx); 1403 uint64_t TotalCount = 0; 1404 auto BI = UseFunc->findBBInfo(SI.getParent()); 1405 if (BI != nullptr) 1406 TotalCount = BI->CountValue; 1407 // False Count 1408 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1409 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1410 if (MaxCount) 1411 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1412 } 1413 1414 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1415 if (!PGOInstrSelect) 1416 return; 1417 // FIXME: do not handle this yet. 1418 if (SI.getCondition()->getType()->isVectorTy()) 1419 return; 1420 1421 switch (Mode) { 1422 case VM_counting: 1423 NSIs++; 1424 return; 1425 case VM_instrument: 1426 instrumentOneSelectInst(SI); 1427 return; 1428 case VM_annotate: 1429 annotateOneSelectInst(SI); 1430 return; 1431 } 1432 1433 llvm_unreachable("Unknown visiting mode"); 1434 } 1435 1436 void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) { 1437 Module *M = F.getParent(); 1438 IRBuilder<> Builder(&MI); 1439 Type *Int64Ty = Builder.getInt64Ty(); 1440 Type *I8PtrTy = Builder.getInt8PtrTy(); 1441 Value *Length = MI.getLength(); 1442 assert(!isa<ConstantInt>(Length)); 1443 Builder.CreateCall( 1444 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 1445 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1446 Builder.getInt64(FuncHash), Builder.CreateZExtOrTrunc(Length, Int64Ty), 1447 Builder.getInt32(IPVK_MemOPSize), Builder.getInt32(CurCtrId)}); 1448 ++CurCtrId; 1449 } 1450 1451 void MemIntrinsicVisitor::visitMemIntrinsic(MemIntrinsic &MI) { 1452 if (!PGOInstrMemOP) 1453 return; 1454 Value *Length = MI.getLength(); 1455 // Not instrument constant length calls. 1456 if (dyn_cast<ConstantInt>(Length)) 1457 return; 1458 1459 switch (Mode) { 1460 case VM_counting: 1461 NMemIs++; 1462 return; 1463 case VM_instrument: 1464 instrumentOneMemIntrinsic(MI); 1465 return; 1466 case VM_annotate: 1467 Candidates.push_back(&MI); 1468 return; 1469 } 1470 llvm_unreachable("Unknown visiting mode"); 1471 } 1472 1473 // Traverse all valuesites and annotate the instructions for all value kind. 1474 void PGOUseFunc::annotateValueSites() { 1475 if (DisableValueProfiling) 1476 return; 1477 1478 // Create the PGOFuncName meta data. 1479 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1480 1481 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1482 annotateValueSites(Kind); 1483 } 1484 1485 static const char *ValueProfKindDescr[] = { 1486 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 1487 #include "llvm/ProfileData/InstrProfData.inc" 1488 }; 1489 1490 // Annotate the instructions for a specific value kind. 1491 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1492 assert(Kind <= IPVK_Last); 1493 unsigned ValueSiteIndex = 0; 1494 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1495 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1496 if (NumValueSites != ValueSites.size()) { 1497 auto &Ctx = M->getContext(); 1498 Ctx.diagnose(DiagnosticInfoPGOProfile( 1499 M->getName().data(), 1500 Twine("Inconsistent number of value sites for ") + 1501 Twine(ValueProfKindDescr[Kind]) + 1502 Twine(" profiling in \"") + F.getName().str() + 1503 Twine("\", possibly due to the use of a stale profile."), 1504 DS_Warning)); 1505 return; 1506 } 1507 1508 for (auto &I : ValueSites) { 1509 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1510 << "): Index = " << ValueSiteIndex << " out of " 1511 << NumValueSites << "\n"); 1512 annotateValueSite(*M, *I, ProfileRecord, 1513 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1514 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1515 : MaxNumAnnotations); 1516 ValueSiteIndex++; 1517 } 1518 } 1519 1520 // Collect the set of members for each Comdat in module M and store 1521 // in ComdatMembers. 1522 static void collectComdatMembers( 1523 Module &M, 1524 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1525 if (!DoComdatRenaming) 1526 return; 1527 for (Function &F : M) 1528 if (Comdat *C = F.getComdat()) 1529 ComdatMembers.insert(std::make_pair(C, &F)); 1530 for (GlobalVariable &GV : M.globals()) 1531 if (Comdat *C = GV.getComdat()) 1532 ComdatMembers.insert(std::make_pair(C, &GV)); 1533 for (GlobalAlias &GA : M.aliases()) 1534 if (Comdat *C = GA.getComdat()) 1535 ComdatMembers.insert(std::make_pair(C, &GA)); 1536 } 1537 1538 static bool InstrumentAllFunctions( 1539 Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1540 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1541 // For the context-sensitve instrumentation, we should have a separated pass 1542 // (before LTO/ThinLTO linking) to create these variables. 1543 if (!IsCS) 1544 createIRLevelProfileFlagVar(M, /* IsCS */ false); 1545 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1546 collectComdatMembers(M, ComdatMembers); 1547 1548 for (auto &F : M) { 1549 if (F.isDeclaration()) 1550 continue; 1551 auto *BPI = LookupBPI(F); 1552 auto *BFI = LookupBFI(F); 1553 instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS); 1554 } 1555 return true; 1556 } 1557 1558 PreservedAnalyses 1559 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { 1560 createProfileFileNameVar(M, CSInstrName); 1561 createIRLevelProfileFlagVar(M, /* IsCS */ true); 1562 return PreservedAnalyses::all(); 1563 } 1564 1565 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { 1566 if (skipModule(M)) 1567 return false; 1568 1569 auto LookupBPI = [this](Function &F) { 1570 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1571 }; 1572 auto LookupBFI = [this](Function &F) { 1573 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1574 }; 1575 return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS); 1576 } 1577 1578 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1579 ModuleAnalysisManager &AM) { 1580 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1581 auto LookupBPI = [&FAM](Function &F) { 1582 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1583 }; 1584 1585 auto LookupBFI = [&FAM](Function &F) { 1586 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1587 }; 1588 1589 if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS)) 1590 return PreservedAnalyses::all(); 1591 1592 return PreservedAnalyses::none(); 1593 } 1594 1595 static bool annotateAllFunctions( 1596 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 1597 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1598 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1599 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 1600 auto &Ctx = M.getContext(); 1601 // Read the counter array from file. 1602 auto ReaderOrErr = 1603 IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); 1604 if (Error E = ReaderOrErr.takeError()) { 1605 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1606 Ctx.diagnose( 1607 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1608 }); 1609 return false; 1610 } 1611 1612 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1613 std::move(ReaderOrErr.get()); 1614 if (!PGOReader) { 1615 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1616 StringRef("Cannot get PGOReader"))); 1617 return false; 1618 } 1619 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 1620 return false; 1621 1622 // TODO: might need to change the warning once the clang option is finalized. 1623 if (!PGOReader->isIRLevelProfile()) { 1624 Ctx.diagnose(DiagnosticInfoPGOProfile( 1625 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1626 return false; 1627 } 1628 1629 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1630 collectComdatMembers(M, ComdatMembers); 1631 std::vector<Function *> HotFunctions; 1632 std::vector<Function *> ColdFunctions; 1633 for (auto &F : M) { 1634 if (F.isDeclaration()) 1635 continue; 1636 auto *BPI = LookupBPI(F); 1637 auto *BFI = LookupBFI(F); 1638 // Split indirectbr critical edges here before computing the MST rather than 1639 // later in getInstrBB() to avoid invalidating it. 1640 SplitIndirectBrCriticalEdges(F, BPI, BFI); 1641 PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, IsCS); 1642 bool AllZeros = false; 1643 if (!Func.readCounters(PGOReader.get(), AllZeros)) 1644 continue; 1645 if (AllZeros) { 1646 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 1647 if (Func.getProgramMaxCount() != 0) 1648 ColdFunctions.push_back(&F); 1649 continue; 1650 } 1651 Func.populateCounters(); 1652 Func.setBranchWeights(); 1653 Func.annotateValueSites(); 1654 Func.annotateIrrLoopHeaderWeights(); 1655 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 1656 if (FreqAttr == PGOUseFunc::FFA_Cold) 1657 ColdFunctions.push_back(&F); 1658 else if (FreqAttr == PGOUseFunc::FFA_Hot) 1659 HotFunctions.push_back(&F); 1660 if (PGOViewCounts != PGOVCT_None && 1661 (ViewBlockFreqFuncName.empty() || 1662 F.getName().equals(ViewBlockFreqFuncName))) { 1663 LoopInfo LI{DominatorTree(F)}; 1664 std::unique_ptr<BranchProbabilityInfo> NewBPI = 1665 llvm::make_unique<BranchProbabilityInfo>(F, LI); 1666 std::unique_ptr<BlockFrequencyInfo> NewBFI = 1667 llvm::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 1668 if (PGOViewCounts == PGOVCT_Graph) 1669 NewBFI->view(); 1670 else if (PGOViewCounts == PGOVCT_Text) { 1671 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 1672 NewBFI->print(dbgs()); 1673 } 1674 } 1675 if (PGOViewRawCounts != PGOVCT_None && 1676 (ViewBlockFreqFuncName.empty() || 1677 F.getName().equals(ViewBlockFreqFuncName))) { 1678 if (PGOViewRawCounts == PGOVCT_Graph) 1679 if (ViewBlockFreqFuncName.empty()) 1680 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1681 else 1682 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1683 else if (PGOViewRawCounts == PGOVCT_Text) { 1684 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 1685 Func.dumpInfo(); 1686 } 1687 } 1688 } 1689 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 1690 IsCS ? ProfileSummary::PSK_CSInstr 1691 : ProfileSummary::PSK_Instr); 1692 1693 // Set function hotness attribute from the profile. 1694 // We have to apply these attributes at the end because their presence 1695 // can affect the BranchProbabilityInfo of any callers, resulting in an 1696 // inconsistent MST between prof-gen and prof-use. 1697 for (auto &F : HotFunctions) { 1698 F->addFnAttr(Attribute::InlineHint); 1699 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 1700 << "\n"); 1701 } 1702 for (auto &F : ColdFunctions) { 1703 F->addFnAttr(Attribute::Cold); 1704 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 1705 << "\n"); 1706 } 1707 return true; 1708 } 1709 1710 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, 1711 std::string RemappingFilename, 1712 bool IsCS) 1713 : ProfileFileName(std::move(Filename)), 1714 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { 1715 if (!PGOTestProfileFile.empty()) 1716 ProfileFileName = PGOTestProfileFile; 1717 if (!PGOTestProfileRemappingFile.empty()) 1718 ProfileRemappingFileName = PGOTestProfileRemappingFile; 1719 } 1720 1721 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 1722 ModuleAnalysisManager &AM) { 1723 1724 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1725 auto LookupBPI = [&FAM](Function &F) { 1726 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1727 }; 1728 1729 auto LookupBFI = [&FAM](Function &F) { 1730 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1731 }; 1732 1733 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, 1734 LookupBPI, LookupBFI, IsCS)) 1735 return PreservedAnalyses::all(); 1736 1737 return PreservedAnalyses::none(); 1738 } 1739 1740 bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { 1741 if (skipModule(M)) 1742 return false; 1743 1744 auto LookupBPI = [this](Function &F) { 1745 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1746 }; 1747 auto LookupBFI = [this](Function &F) { 1748 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1749 }; 1750 1751 return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, 1752 IsCS); 1753 } 1754 1755 static std::string getSimpleNodeName(const BasicBlock *Node) { 1756 if (!Node->getName().empty()) 1757 return Node->getName(); 1758 1759 std::string SimpleNodeName; 1760 raw_string_ostream OS(SimpleNodeName); 1761 Node->printAsOperand(OS, false); 1762 return OS.str(); 1763 } 1764 1765 void llvm::setProfMetadata(Module *M, Instruction *TI, 1766 ArrayRef<uint64_t> EdgeCounts, 1767 uint64_t MaxCount) { 1768 MDBuilder MDB(M->getContext()); 1769 assert(MaxCount > 0 && "Bad max count"); 1770 uint64_t Scale = calculateCountScale(MaxCount); 1771 SmallVector<unsigned, 4> Weights; 1772 for (const auto &ECI : EdgeCounts) 1773 Weights.push_back(scaleBranchCount(ECI, Scale)); 1774 1775 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 1776 : Weights) { 1777 dbgs() << W << " "; 1778 } dbgs() << "\n";); 1779 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1780 if (EmitBranchProbability) { 1781 std::string BrCondStr = getBranchCondString(TI); 1782 if (BrCondStr.empty()) 1783 return; 1784 1785 uint64_t WSum = 1786 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 1787 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 1788 uint64_t TotalCount = 1789 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 1790 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 1791 Scale = calculateCountScale(WSum); 1792 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 1793 scaleBranchCount(WSum, Scale)); 1794 std::string BranchProbStr; 1795 raw_string_ostream OS(BranchProbStr); 1796 OS << BP; 1797 OS << " (total count : " << TotalCount << ")"; 1798 OS.flush(); 1799 Function *F = TI->getParent()->getParent(); 1800 OptimizationRemarkEmitter ORE(F); 1801 ORE.emit([&]() { 1802 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 1803 << BrCondStr << " is true with probability : " << BranchProbStr; 1804 }); 1805 } 1806 } 1807 1808 namespace llvm { 1809 1810 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 1811 MDBuilder MDB(M->getContext()); 1812 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 1813 MDB.createIrrLoopHeaderWeight(Count)); 1814 } 1815 1816 template <> struct GraphTraits<PGOUseFunc *> { 1817 using NodeRef = const BasicBlock *; 1818 using ChildIteratorType = succ_const_iterator; 1819 using nodes_iterator = pointer_iterator<Function::const_iterator>; 1820 1821 static NodeRef getEntryNode(const PGOUseFunc *G) { 1822 return &G->getFunc().front(); 1823 } 1824 1825 static ChildIteratorType child_begin(const NodeRef N) { 1826 return succ_begin(N); 1827 } 1828 1829 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 1830 1831 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 1832 return nodes_iterator(G->getFunc().begin()); 1833 } 1834 1835 static nodes_iterator nodes_end(const PGOUseFunc *G) { 1836 return nodes_iterator(G->getFunc().end()); 1837 } 1838 }; 1839 1840 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 1841 explicit DOTGraphTraits(bool isSimple = false) 1842 : DefaultDOTGraphTraits(isSimple) {} 1843 1844 static std::string getGraphName(const PGOUseFunc *G) { 1845 return G->getFunc().getName(); 1846 } 1847 1848 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 1849 std::string Result; 1850 raw_string_ostream OS(Result); 1851 1852 OS << getSimpleNodeName(Node) << ":\\l"; 1853 UseBBInfo *BI = Graph->findBBInfo(Node); 1854 OS << "Count : "; 1855 if (BI && BI->CountValid) 1856 OS << BI->CountValue << "\\l"; 1857 else 1858 OS << "Unknown\\l"; 1859 1860 if (!PGOInstrSelect) 1861 return Result; 1862 1863 for (auto BI = Node->begin(); BI != Node->end(); ++BI) { 1864 auto *I = &*BI; 1865 if (!isa<SelectInst>(I)) 1866 continue; 1867 // Display scaled counts for SELECT instruction: 1868 OS << "SELECT : { T = "; 1869 uint64_t TC, FC; 1870 bool HasProf = I->extractProfMetadata(TC, FC); 1871 if (!HasProf) 1872 OS << "Unknown, F = Unknown }\\l"; 1873 else 1874 OS << TC << ", F = " << FC << " }\\l"; 1875 } 1876 return Result; 1877 } 1878 }; 1879 1880 } // end namespace llvm 1881