1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 51 #include "ValueProfileCollector.h" 52 #include "llvm/ADT/APInt.h" 53 #include "llvm/ADT/ArrayRef.h" 54 #include "llvm/ADT/STLExtras.h" 55 #include "llvm/ADT/SmallVector.h" 56 #include "llvm/ADT/Statistic.h" 57 #include "llvm/ADT/StringRef.h" 58 #include "llvm/ADT/Twine.h" 59 #include "llvm/ADT/iterator.h" 60 #include "llvm/ADT/iterator_range.h" 61 #include "llvm/Analysis/BlockFrequencyInfo.h" 62 #include "llvm/Analysis/BranchProbabilityInfo.h" 63 #include "llvm/Analysis/CFG.h" 64 #include "llvm/Analysis/LoopInfo.h" 65 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 66 #include "llvm/Analysis/ProfileSummaryInfo.h" 67 #include "llvm/Analysis/TargetLibraryInfo.h" 68 #include "llvm/IR/Attributes.h" 69 #include "llvm/IR/BasicBlock.h" 70 #include "llvm/IR/CFG.h" 71 #include "llvm/IR/Comdat.h" 72 #include "llvm/IR/Constant.h" 73 #include "llvm/IR/Constants.h" 74 #include "llvm/IR/DiagnosticInfo.h" 75 #include "llvm/IR/Dominators.h" 76 #include "llvm/IR/EHPersonalities.h" 77 #include "llvm/IR/Function.h" 78 #include "llvm/IR/GlobalAlias.h" 79 #include "llvm/IR/GlobalValue.h" 80 #include "llvm/IR/GlobalVariable.h" 81 #include "llvm/IR/IRBuilder.h" 82 #include "llvm/IR/InstVisitor.h" 83 #include "llvm/IR/InstrTypes.h" 84 #include "llvm/IR/Instruction.h" 85 #include "llvm/IR/Instructions.h" 86 #include "llvm/IR/IntrinsicInst.h" 87 #include "llvm/IR/Intrinsics.h" 88 #include "llvm/IR/LLVMContext.h" 89 #include "llvm/IR/MDBuilder.h" 90 #include "llvm/IR/Module.h" 91 #include "llvm/IR/PassManager.h" 92 #include "llvm/IR/ProfDataUtils.h" 93 #include "llvm/IR/ProfileSummary.h" 94 #include "llvm/IR/Type.h" 95 #include "llvm/IR/Value.h" 96 #include "llvm/ProfileData/InstrProf.h" 97 #include "llvm/ProfileData/InstrProfReader.h" 98 #include "llvm/Support/BranchProbability.h" 99 #include "llvm/Support/CRC.h" 100 #include "llvm/Support/Casting.h" 101 #include "llvm/Support/CommandLine.h" 102 #include "llvm/Support/DOTGraphTraits.h" 103 #include "llvm/Support/Debug.h" 104 #include "llvm/Support/Error.h" 105 #include "llvm/Support/ErrorHandling.h" 106 #include "llvm/Support/GraphWriter.h" 107 #include "llvm/Support/VirtualFileSystem.h" 108 #include "llvm/Support/raw_ostream.h" 109 #include "llvm/TargetParser/Triple.h" 110 #include "llvm/Transforms/Instrumentation.h" 111 #include "llvm/Transforms/Instrumentation/BlockCoverageInference.h" 112 #include "llvm/Transforms/Instrumentation/CFGMST.h" 113 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 114 #include "llvm/Transforms/Utils/MisExpect.h" 115 #include "llvm/Transforms/Utils/ModuleUtils.h" 116 #include <algorithm> 117 #include <cassert> 118 #include <cstdint> 119 #include <memory> 120 #include <numeric> 121 #include <optional> 122 #include <string> 123 #include <unordered_map> 124 #include <utility> 125 #include <vector> 126 127 using namespace llvm; 128 using ProfileCount = Function::ProfileCount; 129 using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 130 131 #define DEBUG_TYPE "pgo-instrumentation" 132 133 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 134 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 135 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 136 STATISTIC(NumOfPGOEdge, "Number of edges."); 137 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 138 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 139 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 140 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 141 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 142 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 143 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 144 STATISTIC(NumOfCSPGOSelectInsts, 145 "Number of select instruction instrumented in CSPGO."); 146 STATISTIC(NumOfCSPGOMemIntrinsics, 147 "Number of mem intrinsics instrumented in CSPGO."); 148 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 149 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 150 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 151 STATISTIC(NumOfCSPGOFunc, 152 "Number of functions having valid profile counts in CSPGO."); 153 STATISTIC(NumOfCSPGOMismatch, 154 "Number of functions having mismatch profile in CSPGO."); 155 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 156 STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed"); 157 158 // Command line option to specify the file to read profile from. This is 159 // mainly used for testing. 160 static cl::opt<std::string> 161 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 162 cl::value_desc("filename"), 163 cl::desc("Specify the path of profile data file. This is" 164 "mainly for test purpose.")); 165 static cl::opt<std::string> PGOTestProfileRemappingFile( 166 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 167 cl::value_desc("filename"), 168 cl::desc("Specify the path of profile remapping file. This is mainly for " 169 "test purpose.")); 170 171 // Command line option to disable value profiling. The default is false: 172 // i.e. value profiling is enabled by default. This is for debug purpose. 173 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 174 cl::Hidden, 175 cl::desc("Disable Value Profiling")); 176 177 // Command line option to set the maximum number of VP annotations to write to 178 // the metadata for a single indirect call callsite. 179 static cl::opt<unsigned> MaxNumAnnotations( 180 "icp-max-annotations", cl::init(3), cl::Hidden, 181 cl::desc("Max number of annotations for a single indirect " 182 "call callsite")); 183 184 // Command line option to set the maximum number of value annotations 185 // to write to the metadata for a single memop intrinsic. 186 static cl::opt<unsigned> MaxNumMemOPAnnotations( 187 "memop-max-annotations", cl::init(4), cl::Hidden, 188 cl::desc("Max number of preicise value annotations for a single memop" 189 "intrinsic")); 190 191 // Command line option to control appending FunctionHash to the name of a COMDAT 192 // function. This is to avoid the hash mismatch caused by the preinliner. 193 static cl::opt<bool> DoComdatRenaming( 194 "do-comdat-renaming", cl::init(false), cl::Hidden, 195 cl::desc("Append function hash to the name of COMDAT function to avoid " 196 "function hash mismatch due to the preinliner")); 197 198 namespace llvm { 199 // Command line option to enable/disable the warning about missing profile 200 // information. 201 cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false), 202 cl::Hidden, 203 cl::desc("Use this option to turn on/off " 204 "warnings about missing profile data for " 205 "functions.")); 206 207 // Command line option to enable/disable the warning about a hash mismatch in 208 // the profile data. 209 cl::opt<bool> 210 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 211 cl::desc("Use this option to turn off/on " 212 "warnings about profile cfg mismatch.")); 213 214 // Command line option to enable/disable the warning about a hash mismatch in 215 // the profile data for Comdat functions, which often turns out to be false 216 // positive due to the pre-instrumentation inline. 217 cl::opt<bool> NoPGOWarnMismatchComdatWeak( 218 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden, 219 cl::desc("The option is used to turn on/off " 220 "warnings about hash mismatch for comdat " 221 "or weak functions.")); 222 } // namespace llvm 223 224 // Command line option to enable/disable select instruction instrumentation. 225 static cl::opt<bool> 226 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 227 cl::desc("Use this option to turn on/off SELECT " 228 "instruction instrumentation. ")); 229 230 // Command line option to turn on CFG dot or text dump of raw profile counts 231 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 232 "pgo-view-raw-counts", cl::Hidden, 233 cl::desc("A boolean option to show CFG dag or text " 234 "with raw profile counts from " 235 "profile data. See also option " 236 "-pgo-view-counts. To limit graph " 237 "display to only one function, use " 238 "filtering option -view-bfi-func-name."), 239 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 240 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 241 clEnumValN(PGOVCT_Text, "text", "show in text."))); 242 243 // Command line option to enable/disable memop intrinsic call.size profiling. 244 static cl::opt<bool> 245 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 246 cl::desc("Use this option to turn on/off " 247 "memory intrinsic size profiling.")); 248 249 // Emit branch probability as optimization remarks. 250 static cl::opt<bool> 251 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 252 cl::desc("When this option is on, the annotated " 253 "branch probability will be emitted as " 254 "optimization remarks: -{Rpass|" 255 "pass-remarks}=pgo-instrumentation")); 256 257 static cl::opt<bool> PGOInstrumentEntry( 258 "pgo-instrument-entry", cl::init(false), cl::Hidden, 259 cl::desc("Force to instrument function entry basicblock.")); 260 261 static cl::opt<bool> PGOFunctionEntryCoverage( 262 "pgo-function-entry-coverage", cl::Hidden, 263 cl::desc( 264 "Use this option to enable function entry coverage instrumentation.")); 265 266 static cl::opt<bool> PGOBlockCoverage( 267 "pgo-block-coverage", 268 cl::desc("Use this option to enable basic block coverage instrumentation")); 269 270 static cl::opt<bool> 271 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", 272 cl::desc("Create a dot file of CFGs with block " 273 "coverage inference information")); 274 275 static cl::opt<bool> PGOTemporalInstrumentation( 276 "pgo-temporal-instrumentation", 277 cl::desc("Use this option to enable temporal instrumentation")); 278 279 static cl::opt<bool> 280 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, 281 cl::desc("Fix function entry count in profile use.")); 282 283 static cl::opt<bool> PGOVerifyHotBFI( 284 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden, 285 cl::desc("Print out the non-match BFI count if a hot raw profile count " 286 "becomes non-hot, or a cold raw profile count becomes hot. " 287 "The print is enabled under -Rpass-analysis=pgo, or " 288 "internal option -pass-remakrs-analysis=pgo.")); 289 290 static cl::opt<bool> PGOVerifyBFI( 291 "pgo-verify-bfi", cl::init(false), cl::Hidden, 292 cl::desc("Print out mismatched BFI counts after setting profile metadata " 293 "The print is enabled under -Rpass-analysis=pgo, or " 294 "internal option -pass-remakrs-analysis=pgo.")); 295 296 static cl::opt<unsigned> PGOVerifyBFIRatio( 297 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, 298 cl::desc("Set the threshold for pgo-verify-bfi: only print out " 299 "mismatched BFI if the difference percentage is greater than " 300 "this value (in percentage).")); 301 302 static cl::opt<unsigned> PGOVerifyBFICutoff( 303 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, 304 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " 305 "profile count value is below.")); 306 307 static cl::opt<std::string> PGOTraceFuncHash( 308 "pgo-trace-func-hash", cl::init("-"), cl::Hidden, 309 cl::value_desc("function name"), 310 cl::desc("Trace the hash of the function with this name.")); 311 312 static cl::opt<unsigned> PGOFunctionSizeThreshold( 313 "pgo-function-size-threshold", cl::Hidden, 314 cl::desc("Do not instrument functions smaller than this threshold.")); 315 316 static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold( 317 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, 318 cl::desc("Do not instrument functions with the number of critical edges " 319 " greater than this threshold.")); 320 321 namespace llvm { 322 // Command line option to turn on CFG dot dump after profile annotation. 323 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 324 extern cl::opt<PGOViewCountsType> PGOViewCounts; 325 326 // Command line option to specify the name of the function for CFG dump 327 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 328 extern cl::opt<std::string> ViewBlockFreqFuncName; 329 330 extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate; 331 } // namespace llvm 332 333 static cl::opt<bool> 334 PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, 335 cl::desc("Use the old CFG function hashing")); 336 337 // Return a string describing the branch condition that can be 338 // used in static branch probability heuristics: 339 static std::string getBranchCondString(Instruction *TI) { 340 BranchInst *BI = dyn_cast<BranchInst>(TI); 341 if (!BI || !BI->isConditional()) 342 return std::string(); 343 344 Value *Cond = BI->getCondition(); 345 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 346 if (!CI) 347 return std::string(); 348 349 std::string result; 350 raw_string_ostream OS(result); 351 OS << CI->getPredicate() << "_"; 352 CI->getOperand(0)->getType()->print(OS, true); 353 354 Value *RHS = CI->getOperand(1); 355 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 356 if (CV) { 357 if (CV->isZero()) 358 OS << "_Zero"; 359 else if (CV->isOne()) 360 OS << "_One"; 361 else if (CV->isMinusOne()) 362 OS << "_MinusOne"; 363 else 364 OS << "_Const"; 365 } 366 OS.flush(); 367 return result; 368 } 369 370 static const char *ValueProfKindDescr[] = { 371 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 372 #include "llvm/ProfileData/InstrProfData.inc" 373 }; 374 375 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime 376 // aware this is an ir_level profile so it can set the version flag. 377 static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) { 378 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); 379 Type *IntTy64 = Type::getInt64Ty(M.getContext()); 380 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); 381 if (IsCS) 382 ProfileVersion |= VARIANT_MASK_CSIR_PROF; 383 if (PGOInstrumentEntry) 384 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; 385 if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) 386 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; 387 if (PGOFunctionEntryCoverage) 388 ProfileVersion |= 389 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY; 390 if (PGOBlockCoverage) 391 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE; 392 if (PGOTemporalInstrumentation) 393 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF; 394 auto IRLevelVersionVariable = new GlobalVariable( 395 M, IntTy64, true, GlobalValue::WeakAnyLinkage, 396 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); 397 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility); 398 Triple TT(M.getTargetTriple()); 399 if (TT.supportsCOMDAT()) { 400 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); 401 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); 402 } 403 return IRLevelVersionVariable; 404 } 405 406 namespace { 407 408 /// The select instruction visitor plays three roles specified 409 /// by the mode. In \c VM_counting mode, it simply counts the number of 410 /// select instructions. In \c VM_instrument mode, it inserts code to count 411 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 412 /// it reads the profile data and annotate the select instruction with metadata. 413 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 414 class PGOUseFunc; 415 416 /// Instruction Visitor class to visit select instructions. 417 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 418 Function &F; 419 unsigned NSIs = 0; // Number of select instructions instrumented. 420 VisitMode Mode = VM_counting; // Visiting mode. 421 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 422 unsigned TotalNumCtrs = 0; // Total number of counters 423 GlobalVariable *FuncNameVar = nullptr; 424 uint64_t FuncHash = 0; 425 PGOUseFunc *UseFunc = nullptr; 426 bool HasSingleByteCoverage; 427 428 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage) 429 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {} 430 431 void countSelects() { 432 NSIs = 0; 433 Mode = VM_counting; 434 visit(F); 435 } 436 437 // Visit the IR stream and instrument all select instructions. \p 438 // Ind is a pointer to the counter index variable; \p TotalNC 439 // is the total number of counters; \p FNV is the pointer to the 440 // PGO function name var; \p FHash is the function hash. 441 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV, 442 uint64_t FHash) { 443 Mode = VM_instrument; 444 CurCtrIdx = Ind; 445 TotalNumCtrs = TotalNC; 446 FuncHash = FHash; 447 FuncNameVar = FNV; 448 visit(F); 449 } 450 451 // Visit the IR stream and annotate all select instructions. 452 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) { 453 Mode = VM_annotate; 454 UseFunc = UF; 455 CurCtrIdx = Ind; 456 visit(F); 457 } 458 459 void instrumentOneSelectInst(SelectInst &SI); 460 void annotateOneSelectInst(SelectInst &SI); 461 462 // Visit \p SI instruction and perform tasks according to visit mode. 463 void visitSelectInst(SelectInst &SI); 464 465 // Return the number of select instructions. This needs be called after 466 // countSelects(). 467 unsigned getNumOfSelectInsts() const { return NSIs; } 468 }; 469 470 /// This class implements the CFG edges for the Minimum Spanning Tree (MST) 471 /// based instrumentation. 472 /// Note that the CFG can be a multi-graph. So there might be multiple edges 473 /// with the same SrcBB and DestBB. 474 struct PGOEdge { 475 BasicBlock *SrcBB; 476 BasicBlock *DestBB; 477 uint64_t Weight; 478 bool InMST = false; 479 bool Removed = false; 480 bool IsCritical = false; 481 482 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1) 483 : SrcBB(Src), DestBB(Dest), Weight(W) {} 484 485 /// Return the information string of an edge. 486 std::string infoString() const { 487 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 488 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)) 489 .str(); 490 } 491 }; 492 493 /// This class stores the auxiliary information for each BB in the MST. 494 struct PGOBBInfo { 495 PGOBBInfo *Group; 496 uint32_t Index; 497 uint32_t Rank = 0; 498 499 PGOBBInfo(unsigned IX) : Group(this), Index(IX) {} 500 501 /// Return the information string of this object. 502 std::string infoString() const { 503 return (Twine("Index=") + Twine(Index)).str(); 504 } 505 }; 506 507 // This class implements the CFG edges. Note the CFG can be a multi-graph. 508 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 509 private: 510 Function &F; 511 512 // Is this is context-sensitive instrumentation. 513 bool IsCS; 514 515 // A map that stores the Comdat group in function F. 516 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 517 518 ValueProfileCollector VPC; 519 520 void computeCFGHash(); 521 void renameComdatFunction(); 522 523 public: 524 const TargetLibraryInfo &TLI; 525 std::vector<std::vector<VPCandidateInfo>> ValueSites; 526 SelectInstVisitor SIVisitor; 527 std::string FuncName; 528 std::string DeprecatedFuncName; 529 GlobalVariable *FuncNameVar; 530 531 // CFG hash value for this function. 532 uint64_t FunctionHash = 0; 533 534 // The Minimum Spanning Tree of function CFG. 535 CFGMST<Edge, BBInfo> MST; 536 537 const std::optional<BlockCoverageInference> BCI; 538 539 static std::optional<BlockCoverageInference> 540 constructBCI(Function &Func, bool HasSingleByteCoverage, 541 bool InstrumentFuncEntry) { 542 if (HasSingleByteCoverage) 543 return BlockCoverageInference(Func, InstrumentFuncEntry); 544 return {}; 545 } 546 547 // Collect all the BBs that will be instrumented, and store them in 548 // InstrumentBBs. 549 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 550 551 // Give an edge, find the BB that will be instrumented. 552 // Return nullptr if there is no BB to be instrumented. 553 BasicBlock *getInstrBB(Edge *E); 554 555 // Return the auxiliary BB information. 556 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 557 558 // Return the auxiliary BB information if available. 559 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 560 561 // Dump edges and BB information. 562 void dumpInfo(StringRef Str = "") const { 563 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + 564 " Hash: " + Twine(FunctionHash) + "\t" + Str); 565 } 566 567 FuncPGOInstrumentation( 568 Function &Func, TargetLibraryInfo &TLI, 569 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 570 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 571 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, 572 bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false) 573 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), 574 TLI(TLI), ValueSites(IPVK_Last + 1), 575 SIVisitor(Func, HasSingleByteCoverage), 576 MST(F, InstrumentFuncEntry, BPI, BFI), 577 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) { 578 if (BCI && PGOViewBlockCoverageGraph) 579 BCI->viewBlockCoverageGraph(); 580 // This should be done before CFG hash computation. 581 SIVisitor.countSelects(); 582 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 583 if (!IsCS) { 584 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 585 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 586 NumOfPGOBB += MST.bbInfoSize(); 587 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 588 } else { 589 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 590 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 591 NumOfCSPGOBB += MST.bbInfoSize(); 592 } 593 594 FuncName = getIRPGOFuncName(F); 595 DeprecatedFuncName = getPGOFuncName(F); 596 computeCFGHash(); 597 if (!ComdatMembers.empty()) 598 renameComdatFunction(); 599 LLVM_DEBUG(dumpInfo("after CFGMST")); 600 601 for (const auto &E : MST.allEdges()) { 602 if (E->Removed) 603 continue; 604 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 605 if (!E->InMST) 606 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 607 } 608 609 if (CreateGlobalVar) 610 FuncNameVar = createPGOFuncNameVar(F, FuncName); 611 } 612 }; 613 614 } // end anonymous namespace 615 616 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 617 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers 618 // of selects, indirect calls, mem ops and edges. 619 template <class Edge, class BBInfo> 620 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 621 std::vector<uint8_t> Indexes; 622 JamCRC JC; 623 for (auto &BB : F) { 624 const Instruction *TI = BB.getTerminator(); 625 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 626 BasicBlock *Succ = TI->getSuccessor(I); 627 auto BI = findBBInfo(Succ); 628 if (BI == nullptr) 629 continue; 630 uint32_t Index = BI->Index; 631 for (int J = 0; J < 4; J++) 632 Indexes.push_back((uint8_t)(Index >> (J * 8))); 633 } 634 } 635 JC.update(Indexes); 636 637 JamCRC JCH; 638 if (PGOOldCFGHashing) { 639 // Hash format for context sensitive profile. Reserve 4 bits for other 640 // information. 641 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 642 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 643 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | 644 (uint64_t)MST.numEdges() << 32 | JC.getCRC(); 645 } else { 646 // The higher 32 bits. 647 auto updateJCH = [&JCH](uint64_t Num) { 648 uint8_t Data[8]; 649 support::endian::write64le(Data, Num); 650 JCH.update(Data); 651 }; 652 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); 653 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); 654 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); 655 if (BCI) { 656 updateJCH(BCI->getInstrumentedBlocksHash()); 657 } else { 658 updateJCH((uint64_t)MST.numEdges()); 659 } 660 661 // Hash format for context sensitive profile. Reserve 4 bits for other 662 // information. 663 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); 664 } 665 666 // Reserve bit 60-63 for other information purpose. 667 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 668 if (IsCS) 669 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 670 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 671 << " CRC = " << JC.getCRC() 672 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 673 << ", Edges = " << MST.numEdges() << ", ICSites = " 674 << ValueSites[IPVK_IndirectCallTarget].size()); 675 if (!PGOOldCFGHashing) { 676 LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size() 677 << ", High32 CRC = " << JCH.getCRC()); 678 } 679 LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); 680 681 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash)) 682 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash 683 << " in building " << F.getParent()->getSourceFileName() << "\n"; 684 } 685 686 // Check if we can safely rename this Comdat function. 687 static bool canRenameComdat( 688 Function &F, 689 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 690 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 691 return false; 692 693 // FIXME: Current only handle those Comdat groups that only containing one 694 // function. 695 // (1) For a Comdat group containing multiple functions, we need to have a 696 // unique postfix based on the hashes for each function. There is a 697 // non-trivial code refactoring to do this efficiently. 698 // (2) Variables can not be renamed, so we can not rename Comdat function in a 699 // group including global vars. 700 Comdat *C = F.getComdat(); 701 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 702 assert(!isa<GlobalAlias>(CM.second)); 703 Function *FM = dyn_cast<Function>(CM.second); 704 if (FM != &F) 705 return false; 706 } 707 return true; 708 } 709 710 // Append the CFGHash to the Comdat function name. 711 template <class Edge, class BBInfo> 712 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 713 if (!canRenameComdat(F, ComdatMembers)) 714 return; 715 std::string OrigName = F.getName().str(); 716 std::string NewFuncName = 717 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 718 F.setName(Twine(NewFuncName)); 719 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 720 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 721 Comdat *NewComdat; 722 Module *M = F.getParent(); 723 // For AvailableExternallyLinkage functions, change the linkage to 724 // LinkOnceODR and put them into comdat. This is because after renaming, there 725 // is no backup external copy available for the function. 726 if (!F.hasComdat()) { 727 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 728 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 729 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 730 F.setComdat(NewComdat); 731 return; 732 } 733 734 // This function belongs to a single function Comdat group. 735 Comdat *OrigComdat = F.getComdat(); 736 std::string NewComdatName = 737 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 738 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 739 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 740 741 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 742 // Must be a function. 743 cast<Function>(CM.second)->setComdat(NewComdat); 744 } 745 } 746 747 /// Collect all the BBs that will be instruments and add them to 748 /// `InstrumentBBs`. 749 template <class Edge, class BBInfo> 750 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 751 std::vector<BasicBlock *> &InstrumentBBs) { 752 if (BCI) { 753 for (auto &BB : F) 754 if (BCI->shouldInstrumentBlock(BB)) 755 InstrumentBBs.push_back(&BB); 756 return; 757 } 758 759 // Use a worklist as we will update the vector during the iteration. 760 std::vector<Edge *> EdgeList; 761 EdgeList.reserve(MST.numEdges()); 762 for (const auto &E : MST.allEdges()) 763 EdgeList.push_back(E.get()); 764 765 for (auto &E : EdgeList) { 766 BasicBlock *InstrBB = getInstrBB(E); 767 if (InstrBB) 768 InstrumentBBs.push_back(InstrBB); 769 } 770 } 771 772 // Given a CFG E to be instrumented, find which BB to place the instrumented 773 // code. The function will split the critical edge if necessary. 774 template <class Edge, class BBInfo> 775 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 776 if (E->InMST || E->Removed) 777 return nullptr; 778 779 BasicBlock *SrcBB = E->SrcBB; 780 BasicBlock *DestBB = E->DestBB; 781 // For a fake edge, instrument the real BB. 782 if (SrcBB == nullptr) 783 return DestBB; 784 if (DestBB == nullptr) 785 return SrcBB; 786 787 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 788 // There are basic blocks (such as catchswitch) cannot be instrumented. 789 // If the returned first insertion point is the end of BB, skip this BB. 790 if (BB->getFirstInsertionPt() == BB->end()) 791 return nullptr; 792 return BB; 793 }; 794 795 // Instrument the SrcBB if it has a single successor, 796 // otherwise, the DestBB if this is not a critical edge. 797 Instruction *TI = SrcBB->getTerminator(); 798 if (TI->getNumSuccessors() <= 1) 799 return canInstrument(SrcBB); 800 if (!E->IsCritical) 801 return canInstrument(DestBB); 802 803 // Some IndirectBr critical edges cannot be split by the previous 804 // SplitIndirectBrCriticalEdges call. Bail out. 805 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 806 BasicBlock *InstrBB = 807 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum); 808 if (!InstrBB) { 809 LLVM_DEBUG( 810 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 811 return nullptr; 812 } 813 // For a critical edge, we have to split. Instrument the newly 814 // created BB. 815 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 816 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 817 << " --> " << getBBInfo(DestBB).Index << "\n"); 818 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 819 MST.addEdge(SrcBB, InstrBB, 0); 820 // Second one: Add new edge of InstrBB->DestBB. 821 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 822 NewEdge1.InMST = true; 823 E->Removed = true; 824 825 return canInstrument(InstrBB); 826 } 827 828 // When generating value profiling calls on Windows routines that make use of 829 // handler funclets for exception processing an operand bundle needs to attached 830 // to the called function. This routine will set \p OpBundles to contain the 831 // funclet information, if any is needed, that should be placed on the generated 832 // value profiling call for the value profile candidate call. 833 static void 834 populateEHOperandBundle(VPCandidateInfo &Cand, 835 DenseMap<BasicBlock *, ColorVector> &BlockColors, 836 SmallVectorImpl<OperandBundleDef> &OpBundles) { 837 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); 838 if (!OrigCall) 839 return; 840 841 if (!isa<IntrinsicInst>(OrigCall)) { 842 // The instrumentation call should belong to the same funclet as a 843 // non-intrinsic call, so just copy the operand bundle, if any exists. 844 std::optional<OperandBundleUse> ParentFunclet = 845 OrigCall->getOperandBundle(LLVMContext::OB_funclet); 846 if (ParentFunclet) 847 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); 848 } else { 849 // Intrinsics or other instructions do not get funclet information from the 850 // front-end. Need to use the BlockColors that was computed by the routine 851 // colorEHFunclets to determine whether a funclet is needed. 852 if (!BlockColors.empty()) { 853 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 854 assert(CV.size() == 1 && "non-unique color for block!"); 855 Instruction *EHPad = CV.front()->getFirstNonPHI(); 856 if (EHPad->isEHPad()) 857 OpBundles.emplace_back("funclet", EHPad); 858 } 859 } 860 } 861 862 // Visit all edge and instrument the edges not in MST, and do value profiling. 863 // Critical edges will be split. 864 static void instrumentOneFunc( 865 Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, 866 BlockFrequencyInfo *BFI, 867 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 868 bool IsCS) { 869 if (!PGOBlockCoverage) { 870 // Split indirectbr critical edges here before computing the MST rather than 871 // later in getInstrBB() to avoid invalidating it. 872 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); 873 } 874 875 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo( 876 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry, 877 PGOBlockCoverage); 878 879 auto Name = FuncInfo.FuncNameVar; 880 auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()), 881 FuncInfo.FunctionHash); 882 if (PGOFunctionEntryCoverage) { 883 auto &EntryBB = F.getEntryBlock(); 884 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); 885 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>, 886 // i32 <index>) 887 Builder.CreateCall( 888 Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover), 889 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)}); 890 return; 891 } 892 893 std::vector<BasicBlock *> InstrumentBBs; 894 FuncInfo.getInstrumentBBs(InstrumentBBs); 895 unsigned NumCounters = 896 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 897 898 uint32_t I = 0; 899 if (PGOTemporalInstrumentation) { 900 NumCounters += PGOBlockCoverage ? 8 : 1; 901 auto &EntryBB = F.getEntryBlock(); 902 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); 903 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>, 904 // i32 <index>) 905 Builder.CreateCall( 906 Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp), 907 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)}); 908 I += PGOBlockCoverage ? 8 : 1; 909 } 910 911 for (auto *InstrBB : InstrumentBBs) { 912 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 913 assert(Builder.GetInsertPoint() != InstrBB->end() && 914 "Cannot get the Instrumentation point"); 915 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>, 916 // i32 <index>) 917 Builder.CreateCall( 918 Intrinsic::getDeclaration(M, PGOBlockCoverage 919 ? Intrinsic::instrprof_cover 920 : Intrinsic::instrprof_increment), 921 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)}); 922 } 923 924 // Now instrument select instructions: 925 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, FuncInfo.FuncNameVar, 926 FuncInfo.FunctionHash); 927 assert(I == NumCounters); 928 929 if (DisableValueProfiling) 930 return; 931 932 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 933 934 // Intrinsic function calls do not have funclet operand bundles needed for 935 // Windows exception handling attached to them. However, if value profiling is 936 // inserted for one of these calls, then a funclet value will need to be set 937 // on the instrumentation call based on the funclet coloring. 938 DenseMap<BasicBlock *, ColorVector> BlockColors; 939 if (F.hasPersonalityFn() && 940 isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 941 BlockColors = colorEHFunclets(F); 942 943 // For each VP Kind, walk the VP candidates and instrument each one. 944 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 945 unsigned SiteIndex = 0; 946 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 947 continue; 948 949 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 950 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 951 << " site: CallSite Index = " << SiteIndex << "\n"); 952 953 IRBuilder<> Builder(Cand.InsertPt); 954 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 955 "Cannot get the Instrumentation point"); 956 957 Value *ToProfile = nullptr; 958 if (Cand.V->getType()->isIntegerTy()) 959 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 960 else if (Cand.V->getType()->isPointerTy()) 961 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 962 assert(ToProfile && "value profiling Value is of unexpected type"); 963 964 SmallVector<OperandBundleDef, 1> OpBundles; 965 populateEHOperandBundle(Cand, BlockColors, OpBundles); 966 Builder.CreateCall( 967 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 968 {FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash), 969 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 970 OpBundles); 971 } 972 } // IPVK_First <= Kind <= IPVK_Last 973 } 974 975 namespace { 976 977 // This class represents a CFG edge in profile use compilation. 978 struct PGOUseEdge : public PGOEdge { 979 using PGOEdge::PGOEdge; 980 981 bool CountValid = false; 982 uint64_t CountValue = 0; 983 984 // Set edge count value 985 void setEdgeCount(uint64_t Value) { 986 CountValue = Value; 987 CountValid = true; 988 } 989 990 // Return the information string for this object. 991 std::string infoString() const { 992 if (!CountValid) 993 return PGOEdge::infoString(); 994 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 995 .str(); 996 } 997 }; 998 999 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 1000 1001 // This class stores the auxiliary information for each BB. 1002 struct PGOUseBBInfo : public PGOBBInfo { 1003 uint64_t CountValue = 0; 1004 bool CountValid; 1005 int32_t UnknownCountInEdge = 0; 1006 int32_t UnknownCountOutEdge = 0; 1007 DirectEdges InEdges; 1008 DirectEdges OutEdges; 1009 1010 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX), CountValid(false) {} 1011 1012 // Set the profile count value for this BB. 1013 void setBBInfoCount(uint64_t Value) { 1014 CountValue = Value; 1015 CountValid = true; 1016 } 1017 1018 // Return the information string of this object. 1019 std::string infoString() const { 1020 if (!CountValid) 1021 return PGOBBInfo::infoString(); 1022 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(CountValue)) 1023 .str(); 1024 } 1025 1026 // Add an OutEdge and update the edge count. 1027 void addOutEdge(PGOUseEdge *E) { 1028 OutEdges.push_back(E); 1029 UnknownCountOutEdge++; 1030 } 1031 1032 // Add an InEdge and update the edge count. 1033 void addInEdge(PGOUseEdge *E) { 1034 InEdges.push_back(E); 1035 UnknownCountInEdge++; 1036 } 1037 }; 1038 1039 } // end anonymous namespace 1040 1041 // Sum up the count values for all the edges. 1042 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 1043 uint64_t Total = 0; 1044 for (const auto &E : Edges) { 1045 if (E->Removed) 1046 continue; 1047 Total += E->CountValue; 1048 } 1049 return Total; 1050 } 1051 1052 namespace { 1053 1054 class PGOUseFunc { 1055 public: 1056 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, 1057 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 1058 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 1059 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry, 1060 bool HasSingleByteCoverage) 1061 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 1062 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, 1063 InstrumentFuncEntry, HasSingleByteCoverage), 1064 FreqAttr(FFA_Normal), IsCS(IsCS) {} 1065 1066 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum); 1067 1068 // Read counts for the instrumented BB from profile. 1069 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1070 InstrProfRecord::CountPseudoKind &PseudoKind); 1071 1072 // Populate the counts for all BBs. 1073 void populateCounters(); 1074 1075 // Set block coverage based on profile coverage values. 1076 void populateCoverage(IndexedInstrProfReader *PGOReader); 1077 1078 // Set the branch weights based on the count values. 1079 void setBranchWeights(); 1080 1081 // Annotate the value profile call sites for all value kind. 1082 void annotateValueSites(); 1083 1084 // Annotate the value profile call sites for one value kind. 1085 void annotateValueSites(uint32_t Kind); 1086 1087 // Annotate the irreducible loop header weights. 1088 void annotateIrrLoopHeaderWeights(); 1089 1090 // The hotness of the function from the profile count. 1091 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1092 1093 // Return the function hotness from the profile. 1094 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1095 1096 // Return the function hash. 1097 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1098 1099 // Return the profile record for this function; 1100 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1101 1102 // Return the auxiliary BB information. 1103 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const { 1104 return FuncInfo.getBBInfo(BB); 1105 } 1106 1107 // Return the auxiliary BB information if available. 1108 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const { 1109 return FuncInfo.findBBInfo(BB); 1110 } 1111 1112 Function &getFunc() const { return F; } 1113 1114 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); } 1115 1116 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1117 1118 private: 1119 Function &F; 1120 Module *M; 1121 BlockFrequencyInfo *BFI; 1122 ProfileSummaryInfo *PSI; 1123 1124 // This member stores the shared information with class PGOGenFunc. 1125 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo; 1126 1127 // The maximum count value in the profile. This is only used in PGO use 1128 // compilation. 1129 uint64_t ProgramMaxCount; 1130 1131 // Position of counter that remains to be read. 1132 uint32_t CountPosition = 0; 1133 1134 // Total size of the profile count for this function. 1135 uint32_t ProfileCountSize = 0; 1136 1137 // ProfileRecord for this function. 1138 InstrProfRecord ProfileRecord; 1139 1140 // Function hotness info derived from profile. 1141 FuncFreqAttr FreqAttr; 1142 1143 // Is to use the context sensitive profile. 1144 bool IsCS; 1145 1146 // Find the Instrumented BB and set the value. Return false on error. 1147 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1148 1149 // Set the edge counter value for the unknown edge -- there should be only 1150 // one unknown edge. 1151 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1152 1153 // Set the hot/cold inline hints based on the count values. 1154 // FIXME: This function should be removed once the functionality in 1155 // the inliner is implemented. 1156 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1157 if (PSI->isHotCount(EntryCount)) 1158 FreqAttr = FFA_Hot; 1159 else if (PSI->isColdCount(MaxCount)) 1160 FreqAttr = FFA_Cold; 1161 } 1162 }; 1163 1164 } // end anonymous namespace 1165 1166 /// Set up InEdges/OutEdges for all BBs in the MST. 1167 static void setupBBInfoEdges( 1168 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) { 1169 // This is not required when there is block coverage inference. 1170 if (FuncInfo.BCI) 1171 return; 1172 for (const auto &E : FuncInfo.MST.allEdges()) { 1173 if (E->Removed) 1174 continue; 1175 const BasicBlock *SrcBB = E->SrcBB; 1176 const BasicBlock *DestBB = E->DestBB; 1177 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB); 1178 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB); 1179 SrcInfo.addOutEdge(E.get()); 1180 DestInfo.addInEdge(E.get()); 1181 } 1182 } 1183 1184 // Visit all the edges and assign the count value for the instrumented 1185 // edges and the BB. Return false on error. 1186 bool PGOUseFunc::setInstrumentedCounts( 1187 const std::vector<uint64_t> &CountFromProfile) { 1188 1189 std::vector<BasicBlock *> InstrumentBBs; 1190 FuncInfo.getInstrumentBBs(InstrumentBBs); 1191 1192 setupBBInfoEdges(FuncInfo); 1193 1194 unsigned NumCounters = 1195 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1196 // The number of counters here should match the number of counters 1197 // in profile. Return if they mismatch. 1198 if (NumCounters != CountFromProfile.size()) { 1199 return false; 1200 } 1201 auto *FuncEntry = &*F.begin(); 1202 1203 // Set the profile count to the Instrumented BBs. 1204 uint32_t I = 0; 1205 for (BasicBlock *InstrBB : InstrumentBBs) { 1206 uint64_t CountValue = CountFromProfile[I++]; 1207 PGOUseBBInfo &Info = getBBInfo(InstrBB); 1208 // If we reach here, we know that we have some nonzero count 1209 // values in this function. The entry count should not be 0. 1210 // Fix it if necessary. 1211 if (InstrBB == FuncEntry && CountValue == 0) 1212 CountValue = 1; 1213 Info.setBBInfoCount(CountValue); 1214 } 1215 ProfileCountSize = CountFromProfile.size(); 1216 CountPosition = I; 1217 1218 // Set the edge count and update the count of unknown edges for BBs. 1219 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1220 E->setEdgeCount(Value); 1221 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1222 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1223 }; 1224 1225 // Set the profile count the Instrumented edges. There are BBs that not in 1226 // MST but not instrumented. Need to set the edge count value so that we can 1227 // populate the profile counts later. 1228 for (const auto &E : FuncInfo.MST.allEdges()) { 1229 if (E->Removed || E->InMST) 1230 continue; 1231 const BasicBlock *SrcBB = E->SrcBB; 1232 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB); 1233 1234 // If only one out-edge, the edge profile count should be the same as BB 1235 // profile count. 1236 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1237 setEdgeCount(E.get(), SrcInfo.CountValue); 1238 else { 1239 const BasicBlock *DestBB = E->DestBB; 1240 PGOUseBBInfo &DestInfo = getBBInfo(DestBB); 1241 // If only one in-edge, the edge profile count should be the same as BB 1242 // profile count. 1243 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1244 setEdgeCount(E.get(), DestInfo.CountValue); 1245 } 1246 if (E->CountValid) 1247 continue; 1248 // E's count should have been set from profile. If not, this meenas E skips 1249 // the instrumentation. We set the count to 0. 1250 setEdgeCount(E.get(), 0); 1251 } 1252 return true; 1253 } 1254 1255 // Set the count value for the unknown edge. There should be one and only one 1256 // unknown edge in Edges vector. 1257 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1258 for (auto &E : Edges) { 1259 if (E->CountValid) 1260 continue; 1261 E->setEdgeCount(Value); 1262 1263 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1264 getBBInfo(E->DestBB).UnknownCountInEdge--; 1265 return; 1266 } 1267 llvm_unreachable("Cannot find the unknown count edge"); 1268 } 1269 1270 // Emit function metadata indicating PGO profile mismatch. 1271 static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx) { 1272 const char MetadataName[] = "instr_prof_hash_mismatch"; 1273 SmallVector<Metadata *, 2> Names; 1274 // If this metadata already exists, ignore. 1275 auto *Existing = F.getMetadata(LLVMContext::MD_annotation); 1276 if (Existing) { 1277 MDTuple *Tuple = cast<MDTuple>(Existing); 1278 for (const auto &N : Tuple->operands()) { 1279 if (N.equalsStr(MetadataName)) 1280 return; 1281 Names.push_back(N.get()); 1282 } 1283 } 1284 1285 MDBuilder MDB(ctx); 1286 Names.push_back(MDB.createString(MetadataName)); 1287 MDNode *MD = MDTuple::get(ctx, Names); 1288 F.setMetadata(LLVMContext::MD_annotation, MD); 1289 } 1290 1291 void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) { 1292 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) { 1293 auto &Ctx = M->getContext(); 1294 auto Err = IPE.get(); 1295 bool SkipWarning = false; 1296 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1297 << FuncInfo.FuncName << ": "); 1298 if (Err == instrprof_error::unknown_function) { 1299 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1300 SkipWarning = !PGOWarnMissing; 1301 LLVM_DEBUG(dbgs() << "unknown function"); 1302 } else if (Err == instrprof_error::hash_mismatch || 1303 Err == instrprof_error::malformed) { 1304 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1305 SkipWarning = 1306 NoPGOWarnMismatch || 1307 (NoPGOWarnMismatchComdatWeak && 1308 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage || 1309 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1310 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash 1311 << " skip=" << SkipWarning << ")"); 1312 // Emit function metadata indicating PGO profile mismatch. 1313 annotateFunctionWithHashMismatch(F, M->getContext()); 1314 } 1315 1316 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1317 if (SkipWarning) 1318 return; 1319 1320 std::string Msg = 1321 IPE.message() + std::string(" ") + F.getName().str() + 1322 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) + 1323 std::string(" up to ") + std::to_string(MismatchedFuncSum) + 1324 std::string(" count discarded"); 1325 1326 Ctx.diagnose( 1327 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1328 }); 1329 } 1330 1331 // Read the profile from ProfileFileName and assign the value to the 1332 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1333 // Return true if the profile are successfully read, and false on errors. 1334 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1335 InstrProfRecord::CountPseudoKind &PseudoKind) { 1336 auto &Ctx = M->getContext(); 1337 uint64_t MismatchedFuncSum = 0; 1338 Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( 1339 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName, 1340 &MismatchedFuncSum); 1341 if (Error E = Result.takeError()) { 1342 handleInstrProfError(std::move(E), MismatchedFuncSum); 1343 return false; 1344 } 1345 ProfileRecord = std::move(Result.get()); 1346 PseudoKind = ProfileRecord.getCountPseudoKind(); 1347 if (PseudoKind != InstrProfRecord::NotPseudo) { 1348 return true; 1349 } 1350 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1351 1352 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1353 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1354 1355 uint64_t ValueSum = 0; 1356 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1357 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1358 ValueSum += CountFromProfile[I]; 1359 } 1360 AllZeros = (ValueSum == 0); 1361 1362 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1363 1364 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1365 getBBInfo(nullptr).UnknownCountInEdge = 2; 1366 1367 if (!setInstrumentedCounts(CountFromProfile)) { 1368 LLVM_DEBUG( 1369 dbgs() << "Inconsistent number of counts, skipping this function"); 1370 Ctx.diagnose(DiagnosticInfoPGOProfile( 1371 M->getName().data(), 1372 Twine("Inconsistent number of counts in ") + F.getName().str() + 1373 Twine(": the profile may be stale or there is a function name " 1374 "collision."), 1375 DS_Warning)); 1376 return false; 1377 } 1378 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1379 return true; 1380 } 1381 1382 void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) { 1383 uint64_t MismatchedFuncSum = 0; 1384 Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( 1385 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName, 1386 &MismatchedFuncSum); 1387 if (auto Err = Result.takeError()) { 1388 handleInstrProfError(std::move(Err), MismatchedFuncSum); 1389 return; 1390 } 1391 1392 std::vector<uint64_t> &CountsFromProfile = Result.get().Counts; 1393 DenseMap<const BasicBlock *, bool> Coverage; 1394 unsigned Index = 0; 1395 for (auto &BB : F) 1396 if (FuncInfo.BCI->shouldInstrumentBlock(BB)) 1397 Coverage[&BB] = (CountsFromProfile[Index++] != 0); 1398 assert(Index == CountsFromProfile.size()); 1399 1400 // For each B in InverseDependencies[A], if A is covered then B is covered. 1401 DenseMap<const BasicBlock *, DenseSet<const BasicBlock *>> 1402 InverseDependencies; 1403 for (auto &BB : F) { 1404 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) { 1405 // If Dep is covered then BB is covered. 1406 InverseDependencies[Dep].insert(&BB); 1407 } 1408 } 1409 1410 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm. 1411 std::stack<const BasicBlock *> CoveredBlocksToProcess; 1412 for (auto &[BB, IsCovered] : Coverage) 1413 if (IsCovered) 1414 CoveredBlocksToProcess.push(BB); 1415 1416 while (!CoveredBlocksToProcess.empty()) { 1417 auto *CoveredBlock = CoveredBlocksToProcess.top(); 1418 assert(Coverage[CoveredBlock]); 1419 CoveredBlocksToProcess.pop(); 1420 for (auto *BB : InverseDependencies[CoveredBlock]) { 1421 // If CoveredBlock is covered then BB is covered. 1422 if (Coverage[BB]) 1423 continue; 1424 Coverage[BB] = true; 1425 CoveredBlocksToProcess.push(BB); 1426 } 1427 } 1428 1429 // Annotate block coverage. 1430 MDBuilder MDB(F.getContext()); 1431 // We set the entry count to 10000 if the entry block is covered so that BFI 1432 // can propagate a fraction of this count to the other covered blocks. 1433 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0); 1434 for (auto &BB : F) { 1435 // For a block A and its successor B, we set the edge weight as follows: 1436 // If A is covered and B is covered, set weight=1. 1437 // If A is covered and B is uncovered, set weight=0. 1438 // If A is uncovered, set weight=1. 1439 // This setup will allow BFI to give nonzero profile counts to only covered 1440 // blocks. 1441 SmallVector<uint32_t, 4> Weights; 1442 for (auto *Succ : successors(&BB)) 1443 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0); 1444 if (Weights.size() >= 2) 1445 llvm::setBranchWeights(*BB.getTerminator(), Weights); 1446 } 1447 1448 unsigned NumCorruptCoverage = 0; 1449 DominatorTree DT(F); 1450 LoopInfo LI(DT); 1451 BranchProbabilityInfo BPI(F, LI); 1452 BlockFrequencyInfo BFI(F, BPI, LI); 1453 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> { 1454 if (auto C = BFI.getBlockProfileCount(&BB)) 1455 return C == 0; 1456 return {}; 1457 }; 1458 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n"); 1459 for (auto &BB : F) { 1460 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ") 1461 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName() 1462 << "\n"); 1463 // In some cases it is possible to find a covered block that has no covered 1464 // successors, e.g., when a block calls a function that may call exit(). In 1465 // those cases, BFI could find its successor to be covered while BCI could 1466 // find its successor to be dead. 1467 if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) { 1468 LLVM_DEBUG( 1469 dbgs() << "Found inconsistent block covearge for " << BB.getName() 1470 << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI=" 1471 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n"); 1472 ++NumCorruptCoverage; 1473 } 1474 if (Coverage[&BB]) 1475 ++NumCoveredBlocks; 1476 } 1477 if (PGOVerifyBFI && NumCorruptCoverage) { 1478 auto &Ctx = M->getContext(); 1479 Ctx.diagnose(DiagnosticInfoPGOProfile( 1480 M->getName().data(), 1481 Twine("Found inconsistent block coverage for function ") + F.getName() + 1482 " in " + Twine(NumCorruptCoverage) + " blocks.", 1483 DS_Warning)); 1484 } 1485 if (PGOViewBlockCoverageGraph) 1486 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage); 1487 } 1488 1489 // Populate the counters from instrumented BBs to all BBs. 1490 // In the end of this operation, all BBs should have a valid count value. 1491 void PGOUseFunc::populateCounters() { 1492 bool Changes = true; 1493 unsigned NumPasses = 0; 1494 while (Changes) { 1495 NumPasses++; 1496 Changes = false; 1497 1498 // For efficient traversal, it's better to start from the end as most 1499 // of the instrumented edges are at the end. 1500 for (auto &BB : reverse(F)) { 1501 PGOUseBBInfo *Count = findBBInfo(&BB); 1502 if (Count == nullptr) 1503 continue; 1504 if (!Count->CountValid) { 1505 if (Count->UnknownCountOutEdge == 0) { 1506 Count->CountValue = sumEdgeCount(Count->OutEdges); 1507 Count->CountValid = true; 1508 Changes = true; 1509 } else if (Count->UnknownCountInEdge == 0) { 1510 Count->CountValue = sumEdgeCount(Count->InEdges); 1511 Count->CountValid = true; 1512 Changes = true; 1513 } 1514 } 1515 if (Count->CountValid) { 1516 if (Count->UnknownCountOutEdge == 1) { 1517 uint64_t Total = 0; 1518 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1519 // If the one of the successor block can early terminate (no-return), 1520 // we can end up with situation where out edge sum count is larger as 1521 // the source BB's count is collected by a post-dominated block. 1522 if (Count->CountValue > OutSum) 1523 Total = Count->CountValue - OutSum; 1524 setEdgeCount(Count->OutEdges, Total); 1525 Changes = true; 1526 } 1527 if (Count->UnknownCountInEdge == 1) { 1528 uint64_t Total = 0; 1529 uint64_t InSum = sumEdgeCount(Count->InEdges); 1530 if (Count->CountValue > InSum) 1531 Total = Count->CountValue - InSum; 1532 setEdgeCount(Count->InEdges, Total); 1533 Changes = true; 1534 } 1535 } 1536 } 1537 } 1538 1539 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1540 (void)NumPasses; 1541 #ifndef NDEBUG 1542 // Assert every BB has a valid counter. 1543 for (auto &BB : F) { 1544 auto BI = findBBInfo(&BB); 1545 if (BI == nullptr) 1546 continue; 1547 assert(BI->CountValid && "BB count is not valid"); 1548 } 1549 #endif 1550 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1551 uint64_t FuncMaxCount = FuncEntryCount; 1552 for (auto &BB : F) { 1553 auto BI = findBBInfo(&BB); 1554 if (BI == nullptr) 1555 continue; 1556 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1557 } 1558 1559 // Fix the obviously inconsistent entry count. 1560 if (FuncMaxCount > 0 && FuncEntryCount == 0) 1561 FuncEntryCount = 1; 1562 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1563 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1564 1565 // Now annotate select instructions 1566 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition); 1567 assert(CountPosition == ProfileCountSize); 1568 1569 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1570 } 1571 1572 // Assign the scaled count values to the BB with multiple out edges. 1573 void PGOUseFunc::setBranchWeights() { 1574 // Generate MD_prof metadata for every branch instruction. 1575 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1576 << " IsCS=" << IsCS << "\n"); 1577 for (auto &BB : F) { 1578 Instruction *TI = BB.getTerminator(); 1579 if (TI->getNumSuccessors() < 2) 1580 continue; 1581 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1582 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) || 1583 isa<CallBrInst>(TI))) 1584 continue; 1585 1586 if (getBBInfo(&BB).CountValue == 0) 1587 continue; 1588 1589 // We have a non-zero Branch BB. 1590 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB); 1591 unsigned Size = BBCountInfo.OutEdges.size(); 1592 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1593 uint64_t MaxCount = 0; 1594 for (unsigned s = 0; s < Size; s++) { 1595 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1596 const BasicBlock *SrcBB = E->SrcBB; 1597 const BasicBlock *DestBB = E->DestBB; 1598 if (DestBB == nullptr) 1599 continue; 1600 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1601 uint64_t EdgeCount = E->CountValue; 1602 if (EdgeCount > MaxCount) 1603 MaxCount = EdgeCount; 1604 EdgeCounts[SuccNum] = EdgeCount; 1605 } 1606 1607 if (MaxCount) 1608 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1609 else { 1610 // A zero MaxCount can come about when we have a BB with a positive 1611 // count, and whose successor blocks all have 0 count. This can happen 1612 // when there is no exit block and the code exits via a noreturn function. 1613 auto &Ctx = M->getContext(); 1614 Ctx.diagnose(DiagnosticInfoPGOProfile( 1615 M->getName().data(), 1616 Twine("Profile in ") + F.getName().str() + 1617 Twine(" partially ignored") + 1618 Twine(", possibly due to the lack of a return path."), 1619 DS_Warning)); 1620 } 1621 } 1622 } 1623 1624 static bool isIndirectBrTarget(BasicBlock *BB) { 1625 for (BasicBlock *Pred : predecessors(BB)) { 1626 if (isa<IndirectBrInst>(Pred->getTerminator())) 1627 return true; 1628 } 1629 return false; 1630 } 1631 1632 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1633 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1634 // Find irr loop headers 1635 for (auto &BB : F) { 1636 // As a heuristic also annotate indrectbr targets as they have a high chance 1637 // to become an irreducible loop header after the indirectbr tail 1638 // duplication. 1639 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1640 Instruction *TI = BB.getTerminator(); 1641 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB); 1642 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1643 } 1644 } 1645 } 1646 1647 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1648 Module *M = F.getParent(); 1649 IRBuilder<> Builder(&SI); 1650 Type *Int64Ty = Builder.getInt64Ty(); 1651 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1652 Builder.CreateCall( 1653 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1654 {FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1655 Builder.getInt32(*CurCtrIdx), Step}); 1656 ++(*CurCtrIdx); 1657 } 1658 1659 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1660 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1661 assert(*CurCtrIdx < CountFromProfile.size() && 1662 "Out of bound access of counters"); 1663 uint64_t SCounts[2]; 1664 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1665 ++(*CurCtrIdx); 1666 uint64_t TotalCount = 0; 1667 auto BI = UseFunc->findBBInfo(SI.getParent()); 1668 if (BI != nullptr) 1669 TotalCount = BI->CountValue; 1670 // False Count 1671 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1672 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1673 if (MaxCount) 1674 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1675 } 1676 1677 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1678 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage) 1679 return; 1680 // FIXME: do not handle this yet. 1681 if (SI.getCondition()->getType()->isVectorTy()) 1682 return; 1683 1684 switch (Mode) { 1685 case VM_counting: 1686 NSIs++; 1687 return; 1688 case VM_instrument: 1689 instrumentOneSelectInst(SI); 1690 return; 1691 case VM_annotate: 1692 annotateOneSelectInst(SI); 1693 return; 1694 } 1695 1696 llvm_unreachable("Unknown visiting mode"); 1697 } 1698 1699 // Traverse all valuesites and annotate the instructions for all value kind. 1700 void PGOUseFunc::annotateValueSites() { 1701 if (DisableValueProfiling) 1702 return; 1703 1704 // Create the PGOFuncName meta data. 1705 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1706 1707 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1708 annotateValueSites(Kind); 1709 } 1710 1711 // Annotate the instructions for a specific value kind. 1712 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1713 assert(Kind <= IPVK_Last); 1714 unsigned ValueSiteIndex = 0; 1715 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1716 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1717 if (NumValueSites != ValueSites.size()) { 1718 auto &Ctx = M->getContext(); 1719 Ctx.diagnose(DiagnosticInfoPGOProfile( 1720 M->getName().data(), 1721 Twine("Inconsistent number of value sites for ") + 1722 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") + 1723 F.getName().str() + 1724 Twine("\", possibly due to the use of a stale profile."), 1725 DS_Warning)); 1726 return; 1727 } 1728 1729 for (VPCandidateInfo &I : ValueSites) { 1730 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1731 << "): Index = " << ValueSiteIndex << " out of " 1732 << NumValueSites << "\n"); 1733 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 1734 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1735 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1736 : MaxNumAnnotations); 1737 ValueSiteIndex++; 1738 } 1739 } 1740 1741 // Collect the set of members for each Comdat in module M and store 1742 // in ComdatMembers. 1743 static void collectComdatMembers( 1744 Module &M, 1745 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1746 if (!DoComdatRenaming) 1747 return; 1748 for (Function &F : M) 1749 if (Comdat *C = F.getComdat()) 1750 ComdatMembers.insert(std::make_pair(C, &F)); 1751 for (GlobalVariable &GV : M.globals()) 1752 if (Comdat *C = GV.getComdat()) 1753 ComdatMembers.insert(std::make_pair(C, &GV)); 1754 for (GlobalAlias &GA : M.aliases()) 1755 if (Comdat *C = GA.getComdat()) 1756 ComdatMembers.insert(std::make_pair(C, &GA)); 1757 } 1758 1759 // Return true if we should not find instrumentation data for this function 1760 static bool skipPGOUse(const Function &F) { 1761 if (F.isDeclaration()) 1762 return true; 1763 // If there are too many critical edges, PGO might cause 1764 // compiler time problem. Skip PGO if the number of 1765 // critical edges execeed the threshold. 1766 unsigned NumCriticalEdges = 0; 1767 for (auto &BB : F) { 1768 const Instruction *TI = BB.getTerminator(); 1769 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 1770 if (isCriticalEdge(TI, I)) 1771 NumCriticalEdges++; 1772 } 1773 } 1774 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) { 1775 LLVM_DEBUG(dbgs() << "In func " << F.getName() 1776 << ", NumCriticalEdges=" << NumCriticalEdges 1777 << " exceed the threshold. Skip PGO.\n"); 1778 return true; 1779 } 1780 return false; 1781 } 1782 1783 // Return true if we should not instrument this function 1784 static bool skipPGOGen(const Function &F) { 1785 if (skipPGOUse(F)) 1786 return true; 1787 if (F.hasFnAttribute(llvm::Attribute::Naked)) 1788 return true; 1789 if (F.hasFnAttribute(llvm::Attribute::NoProfile)) 1790 return true; 1791 if (F.hasFnAttribute(llvm::Attribute::SkipProfile)) 1792 return true; 1793 if (F.getInstructionCount() < PGOFunctionSizeThreshold) 1794 return true; 1795 return false; 1796 } 1797 1798 static bool InstrumentAllFunctions( 1799 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1800 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1801 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1802 // For the context-sensitve instrumentation, we should have a separated pass 1803 // (before LTO/ThinLTO linking) to create these variables. 1804 if (!IsCS) 1805 createIRLevelProfileFlagVar(M, /*IsCS=*/false); 1806 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1807 collectComdatMembers(M, ComdatMembers); 1808 1809 for (auto &F : M) { 1810 if (skipPGOGen(F)) 1811 continue; 1812 auto &TLI = LookupTLI(F); 1813 auto *BPI = LookupBPI(F); 1814 auto *BFI = LookupBFI(F); 1815 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); 1816 } 1817 return true; 1818 } 1819 1820 PreservedAnalyses 1821 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &MAM) { 1822 createProfileFileNameVar(M, CSInstrName); 1823 // The variable in a comdat may be discarded by LTO. Ensure the declaration 1824 // will be retained. 1825 appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true)); 1826 PreservedAnalyses PA; 1827 PA.preserve<FunctionAnalysisManagerModuleProxy>(); 1828 PA.preserveSet<AllAnalysesOn<Function>>(); 1829 return PA; 1830 } 1831 1832 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1833 ModuleAnalysisManager &MAM) { 1834 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1835 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1836 return FAM.getResult<TargetLibraryAnalysis>(F); 1837 }; 1838 auto LookupBPI = [&FAM](Function &F) { 1839 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1840 }; 1841 auto LookupBFI = [&FAM](Function &F) { 1842 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1843 }; 1844 1845 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) 1846 return PreservedAnalyses::all(); 1847 1848 return PreservedAnalyses::none(); 1849 } 1850 1851 // Using the ratio b/w sums of profile count values and BFI count values to 1852 // adjust the func entry count. 1853 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, 1854 BranchProbabilityInfo &NBPI) { 1855 Function &F = Func.getFunc(); 1856 BlockFrequencyInfo NBFI(F, NBPI, LI); 1857 #ifndef NDEBUG 1858 auto BFIEntryCount = F.getEntryCount(); 1859 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) && 1860 "Invalid BFI Entrycount"); 1861 #endif 1862 auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); 1863 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble()); 1864 for (auto &BBI : F) { 1865 uint64_t CountValue = 0; 1866 uint64_t BFICountValue = 0; 1867 if (!Func.findBBInfo(&BBI)) 1868 continue; 1869 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1870 CountValue = Func.getBBInfo(&BBI).CountValue; 1871 BFICountValue = *BFICount; 1872 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven); 1873 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven); 1874 } 1875 if (SumCount.isZero()) 1876 return; 1877 1878 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && 1879 "Incorrect sum of BFI counts"); 1880 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual) 1881 return; 1882 double Scale = (SumCount / SumBFICount).convertToDouble(); 1883 if (Scale < 1.001 && Scale > 0.999) 1884 return; 1885 1886 uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue; 1887 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; 1888 if (NewEntryCount == 0) 1889 NewEntryCount = 1; 1890 if (NewEntryCount != FuncEntryCount) { 1891 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real)); 1892 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() 1893 << ", entry_count " << FuncEntryCount << " --> " 1894 << NewEntryCount << "\n"); 1895 } 1896 } 1897 1898 // Compare the profile count values with BFI count values, and print out 1899 // the non-matching ones. 1900 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, 1901 BranchProbabilityInfo &NBPI, 1902 uint64_t HotCountThreshold, 1903 uint64_t ColdCountThreshold) { 1904 Function &F = Func.getFunc(); 1905 BlockFrequencyInfo NBFI(F, NBPI, LI); 1906 // bool PrintFunc = false; 1907 bool HotBBOnly = PGOVerifyHotBFI; 1908 StringRef Msg; 1909 OptimizationRemarkEmitter ORE(&F); 1910 1911 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0; 1912 for (auto &BBI : F) { 1913 uint64_t CountValue = 0; 1914 uint64_t BFICountValue = 0; 1915 1916 if (Func.getBBInfo(&BBI).CountValid) 1917 CountValue = Func.getBBInfo(&BBI).CountValue; 1918 1919 BBNum++; 1920 if (CountValue) 1921 NonZeroBBNum++; 1922 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1923 if (BFICount) 1924 BFICountValue = *BFICount; 1925 1926 if (HotBBOnly) { 1927 bool rawIsHot = CountValue >= HotCountThreshold; 1928 bool BFIIsHot = BFICountValue >= HotCountThreshold; 1929 bool rawIsCold = CountValue <= ColdCountThreshold; 1930 bool ShowCount = false; 1931 if (rawIsHot && !BFIIsHot) { 1932 Msg = "raw-Hot to BFI-nonHot"; 1933 ShowCount = true; 1934 } else if (rawIsCold && BFIIsHot) { 1935 Msg = "raw-Cold to BFI-Hot"; 1936 ShowCount = true; 1937 } 1938 if (!ShowCount) 1939 continue; 1940 } else { 1941 if ((CountValue < PGOVerifyBFICutoff) && 1942 (BFICountValue < PGOVerifyBFICutoff)) 1943 continue; 1944 uint64_t Diff = (BFICountValue >= CountValue) 1945 ? BFICountValue - CountValue 1946 : CountValue - BFICountValue; 1947 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio) 1948 continue; 1949 } 1950 BBMisMatchNum++; 1951 1952 ORE.emit([&]() { 1953 OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify", 1954 F.getSubprogram(), &BBI); 1955 Remark << "BB " << ore::NV("Block", BBI.getName()) 1956 << " Count=" << ore::NV("Count", CountValue) 1957 << " BFI_Count=" << ore::NV("Count", BFICountValue); 1958 if (!Msg.empty()) 1959 Remark << " (" << Msg << ")"; 1960 return Remark; 1961 }); 1962 } 1963 if (BBMisMatchNum) 1964 ORE.emit([&]() { 1965 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify", 1966 F.getSubprogram(), &F.getEntryBlock()) 1967 << "In Func " << ore::NV("Function", F.getName()) 1968 << ": Num_of_BB=" << ore::NV("Count", BBNum) 1969 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum) 1970 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum); 1971 }); 1972 } 1973 1974 static bool annotateAllFunctions( 1975 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 1976 vfs::FileSystem &FS, 1977 function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1978 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1979 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 1980 ProfileSummaryInfo *PSI, bool IsCS) { 1981 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 1982 auto &Ctx = M.getContext(); 1983 // Read the counter array from file. 1984 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS, 1985 ProfileRemappingFileName); 1986 if (Error E = ReaderOrErr.takeError()) { 1987 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1988 Ctx.diagnose( 1989 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1990 }); 1991 return false; 1992 } 1993 1994 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1995 std::move(ReaderOrErr.get()); 1996 if (!PGOReader) { 1997 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1998 StringRef("Cannot get PGOReader"))); 1999 return false; 2000 } 2001 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 2002 return false; 2003 2004 // TODO: might need to change the warning once the clang option is finalized. 2005 if (!PGOReader->isIRLevelProfile()) { 2006 Ctx.diagnose(DiagnosticInfoPGOProfile( 2007 ProfileFileName.data(), "Not an IR level instrumentation profile")); 2008 return false; 2009 } 2010 if (PGOReader->functionEntryOnly()) { 2011 Ctx.diagnose(DiagnosticInfoPGOProfile( 2012 ProfileFileName.data(), 2013 "Function entry profiles are not yet supported for optimization")); 2014 return false; 2015 } 2016 2017 // Add the profile summary (read from the header of the indexed summary) here 2018 // so that we can use it below when reading counters (which checks if the 2019 // function should be marked with a cold or inlinehint attribute). 2020 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 2021 IsCS ? ProfileSummary::PSK_CSInstr 2022 : ProfileSummary::PSK_Instr); 2023 PSI->refresh(); 2024 2025 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 2026 collectComdatMembers(M, ComdatMembers); 2027 std::vector<Function *> HotFunctions; 2028 std::vector<Function *> ColdFunctions; 2029 2030 // If the profile marked as always instrument the entry BB, do the 2031 // same. Note this can be overwritten by the internal option in CFGMST.h 2032 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); 2033 if (PGOInstrumentEntry.getNumOccurrences() > 0) 2034 InstrumentFuncEntry = PGOInstrumentEntry; 2035 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage(); 2036 for (auto &F : M) { 2037 if (skipPGOUse(F)) 2038 continue; 2039 auto &TLI = LookupTLI(F); 2040 auto *BPI = LookupBPI(F); 2041 auto *BFI = LookupBFI(F); 2042 if (!HasSingleByteCoverage) { 2043 // Split indirectbr critical edges here before computing the MST rather 2044 // than later in getInstrBB() to avoid invalidating it. 2045 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, 2046 BFI); 2047 } 2048 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, 2049 InstrumentFuncEntry, HasSingleByteCoverage); 2050 if (HasSingleByteCoverage) { 2051 Func.populateCoverage(PGOReader.get()); 2052 continue; 2053 } 2054 // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo, 2055 // it means the profile for the function is unrepresentative and this 2056 // function is actually hot / warm. We will reset the function hot / cold 2057 // attribute and drop all the profile counters. 2058 InstrProfRecord::CountPseudoKind PseudoKind = InstrProfRecord::NotPseudo; 2059 bool AllZeros = false; 2060 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind)) 2061 continue; 2062 if (AllZeros) { 2063 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 2064 if (Func.getProgramMaxCount() != 0) 2065 ColdFunctions.push_back(&F); 2066 continue; 2067 } 2068 if (PseudoKind != InstrProfRecord::NotPseudo) { 2069 // Clear function attribute cold. 2070 if (F.hasFnAttribute(Attribute::Cold)) 2071 F.removeFnAttr(Attribute::Cold); 2072 // Set function attribute as hot. 2073 if (PseudoKind == InstrProfRecord::PseudoHot) 2074 F.addFnAttr(Attribute::Hot); 2075 continue; 2076 } 2077 Func.populateCounters(); 2078 Func.setBranchWeights(); 2079 Func.annotateValueSites(); 2080 Func.annotateIrrLoopHeaderWeights(); 2081 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 2082 if (FreqAttr == PGOUseFunc::FFA_Cold) 2083 ColdFunctions.push_back(&F); 2084 else if (FreqAttr == PGOUseFunc::FFA_Hot) 2085 HotFunctions.push_back(&F); 2086 if (PGOViewCounts != PGOVCT_None && 2087 (ViewBlockFreqFuncName.empty() || 2088 F.getName().equals(ViewBlockFreqFuncName))) { 2089 LoopInfo LI{DominatorTree(F)}; 2090 std::unique_ptr<BranchProbabilityInfo> NewBPI = 2091 std::make_unique<BranchProbabilityInfo>(F, LI); 2092 std::unique_ptr<BlockFrequencyInfo> NewBFI = 2093 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 2094 if (PGOViewCounts == PGOVCT_Graph) 2095 NewBFI->view(); 2096 else if (PGOViewCounts == PGOVCT_Text) { 2097 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 2098 NewBFI->print(dbgs()); 2099 } 2100 } 2101 if (PGOViewRawCounts != PGOVCT_None && 2102 (ViewBlockFreqFuncName.empty() || 2103 F.getName().equals(ViewBlockFreqFuncName))) { 2104 if (PGOViewRawCounts == PGOVCT_Graph) 2105 if (ViewBlockFreqFuncName.empty()) 2106 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 2107 else 2108 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 2109 else if (PGOViewRawCounts == PGOVCT_Text) { 2110 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 2111 Func.dumpInfo(); 2112 } 2113 } 2114 2115 if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) { 2116 LoopInfo LI{DominatorTree(F)}; 2117 BranchProbabilityInfo NBPI(F, LI); 2118 2119 // Fix func entry count. 2120 if (PGOFixEntryCount) 2121 fixFuncEntryCount(Func, LI, NBPI); 2122 2123 // Verify BlockFrequency information. 2124 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0; 2125 if (PGOVerifyHotBFI) { 2126 HotCountThreshold = PSI->getOrCompHotCountThreshold(); 2127 ColdCountThreshold = PSI->getOrCompColdCountThreshold(); 2128 } 2129 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold); 2130 } 2131 } 2132 2133 // Set function hotness attribute from the profile. 2134 // We have to apply these attributes at the end because their presence 2135 // can affect the BranchProbabilityInfo of any callers, resulting in an 2136 // inconsistent MST between prof-gen and prof-use. 2137 for (auto &F : HotFunctions) { 2138 F->addFnAttr(Attribute::InlineHint); 2139 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 2140 << "\n"); 2141 } 2142 for (auto &F : ColdFunctions) { 2143 // Only set when there is no Attribute::Hot set by the user. For Hot 2144 // attribute, user's annotation has the precedence over the profile. 2145 if (F->hasFnAttribute(Attribute::Hot)) { 2146 auto &Ctx = M.getContext(); 2147 std::string Msg = std::string("Function ") + F->getName().str() + 2148 std::string(" is annotated as a hot function but" 2149 " the profile is cold"); 2150 Ctx.diagnose( 2151 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); 2152 continue; 2153 } 2154 F->addFnAttr(Attribute::Cold); 2155 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 2156 << "\n"); 2157 } 2158 return true; 2159 } 2160 2161 PGOInstrumentationUse::PGOInstrumentationUse( 2162 std::string Filename, std::string RemappingFilename, bool IsCS, 2163 IntrusiveRefCntPtr<vfs::FileSystem> VFS) 2164 : ProfileFileName(std::move(Filename)), 2165 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS), 2166 FS(std::move(VFS)) { 2167 if (!PGOTestProfileFile.empty()) 2168 ProfileFileName = PGOTestProfileFile; 2169 if (!PGOTestProfileRemappingFile.empty()) 2170 ProfileRemappingFileName = PGOTestProfileRemappingFile; 2171 if (!FS) 2172 FS = vfs::getRealFileSystem(); 2173 } 2174 2175 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 2176 ModuleAnalysisManager &MAM) { 2177 2178 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 2179 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 2180 return FAM.getResult<TargetLibraryAnalysis>(F); 2181 }; 2182 auto LookupBPI = [&FAM](Function &F) { 2183 return &FAM.getResult<BranchProbabilityAnalysis>(F); 2184 }; 2185 auto LookupBFI = [&FAM](Function &F) { 2186 return &FAM.getResult<BlockFrequencyAnalysis>(F); 2187 }; 2188 2189 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M); 2190 2191 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS, 2192 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) 2193 return PreservedAnalyses::all(); 2194 2195 return PreservedAnalyses::none(); 2196 } 2197 2198 static std::string getSimpleNodeName(const BasicBlock *Node) { 2199 if (!Node->getName().empty()) 2200 return Node->getName().str(); 2201 2202 std::string SimpleNodeName; 2203 raw_string_ostream OS(SimpleNodeName); 2204 Node->printAsOperand(OS, false); 2205 return OS.str(); 2206 } 2207 2208 void llvm::setProfMetadata(Module *M, Instruction *TI, 2209 ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) { 2210 assert(MaxCount > 0 && "Bad max count"); 2211 uint64_t Scale = calculateCountScale(MaxCount); 2212 SmallVector<unsigned, 4> Weights; 2213 for (const auto &ECI : EdgeCounts) 2214 Weights.push_back(scaleBranchCount(ECI, Scale)); 2215 2216 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 2217 : Weights) { 2218 dbgs() << W << " "; 2219 } dbgs() << "\n";); 2220 2221 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false); 2222 2223 setBranchWeights(*TI, Weights); 2224 if (EmitBranchProbability) { 2225 std::string BrCondStr = getBranchCondString(TI); 2226 if (BrCondStr.empty()) 2227 return; 2228 2229 uint64_t WSum = 2230 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 2231 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 2232 uint64_t TotalCount = 2233 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 2234 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 2235 Scale = calculateCountScale(WSum); 2236 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 2237 scaleBranchCount(WSum, Scale)); 2238 std::string BranchProbStr; 2239 raw_string_ostream OS(BranchProbStr); 2240 OS << BP; 2241 OS << " (total count : " << TotalCount << ")"; 2242 OS.flush(); 2243 Function *F = TI->getParent()->getParent(); 2244 OptimizationRemarkEmitter ORE(F); 2245 ORE.emit([&]() { 2246 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 2247 << BrCondStr << " is true with probability : " << BranchProbStr; 2248 }); 2249 } 2250 } 2251 2252 namespace llvm { 2253 2254 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 2255 MDBuilder MDB(M->getContext()); 2256 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 2257 MDB.createIrrLoopHeaderWeight(Count)); 2258 } 2259 2260 template <> struct GraphTraits<PGOUseFunc *> { 2261 using NodeRef = const BasicBlock *; 2262 using ChildIteratorType = const_succ_iterator; 2263 using nodes_iterator = pointer_iterator<Function::const_iterator>; 2264 2265 static NodeRef getEntryNode(const PGOUseFunc *G) { 2266 return &G->getFunc().front(); 2267 } 2268 2269 static ChildIteratorType child_begin(const NodeRef N) { 2270 return succ_begin(N); 2271 } 2272 2273 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 2274 2275 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 2276 return nodes_iterator(G->getFunc().begin()); 2277 } 2278 2279 static nodes_iterator nodes_end(const PGOUseFunc *G) { 2280 return nodes_iterator(G->getFunc().end()); 2281 } 2282 }; 2283 2284 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 2285 explicit DOTGraphTraits(bool isSimple = false) 2286 : DefaultDOTGraphTraits(isSimple) {} 2287 2288 static std::string getGraphName(const PGOUseFunc *G) { 2289 return std::string(G->getFunc().getName()); 2290 } 2291 2292 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 2293 std::string Result; 2294 raw_string_ostream OS(Result); 2295 2296 OS << getSimpleNodeName(Node) << ":\\l"; 2297 PGOUseBBInfo *BI = Graph->findBBInfo(Node); 2298 OS << "Count : "; 2299 if (BI && BI->CountValid) 2300 OS << BI->CountValue << "\\l"; 2301 else 2302 OS << "Unknown\\l"; 2303 2304 if (!PGOInstrSelect) 2305 return Result; 2306 2307 for (const Instruction &I : *Node) { 2308 if (!isa<SelectInst>(&I)) 2309 continue; 2310 // Display scaled counts for SELECT instruction: 2311 OS << "SELECT : { T = "; 2312 uint64_t TC, FC; 2313 bool HasProf = extractBranchWeights(I, TC, FC); 2314 if (!HasProf) 2315 OS << "Unknown, F = Unknown }\\l"; 2316 else 2317 OS << TC << ", F = " << FC << " }\\l"; 2318 } 2319 return Result; 2320 } 2321 }; 2322 2323 } // end namespace llvm 2324