1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 51 #include "ValueProfileCollector.h" 52 #include "llvm/ADT/APInt.h" 53 #include "llvm/ADT/ArrayRef.h" 54 #include "llvm/ADT/STLExtras.h" 55 #include "llvm/ADT/SmallVector.h" 56 #include "llvm/ADT/Statistic.h" 57 #include "llvm/ADT/StringRef.h" 58 #include "llvm/ADT/Twine.h" 59 #include "llvm/ADT/iterator.h" 60 #include "llvm/ADT/iterator_range.h" 61 #include "llvm/Analysis/BlockFrequencyInfo.h" 62 #include "llvm/Analysis/BranchProbabilityInfo.h" 63 #include "llvm/Analysis/CFG.h" 64 #include "llvm/Analysis/LoopInfo.h" 65 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 66 #include "llvm/Analysis/ProfileSummaryInfo.h" 67 #include "llvm/Analysis/TargetLibraryInfo.h" 68 #include "llvm/IR/Attributes.h" 69 #include "llvm/IR/BasicBlock.h" 70 #include "llvm/IR/CFG.h" 71 #include "llvm/IR/Comdat.h" 72 #include "llvm/IR/Constant.h" 73 #include "llvm/IR/Constants.h" 74 #include "llvm/IR/DiagnosticInfo.h" 75 #include "llvm/IR/Dominators.h" 76 #include "llvm/IR/EHPersonalities.h" 77 #include "llvm/IR/Function.h" 78 #include "llvm/IR/GlobalAlias.h" 79 #include "llvm/IR/GlobalValue.h" 80 #include "llvm/IR/GlobalVariable.h" 81 #include "llvm/IR/IRBuilder.h" 82 #include "llvm/IR/InstVisitor.h" 83 #include "llvm/IR/InstrTypes.h" 84 #include "llvm/IR/Instruction.h" 85 #include "llvm/IR/Instructions.h" 86 #include "llvm/IR/IntrinsicInst.h" 87 #include "llvm/IR/Intrinsics.h" 88 #include "llvm/IR/LLVMContext.h" 89 #include "llvm/IR/MDBuilder.h" 90 #include "llvm/IR/Module.h" 91 #include "llvm/IR/PassManager.h" 92 #include "llvm/IR/ProfDataUtils.h" 93 #include "llvm/IR/ProfileSummary.h" 94 #include "llvm/IR/Type.h" 95 #include "llvm/IR/Value.h" 96 #include "llvm/ProfileData/InstrProf.h" 97 #include "llvm/ProfileData/InstrProfReader.h" 98 #include "llvm/Support/BranchProbability.h" 99 #include "llvm/Support/CRC.h" 100 #include "llvm/Support/Casting.h" 101 #include "llvm/Support/CommandLine.h" 102 #include "llvm/Support/DOTGraphTraits.h" 103 #include "llvm/Support/Debug.h" 104 #include "llvm/Support/Error.h" 105 #include "llvm/Support/ErrorHandling.h" 106 #include "llvm/Support/GraphWriter.h" 107 #include "llvm/Support/VirtualFileSystem.h" 108 #include "llvm/Support/raw_ostream.h" 109 #include "llvm/TargetParser/Triple.h" 110 #include "llvm/Transforms/Instrumentation/BlockCoverageInference.h" 111 #include "llvm/Transforms/Instrumentation/CFGMST.h" 112 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 113 #include "llvm/Transforms/Utils/Instrumentation.h" 114 #include "llvm/Transforms/Utils/MisExpect.h" 115 #include "llvm/Transforms/Utils/ModuleUtils.h" 116 #include <algorithm> 117 #include <cassert> 118 #include <cstdint> 119 #include <memory> 120 #include <numeric> 121 #include <optional> 122 #include <stack> 123 #include <string> 124 #include <unordered_map> 125 #include <utility> 126 #include <vector> 127 128 using namespace llvm; 129 using ProfileCount = Function::ProfileCount; 130 using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 131 132 #define DEBUG_TYPE "pgo-instrumentation" 133 134 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 135 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 136 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 137 STATISTIC(NumOfPGOEdge, "Number of edges."); 138 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 139 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 140 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 141 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 142 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 143 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 144 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 145 STATISTIC(NumOfCSPGOSelectInsts, 146 "Number of select instruction instrumented in CSPGO."); 147 STATISTIC(NumOfCSPGOMemIntrinsics, 148 "Number of mem intrinsics instrumented in CSPGO."); 149 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 150 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 151 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 152 STATISTIC(NumOfCSPGOFunc, 153 "Number of functions having valid profile counts in CSPGO."); 154 STATISTIC(NumOfCSPGOMismatch, 155 "Number of functions having mismatch profile in CSPGO."); 156 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 157 STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed"); 158 159 // Command line option to specify the file to read profile from. This is 160 // mainly used for testing. 161 static cl::opt<std::string> PGOTestProfileFile( 162 "pgo-test-profile-file", cl::init(""), cl::Hidden, 163 cl::value_desc("filename"), 164 cl::desc("Specify the path of profile data file. This is " 165 "mainly for test purpose.")); 166 static cl::opt<std::string> PGOTestProfileRemappingFile( 167 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 168 cl::value_desc("filename"), 169 cl::desc("Specify the path of profile remapping file. This is mainly for " 170 "test purpose.")); 171 172 // Command line option to disable value profiling. The default is false: 173 // i.e. value profiling is enabled by default. This is for debug purpose. 174 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 175 cl::Hidden, 176 cl::desc("Disable Value Profiling")); 177 178 // Command line option to set the maximum number of VP annotations to write to 179 // the metadata for a single indirect call callsite. 180 static cl::opt<unsigned> MaxNumAnnotations( 181 "icp-max-annotations", cl::init(3), cl::Hidden, 182 cl::desc("Max number of annotations for a single indirect " 183 "call callsite")); 184 185 // Command line option to set the maximum number of value annotations 186 // to write to the metadata for a single memop intrinsic. 187 static cl::opt<unsigned> MaxNumMemOPAnnotations( 188 "memop-max-annotations", cl::init(4), cl::Hidden, 189 cl::desc("Max number of precise value annotations for a single memop" 190 "intrinsic")); 191 192 // Command line option to control appending FunctionHash to the name of a COMDAT 193 // function. This is to avoid the hash mismatch caused by the preinliner. 194 static cl::opt<bool> DoComdatRenaming( 195 "do-comdat-renaming", cl::init(false), cl::Hidden, 196 cl::desc("Append function hash to the name of COMDAT function to avoid " 197 "function hash mismatch due to the preinliner")); 198 199 namespace llvm { 200 // Command line option to enable/disable the warning about missing profile 201 // information. 202 cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false), 203 cl::Hidden, 204 cl::desc("Use this option to turn on/off " 205 "warnings about missing profile data for " 206 "functions.")); 207 208 // Command line option to enable/disable the warning about a hash mismatch in 209 // the profile data. 210 cl::opt<bool> 211 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 212 cl::desc("Use this option to turn off/on " 213 "warnings about profile cfg mismatch.")); 214 215 // Command line option to enable/disable the warning about a hash mismatch in 216 // the profile data for Comdat functions, which often turns out to be false 217 // positive due to the pre-instrumentation inline. 218 cl::opt<bool> NoPGOWarnMismatchComdatWeak( 219 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden, 220 cl::desc("The option is used to turn on/off " 221 "warnings about hash mismatch for comdat " 222 "or weak functions.")); 223 } // namespace llvm 224 225 // Command line option to enable/disable select instruction instrumentation. 226 static cl::opt<bool> 227 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 228 cl::desc("Use this option to turn on/off SELECT " 229 "instruction instrumentation. ")); 230 231 // Command line option to turn on CFG dot or text dump of raw profile counts 232 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 233 "pgo-view-raw-counts", cl::Hidden, 234 cl::desc("A boolean option to show CFG dag or text " 235 "with raw profile counts from " 236 "profile data. See also option " 237 "-pgo-view-counts. To limit graph " 238 "display to only one function, use " 239 "filtering option -view-bfi-func-name."), 240 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 241 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 242 clEnumValN(PGOVCT_Text, "text", "show in text."))); 243 244 // Command line option to enable/disable memop intrinsic call.size profiling. 245 static cl::opt<bool> 246 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 247 cl::desc("Use this option to turn on/off " 248 "memory intrinsic size profiling.")); 249 250 // Emit branch probability as optimization remarks. 251 static cl::opt<bool> 252 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 253 cl::desc("When this option is on, the annotated " 254 "branch probability will be emitted as " 255 "optimization remarks: -{Rpass|" 256 "pass-remarks}=pgo-instrumentation")); 257 258 static cl::opt<bool> PGOInstrumentEntry( 259 "pgo-instrument-entry", cl::init(false), cl::Hidden, 260 cl::desc("Force to instrument function entry basicblock.")); 261 262 static cl::opt<bool> 263 PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false), 264 cl::Hidden, 265 cl::desc("Force to instrument loop entries.")); 266 267 static cl::opt<bool> PGOFunctionEntryCoverage( 268 "pgo-function-entry-coverage", cl::Hidden, 269 cl::desc( 270 "Use this option to enable function entry coverage instrumentation.")); 271 272 static cl::opt<bool> PGOBlockCoverage( 273 "pgo-block-coverage", 274 cl::desc("Use this option to enable basic block coverage instrumentation")); 275 276 static cl::opt<bool> 277 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", 278 cl::desc("Create a dot file of CFGs with block " 279 "coverage inference information")); 280 281 static cl::opt<bool> PGOTemporalInstrumentation( 282 "pgo-temporal-instrumentation", 283 cl::desc("Use this option to enable temporal instrumentation")); 284 285 static cl::opt<bool> 286 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, 287 cl::desc("Fix function entry count in profile use.")); 288 289 static cl::opt<bool> PGOVerifyHotBFI( 290 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden, 291 cl::desc("Print out the non-match BFI count if a hot raw profile count " 292 "becomes non-hot, or a cold raw profile count becomes hot. " 293 "The print is enabled under -Rpass-analysis=pgo, or " 294 "internal option -pass-remarks-analysis=pgo.")); 295 296 static cl::opt<bool> PGOVerifyBFI( 297 "pgo-verify-bfi", cl::init(false), cl::Hidden, 298 cl::desc("Print out mismatched BFI counts after setting profile metadata " 299 "The print is enabled under -Rpass-analysis=pgo, or " 300 "internal option -pass-remarks-analysis=pgo.")); 301 302 static cl::opt<unsigned> PGOVerifyBFIRatio( 303 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, 304 cl::desc("Set the threshold for pgo-verify-bfi: only print out " 305 "mismatched BFI if the difference percentage is greater than " 306 "this value (in percentage).")); 307 308 static cl::opt<unsigned> PGOVerifyBFICutoff( 309 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, 310 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " 311 "profile count value is below.")); 312 313 static cl::opt<std::string> PGOTraceFuncHash( 314 "pgo-trace-func-hash", cl::init("-"), cl::Hidden, 315 cl::value_desc("function name"), 316 cl::desc("Trace the hash of the function with this name.")); 317 318 static cl::opt<unsigned> PGOFunctionSizeThreshold( 319 "pgo-function-size-threshold", cl::Hidden, 320 cl::desc("Do not instrument functions smaller than this threshold.")); 321 322 static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold( 323 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, 324 cl::desc("Do not instrument functions with the number of critical edges " 325 " greater than this threshold.")); 326 327 static cl::opt<uint64_t> PGOColdInstrumentEntryThreshold( 328 "pgo-cold-instrument-entry-threshold", cl::init(0), cl::Hidden, 329 cl::desc("For cold function instrumentation, skip instrumenting functions " 330 "whose entry count is above the given value.")); 331 332 static cl::opt<bool> PGOTreatUnknownAsCold( 333 "pgo-treat-unknown-as-cold", cl::init(false), cl::Hidden, 334 cl::desc("For cold function instrumentation, treat count unknown(e.g. " 335 "unprofiled) functions as cold.")); 336 337 cl::opt<bool> PGOInstrumentColdFunctionOnly( 338 "pgo-instrument-cold-function-only", cl::init(false), cl::Hidden, 339 cl::desc("Enable cold function only instrumentation.")); 340 341 extern cl::opt<unsigned> MaxNumVTableAnnotations; 342 343 namespace llvm { 344 // Command line option to turn on CFG dot dump after profile annotation. 345 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 346 extern cl::opt<PGOViewCountsType> PGOViewCounts; 347 348 // Command line option to specify the name of the function for CFG dump 349 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 350 extern cl::opt<std::string> ViewBlockFreqFuncName; 351 352 // Command line option to enable vtable value profiling. Defined in 353 // ProfileData/InstrProf.cpp: -enable-vtable-value-profiling= 354 extern cl::opt<bool> EnableVTableValueProfiling; 355 extern cl::opt<bool> EnableVTableProfileUse; 356 extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate; 357 } // namespace llvm 358 359 namespace { 360 class FunctionInstrumenter final { 361 Module &M; 362 Function &F; 363 TargetLibraryInfo &TLI; 364 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 365 BranchProbabilityInfo *const BPI; 366 BlockFrequencyInfo *const BFI; 367 LoopInfo *const LI; 368 369 const PGOInstrumentationType InstrumentationType; 370 371 // FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. 372 // Ctx profiling implicitly captures indirect call cases, but not other 373 // values. Supporting other values is relatively straight-forward - just 374 // another counter range within the context. 375 bool isValueProfilingDisabled() const { 376 return DisableValueProfiling || 377 InstrumentationType == PGOInstrumentationType::CTXPROF; 378 } 379 380 bool shouldInstrumentEntryBB() const { 381 return PGOInstrumentEntry || 382 InstrumentationType == PGOInstrumentationType::CTXPROF; 383 } 384 385 bool shouldInstrumentLoopEntries() const { return PGOInstrumentLoopEntries; } 386 387 public: 388 FunctionInstrumenter( 389 Module &M, Function &F, TargetLibraryInfo &TLI, 390 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 391 BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr, 392 LoopInfo *LI = nullptr, 393 PGOInstrumentationType InstrumentationType = PGOInstrumentationType::FDO) 394 : M(M), F(F), TLI(TLI), ComdatMembers(ComdatMembers), BPI(BPI), BFI(BFI), 395 LI(LI), InstrumentationType(InstrumentationType) {} 396 397 void instrument(); 398 }; 399 } // namespace 400 401 // Return a string describing the branch condition that can be 402 // used in static branch probability heuristics: 403 static std::string getBranchCondString(Instruction *TI) { 404 BranchInst *BI = dyn_cast<BranchInst>(TI); 405 if (!BI || !BI->isConditional()) 406 return std::string(); 407 408 Value *Cond = BI->getCondition(); 409 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 410 if (!CI) 411 return std::string(); 412 413 std::string result; 414 raw_string_ostream OS(result); 415 OS << CI->getPredicate() << "_"; 416 CI->getOperand(0)->getType()->print(OS, true); 417 418 Value *RHS = CI->getOperand(1); 419 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 420 if (CV) { 421 if (CV->isZero()) 422 OS << "_Zero"; 423 else if (CV->isOne()) 424 OS << "_One"; 425 else if (CV->isMinusOne()) 426 OS << "_MinusOne"; 427 else 428 OS << "_Const"; 429 } 430 return result; 431 } 432 433 static const char *ValueProfKindDescr[] = { 434 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 435 #include "llvm/ProfileData/InstrProfData.inc" 436 }; 437 438 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime 439 // aware this is an ir_level profile so it can set the version flag. 440 static GlobalVariable * 441 createIRLevelProfileFlagVar(Module &M, 442 PGOInstrumentationType InstrumentationType) { 443 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); 444 Type *IntTy64 = Type::getInt64Ty(M.getContext()); 445 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); 446 if (InstrumentationType == PGOInstrumentationType::CSFDO) 447 ProfileVersion |= VARIANT_MASK_CSIR_PROF; 448 if (PGOInstrumentEntry || 449 InstrumentationType == PGOInstrumentationType::CTXPROF) 450 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; 451 if (PGOInstrumentLoopEntries) 452 ProfileVersion |= VARIANT_MASK_INSTR_LOOP_ENTRIES; 453 if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) 454 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; 455 if (PGOFunctionEntryCoverage) 456 ProfileVersion |= 457 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY; 458 if (PGOBlockCoverage) 459 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE; 460 if (PGOTemporalInstrumentation) 461 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF; 462 auto IRLevelVersionVariable = new GlobalVariable( 463 M, IntTy64, true, GlobalValue::WeakAnyLinkage, 464 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); 465 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility); 466 Triple TT(M.getTargetTriple()); 467 if (TT.supportsCOMDAT()) { 468 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); 469 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); 470 } 471 return IRLevelVersionVariable; 472 } 473 474 namespace { 475 476 /// The select instruction visitor plays three roles specified 477 /// by the mode. In \c VM_counting mode, it simply counts the number of 478 /// select instructions. In \c VM_instrument mode, it inserts code to count 479 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 480 /// it reads the profile data and annotate the select instruction with metadata. 481 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 482 class PGOUseFunc; 483 484 /// Instruction Visitor class to visit select instructions. 485 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 486 Function &F; 487 unsigned NSIs = 0; // Number of select instructions instrumented. 488 VisitMode Mode = VM_counting; // Visiting mode. 489 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 490 unsigned TotalNumCtrs = 0; // Total number of counters 491 GlobalValue *FuncNameVar = nullptr; 492 uint64_t FuncHash = 0; 493 PGOUseFunc *UseFunc = nullptr; 494 bool HasSingleByteCoverage; 495 496 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage) 497 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {} 498 499 void countSelects() { 500 NSIs = 0; 501 Mode = VM_counting; 502 visit(F); 503 } 504 505 // Visit the IR stream and instrument all select instructions. \p 506 // Ind is a pointer to the counter index variable; \p TotalNC 507 // is the total number of counters; \p FNV is the pointer to the 508 // PGO function name var; \p FHash is the function hash. 509 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalValue *FNV, 510 uint64_t FHash) { 511 Mode = VM_instrument; 512 CurCtrIdx = Ind; 513 TotalNumCtrs = TotalNC; 514 FuncHash = FHash; 515 FuncNameVar = FNV; 516 visit(F); 517 } 518 519 // Visit the IR stream and annotate all select instructions. 520 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) { 521 Mode = VM_annotate; 522 UseFunc = UF; 523 CurCtrIdx = Ind; 524 visit(F); 525 } 526 527 void instrumentOneSelectInst(SelectInst &SI); 528 void annotateOneSelectInst(SelectInst &SI); 529 530 // Visit \p SI instruction and perform tasks according to visit mode. 531 void visitSelectInst(SelectInst &SI); 532 533 // Return the number of select instructions. This needs be called after 534 // countSelects(). 535 unsigned getNumOfSelectInsts() const { return NSIs; } 536 }; 537 538 /// This class implements the CFG edges for the Minimum Spanning Tree (MST) 539 /// based instrumentation. 540 /// Note that the CFG can be a multi-graph. So there might be multiple edges 541 /// with the same SrcBB and DestBB. 542 struct PGOEdge { 543 BasicBlock *SrcBB; 544 BasicBlock *DestBB; 545 uint64_t Weight; 546 bool InMST = false; 547 bool Removed = false; 548 bool IsCritical = false; 549 550 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1) 551 : SrcBB(Src), DestBB(Dest), Weight(W) {} 552 553 /// Return the information string of an edge. 554 std::string infoString() const { 555 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 556 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)) 557 .str(); 558 } 559 }; 560 561 /// This class stores the auxiliary information for each BB in the MST. 562 struct PGOBBInfo { 563 PGOBBInfo *Group; 564 uint32_t Index; 565 uint32_t Rank = 0; 566 567 PGOBBInfo(unsigned IX) : Group(this), Index(IX) {} 568 569 /// Return the information string of this object. 570 std::string infoString() const { 571 return (Twine("Index=") + Twine(Index)).str(); 572 } 573 }; 574 575 // This class implements the CFG edges. Note the CFG can be a multi-graph. 576 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 577 private: 578 Function &F; 579 580 // Is this is context-sensitive instrumentation. 581 bool IsCS; 582 583 // A map that stores the Comdat group in function F. 584 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 585 586 ValueProfileCollector VPC; 587 588 void computeCFGHash(); 589 void renameComdatFunction(); 590 591 public: 592 const TargetLibraryInfo &TLI; 593 std::vector<std::vector<VPCandidateInfo>> ValueSites; 594 SelectInstVisitor SIVisitor; 595 std::string FuncName; 596 std::string DeprecatedFuncName; 597 GlobalVariable *FuncNameVar; 598 599 // CFG hash value for this function. 600 uint64_t FunctionHash = 0; 601 602 // The Minimum Spanning Tree of function CFG. 603 CFGMST<Edge, BBInfo> MST; 604 605 const std::optional<BlockCoverageInference> BCI; 606 607 static std::optional<BlockCoverageInference> 608 constructBCI(Function &Func, bool HasSingleByteCoverage, 609 bool InstrumentFuncEntry) { 610 if (HasSingleByteCoverage) 611 return BlockCoverageInference(Func, InstrumentFuncEntry); 612 return {}; 613 } 614 615 // Collect all the BBs that will be instrumented, and store them in 616 // InstrumentBBs. 617 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 618 619 // Give an edge, find the BB that will be instrumented. 620 // Return nullptr if there is no BB to be instrumented. 621 BasicBlock *getInstrBB(Edge *E); 622 623 // Return the auxiliary BB information. 624 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 625 626 // Return the auxiliary BB information if available. 627 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 628 629 // Dump edges and BB information. 630 void dumpInfo(StringRef Str = "") const { 631 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + 632 " Hash: " + Twine(FunctionHash) + "\t" + Str); 633 } 634 635 FuncPGOInstrumentation( 636 Function &Func, TargetLibraryInfo &TLI, 637 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 638 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 639 BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr, 640 bool IsCS = false, bool InstrumentFuncEntry = true, 641 bool InstrumentLoopEntries = false, bool HasSingleByteCoverage = false) 642 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), 643 TLI(TLI), ValueSites(IPVK_Last + 1), 644 SIVisitor(Func, HasSingleByteCoverage), 645 MST(F, InstrumentFuncEntry, InstrumentLoopEntries, BPI, BFI, LI), 646 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) { 647 if (BCI && PGOViewBlockCoverageGraph) 648 BCI->viewBlockCoverageGraph(); 649 // This should be done before CFG hash computation. 650 SIVisitor.countSelects(); 651 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 652 if (!IsCS) { 653 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 654 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 655 NumOfPGOBB += MST.bbInfoSize(); 656 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 657 if (EnableVTableValueProfiling) 658 ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget); 659 } else { 660 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 661 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 662 NumOfCSPGOBB += MST.bbInfoSize(); 663 } 664 665 FuncName = getIRPGOFuncName(F); 666 DeprecatedFuncName = getPGOFuncName(F); 667 computeCFGHash(); 668 if (!ComdatMembers.empty()) 669 renameComdatFunction(); 670 LLVM_DEBUG(dumpInfo("after CFGMST")); 671 672 for (const auto &E : MST.allEdges()) { 673 if (E->Removed) 674 continue; 675 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 676 if (!E->InMST) 677 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 678 } 679 680 if (CreateGlobalVar) 681 FuncNameVar = createPGOFuncNameVar(F, FuncName); 682 } 683 }; 684 685 } // end anonymous namespace 686 687 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 688 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers 689 // of selects, indirect calls, mem ops and edges. 690 template <class Edge, class BBInfo> 691 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 692 std::vector<uint8_t> Indexes; 693 JamCRC JC; 694 for (auto &BB : F) { 695 for (BasicBlock *Succ : successors(&BB)) { 696 auto BI = findBBInfo(Succ); 697 if (BI == nullptr) 698 continue; 699 uint32_t Index = BI->Index; 700 for (int J = 0; J < 4; J++) 701 Indexes.push_back((uint8_t)(Index >> (J * 8))); 702 } 703 } 704 JC.update(Indexes); 705 706 JamCRC JCH; 707 // The higher 32 bits. 708 auto updateJCH = [&JCH](uint64_t Num) { 709 uint8_t Data[8]; 710 support::endian::write64le(Data, Num); 711 JCH.update(Data); 712 }; 713 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); 714 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); 715 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); 716 if (BCI) { 717 updateJCH(BCI->getInstrumentedBlocksHash()); 718 } else { 719 updateJCH((uint64_t)MST.numEdges()); 720 } 721 722 // Hash format for context sensitive profile. Reserve 4 bits for other 723 // information. 724 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); 725 726 // Reserve bit 60-63 for other information purpose. 727 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 728 if (IsCS) 729 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 730 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 731 << " CRC = " << JC.getCRC() 732 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 733 << ", Edges = " << MST.numEdges() << ", ICSites = " 734 << ValueSites[IPVK_IndirectCallTarget].size() 735 << ", Memops = " << ValueSites[IPVK_MemOPSize].size() 736 << ", High32 CRC = " << JCH.getCRC() 737 << ", Hash = " << FunctionHash << "\n";); 738 739 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash)) 740 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash 741 << " in building " << F.getParent()->getSourceFileName() << "\n"; 742 } 743 744 // Check if we can safely rename this Comdat function. 745 static bool canRenameComdat( 746 Function &F, 747 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 748 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 749 return false; 750 751 // FIXME: Current only handle those Comdat groups that only containing one 752 // function. 753 // (1) For a Comdat group containing multiple functions, we need to have a 754 // unique postfix based on the hashes for each function. There is a 755 // non-trivial code refactoring to do this efficiently. 756 // (2) Variables can not be renamed, so we can not rename Comdat function in a 757 // group including global vars. 758 Comdat *C = F.getComdat(); 759 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 760 assert(!isa<GlobalAlias>(CM.second)); 761 Function *FM = dyn_cast<Function>(CM.second); 762 if (FM != &F) 763 return false; 764 } 765 return true; 766 } 767 768 // Append the CFGHash to the Comdat function name. 769 template <class Edge, class BBInfo> 770 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 771 if (!canRenameComdat(F, ComdatMembers)) 772 return; 773 std::string OrigName = F.getName().str(); 774 std::string NewFuncName = 775 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 776 F.setName(Twine(NewFuncName)); 777 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 778 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 779 Comdat *NewComdat; 780 Module *M = F.getParent(); 781 // For AvailableExternallyLinkage functions, change the linkage to 782 // LinkOnceODR and put them into comdat. This is because after renaming, there 783 // is no backup external copy available for the function. 784 if (!F.hasComdat()) { 785 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 786 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 787 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 788 F.setComdat(NewComdat); 789 return; 790 } 791 792 // This function belongs to a single function Comdat group. 793 Comdat *OrigComdat = F.getComdat(); 794 std::string NewComdatName = 795 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 796 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 797 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 798 799 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 800 // Must be a function. 801 cast<Function>(CM.second)->setComdat(NewComdat); 802 } 803 } 804 805 /// Collect all the BBs that will be instruments and add them to 806 /// `InstrumentBBs`. 807 template <class Edge, class BBInfo> 808 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 809 std::vector<BasicBlock *> &InstrumentBBs) { 810 if (BCI) { 811 for (auto &BB : F) 812 if (BCI->shouldInstrumentBlock(BB)) 813 InstrumentBBs.push_back(&BB); 814 return; 815 } 816 817 // Use a worklist as we will update the vector during the iteration. 818 std::vector<Edge *> EdgeList; 819 EdgeList.reserve(MST.numEdges()); 820 for (const auto &E : MST.allEdges()) 821 EdgeList.push_back(E.get()); 822 823 for (auto &E : EdgeList) { 824 BasicBlock *InstrBB = getInstrBB(E); 825 if (InstrBB) 826 InstrumentBBs.push_back(InstrBB); 827 } 828 } 829 830 // Given a CFG E to be instrumented, find which BB to place the instrumented 831 // code. The function will split the critical edge if necessary. 832 template <class Edge, class BBInfo> 833 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 834 if (E->InMST || E->Removed) 835 return nullptr; 836 837 BasicBlock *SrcBB = E->SrcBB; 838 BasicBlock *DestBB = E->DestBB; 839 // For a fake edge, instrument the real BB. 840 if (SrcBB == nullptr) 841 return DestBB; 842 if (DestBB == nullptr) 843 return SrcBB; 844 845 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 846 // There are basic blocks (such as catchswitch) cannot be instrumented. 847 // If the returned first insertion point is the end of BB, skip this BB. 848 if (BB->getFirstInsertionPt() == BB->end()) 849 return nullptr; 850 return BB; 851 }; 852 853 // Instrument the SrcBB if it has a single successor, 854 // otherwise, the DestBB if this is not a critical edge. 855 Instruction *TI = SrcBB->getTerminator(); 856 if (TI->getNumSuccessors() <= 1) 857 return canInstrument(SrcBB); 858 if (!E->IsCritical) 859 return canInstrument(DestBB); 860 861 // Some IndirectBr critical edges cannot be split by the previous 862 // SplitIndirectBrCriticalEdges call. Bail out. 863 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 864 BasicBlock *InstrBB = 865 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum); 866 if (!InstrBB) { 867 LLVM_DEBUG( 868 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 869 return nullptr; 870 } 871 // For a critical edge, we have to split. Instrument the newly 872 // created BB. 873 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 874 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 875 << " --> " << getBBInfo(DestBB).Index << "\n"); 876 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 877 MST.addEdge(SrcBB, InstrBB, 0); 878 // Second one: Add new edge of InstrBB->DestBB. 879 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 880 NewEdge1.InMST = true; 881 E->Removed = true; 882 883 return canInstrument(InstrBB); 884 } 885 886 // When generating value profiling calls on Windows routines that make use of 887 // handler funclets for exception processing an operand bundle needs to attached 888 // to the called function. This routine will set \p OpBundles to contain the 889 // funclet information, if any is needed, that should be placed on the generated 890 // value profiling call for the value profile candidate call. 891 static void 892 populateEHOperandBundle(VPCandidateInfo &Cand, 893 DenseMap<BasicBlock *, ColorVector> &BlockColors, 894 SmallVectorImpl<OperandBundleDef> &OpBundles) { 895 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); 896 if (!OrigCall) 897 return; 898 899 if (!isa<IntrinsicInst>(OrigCall)) { 900 // The instrumentation call should belong to the same funclet as a 901 // non-intrinsic call, so just copy the operand bundle, if any exists. 902 std::optional<OperandBundleUse> ParentFunclet = 903 OrigCall->getOperandBundle(LLVMContext::OB_funclet); 904 if (ParentFunclet) 905 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); 906 } else { 907 // Intrinsics or other instructions do not get funclet information from the 908 // front-end. Need to use the BlockColors that was computed by the routine 909 // colorEHFunclets to determine whether a funclet is needed. 910 if (!BlockColors.empty()) { 911 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 912 assert(CV.size() == 1 && "non-unique color for block!"); 913 BasicBlock::iterator EHPadIt = CV.front()->getFirstNonPHIIt(); 914 if (EHPadIt->isEHPad()) 915 OpBundles.emplace_back("funclet", &*EHPadIt); 916 } 917 } 918 } 919 920 // Visit all edge and instrument the edges not in MST, and do value profiling. 921 // Critical edges will be split. 922 void FunctionInstrumenter::instrument() { 923 if (!PGOBlockCoverage) { 924 // Split indirectbr critical edges here before computing the MST rather than 925 // later in getInstrBB() to avoid invalidating it. 926 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); 927 } 928 929 const bool IsCtxProf = InstrumentationType == PGOInstrumentationType::CTXPROF; 930 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo( 931 F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI, LI, 932 InstrumentationType == PGOInstrumentationType::CSFDO, 933 shouldInstrumentEntryBB(), shouldInstrumentLoopEntries(), 934 PGOBlockCoverage); 935 936 auto *const Name = IsCtxProf ? cast<GlobalValue>(&F) : FuncInfo.FuncNameVar; 937 auto *const CFGHash = 938 ConstantInt::get(Type::getInt64Ty(M.getContext()), FuncInfo.FunctionHash); 939 // Make sure that pointer to global is passed in with zero addrspace 940 // This is relevant during GPU profiling 941 auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast( 942 Name, PointerType::get(M.getContext(), 0)); 943 if (PGOFunctionEntryCoverage) { 944 auto &EntryBB = F.getEntryBlock(); 945 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); 946 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>, 947 // i32 <index>) 948 Builder.CreateIntrinsic( 949 Intrinsic::instrprof_cover, {}, 950 {NormalizedNamePtr, CFGHash, Builder.getInt32(1), Builder.getInt32(0)}); 951 return; 952 } 953 954 std::vector<BasicBlock *> InstrumentBBs; 955 FuncInfo.getInstrumentBBs(InstrumentBBs); 956 unsigned NumCounters = 957 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 958 959 if (IsCtxProf) { 960 auto *CSIntrinsic = 961 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::instrprof_callsite); 962 // We want to count the instrumentable callsites, then instrument them. This 963 // is because the llvm.instrprof.callsite intrinsic has an argument (like 964 // the other instrprof intrinsics) capturing the total number of 965 // instrumented objects (counters, or callsites, in this case). In this 966 // case, we want that value so we can readily pass it to the compiler-rt 967 // APIs that may have to allocate memory based on the nr of callsites. 968 // The traversal logic is the same for both counting and instrumentation, 969 // just needs to be done in succession. 970 auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) { 971 for (auto &BB : F) 972 for (auto &Instr : BB) 973 if (auto *CS = dyn_cast<CallBase>(&Instr)) { 974 if (!InstrProfCallsite::canInstrumentCallsite(*CS)) 975 continue; 976 Visitor(CS); 977 } 978 }; 979 // First, count callsites. 980 uint32_t TotalNumCallsites = 0; 981 Visit([&TotalNumCallsites](auto *) { ++TotalNumCallsites; }); 982 983 // Now instrument. 984 uint32_t CallsiteIndex = 0; 985 Visit([&](auto *CB) { 986 IRBuilder<> Builder(CB); 987 Builder.CreateCall(CSIntrinsic, 988 {Name, CFGHash, Builder.getInt32(TotalNumCallsites), 989 Builder.getInt32(CallsiteIndex++), 990 CB->getCalledOperand()}); 991 }); 992 } 993 994 uint32_t I = 0; 995 if (PGOTemporalInstrumentation) { 996 NumCounters += PGOBlockCoverage ? 8 : 1; 997 auto &EntryBB = F.getEntryBlock(); 998 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); 999 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>, 1000 // i32 <index>) 1001 Builder.CreateIntrinsic(Intrinsic::instrprof_timestamp, {}, 1002 {NormalizedNamePtr, CFGHash, 1003 Builder.getInt32(NumCounters), 1004 Builder.getInt32(I)}); 1005 I += PGOBlockCoverage ? 8 : 1; 1006 } 1007 1008 for (auto *InstrBB : InstrumentBBs) { 1009 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 1010 assert(Builder.GetInsertPoint() != InstrBB->end() && 1011 "Cannot get the Instrumentation point"); 1012 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>, 1013 // i32 <index>) 1014 Builder.CreateIntrinsic(PGOBlockCoverage ? Intrinsic::instrprof_cover 1015 : Intrinsic::instrprof_increment, 1016 {}, 1017 {NormalizedNamePtr, CFGHash, 1018 Builder.getInt32(NumCounters), 1019 Builder.getInt32(I++)}); 1020 } 1021 1022 // Now instrument select instructions: 1023 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, Name, 1024 FuncInfo.FunctionHash); 1025 assert(I == NumCounters); 1026 1027 if (isValueProfilingDisabled()) 1028 return; 1029 1030 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 1031 1032 // Intrinsic function calls do not have funclet operand bundles needed for 1033 // Windows exception handling attached to them. However, if value profiling is 1034 // inserted for one of these calls, then a funclet value will need to be set 1035 // on the instrumentation call based on the funclet coloring. 1036 DenseMap<BasicBlock *, ColorVector> BlockColors; 1037 if (F.hasPersonalityFn() && 1038 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 1039 BlockColors = colorEHFunclets(F); 1040 1041 // For each VP Kind, walk the VP candidates and instrument each one. 1042 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 1043 unsigned SiteIndex = 0; 1044 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 1045 continue; 1046 1047 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 1048 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 1049 << " site: CallSite Index = " << SiteIndex << "\n"); 1050 1051 IRBuilder<> Builder(Cand.InsertPt); 1052 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 1053 "Cannot get the Instrumentation point"); 1054 1055 Value *ToProfile = nullptr; 1056 if (Cand.V->getType()->isIntegerTy()) 1057 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 1058 else if (Cand.V->getType()->isPointerTy()) 1059 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 1060 assert(ToProfile && "value profiling Value is of unexpected type"); 1061 1062 auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast( 1063 Name, PointerType::get(M.getContext(), 0)); 1064 1065 SmallVector<OperandBundleDef, 1> OpBundles; 1066 populateEHOperandBundle(Cand, BlockColors, OpBundles); 1067 Builder.CreateCall( 1068 Intrinsic::getOrInsertDeclaration(&M, 1069 Intrinsic::instrprof_value_profile), 1070 {NormalizedNamePtr, Builder.getInt64(FuncInfo.FunctionHash), 1071 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 1072 OpBundles); 1073 } 1074 } // IPVK_First <= Kind <= IPVK_Last 1075 } 1076 1077 namespace { 1078 1079 // This class represents a CFG edge in profile use compilation. 1080 struct PGOUseEdge : public PGOEdge { 1081 using PGOEdge::PGOEdge; 1082 1083 std::optional<uint64_t> Count; 1084 1085 // Set edge count value 1086 void setEdgeCount(uint64_t Value) { Count = Value; } 1087 1088 // Return the information string for this object. 1089 std::string infoString() const { 1090 if (!Count) 1091 return PGOEdge::infoString(); 1092 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str(); 1093 } 1094 }; 1095 1096 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 1097 1098 // This class stores the auxiliary information for each BB. 1099 struct PGOUseBBInfo : public PGOBBInfo { 1100 std::optional<uint64_t> Count; 1101 int32_t UnknownCountInEdge = 0; 1102 int32_t UnknownCountOutEdge = 0; 1103 DirectEdges InEdges; 1104 DirectEdges OutEdges; 1105 1106 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {} 1107 1108 // Set the profile count value for this BB. 1109 void setBBInfoCount(uint64_t Value) { Count = Value; } 1110 1111 // Return the information string of this object. 1112 std::string infoString() const { 1113 if (!Count) 1114 return PGOBBInfo::infoString(); 1115 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str(); 1116 } 1117 1118 // Add an OutEdge and update the edge count. 1119 void addOutEdge(PGOUseEdge *E) { 1120 OutEdges.push_back(E); 1121 UnknownCountOutEdge++; 1122 } 1123 1124 // Add an InEdge and update the edge count. 1125 void addInEdge(PGOUseEdge *E) { 1126 InEdges.push_back(E); 1127 UnknownCountInEdge++; 1128 } 1129 }; 1130 1131 } // end anonymous namespace 1132 1133 // Sum up the count values for all the edges. 1134 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 1135 uint64_t Total = 0; 1136 for (const auto &E : Edges) { 1137 if (E->Removed) 1138 continue; 1139 if (E->Count) 1140 Total += *E->Count; 1141 } 1142 return Total; 1143 } 1144 1145 namespace { 1146 1147 class PGOUseFunc { 1148 public: 1149 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, 1150 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 1151 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 1152 LoopInfo *LI, ProfileSummaryInfo *PSI, bool IsCS, 1153 bool InstrumentFuncEntry, bool InstrumentLoopEntries, 1154 bool HasSingleByteCoverage) 1155 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 1156 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, LI, IsCS, 1157 InstrumentFuncEntry, InstrumentLoopEntries, 1158 HasSingleByteCoverage), 1159 FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {} 1160 1161 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum); 1162 1163 // Read counts for the instrumented BB from profile. 1164 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1165 InstrProfRecord::CountPseudoKind &PseudoKind); 1166 1167 // Populate the counts for all BBs. 1168 void populateCounters(); 1169 1170 // Set block coverage based on profile coverage values. 1171 void populateCoverage(IndexedInstrProfReader *PGOReader); 1172 1173 // Set the branch weights based on the count values. 1174 void setBranchWeights(); 1175 1176 // Annotate the value profile call sites for all value kind. 1177 void annotateValueSites(); 1178 1179 // Annotate the value profile call sites for one value kind. 1180 void annotateValueSites(uint32_t Kind); 1181 1182 // Annotate the irreducible loop header weights. 1183 void annotateIrrLoopHeaderWeights(); 1184 1185 // The hotness of the function from the profile count. 1186 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1187 1188 // Return the function hotness from the profile. 1189 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1190 1191 // Return the function hash. 1192 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1193 1194 // Return the profile record for this function; 1195 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1196 1197 // Return the auxiliary BB information. 1198 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const { 1199 return FuncInfo.getBBInfo(BB); 1200 } 1201 1202 // Return the auxiliary BB information if available. 1203 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const { 1204 return FuncInfo.findBBInfo(BB); 1205 } 1206 1207 Function &getFunc() const { return F; } 1208 1209 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); } 1210 1211 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1212 1213 private: 1214 Function &F; 1215 Module *M; 1216 BlockFrequencyInfo *BFI; 1217 ProfileSummaryInfo *PSI; 1218 1219 // This member stores the shared information with class PGOGenFunc. 1220 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo; 1221 1222 // The maximum count value in the profile. This is only used in PGO use 1223 // compilation. 1224 uint64_t ProgramMaxCount; 1225 1226 // Position of counter that remains to be read. 1227 uint32_t CountPosition = 0; 1228 1229 // Total size of the profile count for this function. 1230 uint32_t ProfileCountSize = 0; 1231 1232 // ProfileRecord for this function. 1233 InstrProfRecord ProfileRecord; 1234 1235 // Function hotness info derived from profile. 1236 FuncFreqAttr FreqAttr; 1237 1238 // Is to use the context sensitive profile. 1239 bool IsCS; 1240 1241 ValueProfileCollector VPC; 1242 1243 // Find the Instrumented BB and set the value. Return false on error. 1244 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1245 1246 // Set the edge counter value for the unknown edge -- there should be only 1247 // one unknown edge. 1248 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1249 1250 // Set the hot/cold inline hints based on the count values. 1251 // FIXME: This function should be removed once the functionality in 1252 // the inliner is implemented. 1253 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1254 if (PSI->isHotCount(EntryCount)) 1255 FreqAttr = FFA_Hot; 1256 else if (PSI->isColdCount(MaxCount)) 1257 FreqAttr = FFA_Cold; 1258 } 1259 }; 1260 1261 } // end anonymous namespace 1262 1263 /// Set up InEdges/OutEdges for all BBs in the MST. 1264 static void setupBBInfoEdges( 1265 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) { 1266 // This is not required when there is block coverage inference. 1267 if (FuncInfo.BCI) 1268 return; 1269 for (const auto &E : FuncInfo.MST.allEdges()) { 1270 if (E->Removed) 1271 continue; 1272 const BasicBlock *SrcBB = E->SrcBB; 1273 const BasicBlock *DestBB = E->DestBB; 1274 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB); 1275 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB); 1276 SrcInfo.addOutEdge(E.get()); 1277 DestInfo.addInEdge(E.get()); 1278 } 1279 } 1280 1281 // Visit all the edges and assign the count value for the instrumented 1282 // edges and the BB. Return false on error. 1283 bool PGOUseFunc::setInstrumentedCounts( 1284 const std::vector<uint64_t> &CountFromProfile) { 1285 1286 std::vector<BasicBlock *> InstrumentBBs; 1287 FuncInfo.getInstrumentBBs(InstrumentBBs); 1288 1289 setupBBInfoEdges(FuncInfo); 1290 1291 unsigned NumCounters = 1292 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1293 // The number of counters here should match the number of counters 1294 // in profile. Return if they mismatch. 1295 if (NumCounters != CountFromProfile.size()) { 1296 return false; 1297 } 1298 auto *FuncEntry = &*F.begin(); 1299 1300 // Set the profile count to the Instrumented BBs. 1301 uint32_t I = 0; 1302 for (BasicBlock *InstrBB : InstrumentBBs) { 1303 uint64_t CountValue = CountFromProfile[I++]; 1304 PGOUseBBInfo &Info = getBBInfo(InstrBB); 1305 // If we reach here, we know that we have some nonzero count 1306 // values in this function. The entry count should not be 0. 1307 // Fix it if necessary. 1308 if (InstrBB == FuncEntry && CountValue == 0) 1309 CountValue = 1; 1310 Info.setBBInfoCount(CountValue); 1311 } 1312 ProfileCountSize = CountFromProfile.size(); 1313 CountPosition = I; 1314 1315 // Set the edge count and update the count of unknown edges for BBs. 1316 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1317 E->setEdgeCount(Value); 1318 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1319 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1320 }; 1321 1322 // Set the profile count the Instrumented edges. There are BBs that not in 1323 // MST but not instrumented. Need to set the edge count value so that we can 1324 // populate the profile counts later. 1325 for (const auto &E : FuncInfo.MST.allEdges()) { 1326 if (E->Removed || E->InMST) 1327 continue; 1328 const BasicBlock *SrcBB = E->SrcBB; 1329 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB); 1330 1331 // If only one out-edge, the edge profile count should be the same as BB 1332 // profile count. 1333 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1) 1334 setEdgeCount(E.get(), *SrcInfo.Count); 1335 else { 1336 const BasicBlock *DestBB = E->DestBB; 1337 PGOUseBBInfo &DestInfo = getBBInfo(DestBB); 1338 // If only one in-edge, the edge profile count should be the same as BB 1339 // profile count. 1340 if (DestInfo.Count && DestInfo.InEdges.size() == 1) 1341 setEdgeCount(E.get(), *DestInfo.Count); 1342 } 1343 if (E->Count) 1344 continue; 1345 // E's count should have been set from profile. If not, this meenas E skips 1346 // the instrumentation. We set the count to 0. 1347 setEdgeCount(E.get(), 0); 1348 } 1349 return true; 1350 } 1351 1352 // Set the count value for the unknown edge. There should be one and only one 1353 // unknown edge in Edges vector. 1354 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1355 for (auto &E : Edges) { 1356 if (E->Count) 1357 continue; 1358 E->setEdgeCount(Value); 1359 1360 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1361 getBBInfo(E->DestBB).UnknownCountInEdge--; 1362 return; 1363 } 1364 llvm_unreachable("Cannot find the unknown count edge"); 1365 } 1366 1367 // Emit function metadata indicating PGO profile mismatch. 1368 static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx) { 1369 const char MetadataName[] = "instr_prof_hash_mismatch"; 1370 SmallVector<Metadata *, 2> Names; 1371 // If this metadata already exists, ignore. 1372 auto *Existing = F.getMetadata(LLVMContext::MD_annotation); 1373 if (Existing) { 1374 MDTuple *Tuple = cast<MDTuple>(Existing); 1375 for (const auto &N : Tuple->operands()) { 1376 if (N.equalsStr(MetadataName)) 1377 return; 1378 Names.push_back(N.get()); 1379 } 1380 } 1381 1382 MDBuilder MDB(ctx); 1383 Names.push_back(MDB.createString(MetadataName)); 1384 MDNode *MD = MDTuple::get(ctx, Names); 1385 F.setMetadata(LLVMContext::MD_annotation, MD); 1386 } 1387 1388 void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) { 1389 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) { 1390 auto &Ctx = M->getContext(); 1391 auto Err = IPE.get(); 1392 bool SkipWarning = false; 1393 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1394 << FuncInfo.FuncName << ": "); 1395 if (Err == instrprof_error::unknown_function) { 1396 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1397 SkipWarning = !PGOWarnMissing; 1398 LLVM_DEBUG(dbgs() << "unknown function"); 1399 } else if (Err == instrprof_error::hash_mismatch || 1400 Err == instrprof_error::malformed) { 1401 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1402 SkipWarning = 1403 NoPGOWarnMismatch || 1404 (NoPGOWarnMismatchComdatWeak && 1405 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage || 1406 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1407 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash 1408 << " skip=" << SkipWarning << ")"); 1409 // Emit function metadata indicating PGO profile mismatch. 1410 annotateFunctionWithHashMismatch(F, M->getContext()); 1411 } 1412 1413 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1414 if (SkipWarning) 1415 return; 1416 1417 std::string Msg = 1418 IPE.message() + std::string(" ") + F.getName().str() + 1419 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) + 1420 std::string(" up to ") + std::to_string(MismatchedFuncSum) + 1421 std::string(" count discarded"); 1422 1423 Ctx.diagnose( 1424 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1425 }); 1426 } 1427 1428 // Read the profile from ProfileFileName and assign the value to the 1429 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1430 // Return true if the profile are successfully read, and false on errors. 1431 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1432 InstrProfRecord::CountPseudoKind &PseudoKind) { 1433 auto &Ctx = M->getContext(); 1434 uint64_t MismatchedFuncSum = 0; 1435 Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( 1436 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName, 1437 &MismatchedFuncSum); 1438 if (Error E = Result.takeError()) { 1439 handleInstrProfError(std::move(E), MismatchedFuncSum); 1440 return false; 1441 } 1442 ProfileRecord = std::move(Result.get()); 1443 PseudoKind = ProfileRecord.getCountPseudoKind(); 1444 if (PseudoKind != InstrProfRecord::NotPseudo) { 1445 return true; 1446 } 1447 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1448 1449 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1450 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1451 1452 uint64_t ValueSum = 0; 1453 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1454 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1455 ValueSum += CountFromProfile[I]; 1456 } 1457 AllZeros = (ValueSum == 0); 1458 1459 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1460 1461 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1462 getBBInfo(nullptr).UnknownCountInEdge = 2; 1463 1464 if (!setInstrumentedCounts(CountFromProfile)) { 1465 LLVM_DEBUG( 1466 dbgs() << "Inconsistent number of counts, skipping this function"); 1467 Ctx.diagnose(DiagnosticInfoPGOProfile( 1468 M->getName().data(), 1469 Twine("Inconsistent number of counts in ") + F.getName().str() + 1470 Twine(": the profile may be stale or there is a function name " 1471 "collision."), 1472 DS_Warning)); 1473 return false; 1474 } 1475 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1476 return true; 1477 } 1478 1479 void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) { 1480 uint64_t MismatchedFuncSum = 0; 1481 Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( 1482 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName, 1483 &MismatchedFuncSum); 1484 if (auto Err = Result.takeError()) { 1485 handleInstrProfError(std::move(Err), MismatchedFuncSum); 1486 return; 1487 } 1488 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1489 1490 std::vector<uint64_t> &CountsFromProfile = Result.get().Counts; 1491 DenseMap<const BasicBlock *, bool> Coverage; 1492 unsigned Index = 0; 1493 for (auto &BB : F) 1494 if (FuncInfo.BCI->shouldInstrumentBlock(BB)) 1495 Coverage[&BB] = (CountsFromProfile[Index++] != 0); 1496 assert(Index == CountsFromProfile.size()); 1497 1498 // For each B in InverseDependencies[A], if A is covered then B is covered. 1499 DenseMap<const BasicBlock *, DenseSet<const BasicBlock *>> 1500 InverseDependencies; 1501 for (auto &BB : F) { 1502 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) { 1503 // If Dep is covered then BB is covered. 1504 InverseDependencies[Dep].insert(&BB); 1505 } 1506 } 1507 1508 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm. 1509 std::stack<const BasicBlock *> CoveredBlocksToProcess; 1510 for (auto &[BB, IsCovered] : Coverage) 1511 if (IsCovered) 1512 CoveredBlocksToProcess.push(BB); 1513 1514 while (!CoveredBlocksToProcess.empty()) { 1515 auto *CoveredBlock = CoveredBlocksToProcess.top(); 1516 assert(Coverage[CoveredBlock]); 1517 CoveredBlocksToProcess.pop(); 1518 for (auto *BB : InverseDependencies[CoveredBlock]) { 1519 // If CoveredBlock is covered then BB is covered. 1520 if (Coverage[BB]) 1521 continue; 1522 Coverage[BB] = true; 1523 CoveredBlocksToProcess.push(BB); 1524 } 1525 } 1526 1527 // Annotate block coverage. 1528 MDBuilder MDB(F.getContext()); 1529 // We set the entry count to 10000 if the entry block is covered so that BFI 1530 // can propagate a fraction of this count to the other covered blocks. 1531 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0); 1532 for (auto &BB : F) { 1533 // For a block A and its successor B, we set the edge weight as follows: 1534 // If A is covered and B is covered, set weight=1. 1535 // If A is covered and B is uncovered, set weight=0. 1536 // If A is uncovered, set weight=1. 1537 // This setup will allow BFI to give nonzero profile counts to only covered 1538 // blocks. 1539 SmallVector<uint32_t, 4> Weights; 1540 for (auto *Succ : successors(&BB)) 1541 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0); 1542 if (Weights.size() >= 2) 1543 llvm::setBranchWeights(*BB.getTerminator(), Weights, 1544 /*IsExpected=*/false); 1545 } 1546 1547 unsigned NumCorruptCoverage = 0; 1548 DominatorTree DT(F); 1549 LoopInfo LI(DT); 1550 BranchProbabilityInfo BPI(F, LI); 1551 BlockFrequencyInfo BFI(F, BPI, LI); 1552 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> { 1553 if (auto C = BFI.getBlockProfileCount(&BB)) 1554 return C == 0; 1555 return {}; 1556 }; 1557 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n"); 1558 for (auto &BB : F) { 1559 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ") 1560 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName() 1561 << "\n"); 1562 // In some cases it is possible to find a covered block that has no covered 1563 // successors, e.g., when a block calls a function that may call exit(). In 1564 // those cases, BFI could find its successor to be covered while BCI could 1565 // find its successor to be dead. 1566 if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) { 1567 LLVM_DEBUG( 1568 dbgs() << "Found inconsistent block covearge for " << BB.getName() 1569 << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI=" 1570 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n"); 1571 ++NumCorruptCoverage; 1572 } 1573 if (Coverage[&BB]) 1574 ++NumCoveredBlocks; 1575 } 1576 if (PGOVerifyBFI && NumCorruptCoverage) { 1577 auto &Ctx = M->getContext(); 1578 Ctx.diagnose(DiagnosticInfoPGOProfile( 1579 M->getName().data(), 1580 Twine("Found inconsistent block coverage for function ") + F.getName() + 1581 " in " + Twine(NumCorruptCoverage) + " blocks.", 1582 DS_Warning)); 1583 } 1584 if (PGOViewBlockCoverageGraph) 1585 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage); 1586 } 1587 1588 // Populate the counters from instrumented BBs to all BBs. 1589 // In the end of this operation, all BBs should have a valid count value. 1590 void PGOUseFunc::populateCounters() { 1591 bool Changes = true; 1592 unsigned NumPasses = 0; 1593 while (Changes) { 1594 NumPasses++; 1595 Changes = false; 1596 1597 // For efficient traversal, it's better to start from the end as most 1598 // of the instrumented edges are at the end. 1599 for (auto &BB : reverse(F)) { 1600 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB); 1601 if (UseBBInfo == nullptr) 1602 continue; 1603 if (!UseBBInfo->Count) { 1604 if (UseBBInfo->UnknownCountOutEdge == 0) { 1605 UseBBInfo->Count = sumEdgeCount(UseBBInfo->OutEdges); 1606 Changes = true; 1607 } else if (UseBBInfo->UnknownCountInEdge == 0) { 1608 UseBBInfo->Count = sumEdgeCount(UseBBInfo->InEdges); 1609 Changes = true; 1610 } 1611 } 1612 if (UseBBInfo->Count) { 1613 if (UseBBInfo->UnknownCountOutEdge == 1) { 1614 uint64_t Total = 0; 1615 uint64_t OutSum = sumEdgeCount(UseBBInfo->OutEdges); 1616 // If the one of the successor block can early terminate (no-return), 1617 // we can end up with situation where out edge sum count is larger as 1618 // the source BB's count is collected by a post-dominated block. 1619 if (*UseBBInfo->Count > OutSum) 1620 Total = *UseBBInfo->Count - OutSum; 1621 setEdgeCount(UseBBInfo->OutEdges, Total); 1622 Changes = true; 1623 } 1624 if (UseBBInfo->UnknownCountInEdge == 1) { 1625 uint64_t Total = 0; 1626 uint64_t InSum = sumEdgeCount(UseBBInfo->InEdges); 1627 if (*UseBBInfo->Count > InSum) 1628 Total = *UseBBInfo->Count - InSum; 1629 setEdgeCount(UseBBInfo->InEdges, Total); 1630 Changes = true; 1631 } 1632 } 1633 } 1634 } 1635 1636 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1637 (void)NumPasses; 1638 #ifndef NDEBUG 1639 // Assert every BB has a valid counter. 1640 for (auto &BB : F) { 1641 auto BI = findBBInfo(&BB); 1642 if (BI == nullptr) 1643 continue; 1644 assert(BI->Count && "BB count is not valid"); 1645 } 1646 #endif 1647 // Now annotate select instructions. This may fixup impossible block counts. 1648 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition); 1649 assert(CountPosition == ProfileCountSize); 1650 1651 uint64_t FuncEntryCount = *getBBInfo(&*F.begin()).Count; 1652 uint64_t FuncMaxCount = FuncEntryCount; 1653 for (auto &BB : F) { 1654 auto BI = findBBInfo(&BB); 1655 if (BI == nullptr) 1656 continue; 1657 FuncMaxCount = std::max(FuncMaxCount, *BI->Count); 1658 } 1659 1660 // Fix the obviously inconsistent entry count. 1661 if (FuncMaxCount > 0 && FuncEntryCount == 0) 1662 FuncEntryCount = 1; 1663 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1664 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1665 1666 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1667 } 1668 1669 // Assign the scaled count values to the BB with multiple out edges. 1670 void PGOUseFunc::setBranchWeights() { 1671 // Generate MD_prof metadata for every branch instruction. 1672 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1673 << " IsCS=" << IsCS << "\n"); 1674 for (auto &BB : F) { 1675 Instruction *TI = BB.getTerminator(); 1676 if (TI->getNumSuccessors() < 2) 1677 continue; 1678 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1679 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) || 1680 isa<CallBrInst>(TI))) 1681 continue; 1682 1683 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB); 1684 if (!*BBCountInfo.Count) 1685 continue; 1686 1687 // We have a non-zero Branch BB. 1688 1689 // SuccessorCount can be greater than OutEdgesCount, because 1690 // removed edges don't appear in OutEdges. 1691 unsigned OutEdgesCount = BBCountInfo.OutEdges.size(); 1692 unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors(); 1693 assert(OutEdgesCount <= SuccessorCount); 1694 1695 SmallVector<uint64_t, 2> EdgeCounts(SuccessorCount, 0); 1696 uint64_t MaxCount = 0; 1697 for (unsigned It = 0; It < OutEdgesCount; It++) { 1698 const PGOUseEdge *E = BBCountInfo.OutEdges[It]; 1699 const BasicBlock *SrcBB = E->SrcBB; 1700 const BasicBlock *DestBB = E->DestBB; 1701 if (DestBB == nullptr) 1702 continue; 1703 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1704 uint64_t EdgeCount = *E->Count; 1705 if (EdgeCount > MaxCount) 1706 MaxCount = EdgeCount; 1707 EdgeCounts[SuccNum] = EdgeCount; 1708 } 1709 1710 if (MaxCount) 1711 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1712 else { 1713 // A zero MaxCount can come about when we have a BB with a positive 1714 // count, and whose successor blocks all have 0 count. This can happen 1715 // when there is no exit block and the code exits via a noreturn function. 1716 auto &Ctx = M->getContext(); 1717 Ctx.diagnose(DiagnosticInfoPGOProfile( 1718 M->getName().data(), 1719 Twine("Profile in ") + F.getName().str() + 1720 Twine(" partially ignored") + 1721 Twine(", possibly due to the lack of a return path."), 1722 DS_Warning)); 1723 } 1724 } 1725 } 1726 1727 static bool isIndirectBrTarget(BasicBlock *BB) { 1728 for (BasicBlock *Pred : predecessors(BB)) { 1729 if (isa<IndirectBrInst>(Pred->getTerminator())) 1730 return true; 1731 } 1732 return false; 1733 } 1734 1735 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1736 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1737 // Find irr loop headers 1738 for (auto &BB : F) { 1739 // As a heuristic also annotate indrectbr targets as they have a high chance 1740 // to become an irreducible loop header after the indirectbr tail 1741 // duplication. 1742 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1743 Instruction *TI = BB.getTerminator(); 1744 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB); 1745 setIrrLoopHeaderMetadata(M, TI, *BBCountInfo.Count); 1746 } 1747 } 1748 } 1749 1750 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1751 Module *M = F.getParent(); 1752 IRBuilder<> Builder(&SI); 1753 Type *Int64Ty = Builder.getInt64Ty(); 1754 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1755 auto *NormalizedFuncNameVarPtr = 1756 ConstantExpr::getPointerBitCastOrAddrSpaceCast( 1757 FuncNameVar, PointerType::get(M->getContext(), 0)); 1758 Builder.CreateIntrinsic(Intrinsic::instrprof_increment_step, {}, 1759 {NormalizedFuncNameVarPtr, Builder.getInt64(FuncHash), 1760 Builder.getInt32(TotalNumCtrs), 1761 Builder.getInt32(*CurCtrIdx), Step}); 1762 ++(*CurCtrIdx); 1763 } 1764 1765 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1766 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1767 assert(*CurCtrIdx < CountFromProfile.size() && 1768 "Out of bound access of counters"); 1769 uint64_t SCounts[2]; 1770 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1771 ++(*CurCtrIdx); 1772 uint64_t TotalCount = 0; 1773 auto BI = UseFunc->findBBInfo(SI.getParent()); 1774 if (BI != nullptr) { 1775 TotalCount = *BI->Count; 1776 1777 // Fix the block count if it is impossible. 1778 if (TotalCount < SCounts[0]) 1779 BI->Count = SCounts[0]; 1780 } 1781 // False Count 1782 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1783 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1784 if (MaxCount) 1785 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1786 } 1787 1788 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1789 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage) 1790 return; 1791 // FIXME: do not handle this yet. 1792 if (SI.getCondition()->getType()->isVectorTy()) 1793 return; 1794 1795 switch (Mode) { 1796 case VM_counting: 1797 NSIs++; 1798 return; 1799 case VM_instrument: 1800 instrumentOneSelectInst(SI); 1801 return; 1802 case VM_annotate: 1803 annotateOneSelectInst(SI); 1804 return; 1805 } 1806 1807 llvm_unreachable("Unknown visiting mode"); 1808 } 1809 1810 static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind) { 1811 if (ValueProfKind == IPVK_MemOPSize) 1812 return MaxNumMemOPAnnotations; 1813 if (ValueProfKind == llvm::IPVK_VTableTarget) 1814 return MaxNumVTableAnnotations; 1815 return MaxNumAnnotations; 1816 } 1817 1818 // Traverse all valuesites and annotate the instructions for all value kind. 1819 void PGOUseFunc::annotateValueSites() { 1820 if (DisableValueProfiling) 1821 return; 1822 1823 // Create the PGOFuncName meta data. 1824 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1825 1826 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1827 annotateValueSites(Kind); 1828 } 1829 1830 // Annotate the instructions for a specific value kind. 1831 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1832 assert(Kind <= IPVK_Last); 1833 unsigned ValueSiteIndex = 0; 1834 1835 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1836 1837 // Since there isn't a reliable or fast way for profile reader to tell if a 1838 // profile is generated with `-enable-vtable-value-profiling` on, we run the 1839 // value profile collector over the function IR to find the instrumented sites 1840 // iff function profile records shows the number of instrumented vtable sites 1841 // is not zero. Function cfg already takes the number of instrumented 1842 // indirect call sites into account so it doesn't hash the number of 1843 // instrumented vtables; as a side effect it makes it easier to enable 1844 // profiling and profile use in two steps if needed. 1845 // TODO: Remove this if/when -enable-vtable-value-profiling is on by default. 1846 if (NumValueSites > 0 && Kind == IPVK_VTableTarget && 1847 NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() && 1848 MaxNumVTableAnnotations != 0) 1849 FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget); 1850 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1851 if (NumValueSites != ValueSites.size()) { 1852 auto &Ctx = M->getContext(); 1853 Ctx.diagnose(DiagnosticInfoPGOProfile( 1854 M->getName().data(), 1855 Twine("Inconsistent number of value sites for ") + 1856 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") + 1857 F.getName().str() + 1858 Twine("\", possibly due to the use of a stale profile."), 1859 DS_Warning)); 1860 return; 1861 } 1862 1863 for (VPCandidateInfo &I : ValueSites) { 1864 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1865 << "): Index = " << ValueSiteIndex << " out of " 1866 << NumValueSites << "\n"); 1867 annotateValueSite( 1868 *M, *I.AnnotatedInst, ProfileRecord, 1869 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1870 getMaxNumAnnotations(static_cast<InstrProfValueKind>(Kind))); 1871 ValueSiteIndex++; 1872 } 1873 } 1874 1875 // Collect the set of members for each Comdat in module M and store 1876 // in ComdatMembers. 1877 static void collectComdatMembers( 1878 Module &M, 1879 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1880 if (!DoComdatRenaming) 1881 return; 1882 for (Function &F : M) 1883 if (Comdat *C = F.getComdat()) 1884 ComdatMembers.insert(std::make_pair(C, &F)); 1885 for (GlobalVariable &GV : M.globals()) 1886 if (Comdat *C = GV.getComdat()) 1887 ComdatMembers.insert(std::make_pair(C, &GV)); 1888 for (GlobalAlias &GA : M.aliases()) 1889 if (Comdat *C = GA.getComdat()) 1890 ComdatMembers.insert(std::make_pair(C, &GA)); 1891 } 1892 1893 // Return true if we should not find instrumentation data for this function 1894 static bool skipPGOUse(const Function &F) { 1895 if (F.isDeclaration()) 1896 return true; 1897 // If there are too many critical edges, PGO might cause 1898 // compiler time problem. Skip PGO if the number of 1899 // critical edges execeed the threshold. 1900 unsigned NumCriticalEdges = 0; 1901 for (auto &BB : F) { 1902 const Instruction *TI = BB.getTerminator(); 1903 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 1904 if (isCriticalEdge(TI, I)) 1905 NumCriticalEdges++; 1906 } 1907 } 1908 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) { 1909 LLVM_DEBUG(dbgs() << "In func " << F.getName() 1910 << ", NumCriticalEdges=" << NumCriticalEdges 1911 << " exceed the threshold. Skip PGO.\n"); 1912 return true; 1913 } 1914 return false; 1915 } 1916 1917 // Return true if we should not instrument this function 1918 static bool skipPGOGen(const Function &F) { 1919 if (skipPGOUse(F)) 1920 return true; 1921 if (F.hasFnAttribute(llvm::Attribute::Naked)) 1922 return true; 1923 if (F.hasFnAttribute(llvm::Attribute::NoProfile)) 1924 return true; 1925 if (F.hasFnAttribute(llvm::Attribute::SkipProfile)) 1926 return true; 1927 if (F.getInstructionCount() < PGOFunctionSizeThreshold) 1928 return true; 1929 if (PGOInstrumentColdFunctionOnly) { 1930 if (auto EntryCount = F.getEntryCount()) 1931 return EntryCount->getCount() > PGOColdInstrumentEntryThreshold; 1932 return !PGOTreatUnknownAsCold; 1933 } 1934 return false; 1935 } 1936 1937 static bool InstrumentAllFunctions( 1938 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1939 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1940 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 1941 function_ref<LoopInfo *(Function &)> LookupLI, 1942 PGOInstrumentationType InstrumentationType) { 1943 // For the context-sensitve instrumentation, we should have a separated pass 1944 // (before LTO/ThinLTO linking) to create these variables. 1945 if (InstrumentationType == PGOInstrumentationType::FDO) 1946 createIRLevelProfileFlagVar(M, InstrumentationType); 1947 1948 Triple TT(M.getTargetTriple()); 1949 LLVMContext &Ctx = M.getContext(); 1950 if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling) 1951 Ctx.diagnose(DiagnosticInfoPGOProfile( 1952 M.getName().data(), 1953 Twine("VTable value profiling is presently not " 1954 "supported for non-ELF object formats"), 1955 DS_Warning)); 1956 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1957 collectComdatMembers(M, ComdatMembers); 1958 1959 for (auto &F : M) { 1960 if (skipPGOGen(F)) 1961 continue; 1962 TargetLibraryInfo &TLI = LookupTLI(F); 1963 BranchProbabilityInfo *BPI = LookupBPI(F); 1964 BlockFrequencyInfo *BFI = LookupBFI(F); 1965 LoopInfo *LI = LookupLI(F); 1966 FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI, LI, 1967 InstrumentationType); 1968 FI.instrument(); 1969 } 1970 return true; 1971 } 1972 1973 PreservedAnalyses 1974 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &MAM) { 1975 createProfileFileNameVar(M, CSInstrName); 1976 // The variable in a comdat may be discarded by LTO. Ensure the declaration 1977 // will be retained. 1978 appendToCompilerUsed( 1979 M, createIRLevelProfileFlagVar(M, PGOInstrumentationType::CSFDO)); 1980 if (ProfileSampling) 1981 createProfileSamplingVar(M); 1982 PreservedAnalyses PA; 1983 PA.preserve<FunctionAnalysisManagerModuleProxy>(); 1984 PA.preserveSet<AllAnalysesOn<Function>>(); 1985 return PA; 1986 } 1987 1988 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1989 ModuleAnalysisManager &MAM) { 1990 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1991 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1992 return FAM.getResult<TargetLibraryAnalysis>(F); 1993 }; 1994 auto LookupBPI = [&FAM](Function &F) { 1995 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1996 }; 1997 auto LookupBFI = [&FAM](Function &F) { 1998 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1999 }; 2000 auto LookupLI = [&FAM](Function &F) { 2001 return &FAM.getResult<LoopAnalysis>(F); 2002 }; 2003 2004 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, LookupLI, 2005 InstrumentationType)) 2006 return PreservedAnalyses::all(); 2007 2008 return PreservedAnalyses::none(); 2009 } 2010 2011 // Using the ratio b/w sums of profile count values and BFI count values to 2012 // adjust the func entry count. 2013 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, 2014 BranchProbabilityInfo &NBPI) { 2015 Function &F = Func.getFunc(); 2016 BlockFrequencyInfo NBFI(F, NBPI, LI); 2017 #ifndef NDEBUG 2018 auto BFIEntryCount = F.getEntryCount(); 2019 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) && 2020 "Invalid BFI Entrycount"); 2021 #endif 2022 auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); 2023 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble()); 2024 for (auto &BBI : F) { 2025 uint64_t CountValue = 0; 2026 uint64_t BFICountValue = 0; 2027 if (!Func.findBBInfo(&BBI)) 2028 continue; 2029 auto BFICount = NBFI.getBlockProfileCount(&BBI); 2030 CountValue = *Func.getBBInfo(&BBI).Count; 2031 BFICountValue = *BFICount; 2032 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven); 2033 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven); 2034 } 2035 if (SumCount.isZero()) 2036 return; 2037 2038 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && 2039 "Incorrect sum of BFI counts"); 2040 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual) 2041 return; 2042 double Scale = (SumCount / SumBFICount).convertToDouble(); 2043 if (Scale < 1.001 && Scale > 0.999) 2044 return; 2045 2046 uint64_t FuncEntryCount = *Func.getBBInfo(&*F.begin()).Count; 2047 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; 2048 if (NewEntryCount == 0) 2049 NewEntryCount = 1; 2050 if (NewEntryCount != FuncEntryCount) { 2051 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real)); 2052 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() 2053 << ", entry_count " << FuncEntryCount << " --> " 2054 << NewEntryCount << "\n"); 2055 } 2056 } 2057 2058 // Compare the profile count values with BFI count values, and print out 2059 // the non-matching ones. 2060 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, 2061 BranchProbabilityInfo &NBPI, 2062 uint64_t HotCountThreshold, 2063 uint64_t ColdCountThreshold) { 2064 Function &F = Func.getFunc(); 2065 BlockFrequencyInfo NBFI(F, NBPI, LI); 2066 // bool PrintFunc = false; 2067 bool HotBBOnly = PGOVerifyHotBFI; 2068 StringRef Msg; 2069 OptimizationRemarkEmitter ORE(&F); 2070 2071 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0; 2072 for (auto &BBI : F) { 2073 uint64_t CountValue = 0; 2074 uint64_t BFICountValue = 0; 2075 2076 CountValue = Func.getBBInfo(&BBI).Count.value_or(CountValue); 2077 2078 BBNum++; 2079 if (CountValue) 2080 NonZeroBBNum++; 2081 auto BFICount = NBFI.getBlockProfileCount(&BBI); 2082 if (BFICount) 2083 BFICountValue = *BFICount; 2084 2085 if (HotBBOnly) { 2086 bool rawIsHot = CountValue >= HotCountThreshold; 2087 bool BFIIsHot = BFICountValue >= HotCountThreshold; 2088 bool rawIsCold = CountValue <= ColdCountThreshold; 2089 bool ShowCount = false; 2090 if (rawIsHot && !BFIIsHot) { 2091 Msg = "raw-Hot to BFI-nonHot"; 2092 ShowCount = true; 2093 } else if (rawIsCold && BFIIsHot) { 2094 Msg = "raw-Cold to BFI-Hot"; 2095 ShowCount = true; 2096 } 2097 if (!ShowCount) 2098 continue; 2099 } else { 2100 if ((CountValue < PGOVerifyBFICutoff) && 2101 (BFICountValue < PGOVerifyBFICutoff)) 2102 continue; 2103 uint64_t Diff = (BFICountValue >= CountValue) 2104 ? BFICountValue - CountValue 2105 : CountValue - BFICountValue; 2106 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio) 2107 continue; 2108 } 2109 BBMisMatchNum++; 2110 2111 ORE.emit([&]() { 2112 OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify", 2113 F.getSubprogram(), &BBI); 2114 Remark << "BB " << ore::NV("Block", BBI.getName()) 2115 << " Count=" << ore::NV("Count", CountValue) 2116 << " BFI_Count=" << ore::NV("Count", BFICountValue); 2117 if (!Msg.empty()) 2118 Remark << " (" << Msg << ")"; 2119 return Remark; 2120 }); 2121 } 2122 if (BBMisMatchNum) 2123 ORE.emit([&]() { 2124 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify", 2125 F.getSubprogram(), &F.getEntryBlock()) 2126 << "In Func " << ore::NV("Function", F.getName()) 2127 << ": Num_of_BB=" << ore::NV("Count", BBNum) 2128 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum) 2129 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum); 2130 }); 2131 } 2132 2133 static bool annotateAllFunctions( 2134 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 2135 vfs::FileSystem &FS, 2136 function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 2137 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 2138 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 2139 function_ref<LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI, 2140 bool IsCS) { 2141 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 2142 auto &Ctx = M.getContext(); 2143 // Read the counter array from file. 2144 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS, 2145 ProfileRemappingFileName); 2146 if (Error E = ReaderOrErr.takeError()) { 2147 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 2148 Ctx.diagnose( 2149 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 2150 }); 2151 return false; 2152 } 2153 2154 std::unique_ptr<IndexedInstrProfReader> PGOReader = 2155 std::move(ReaderOrErr.get()); 2156 if (!PGOReader) { 2157 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 2158 StringRef("Cannot get PGOReader"))); 2159 return false; 2160 } 2161 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 2162 return false; 2163 2164 // TODO: might need to change the warning once the clang option is finalized. 2165 if (!PGOReader->isIRLevelProfile()) { 2166 Ctx.diagnose(DiagnosticInfoPGOProfile( 2167 ProfileFileName.data(), "Not an IR level instrumentation profile")); 2168 return false; 2169 } 2170 if (PGOReader->functionEntryOnly()) { 2171 Ctx.diagnose(DiagnosticInfoPGOProfile( 2172 ProfileFileName.data(), 2173 "Function entry profiles are not yet supported for optimization")); 2174 return false; 2175 } 2176 2177 if (EnableVTableProfileUse) { 2178 for (GlobalVariable &G : M.globals()) { 2179 if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type)) 2180 continue; 2181 2182 // Create the PGOFuncName meta data. 2183 createPGONameMetadata(G, getPGOName(G, false /* InLTO*/)); 2184 } 2185 } 2186 2187 // Add the profile summary (read from the header of the indexed summary) here 2188 // so that we can use it below when reading counters (which checks if the 2189 // function should be marked with a cold or inlinehint attribute). 2190 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 2191 IsCS ? ProfileSummary::PSK_CSInstr 2192 : ProfileSummary::PSK_Instr); 2193 PSI->refresh(); 2194 2195 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 2196 collectComdatMembers(M, ComdatMembers); 2197 std::vector<Function *> HotFunctions; 2198 std::vector<Function *> ColdFunctions; 2199 2200 // If the profile marked as always instrument the entry BB, do the 2201 // same. Note this can be overwritten by the internal option in CFGMST.h 2202 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); 2203 if (PGOInstrumentEntry.getNumOccurrences() > 0) 2204 InstrumentFuncEntry = PGOInstrumentEntry; 2205 bool InstrumentLoopEntries = PGOReader->instrLoopEntriesEnabled(); 2206 if (PGOInstrumentLoopEntries.getNumOccurrences() > 0) 2207 InstrumentLoopEntries = PGOInstrumentLoopEntries; 2208 2209 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage(); 2210 for (auto &F : M) { 2211 if (skipPGOUse(F)) 2212 continue; 2213 TargetLibraryInfo &TLI = LookupTLI(F); 2214 BranchProbabilityInfo *BPI = LookupBPI(F); 2215 BlockFrequencyInfo *BFI = LookupBFI(F); 2216 LoopInfo *LI = LookupLI(F); 2217 if (!HasSingleByteCoverage) { 2218 // Split indirectbr critical edges here before computing the MST rather 2219 // than later in getInstrBB() to avoid invalidating it. 2220 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, 2221 BFI); 2222 } 2223 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, LI, PSI, IsCS, 2224 InstrumentFuncEntry, InstrumentLoopEntries, 2225 HasSingleByteCoverage); 2226 if (HasSingleByteCoverage) { 2227 Func.populateCoverage(PGOReader.get()); 2228 continue; 2229 } 2230 // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo, 2231 // it means the profile for the function is unrepresentative and this 2232 // function is actually hot / warm. We will reset the function hot / cold 2233 // attribute and drop all the profile counters. 2234 InstrProfRecord::CountPseudoKind PseudoKind = InstrProfRecord::NotPseudo; 2235 bool AllZeros = false; 2236 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind)) 2237 continue; 2238 if (AllZeros) { 2239 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 2240 if (Func.getProgramMaxCount() != 0) 2241 ColdFunctions.push_back(&F); 2242 continue; 2243 } 2244 if (PseudoKind != InstrProfRecord::NotPseudo) { 2245 // Clear function attribute cold. 2246 if (F.hasFnAttribute(Attribute::Cold)) 2247 F.removeFnAttr(Attribute::Cold); 2248 // Set function attribute as hot. 2249 if (PseudoKind == InstrProfRecord::PseudoHot) 2250 F.addFnAttr(Attribute::Hot); 2251 continue; 2252 } 2253 Func.populateCounters(); 2254 Func.setBranchWeights(); 2255 Func.annotateValueSites(); 2256 Func.annotateIrrLoopHeaderWeights(); 2257 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 2258 if (FreqAttr == PGOUseFunc::FFA_Cold) 2259 ColdFunctions.push_back(&F); 2260 else if (FreqAttr == PGOUseFunc::FFA_Hot) 2261 HotFunctions.push_back(&F); 2262 if (PGOViewCounts != PGOVCT_None && 2263 (ViewBlockFreqFuncName.empty() || 2264 F.getName() == ViewBlockFreqFuncName)) { 2265 LoopInfo LI{DominatorTree(F)}; 2266 std::unique_ptr<BranchProbabilityInfo> NewBPI = 2267 std::make_unique<BranchProbabilityInfo>(F, LI); 2268 std::unique_ptr<BlockFrequencyInfo> NewBFI = 2269 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 2270 if (PGOViewCounts == PGOVCT_Graph) 2271 NewBFI->view(); 2272 else if (PGOViewCounts == PGOVCT_Text) { 2273 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 2274 NewBFI->print(dbgs()); 2275 } 2276 } 2277 if (PGOViewRawCounts != PGOVCT_None && 2278 (ViewBlockFreqFuncName.empty() || 2279 F.getName() == ViewBlockFreqFuncName)) { 2280 if (PGOViewRawCounts == PGOVCT_Graph) 2281 if (ViewBlockFreqFuncName.empty()) 2282 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 2283 else 2284 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 2285 else if (PGOViewRawCounts == PGOVCT_Text) { 2286 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 2287 Func.dumpInfo(); 2288 } 2289 } 2290 2291 if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) { 2292 LoopInfo LI{DominatorTree(F)}; 2293 BranchProbabilityInfo NBPI(F, LI); 2294 2295 // Fix func entry count. 2296 if (PGOFixEntryCount) 2297 fixFuncEntryCount(Func, LI, NBPI); 2298 2299 // Verify BlockFrequency information. 2300 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0; 2301 if (PGOVerifyHotBFI) { 2302 HotCountThreshold = PSI->getOrCompHotCountThreshold(); 2303 ColdCountThreshold = PSI->getOrCompColdCountThreshold(); 2304 } 2305 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold); 2306 } 2307 } 2308 2309 // Set function hotness attribute from the profile. 2310 // We have to apply these attributes at the end because their presence 2311 // can affect the BranchProbabilityInfo of any callers, resulting in an 2312 // inconsistent MST between prof-gen and prof-use. 2313 for (auto &F : HotFunctions) { 2314 F->addFnAttr(Attribute::InlineHint); 2315 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 2316 << "\n"); 2317 } 2318 for (auto &F : ColdFunctions) { 2319 // Only set when there is no Attribute::Hot set by the user. For Hot 2320 // attribute, user's annotation has the precedence over the profile. 2321 if (F->hasFnAttribute(Attribute::Hot)) { 2322 auto &Ctx = M.getContext(); 2323 std::string Msg = std::string("Function ") + F->getName().str() + 2324 std::string(" is annotated as a hot function but" 2325 " the profile is cold"); 2326 Ctx.diagnose( 2327 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); 2328 continue; 2329 } 2330 F->addFnAttr(Attribute::Cold); 2331 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 2332 << "\n"); 2333 } 2334 return true; 2335 } 2336 2337 PGOInstrumentationUse::PGOInstrumentationUse( 2338 std::string Filename, std::string RemappingFilename, bool IsCS, 2339 IntrusiveRefCntPtr<vfs::FileSystem> VFS) 2340 : ProfileFileName(std::move(Filename)), 2341 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS), 2342 FS(std::move(VFS)) { 2343 if (!PGOTestProfileFile.empty()) 2344 ProfileFileName = PGOTestProfileFile; 2345 if (!PGOTestProfileRemappingFile.empty()) 2346 ProfileRemappingFileName = PGOTestProfileRemappingFile; 2347 if (!FS) 2348 FS = vfs::getRealFileSystem(); 2349 } 2350 2351 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 2352 ModuleAnalysisManager &MAM) { 2353 2354 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 2355 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 2356 return FAM.getResult<TargetLibraryAnalysis>(F); 2357 }; 2358 auto LookupBPI = [&FAM](Function &F) { 2359 return &FAM.getResult<BranchProbabilityAnalysis>(F); 2360 }; 2361 auto LookupBFI = [&FAM](Function &F) { 2362 return &FAM.getResult<BlockFrequencyAnalysis>(F); 2363 }; 2364 auto LookupLI = [&FAM](Function &F) { 2365 return &FAM.getResult<LoopAnalysis>(F); 2366 }; 2367 2368 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M); 2369 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS, 2370 LookupTLI, LookupBPI, LookupBFI, LookupLI, PSI, 2371 IsCS)) 2372 return PreservedAnalyses::all(); 2373 2374 return PreservedAnalyses::none(); 2375 } 2376 2377 static std::string getSimpleNodeName(const BasicBlock *Node) { 2378 if (!Node->getName().empty()) 2379 return Node->getName().str(); 2380 2381 std::string SimpleNodeName; 2382 raw_string_ostream OS(SimpleNodeName); 2383 Node->printAsOperand(OS, false); 2384 return SimpleNodeName; 2385 } 2386 2387 void llvm::setProfMetadata(Module *M, Instruction *TI, 2388 ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) { 2389 assert(MaxCount > 0 && "Bad max count"); 2390 uint64_t Scale = calculateCountScale(MaxCount); 2391 SmallVector<unsigned, 4> Weights; 2392 for (const auto &ECI : EdgeCounts) 2393 Weights.push_back(scaleBranchCount(ECI, Scale)); 2394 2395 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 2396 : Weights) { 2397 dbgs() << W << " "; 2398 } dbgs() << "\n";); 2399 2400 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false); 2401 2402 setBranchWeights(*TI, Weights, /*IsExpected=*/false); 2403 if (EmitBranchProbability) { 2404 std::string BrCondStr = getBranchCondString(TI); 2405 if (BrCondStr.empty()) 2406 return; 2407 2408 uint64_t WSum = 2409 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 2410 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 2411 uint64_t TotalCount = 2412 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 2413 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 2414 Scale = calculateCountScale(WSum); 2415 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 2416 scaleBranchCount(WSum, Scale)); 2417 std::string BranchProbStr; 2418 raw_string_ostream OS(BranchProbStr); 2419 OS << BP; 2420 OS << " (total count : " << TotalCount << ")"; 2421 Function *F = TI->getParent()->getParent(); 2422 OptimizationRemarkEmitter ORE(F); 2423 ORE.emit([&]() { 2424 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 2425 << BrCondStr << " is true with probability : " << BranchProbStr; 2426 }); 2427 } 2428 } 2429 2430 namespace llvm { 2431 2432 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 2433 MDBuilder MDB(M->getContext()); 2434 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 2435 MDB.createIrrLoopHeaderWeight(Count)); 2436 } 2437 2438 template <> struct GraphTraits<PGOUseFunc *> { 2439 using NodeRef = const BasicBlock *; 2440 using ChildIteratorType = const_succ_iterator; 2441 using nodes_iterator = pointer_iterator<Function::const_iterator>; 2442 2443 static NodeRef getEntryNode(const PGOUseFunc *G) { 2444 return &G->getFunc().front(); 2445 } 2446 2447 static ChildIteratorType child_begin(const NodeRef N) { 2448 return succ_begin(N); 2449 } 2450 2451 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 2452 2453 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 2454 return nodes_iterator(G->getFunc().begin()); 2455 } 2456 2457 static nodes_iterator nodes_end(const PGOUseFunc *G) { 2458 return nodes_iterator(G->getFunc().end()); 2459 } 2460 }; 2461 2462 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 2463 explicit DOTGraphTraits(bool isSimple = false) 2464 : DefaultDOTGraphTraits(isSimple) {} 2465 2466 static std::string getGraphName(const PGOUseFunc *G) { 2467 return std::string(G->getFunc().getName()); 2468 } 2469 2470 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 2471 std::string Result; 2472 raw_string_ostream OS(Result); 2473 2474 OS << getSimpleNodeName(Node) << ":\\l"; 2475 PGOUseBBInfo *BI = Graph->findBBInfo(Node); 2476 OS << "Count : "; 2477 if (BI && BI->Count) 2478 OS << *BI->Count << "\\l"; 2479 else 2480 OS << "Unknown\\l"; 2481 2482 if (!PGOInstrSelect) 2483 return Result; 2484 2485 for (const Instruction &I : *Node) { 2486 if (!isa<SelectInst>(&I)) 2487 continue; 2488 // Display scaled counts for SELECT instruction: 2489 OS << "SELECT : { T = "; 2490 uint64_t TC, FC; 2491 bool HasProf = extractBranchWeights(I, TC, FC); 2492 if (!HasProf) 2493 OS << "Unknown, F = Unknown }\\l"; 2494 else 2495 OS << TC << ", F = " << FC << " }\\l"; 2496 } 2497 return Result; 2498 } 2499 }; 2500 2501 } // end namespace llvm 2502