1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 51 #include "CFGMST.h" 52 #include "ValueProfileCollector.h" 53 #include "llvm/ADT/APInt.h" 54 #include "llvm/ADT/ArrayRef.h" 55 #include "llvm/ADT/STLExtras.h" 56 #include "llvm/ADT/SmallVector.h" 57 #include "llvm/ADT/Statistic.h" 58 #include "llvm/ADT/StringRef.h" 59 #include "llvm/ADT/Triple.h" 60 #include "llvm/ADT/Twine.h" 61 #include "llvm/ADT/iterator.h" 62 #include "llvm/ADT/iterator_range.h" 63 #include "llvm/Analysis/BlockFrequencyInfo.h" 64 #include "llvm/Analysis/BranchProbabilityInfo.h" 65 #include "llvm/Analysis/CFG.h" 66 #include "llvm/Analysis/EHPersonalities.h" 67 #include "llvm/Analysis/LoopInfo.h" 68 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 69 #include "llvm/Analysis/ProfileSummaryInfo.h" 70 #include "llvm/Analysis/TargetLibraryInfo.h" 71 #include "llvm/IR/Attributes.h" 72 #include "llvm/IR/BasicBlock.h" 73 #include "llvm/IR/CFG.h" 74 #include "llvm/IR/Comdat.h" 75 #include "llvm/IR/Constant.h" 76 #include "llvm/IR/Constants.h" 77 #include "llvm/IR/DiagnosticInfo.h" 78 #include "llvm/IR/Dominators.h" 79 #include "llvm/IR/Function.h" 80 #include "llvm/IR/GlobalAlias.h" 81 #include "llvm/IR/GlobalValue.h" 82 #include "llvm/IR/GlobalVariable.h" 83 #include "llvm/IR/IRBuilder.h" 84 #include "llvm/IR/InstVisitor.h" 85 #include "llvm/IR/InstrTypes.h" 86 #include "llvm/IR/Instruction.h" 87 #include "llvm/IR/Instructions.h" 88 #include "llvm/IR/IntrinsicInst.h" 89 #include "llvm/IR/Intrinsics.h" 90 #include "llvm/IR/LLVMContext.h" 91 #include "llvm/IR/MDBuilder.h" 92 #include "llvm/IR/Module.h" 93 #include "llvm/IR/PassManager.h" 94 #include "llvm/IR/ProfileSummary.h" 95 #include "llvm/IR/Type.h" 96 #include "llvm/IR/Value.h" 97 #include "llvm/ProfileData/InstrProf.h" 98 #include "llvm/ProfileData/InstrProfReader.h" 99 #include "llvm/Support/BranchProbability.h" 100 #include "llvm/Support/CRC.h" 101 #include "llvm/Support/Casting.h" 102 #include "llvm/Support/CommandLine.h" 103 #include "llvm/Support/DOTGraphTraits.h" 104 #include "llvm/Support/Debug.h" 105 #include "llvm/Support/Error.h" 106 #include "llvm/Support/ErrorHandling.h" 107 #include "llvm/Support/GraphWriter.h" 108 #include "llvm/Support/raw_ostream.h" 109 #include "llvm/Transforms/Instrumentation.h" 110 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 111 #include "llvm/Transforms/Utils/MisExpect.h" 112 #include "llvm/Transforms/Utils/ModuleUtils.h" 113 #include <algorithm> 114 #include <cassert> 115 #include <cstdint> 116 #include <memory> 117 #include <numeric> 118 #include <string> 119 #include <unordered_map> 120 #include <utility> 121 #include <vector> 122 123 using namespace llvm; 124 using ProfileCount = Function::ProfileCount; 125 using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 126 127 #define DEBUG_TYPE "pgo-instrumentation" 128 129 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 130 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 131 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 132 STATISTIC(NumOfPGOEdge, "Number of edges."); 133 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 134 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 135 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 136 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 137 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 138 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 139 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 140 STATISTIC(NumOfCSPGOSelectInsts, 141 "Number of select instruction instrumented in CSPGO."); 142 STATISTIC(NumOfCSPGOMemIntrinsics, 143 "Number of mem intrinsics instrumented in CSPGO."); 144 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 145 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 146 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 147 STATISTIC(NumOfCSPGOFunc, 148 "Number of functions having valid profile counts in CSPGO."); 149 STATISTIC(NumOfCSPGOMismatch, 150 "Number of functions having mismatch profile in CSPGO."); 151 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 152 153 // Command line option to specify the file to read profile from. This is 154 // mainly used for testing. 155 static cl::opt<std::string> 156 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 157 cl::value_desc("filename"), 158 cl::desc("Specify the path of profile data file. This is" 159 "mainly for test purpose.")); 160 static cl::opt<std::string> PGOTestProfileRemappingFile( 161 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 162 cl::value_desc("filename"), 163 cl::desc("Specify the path of profile remapping file. This is mainly for " 164 "test purpose.")); 165 166 // Command line option to disable value profiling. The default is false: 167 // i.e. value profiling is enabled by default. This is for debug purpose. 168 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 169 cl::Hidden, 170 cl::desc("Disable Value Profiling")); 171 172 // Command line option to set the maximum number of VP annotations to write to 173 // the metadata for a single indirect call callsite. 174 static cl::opt<unsigned> MaxNumAnnotations( 175 "icp-max-annotations", cl::init(3), cl::Hidden, 176 cl::desc("Max number of annotations for a single indirect " 177 "call callsite")); 178 179 // Command line option to set the maximum number of value annotations 180 // to write to the metadata for a single memop intrinsic. 181 static cl::opt<unsigned> MaxNumMemOPAnnotations( 182 "memop-max-annotations", cl::init(4), cl::Hidden, 183 cl::desc("Max number of preicise value annotations for a single memop" 184 "intrinsic")); 185 186 // Command line option to control appending FunctionHash to the name of a COMDAT 187 // function. This is to avoid the hash mismatch caused by the preinliner. 188 static cl::opt<bool> DoComdatRenaming( 189 "do-comdat-renaming", cl::init(false), cl::Hidden, 190 cl::desc("Append function hash to the name of COMDAT function to avoid " 191 "function hash mismatch due to the preinliner")); 192 193 // Command line option to enable/disable the warning about missing profile 194 // information. 195 static cl::opt<bool> 196 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 197 cl::desc("Use this option to turn on/off " 198 "warnings about missing profile data for " 199 "functions.")); 200 201 namespace llvm { 202 // Command line option to enable/disable the warning about a hash mismatch in 203 // the profile data. 204 cl::opt<bool> 205 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 206 cl::desc("Use this option to turn off/on " 207 "warnings about profile cfg mismatch.")); 208 } // namespace llvm 209 210 // Command line option to enable/disable the warning about a hash mismatch in 211 // the profile data for Comdat functions, which often turns out to be false 212 // positive due to the pre-instrumentation inline. 213 static cl::opt<bool> 214 NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), 215 cl::Hidden, 216 cl::desc("The option is used to turn on/off " 217 "warnings about hash mismatch for comdat " 218 "functions.")); 219 220 // Command line option to enable/disable select instruction instrumentation. 221 static cl::opt<bool> 222 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 223 cl::desc("Use this option to turn on/off SELECT " 224 "instruction instrumentation. ")); 225 226 // Command line option to turn on CFG dot or text dump of raw profile counts 227 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 228 "pgo-view-raw-counts", cl::Hidden, 229 cl::desc("A boolean option to show CFG dag or text " 230 "with raw profile counts from " 231 "profile data. See also option " 232 "-pgo-view-counts. To limit graph " 233 "display to only one function, use " 234 "filtering option -view-bfi-func-name."), 235 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 236 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 237 clEnumValN(PGOVCT_Text, "text", "show in text."))); 238 239 // Command line option to enable/disable memop intrinsic call.size profiling. 240 static cl::opt<bool> 241 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 242 cl::desc("Use this option to turn on/off " 243 "memory intrinsic size profiling.")); 244 245 // Emit branch probability as optimization remarks. 246 static cl::opt<bool> 247 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 248 cl::desc("When this option is on, the annotated " 249 "branch probability will be emitted as " 250 "optimization remarks: -{Rpass|" 251 "pass-remarks}=pgo-instrumentation")); 252 253 static cl::opt<bool> PGOInstrumentEntry( 254 "pgo-instrument-entry", cl::init(false), cl::Hidden, 255 cl::desc("Force to instrument function entry basicblock.")); 256 257 static cl::opt<bool> PGOFunctionEntryCoverage( 258 "pgo-function-entry-coverage", cl::Hidden, 259 cl::desc( 260 "Use this option to enable function entry coverage instrumentation.")); 261 262 static cl::opt<bool> 263 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, 264 cl::desc("Fix function entry count in profile use.")); 265 266 static cl::opt<bool> PGOVerifyHotBFI( 267 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden, 268 cl::desc("Print out the non-match BFI count if a hot raw profile count " 269 "becomes non-hot, or a cold raw profile count becomes hot. " 270 "The print is enabled under -Rpass-analysis=pgo, or " 271 "internal option -pass-remakrs-analysis=pgo.")); 272 273 static cl::opt<bool> PGOVerifyBFI( 274 "pgo-verify-bfi", cl::init(false), cl::Hidden, 275 cl::desc("Print out mismatched BFI counts after setting profile metadata " 276 "The print is enabled under -Rpass-analysis=pgo, or " 277 "internal option -pass-remakrs-analysis=pgo.")); 278 279 static cl::opt<unsigned> PGOVerifyBFIRatio( 280 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, 281 cl::desc("Set the threshold for pgo-verify-bfi: only print out " 282 "mismatched BFI if the difference percentage is greater than " 283 "this value (in percentage).")); 284 285 static cl::opt<unsigned> PGOVerifyBFICutoff( 286 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, 287 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " 288 "profile count value is below.")); 289 290 namespace llvm { 291 // Command line option to turn on CFG dot dump after profile annotation. 292 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 293 extern cl::opt<PGOViewCountsType> PGOViewCounts; 294 295 // Command line option to specify the name of the function for CFG dump 296 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 297 extern cl::opt<std::string> ViewBlockFreqFuncName; 298 299 extern cl::opt<bool> DebugInfoCorrelate; 300 } // namespace llvm 301 302 static cl::opt<bool> 303 PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, 304 cl::desc("Use the old CFG function hashing")); 305 306 // Return a string describing the branch condition that can be 307 // used in static branch probability heuristics: 308 static std::string getBranchCondString(Instruction *TI) { 309 BranchInst *BI = dyn_cast<BranchInst>(TI); 310 if (!BI || !BI->isConditional()) 311 return std::string(); 312 313 Value *Cond = BI->getCondition(); 314 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 315 if (!CI) 316 return std::string(); 317 318 std::string result; 319 raw_string_ostream OS(result); 320 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 321 CI->getOperand(0)->getType()->print(OS, true); 322 323 Value *RHS = CI->getOperand(1); 324 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 325 if (CV) { 326 if (CV->isZero()) 327 OS << "_Zero"; 328 else if (CV->isOne()) 329 OS << "_One"; 330 else if (CV->isMinusOne()) 331 OS << "_MinusOne"; 332 else 333 OS << "_Const"; 334 } 335 OS.flush(); 336 return result; 337 } 338 339 static const char *ValueProfKindDescr[] = { 340 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 341 #include "llvm/ProfileData/InstrProfData.inc" 342 }; 343 344 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime 345 // aware this is an ir_level profile so it can set the version flag. 346 static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) { 347 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); 348 Type *IntTy64 = Type::getInt64Ty(M.getContext()); 349 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); 350 if (IsCS) 351 ProfileVersion |= VARIANT_MASK_CSIR_PROF; 352 if (PGOInstrumentEntry) 353 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; 354 if (DebugInfoCorrelate) 355 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; 356 if (PGOFunctionEntryCoverage) 357 ProfileVersion |= 358 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY; 359 auto IRLevelVersionVariable = new GlobalVariable( 360 M, IntTy64, true, GlobalValue::WeakAnyLinkage, 361 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); 362 IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); 363 Triple TT(M.getTargetTriple()); 364 if (TT.supportsCOMDAT()) { 365 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); 366 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); 367 } 368 return IRLevelVersionVariable; 369 } 370 371 namespace { 372 373 /// The select instruction visitor plays three roles specified 374 /// by the mode. In \c VM_counting mode, it simply counts the number of 375 /// select instructions. In \c VM_instrument mode, it inserts code to count 376 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 377 /// it reads the profile data and annotate the select instruction with metadata. 378 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 379 class PGOUseFunc; 380 381 /// Instruction Visitor class to visit select instructions. 382 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 383 Function &F; 384 unsigned NSIs = 0; // Number of select instructions instrumented. 385 VisitMode Mode = VM_counting; // Visiting mode. 386 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 387 unsigned TotalNumCtrs = 0; // Total number of counters 388 GlobalVariable *FuncNameVar = nullptr; 389 uint64_t FuncHash = 0; 390 PGOUseFunc *UseFunc = nullptr; 391 392 SelectInstVisitor(Function &Func) : F(Func) {} 393 394 void countSelects(Function &Func) { 395 NSIs = 0; 396 Mode = VM_counting; 397 visit(Func); 398 } 399 400 // Visit the IR stream and instrument all select instructions. \p 401 // Ind is a pointer to the counter index variable; \p TotalNC 402 // is the total number of counters; \p FNV is the pointer to the 403 // PGO function name var; \p FHash is the function hash. 404 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 405 GlobalVariable *FNV, uint64_t FHash) { 406 Mode = VM_instrument; 407 CurCtrIdx = Ind; 408 TotalNumCtrs = TotalNC; 409 FuncHash = FHash; 410 FuncNameVar = FNV; 411 visit(Func); 412 } 413 414 // Visit the IR stream and annotate all select instructions. 415 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 416 Mode = VM_annotate; 417 UseFunc = UF; 418 CurCtrIdx = Ind; 419 visit(Func); 420 } 421 422 void instrumentOneSelectInst(SelectInst &SI); 423 void annotateOneSelectInst(SelectInst &SI); 424 425 // Visit \p SI instruction and perform tasks according to visit mode. 426 void visitSelectInst(SelectInst &SI); 427 428 // Return the number of select instructions. This needs be called after 429 // countSelects(). 430 unsigned getNumOfSelectInsts() const { return NSIs; } 431 }; 432 433 } // end anonymous namespace 434 435 namespace { 436 437 /// An MST based instrumentation for PGO 438 /// 439 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 440 /// in the function level. 441 struct PGOEdge { 442 // This class implements the CFG edges. Note the CFG can be a multi-graph. 443 // So there might be multiple edges with same SrcBB and DestBB. 444 const BasicBlock *SrcBB; 445 const BasicBlock *DestBB; 446 uint64_t Weight; 447 bool InMST = false; 448 bool Removed = false; 449 bool IsCritical = false; 450 451 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 452 : SrcBB(Src), DestBB(Dest), Weight(W) {} 453 454 // Return the information string of an edge. 455 std::string infoString() const { 456 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 457 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 458 } 459 }; 460 461 // This class stores the auxiliary information for each BB. 462 struct BBInfo { 463 BBInfo *Group; 464 uint32_t Index; 465 uint32_t Rank = 0; 466 467 BBInfo(unsigned IX) : Group(this), Index(IX) {} 468 469 // Return the information string of this object. 470 std::string infoString() const { 471 return (Twine("Index=") + Twine(Index)).str(); 472 } 473 474 // Empty function -- only applicable to UseBBInfo. 475 void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 476 477 // Empty function -- only applicable to UseBBInfo. 478 void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 479 }; 480 481 // This class implements the CFG edges. Note the CFG can be a multi-graph. 482 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 483 private: 484 Function &F; 485 486 // Is this is context-sensitive instrumentation. 487 bool IsCS; 488 489 // A map that stores the Comdat group in function F. 490 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 491 492 ValueProfileCollector VPC; 493 494 void computeCFGHash(); 495 void renameComdatFunction(); 496 497 public: 498 std::vector<std::vector<VPCandidateInfo>> ValueSites; 499 SelectInstVisitor SIVisitor; 500 std::string FuncName; 501 GlobalVariable *FuncNameVar; 502 503 // CFG hash value for this function. 504 uint64_t FunctionHash = 0; 505 506 // The Minimum Spanning Tree of function CFG. 507 CFGMST<Edge, BBInfo> MST; 508 509 // Collect all the BBs that will be instrumented, and store them in 510 // InstrumentBBs. 511 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 512 513 // Give an edge, find the BB that will be instrumented. 514 // Return nullptr if there is no BB to be instrumented. 515 BasicBlock *getInstrBB(Edge *E); 516 517 // Return the auxiliary BB information. 518 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 519 520 // Return the auxiliary BB information if available. 521 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 522 523 // Dump edges and BB information. 524 void dumpInfo(std::string Str = "") const { 525 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 526 Twine(FunctionHash) + "\t" + Str); 527 } 528 529 FuncPGOInstrumentation( 530 Function &Func, TargetLibraryInfo &TLI, 531 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 532 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 533 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, 534 bool InstrumentFuncEntry = true) 535 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), 536 ValueSites(IPVK_Last + 1), SIVisitor(Func), 537 MST(F, InstrumentFuncEntry, BPI, BFI) { 538 // This should be done before CFG hash computation. 539 SIVisitor.countSelects(Func); 540 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 541 if (!IsCS) { 542 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 543 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 544 NumOfPGOBB += MST.BBInfos.size(); 545 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 546 } else { 547 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 548 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 549 NumOfCSPGOBB += MST.BBInfos.size(); 550 } 551 552 FuncName = getPGOFuncName(F); 553 computeCFGHash(); 554 if (!ComdatMembers.empty()) 555 renameComdatFunction(); 556 LLVM_DEBUG(dumpInfo("after CFGMST")); 557 558 for (auto &E : MST.AllEdges) { 559 if (E->Removed) 560 continue; 561 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 562 if (!E->InMST) 563 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 564 } 565 566 if (CreateGlobalVar) 567 FuncNameVar = createPGOFuncNameVar(F, FuncName); 568 } 569 }; 570 571 } // end anonymous namespace 572 573 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 574 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers 575 // of selects, indirect calls, mem ops and edges. 576 template <class Edge, class BBInfo> 577 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 578 std::vector<uint8_t> Indexes; 579 JamCRC JC; 580 for (auto &BB : F) { 581 const Instruction *TI = BB.getTerminator(); 582 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 583 BasicBlock *Succ = TI->getSuccessor(I); 584 auto BI = findBBInfo(Succ); 585 if (BI == nullptr) 586 continue; 587 uint32_t Index = BI->Index; 588 for (int J = 0; J < 4; J++) 589 Indexes.push_back((uint8_t)(Index >> (J * 8))); 590 } 591 } 592 JC.update(Indexes); 593 594 JamCRC JCH; 595 if (PGOOldCFGHashing) { 596 // Hash format for context sensitive profile. Reserve 4 bits for other 597 // information. 598 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 599 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 600 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | 601 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 602 } else { 603 // The higher 32 bits. 604 auto updateJCH = [&JCH](uint64_t Num) { 605 uint8_t Data[8]; 606 support::endian::write64le(Data, Num); 607 JCH.update(Data); 608 }; 609 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); 610 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); 611 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); 612 updateJCH((uint64_t)MST.AllEdges.size()); 613 614 // Hash format for context sensitive profile. Reserve 4 bits for other 615 // information. 616 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); 617 } 618 619 // Reserve bit 60-63 for other information purpose. 620 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 621 if (IsCS) 622 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 623 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 624 << " CRC = " << JC.getCRC() 625 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 626 << ", Edges = " << MST.AllEdges.size() << ", ICSites = " 627 << ValueSites[IPVK_IndirectCallTarget].size()); 628 if (!PGOOldCFGHashing) { 629 LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size() 630 << ", High32 CRC = " << JCH.getCRC()); 631 } 632 LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); 633 } 634 635 // Check if we can safely rename this Comdat function. 636 static bool canRenameComdat( 637 Function &F, 638 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 639 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 640 return false; 641 642 // FIXME: Current only handle those Comdat groups that only containing one 643 // function. 644 // (1) For a Comdat group containing multiple functions, we need to have a 645 // unique postfix based on the hashes for each function. There is a 646 // non-trivial code refactoring to do this efficiently. 647 // (2) Variables can not be renamed, so we can not rename Comdat function in a 648 // group including global vars. 649 Comdat *C = F.getComdat(); 650 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 651 assert(!isa<GlobalAlias>(CM.second)); 652 Function *FM = dyn_cast<Function>(CM.second); 653 if (FM != &F) 654 return false; 655 } 656 return true; 657 } 658 659 // Append the CFGHash to the Comdat function name. 660 template <class Edge, class BBInfo> 661 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 662 if (!canRenameComdat(F, ComdatMembers)) 663 return; 664 std::string OrigName = F.getName().str(); 665 std::string NewFuncName = 666 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 667 F.setName(Twine(NewFuncName)); 668 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 669 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 670 Comdat *NewComdat; 671 Module *M = F.getParent(); 672 // For AvailableExternallyLinkage functions, change the linkage to 673 // LinkOnceODR and put them into comdat. This is because after renaming, there 674 // is no backup external copy available for the function. 675 if (!F.hasComdat()) { 676 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 677 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 678 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 679 F.setComdat(NewComdat); 680 return; 681 } 682 683 // This function belongs to a single function Comdat group. 684 Comdat *OrigComdat = F.getComdat(); 685 std::string NewComdatName = 686 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 687 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 688 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 689 690 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 691 // Must be a function. 692 cast<Function>(CM.second)->setComdat(NewComdat); 693 } 694 } 695 696 // Collect all the BBs that will be instruments and return them in 697 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. 698 template <class Edge, class BBInfo> 699 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 700 std::vector<BasicBlock *> &InstrumentBBs) { 701 // Use a worklist as we will update the vector during the iteration. 702 std::vector<Edge *> EdgeList; 703 EdgeList.reserve(MST.AllEdges.size()); 704 for (auto &E : MST.AllEdges) 705 EdgeList.push_back(E.get()); 706 707 for (auto &E : EdgeList) { 708 BasicBlock *InstrBB = getInstrBB(E); 709 if (InstrBB) 710 InstrumentBBs.push_back(InstrBB); 711 } 712 713 // Set up InEdges/OutEdges for all BBs. 714 for (auto &E : MST.AllEdges) { 715 if (E->Removed) 716 continue; 717 const BasicBlock *SrcBB = E->SrcBB; 718 const BasicBlock *DestBB = E->DestBB; 719 BBInfo &SrcInfo = getBBInfo(SrcBB); 720 BBInfo &DestInfo = getBBInfo(DestBB); 721 SrcInfo.addOutEdge(E.get()); 722 DestInfo.addInEdge(E.get()); 723 } 724 } 725 726 // Given a CFG E to be instrumented, find which BB to place the instrumented 727 // code. The function will split the critical edge if necessary. 728 template <class Edge, class BBInfo> 729 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 730 if (E->InMST || E->Removed) 731 return nullptr; 732 733 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 734 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 735 // For a fake edge, instrument the real BB. 736 if (SrcBB == nullptr) 737 return DestBB; 738 if (DestBB == nullptr) 739 return SrcBB; 740 741 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 742 // There are basic blocks (such as catchswitch) cannot be instrumented. 743 // If the returned first insertion point is the end of BB, skip this BB. 744 if (BB->getFirstInsertionPt() == BB->end()) 745 return nullptr; 746 return BB; 747 }; 748 749 // Instrument the SrcBB if it has a single successor, 750 // otherwise, the DestBB if this is not a critical edge. 751 Instruction *TI = SrcBB->getTerminator(); 752 if (TI->getNumSuccessors() <= 1) 753 return canInstrument(SrcBB); 754 if (!E->IsCritical) 755 return canInstrument(DestBB); 756 757 // Some IndirectBr critical edges cannot be split by the previous 758 // SplitIndirectBrCriticalEdges call. Bail out. 759 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 760 BasicBlock *InstrBB = 761 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum); 762 if (!InstrBB) { 763 LLVM_DEBUG( 764 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 765 return nullptr; 766 } 767 // For a critical edge, we have to split. Instrument the newly 768 // created BB. 769 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 770 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 771 << " --> " << getBBInfo(DestBB).Index << "\n"); 772 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 773 MST.addEdge(SrcBB, InstrBB, 0); 774 // Second one: Add new edge of InstrBB->DestBB. 775 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 776 NewEdge1.InMST = true; 777 E->Removed = true; 778 779 return canInstrument(InstrBB); 780 } 781 782 // When generating value profiling calls on Windows routines that make use of 783 // handler funclets for exception processing an operand bundle needs to attached 784 // to the called function. This routine will set \p OpBundles to contain the 785 // funclet information, if any is needed, that should be placed on the generated 786 // value profiling call for the value profile candidate call. 787 static void 788 populateEHOperandBundle(VPCandidateInfo &Cand, 789 DenseMap<BasicBlock *, ColorVector> &BlockColors, 790 SmallVectorImpl<OperandBundleDef> &OpBundles) { 791 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); 792 if (!OrigCall) 793 return; 794 795 if (!isa<IntrinsicInst>(OrigCall)) { 796 // The instrumentation call should belong to the same funclet as a 797 // non-intrinsic call, so just copy the operand bundle, if any exists. 798 Optional<OperandBundleUse> ParentFunclet = 799 OrigCall->getOperandBundle(LLVMContext::OB_funclet); 800 if (ParentFunclet) 801 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); 802 } else { 803 // Intrinsics or other instructions do not get funclet information from the 804 // front-end. Need to use the BlockColors that was computed by the routine 805 // colorEHFunclets to determine whether a funclet is needed. 806 if (!BlockColors.empty()) { 807 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 808 assert(CV.size() == 1 && "non-unique color for block!"); 809 Instruction *EHPad = CV.front()->getFirstNonPHI(); 810 if (EHPad->isEHPad()) 811 OpBundles.emplace_back("funclet", EHPad); 812 } 813 } 814 } 815 816 // Visit all edge and instrument the edges not in MST, and do value profiling. 817 // Critical edges will be split. 818 static void instrumentOneFunc( 819 Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, 820 BlockFrequencyInfo *BFI, 821 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 822 bool IsCS) { 823 // Split indirectbr critical edges here before computing the MST rather than 824 // later in getInstrBB() to avoid invalidating it. 825 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); 826 827 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo( 828 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry); 829 830 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 831 auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy); 832 auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()), 833 FuncInfo.FunctionHash); 834 if (PGOFunctionEntryCoverage) { 835 assert(!IsCS && 836 "entry coverge does not support context-sensitive instrumentation"); 837 auto &EntryBB = F.getEntryBlock(); 838 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); 839 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>, 840 // i32 <index>) 841 Builder.CreateCall( 842 Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover), 843 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)}); 844 return; 845 } 846 847 std::vector<BasicBlock *> InstrumentBBs; 848 FuncInfo.getInstrumentBBs(InstrumentBBs); 849 unsigned NumCounters = 850 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 851 852 uint32_t I = 0; 853 for (auto *InstrBB : InstrumentBBs) { 854 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 855 assert(Builder.GetInsertPoint() != InstrBB->end() && 856 "Cannot get the Instrumentation point"); 857 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>, 858 // i32 <index>) 859 Builder.CreateCall( 860 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 861 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)}); 862 } 863 864 // Now instrument select instructions: 865 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 866 FuncInfo.FunctionHash); 867 assert(I == NumCounters); 868 869 if (DisableValueProfiling) 870 return; 871 872 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 873 874 // Intrinsic function calls do not have funclet operand bundles needed for 875 // Windows exception handling attached to them. However, if value profiling is 876 // inserted for one of these calls, then a funclet value will need to be set 877 // on the instrumentation call based on the funclet coloring. 878 DenseMap<BasicBlock *, ColorVector> BlockColors; 879 if (F.hasPersonalityFn() && 880 isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 881 BlockColors = colorEHFunclets(F); 882 883 // For each VP Kind, walk the VP candidates and instrument each one. 884 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 885 unsigned SiteIndex = 0; 886 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 887 continue; 888 889 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 890 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 891 << " site: CallSite Index = " << SiteIndex << "\n"); 892 893 IRBuilder<> Builder(Cand.InsertPt); 894 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 895 "Cannot get the Instrumentation point"); 896 897 Value *ToProfile = nullptr; 898 if (Cand.V->getType()->isIntegerTy()) 899 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 900 else if (Cand.V->getType()->isPointerTy()) 901 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 902 assert(ToProfile && "value profiling Value is of unexpected type"); 903 904 SmallVector<OperandBundleDef, 1> OpBundles; 905 populateEHOperandBundle(Cand, BlockColors, OpBundles); 906 Builder.CreateCall( 907 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 908 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 909 Builder.getInt64(FuncInfo.FunctionHash), ToProfile, 910 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 911 OpBundles); 912 } 913 } // IPVK_First <= Kind <= IPVK_Last 914 } 915 916 namespace { 917 918 // This class represents a CFG edge in profile use compilation. 919 struct PGOUseEdge : public PGOEdge { 920 bool CountValid = false; 921 uint64_t CountValue = 0; 922 923 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 924 : PGOEdge(Src, Dest, W) {} 925 926 // Set edge count value 927 void setEdgeCount(uint64_t Value) { 928 CountValue = Value; 929 CountValid = true; 930 } 931 932 // Return the information string for this object. 933 std::string infoString() const { 934 if (!CountValid) 935 return PGOEdge::infoString(); 936 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 937 .str(); 938 } 939 }; 940 941 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 942 943 // This class stores the auxiliary information for each BB. 944 struct UseBBInfo : public BBInfo { 945 uint64_t CountValue = 0; 946 bool CountValid; 947 int32_t UnknownCountInEdge = 0; 948 int32_t UnknownCountOutEdge = 0; 949 DirectEdges InEdges; 950 DirectEdges OutEdges; 951 952 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} 953 954 UseBBInfo(unsigned IX, uint64_t C) 955 : BBInfo(IX), CountValue(C), CountValid(true) {} 956 957 // Set the profile count value for this BB. 958 void setBBInfoCount(uint64_t Value) { 959 CountValue = Value; 960 CountValid = true; 961 } 962 963 // Return the information string of this object. 964 std::string infoString() const { 965 if (!CountValid) 966 return BBInfo::infoString(); 967 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 968 } 969 970 // Add an OutEdge and update the edge count. 971 void addOutEdge(PGOUseEdge *E) { 972 OutEdges.push_back(E); 973 UnknownCountOutEdge++; 974 } 975 976 // Add an InEdge and update the edge count. 977 void addInEdge(PGOUseEdge *E) { 978 InEdges.push_back(E); 979 UnknownCountInEdge++; 980 } 981 }; 982 983 } // end anonymous namespace 984 985 // Sum up the count values for all the edges. 986 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 987 uint64_t Total = 0; 988 for (auto &E : Edges) { 989 if (E->Removed) 990 continue; 991 Total += E->CountValue; 992 } 993 return Total; 994 } 995 996 namespace { 997 998 class PGOUseFunc { 999 public: 1000 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, 1001 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 1002 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 1003 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry) 1004 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 1005 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, 1006 InstrumentFuncEntry), 1007 FreqAttr(FFA_Normal), IsCS(IsCS) {} 1008 1009 // Read counts for the instrumented BB from profile. 1010 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1011 bool &AllMinusOnes); 1012 1013 // Populate the counts for all BBs. 1014 void populateCounters(); 1015 1016 // Set the branch weights based on the count values. 1017 void setBranchWeights(); 1018 1019 // Annotate the value profile call sites for all value kind. 1020 void annotateValueSites(); 1021 1022 // Annotate the value profile call sites for one value kind. 1023 void annotateValueSites(uint32_t Kind); 1024 1025 // Annotate the irreducible loop header weights. 1026 void annotateIrrLoopHeaderWeights(); 1027 1028 // The hotness of the function from the profile count. 1029 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1030 1031 // Return the function hotness from the profile. 1032 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1033 1034 // Return the function hash. 1035 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1036 1037 // Return the profile record for this function; 1038 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1039 1040 // Return the auxiliary BB information. 1041 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 1042 return FuncInfo.getBBInfo(BB); 1043 } 1044 1045 // Return the auxiliary BB information if available. 1046 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 1047 return FuncInfo.findBBInfo(BB); 1048 } 1049 1050 Function &getFunc() const { return F; } 1051 1052 void dumpInfo(std::string Str = "") const { 1053 FuncInfo.dumpInfo(Str); 1054 } 1055 1056 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1057 private: 1058 Function &F; 1059 Module *M; 1060 BlockFrequencyInfo *BFI; 1061 ProfileSummaryInfo *PSI; 1062 1063 // This member stores the shared information with class PGOGenFunc. 1064 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 1065 1066 // The maximum count value in the profile. This is only used in PGO use 1067 // compilation. 1068 uint64_t ProgramMaxCount; 1069 1070 // Position of counter that remains to be read. 1071 uint32_t CountPosition = 0; 1072 1073 // Total size of the profile count for this function. 1074 uint32_t ProfileCountSize = 0; 1075 1076 // ProfileRecord for this function. 1077 InstrProfRecord ProfileRecord; 1078 1079 // Function hotness info derived from profile. 1080 FuncFreqAttr FreqAttr; 1081 1082 // Is to use the context sensitive profile. 1083 bool IsCS; 1084 1085 // Find the Instrumented BB and set the value. Return false on error. 1086 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1087 1088 // Set the edge counter value for the unknown edge -- there should be only 1089 // one unknown edge. 1090 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1091 1092 // Return FuncName string; 1093 std::string getFuncName() const { return FuncInfo.FuncName; } 1094 1095 // Set the hot/cold inline hints based on the count values. 1096 // FIXME: This function should be removed once the functionality in 1097 // the inliner is implemented. 1098 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1099 if (PSI->isHotCount(EntryCount)) 1100 FreqAttr = FFA_Hot; 1101 else if (PSI->isColdCount(MaxCount)) 1102 FreqAttr = FFA_Cold; 1103 } 1104 }; 1105 1106 } // end anonymous namespace 1107 1108 // Visit all the edges and assign the count value for the instrumented 1109 // edges and the BB. Return false on error. 1110 bool PGOUseFunc::setInstrumentedCounts( 1111 const std::vector<uint64_t> &CountFromProfile) { 1112 1113 std::vector<BasicBlock *> InstrumentBBs; 1114 FuncInfo.getInstrumentBBs(InstrumentBBs); 1115 unsigned NumCounters = 1116 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1117 // The number of counters here should match the number of counters 1118 // in profile. Return if they mismatch. 1119 if (NumCounters != CountFromProfile.size()) { 1120 return false; 1121 } 1122 auto *FuncEntry = &*F.begin(); 1123 1124 // Set the profile count to the Instrumented BBs. 1125 uint32_t I = 0; 1126 for (BasicBlock *InstrBB : InstrumentBBs) { 1127 uint64_t CountValue = CountFromProfile[I++]; 1128 UseBBInfo &Info = getBBInfo(InstrBB); 1129 // If we reach here, we know that we have some nonzero count 1130 // values in this function. The entry count should not be 0. 1131 // Fix it if necessary. 1132 if (InstrBB == FuncEntry && CountValue == 0) 1133 CountValue = 1; 1134 Info.setBBInfoCount(CountValue); 1135 } 1136 ProfileCountSize = CountFromProfile.size(); 1137 CountPosition = I; 1138 1139 // Set the edge count and update the count of unknown edges for BBs. 1140 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1141 E->setEdgeCount(Value); 1142 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1143 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1144 }; 1145 1146 // Set the profile count the Instrumented edges. There are BBs that not in 1147 // MST but not instrumented. Need to set the edge count value so that we can 1148 // populate the profile counts later. 1149 for (auto &E : FuncInfo.MST.AllEdges) { 1150 if (E->Removed || E->InMST) 1151 continue; 1152 const BasicBlock *SrcBB = E->SrcBB; 1153 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 1154 1155 // If only one out-edge, the edge profile count should be the same as BB 1156 // profile count. 1157 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1158 setEdgeCount(E.get(), SrcInfo.CountValue); 1159 else { 1160 const BasicBlock *DestBB = E->DestBB; 1161 UseBBInfo &DestInfo = getBBInfo(DestBB); 1162 // If only one in-edge, the edge profile count should be the same as BB 1163 // profile count. 1164 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1165 setEdgeCount(E.get(), DestInfo.CountValue); 1166 } 1167 if (E->CountValid) 1168 continue; 1169 // E's count should have been set from profile. If not, this meenas E skips 1170 // the instrumentation. We set the count to 0. 1171 setEdgeCount(E.get(), 0); 1172 } 1173 return true; 1174 } 1175 1176 // Set the count value for the unknown edge. There should be one and only one 1177 // unknown edge in Edges vector. 1178 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1179 for (auto &E : Edges) { 1180 if (E->CountValid) 1181 continue; 1182 E->setEdgeCount(Value); 1183 1184 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1185 getBBInfo(E->DestBB).UnknownCountInEdge--; 1186 return; 1187 } 1188 llvm_unreachable("Cannot find the unknown count edge"); 1189 } 1190 1191 // Emit function metadata indicating PGO profile mismatch. 1192 static void annotateFunctionWithHashMismatch(Function &F, 1193 LLVMContext &ctx) { 1194 const char MetadataName[] = "instr_prof_hash_mismatch"; 1195 SmallVector<Metadata *, 2> Names; 1196 // If this metadata already exists, ignore. 1197 auto *Existing = F.getMetadata(LLVMContext::MD_annotation); 1198 if (Existing) { 1199 MDTuple *Tuple = cast<MDTuple>(Existing); 1200 for (auto &N : Tuple->operands()) { 1201 if (cast<MDString>(N.get())->getString() == MetadataName) 1202 return; 1203 Names.push_back(N.get()); 1204 } 1205 } 1206 1207 MDBuilder MDB(ctx); 1208 Names.push_back(MDB.createString(MetadataName)); 1209 MDNode *MD = MDTuple::get(ctx, Names); 1210 F.setMetadata(LLVMContext::MD_annotation, MD); 1211 } 1212 1213 // Read the profile from ProfileFileName and assign the value to the 1214 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1215 // Return true if the profile are successfully read, and false on errors. 1216 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1217 bool &AllMinusOnes) { 1218 auto &Ctx = M->getContext(); 1219 Expected<InstrProfRecord> Result = 1220 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 1221 if (Error E = Result.takeError()) { 1222 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 1223 auto Err = IPE.get(); 1224 bool SkipWarning = false; 1225 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1226 << FuncInfo.FuncName << ": "); 1227 if (Err == instrprof_error::unknown_function) { 1228 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1229 SkipWarning = !PGOWarnMissing; 1230 LLVM_DEBUG(dbgs() << "unknown function"); 1231 } else if (Err == instrprof_error::hash_mismatch || 1232 Err == instrprof_error::malformed) { 1233 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1234 SkipWarning = 1235 NoPGOWarnMismatch || 1236 (NoPGOWarnMismatchComdat && 1237 (F.hasComdat() || 1238 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1239 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 1240 // Emit function metadata indicating PGO profile mismatch. 1241 annotateFunctionWithHashMismatch(F, M->getContext()); 1242 } 1243 1244 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1245 if (SkipWarning) 1246 return; 1247 1248 std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + 1249 std::string(" Hash = ") + 1250 std::to_string(FuncInfo.FunctionHash); 1251 1252 Ctx.diagnose( 1253 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1254 }); 1255 return false; 1256 } 1257 ProfileRecord = std::move(Result.get()); 1258 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1259 1260 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1261 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1262 AllMinusOnes = (CountFromProfile.size() > 0); 1263 uint64_t ValueSum = 0; 1264 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1265 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1266 ValueSum += CountFromProfile[I]; 1267 if (CountFromProfile[I] != (uint64_t)-1) 1268 AllMinusOnes = false; 1269 } 1270 AllZeros = (ValueSum == 0); 1271 1272 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1273 1274 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1275 getBBInfo(nullptr).UnknownCountInEdge = 2; 1276 1277 if (!setInstrumentedCounts(CountFromProfile)) { 1278 LLVM_DEBUG( 1279 dbgs() << "Inconsistent number of counts, skipping this function"); 1280 Ctx.diagnose(DiagnosticInfoPGOProfile( 1281 M->getName().data(), 1282 Twine("Inconsistent number of counts in ") + F.getName().str() 1283 + Twine(": the profile may be stale or there is a function name collision."), 1284 DS_Warning)); 1285 return false; 1286 } 1287 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1288 return true; 1289 } 1290 1291 // Populate the counters from instrumented BBs to all BBs. 1292 // In the end of this operation, all BBs should have a valid count value. 1293 void PGOUseFunc::populateCounters() { 1294 bool Changes = true; 1295 unsigned NumPasses = 0; 1296 while (Changes) { 1297 NumPasses++; 1298 Changes = false; 1299 1300 // For efficient traversal, it's better to start from the end as most 1301 // of the instrumented edges are at the end. 1302 for (auto &BB : reverse(F)) { 1303 UseBBInfo *Count = findBBInfo(&BB); 1304 if (Count == nullptr) 1305 continue; 1306 if (!Count->CountValid) { 1307 if (Count->UnknownCountOutEdge == 0) { 1308 Count->CountValue = sumEdgeCount(Count->OutEdges); 1309 Count->CountValid = true; 1310 Changes = true; 1311 } else if (Count->UnknownCountInEdge == 0) { 1312 Count->CountValue = sumEdgeCount(Count->InEdges); 1313 Count->CountValid = true; 1314 Changes = true; 1315 } 1316 } 1317 if (Count->CountValid) { 1318 if (Count->UnknownCountOutEdge == 1) { 1319 uint64_t Total = 0; 1320 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1321 // If the one of the successor block can early terminate (no-return), 1322 // we can end up with situation where out edge sum count is larger as 1323 // the source BB's count is collected by a post-dominated block. 1324 if (Count->CountValue > OutSum) 1325 Total = Count->CountValue - OutSum; 1326 setEdgeCount(Count->OutEdges, Total); 1327 Changes = true; 1328 } 1329 if (Count->UnknownCountInEdge == 1) { 1330 uint64_t Total = 0; 1331 uint64_t InSum = sumEdgeCount(Count->InEdges); 1332 if (Count->CountValue > InSum) 1333 Total = Count->CountValue - InSum; 1334 setEdgeCount(Count->InEdges, Total); 1335 Changes = true; 1336 } 1337 } 1338 } 1339 } 1340 1341 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1342 (void) NumPasses; 1343 #ifndef NDEBUG 1344 // Assert every BB has a valid counter. 1345 for (auto &BB : F) { 1346 auto BI = findBBInfo(&BB); 1347 if (BI == nullptr) 1348 continue; 1349 assert(BI->CountValid && "BB count is not valid"); 1350 } 1351 #endif 1352 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1353 uint64_t FuncMaxCount = FuncEntryCount; 1354 for (auto &BB : F) { 1355 auto BI = findBBInfo(&BB); 1356 if (BI == nullptr) 1357 continue; 1358 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1359 } 1360 1361 // Fix the obviously inconsistent entry count. 1362 if (FuncMaxCount > 0 && FuncEntryCount == 0) 1363 FuncEntryCount = 1; 1364 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1365 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1366 1367 // Now annotate select instructions 1368 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 1369 assert(CountPosition == ProfileCountSize); 1370 1371 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1372 } 1373 1374 // Assign the scaled count values to the BB with multiple out edges. 1375 void PGOUseFunc::setBranchWeights() { 1376 // Generate MD_prof metadata for every branch instruction. 1377 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1378 << " IsCS=" << IsCS << "\n"); 1379 for (auto &BB : F) { 1380 Instruction *TI = BB.getTerminator(); 1381 if (TI->getNumSuccessors() < 2) 1382 continue; 1383 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1384 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI))) 1385 continue; 1386 1387 if (getBBInfo(&BB).CountValue == 0) 1388 continue; 1389 1390 // We have a non-zero Branch BB. 1391 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1392 unsigned Size = BBCountInfo.OutEdges.size(); 1393 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1394 uint64_t MaxCount = 0; 1395 for (unsigned s = 0; s < Size; s++) { 1396 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1397 const BasicBlock *SrcBB = E->SrcBB; 1398 const BasicBlock *DestBB = E->DestBB; 1399 if (DestBB == nullptr) 1400 continue; 1401 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1402 uint64_t EdgeCount = E->CountValue; 1403 if (EdgeCount > MaxCount) 1404 MaxCount = EdgeCount; 1405 EdgeCounts[SuccNum] = EdgeCount; 1406 } 1407 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1408 } 1409 } 1410 1411 static bool isIndirectBrTarget(BasicBlock *BB) { 1412 for (BasicBlock *Pred : predecessors(BB)) { 1413 if (isa<IndirectBrInst>(Pred->getTerminator())) 1414 return true; 1415 } 1416 return false; 1417 } 1418 1419 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1420 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1421 // Find irr loop headers 1422 for (auto &BB : F) { 1423 // As a heuristic also annotate indrectbr targets as they have a high chance 1424 // to become an irreducible loop header after the indirectbr tail 1425 // duplication. 1426 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1427 Instruction *TI = BB.getTerminator(); 1428 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1429 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1430 } 1431 } 1432 } 1433 1434 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1435 if (PGOFunctionEntryCoverage) 1436 return; 1437 Module *M = F.getParent(); 1438 IRBuilder<> Builder(&SI); 1439 Type *Int64Ty = Builder.getInt64Ty(); 1440 Type *I8PtrTy = Builder.getInt8PtrTy(); 1441 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1442 Builder.CreateCall( 1443 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1444 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1445 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1446 Builder.getInt32(*CurCtrIdx), Step}); 1447 ++(*CurCtrIdx); 1448 } 1449 1450 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1451 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1452 assert(*CurCtrIdx < CountFromProfile.size() && 1453 "Out of bound access of counters"); 1454 uint64_t SCounts[2]; 1455 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1456 ++(*CurCtrIdx); 1457 uint64_t TotalCount = 0; 1458 auto BI = UseFunc->findBBInfo(SI.getParent()); 1459 if (BI != nullptr) 1460 TotalCount = BI->CountValue; 1461 // False Count 1462 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1463 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1464 if (MaxCount) 1465 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1466 } 1467 1468 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1469 if (!PGOInstrSelect) 1470 return; 1471 // FIXME: do not handle this yet. 1472 if (SI.getCondition()->getType()->isVectorTy()) 1473 return; 1474 1475 switch (Mode) { 1476 case VM_counting: 1477 NSIs++; 1478 return; 1479 case VM_instrument: 1480 instrumentOneSelectInst(SI); 1481 return; 1482 case VM_annotate: 1483 annotateOneSelectInst(SI); 1484 return; 1485 } 1486 1487 llvm_unreachable("Unknown visiting mode"); 1488 } 1489 1490 // Traverse all valuesites and annotate the instructions for all value kind. 1491 void PGOUseFunc::annotateValueSites() { 1492 if (DisableValueProfiling) 1493 return; 1494 1495 // Create the PGOFuncName meta data. 1496 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1497 1498 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1499 annotateValueSites(Kind); 1500 } 1501 1502 // Annotate the instructions for a specific value kind. 1503 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1504 assert(Kind <= IPVK_Last); 1505 unsigned ValueSiteIndex = 0; 1506 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1507 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1508 if (NumValueSites != ValueSites.size()) { 1509 auto &Ctx = M->getContext(); 1510 Ctx.diagnose(DiagnosticInfoPGOProfile( 1511 M->getName().data(), 1512 Twine("Inconsistent number of value sites for ") + 1513 Twine(ValueProfKindDescr[Kind]) + 1514 Twine(" profiling in \"") + F.getName().str() + 1515 Twine("\", possibly due to the use of a stale profile."), 1516 DS_Warning)); 1517 return; 1518 } 1519 1520 for (VPCandidateInfo &I : ValueSites) { 1521 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1522 << "): Index = " << ValueSiteIndex << " out of " 1523 << NumValueSites << "\n"); 1524 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 1525 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1526 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1527 : MaxNumAnnotations); 1528 ValueSiteIndex++; 1529 } 1530 } 1531 1532 // Collect the set of members for each Comdat in module M and store 1533 // in ComdatMembers. 1534 static void collectComdatMembers( 1535 Module &M, 1536 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1537 if (!DoComdatRenaming) 1538 return; 1539 for (Function &F : M) 1540 if (Comdat *C = F.getComdat()) 1541 ComdatMembers.insert(std::make_pair(C, &F)); 1542 for (GlobalVariable &GV : M.globals()) 1543 if (Comdat *C = GV.getComdat()) 1544 ComdatMembers.insert(std::make_pair(C, &GV)); 1545 for (GlobalAlias &GA : M.aliases()) 1546 if (Comdat *C = GA.getComdat()) 1547 ComdatMembers.insert(std::make_pair(C, &GA)); 1548 } 1549 1550 static bool InstrumentAllFunctions( 1551 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1552 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1553 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1554 // For the context-sensitve instrumentation, we should have a separated pass 1555 // (before LTO/ThinLTO linking) to create these variables. 1556 if (!IsCS) 1557 createIRLevelProfileFlagVar(M, /*IsCS=*/false); 1558 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1559 collectComdatMembers(M, ComdatMembers); 1560 1561 for (auto &F : M) { 1562 if (F.isDeclaration()) 1563 continue; 1564 if (F.hasFnAttribute(llvm::Attribute::NoProfile)) 1565 continue; 1566 auto &TLI = LookupTLI(F); 1567 auto *BPI = LookupBPI(F); 1568 auto *BFI = LookupBFI(F); 1569 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); 1570 } 1571 return true; 1572 } 1573 1574 PreservedAnalyses 1575 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { 1576 createProfileFileNameVar(M, CSInstrName); 1577 // The variable in a comdat may be discarded by LTO. Ensure the declaration 1578 // will be retained. 1579 appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true)); 1580 return PreservedAnalyses::all(); 1581 } 1582 1583 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1584 ModuleAnalysisManager &AM) { 1585 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1586 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1587 return FAM.getResult<TargetLibraryAnalysis>(F); 1588 }; 1589 auto LookupBPI = [&FAM](Function &F) { 1590 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1591 }; 1592 auto LookupBFI = [&FAM](Function &F) { 1593 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1594 }; 1595 1596 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) 1597 return PreservedAnalyses::all(); 1598 1599 return PreservedAnalyses::none(); 1600 } 1601 1602 // Using the ratio b/w sums of profile count values and BFI count values to 1603 // adjust the func entry count. 1604 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, 1605 BranchProbabilityInfo &NBPI) { 1606 Function &F = Func.getFunc(); 1607 BlockFrequencyInfo NBFI(F, NBPI, LI); 1608 #ifndef NDEBUG 1609 auto BFIEntryCount = F.getEntryCount(); 1610 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) && 1611 "Invalid BFI Entrycount"); 1612 #endif 1613 auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); 1614 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble()); 1615 for (auto &BBI : F) { 1616 uint64_t CountValue = 0; 1617 uint64_t BFICountValue = 0; 1618 if (!Func.findBBInfo(&BBI)) 1619 continue; 1620 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1621 CountValue = Func.getBBInfo(&BBI).CountValue; 1622 BFICountValue = *BFICount; 1623 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven); 1624 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven); 1625 } 1626 if (SumCount.isZero()) 1627 return; 1628 1629 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && 1630 "Incorrect sum of BFI counts"); 1631 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual) 1632 return; 1633 double Scale = (SumCount / SumBFICount).convertToDouble(); 1634 if (Scale < 1.001 && Scale > 0.999) 1635 return; 1636 1637 uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue; 1638 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; 1639 if (NewEntryCount == 0) 1640 NewEntryCount = 1; 1641 if (NewEntryCount != FuncEntryCount) { 1642 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real)); 1643 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() 1644 << ", entry_count " << FuncEntryCount << " --> " 1645 << NewEntryCount << "\n"); 1646 } 1647 } 1648 1649 // Compare the profile count values with BFI count values, and print out 1650 // the non-matching ones. 1651 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, 1652 BranchProbabilityInfo &NBPI, 1653 uint64_t HotCountThreshold, 1654 uint64_t ColdCountThreshold) { 1655 Function &F = Func.getFunc(); 1656 BlockFrequencyInfo NBFI(F, NBPI, LI); 1657 // bool PrintFunc = false; 1658 bool HotBBOnly = PGOVerifyHotBFI; 1659 std::string Msg; 1660 OptimizationRemarkEmitter ORE(&F); 1661 1662 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0; 1663 for (auto &BBI : F) { 1664 uint64_t CountValue = 0; 1665 uint64_t BFICountValue = 0; 1666 1667 if (Func.getBBInfo(&BBI).CountValid) 1668 CountValue = Func.getBBInfo(&BBI).CountValue; 1669 1670 BBNum++; 1671 if (CountValue) 1672 NonZeroBBNum++; 1673 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1674 if (BFICount) 1675 BFICountValue = *BFICount; 1676 1677 if (HotBBOnly) { 1678 bool rawIsHot = CountValue >= HotCountThreshold; 1679 bool BFIIsHot = BFICountValue >= HotCountThreshold; 1680 bool rawIsCold = CountValue <= ColdCountThreshold; 1681 bool ShowCount = false; 1682 if (rawIsHot && !BFIIsHot) { 1683 Msg = "raw-Hot to BFI-nonHot"; 1684 ShowCount = true; 1685 } else if (rawIsCold && BFIIsHot) { 1686 Msg = "raw-Cold to BFI-Hot"; 1687 ShowCount = true; 1688 } 1689 if (!ShowCount) 1690 continue; 1691 } else { 1692 if ((CountValue < PGOVerifyBFICutoff) && 1693 (BFICountValue < PGOVerifyBFICutoff)) 1694 continue; 1695 uint64_t Diff = (BFICountValue >= CountValue) 1696 ? BFICountValue - CountValue 1697 : CountValue - BFICountValue; 1698 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio) 1699 continue; 1700 } 1701 BBMisMatchNum++; 1702 1703 ORE.emit([&]() { 1704 OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify", 1705 F.getSubprogram(), &BBI); 1706 Remark << "BB " << ore::NV("Block", BBI.getName()) 1707 << " Count=" << ore::NV("Count", CountValue) 1708 << " BFI_Count=" << ore::NV("Count", BFICountValue); 1709 if (!Msg.empty()) 1710 Remark << " (" << Msg << ")"; 1711 return Remark; 1712 }); 1713 } 1714 if (BBMisMatchNum) 1715 ORE.emit([&]() { 1716 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify", 1717 F.getSubprogram(), &F.getEntryBlock()) 1718 << "In Func " << ore::NV("Function", F.getName()) 1719 << ": Num_of_BB=" << ore::NV("Count", BBNum) 1720 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum) 1721 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum); 1722 }); 1723 } 1724 1725 static bool annotateAllFunctions( 1726 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 1727 function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1728 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1729 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 1730 ProfileSummaryInfo *PSI, bool IsCS) { 1731 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 1732 auto &Ctx = M.getContext(); 1733 // Read the counter array from file. 1734 auto ReaderOrErr = 1735 IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); 1736 if (Error E = ReaderOrErr.takeError()) { 1737 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1738 Ctx.diagnose( 1739 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1740 }); 1741 return false; 1742 } 1743 1744 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1745 std::move(ReaderOrErr.get()); 1746 if (!PGOReader) { 1747 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1748 StringRef("Cannot get PGOReader"))); 1749 return false; 1750 } 1751 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 1752 return false; 1753 1754 // TODO: might need to change the warning once the clang option is finalized. 1755 if (!PGOReader->isIRLevelProfile()) { 1756 Ctx.diagnose(DiagnosticInfoPGOProfile( 1757 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1758 return false; 1759 } 1760 if (PGOReader->hasSingleByteCoverage()) { 1761 Ctx.diagnose(DiagnosticInfoPGOProfile( 1762 ProfileFileName.data(), 1763 "Cannot use coverage profiles for optimization")); 1764 return false; 1765 } 1766 if (PGOReader->functionEntryOnly()) { 1767 Ctx.diagnose(DiagnosticInfoPGOProfile( 1768 ProfileFileName.data(), 1769 "Function entry profiles are not yet supported for optimization")); 1770 return false; 1771 } 1772 1773 // Add the profile summary (read from the header of the indexed summary) here 1774 // so that we can use it below when reading counters (which checks if the 1775 // function should be marked with a cold or inlinehint attribute). 1776 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 1777 IsCS ? ProfileSummary::PSK_CSInstr 1778 : ProfileSummary::PSK_Instr); 1779 PSI->refresh(); 1780 1781 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1782 collectComdatMembers(M, ComdatMembers); 1783 std::vector<Function *> HotFunctions; 1784 std::vector<Function *> ColdFunctions; 1785 1786 // If the profile marked as always instrument the entry BB, do the 1787 // same. Note this can be overwritten by the internal option in CFGMST.h 1788 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); 1789 if (PGOInstrumentEntry.getNumOccurrences() > 0) 1790 InstrumentFuncEntry = PGOInstrumentEntry; 1791 for (auto &F : M) { 1792 if (F.isDeclaration()) 1793 continue; 1794 auto &TLI = LookupTLI(F); 1795 auto *BPI = LookupBPI(F); 1796 auto *BFI = LookupBFI(F); 1797 // Split indirectbr critical edges here before computing the MST rather than 1798 // later in getInstrBB() to avoid invalidating it. 1799 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); 1800 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, 1801 InstrumentFuncEntry); 1802 // When AllMinusOnes is true, it means the profile for the function 1803 // is unrepresentative and this function is actually hot. Set the 1804 // entry count of the function to be multiple times of hot threshold 1805 // and drop all its internal counters. 1806 bool AllMinusOnes = false; 1807 bool AllZeros = false; 1808 if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes)) 1809 continue; 1810 if (AllZeros) { 1811 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 1812 if (Func.getProgramMaxCount() != 0) 1813 ColdFunctions.push_back(&F); 1814 continue; 1815 } 1816 const unsigned MultiplyFactor = 3; 1817 if (AllMinusOnes) { 1818 uint64_t HotThreshold = PSI->getHotCountThreshold(); 1819 if (HotThreshold) 1820 F.setEntryCount( 1821 ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real)); 1822 HotFunctions.push_back(&F); 1823 continue; 1824 } 1825 Func.populateCounters(); 1826 Func.setBranchWeights(); 1827 Func.annotateValueSites(); 1828 Func.annotateIrrLoopHeaderWeights(); 1829 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 1830 if (FreqAttr == PGOUseFunc::FFA_Cold) 1831 ColdFunctions.push_back(&F); 1832 else if (FreqAttr == PGOUseFunc::FFA_Hot) 1833 HotFunctions.push_back(&F); 1834 if (PGOViewCounts != PGOVCT_None && 1835 (ViewBlockFreqFuncName.empty() || 1836 F.getName().equals(ViewBlockFreqFuncName))) { 1837 LoopInfo LI{DominatorTree(F)}; 1838 std::unique_ptr<BranchProbabilityInfo> NewBPI = 1839 std::make_unique<BranchProbabilityInfo>(F, LI); 1840 std::unique_ptr<BlockFrequencyInfo> NewBFI = 1841 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 1842 if (PGOViewCounts == PGOVCT_Graph) 1843 NewBFI->view(); 1844 else if (PGOViewCounts == PGOVCT_Text) { 1845 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 1846 NewBFI->print(dbgs()); 1847 } 1848 } 1849 if (PGOViewRawCounts != PGOVCT_None && 1850 (ViewBlockFreqFuncName.empty() || 1851 F.getName().equals(ViewBlockFreqFuncName))) { 1852 if (PGOViewRawCounts == PGOVCT_Graph) 1853 if (ViewBlockFreqFuncName.empty()) 1854 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1855 else 1856 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1857 else if (PGOViewRawCounts == PGOVCT_Text) { 1858 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 1859 Func.dumpInfo(); 1860 } 1861 } 1862 1863 if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) { 1864 LoopInfo LI{DominatorTree(F)}; 1865 BranchProbabilityInfo NBPI(F, LI); 1866 1867 // Fix func entry count. 1868 if (PGOFixEntryCount) 1869 fixFuncEntryCount(Func, LI, NBPI); 1870 1871 // Verify BlockFrequency information. 1872 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0; 1873 if (PGOVerifyHotBFI) { 1874 HotCountThreshold = PSI->getOrCompHotCountThreshold(); 1875 ColdCountThreshold = PSI->getOrCompColdCountThreshold(); 1876 } 1877 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold); 1878 } 1879 } 1880 1881 // Set function hotness attribute from the profile. 1882 // We have to apply these attributes at the end because their presence 1883 // can affect the BranchProbabilityInfo of any callers, resulting in an 1884 // inconsistent MST between prof-gen and prof-use. 1885 for (auto &F : HotFunctions) { 1886 F->addFnAttr(Attribute::InlineHint); 1887 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 1888 << "\n"); 1889 } 1890 for (auto &F : ColdFunctions) { 1891 // Only set when there is no Attribute::Hot set by the user. For Hot 1892 // attribute, user's annotation has the precedence over the profile. 1893 if (F->hasFnAttribute(Attribute::Hot)) { 1894 auto &Ctx = M.getContext(); 1895 std::string Msg = std::string("Function ") + F->getName().str() + 1896 std::string(" is annotated as a hot function but" 1897 " the profile is cold"); 1898 Ctx.diagnose( 1899 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); 1900 continue; 1901 } 1902 F->addFnAttr(Attribute::Cold); 1903 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 1904 << "\n"); 1905 } 1906 return true; 1907 } 1908 1909 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, 1910 std::string RemappingFilename, 1911 bool IsCS) 1912 : ProfileFileName(std::move(Filename)), 1913 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { 1914 if (!PGOTestProfileFile.empty()) 1915 ProfileFileName = PGOTestProfileFile; 1916 if (!PGOTestProfileRemappingFile.empty()) 1917 ProfileRemappingFileName = PGOTestProfileRemappingFile; 1918 } 1919 1920 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 1921 ModuleAnalysisManager &AM) { 1922 1923 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1924 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1925 return FAM.getResult<TargetLibraryAnalysis>(F); 1926 }; 1927 auto LookupBPI = [&FAM](Function &F) { 1928 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1929 }; 1930 auto LookupBFI = [&FAM](Function &F) { 1931 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1932 }; 1933 1934 auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 1935 1936 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, 1937 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) 1938 return PreservedAnalyses::all(); 1939 1940 return PreservedAnalyses::none(); 1941 } 1942 1943 static std::string getSimpleNodeName(const BasicBlock *Node) { 1944 if (!Node->getName().empty()) 1945 return std::string(Node->getName()); 1946 1947 std::string SimpleNodeName; 1948 raw_string_ostream OS(SimpleNodeName); 1949 Node->printAsOperand(OS, false); 1950 return OS.str(); 1951 } 1952 1953 void llvm::setProfMetadata(Module *M, Instruction *TI, 1954 ArrayRef<uint64_t> EdgeCounts, 1955 uint64_t MaxCount) { 1956 MDBuilder MDB(M->getContext()); 1957 assert(MaxCount > 0 && "Bad max count"); 1958 uint64_t Scale = calculateCountScale(MaxCount); 1959 SmallVector<unsigned, 4> Weights; 1960 for (const auto &ECI : EdgeCounts) 1961 Weights.push_back(scaleBranchCount(ECI, Scale)); 1962 1963 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 1964 : Weights) { 1965 dbgs() << W << " "; 1966 } dbgs() << "\n";); 1967 1968 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false); 1969 1970 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1971 if (EmitBranchProbability) { 1972 std::string BrCondStr = getBranchCondString(TI); 1973 if (BrCondStr.empty()) 1974 return; 1975 1976 uint64_t WSum = 1977 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 1978 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 1979 uint64_t TotalCount = 1980 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 1981 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 1982 Scale = calculateCountScale(WSum); 1983 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 1984 scaleBranchCount(WSum, Scale)); 1985 std::string BranchProbStr; 1986 raw_string_ostream OS(BranchProbStr); 1987 OS << BP; 1988 OS << " (total count : " << TotalCount << ")"; 1989 OS.flush(); 1990 Function *F = TI->getParent()->getParent(); 1991 OptimizationRemarkEmitter ORE(F); 1992 ORE.emit([&]() { 1993 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 1994 << BrCondStr << " is true with probability : " << BranchProbStr; 1995 }); 1996 } 1997 } 1998 1999 namespace llvm { 2000 2001 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 2002 MDBuilder MDB(M->getContext()); 2003 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 2004 MDB.createIrrLoopHeaderWeight(Count)); 2005 } 2006 2007 template <> struct GraphTraits<PGOUseFunc *> { 2008 using NodeRef = const BasicBlock *; 2009 using ChildIteratorType = const_succ_iterator; 2010 using nodes_iterator = pointer_iterator<Function::const_iterator>; 2011 2012 static NodeRef getEntryNode(const PGOUseFunc *G) { 2013 return &G->getFunc().front(); 2014 } 2015 2016 static ChildIteratorType child_begin(const NodeRef N) { 2017 return succ_begin(N); 2018 } 2019 2020 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 2021 2022 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 2023 return nodes_iterator(G->getFunc().begin()); 2024 } 2025 2026 static nodes_iterator nodes_end(const PGOUseFunc *G) { 2027 return nodes_iterator(G->getFunc().end()); 2028 } 2029 }; 2030 2031 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 2032 explicit DOTGraphTraits(bool isSimple = false) 2033 : DefaultDOTGraphTraits(isSimple) {} 2034 2035 static std::string getGraphName(const PGOUseFunc *G) { 2036 return std::string(G->getFunc().getName()); 2037 } 2038 2039 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 2040 std::string Result; 2041 raw_string_ostream OS(Result); 2042 2043 OS << getSimpleNodeName(Node) << ":\\l"; 2044 UseBBInfo *BI = Graph->findBBInfo(Node); 2045 OS << "Count : "; 2046 if (BI && BI->CountValid) 2047 OS << BI->CountValue << "\\l"; 2048 else 2049 OS << "Unknown\\l"; 2050 2051 if (!PGOInstrSelect) 2052 return Result; 2053 2054 for (const Instruction &I : *Node) { 2055 if (!isa<SelectInst>(&I)) 2056 continue; 2057 // Display scaled counts for SELECT instruction: 2058 OS << "SELECT : { T = "; 2059 uint64_t TC, FC; 2060 bool HasProf = I.extractProfMetadata(TC, FC); 2061 if (!HasProf) 2062 OS << "Unknown, F = Unknown }\\l"; 2063 else 2064 OS << TC << ", F = " << FC << " }\\l"; 2065 } 2066 return Result; 2067 } 2068 }; 2069 2070 } // end namespace llvm 2071