xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp (revision 753f127f3ace09432b2baeffd71a308760641a62)
1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements PGO instrumentation using a minimum spanning tree based
10 // on the following paper:
11 //   [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12 //   for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13 //   Issue 3, pp 313-322
14 // The idea of the algorithm based on the fact that for each node (except for
15 // the entry and exit), the sum of incoming edge counts equals the sum of
16 // outgoing edge counts. The count of edge on spanning tree can be derived from
17 // those edges not on the spanning tree. Knuth proves this method instruments
18 // the minimum number of edges.
19 //
20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree
21 // edges have higher frequencies (more likely to execute). The idea is to
22 // instrument those less frequently executed edges to reduce the runtime
23 // overhead of instrumented binaries.
24 //
25 // This file contains two passes:
26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27 // count profile, and generates the instrumentation for indirect call
28 // profiling.
29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and
30 // annotates the branch weights. It also reads the indirect call value
31 // profiling records and annotate the indirect call instructions.
32 //
33 // To get the precise counter information, These two passes need to invoke at
34 // the same compilation point (so they see the same IR). For pass
35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and
37 // the profile is opened in module level and passed to each PGOUseFunc instance.
38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39 // in class FuncPGOInstrumentation.
40 //
41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class
42 // BBInfo contains auxiliary information for each BB. These two classes are used
43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived
44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure
45 // used in populating profile counters.
46 // The MST implementation is in Class CFGMST (CFGMST.h).
47 //
48 //===----------------------------------------------------------------------===//
49 
50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
51 #include "CFGMST.h"
52 #include "ValueProfileCollector.h"
53 #include "llvm/ADT/APInt.h"
54 #include "llvm/ADT/ArrayRef.h"
55 #include "llvm/ADT/STLExtras.h"
56 #include "llvm/ADT/SmallVector.h"
57 #include "llvm/ADT/Statistic.h"
58 #include "llvm/ADT/StringRef.h"
59 #include "llvm/ADT/Triple.h"
60 #include "llvm/ADT/Twine.h"
61 #include "llvm/ADT/iterator.h"
62 #include "llvm/ADT/iterator_range.h"
63 #include "llvm/Analysis/BlockFrequencyInfo.h"
64 #include "llvm/Analysis/BranchProbabilityInfo.h"
65 #include "llvm/Analysis/CFG.h"
66 #include "llvm/Analysis/EHPersonalities.h"
67 #include "llvm/Analysis/LoopInfo.h"
68 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
69 #include "llvm/Analysis/ProfileSummaryInfo.h"
70 #include "llvm/Analysis/TargetLibraryInfo.h"
71 #include "llvm/IR/Attributes.h"
72 #include "llvm/IR/BasicBlock.h"
73 #include "llvm/IR/CFG.h"
74 #include "llvm/IR/Comdat.h"
75 #include "llvm/IR/Constant.h"
76 #include "llvm/IR/Constants.h"
77 #include "llvm/IR/DiagnosticInfo.h"
78 #include "llvm/IR/Dominators.h"
79 #include "llvm/IR/Function.h"
80 #include "llvm/IR/GlobalAlias.h"
81 #include "llvm/IR/GlobalValue.h"
82 #include "llvm/IR/GlobalVariable.h"
83 #include "llvm/IR/IRBuilder.h"
84 #include "llvm/IR/InstVisitor.h"
85 #include "llvm/IR/InstrTypes.h"
86 #include "llvm/IR/Instruction.h"
87 #include "llvm/IR/Instructions.h"
88 #include "llvm/IR/IntrinsicInst.h"
89 #include "llvm/IR/Intrinsics.h"
90 #include "llvm/IR/LLVMContext.h"
91 #include "llvm/IR/MDBuilder.h"
92 #include "llvm/IR/Module.h"
93 #include "llvm/IR/PassManager.h"
94 #include "llvm/IR/ProfileSummary.h"
95 #include "llvm/IR/Type.h"
96 #include "llvm/IR/Value.h"
97 #include "llvm/ProfileData/InstrProf.h"
98 #include "llvm/ProfileData/InstrProfReader.h"
99 #include "llvm/Support/BranchProbability.h"
100 #include "llvm/Support/CRC.h"
101 #include "llvm/Support/Casting.h"
102 #include "llvm/Support/CommandLine.h"
103 #include "llvm/Support/DOTGraphTraits.h"
104 #include "llvm/Support/Debug.h"
105 #include "llvm/Support/Error.h"
106 #include "llvm/Support/ErrorHandling.h"
107 #include "llvm/Support/GraphWriter.h"
108 #include "llvm/Support/raw_ostream.h"
109 #include "llvm/Transforms/Instrumentation.h"
110 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
111 #include "llvm/Transforms/Utils/MisExpect.h"
112 #include "llvm/Transforms/Utils/ModuleUtils.h"
113 #include <algorithm>
114 #include <cassert>
115 #include <cstdint>
116 #include <memory>
117 #include <numeric>
118 #include <string>
119 #include <unordered_map>
120 #include <utility>
121 #include <vector>
122 
123 using namespace llvm;
124 using ProfileCount = Function::ProfileCount;
125 using VPCandidateInfo = ValueProfileCollector::CandidateInfo;
126 
127 #define DEBUG_TYPE "pgo-instrumentation"
128 
129 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
130 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
131 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
132 STATISTIC(NumOfPGOEdge, "Number of edges.");
133 STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
134 STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
135 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
136 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
137 STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
138 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
139 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
140 STATISTIC(NumOfCSPGOSelectInsts,
141           "Number of select instruction instrumented in CSPGO.");
142 STATISTIC(NumOfCSPGOMemIntrinsics,
143           "Number of mem intrinsics instrumented in CSPGO.");
144 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
145 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
146 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
147 STATISTIC(NumOfCSPGOFunc,
148           "Number of functions having valid profile counts in CSPGO.");
149 STATISTIC(NumOfCSPGOMismatch,
150           "Number of functions having mismatch profile in CSPGO.");
151 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
152 
153 // Command line option to specify the file to read profile from. This is
154 // mainly used for testing.
155 static cl::opt<std::string>
156     PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
157                        cl::value_desc("filename"),
158                        cl::desc("Specify the path of profile data file. This is"
159                                 "mainly for test purpose."));
160 static cl::opt<std::string> PGOTestProfileRemappingFile(
161     "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
162     cl::value_desc("filename"),
163     cl::desc("Specify the path of profile remapping file. This is mainly for "
164              "test purpose."));
165 
166 // Command line option to disable value profiling. The default is false:
167 // i.e. value profiling is enabled by default. This is for debug purpose.
168 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
169                                            cl::Hidden,
170                                            cl::desc("Disable Value Profiling"));
171 
172 // Command line option to set the maximum number of VP annotations to write to
173 // the metadata for a single indirect call callsite.
174 static cl::opt<unsigned> MaxNumAnnotations(
175     "icp-max-annotations", cl::init(3), cl::Hidden,
176     cl::desc("Max number of annotations for a single indirect "
177              "call callsite"));
178 
179 // Command line option to set the maximum number of value annotations
180 // to write to the metadata for a single memop intrinsic.
181 static cl::opt<unsigned> MaxNumMemOPAnnotations(
182     "memop-max-annotations", cl::init(4), cl::Hidden,
183     cl::desc("Max number of preicise value annotations for a single memop"
184              "intrinsic"));
185 
186 // Command line option to control appending FunctionHash to the name of a COMDAT
187 // function. This is to avoid the hash mismatch caused by the preinliner.
188 static cl::opt<bool> DoComdatRenaming(
189     "do-comdat-renaming", cl::init(false), cl::Hidden,
190     cl::desc("Append function hash to the name of COMDAT function to avoid "
191              "function hash mismatch due to the preinliner"));
192 
193 // Command line option to enable/disable the warning about missing profile
194 // information.
195 static cl::opt<bool>
196     PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden,
197                    cl::desc("Use this option to turn on/off "
198                             "warnings about missing profile data for "
199                             "functions."));
200 
201 namespace llvm {
202 // Command line option to enable/disable the warning about a hash mismatch in
203 // the profile data.
204 cl::opt<bool>
205     NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
206                       cl::desc("Use this option to turn off/on "
207                                "warnings about profile cfg mismatch."));
208 } // namespace llvm
209 
210 // Command line option to enable/disable the warning about a hash mismatch in
211 // the profile data for Comdat functions, which often turns out to be false
212 // positive due to the pre-instrumentation inline.
213 static cl::opt<bool>
214     NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true),
215                             cl::Hidden,
216                             cl::desc("The option is used to turn on/off "
217                                      "warnings about hash mismatch for comdat "
218                                      "functions."));
219 
220 // Command line option to enable/disable select instruction instrumentation.
221 static cl::opt<bool>
222     PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
223                    cl::desc("Use this option to turn on/off SELECT "
224                             "instruction instrumentation. "));
225 
226 // Command line option to turn on CFG dot or text dump of raw profile counts
227 static cl::opt<PGOViewCountsType> PGOViewRawCounts(
228     "pgo-view-raw-counts", cl::Hidden,
229     cl::desc("A boolean option to show CFG dag or text "
230              "with raw profile counts from "
231              "profile data. See also option "
232              "-pgo-view-counts. To limit graph "
233              "display to only one function, use "
234              "filtering option -view-bfi-func-name."),
235     cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
236                clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
237                clEnumValN(PGOVCT_Text, "text", "show in text.")));
238 
239 // Command line option to enable/disable memop intrinsic call.size profiling.
240 static cl::opt<bool>
241     PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
242                   cl::desc("Use this option to turn on/off "
243                            "memory intrinsic size profiling."));
244 
245 // Emit branch probability as optimization remarks.
246 static cl::opt<bool>
247     EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
248                           cl::desc("When this option is on, the annotated "
249                                    "branch probability will be emitted as "
250                                    "optimization remarks: -{Rpass|"
251                                    "pass-remarks}=pgo-instrumentation"));
252 
253 static cl::opt<bool> PGOInstrumentEntry(
254     "pgo-instrument-entry", cl::init(false), cl::Hidden,
255     cl::desc("Force to instrument function entry basicblock."));
256 
257 static cl::opt<bool> PGOFunctionEntryCoverage(
258     "pgo-function-entry-coverage", cl::Hidden,
259     cl::desc(
260         "Use this option to enable function entry coverage instrumentation."));
261 
262 static cl::opt<bool>
263     PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
264                      cl::desc("Fix function entry count in profile use."));
265 
266 static cl::opt<bool> PGOVerifyHotBFI(
267     "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
268     cl::desc("Print out the non-match BFI count if a hot raw profile count "
269              "becomes non-hot, or a cold raw profile count becomes hot. "
270              "The print is enabled under -Rpass-analysis=pgo, or "
271              "internal option -pass-remakrs-analysis=pgo."));
272 
273 static cl::opt<bool> PGOVerifyBFI(
274     "pgo-verify-bfi", cl::init(false), cl::Hidden,
275     cl::desc("Print out mismatched BFI counts after setting profile metadata "
276              "The print is enabled under -Rpass-analysis=pgo, or "
277              "internal option -pass-remakrs-analysis=pgo."));
278 
279 static cl::opt<unsigned> PGOVerifyBFIRatio(
280     "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
281     cl::desc("Set the threshold for pgo-verify-bfi:  only print out "
282              "mismatched BFI if the difference percentage is greater than "
283              "this value (in percentage)."));
284 
285 static cl::opt<unsigned> PGOVerifyBFICutoff(
286     "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
287     cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
288              "profile count value is below."));
289 
290 namespace llvm {
291 // Command line option to turn on CFG dot dump after profile annotation.
292 // Defined in Analysis/BlockFrequencyInfo.cpp:  -pgo-view-counts
293 extern cl::opt<PGOViewCountsType> PGOViewCounts;
294 
295 // Command line option to specify the name of the function for CFG dump
296 // Defined in Analysis/BlockFrequencyInfo.cpp:  -view-bfi-func-name=
297 extern cl::opt<std::string> ViewBlockFreqFuncName;
298 
299 extern cl::opt<bool> DebugInfoCorrelate;
300 } // namespace llvm
301 
302 static cl::opt<bool>
303     PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden,
304                      cl::desc("Use the old CFG function hashing"));
305 
306 // Return a string describing the branch condition that can be
307 // used in static branch probability heuristics:
308 static std::string getBranchCondString(Instruction *TI) {
309   BranchInst *BI = dyn_cast<BranchInst>(TI);
310   if (!BI || !BI->isConditional())
311     return std::string();
312 
313   Value *Cond = BI->getCondition();
314   ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
315   if (!CI)
316     return std::string();
317 
318   std::string result;
319   raw_string_ostream OS(result);
320   OS << CmpInst::getPredicateName(CI->getPredicate()) << "_";
321   CI->getOperand(0)->getType()->print(OS, true);
322 
323   Value *RHS = CI->getOperand(1);
324   ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
325   if (CV) {
326     if (CV->isZero())
327       OS << "_Zero";
328     else if (CV->isOne())
329       OS << "_One";
330     else if (CV->isMinusOne())
331       OS << "_MinusOne";
332     else
333       OS << "_Const";
334   }
335   OS.flush();
336   return result;
337 }
338 
339 static const char *ValueProfKindDescr[] = {
340 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
341 #include "llvm/ProfileData/InstrProfData.inc"
342 };
343 
344 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
345 // aware this is an ir_level profile so it can set the version flag.
346 static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) {
347   const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
348   Type *IntTy64 = Type::getInt64Ty(M.getContext());
349   uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
350   if (IsCS)
351     ProfileVersion |= VARIANT_MASK_CSIR_PROF;
352   if (PGOInstrumentEntry)
353     ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
354   if (DebugInfoCorrelate)
355     ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
356   if (PGOFunctionEntryCoverage)
357     ProfileVersion |=
358         VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
359   auto IRLevelVersionVariable = new GlobalVariable(
360       M, IntTy64, true, GlobalValue::WeakAnyLinkage,
361       Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
362   IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility);
363   Triple TT(M.getTargetTriple());
364   if (TT.supportsCOMDAT()) {
365     IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
366     IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
367   }
368   return IRLevelVersionVariable;
369 }
370 
371 namespace {
372 
373 /// The select instruction visitor plays three roles specified
374 /// by the mode. In \c VM_counting mode, it simply counts the number of
375 /// select instructions. In \c VM_instrument mode, it inserts code to count
376 /// the number times TrueValue of select is taken. In \c VM_annotate mode,
377 /// it reads the profile data and annotate the select instruction with metadata.
378 enum VisitMode { VM_counting, VM_instrument, VM_annotate };
379 class PGOUseFunc;
380 
381 /// Instruction Visitor class to visit select instructions.
382 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
383   Function &F;
384   unsigned NSIs = 0;             // Number of select instructions instrumented.
385   VisitMode Mode = VM_counting;  // Visiting mode.
386   unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
387   unsigned TotalNumCtrs = 0;     // Total number of counters
388   GlobalVariable *FuncNameVar = nullptr;
389   uint64_t FuncHash = 0;
390   PGOUseFunc *UseFunc = nullptr;
391 
392   SelectInstVisitor(Function &Func) : F(Func) {}
393 
394   void countSelects(Function &Func) {
395     NSIs = 0;
396     Mode = VM_counting;
397     visit(Func);
398   }
399 
400   // Visit the IR stream and instrument all select instructions. \p
401   // Ind is a pointer to the counter index variable; \p TotalNC
402   // is the total number of counters; \p FNV is the pointer to the
403   // PGO function name var; \p FHash is the function hash.
404   void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC,
405                          GlobalVariable *FNV, uint64_t FHash) {
406     Mode = VM_instrument;
407     CurCtrIdx = Ind;
408     TotalNumCtrs = TotalNC;
409     FuncHash = FHash;
410     FuncNameVar = FNV;
411     visit(Func);
412   }
413 
414   // Visit the IR stream and annotate all select instructions.
415   void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) {
416     Mode = VM_annotate;
417     UseFunc = UF;
418     CurCtrIdx = Ind;
419     visit(Func);
420   }
421 
422   void instrumentOneSelectInst(SelectInst &SI);
423   void annotateOneSelectInst(SelectInst &SI);
424 
425   // Visit \p SI instruction and perform tasks according to visit mode.
426   void visitSelectInst(SelectInst &SI);
427 
428   // Return the number of select instructions. This needs be called after
429   // countSelects().
430   unsigned getNumOfSelectInsts() const { return NSIs; }
431 };
432 
433 } // end anonymous namespace
434 
435 namespace {
436 
437 /// An MST based instrumentation for PGO
438 ///
439 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO
440 /// in the function level.
441 struct PGOEdge {
442   // This class implements the CFG edges. Note the CFG can be a multi-graph.
443   // So there might be multiple edges with same SrcBB and DestBB.
444   const BasicBlock *SrcBB;
445   const BasicBlock *DestBB;
446   uint64_t Weight;
447   bool InMST = false;
448   bool Removed = false;
449   bool IsCritical = false;
450 
451   PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
452       : SrcBB(Src), DestBB(Dest), Weight(W) {}
453 
454   // Return the information string of an edge.
455   std::string infoString() const {
456     return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
457             (IsCritical ? "c" : " ") + "  W=" + Twine(Weight)).str();
458   }
459 };
460 
461 // This class stores the auxiliary information for each BB.
462 struct BBInfo {
463   BBInfo *Group;
464   uint32_t Index;
465   uint32_t Rank = 0;
466 
467   BBInfo(unsigned IX) : Group(this), Index(IX) {}
468 
469   // Return the information string of this object.
470   std::string infoString() const {
471     return (Twine("Index=") + Twine(Index)).str();
472   }
473 
474   // Empty function -- only applicable to UseBBInfo.
475   void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
476 
477   // Empty function -- only applicable to UseBBInfo.
478   void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
479 };
480 
481 // This class implements the CFG edges. Note the CFG can be a multi-graph.
482 template <class Edge, class BBInfo> class FuncPGOInstrumentation {
483 private:
484   Function &F;
485 
486   // Is this is context-sensitive instrumentation.
487   bool IsCS;
488 
489   // A map that stores the Comdat group in function F.
490   std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
491 
492   ValueProfileCollector VPC;
493 
494   void computeCFGHash();
495   void renameComdatFunction();
496 
497 public:
498   std::vector<std::vector<VPCandidateInfo>> ValueSites;
499   SelectInstVisitor SIVisitor;
500   std::string FuncName;
501   GlobalVariable *FuncNameVar;
502 
503   // CFG hash value for this function.
504   uint64_t FunctionHash = 0;
505 
506   // The Minimum Spanning Tree of function CFG.
507   CFGMST<Edge, BBInfo> MST;
508 
509   // Collect all the BBs that will be instrumented, and store them in
510   // InstrumentBBs.
511   void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
512 
513   // Give an edge, find the BB that will be instrumented.
514   // Return nullptr if there is no BB to be instrumented.
515   BasicBlock *getInstrBB(Edge *E);
516 
517   // Return the auxiliary BB information.
518   BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
519 
520   // Return the auxiliary BB information if available.
521   BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
522 
523   // Dump edges and BB information.
524   void dumpInfo(std::string Str = "") const {
525     MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
526                               Twine(FunctionHash) + "\t" + Str);
527   }
528 
529   FuncPGOInstrumentation(
530       Function &Func, TargetLibraryInfo &TLI,
531       std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
532       bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
533       BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
534       bool InstrumentFuncEntry = true)
535       : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
536         ValueSites(IPVK_Last + 1), SIVisitor(Func),
537         MST(F, InstrumentFuncEntry, BPI, BFI) {
538     // This should be done before CFG hash computation.
539     SIVisitor.countSelects(Func);
540     ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
541     if (!IsCS) {
542       NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
543       NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
544       NumOfPGOBB += MST.BBInfos.size();
545       ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
546     } else {
547       NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
548       NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
549       NumOfCSPGOBB += MST.BBInfos.size();
550     }
551 
552     FuncName = getPGOFuncName(F);
553     computeCFGHash();
554     if (!ComdatMembers.empty())
555       renameComdatFunction();
556     LLVM_DEBUG(dumpInfo("after CFGMST"));
557 
558     for (auto &E : MST.AllEdges) {
559       if (E->Removed)
560         continue;
561       IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
562       if (!E->InMST)
563         IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
564     }
565 
566     if (CreateGlobalVar)
567       FuncNameVar = createPGOFuncNameVar(F, FuncName);
568   }
569 };
570 
571 } // end anonymous namespace
572 
573 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
574 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
575 // of selects, indirect calls, mem ops and edges.
576 template <class Edge, class BBInfo>
577 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
578   std::vector<uint8_t> Indexes;
579   JamCRC JC;
580   for (auto &BB : F) {
581     const Instruction *TI = BB.getTerminator();
582     for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
583       BasicBlock *Succ = TI->getSuccessor(I);
584       auto BI = findBBInfo(Succ);
585       if (BI == nullptr)
586         continue;
587       uint32_t Index = BI->Index;
588       for (int J = 0; J < 4; J++)
589         Indexes.push_back((uint8_t)(Index >> (J * 8)));
590     }
591   }
592   JC.update(Indexes);
593 
594   JamCRC JCH;
595   if (PGOOldCFGHashing) {
596     // Hash format for context sensitive profile. Reserve 4 bits for other
597     // information.
598     FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
599                    (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
600                    //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
601                    (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
602   } else {
603     // The higher 32 bits.
604     auto updateJCH = [&JCH](uint64_t Num) {
605       uint8_t Data[8];
606       support::endian::write64le(Data, Num);
607       JCH.update(Data);
608     };
609     updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
610     updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
611     updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
612     updateJCH((uint64_t)MST.AllEdges.size());
613 
614     // Hash format for context sensitive profile. Reserve 4 bits for other
615     // information.
616     FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
617   }
618 
619   // Reserve bit 60-63 for other information purpose.
620   FunctionHash &= 0x0FFFFFFFFFFFFFFF;
621   if (IsCS)
622     NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
623   LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
624                     << " CRC = " << JC.getCRC()
625                     << ", Selects = " << SIVisitor.getNumOfSelectInsts()
626                     << ", Edges = " << MST.AllEdges.size() << ", ICSites = "
627                     << ValueSites[IPVK_IndirectCallTarget].size());
628   if (!PGOOldCFGHashing) {
629     LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
630                       << ", High32 CRC = " << JCH.getCRC());
631   }
632   LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";);
633 }
634 
635 // Check if we can safely rename this Comdat function.
636 static bool canRenameComdat(
637     Function &F,
638     std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
639   if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
640     return false;
641 
642   // FIXME: Current only handle those Comdat groups that only containing one
643   // function.
644   // (1) For a Comdat group containing multiple functions, we need to have a
645   // unique postfix based on the hashes for each function. There is a
646   // non-trivial code refactoring to do this efficiently.
647   // (2) Variables can not be renamed, so we can not rename Comdat function in a
648   // group including global vars.
649   Comdat *C = F.getComdat();
650   for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
651     assert(!isa<GlobalAlias>(CM.second));
652     Function *FM = dyn_cast<Function>(CM.second);
653     if (FM != &F)
654       return false;
655   }
656   return true;
657 }
658 
659 // Append the CFGHash to the Comdat function name.
660 template <class Edge, class BBInfo>
661 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
662   if (!canRenameComdat(F, ComdatMembers))
663     return;
664   std::string OrigName = F.getName().str();
665   std::string NewFuncName =
666       Twine(F.getName() + "." + Twine(FunctionHash)).str();
667   F.setName(Twine(NewFuncName));
668   GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F);
669   FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
670   Comdat *NewComdat;
671   Module *M = F.getParent();
672   // For AvailableExternallyLinkage functions, change the linkage to
673   // LinkOnceODR and put them into comdat. This is because after renaming, there
674   // is no backup external copy available for the function.
675   if (!F.hasComdat()) {
676     assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
677     NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
678     F.setLinkage(GlobalValue::LinkOnceODRLinkage);
679     F.setComdat(NewComdat);
680     return;
681   }
682 
683   // This function belongs to a single function Comdat group.
684   Comdat *OrigComdat = F.getComdat();
685   std::string NewComdatName =
686       Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
687   NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
688   NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
689 
690   for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
691     // Must be a function.
692     cast<Function>(CM.second)->setComdat(NewComdat);
693   }
694 }
695 
696 // Collect all the BBs that will be instruments and return them in
697 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo.
698 template <class Edge, class BBInfo>
699 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
700     std::vector<BasicBlock *> &InstrumentBBs) {
701   // Use a worklist as we will update the vector during the iteration.
702   std::vector<Edge *> EdgeList;
703   EdgeList.reserve(MST.AllEdges.size());
704   for (auto &E : MST.AllEdges)
705     EdgeList.push_back(E.get());
706 
707   for (auto &E : EdgeList) {
708     BasicBlock *InstrBB = getInstrBB(E);
709     if (InstrBB)
710       InstrumentBBs.push_back(InstrBB);
711   }
712 
713   // Set up InEdges/OutEdges for all BBs.
714   for (auto &E : MST.AllEdges) {
715     if (E->Removed)
716       continue;
717     const BasicBlock *SrcBB = E->SrcBB;
718     const BasicBlock *DestBB = E->DestBB;
719     BBInfo &SrcInfo = getBBInfo(SrcBB);
720     BBInfo &DestInfo = getBBInfo(DestBB);
721     SrcInfo.addOutEdge(E.get());
722     DestInfo.addInEdge(E.get());
723   }
724 }
725 
726 // Given a CFG E to be instrumented, find which BB to place the instrumented
727 // code. The function will split the critical edge if necessary.
728 template <class Edge, class BBInfo>
729 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
730   if (E->InMST || E->Removed)
731     return nullptr;
732 
733   BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
734   BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
735   // For a fake edge, instrument the real BB.
736   if (SrcBB == nullptr)
737     return DestBB;
738   if (DestBB == nullptr)
739     return SrcBB;
740 
741   auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
742     // There are basic blocks (such as catchswitch) cannot be instrumented.
743     // If the returned first insertion point is the end of BB, skip this BB.
744     if (BB->getFirstInsertionPt() == BB->end())
745       return nullptr;
746     return BB;
747   };
748 
749   // Instrument the SrcBB if it has a single successor,
750   // otherwise, the DestBB if this is not a critical edge.
751   Instruction *TI = SrcBB->getTerminator();
752   if (TI->getNumSuccessors() <= 1)
753     return canInstrument(SrcBB);
754   if (!E->IsCritical)
755     return canInstrument(DestBB);
756 
757   // Some IndirectBr critical edges cannot be split by the previous
758   // SplitIndirectBrCriticalEdges call. Bail out.
759   unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
760   BasicBlock *InstrBB =
761       isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
762   if (!InstrBB) {
763     LLVM_DEBUG(
764         dbgs() << "Fail to split critical edge: not instrument this edge.\n");
765     return nullptr;
766   }
767   // For a critical edge, we have to split. Instrument the newly
768   // created BB.
769   IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
770   LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
771                     << " --> " << getBBInfo(DestBB).Index << "\n");
772   // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
773   MST.addEdge(SrcBB, InstrBB, 0);
774   // Second one: Add new edge of InstrBB->DestBB.
775   Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
776   NewEdge1.InMST = true;
777   E->Removed = true;
778 
779   return canInstrument(InstrBB);
780 }
781 
782 // When generating value profiling calls on Windows routines that make use of
783 // handler funclets for exception processing an operand bundle needs to attached
784 // to the called function. This routine will set \p OpBundles to contain the
785 // funclet information, if any is needed, that should be placed on the generated
786 // value profiling call for the value profile candidate call.
787 static void
788 populateEHOperandBundle(VPCandidateInfo &Cand,
789                         DenseMap<BasicBlock *, ColorVector> &BlockColors,
790                         SmallVectorImpl<OperandBundleDef> &OpBundles) {
791   auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
792   if (!OrigCall)
793     return;
794 
795   if (!isa<IntrinsicInst>(OrigCall)) {
796     // The instrumentation call should belong to the same funclet as a
797     // non-intrinsic call, so just copy the operand bundle, if any exists.
798     Optional<OperandBundleUse> ParentFunclet =
799         OrigCall->getOperandBundle(LLVMContext::OB_funclet);
800     if (ParentFunclet)
801       OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
802   } else {
803     // Intrinsics or other instructions do not get funclet information from the
804     // front-end. Need to use the BlockColors that was computed by the routine
805     // colorEHFunclets to determine whether a funclet is needed.
806     if (!BlockColors.empty()) {
807       const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
808       assert(CV.size() == 1 && "non-unique color for block!");
809       Instruction *EHPad = CV.front()->getFirstNonPHI();
810       if (EHPad->isEHPad())
811         OpBundles.emplace_back("funclet", EHPad);
812     }
813   }
814 }
815 
816 // Visit all edge and instrument the edges not in MST, and do value profiling.
817 // Critical edges will be split.
818 static void instrumentOneFunc(
819     Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI,
820     BlockFrequencyInfo *BFI,
821     std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
822     bool IsCS) {
823   // Split indirectbr critical edges here before computing the MST rather than
824   // later in getInstrBB() to avoid invalidating it.
825   SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
826 
827   FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(
828       F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry);
829 
830   Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
831   auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy);
832   auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
833                                   FuncInfo.FunctionHash);
834   if (PGOFunctionEntryCoverage) {
835     assert(!IsCS &&
836            "entry coverge does not support context-sensitive instrumentation");
837     auto &EntryBB = F.getEntryBlock();
838     IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
839     // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
840     //                      i32 <index>)
841     Builder.CreateCall(
842         Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
843         {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
844     return;
845   }
846 
847   std::vector<BasicBlock *> InstrumentBBs;
848   FuncInfo.getInstrumentBBs(InstrumentBBs);
849   unsigned NumCounters =
850       InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
851 
852   uint32_t I = 0;
853   for (auto *InstrBB : InstrumentBBs) {
854     IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
855     assert(Builder.GetInsertPoint() != InstrBB->end() &&
856            "Cannot get the Instrumentation point");
857     // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
858     //                          i32 <index>)
859     Builder.CreateCall(
860         Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
861         {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
862   }
863 
864   // Now instrument select instructions:
865   FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar,
866                                        FuncInfo.FunctionHash);
867   assert(I == NumCounters);
868 
869   if (DisableValueProfiling)
870     return;
871 
872   NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
873 
874   // Intrinsic function calls do not have funclet operand bundles needed for
875   // Windows exception handling attached to them. However, if value profiling is
876   // inserted for one of these calls, then a funclet value will need to be set
877   // on the instrumentation call based on the funclet coloring.
878   DenseMap<BasicBlock *, ColorVector> BlockColors;
879   if (F.hasPersonalityFn() &&
880       isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
881     BlockColors = colorEHFunclets(F);
882 
883   // For each VP Kind, walk the VP candidates and instrument each one.
884   for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
885     unsigned SiteIndex = 0;
886     if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
887       continue;
888 
889     for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
890       LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
891                         << " site: CallSite Index = " << SiteIndex << "\n");
892 
893       IRBuilder<> Builder(Cand.InsertPt);
894       assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
895              "Cannot get the Instrumentation point");
896 
897       Value *ToProfile = nullptr;
898       if (Cand.V->getType()->isIntegerTy())
899         ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
900       else if (Cand.V->getType()->isPointerTy())
901         ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
902       assert(ToProfile && "value profiling Value is of unexpected type");
903 
904       SmallVector<OperandBundleDef, 1> OpBundles;
905       populateEHOperandBundle(Cand, BlockColors, OpBundles);
906       Builder.CreateCall(
907           Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
908           {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
909            Builder.getInt64(FuncInfo.FunctionHash), ToProfile,
910            Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
911           OpBundles);
912     }
913   } // IPVK_First <= Kind <= IPVK_Last
914 }
915 
916 namespace {
917 
918 // This class represents a CFG edge in profile use compilation.
919 struct PGOUseEdge : public PGOEdge {
920   bool CountValid = false;
921   uint64_t CountValue = 0;
922 
923   PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
924       : PGOEdge(Src, Dest, W) {}
925 
926   // Set edge count value
927   void setEdgeCount(uint64_t Value) {
928     CountValue = Value;
929     CountValid = true;
930   }
931 
932   // Return the information string for this object.
933   std::string infoString() const {
934     if (!CountValid)
935       return PGOEdge::infoString();
936     return (Twine(PGOEdge::infoString()) + "  Count=" + Twine(CountValue))
937         .str();
938   }
939 };
940 
941 using DirectEdges = SmallVector<PGOUseEdge *, 2>;
942 
943 // This class stores the auxiliary information for each BB.
944 struct UseBBInfo : public BBInfo {
945   uint64_t CountValue = 0;
946   bool CountValid;
947   int32_t UnknownCountInEdge = 0;
948   int32_t UnknownCountOutEdge = 0;
949   DirectEdges InEdges;
950   DirectEdges OutEdges;
951 
952   UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {}
953 
954   UseBBInfo(unsigned IX, uint64_t C)
955       : BBInfo(IX), CountValue(C), CountValid(true) {}
956 
957   // Set the profile count value for this BB.
958   void setBBInfoCount(uint64_t Value) {
959     CountValue = Value;
960     CountValid = true;
961   }
962 
963   // Return the information string of this object.
964   std::string infoString() const {
965     if (!CountValid)
966       return BBInfo::infoString();
967     return (Twine(BBInfo::infoString()) + "  Count=" + Twine(CountValue)).str();
968   }
969 
970   // Add an OutEdge and update the edge count.
971   void addOutEdge(PGOUseEdge *E) {
972     OutEdges.push_back(E);
973     UnknownCountOutEdge++;
974   }
975 
976   // Add an InEdge and update the edge count.
977   void addInEdge(PGOUseEdge *E) {
978     InEdges.push_back(E);
979     UnknownCountInEdge++;
980   }
981 };
982 
983 } // end anonymous namespace
984 
985 // Sum up the count values for all the edges.
986 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
987   uint64_t Total = 0;
988   for (auto &E : Edges) {
989     if (E->Removed)
990       continue;
991     Total += E->CountValue;
992   }
993   return Total;
994 }
995 
996 namespace {
997 
998 class PGOUseFunc {
999 public:
1000   PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1001              std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1002              BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
1003              ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry)
1004       : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1005         FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1006                  InstrumentFuncEntry),
1007         FreqAttr(FFA_Normal), IsCS(IsCS) {}
1008 
1009   // Read counts for the instrumented BB from profile.
1010   bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1011                     bool &AllMinusOnes);
1012 
1013   // Populate the counts for all BBs.
1014   void populateCounters();
1015 
1016   // Set the branch weights based on the count values.
1017   void setBranchWeights();
1018 
1019   // Annotate the value profile call sites for all value kind.
1020   void annotateValueSites();
1021 
1022   // Annotate the value profile call sites for one value kind.
1023   void annotateValueSites(uint32_t Kind);
1024 
1025   // Annotate the irreducible loop header weights.
1026   void annotateIrrLoopHeaderWeights();
1027 
1028   // The hotness of the function from the profile count.
1029   enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1030 
1031   // Return the function hotness from the profile.
1032   FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1033 
1034   // Return the function hash.
1035   uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1036 
1037   // Return the profile record for this function;
1038   InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1039 
1040   // Return the auxiliary BB information.
1041   UseBBInfo &getBBInfo(const BasicBlock *BB) const {
1042     return FuncInfo.getBBInfo(BB);
1043   }
1044 
1045   // Return the auxiliary BB information if available.
1046   UseBBInfo *findBBInfo(const BasicBlock *BB) const {
1047     return FuncInfo.findBBInfo(BB);
1048   }
1049 
1050   Function &getFunc() const { return F; }
1051 
1052   void dumpInfo(std::string Str = "") const {
1053     FuncInfo.dumpInfo(Str);
1054   }
1055 
1056   uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1057 private:
1058   Function &F;
1059   Module *M;
1060   BlockFrequencyInfo *BFI;
1061   ProfileSummaryInfo *PSI;
1062 
1063   // This member stores the shared information with class PGOGenFunc.
1064   FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
1065 
1066   // The maximum count value in the profile. This is only used in PGO use
1067   // compilation.
1068   uint64_t ProgramMaxCount;
1069 
1070   // Position of counter that remains to be read.
1071   uint32_t CountPosition = 0;
1072 
1073   // Total size of the profile count for this function.
1074   uint32_t ProfileCountSize = 0;
1075 
1076   // ProfileRecord for this function.
1077   InstrProfRecord ProfileRecord;
1078 
1079   // Function hotness info derived from profile.
1080   FuncFreqAttr FreqAttr;
1081 
1082   // Is to use the context sensitive profile.
1083   bool IsCS;
1084 
1085   // Find the Instrumented BB and set the value. Return false on error.
1086   bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1087 
1088   // Set the edge counter value for the unknown edge -- there should be only
1089   // one unknown edge.
1090   void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1091 
1092   // Return FuncName string;
1093   std::string getFuncName() const { return FuncInfo.FuncName; }
1094 
1095   // Set the hot/cold inline hints based on the count values.
1096   // FIXME: This function should be removed once the functionality in
1097   // the inliner is implemented.
1098   void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1099     if (PSI->isHotCount(EntryCount))
1100       FreqAttr = FFA_Hot;
1101     else if (PSI->isColdCount(MaxCount))
1102       FreqAttr = FFA_Cold;
1103   }
1104 };
1105 
1106 } // end anonymous namespace
1107 
1108 // Visit all the edges and assign the count value for the instrumented
1109 // edges and the BB. Return false on error.
1110 bool PGOUseFunc::setInstrumentedCounts(
1111     const std::vector<uint64_t> &CountFromProfile) {
1112 
1113   std::vector<BasicBlock *> InstrumentBBs;
1114   FuncInfo.getInstrumentBBs(InstrumentBBs);
1115   unsigned NumCounters =
1116       InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1117   // The number of counters here should match the number of counters
1118   // in profile. Return if they mismatch.
1119   if (NumCounters != CountFromProfile.size()) {
1120     return false;
1121   }
1122   auto *FuncEntry = &*F.begin();
1123 
1124   // Set the profile count to the Instrumented BBs.
1125   uint32_t I = 0;
1126   for (BasicBlock *InstrBB : InstrumentBBs) {
1127     uint64_t CountValue = CountFromProfile[I++];
1128     UseBBInfo &Info = getBBInfo(InstrBB);
1129     // If we reach here, we know that we have some nonzero count
1130     // values in this function. The entry count should not be 0.
1131     // Fix it if necessary.
1132     if (InstrBB == FuncEntry && CountValue == 0)
1133       CountValue = 1;
1134     Info.setBBInfoCount(CountValue);
1135   }
1136   ProfileCountSize = CountFromProfile.size();
1137   CountPosition = I;
1138 
1139   // Set the edge count and update the count of unknown edges for BBs.
1140   auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1141     E->setEdgeCount(Value);
1142     this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1143     this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1144   };
1145 
1146   // Set the profile count the Instrumented edges. There are BBs that not in
1147   // MST but not instrumented. Need to set the edge count value so that we can
1148   // populate the profile counts later.
1149   for (auto &E : FuncInfo.MST.AllEdges) {
1150     if (E->Removed || E->InMST)
1151       continue;
1152     const BasicBlock *SrcBB = E->SrcBB;
1153     UseBBInfo &SrcInfo = getBBInfo(SrcBB);
1154 
1155     // If only one out-edge, the edge profile count should be the same as BB
1156     // profile count.
1157     if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
1158       setEdgeCount(E.get(), SrcInfo.CountValue);
1159     else {
1160       const BasicBlock *DestBB = E->DestBB;
1161       UseBBInfo &DestInfo = getBBInfo(DestBB);
1162       // If only one in-edge, the edge profile count should be the same as BB
1163       // profile count.
1164       if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
1165         setEdgeCount(E.get(), DestInfo.CountValue);
1166     }
1167     if (E->CountValid)
1168       continue;
1169     // E's count should have been set from profile. If not, this meenas E skips
1170     // the instrumentation. We set the count to 0.
1171     setEdgeCount(E.get(), 0);
1172   }
1173   return true;
1174 }
1175 
1176 // Set the count value for the unknown edge. There should be one and only one
1177 // unknown edge in Edges vector.
1178 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1179   for (auto &E : Edges) {
1180     if (E->CountValid)
1181       continue;
1182     E->setEdgeCount(Value);
1183 
1184     getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1185     getBBInfo(E->DestBB).UnknownCountInEdge--;
1186     return;
1187   }
1188   llvm_unreachable("Cannot find the unknown count edge");
1189 }
1190 
1191 // Emit function metadata indicating PGO profile mismatch.
1192 static void annotateFunctionWithHashMismatch(Function &F,
1193                                              LLVMContext &ctx) {
1194   const char MetadataName[] = "instr_prof_hash_mismatch";
1195   SmallVector<Metadata *, 2> Names;
1196   // If this metadata already exists, ignore.
1197   auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1198   if (Existing) {
1199     MDTuple *Tuple = cast<MDTuple>(Existing);
1200     for (auto &N : Tuple->operands()) {
1201       if (cast<MDString>(N.get())->getString() ==  MetadataName)
1202         return;
1203       Names.push_back(N.get());
1204     }
1205   }
1206 
1207   MDBuilder MDB(ctx);
1208   Names.push_back(MDB.createString(MetadataName));
1209   MDNode *MD = MDTuple::get(ctx, Names);
1210   F.setMetadata(LLVMContext::MD_annotation, MD);
1211 }
1212 
1213 // Read the profile from ProfileFileName and assign the value to the
1214 // instrumented BB and the edges. This function also updates ProgramMaxCount.
1215 // Return true if the profile are successfully read, and false on errors.
1216 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1217                               bool &AllMinusOnes) {
1218   auto &Ctx = M->getContext();
1219   Expected<InstrProfRecord> Result =
1220       PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
1221   if (Error E = Result.takeError()) {
1222     handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
1223       auto Err = IPE.get();
1224       bool SkipWarning = false;
1225       LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1226                         << FuncInfo.FuncName << ": ");
1227       if (Err == instrprof_error::unknown_function) {
1228         IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1229         SkipWarning = !PGOWarnMissing;
1230         LLVM_DEBUG(dbgs() << "unknown function");
1231       } else if (Err == instrprof_error::hash_mismatch ||
1232                  Err == instrprof_error::malformed) {
1233         IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1234         SkipWarning =
1235             NoPGOWarnMismatch ||
1236             (NoPGOWarnMismatchComdat &&
1237              (F.hasComdat() ||
1238               F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
1239         LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
1240         // Emit function metadata indicating PGO profile mismatch.
1241         annotateFunctionWithHashMismatch(F, M->getContext());
1242       }
1243 
1244       LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1245       if (SkipWarning)
1246         return;
1247 
1248       std::string Msg = IPE.message() + std::string(" ") + F.getName().str() +
1249                         std::string(" Hash = ") +
1250                         std::to_string(FuncInfo.FunctionHash);
1251 
1252       Ctx.diagnose(
1253           DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1254     });
1255     return false;
1256   }
1257   ProfileRecord = std::move(Result.get());
1258   std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1259 
1260   IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1261   LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1262   AllMinusOnes = (CountFromProfile.size() > 0);
1263   uint64_t ValueSum = 0;
1264   for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1265     LLVM_DEBUG(dbgs() << "  " << I << ": " << CountFromProfile[I] << "\n");
1266     ValueSum += CountFromProfile[I];
1267     if (CountFromProfile[I] != (uint64_t)-1)
1268       AllMinusOnes = false;
1269   }
1270   AllZeros = (ValueSum == 0);
1271 
1272   LLVM_DEBUG(dbgs() << "SUM =  " << ValueSum << "\n");
1273 
1274   getBBInfo(nullptr).UnknownCountOutEdge = 2;
1275   getBBInfo(nullptr).UnknownCountInEdge = 2;
1276 
1277   if (!setInstrumentedCounts(CountFromProfile)) {
1278     LLVM_DEBUG(
1279         dbgs() << "Inconsistent number of counts, skipping this function");
1280     Ctx.diagnose(DiagnosticInfoPGOProfile(
1281         M->getName().data(),
1282         Twine("Inconsistent number of counts in ") + F.getName().str()
1283         + Twine(": the profile may be stale or there is a function name collision."),
1284         DS_Warning));
1285     return false;
1286   }
1287   ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1288   return true;
1289 }
1290 
1291 // Populate the counters from instrumented BBs to all BBs.
1292 // In the end of this operation, all BBs should have a valid count value.
1293 void PGOUseFunc::populateCounters() {
1294   bool Changes = true;
1295   unsigned NumPasses = 0;
1296   while (Changes) {
1297     NumPasses++;
1298     Changes = false;
1299 
1300     // For efficient traversal, it's better to start from the end as most
1301     // of the instrumented edges are at the end.
1302     for (auto &BB : reverse(F)) {
1303       UseBBInfo *Count = findBBInfo(&BB);
1304       if (Count == nullptr)
1305         continue;
1306       if (!Count->CountValid) {
1307         if (Count->UnknownCountOutEdge == 0) {
1308           Count->CountValue = sumEdgeCount(Count->OutEdges);
1309           Count->CountValid = true;
1310           Changes = true;
1311         } else if (Count->UnknownCountInEdge == 0) {
1312           Count->CountValue = sumEdgeCount(Count->InEdges);
1313           Count->CountValid = true;
1314           Changes = true;
1315         }
1316       }
1317       if (Count->CountValid) {
1318         if (Count->UnknownCountOutEdge == 1) {
1319           uint64_t Total = 0;
1320           uint64_t OutSum = sumEdgeCount(Count->OutEdges);
1321           // If the one of the successor block can early terminate (no-return),
1322           // we can end up with situation where out edge sum count is larger as
1323           // the source BB's count is collected by a post-dominated block.
1324           if (Count->CountValue > OutSum)
1325             Total = Count->CountValue - OutSum;
1326           setEdgeCount(Count->OutEdges, Total);
1327           Changes = true;
1328         }
1329         if (Count->UnknownCountInEdge == 1) {
1330           uint64_t Total = 0;
1331           uint64_t InSum = sumEdgeCount(Count->InEdges);
1332           if (Count->CountValue > InSum)
1333             Total = Count->CountValue - InSum;
1334           setEdgeCount(Count->InEdges, Total);
1335           Changes = true;
1336         }
1337       }
1338     }
1339   }
1340 
1341   LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1342   (void) NumPasses;
1343 #ifndef NDEBUG
1344   // Assert every BB has a valid counter.
1345   for (auto &BB : F) {
1346     auto BI = findBBInfo(&BB);
1347     if (BI == nullptr)
1348       continue;
1349     assert(BI->CountValid && "BB count is not valid");
1350   }
1351 #endif
1352   uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
1353   uint64_t FuncMaxCount = FuncEntryCount;
1354   for (auto &BB : F) {
1355     auto BI = findBBInfo(&BB);
1356     if (BI == nullptr)
1357       continue;
1358     FuncMaxCount = std::max(FuncMaxCount, BI->CountValue);
1359   }
1360 
1361   // Fix the obviously inconsistent entry count.
1362   if (FuncMaxCount > 0 && FuncEntryCount == 0)
1363     FuncEntryCount = 1;
1364   F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
1365   markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1366 
1367   // Now annotate select instructions
1368   FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition);
1369   assert(CountPosition == ProfileCountSize);
1370 
1371   LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1372 }
1373 
1374 // Assign the scaled count values to the BB with multiple out edges.
1375 void PGOUseFunc::setBranchWeights() {
1376   // Generate MD_prof metadata for every branch instruction.
1377   LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1378                     << " IsCS=" << IsCS << "\n");
1379   for (auto &BB : F) {
1380     Instruction *TI = BB.getTerminator();
1381     if (TI->getNumSuccessors() < 2)
1382       continue;
1383     if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1384           isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI)))
1385       continue;
1386 
1387     if (getBBInfo(&BB).CountValue == 0)
1388       continue;
1389 
1390     // We have a non-zero Branch BB.
1391     const UseBBInfo &BBCountInfo = getBBInfo(&BB);
1392     unsigned Size = BBCountInfo.OutEdges.size();
1393     SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1394     uint64_t MaxCount = 0;
1395     for (unsigned s = 0; s < Size; s++) {
1396       const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1397       const BasicBlock *SrcBB = E->SrcBB;
1398       const BasicBlock *DestBB = E->DestBB;
1399       if (DestBB == nullptr)
1400         continue;
1401       unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1402       uint64_t EdgeCount = E->CountValue;
1403       if (EdgeCount > MaxCount)
1404         MaxCount = EdgeCount;
1405       EdgeCounts[SuccNum] = EdgeCount;
1406     }
1407     setProfMetadata(M, TI, EdgeCounts, MaxCount);
1408   }
1409 }
1410 
1411 static bool isIndirectBrTarget(BasicBlock *BB) {
1412   for (BasicBlock *Pred : predecessors(BB)) {
1413     if (isa<IndirectBrInst>(Pred->getTerminator()))
1414       return true;
1415   }
1416   return false;
1417 }
1418 
1419 void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1420   LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1421   // Find irr loop headers
1422   for (auto &BB : F) {
1423     // As a heuristic also annotate indrectbr targets as they have a high chance
1424     // to become an irreducible loop header after the indirectbr tail
1425     // duplication.
1426     if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1427       Instruction *TI = BB.getTerminator();
1428       const UseBBInfo &BBCountInfo = getBBInfo(&BB);
1429       setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
1430     }
1431   }
1432 }
1433 
1434 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1435   if (PGOFunctionEntryCoverage)
1436     return;
1437   Module *M = F.getParent();
1438   IRBuilder<> Builder(&SI);
1439   Type *Int64Ty = Builder.getInt64Ty();
1440   Type *I8PtrTy = Builder.getInt8PtrTy();
1441   auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1442   Builder.CreateCall(
1443       Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1444       {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
1445        Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1446        Builder.getInt32(*CurCtrIdx), Step});
1447   ++(*CurCtrIdx);
1448 }
1449 
1450 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1451   std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1452   assert(*CurCtrIdx < CountFromProfile.size() &&
1453          "Out of bound access of counters");
1454   uint64_t SCounts[2];
1455   SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1456   ++(*CurCtrIdx);
1457   uint64_t TotalCount = 0;
1458   auto BI = UseFunc->findBBInfo(SI.getParent());
1459   if (BI != nullptr)
1460     TotalCount = BI->CountValue;
1461   // False Count
1462   SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1463   uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1464   if (MaxCount)
1465     setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1466 }
1467 
1468 void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1469   if (!PGOInstrSelect)
1470     return;
1471   // FIXME: do not handle this yet.
1472   if (SI.getCondition()->getType()->isVectorTy())
1473     return;
1474 
1475   switch (Mode) {
1476   case VM_counting:
1477     NSIs++;
1478     return;
1479   case VM_instrument:
1480     instrumentOneSelectInst(SI);
1481     return;
1482   case VM_annotate:
1483     annotateOneSelectInst(SI);
1484     return;
1485   }
1486 
1487   llvm_unreachable("Unknown visiting mode");
1488 }
1489 
1490 // Traverse all valuesites and annotate the instructions for all value kind.
1491 void PGOUseFunc::annotateValueSites() {
1492   if (DisableValueProfiling)
1493     return;
1494 
1495   // Create the PGOFuncName meta data.
1496   createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1497 
1498   for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1499     annotateValueSites(Kind);
1500 }
1501 
1502 // Annotate the instructions for a specific value kind.
1503 void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1504   assert(Kind <= IPVK_Last);
1505   unsigned ValueSiteIndex = 0;
1506   auto &ValueSites = FuncInfo.ValueSites[Kind];
1507   unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1508   if (NumValueSites != ValueSites.size()) {
1509     auto &Ctx = M->getContext();
1510     Ctx.diagnose(DiagnosticInfoPGOProfile(
1511         M->getName().data(),
1512         Twine("Inconsistent number of value sites for ") +
1513             Twine(ValueProfKindDescr[Kind]) +
1514             Twine(" profiling in \"") + F.getName().str() +
1515             Twine("\", possibly due to the use of a stale profile."),
1516         DS_Warning));
1517     return;
1518   }
1519 
1520   for (VPCandidateInfo &I : ValueSites) {
1521     LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1522                       << "): Index = " << ValueSiteIndex << " out of "
1523                       << NumValueSites << "\n");
1524     annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord,
1525                       static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1526                       Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
1527                                              : MaxNumAnnotations);
1528     ValueSiteIndex++;
1529   }
1530 }
1531 
1532 // Collect the set of members for each Comdat in module M and store
1533 // in ComdatMembers.
1534 static void collectComdatMembers(
1535     Module &M,
1536     std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1537   if (!DoComdatRenaming)
1538     return;
1539   for (Function &F : M)
1540     if (Comdat *C = F.getComdat())
1541       ComdatMembers.insert(std::make_pair(C, &F));
1542   for (GlobalVariable &GV : M.globals())
1543     if (Comdat *C = GV.getComdat())
1544       ComdatMembers.insert(std::make_pair(C, &GV));
1545   for (GlobalAlias &GA : M.aliases())
1546     if (Comdat *C = GA.getComdat())
1547       ComdatMembers.insert(std::make_pair(C, &GA));
1548 }
1549 
1550 static bool InstrumentAllFunctions(
1551     Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1552     function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
1553     function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1554   // For the context-sensitve instrumentation, we should have a separated pass
1555   // (before LTO/ThinLTO linking) to create these variables.
1556   if (!IsCS)
1557     createIRLevelProfileFlagVar(M, /*IsCS=*/false);
1558   std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1559   collectComdatMembers(M, ComdatMembers);
1560 
1561   for (auto &F : M) {
1562     if (F.isDeclaration())
1563       continue;
1564     if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1565       continue;
1566     auto &TLI = LookupTLI(F);
1567     auto *BPI = LookupBPI(F);
1568     auto *BFI = LookupBFI(F);
1569     instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
1570   }
1571   return true;
1572 }
1573 
1574 PreservedAnalyses
1575 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
1576   createProfileFileNameVar(M, CSInstrName);
1577   // The variable in a comdat may be discarded by LTO. Ensure the declaration
1578   // will be retained.
1579   appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true));
1580   return PreservedAnalyses::all();
1581 }
1582 
1583 PreservedAnalyses PGOInstrumentationGen::run(Module &M,
1584                                              ModuleAnalysisManager &AM) {
1585   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1586   auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1587     return FAM.getResult<TargetLibraryAnalysis>(F);
1588   };
1589   auto LookupBPI = [&FAM](Function &F) {
1590     return &FAM.getResult<BranchProbabilityAnalysis>(F);
1591   };
1592   auto LookupBFI = [&FAM](Function &F) {
1593     return &FAM.getResult<BlockFrequencyAnalysis>(F);
1594   };
1595 
1596   if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
1597     return PreservedAnalyses::all();
1598 
1599   return PreservedAnalyses::none();
1600 }
1601 
1602 // Using the ratio b/w sums of profile count values and BFI count values to
1603 // adjust the func entry count.
1604 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
1605                               BranchProbabilityInfo &NBPI) {
1606   Function &F = Func.getFunc();
1607   BlockFrequencyInfo NBFI(F, NBPI, LI);
1608 #ifndef NDEBUG
1609   auto BFIEntryCount = F.getEntryCount();
1610   assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
1611          "Invalid BFI Entrycount");
1612 #endif
1613   auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
1614   auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
1615   for (auto &BBI : F) {
1616     uint64_t CountValue = 0;
1617     uint64_t BFICountValue = 0;
1618     if (!Func.findBBInfo(&BBI))
1619       continue;
1620     auto BFICount = NBFI.getBlockProfileCount(&BBI);
1621     CountValue = Func.getBBInfo(&BBI).CountValue;
1622     BFICountValue = *BFICount;
1623     SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
1624     SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
1625   }
1626   if (SumCount.isZero())
1627     return;
1628 
1629   assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
1630          "Incorrect sum of BFI counts");
1631   if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
1632     return;
1633   double Scale = (SumCount / SumBFICount).convertToDouble();
1634   if (Scale < 1.001 && Scale > 0.999)
1635     return;
1636 
1637   uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue;
1638   uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
1639   if (NewEntryCount == 0)
1640     NewEntryCount = 1;
1641   if (NewEntryCount != FuncEntryCount) {
1642     F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
1643     LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
1644                       << ", entry_count " << FuncEntryCount << " --> "
1645                       << NewEntryCount << "\n");
1646   }
1647 }
1648 
1649 // Compare the profile count values with BFI count values, and print out
1650 // the non-matching ones.
1651 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
1652                           BranchProbabilityInfo &NBPI,
1653                           uint64_t HotCountThreshold,
1654                           uint64_t ColdCountThreshold) {
1655   Function &F = Func.getFunc();
1656   BlockFrequencyInfo NBFI(F, NBPI, LI);
1657   //  bool PrintFunc = false;
1658   bool HotBBOnly = PGOVerifyHotBFI;
1659   std::string Msg;
1660   OptimizationRemarkEmitter ORE(&F);
1661 
1662   unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
1663   for (auto &BBI : F) {
1664     uint64_t CountValue = 0;
1665     uint64_t BFICountValue = 0;
1666 
1667     if (Func.getBBInfo(&BBI).CountValid)
1668       CountValue = Func.getBBInfo(&BBI).CountValue;
1669 
1670     BBNum++;
1671     if (CountValue)
1672       NonZeroBBNum++;
1673     auto BFICount = NBFI.getBlockProfileCount(&BBI);
1674     if (BFICount)
1675       BFICountValue = *BFICount;
1676 
1677     if (HotBBOnly) {
1678       bool rawIsHot = CountValue >= HotCountThreshold;
1679       bool BFIIsHot = BFICountValue >= HotCountThreshold;
1680       bool rawIsCold = CountValue <= ColdCountThreshold;
1681       bool ShowCount = false;
1682       if (rawIsHot && !BFIIsHot) {
1683         Msg = "raw-Hot to BFI-nonHot";
1684         ShowCount = true;
1685       } else if (rawIsCold && BFIIsHot) {
1686         Msg = "raw-Cold to BFI-Hot";
1687         ShowCount = true;
1688       }
1689       if (!ShowCount)
1690         continue;
1691     } else {
1692       if ((CountValue < PGOVerifyBFICutoff) &&
1693           (BFICountValue < PGOVerifyBFICutoff))
1694         continue;
1695       uint64_t Diff = (BFICountValue >= CountValue)
1696                           ? BFICountValue - CountValue
1697                           : CountValue - BFICountValue;
1698       if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
1699         continue;
1700     }
1701     BBMisMatchNum++;
1702 
1703     ORE.emit([&]() {
1704       OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify",
1705                                         F.getSubprogram(), &BBI);
1706       Remark << "BB " << ore::NV("Block", BBI.getName())
1707              << " Count=" << ore::NV("Count", CountValue)
1708              << " BFI_Count=" << ore::NV("Count", BFICountValue);
1709       if (!Msg.empty())
1710         Remark << " (" << Msg << ")";
1711       return Remark;
1712     });
1713   }
1714   if (BBMisMatchNum)
1715     ORE.emit([&]() {
1716       return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
1717                                         F.getSubprogram(), &F.getEntryBlock())
1718              << "In Func " << ore::NV("Function", F.getName())
1719              << ": Num_of_BB=" << ore::NV("Count", BBNum)
1720              << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
1721              << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
1722     });
1723 }
1724 
1725 static bool annotateAllFunctions(
1726     Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
1727     function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1728     function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
1729     function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
1730     ProfileSummaryInfo *PSI, bool IsCS) {
1731   LLVM_DEBUG(dbgs() << "Read in profile counters: ");
1732   auto &Ctx = M.getContext();
1733   // Read the counter array from file.
1734   auto ReaderOrErr =
1735       IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName);
1736   if (Error E = ReaderOrErr.takeError()) {
1737     handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
1738       Ctx.diagnose(
1739           DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
1740     });
1741     return false;
1742   }
1743 
1744   std::unique_ptr<IndexedInstrProfReader> PGOReader =
1745       std::move(ReaderOrErr.get());
1746   if (!PGOReader) {
1747     Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
1748                                           StringRef("Cannot get PGOReader")));
1749     return false;
1750   }
1751   if (!PGOReader->hasCSIRLevelProfile() && IsCS)
1752     return false;
1753 
1754   // TODO: might need to change the warning once the clang option is finalized.
1755   if (!PGOReader->isIRLevelProfile()) {
1756     Ctx.diagnose(DiagnosticInfoPGOProfile(
1757         ProfileFileName.data(), "Not an IR level instrumentation profile"));
1758     return false;
1759   }
1760   if (PGOReader->hasSingleByteCoverage()) {
1761     Ctx.diagnose(DiagnosticInfoPGOProfile(
1762         ProfileFileName.data(),
1763         "Cannot use coverage profiles for optimization"));
1764     return false;
1765   }
1766   if (PGOReader->functionEntryOnly()) {
1767     Ctx.diagnose(DiagnosticInfoPGOProfile(
1768         ProfileFileName.data(),
1769         "Function entry profiles are not yet supported for optimization"));
1770     return false;
1771   }
1772 
1773   // Add the profile summary (read from the header of the indexed summary) here
1774   // so that we can use it below when reading counters (which checks if the
1775   // function should be marked with a cold or inlinehint attribute).
1776   M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
1777                       IsCS ? ProfileSummary::PSK_CSInstr
1778                            : ProfileSummary::PSK_Instr);
1779   PSI->refresh();
1780 
1781   std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1782   collectComdatMembers(M, ComdatMembers);
1783   std::vector<Function *> HotFunctions;
1784   std::vector<Function *> ColdFunctions;
1785 
1786   // If the profile marked as always instrument the entry BB, do the
1787   // same. Note this can be overwritten by the internal option in CFGMST.h
1788   bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
1789   if (PGOInstrumentEntry.getNumOccurrences() > 0)
1790     InstrumentFuncEntry = PGOInstrumentEntry;
1791   for (auto &F : M) {
1792     if (F.isDeclaration())
1793       continue;
1794     auto &TLI = LookupTLI(F);
1795     auto *BPI = LookupBPI(F);
1796     auto *BFI = LookupBFI(F);
1797     // Split indirectbr critical edges here before computing the MST rather than
1798     // later in getInstrBB() to avoid invalidating it.
1799     SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
1800     PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
1801                     InstrumentFuncEntry);
1802     // When AllMinusOnes is true, it means the profile for the function
1803     // is unrepresentative and this function is actually hot. Set the
1804     // entry count of the function to be multiple times of hot threshold
1805     // and drop all its internal counters.
1806     bool AllMinusOnes = false;
1807     bool AllZeros = false;
1808     if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes))
1809       continue;
1810     if (AllZeros) {
1811       F.setEntryCount(ProfileCount(0, Function::PCT_Real));
1812       if (Func.getProgramMaxCount() != 0)
1813         ColdFunctions.push_back(&F);
1814       continue;
1815     }
1816     const unsigned MultiplyFactor = 3;
1817     if (AllMinusOnes) {
1818       uint64_t HotThreshold = PSI->getHotCountThreshold();
1819       if (HotThreshold)
1820         F.setEntryCount(
1821             ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real));
1822       HotFunctions.push_back(&F);
1823       continue;
1824     }
1825     Func.populateCounters();
1826     Func.setBranchWeights();
1827     Func.annotateValueSites();
1828     Func.annotateIrrLoopHeaderWeights();
1829     PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
1830     if (FreqAttr == PGOUseFunc::FFA_Cold)
1831       ColdFunctions.push_back(&F);
1832     else if (FreqAttr == PGOUseFunc::FFA_Hot)
1833       HotFunctions.push_back(&F);
1834     if (PGOViewCounts != PGOVCT_None &&
1835         (ViewBlockFreqFuncName.empty() ||
1836          F.getName().equals(ViewBlockFreqFuncName))) {
1837       LoopInfo LI{DominatorTree(F)};
1838       std::unique_ptr<BranchProbabilityInfo> NewBPI =
1839           std::make_unique<BranchProbabilityInfo>(F, LI);
1840       std::unique_ptr<BlockFrequencyInfo> NewBFI =
1841           std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
1842       if (PGOViewCounts == PGOVCT_Graph)
1843         NewBFI->view();
1844       else if (PGOViewCounts == PGOVCT_Text) {
1845         dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
1846         NewBFI->print(dbgs());
1847       }
1848     }
1849     if (PGOViewRawCounts != PGOVCT_None &&
1850         (ViewBlockFreqFuncName.empty() ||
1851          F.getName().equals(ViewBlockFreqFuncName))) {
1852       if (PGOViewRawCounts == PGOVCT_Graph)
1853         if (ViewBlockFreqFuncName.empty())
1854           WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
1855         else
1856           ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
1857       else if (PGOViewRawCounts == PGOVCT_Text) {
1858         dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
1859         Func.dumpInfo();
1860       }
1861     }
1862 
1863     if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) {
1864       LoopInfo LI{DominatorTree(F)};
1865       BranchProbabilityInfo NBPI(F, LI);
1866 
1867       // Fix func entry count.
1868       if (PGOFixEntryCount)
1869         fixFuncEntryCount(Func, LI, NBPI);
1870 
1871       // Verify BlockFrequency information.
1872       uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
1873       if (PGOVerifyHotBFI) {
1874         HotCountThreshold = PSI->getOrCompHotCountThreshold();
1875         ColdCountThreshold = PSI->getOrCompColdCountThreshold();
1876       }
1877       verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
1878     }
1879   }
1880 
1881   // Set function hotness attribute from the profile.
1882   // We have to apply these attributes at the end because their presence
1883   // can affect the BranchProbabilityInfo of any callers, resulting in an
1884   // inconsistent MST between prof-gen and prof-use.
1885   for (auto &F : HotFunctions) {
1886     F->addFnAttr(Attribute::InlineHint);
1887     LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
1888                       << "\n");
1889   }
1890   for (auto &F : ColdFunctions) {
1891     // Only set when there is no Attribute::Hot set by the user. For Hot
1892     // attribute, user's annotation has the precedence over the profile.
1893     if (F->hasFnAttribute(Attribute::Hot)) {
1894       auto &Ctx = M.getContext();
1895       std::string Msg = std::string("Function ") + F->getName().str() +
1896                         std::string(" is annotated as a hot function but"
1897                                     " the profile is cold");
1898       Ctx.diagnose(
1899           DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
1900       continue;
1901     }
1902     F->addFnAttr(Attribute::Cold);
1903     LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
1904                       << "\n");
1905   }
1906   return true;
1907 }
1908 
1909 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename,
1910                                              std::string RemappingFilename,
1911                                              bool IsCS)
1912     : ProfileFileName(std::move(Filename)),
1913       ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) {
1914   if (!PGOTestProfileFile.empty())
1915     ProfileFileName = PGOTestProfileFile;
1916   if (!PGOTestProfileRemappingFile.empty())
1917     ProfileRemappingFileName = PGOTestProfileRemappingFile;
1918 }
1919 
1920 PreservedAnalyses PGOInstrumentationUse::run(Module &M,
1921                                              ModuleAnalysisManager &AM) {
1922 
1923   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1924   auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1925     return FAM.getResult<TargetLibraryAnalysis>(F);
1926   };
1927   auto LookupBPI = [&FAM](Function &F) {
1928     return &FAM.getResult<BranchProbabilityAnalysis>(F);
1929   };
1930   auto LookupBFI = [&FAM](Function &F) {
1931     return &FAM.getResult<BlockFrequencyAnalysis>(F);
1932   };
1933 
1934   auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
1935 
1936   if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
1937                             LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
1938     return PreservedAnalyses::all();
1939 
1940   return PreservedAnalyses::none();
1941 }
1942 
1943 static std::string getSimpleNodeName(const BasicBlock *Node) {
1944   if (!Node->getName().empty())
1945     return std::string(Node->getName());
1946 
1947   std::string SimpleNodeName;
1948   raw_string_ostream OS(SimpleNodeName);
1949   Node->printAsOperand(OS, false);
1950   return OS.str();
1951 }
1952 
1953 void llvm::setProfMetadata(Module *M, Instruction *TI,
1954                            ArrayRef<uint64_t> EdgeCounts,
1955                            uint64_t MaxCount) {
1956   MDBuilder MDB(M->getContext());
1957   assert(MaxCount > 0 && "Bad max count");
1958   uint64_t Scale = calculateCountScale(MaxCount);
1959   SmallVector<unsigned, 4> Weights;
1960   for (const auto &ECI : EdgeCounts)
1961     Weights.push_back(scaleBranchCount(ECI, Scale));
1962 
1963   LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
1964                                            : Weights) {
1965     dbgs() << W << " ";
1966   } dbgs() << "\n";);
1967 
1968   misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
1969 
1970   TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1971   if (EmitBranchProbability) {
1972     std::string BrCondStr = getBranchCondString(TI);
1973     if (BrCondStr.empty())
1974       return;
1975 
1976     uint64_t WSum =
1977         std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
1978                         [](uint64_t w1, uint64_t w2) { return w1 + w2; });
1979     uint64_t TotalCount =
1980         std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
1981                         [](uint64_t c1, uint64_t c2) { return c1 + c2; });
1982     Scale = calculateCountScale(WSum);
1983     BranchProbability BP(scaleBranchCount(Weights[0], Scale),
1984                          scaleBranchCount(WSum, Scale));
1985     std::string BranchProbStr;
1986     raw_string_ostream OS(BranchProbStr);
1987     OS << BP;
1988     OS << " (total count : " << TotalCount << ")";
1989     OS.flush();
1990     Function *F = TI->getParent()->getParent();
1991     OptimizationRemarkEmitter ORE(F);
1992     ORE.emit([&]() {
1993       return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
1994              << BrCondStr << " is true with probability : " << BranchProbStr;
1995     });
1996   }
1997 }
1998 
1999 namespace llvm {
2000 
2001 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) {
2002   MDBuilder MDB(M->getContext());
2003   TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2004                   MDB.createIrrLoopHeaderWeight(Count));
2005 }
2006 
2007 template <> struct GraphTraits<PGOUseFunc *> {
2008   using NodeRef = const BasicBlock *;
2009   using ChildIteratorType = const_succ_iterator;
2010   using nodes_iterator = pointer_iterator<Function::const_iterator>;
2011 
2012   static NodeRef getEntryNode(const PGOUseFunc *G) {
2013     return &G->getFunc().front();
2014   }
2015 
2016   static ChildIteratorType child_begin(const NodeRef N) {
2017     return succ_begin(N);
2018   }
2019 
2020   static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2021 
2022   static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2023     return nodes_iterator(G->getFunc().begin());
2024   }
2025 
2026   static nodes_iterator nodes_end(const PGOUseFunc *G) {
2027     return nodes_iterator(G->getFunc().end());
2028   }
2029 };
2030 
2031 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2032   explicit DOTGraphTraits(bool isSimple = false)
2033       : DefaultDOTGraphTraits(isSimple) {}
2034 
2035   static std::string getGraphName(const PGOUseFunc *G) {
2036     return std::string(G->getFunc().getName());
2037   }
2038 
2039   std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2040     std::string Result;
2041     raw_string_ostream OS(Result);
2042 
2043     OS << getSimpleNodeName(Node) << ":\\l";
2044     UseBBInfo *BI = Graph->findBBInfo(Node);
2045     OS << "Count : ";
2046     if (BI && BI->CountValid)
2047       OS << BI->CountValue << "\\l";
2048     else
2049       OS << "Unknown\\l";
2050 
2051     if (!PGOInstrSelect)
2052       return Result;
2053 
2054     for (const Instruction &I : *Node) {
2055       if (!isa<SelectInst>(&I))
2056         continue;
2057       // Display scaled counts for SELECT instruction:
2058       OS << "SELECT : { T = ";
2059       uint64_t TC, FC;
2060       bool HasProf = I.extractProfMetadata(TC, FC);
2061       if (!HasProf)
2062         OS << "Unknown, F = Unknown }\\l";
2063       else
2064         OS << TC << ", F = " << FC << " }\\l";
2065     }
2066     return Result;
2067   }
2068 };
2069 
2070 } // end namespace llvm
2071