xref: /llvm-project/llvm/lib/Transforms/Utils/MisExpect.cpp (revision 656c5d652ce10257e90c7693b34336b6ce0ecfa3)
1 //===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This contains code to emit warnings for potentially incorrect usage of the
10 // llvm.expect intrinsic. This utility extracts the threshold values from
11 // metadata associated with the instrumented Branch or Switch instruction. The
12 // threshold values are then used to determine if a warning should be emitted.
13 //
14 // MisExpect's implementation relies on two assumptions about how branch weights
15 // are managed in LLVM.
16 //
17 // 1) Frontend profiling weights are always in place before llvm.expect is
18 // lowered in LowerExpectIntrinsic.cpp. Frontend based instrumentation therefore
19 // needs to extract the branch weights and then compare them to the weights
20 // being added by the llvm.expect intrinsic lowering.
21 //
22 // 2) Sampling and IR based profiles will *only* have branch weight metadata
23 // before profiling data is consulted if they are from a lowered llvm.expect
24 // intrinsic. These profiles thus always extract the expected weights and then
25 // compare them to the weights collected during profiling to determine if a
26 // diagnostic message is warranted.
27 //
28 //===----------------------------------------------------------------------===//
29 
30 #include "llvm/Transforms/Utils/MisExpect.h"
31 #include "llvm/ADT/Twine.h"
32 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DiagnosticInfo.h"
35 #include "llvm/IR/Instruction.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/LLVMContext.h"
38 #include "llvm/IR/ProfDataUtils.h"
39 #include "llvm/Support/BranchProbability.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/FormatVariadic.h"
43 #include <cstdint>
44 #include <functional>
45 #include <numeric>
46 
47 #define DEBUG_TYPE "misexpect"
48 
49 using namespace llvm;
50 using namespace misexpect;
51 
52 namespace llvm {
53 
54 // Command line option to enable/disable the warning when profile data suggests
55 // a mismatch with the use of the llvm.expect intrinsic
56 static cl::opt<bool> PGOWarnMisExpect(
57     "pgo-warn-misexpect", cl::init(false), cl::Hidden,
58     cl::desc("Use this option to turn on/off "
59              "warnings about incorrect usage of llvm.expect intrinsics."));
60 
61 static cl::opt<uint32_t> MisExpectTolerance(
62     "misexpect-tolerance", cl::init(0),
63     cl::desc("Prevents emiting diagnostics when profile counts are "
64              "within N% of the threshold.."));
65 
66 } // namespace llvm
67 
68 namespace {
69 
70 bool isMisExpectDiagEnabled(LLVMContext &Ctx) {
71   return PGOWarnMisExpect || Ctx.getMisExpectWarningRequested();
72 }
73 
74 uint64_t getMisExpectTolerance(LLVMContext &Ctx) {
75   return std::max(static_cast<uint32_t>(MisExpectTolerance),
76                   Ctx.getDiagnosticsMisExpectTolerance());
77 }
78 
79 Instruction *getInstCondition(Instruction *I) {
80   assert(I != nullptr && "MisExpect target Instruction cannot be nullptr");
81   Instruction *Ret = nullptr;
82   if (auto *B = dyn_cast<BranchInst>(I)) {
83     Ret = dyn_cast<Instruction>(B->getCondition());
84   }
85   // TODO: Find a way to resolve condition location for switches
86   // Using the condition of the switch seems to often resolve to an earlier
87   // point in the program, i.e. the calculation of the switch condition, rather
88   // than the switch's location in the source code. Thus, we should use the
89   // instruction to get source code locations rather than the condition to
90   // improve diagnostic output, such as the caret. If the same problem exists
91   // for branch instructions, then we should remove this function and directly
92   // use the instruction
93   //
94   else if (auto *S = dyn_cast<SwitchInst>(I)) {
95     Ret = dyn_cast<Instruction>(S->getCondition());
96   }
97   return Ret ? Ret : I;
98 }
99 
100 void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx,
101                              uint64_t ProfCount, uint64_t TotalCount) {
102   double PercentageCorrect = (double)ProfCount / TotalCount;
103   auto PerString =
104       formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount);
105   auto RemStr = formatv(
106       "Potential performance regression from use of the llvm.expect intrinsic: "
107       "Annotation was correct on {0} of profiled executions.",
108       PerString);
109   Twine Msg(PerString);
110   Instruction *Cond = getInstCondition(I);
111   if (isMisExpectDiagEnabled(Ctx))
112     Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg));
113   OptimizationRemarkEmitter ORE(I->getParent()->getParent());
114   ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str());
115 }
116 
117 } // namespace
118 
119 namespace llvm {
120 namespace misexpect {
121 
// TODO: when clang allows c++17, use std::clamp instead
//
// Limits \p value to the range [\p low, \p hi]: anything above \p hi yields
// \p hi, anything below \p low yields \p low, and other values pass through.
// The upper bound is tested first.
uint32_t clamp(uint64_t value, uint32_t low, uint32_t hi) {
  const uint64_t Bounded = value > hi ? hi : (value < low ? low : value);
  return static_cast<uint32_t>(Bounded);
}
130 
131 void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights,
132                      ArrayRef<uint32_t> ExpectedWeights) {
133   // To determine if we emit a diagnostic, we need to compare the branch weights
134   // from the profile to those added by the llvm.expect intrinsic.
135   // So first, we extract the "likely" and "unlikely" weights from
136   // ExpectedWeights And determine the correct weight in the profile to compare
137   // against.
138   uint64_t LikelyBranchWeight = 0,
139            UnlikelyBranchWeight = std::numeric_limits<uint32_t>::max();
140   size_t MaxIndex = 0;
141   for (size_t Idx = 0, End = ExpectedWeights.size(); Idx < End; Idx++) {
142     uint32_t V = ExpectedWeights[Idx];
143     if (LikelyBranchWeight < V) {
144       LikelyBranchWeight = V;
145       MaxIndex = Idx;
146     }
147     if (UnlikelyBranchWeight > V) {
148       UnlikelyBranchWeight = V;
149     }
150   }
151 
152   const uint64_t ProfiledWeight = RealWeights[MaxIndex];
153   const uint64_t RealWeightsTotal =
154       std::accumulate(RealWeights.begin(), RealWeights.end(), (uint64_t)0,
155                       std::plus<uint64_t>());
156   const uint64_t NumUnlikelyTargets = RealWeights.size() - 1;
157 
158   uint64_t TotalBranchWeight =
159       LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets);
160 
161   // FIXME: When we've addressed sample profiling, restore the assertion
162   //
163   // We cannot calculate branch probability if either of these invariants aren't
164   // met. However, MisExpect diagnostics should not prevent code from compiling,
165   // so we simply forgo emitting diagnostics here, and return early.
166   if ((TotalBranchWeight == 0) || (TotalBranchWeight <= LikelyBranchWeight))
167     return;
168 
169   // To determine our threshold value we need to obtain the branch probability
170   // for the weights added by llvm.expect and use that proportion to calculate
171   // our threshold based on the collected profile data.
172   auto LikelyProbablilty = BranchProbability::getBranchProbability(
173       LikelyBranchWeight, TotalBranchWeight);
174 
175   uint64_t ScaledThreshold = LikelyProbablilty.scale(RealWeightsTotal);
176 
177   // clamp tolerance range to [0, 100)
178   auto Tolerance = getMisExpectTolerance(I.getContext());
179   Tolerance = clamp(Tolerance, 0, 99);
180 
181   // Allow users to relax checking by N%  i.e., if they use a 5% tolerance,
182   // then we check against 0.95*ScaledThreshold
183   if (Tolerance > 0)
184     ScaledThreshold *= (1.0 - Tolerance / 100.0);
185 
186   // When the profile weight is below the threshold, we emit the diagnostic
187   if (ProfiledWeight < ScaledThreshold)
188     emitMisexpectDiagnostic(&I, I.getContext(), ProfiledWeight,
189                             RealWeightsTotal);
190 }
191 
192 void checkBackendInstrumentation(Instruction &I,
193                                  const ArrayRef<uint32_t> RealWeights) {
194   SmallVector<uint32_t> ExpectedWeights;
195   if (!extractBranchWeights(I, ExpectedWeights))
196     return;
197   verifyMisExpect(I, RealWeights, ExpectedWeights);
198 }
199 
200 void checkFrontendInstrumentation(Instruction &I,
201                                   const ArrayRef<uint32_t> ExpectedWeights) {
202   SmallVector<uint32_t> RealWeights;
203   if (!extractBranchWeights(I, RealWeights))
204     return;
205   verifyMisExpect(I, RealWeights, ExpectedWeights);
206 }
207 
208 void checkExpectAnnotations(Instruction &I,
209                             const ArrayRef<uint32_t> ExistingWeights,
210                             bool IsFrontend) {
211   if (IsFrontend) {
212     checkFrontendInstrumentation(I, ExistingWeights);
213   } else {
214     checkBackendInstrumentation(I, ExistingWeights);
215   }
216 }
217 
218 } // namespace misexpect
219 } // namespace llvm
220 #undef DEBUG_TYPE
221