181ad6265SDimitry Andric //===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===// 281ad6265SDimitry Andric // 381ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 481ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 581ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 681ad6265SDimitry Andric // 781ad6265SDimitry Andric //===----------------------------------------------------------------------===// 881ad6265SDimitry Andric // 981ad6265SDimitry Andric // This contains code to emit warnings for potentially incorrect usage of the 1081ad6265SDimitry Andric // llvm.expect intrinsic. This utility extracts the threshold values from 1181ad6265SDimitry Andric // metadata associated with the instrumented Branch or Switch instruction. The 1281ad6265SDimitry Andric // threshold values are then used to determine if a warning should be emmited. 1381ad6265SDimitry Andric // 1481ad6265SDimitry Andric // MisExpect's implementation relies on two assumptions about how branch weights 1581ad6265SDimitry Andric // are managed in LLVM. 1681ad6265SDimitry Andric // 1781ad6265SDimitry Andric // 1) Frontend profiling weights are always in place before llvm.expect is 1881ad6265SDimitry Andric // lowered in LowerExpectIntrinsic.cpp. Frontend based instrumentation therefore 1981ad6265SDimitry Andric // needs to extract the branch weights and then compare them to the weights 2081ad6265SDimitry Andric // being added by the llvm.expect intrinsic lowering. 2181ad6265SDimitry Andric // 2281ad6265SDimitry Andric // 2) Sampling and IR based profiles will *only* have branch weight metadata 2381ad6265SDimitry Andric // before profiling data is consulted if they are from a lowered llvm.expect 2481ad6265SDimitry Andric // intrinsic. These profiles thus always extract the expected weights and then 2581ad6265SDimitry Andric // compare them to the weights collected during profiling to determine if a 2681ad6265SDimitry Andric // diagnostic message is warranted. 2781ad6265SDimitry Andric // 2881ad6265SDimitry Andric //===----------------------------------------------------------------------===// 2981ad6265SDimitry Andric 3081ad6265SDimitry Andric #include "llvm/Transforms/Utils/MisExpect.h" 3181ad6265SDimitry Andric #include "llvm/ADT/Twine.h" 3281ad6265SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 3381ad6265SDimitry Andric #include "llvm/IR/Constants.h" 3481ad6265SDimitry Andric #include "llvm/IR/DiagnosticInfo.h" 3581ad6265SDimitry Andric #include "llvm/IR/Instruction.h" 3681ad6265SDimitry Andric #include "llvm/IR/Instructions.h" 3781ad6265SDimitry Andric #include "llvm/IR/LLVMContext.h" 38bdd1243dSDimitry Andric #include "llvm/IR/ProfDataUtils.h" 3981ad6265SDimitry Andric #include "llvm/Support/BranchProbability.h" 4081ad6265SDimitry Andric #include "llvm/Support/CommandLine.h" 4181ad6265SDimitry Andric #include "llvm/Support/Debug.h" 4281ad6265SDimitry Andric #include "llvm/Support/FormatVariadic.h" 43bdd1243dSDimitry Andric #include <algorithm> 4481ad6265SDimitry Andric #include <cstdint> 4581ad6265SDimitry Andric #include <functional> 4681ad6265SDimitry Andric #include <numeric> 4781ad6265SDimitry Andric 4881ad6265SDimitry Andric #define DEBUG_TYPE "misexpect" 4981ad6265SDimitry Andric 5081ad6265SDimitry Andric using namespace llvm; 5181ad6265SDimitry Andric using namespace misexpect; 5281ad6265SDimitry Andric 5381ad6265SDimitry Andric namespace llvm { 5481ad6265SDimitry Andric 5581ad6265SDimitry Andric // Command line option to enable/disable the warning when profile data suggests 5681ad6265SDimitry Andric // a mismatch with the use of the llvm.expect intrinsic 5781ad6265SDimitry Andric static cl::opt<bool> PGOWarnMisExpect( 5881ad6265SDimitry Andric "pgo-warn-misexpect", cl::init(false), cl::Hidden, 5981ad6265SDimitry Andric cl::desc("Use this option to turn on/off " 6081ad6265SDimitry Andric "warnings about incorrect usage of llvm.expect intrinsics.")); 6181ad6265SDimitry Andric 62*0fca6ea1SDimitry Andric // Command line option for setting the diagnostic tolerance threshold 63bdd1243dSDimitry Andric static cl::opt<uint32_t> MisExpectTolerance( 6481ad6265SDimitry Andric "misexpect-tolerance", cl::init(0), 65*0fca6ea1SDimitry Andric cl::desc("Prevents emitting diagnostics when profile counts are " 6681ad6265SDimitry Andric "within N% of the threshold..")); 6781ad6265SDimitry Andric 6881ad6265SDimitry Andric } // namespace llvm 6981ad6265SDimitry Andric 7081ad6265SDimitry Andric namespace { 7181ad6265SDimitry Andric 7281ad6265SDimitry Andric bool isMisExpectDiagEnabled(LLVMContext &Ctx) { 7381ad6265SDimitry Andric return PGOWarnMisExpect || Ctx.getMisExpectWarningRequested(); 7481ad6265SDimitry Andric } 7581ad6265SDimitry Andric 76bdd1243dSDimitry Andric uint32_t getMisExpectTolerance(LLVMContext &Ctx) { 77bdd1243dSDimitry Andric return std::max(static_cast<uint32_t>(MisExpectTolerance), 7881ad6265SDimitry Andric Ctx.getDiagnosticsMisExpectTolerance()); 7981ad6265SDimitry Andric } 8081ad6265SDimitry Andric 8181ad6265SDimitry Andric Instruction *getInstCondition(Instruction *I) { 8281ad6265SDimitry Andric assert(I != nullptr && "MisExpect target Instruction cannot be nullptr"); 8381ad6265SDimitry Andric Instruction *Ret = nullptr; 8481ad6265SDimitry Andric if (auto *B = dyn_cast<BranchInst>(I)) { 8581ad6265SDimitry Andric Ret = dyn_cast<Instruction>(B->getCondition()); 8681ad6265SDimitry Andric } 8781ad6265SDimitry Andric // TODO: Find a way to resolve condition location for switches 8881ad6265SDimitry Andric // Using the condition of the switch seems to often resolve to an earlier 8981ad6265SDimitry Andric // point in the program, i.e. the calculation of the switch condition, rather 9081ad6265SDimitry Andric // than the switch's location in the source code. Thus, we should use the 9181ad6265SDimitry Andric // instruction to get source code locations rather than the condition to 9281ad6265SDimitry Andric // improve diagnostic output, such as the caret. If the same problem exists 9381ad6265SDimitry Andric // for branch instructions, then we should remove this function and directly 9481ad6265SDimitry Andric // use the instruction 9581ad6265SDimitry Andric // 9681ad6265SDimitry Andric else if (auto *S = dyn_cast<SwitchInst>(I)) { 9781ad6265SDimitry Andric Ret = dyn_cast<Instruction>(S->getCondition()); 9881ad6265SDimitry Andric } 9981ad6265SDimitry Andric return Ret ? Ret : I; 10081ad6265SDimitry Andric } 10181ad6265SDimitry Andric 10281ad6265SDimitry Andric void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx, 10381ad6265SDimitry Andric uint64_t ProfCount, uint64_t TotalCount) { 10481ad6265SDimitry Andric double PercentageCorrect = (double)ProfCount / TotalCount; 10581ad6265SDimitry Andric auto PerString = 10681ad6265SDimitry Andric formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount); 10781ad6265SDimitry Andric auto RemStr = formatv( 10881ad6265SDimitry Andric "Potential performance regression from use of the llvm.expect intrinsic: " 10981ad6265SDimitry Andric "Annotation was correct on {0} of profiled executions.", 11081ad6265SDimitry Andric PerString); 11181ad6265SDimitry Andric Twine Msg(PerString); 11281ad6265SDimitry Andric Instruction *Cond = getInstCondition(I); 11381ad6265SDimitry Andric if (isMisExpectDiagEnabled(Ctx)) 11481ad6265SDimitry Andric Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg)); 11581ad6265SDimitry Andric OptimizationRemarkEmitter ORE(I->getParent()->getParent()); 11681ad6265SDimitry Andric ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str()); 11781ad6265SDimitry Andric } 11881ad6265SDimitry Andric 11981ad6265SDimitry Andric } // namespace 12081ad6265SDimitry Andric 12181ad6265SDimitry Andric namespace llvm { 12281ad6265SDimitry Andric namespace misexpect { 12381ad6265SDimitry Andric 12481ad6265SDimitry Andric void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights, 12581ad6265SDimitry Andric ArrayRef<uint32_t> ExpectedWeights) { 12681ad6265SDimitry Andric // To determine if we emit a diagnostic, we need to compare the branch weights 12781ad6265SDimitry Andric // from the profile to those added by the llvm.expect intrinsic. 12881ad6265SDimitry Andric // So first, we extract the "likely" and "unlikely" weights from 12981ad6265SDimitry Andric // ExpectedWeights And determine the correct weight in the profile to compare 13081ad6265SDimitry Andric // against. 13181ad6265SDimitry Andric uint64_t LikelyBranchWeight = 0, 13281ad6265SDimitry Andric UnlikelyBranchWeight = std::numeric_limits<uint32_t>::max(); 13381ad6265SDimitry Andric size_t MaxIndex = 0; 13481ad6265SDimitry Andric for (size_t Idx = 0, End = ExpectedWeights.size(); Idx < End; Idx++) { 13581ad6265SDimitry Andric uint32_t V = ExpectedWeights[Idx]; 13681ad6265SDimitry Andric if (LikelyBranchWeight < V) { 13781ad6265SDimitry Andric LikelyBranchWeight = V; 13881ad6265SDimitry Andric MaxIndex = Idx; 13981ad6265SDimitry Andric } 14081ad6265SDimitry Andric if (UnlikelyBranchWeight > V) { 14181ad6265SDimitry Andric UnlikelyBranchWeight = V; 14281ad6265SDimitry Andric } 14381ad6265SDimitry Andric } 14481ad6265SDimitry Andric 14581ad6265SDimitry Andric const uint64_t ProfiledWeight = RealWeights[MaxIndex]; 14681ad6265SDimitry Andric const uint64_t RealWeightsTotal = 14781ad6265SDimitry Andric std::accumulate(RealWeights.begin(), RealWeights.end(), (uint64_t)0, 14881ad6265SDimitry Andric std::plus<uint64_t>()); 14981ad6265SDimitry Andric const uint64_t NumUnlikelyTargets = RealWeights.size() - 1; 15081ad6265SDimitry Andric 15181ad6265SDimitry Andric uint64_t TotalBranchWeight = 15281ad6265SDimitry Andric LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets); 15381ad6265SDimitry Andric 154*0fca6ea1SDimitry Andric // Failing this assert means that we have corrupted metadata. 155*0fca6ea1SDimitry Andric assert((TotalBranchWeight >= LikelyBranchWeight) && (TotalBranchWeight > 0) && 156*0fca6ea1SDimitry Andric "TotalBranchWeight is less than the Likely branch weight"); 15781ad6265SDimitry Andric 15881ad6265SDimitry Andric // To determine our threshold value we need to obtain the branch probability 15981ad6265SDimitry Andric // for the weights added by llvm.expect and use that proportion to calculate 16081ad6265SDimitry Andric // our threshold based on the collected profile data. 16181ad6265SDimitry Andric auto LikelyProbablilty = BranchProbability::getBranchProbability( 16281ad6265SDimitry Andric LikelyBranchWeight, TotalBranchWeight); 16381ad6265SDimitry Andric 16481ad6265SDimitry Andric uint64_t ScaledThreshold = LikelyProbablilty.scale(RealWeightsTotal); 16581ad6265SDimitry Andric 16681ad6265SDimitry Andric // clamp tolerance range to [0, 100) 16781ad6265SDimitry Andric auto Tolerance = getMisExpectTolerance(I.getContext()); 168bdd1243dSDimitry Andric Tolerance = std::clamp(Tolerance, 0u, 99u); 16981ad6265SDimitry Andric 17081ad6265SDimitry Andric // Allow users to relax checking by N% i.e., if they use a 5% tolerance, 17181ad6265SDimitry Andric // then we check against 0.95*ScaledThreshold 17281ad6265SDimitry Andric if (Tolerance > 0) 17381ad6265SDimitry Andric ScaledThreshold *= (1.0 - Tolerance / 100.0); 17481ad6265SDimitry Andric 17581ad6265SDimitry Andric // When the profile weight is below the threshold, we emit the diagnostic 17681ad6265SDimitry Andric if (ProfiledWeight < ScaledThreshold) 17781ad6265SDimitry Andric emitMisexpectDiagnostic(&I, I.getContext(), ProfiledWeight, 17881ad6265SDimitry Andric RealWeightsTotal); 17981ad6265SDimitry Andric } 18081ad6265SDimitry Andric 18181ad6265SDimitry Andric void checkBackendInstrumentation(Instruction &I, 18281ad6265SDimitry Andric const ArrayRef<uint32_t> RealWeights) { 183*0fca6ea1SDimitry Andric // Backend checking assumes any existing weight comes from an `llvm.expect` 184*0fca6ea1SDimitry Andric // intrinsic. However, SampleProfiling + ThinLTO add branch weights multiple 185*0fca6ea1SDimitry Andric // times, leading to an invalid assumption in our checking. Backend checks 186*0fca6ea1SDimitry Andric // should only operate on branch weights that carry the "!expected" field, 187*0fca6ea1SDimitry Andric // since they are guaranteed to be added by the LowerExpectIntrinsic pass. 188*0fca6ea1SDimitry Andric if (!hasBranchWeightOrigin(I)) 189*0fca6ea1SDimitry Andric return; 190bdd1243dSDimitry Andric SmallVector<uint32_t> ExpectedWeights; 191bdd1243dSDimitry Andric if (!extractBranchWeights(I, ExpectedWeights)) 19281ad6265SDimitry Andric return; 19381ad6265SDimitry Andric verifyMisExpect(I, RealWeights, ExpectedWeights); 19481ad6265SDimitry Andric } 19581ad6265SDimitry Andric 19681ad6265SDimitry Andric void checkFrontendInstrumentation(Instruction &I, 19781ad6265SDimitry Andric const ArrayRef<uint32_t> ExpectedWeights) { 198bdd1243dSDimitry Andric SmallVector<uint32_t> RealWeights; 199bdd1243dSDimitry Andric if (!extractBranchWeights(I, RealWeights)) 20081ad6265SDimitry Andric return; 20181ad6265SDimitry Andric verifyMisExpect(I, RealWeights, ExpectedWeights); 20281ad6265SDimitry Andric } 20381ad6265SDimitry Andric 20481ad6265SDimitry Andric void checkExpectAnnotations(Instruction &I, 20581ad6265SDimitry Andric const ArrayRef<uint32_t> ExistingWeights, 206bdd1243dSDimitry Andric bool IsFrontend) { 207bdd1243dSDimitry Andric if (IsFrontend) { 20881ad6265SDimitry Andric checkFrontendInstrumentation(I, ExistingWeights); 20981ad6265SDimitry Andric } else { 21081ad6265SDimitry Andric checkBackendInstrumentation(I, ExistingWeights); 21181ad6265SDimitry Andric } 21281ad6265SDimitry Andric } 21381ad6265SDimitry Andric 21481ad6265SDimitry Andric } // namespace misexpect 21581ad6265SDimitry Andric } // namespace llvm 21681ad6265SDimitry Andric #undef DEBUG_TYPE 217